From a7dc41fdb5c4fc69ca2189412e9ba47ddf9aa084 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 5 Feb 2014 00:36:10 -0500 Subject: v5 v6: ao, set document tags --- lib/sisu/v5/ao_doc_str.rb | 5 ++++- lib/sisu/v5/ao_numbering.rb | 34 +++++++++++++++++++++------------- lib/sisu/v6/ao_doc_str.rb | 5 ++++- lib/sisu/v6/ao_numbering.rb | 34 +++++++++++++++++++++------------- 4 files changed, 50 insertions(+), 28 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v5/ao_doc_str.rb b/lib/sisu/v5/ao_doc_str.rb index f7168c26..dd7f32f3 100644 --- a/lib/sisu/v5/ao_doc_str.rb +++ b/lib/sisu/v5/ao_doc_str.rb @@ -163,10 +163,13 @@ module SiSU_AO_DocumentStructureExtract str=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i, "\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}"). gsub(/ [ ]+/i,' ') - tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten + tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten.uniq str=str.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks? end tags=nametag ? (tags << nametag) : tags + tags.each do |t| + t.gsub!(/[^a-z0-9._-]/,'') + end end [str,tags] end diff --git a/lib/sisu/v5/ao_numbering.rb b/lib/sisu/v5/ao_numbering.rb index 8caa5ccc..984675cc 100644 --- a/lib/sisu/v5/ao_numbering.rb +++ b/lib/sisu/v5/ao_numbering.rb @@ -78,6 +78,14 @@ module SiSU_AO_Numbering data=set_heading_top(data) unless @md.set_heading_top [data,tags_map,ocn_html_seg_map] end + def set_tags(tags,tag) + tags=if not tag.empty? \ + and tag !~/^\d+$/ + tag=tag.gsub(/[^a-z0-9._-]/,'') + [tag,tags].flatten + else tags + end + end def number_plaintext_para(data) @tuned_file=[] data.each do |dob| @@ -168,10 +176,10 @@ module SiSU_AO_Numbering and not @md.seg_names.include?(title_no) if dob.ln==no1 dob.name="#{title_no}" if not dob.name - dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,title_no) tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase tag=heading_tag_clean(tag) - dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,tag) dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \ ? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} ")) : (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later @@ -179,7 +187,7 @@ module SiSU_AO_Numbering if dob.ln !=no1 \ and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review dob.name ="#{title_no}" if not dob.name - dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,title_no) dob.obj=dob.obj.gsub(/^/,"#{title_no}. ") end @md.seg_names << title_no @@ -187,28 +195,28 @@ module SiSU_AO_Numbering if dob.ln!=no1 \ and dob.name!~/^[a-z_\.]+$/ \ and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on - dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,title_no) dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ") end end if dob.ln==no1 #watch because here you change dob.name - dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,"h#{title_no}") end if dob.ln==no2 #watch because here you change dob.name t_no2+=1; t_no3=0 title_no="#{t_no1}.#{t_no2}" - dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,"h#{title_no}") dob=number_sub_heading(dob,no2,title_no) end if dob.ln==no3 #watch because here you change dob.name t_no3+=1 title_no="#{t_no1}.#{t_no2}.#{t_no3}" - dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,"h#{title_no}") dob=number_sub_heading(dob,no3,title_no) end elsif dob.ln.to_s =~/^[0-6]/ \ and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005 - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,dob.name) dob.name.gsub(/^([a-z_\.]+)-$/,'\1') end elsif dob.is ==:heading \ @@ -219,13 +227,13 @@ module SiSU_AO_Numbering and dob.ln.to_s =~/^[0-9]/ \ and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d dob.name=$1 - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,dob.name) end if @md.toc_lev_limit end elsif defined? dob.name \ and dob.name - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,dob.name) end dob.tags=dob.tags.uniq if defined? dob.tags @tuned_file << dob @@ -312,7 +320,7 @@ module SiSU_AO_Numbering if @md.seg_names.is_a?(Array) \ and not @md.seg_names.include?(possible_seg_name) dob.name=possible_seg_name - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ + dob.tags=set_tags(dob.tags,dob.name) @md.seg_names << possible_seg_name elsif (@md.opt.act[:verbose_plus][:set]==:on \ or @md.opt.act[:maintenance][:set]==:on) @@ -323,7 +331,7 @@ module SiSU_AO_Numbering and dob.name #extract segment name from embedded document structure info if @md.seg_names.is_a?(Array) \ and not @md.seg_names.include?(dob.name) - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ + dob.tags=set_tags(dob.tags,dob.name) @md.seg_names << dob.name end end @@ -334,7 +342,7 @@ module SiSU_AO_Numbering if @md.seg_names.is_a?(Array) \ and not @md.seg_names.include?(segn_auto) dob.name=segn_auto - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,dob.name) @md.seg_names << segn_auto else puts 'segment name (numbering) error' end diff --git a/lib/sisu/v6/ao_doc_str.rb b/lib/sisu/v6/ao_doc_str.rb index 0cdd553b..d8c012fd 100644 --- a/lib/sisu/v6/ao_doc_str.rb +++ b/lib/sisu/v6/ao_doc_str.rb @@ -163,10 +163,13 @@ module SiSU_AO_DocumentStructureExtract str=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i, "\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}"). gsub(/ [ ]+/i,' ') - tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten + tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten.uniq str=str.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks? end tags=nametag ? (tags << nametag) : tags + tags.each do |t| + t.gsub!(/[^a-z0-9._-]/,'') + end end [str,tags] end diff --git a/lib/sisu/v6/ao_numbering.rb b/lib/sisu/v6/ao_numbering.rb index 23d9fd73..acb2351a 100644 --- a/lib/sisu/v6/ao_numbering.rb +++ b/lib/sisu/v6/ao_numbering.rb @@ -78,6 +78,14 @@ module SiSU_AO_Numbering data=set_heading_top(data) unless @md.set_heading_top [data,tags_map,ocn_html_seg_map] end + def set_tags(tags,tag) + tags=if not tag.empty? \ + and tag !~/^\d+$/ + tag=tag.gsub(/[^a-z0-9._-]/,'') + [tag,tags].flatten + else tags + end + end def number_plaintext_para(data) @tuned_file=[] data.each do |dob| @@ -168,10 +176,10 @@ module SiSU_AO_Numbering and not @md.seg_names.include?(title_no) if dob.ln==no1 dob.name="#{title_no}" if not dob.name - dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,title_no) tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase tag=heading_tag_clean(tag) - dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,tag) dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \ ? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} ")) : (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later @@ -179,7 +187,7 @@ module SiSU_AO_Numbering if dob.ln !=no1 \ and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review dob.name ="#{title_no}" if not dob.name - dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,title_no) dob.obj=dob.obj.gsub(/^/,"#{title_no}. ") end @md.seg_names << title_no @@ -187,28 +195,28 @@ module SiSU_AO_Numbering if dob.ln!=no1 \ and dob.name!~/^[a-z_\.]+$/ \ and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on - dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,title_no) dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ") end end if dob.ln==no1 #watch because here you change dob.name - dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,"h#{title_no}") end if dob.ln==no2 #watch because here you change dob.name t_no2+=1; t_no3=0 title_no="#{t_no1}.#{t_no2}" - dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,"h#{title_no}") dob=number_sub_heading(dob,no2,title_no) end if dob.ln==no3 #watch because here you change dob.name t_no3+=1 title_no="#{t_no1}.#{t_no2}.#{t_no3}" - dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,"h#{title_no}") dob=number_sub_heading(dob,no3,title_no) end elsif dob.ln.to_s =~/^[0-6]/ \ and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005 - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,dob.name) dob.name.gsub(/^([a-z_\.]+)-$/,'\1') end elsif dob.is ==:heading \ @@ -219,13 +227,13 @@ module SiSU_AO_Numbering and dob.ln.to_s =~/^[0-9]/ \ and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d dob.name=$1 - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,dob.name) end if @md.toc_lev_limit end elsif defined? dob.name \ and dob.name - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,dob.name) end dob.tags=dob.tags.uniq if defined? dob.tags @tuned_file << dob @@ -312,7 +320,7 @@ module SiSU_AO_Numbering if @md.seg_names.is_a?(Array) \ and not @md.seg_names.include?(possible_seg_name) dob.name=possible_seg_name - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ + dob.tags=set_tags(dob.tags,dob.name) @md.seg_names << possible_seg_name elsif (@md.opt.act[:verbose_plus][:set]==:on \ or @md.opt.act[:maintenance][:set]==:on) @@ -323,7 +331,7 @@ module SiSU_AO_Numbering and dob.name #extract segment name from embedded document structure info if @md.seg_names.is_a?(Array) \ and not @md.seg_names.include?(dob.name) - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ + dob.tags=set_tags(dob.tags,dob.name) @md.seg_names << dob.name end end @@ -334,7 +342,7 @@ module SiSU_AO_Numbering if @md.seg_names.is_a?(Array) \ and not @md.seg_names.include?(segn_auto) dob.name=segn_auto - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs + dob.tags=set_tags(dob.tags,dob.name) @md.seg_names << segn_auto else puts 'segment name (numbering) error' end -- cgit v1.2.3