about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2014-02-05 00:36:10 -0500
committer Ralph Amissah <ralph@amissah.com> 2014-02-05 00:36:10 -0500
commit a7dc41fdb5c4fc69ca2189412e9ba47ddf9aa084 (patch)
tree 0f20f40bec5dd80af5830dbbdc70978734d30aa5 /lib/sisu
parent v5 v6: epub, odt, check availability of zip program (diff)
v5 v6: ao, set document tags
Diffstat (limited to 'lib/sisu')
-rw-r--r-- lib/sisu/v5/ao_doc_str.rb 5
-rw-r--r-- lib/sisu/v5/ao_numbering.rb 34
-rw-r--r-- lib/sisu/v6/ao_doc_str.rb 5
-rw-r--r-- lib/sisu/v6/ao_numbering.rb 34
4 files changed, 50 insertions, 28 deletions
diff --git a/lib/sisu/v5/ao_doc_str.rb b/lib/sisu/v5/ao_doc_str.rb
index f7168c26..dd7f32f3 100644
--- a/lib/sisu/v5/ao_doc_str.rb
+++ b/lib/sisu/v5/ao_doc_str.rb
@@ -163,10 +163,13 @@ module SiSU_AO_DocumentStructureExtract
str=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i,
"\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}").
gsub(/ [ ]+/i,' ')
- tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten
+ tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten.uniq
str=str.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks?
end
tags=nametag ? (tags << nametag) : tags
+ tags.each do |t|
+ t.gsub!(/[^a-z0-9._-]/,'')
+ end
end
[str,tags]
end
diff --git a/lib/sisu/v5/ao_numbering.rb b/lib/sisu/v5/ao_numbering.rb
index 8caa5ccc..984675cc 100644
--- a/lib/sisu/v5/ao_numbering.rb
+++ b/lib/sisu/v5/ao_numbering.rb
@@ -78,6 +78,14 @@ module SiSU_AO_Numbering
data=set_heading_top(data) unless @md.set_heading_top
[data,tags_map,ocn_html_seg_map]
end
+ def set_tags(tags,tag) #returns tags with tag prepended, or tags unchanged when tag is empty or matches /^\d+$/
+ tags=if not tag.empty? \
+ and tag !~/^\d+$/ #rejects a tag matching /^\d+$/ (digits only)
+ tag=tag.gsub(/[^a-z0-9._-]/,'') #strips every character outside a-z 0-9 . _ - (uppercase letters are removed, not downcased)
+ [tag,tags].flatten #cleaned tag goes first, followed by the existing tags; NOTE(review): the checks above ran before the gsub, so a tag that the cleanup reduces to '' or to digits only is still added
+ else tags #tags passed back as received
+ end
+ end
def number_plaintext_para(data)
@tuned_file=[]
data.each do |dob|
@@ -168,10 +176,10 @@ module SiSU_AO_Numbering
and not @md.seg_names.include?(title_no)
if dob.ln==no1
dob.name="#{title_no}" if not dob.name
- dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,title_no)
tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase
tag=heading_tag_clean(tag)
- dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,tag)
dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \
? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} "))
: (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later
@@ -179,7 +187,7 @@ module SiSU_AO_Numbering
if dob.ln !=no1 \
and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
dob.name ="#{title_no}" if not dob.name
- dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,title_no)
dob.obj=dob.obj.gsub(/^/,"#{title_no}. ")
end
@md.seg_names << title_no
@@ -187,28 +195,28 @@ module SiSU_AO_Numbering
if dob.ln!=no1 \
and dob.name!~/^[a-z_\.]+$/ \
and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
- dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,title_no)
dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ")
end
end
if dob.ln==no1 #watch because here you change dob.name
- dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,"h#{title_no}")
end
if dob.ln==no2 #watch because here you change dob.name
t_no2+=1; t_no3=0
title_no="#{t_no1}.#{t_no2}"
- dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,"h#{title_no}")
dob=number_sub_heading(dob,no2,title_no)
end
if dob.ln==no3 #watch because here you change dob.name
t_no3+=1
title_no="#{t_no1}.#{t_no2}.#{t_no3}"
- dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,"h#{title_no}")
dob=number_sub_heading(dob,no3,title_no)
end
elsif dob.ln.to_s =~/^[0-6]/ \
and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,dob.name)
dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
end
elsif dob.is ==:heading \
@@ -219,13 +227,13 @@ module SiSU_AO_Numbering
and dob.ln.to_s =~/^[0-9]/ \
and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
dob.name=$1
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,dob.name)
end
if @md.toc_lev_limit
end
elsif defined? dob.name \
and dob.name
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,dob.name)
end
dob.tags=dob.tags.uniq if defined? dob.tags
@tuned_file << dob
@@ -312,7 +320,7 @@ module SiSU_AO_Numbering
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(possible_seg_name)
dob.name=possible_seg_name
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
+ dob.tags=set_tags(dob.tags,dob.name)
@md.seg_names << possible_seg_name
elsif (@md.opt.act[:verbose_plus][:set]==:on \
or @md.opt.act[:maintenance][:set]==:on)
@@ -323,7 +331,7 @@ module SiSU_AO_Numbering
and dob.name #extract segment name from embedded document structure info
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(dob.name)
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
+ dob.tags=set_tags(dob.tags,dob.name)
@md.seg_names << dob.name
end
end
@@ -334,7 +342,7 @@ module SiSU_AO_Numbering
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(segn_auto)
dob.name=segn_auto
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,dob.name)
@md.seg_names << segn_auto
else puts 'segment name (numbering) error'
end
diff --git a/lib/sisu/v6/ao_doc_str.rb b/lib/sisu/v6/ao_doc_str.rb
index 0cdd553b..d8c012fd 100644
--- a/lib/sisu/v6/ao_doc_str.rb
+++ b/lib/sisu/v6/ao_doc_str.rb
@@ -163,10 +163,13 @@ module SiSU_AO_DocumentStructureExtract
str=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i,
"\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}").
gsub(/ [ ]+/i,' ')
- tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten
+ tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten.uniq
str=str.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks?
end
tags=nametag ? (tags << nametag) : tags
+ tags.each do |t|
+ t.gsub!(/[^a-z0-9._-]/,'')
+ end
end
[str,tags]
end
diff --git a/lib/sisu/v6/ao_numbering.rb b/lib/sisu/v6/ao_numbering.rb
index 23d9fd73..acb2351a 100644
--- a/lib/sisu/v6/ao_numbering.rb
+++ b/lib/sisu/v6/ao_numbering.rb
@@ -78,6 +78,14 @@ module SiSU_AO_Numbering
data=set_heading_top(data) unless @md.set_heading_top
[data,tags_map,ocn_html_seg_map]
end
+ def set_tags(tags,tag) #returns tags with tag prepended, or tags unchanged when tag is empty or matches /^\d+$/
+ tags=if not tag.empty? \
+ and tag !~/^\d+$/ #rejects a tag matching /^\d+$/ (digits only)
+ tag=tag.gsub(/[^a-z0-9._-]/,'') #strips every character outside a-z 0-9 . _ - (uppercase letters are removed, not downcased)
+ [tag,tags].flatten #cleaned tag goes first, followed by the existing tags; NOTE(review): the checks above ran before the gsub, so a tag that the cleanup reduces to '' or to digits only is still added
+ else tags #tags passed back as received
+ end
+ end
def number_plaintext_para(data)
@tuned_file=[]
data.each do |dob|
@@ -168,10 +176,10 @@ module SiSU_AO_Numbering
and not @md.seg_names.include?(title_no)
if dob.ln==no1
dob.name="#{title_no}" if not dob.name
- dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,title_no)
tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase
tag=heading_tag_clean(tag)
- dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,tag)
dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \
? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} "))
: (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later
@@ -179,7 +187,7 @@ module SiSU_AO_Numbering
if dob.ln !=no1 \
and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
dob.name ="#{title_no}" if not dob.name
- dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,title_no)
dob.obj=dob.obj.gsub(/^/,"#{title_no}. ")
end
@md.seg_names << title_no
@@ -187,28 +195,28 @@ module SiSU_AO_Numbering
if dob.ln!=no1 \
and dob.name!~/^[a-z_\.]+$/ \
and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
- dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,title_no)
dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ")
end
end
if dob.ln==no1 #watch because here you change dob.name
- dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,"h#{title_no}")
end
if dob.ln==no2 #watch because here you change dob.name
t_no2+=1; t_no3=0
title_no="#{t_no1}.#{t_no2}"
- dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,"h#{title_no}")
dob=number_sub_heading(dob,no2,title_no)
end
if dob.ln==no3 #watch because here you change dob.name
t_no3+=1
title_no="#{t_no1}.#{t_no2}.#{t_no3}"
- dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,"h#{title_no}")
dob=number_sub_heading(dob,no3,title_no)
end
elsif dob.ln.to_s =~/^[0-6]/ \
and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,dob.name)
dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
end
elsif dob.is ==:heading \
@@ -219,13 +227,13 @@ module SiSU_AO_Numbering
and dob.ln.to_s =~/^[0-9]/ \
and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
dob.name=$1
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,dob.name)
end
if @md.toc_lev_limit
end
elsif defined? dob.name \
and dob.name
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,dob.name)
end
dob.tags=dob.tags.uniq if defined? dob.tags
@tuned_file << dob
@@ -312,7 +320,7 @@ module SiSU_AO_Numbering
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(possible_seg_name)
dob.name=possible_seg_name
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
+ dob.tags=set_tags(dob.tags,dob.name)
@md.seg_names << possible_seg_name
elsif (@md.opt.act[:verbose_plus][:set]==:on \
or @md.opt.act[:maintenance][:set]==:on)
@@ -323,7 +331,7 @@ module SiSU_AO_Numbering
and dob.name #extract segment name from embedded document structure info
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(dob.name)
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
+ dob.tags=set_tags(dob.tags,dob.name)
@md.seg_names << dob.name
end
end
@@ -334,7 +342,7 @@ module SiSU_AO_Numbering
if @md.seg_names.is_a?(Array) \
and not @md.seg_names.include?(segn_auto)
dob.name=segn_auto
- dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
+ dob.tags=set_tags(dob.tags,dob.name)
@md.seg_names << segn_auto
else puts 'segment name (numbering) error'
end