aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v5
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2014-09-12 21:35:54 -0400
committer Ralph Amissah <ralph@amissah.com> 2014-09-12 21:35:54 -0400
commit 165276e6c2e357716fb501a40774f4214c506c08 (patch)
tree 355b1813443ff37b64ca073a0637264d3a7f3077 /lib/sisu/v5
parent v5 v6: ao, feedback on markup document structure (heading level) errors (diff)
v5 v6: ao, auto naming (segment & tag), exceptions, avoidance of name collisions sisu_5.6.6
* (i) auto naming attempts to extract a suitable numeric name from document headings if available; at times one appears to be available but is re-used elsewhere in the document (as where the document has Parts or Sections and naming starts again from 1 within each of these); (ii) this is treated as an exception and an alternative sequential naming scheme is triggered; (iii) if a document heading does not provide a suitable numeric name, another sequential scheme is used
Diffstat (limited to 'lib/sisu/v5')
-rw-r--r-- lib/sisu/v5/ao_numbering.rb 247
1 files changed, 187 insertions, 60 deletions
diff --git a/lib/sisu/v5/ao_numbering.rb b/lib/sisu/v5/ao_numbering.rb
index f9257e6a..6954a286 100644
--- a/lib/sisu/v5/ao_numbering.rb
+++ b/lib/sisu/v5/ao_numbering.rb
@@ -69,6 +69,27 @@ module SiSU_AO_Numbering
@obj=@type=@ocn=@lv=@name=@index=@comment=nil
@chosen_seg_names=[]
end
+ def chosen_seg_names(chosen,chosen_seg_name,dob,md,type)
+ @chosen_seg_names=if chosen.compact.uniq.length \
+ == chosen.compact.length
+ chosen
+ else
+ if md.opt.act[:maintenance][:set]==:on
+ SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:green).
+ mark(
+ "duplicated auto segment name: #{type} #{chosen}\n" \
+ + "#{chosen}\n" \
+ + " manually name level 1 segments '1~given_name'\n" \
+ + 'filename: ' + md.fns + "\n" \
+ + 'heading text: "' + dob.obj + '"' + "\n" \
+ + 'duplication: "' + chosen_seg_name + '" (level: ' + dob.lv + '; numbering type: ' + type.to_s + ')'
+ )
+ end
+ chosen=chosen[0..-2]
+ chosen_seg_name=auto_numbering_exceptions(chosen,md,dob)
+ chosen << chosen_seg_name
+ end
+ end
def number_of_segments?
if @@segments_count==0
@data.each do |dob|
@@ -314,6 +335,78 @@ module SiSU_AO_Numbering
possible_seg_name.to_s
end
end
+ def auto_numbering_exceptions(chosen_seg_names_,md,dob)
+ number_make=case dob.lv.to_i
+ when 1
+ @num_exc={
+ t1: @num_exc[:t1] += 1,
+ t2: 0,
+ t3: 0,
+ t4: 0
+ }
+ Mx[:segname_prefix_auto_num_other] + '_' \
+ + @num_exc[:t1].to_s
+ when 2
+ @num_exc={
+ t1: @num_exc[:t1],
+ t2: @num_exc[:t2] += 1,
+ t3: 0,
+ t4: 0
+ }
+ Mx[:segname_prefix_auto_num_other] + '_' \
+ + @num_exc[:t1].to_s + '_' \
+ + @num_exc[:t2].to_s
+ when 3
+ @num_exc={
+ t1: @num_exc[:t1],
+ t2: @num_exc[:t2],
+ t3: @num_exc[:t3] += 1,
+ t4: 0
+ }
+ Mx[:segname_prefix_auto_num_other] + '_' \
+ + @num_exc[:t1].to_s + '_' \
+ + @num_exc[:t2].to_s + '_' \
+ + @num_exc[:t3].to_s
+ when 4
+ @num_exc[:t4] += 1
+ @num_exc={
+ t1: @num_exc[:t1],
+ t2: @num_exc[:t2],
+ t3: @num_exc[:t3],
+ t4: @num_exc[:t4] += 1
+ }
+ Mx[:segname_prefix_auto_num_other] + '_' \
+ + @num_exc[:t1].to_s + '_' \
+ + @num_exc[:t2].to_s + '_' \
+ + @num_exc[:t3].to_s + '_' \
+ + @num_exc[:t4].to_s
+ end
+ end
+ def check_that_seg_names_are_unique(chosen_seg_names_,chosen_seg_name,type,md,dob)
+ begin
+ chosen_seg_names_ << chosen_seg_name
+ chosen_seg_names_=chosen_seg_names(chosen_seg_names_,chosen_seg_name,dob,md,type)
+ if chosen_seg_names_.compact.uniq.length \
+ == chosen_seg_names_.compact.length
+ #check that all auto given seg names are unique
+ chosen_seg_names_=chosen_seg_names(chosen_seg_names_,chosen_seg_name,dob,md,type)
+ chosen_seg_name
+ else
+ SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:green).
+ mark(
+ "duplicated auto segment name: #{type} #{chosen_seg_name}\n" \
+ + "#{chosen_seg_names_}\n" \
+ + " manually name level 1 segments '1~given_name'\n" \
+ + 'filename: ' + md.fns + "\n" \
+ + 'heading text: "' + dob.obj + '"' + "\n" \
+ + 'duplication: "' + chosen_seg_name + '" (level: ' + dob.lv + '; numbering type: ' + type.to_s + ')'
+ )
+ chosen_seg_name=auto_numbering_exceptions(chosen_seg_names_,md,dob)
+ check_that_seg_names_are_unique(chosen_seg_names_,chosen_seg_name,:exception,md,dob)
+ end
+ rescue
+ end
+ end
def auto_seg_name(possible_seg_name,heading_num_is,dob,type)
prefix=case type
when :auto then Mx[:segname_prefix_auto_num_provide]
@@ -324,41 +417,49 @@ module SiSU_AO_Numbering
possible_seg_name=possible_seg_name.
gsub(/\.$/,'')
end
- chosen_seg_name=if possible_seg_name.to_s =~/^[0-9]+[.]?$/m \
- and possible_seg_name.to_i <= heading_num_is.to_i \
- and dob.lv == '1'
- prefix + leading_zeros_fixed_width_number(possible_seg_name)
- elsif possible_seg_name.to_s =~/^[0-9]+[.,:-]*$/m \
- and dob.lv == '1'
+ @chosen_seg_name=
+ if dob.lv=='4' \
+ and possible_seg_name.to_s =~/^[0-9]+(?:[.,:-][0-9]){3}/m
possible_seg_name=possible_seg_name.to_s.
gsub(/(?:[:,-]|\W)/,'.').
gsub(/\.$/,'')
prefix + possible_seg_name
- elsif possible_seg_name.to_s =~
- /^[0-9]+[.,:-][0-9]+[.,:-]*$/m \
- and dob.lv == '2'
+ elsif dob.lv=='3' \
+ and possible_seg_name.to_s =~/^[0-9]+(?:[.,:-][0-9]){2}/m
possible_seg_name=possible_seg_name.to_s.
gsub(/(?:[:,-]|\W)/,'.').
gsub(/\.$/,'')
prefix + possible_seg_name
- elsif possible_seg_name.to_s =~
- /^[0-9]+[.,:-][0-9]+[.,:-][0-9][\d.,:-]*$/m \
- and dob.lv == '3'
+ elsif dob.lv=='2' \
+ and possible_seg_name.to_s =~/^[0-9]+(?:[.,:-][0-9]){1}/m
possible_seg_name=possible_seg_name.to_s.
gsub(/(?:[:,-]|\W)/,'.').
gsub(/\.$/,'')
prefix + possible_seg_name
+ elsif dob.lv=='1' \
+ and possible_seg_name.to_s =~/^[0-9]+[:,-]?$/m
+ if possible_seg_name.to_i <= heading_num_is.to_i
+ prefix + leading_zeros_fixed_width_number(possible_seg_name)
+ else
+ possible_seg_name=possible_seg_name.to_s.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ prefix + possible_seg_name
+ end
else
- Mx[:segname_prefix_auto_num_other]*dob.lv.to_i \
- + possible_seg_name.to_s
+ @chosen_seg_name=auto_numbering_exceptions(@chosen_seg_names,md,dob)
end
- @chosen_seg_names << chosen_seg_name
- if @chosen_seg_names.compact.uniq.length == @chosen_seg_names.compact.length #checks that all auto given seg names are unique
- chosen_seg_name
- else
- SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:green).
- mark("duplicated auto segment name: #{type} #{chosen_seg_name} - #{@chosen_seg_names}; manually name level 1 segments '1~given_name'\n #{@md.fns}\n #{dob.obj}")
- exit
+ check_that_seg_names_are_unique(@chosen_seg_names,@chosen_seg_name,type,@md,dob)
+ end
+ def set_name_and_tags(dob,possible_seg_name)
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(possible_seg_name)
+ dob.name=possible_seg_name
+ dob.tags=set_tags(dob.tags,dob.name)
+ @md.seg_names << possible_seg_name
+ elsif (@md.opt.act[:verbose_plus][:set]==:on \
+ or @md.opt.act[:maintenance][:set]==:on)
+ puts 'warn, there may be a conflicting numbering scheme'
end
end
def name_para_seg_filename(data) #segment naming, remaining
@@ -377,7 +478,7 @@ module SiSU_AO_Numbering
# if there is none a sequential number is designated, preceded by an underscore
@tuned_file,@unique_auto_name=[],[]
tags={}
- art_filename_auto=1
+ @art_filename_auto=0
@counter=1
if not @md.seg_autoname_safe \
and (@md.opt.act[:verbose_plus][:set]==:on \
@@ -385,10 +486,11 @@ module SiSU_AO_Numbering
puts 'manual segment names, numbers used as names, risk warning (segmented html)'
end
ocn_html_seg=[]
+ @num_exc={ t1: 0, t2: 0, t3: 0, t4: 0 }
data.each do |dob|
if dob.is==:heading \
&& dob.ln \
- and dob.ln.to_s =~/^[456]/
+ and dob.ln.to_s =~/^[4-7]/
heading_num_is=/^\d+:(\d+);\d/m.match(dob.node)[1]
if dob.ln==4 \
and not dob.name \
@@ -396,47 +498,72 @@ module SiSU_AO_Numbering
@md.set_heading_seg=true
end
if dob.name !~/^\S+/ \
- and dob.obj =~/^\s*(?:\S+\s+)?([0-9][0-9.,:-]*)/m #heading starts with a recognised numeric or word followed by a recognised numeric construct, use that as name
- possible_seg_name=$1
- possible_seg_name=
- auto_seg_name(possible_seg_name,heading_num_is,dob,:extract)
- possible_seg_name=possible_seg_name.
- gsub(/(?:[:,-]|\W)/,'.').
- gsub(/\.$/,'')
- if @md.seg_names.is_a?(Array) \
- and not @md.seg_names.include?(possible_seg_name)
- dob.name=possible_seg_name
- dob.tags=set_tags(dob.tags,dob.name)
- @md.seg_names << possible_seg_name
- elsif (@md.opt.act[:verbose_plus][:set]==:on \
- or @md.opt.act[:maintenance][:set]==:on)
- puts 'warn, there may be a conflicting numbering scheme'
+ and dob.ln.to_s =~/^[5-7]/ \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9])+)/m
+ #heading starts with a recognised numeric
+ #or word followed by a recognised numeric construct,
+ #use that as name
+ if dob.ln==7 \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9]){3})/m
+ possible_seg_name=$1.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ possible_seg_name=
+ auto_seg_name(possible_seg_name,heading_num_is,dob,:extract)
+ set_name_and_tags(dob,possible_seg_name)
+ elsif dob.ln==6 \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9]){2})/m
+ possible_seg_name=$1.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ possible_seg_name=
+ auto_seg_name(possible_seg_name,heading_num_is,dob,:extract)
+ set_name_and_tags(dob,possible_seg_name)
+ elsif dob.ln==5 \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9]){1})/m
+ possible_seg_name=$1.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ possible_seg_name=
+ auto_seg_name(possible_seg_name,heading_num_is,dob,:extract)
+ set_name_and_tags(dob,possible_seg_name)
end
end
- if dob.ln==4 \
- and dob.name #extract segment name from embedded document structure info
- if @md.seg_names.is_a?(Array) \
- and not @md.seg_names.include?(dob.name)
- dob.tags=set_tags(dob.tags,dob.name)
- @md.seg_names << dob.name
+ if dob.ln==4
+ if dob.name !~/^\S+/ \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+)/m
+ #heading starts with a recognised numeric
+ #or word followed by a recognised numeric construct,
+ #use that as name
+ possible_seg_name=$1
+ possible_seg_name=
+ auto_seg_name(possible_seg_name,heading_num_is,dob,:extract)
+ set_name_and_tags(dob,possible_seg_name)
end
- end
- if dob.ln==4 \
- and not dob.name #if still no segment name, provide a numerical one
- possible_seg_name=
- auto_seg_name(art_filename_auto,heading_num_is,dob,:auto)
- if @md.seg_names.is_a?(Array) \
- and not @md.seg_names.include?(possible_seg_name)
- dob.name=possible_seg_name
- dob.tags=set_tags(dob.tags,dob.name)
- @md.seg_names << possible_seg_name
- else puts 'segment name (numbering) error'
+ if dob.name
+ #extract segment name from embedded document structure info
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(dob.name)
+ dob.tags=set_tags(dob.tags,dob.name)
+ @md.seg_names << dob.name
+ end
+ else
+ #if no segment name,
+ #provide a numerical one
+ @art_filename_auto+=1
+ possible_seg_name=
+ auto_seg_name(@art_filename_auto,heading_num_is,dob,:auto)
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(possible_seg_name)
+ dob.name=possible_seg_name
+ dob.tags=set_tags(dob.tags,dob.name)
+ @md.seg_names << possible_seg_name
+ else puts 'segment name (numbering) error'
+ end
+ end
+ if not dob.name #should not occur
+ puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
end
- art_filename_auto+=1
- end
- if dob.ln==4 \
- and not dob.name #should not occur
- puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
end
end
if (dob.is ==:heading \