diff options
author | Ralph Amissah <ralph@amissah.com> | 2014-09-12 21:35:54 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2014-09-12 21:35:54 -0400 |
commit | 165276e6c2e357716fb501a40774f4214c506c08 (patch) | |
tree | 355b1813443ff37b64ca073a0637264d3a7f3077 /lib/sisu/v5 | |
parent | v5 v6: ao, feedback on markup document structure (heading level) errors (diff) |
v5 v6: ao, auto naming (segment & tag), exceptions, avoidance of name collisions (sisu_5.6.6)
* (i) auto naming attempts to extract a suitable numeric name from document
headings where one is available; at times a number appears to be available
but is re-used elsewhere in the document (as where the document has Parts or
Sections and numbering restarts from 1 within each of them); (ii) such re-use
is treated as an exception and triggers an alternative sequential naming
scheme; (iii) if a document heading does not provide a suitable numeric name,
another sequential scheme is used
Diffstat (limited to 'lib/sisu/v5')
-rw-r--r-- | lib/sisu/v5/ao_numbering.rb | 247 |
1 files changed, 187 insertions, 60 deletions
diff --git a/lib/sisu/v5/ao_numbering.rb b/lib/sisu/v5/ao_numbering.rb index f9257e6a..6954a286 100644 --- a/lib/sisu/v5/ao_numbering.rb +++ b/lib/sisu/v5/ao_numbering.rb @@ -69,6 +69,27 @@ module SiSU_AO_Numbering @obj=@type=@ocn=@lv=@name=@index=@comment=nil @chosen_seg_names=[] end + def chosen_seg_names(chosen,chosen_seg_name,dob,md,type) + @chosen_seg_names=if chosen.compact.uniq.length \ + == chosen.compact.length + chosen + else + if md.opt.act[:maintenance][:set]==:on + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:green). + mark( + "duplicated auto segment name: #{type} #{chosen}\n" \ + + "#{chosen}\n" \ + + " manually name level 1 segments '1~given_name'\n" \ + + 'filename: ' + md.fns + "\n" \ + + 'heading text: "' + dob.obj + '"' + "\n" \ + + 'duplication: "' + chosen_seg_name + '" (level: ' + dob.lv + '; numbering type: ' + type.to_s + ')' + ) + end + chosen=chosen[0..-2] + chosen_seg_name=auto_numbering_exceptions(chosen,md,dob) + chosen << chosen_seg_name + end + end def number_of_segments? 
if @@segments_count==0 @data.each do |dob| @@ -314,6 +335,78 @@ module SiSU_AO_Numbering possible_seg_name.to_s end end + def auto_numbering_exceptions(chosen_seg_names_,md,dob) + number_make=case dob.lv.to_i + when 1 + @num_exc={ + t1: @num_exc[:t1] += 1, + t2: 0, + t3: 0, + t4: 0 + } + Mx[:segname_prefix_auto_num_other] + '_' \ + + @num_exc[:t1].to_s + when 2 + @num_exc={ + t1: @num_exc[:t1], + t2: @num_exc[:t2] += 1, + t3: 0, + t4: 0 + } + Mx[:segname_prefix_auto_num_other] + '_' \ + + @num_exc[:t1].to_s + '_' \ + + @num_exc[:t2].to_s + when 3 + @num_exc={ + t1: @num_exc[:t1], + t2: @num_exc[:t2], + t3: @num_exc[:t3] += 1, + t4: 0 + } + Mx[:segname_prefix_auto_num_other] + '_' \ + + @num_exc[:t1].to_s + '_' \ + + @num_exc[:t2].to_s + '_' \ + + @num_exc[:t3].to_s + when 4 + @num_exc[:t4] += 1 + @num_exc={ + t1: @num_exc[:t1], + t2: @num_exc[:t2], + t3: @num_exc[:t3], + t4: @num_exc[:t4] += 1 + } + Mx[:segname_prefix_auto_num_other] + '_' \ + + @num_exc[:t1].to_s + '_' \ + + @num_exc[:t2].to_s + '_' \ + + @num_exc[:t3].to_s + '_' \ + + @num_exc[:t4].to_s + end + end + def check_that_seg_names_are_unique(chosen_seg_names_,chosen_seg_name,type,md,dob) + begin + chosen_seg_names_ << chosen_seg_name + chosen_seg_names_=chosen_seg_names(chosen_seg_names_,chosen_seg_name,dob,md,type) + if chosen_seg_names_.compact.uniq.length \ + == chosen_seg_names_.compact.length + #check that all auto given seg names are unique + chosen_seg_names_=chosen_seg_names(chosen_seg_names_,chosen_seg_name,dob,md,type) + chosen_seg_name + else + SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:green). 
+ mark( + "duplicated auto segment name: #{type} #{chosen_seg_name}\n" \ + + "#{chosen_seg_names_}\n" \ + + " manually name level 1 segments '1~given_name'\n" \ + + 'filename: ' + md.fns + "\n" \ + + 'heading text: "' + dob.obj + '"' + "\n" \ + + 'duplication: "' + chosen_seg_name + '" (level: ' + dob.lv + '; numbering type: ' + type.to_s + ')' + ) + chosen_seg_name=auto_numbering_exceptions(chosen_seg_names_,md,dob) + check_that_seg_names_are_unique(chosen_seg_names_,chosen_seg_name,:exception,md,dob) + end + rescue + end + end def auto_seg_name(possible_seg_name,heading_num_is,dob,type) prefix=case type when :auto then Mx[:segname_prefix_auto_num_provide] @@ -324,41 +417,49 @@ module SiSU_AO_Numbering possible_seg_name=possible_seg_name. gsub(/\.$/,'') end - chosen_seg_name=if possible_seg_name.to_s =~/^[0-9]+[.]?$/m \ - and possible_seg_name.to_i <= heading_num_is.to_i \ - and dob.lv == '1' - prefix + leading_zeros_fixed_width_number(possible_seg_name) - elsif possible_seg_name.to_s =~/^[0-9]+[.,:-]*$/m \ - and dob.lv == '1' + @chosen_seg_name= + if dob.lv=='4' \ + and possible_seg_name.to_s =~/^[0-9]+(?:[.,:-][0-9]){3}/m possible_seg_name=possible_seg_name.to_s. gsub(/(?:[:,-]|\W)/,'.'). gsub(/\.$/,'') prefix + possible_seg_name - elsif possible_seg_name.to_s =~ - /^[0-9]+[.,:-][0-9]+[.,:-]*$/m \ - and dob.lv == '2' + elsif dob.lv=='3' \ + and possible_seg_name.to_s =~/^[0-9]+(?:[.,:-][0-9]){2}/m possible_seg_name=possible_seg_name.to_s. gsub(/(?:[:,-]|\W)/,'.'). gsub(/\.$/,'') prefix + possible_seg_name - elsif possible_seg_name.to_s =~ - /^[0-9]+[.,:-][0-9]+[.,:-][0-9][\d.,:-]*$/m \ - and dob.lv == '3' + elsif dob.lv=='2' \ + and possible_seg_name.to_s =~/^[0-9]+(?:[.,:-][0-9]){1}/m possible_seg_name=possible_seg_name.to_s. gsub(/(?:[:,-]|\W)/,'.'). 
gsub(/\.$/,'') prefix + possible_seg_name + elsif dob.lv=='1' \ + and possible_seg_name.to_s =~/^[0-9]+[:,-]?$/m + if possible_seg_name.to_i <= heading_num_is.to_i + prefix + leading_zeros_fixed_width_number(possible_seg_name) + else + possible_seg_name=possible_seg_name.to_s. + gsub(/(?:[:,-]|\W)/,'.'). + gsub(/\.$/,'') + prefix + possible_seg_name + end else - Mx[:segname_prefix_auto_num_other]*dob.lv.to_i \ - + possible_seg_name.to_s + @chosen_seg_name=auto_numbering_exceptions(@chosen_seg_names,md,dob) end - @chosen_seg_names << chosen_seg_name - if @chosen_seg_names.compact.uniq.length == @chosen_seg_names.compact.length #checks that all auto given seg names are unique - chosen_seg_name - else - SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:green). - mark("duplicated auto segment name: #{type} #{chosen_seg_name} - #{@chosen_seg_names}; manually name level 1 segments '1~given_name'\n #{@md.fns}\n #{dob.obj}") - exit + check_that_seg_names_are_unique(@chosen_seg_names,@chosen_seg_name,type,@md,dob) + end + def set_name_and_tags(dob,possible_seg_name) + if @md.seg_names.is_a?(Array) \ + and not @md.seg_names.include?(possible_seg_name) + dob.name=possible_seg_name + dob.tags=set_tags(dob.tags,dob.name) + @md.seg_names << possible_seg_name + elsif (@md.opt.act[:verbose_plus][:set]==:on \ + or @md.opt.act[:maintenance][:set]==:on) + puts 'warn, there may be a conflicting numbering scheme' end end def name_para_seg_filename(data) #segment naming, remaining @@ -377,7 +478,7 @@ module SiSU_AO_Numbering # if there is none a sequential number is designated, preceded by an underscore @tuned_file,@unique_auto_name=[],[] tags={} - art_filename_auto=1 + @art_filename_auto=0 @counter=1 if not @md.seg_autoname_safe \ and (@md.opt.act[:verbose_plus][:set]==:on \ @@ -385,10 +486,11 @@ module SiSU_AO_Numbering puts 'manual segment names, numbers used as names, risk warning (segmented html)' end ocn_html_seg=[] + @num_exc={ t1: 0, t2: 0, t3: 0, t4: 0 } data.each do |dob| if 
dob.is==:heading \ && dob.ln \ - and dob.ln.to_s =~/^[456]/ + and dob.ln.to_s =~/^[4-7]/ heading_num_is=/^\d+:(\d+);\d/m.match(dob.node)[1] if dob.ln==4 \ and not dob.name \ @@ -396,47 +498,72 @@ module SiSU_AO_Numbering @md.set_heading_seg=true end if dob.name !~/^\S+/ \ - and dob.obj =~/^\s*(?:\S+\s+)?([0-9][0-9.,:-]*)/m #heading starts with a recognised numeric or word followed by a recognised numeric construct, use that as name - possible_seg_name=$1 - possible_seg_name= - auto_seg_name(possible_seg_name,heading_num_is,dob,:extract) - possible_seg_name=possible_seg_name. - gsub(/(?:[:,-]|\W)/,'.'). - gsub(/\.$/,'') - if @md.seg_names.is_a?(Array) \ - and not @md.seg_names.include?(possible_seg_name) - dob.name=possible_seg_name - dob.tags=set_tags(dob.tags,dob.name) - @md.seg_names << possible_seg_name - elsif (@md.opt.act[:verbose_plus][:set]==:on \ - or @md.opt.act[:maintenance][:set]==:on) - puts 'warn, there may be a conflicting numbering scheme' + and dob.ln.to_s =~/^[5-7]/ \ + and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9])+)/m + #heading starts with a recognised numeric + #or word followed by a recognised numeric construct, + #use that as name + if dob.ln==7 \ + and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9]){3})/m + possible_seg_name=$1. + gsub(/(?:[:,-]|\W)/,'.'). + gsub(/\.$/,'') + possible_seg_name= + auto_seg_name(possible_seg_name,heading_num_is,dob,:extract) + set_name_and_tags(dob,possible_seg_name) + elsif dob.ln==6 \ + and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9]){2})/m + possible_seg_name=$1. + gsub(/(?:[:,-]|\W)/,'.'). + gsub(/\.$/,'') + possible_seg_name= + auto_seg_name(possible_seg_name,heading_num_is,dob,:extract) + set_name_and_tags(dob,possible_seg_name) + elsif dob.ln==5 \ + and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9]){1})/m + possible_seg_name=$1. + gsub(/(?:[:,-]|\W)/,'.'). 
+ gsub(/\.$/,'') + possible_seg_name= + auto_seg_name(possible_seg_name,heading_num_is,dob,:extract) + set_name_and_tags(dob,possible_seg_name) end end - if dob.ln==4 \ - and dob.name #extract segment name from embedded document structure info - if @md.seg_names.is_a?(Array) \ - and not @md.seg_names.include?(dob.name) - dob.tags=set_tags(dob.tags,dob.name) - @md.seg_names << dob.name + if dob.ln==4 + if dob.name !~/^\S+/ \ + and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+)/m + #heading starts with a recognised numeric + #or word followed by a recognised numeric construct, + #use that as name + possible_seg_name=$1 + possible_seg_name= + auto_seg_name(possible_seg_name,heading_num_is,dob,:extract) + set_name_and_tags(dob,possible_seg_name) end - end - if dob.ln==4 \ - and not dob.name #if still no segment name, provide a numerical one - possible_seg_name= - auto_seg_name(art_filename_auto,heading_num_is,dob,:auto) - if @md.seg_names.is_a?(Array) \ - and not @md.seg_names.include?(possible_seg_name) - dob.name=possible_seg_name - dob.tags=set_tags(dob.tags,dob.name) - @md.seg_names << possible_seg_name - else puts 'segment name (numbering) error' + if dob.name + #extract segment name from embedded document structure info + if @md.seg_names.is_a?(Array) \ + and not @md.seg_names.include?(dob.name) + dob.tags=set_tags(dob.tags,dob.name) + @md.seg_names << dob.name + end + else + #if no segment name, + #provide a numerical one + @art_filename_auto+=1 + possible_seg_name= + auto_seg_name(@art_filename_auto,heading_num_is,dob,:auto) + if @md.seg_names.is_a?(Array) \ + and not @md.seg_names.include?(possible_seg_name) + dob.name=possible_seg_name + dob.tags=set_tags(dob.tags,dob.name) + @md.seg_names << possible_seg_name + else puts 'segment name (numbering) error' + end + end + if not dob.name #should not occur + puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}" end - art_filename_auto+=1 - end - if dob.ln==4 \ - and not dob.name #should not occur - puts "e r r o r 
-\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}" end end if (dob.is ==:heading \ |