summaryrefslogtreecommitdiffstats
path: root/lib/sisu/ao_numbering.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/ao_numbering.rb')
-rw-r--r--lib/sisu/ao_numbering.rb720
1 files changed, 720 insertions, 0 deletions
diff --git a/lib/sisu/ao_numbering.rb b/lib/sisu/ao_numbering.rb
new file mode 100644
index 0000000..fdf5460
--- /dev/null
+++ b/lib/sisu/ao_numbering.rb
@@ -0,0 +1,720 @@
+# encoding: utf-8
+=begin
+
+* Name: SiSU
+
+** Description: documents, structuring, processing, publishing, search
+*** system environment, resource control and configuration details
+
+** Author: Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
+ All Rights Reserved.
+
+** License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/licenses/gpl.html>
+
+ <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
+
+** SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+** Hompages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+** Git
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/ao_numbering.rb;hb=HEAD>
+
+=end
+module SiSU_AO_Numbering
+ class Numbering
+ attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment
+ @@segments_count=0
+ def initialize(md,data,fnx,process)
+ @md,@data,@fnx,@process=md,data,fnx,process
+ @obj=@type=@ocn=@lv=@name=@index=@comment=nil
+ @chosen_seg_names=[]
+ end
+ def chosen_seg_names(chosen,chosen_seg_name,dob,md,type)
+ @chosen_seg_names=if chosen.compact.uniq.length \
+ == chosen.compact.length
+ chosen
+ else
+ if md.opt.act[:maintenance][:set]==:on
+ SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:green).
+ mark(
+ "duplicated auto segment name: #{type} #{chosen}\n" \
+ + "#{chosen}\n" \
+ + " manually name level 1 segments '1~given_name'\n" \
+ + 'filename: ' + md.fns + "\n" \
+ + 'heading text: "' + dob.obj + '"' + "\n" \
+ + 'duplication: "' + chosen_seg_name + '" (level: ' + dob.lv + '; numbering type: ' + type.to_s + ')'
+ )
+ end
+ chosen=chosen[0..-2]
+ chosen_seg_name=auto_numbering_exceptions(chosen,md,dob)
+ chosen << chosen_seg_name
+ end
+ end
+ def number_of_segments?
+ if @@segments_count==0
+ @data.each do |dob|
+ if dob.is == :heading \
+ and dob.lv == '1'
+ @@segments_count += 1
+ end
+ end
+ @@segments_count
+ else @@segments_count
+ end
+ end
+ def numbering_song
+ begin
+ data=@data
+ data=number_plaintext_para(data)
+ data=auto_number_heading_ie_title(data.compact) #tr issue
+ data=ocn(data.compact) #watch
+ data=xml(data.compact)
+ data=minor_numbering(data.compact)
+ if @process==:complete
+ data,tags_map,ocn_html_seg_map=name_para_seg_filename(data)
+ end
+ data=set_heading_top(data) unless @md.set_heading_top
+ [data,tags_map,ocn_html_seg_map]
+ ensure
+ @@segments_count=0
+ end
+ end
+ def set_tags(tags,tag)
+ tags=if not tag.empty? \
+ and tag !~/^\d+$/
+ tag=tag.gsub(/[^a-z0-9._-]/,'')
+ [tag,tags].flatten
+ else tags
+ end
+ end
+ def number_plaintext_para(data)
+ @tuned_file=[]
+ data.each do |dob|
+ if (dob.of !=:block \
+ && dob.of !=:comment \
+ && dob.of !=:layout) \
+ && dob.ocn_ #and dob.obj !~ /#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ #FIX
+ dob.obj=dob.obj.gsub(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
+ end
+ unless dob.obj.is_a?(Array)
+ dob.obj=dob.obj.gsub(/^\s+/,'').
+ gsub(/\s$/,"\n")
+ end
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def number_sub_heading(dob,num,title_no)
+ unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix
+ dob.obj=case dob.name
+ when /-/ then dob.obj.gsub(/^/,"#{title_no} ")
+ when /^#/ then dob.obj.gsub(/^/,"#{title_no} ")
+ when /^[a-z_\.]+/ then dob.obj.gsub(/^/,"#{title_no} ")
+ else
+ dob.name=title_no if dob.name=~/^$/ #where title contains title number
+ dob.obj.gsub(/^/,"#{title_no} ") if title_no =~/\d+/ #main, where title number is to be provided #watch changed placement
+ end
+ if @md.toc_lev_limit \
+ and @md.toc_lev_limit < num
+ dob.obj=dob.obj.gsub(/^/,'!_ ') #bold line, watch
+ end
+ end
+ dob
+ end
+ def heading_tag_clean(heading_tag)
+ heading_tag=heading_tag.
+ gsub(/[ ]+/,'_').
+ gsub(/["']/,'').
+ gsub(/[\/]/,'-').
+ gsub(/#{Mx[:fa_bold_o]}|#{Mx[:fa_bold_c]}/,'').
+ gsub(/#{Mx[:fa_italics_o]}|#{Mx[:fa_italics_c]}/,'').
+ gsub(/#{Mx[:fa_underscore_o]}|#{Mx[:fa_underscore_c]}/,'').
+ gsub(/#{Mx[:fa_cite_o]}|#{Mx[:fa_cite_c]}/,'').
+ gsub(/#{Mx[:fa_insert_o]}|#{Mx[:fa_insert_c]}/,'').
+ gsub(/#{Mx[:fa_strike_o]}|#{Mx[:fa_strike_c]}/,'').
+ gsub(/#{Mx[:fa_superscript_o]}|#{Mx[:fa_superscript_c]}/,'').
+ gsub(/#{Mx[:fa_subscript_o]}|#{Mx[:fa_subscript_c]}/,'').
+ gsub(/#{Mx[:fa_hilite_o]}|#{Mx[:fa_hilite_c]}/,'').
+ gsub(/#{Mx[:gl_bullet]}/,'')
+ end
+ def auto_number_heading_ie_title(data) #also does some segment naming
+ if defined? @md.make.num_top \
+ and @md.make.num_top \
+ and @md.make.num_top !~/^$/
+ input||=@md.make.num_top
+ end
+ num_top=(input ? input.to_i : nil)
+ t_no1=t_no2=t_no3=0
+ if num_top
+ no1=num_top; no2=(num_top + 1); no3=(num_top + 2)
+ end
+ chapter_number_counter=0
+ data=data.compact
+ @tuned_file=data.each.map do |dob| #@md.seg_names << [additions to segment names]
+ title_no=nil
+ if dob.is ==:heading \
+ && dob.autonum_ \
+ and defined? @md.make.num_top \
+ and @md.make.num_top !~/^$/
+ if dob.lv=='1' \
+ and dob.obj =~/^#\s|\s#(?:\s|$)/
+ chapter_number_counter +=1
+ dob.obj=dob.obj.gsub(/^#\s/,"#{chapter_number_counter} ").
+ gsub(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1")
+ end
+ if dob.ln==no1
+ @subnumber=1
+ @subnumber=0 if dob.ln==no1
+ end
+ if dob.ln.to_s =~/^[0-6]/ \
+ and not dob.use_ ==:dummy \
+ and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix
+ if dob.ln==no1
+ t_no1+=1; t_no2=0; t_no3=0
+ title_no="#{t_no1}"
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(title_no)
+ if dob.ln==no1
+ dob.name="#{title_no}" if not dob.name
+ dob.tags=set_tags(dob.tags,title_no)
+ tag=dob.obj.
+ gsub(/(Article|Clause|Section|Chapter)\s+/,
+ "\\1_#{title_no}").
+ downcase
+ tag=heading_tag_clean(tag)
+ dob.tags=set_tags(dob.tags,tag)
+ dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \
+ ? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} "))
+ : (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later
+ end
+ if dob.ln !=no1 \
+ and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
+ dob.name ="#{title_no}" if not dob.name
+ dob.tags=set_tags(dob.tags,title_no)
+ dob.obj=dob.obj.gsub(/^/,"#{title_no}. ")
+ end
+ @md.seg_names << title_no
+ end
+ if dob.ln!=no1 \
+ and dob.name!~/^[a-z_\.]+$/ \
+ and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
+ dob.tags=set_tags(dob.tags,title_no)
+ dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ")
+ end
+ end
+ if dob.ln==no1 #watch because here you change dob.name
+ dob.tags=set_tags(dob.tags,"h#{title_no}")
+ end
+ if dob.ln==no2 #watch because here you change dob.name
+ t_no2+=1; t_no3=0
+ title_no="#{t_no1}.#{t_no2}"
+ dob.tags=set_tags(dob.tags,"h#{title_no}")
+ dob=number_sub_heading(dob,no2,title_no)
+ end
+ if dob.ln==no3 #watch because here you change dob.name
+ t_no3+=1
+ title_no="#{t_no1}.#{t_no2}.#{t_no3}"
+ dob.tags=set_tags(dob.tags,"h#{title_no}")
+ dob=number_sub_heading(dob,no3,title_no)
+ end
+ elsif dob.ln.to_s =~/^[0-6]/ \
+ and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005
+ dob.tags=set_tags(dob.tags,dob.name)
+ dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
+ end
+ elsif dob.is ==:heading \
+ and dob.autonum_ \
+ and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
+ #here lies a bug, as is nil when run from -Dv --update, FIX
+ if (dob.name.nil? or dob.name.empty?) \
+ and dob.ln.to_s =~/^[0-9]/ \
+ and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
+ dob.name=$1
+ dob.tags=set_tags(dob.tags,dob.name)
+ end
+ if @md.toc_lev_limit
+ end
+ elsif defined? dob.name \
+ and dob.name
+ dob.tags=set_tags(dob.tags,dob.name)
+ end
+ dob.tags=dob.tags.uniq if defined? dob.tags
+ dob
+ end.flatten
+ end
+ def ocn(data) #and auto segment numbering increment
+ @tuned_file=SiSU_AO_DocumentStructureExtract::OCN.new(@md,data,@fnx,@process).ocn
+ @tuned_file
+ end
+ def xml(data)
+ @tuned_file=SiSU_AO_DocumentStructureExtract::XML.new(@md,data).dom
+ @tuned_file
+ end
+ def minor_numbering(data) #and auto segment numbering increment
+ number_small,letter_small=0,0
+ letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
+ @tuned_file=data.each.map do |dob|
+ if dob.of ==:heading \
+ || dob.of ==:heading_insert \
+ || dob.of ==:para \
+ || dob.of ==:block
+ if dob.is ==:heading \
+ and dob.ln.to_s=~/^[0-9]/ #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
+ number_small,letter_small=0,0
+ elsif dob.is ==:para
+ if dob.obj =~/^#[ 1]/ \
+ and dob.obj !~/^#\s+(?:~#)?$/
+ letter_small=0
+ number_small=0 if dob.obj =~ /^#1/
+ number_small+=1
+ dob.obj=dob.obj.gsub(/^#[ 1]/,"#{number_small}. ")
+ end
+ if dob.obj =~/^_# /
+ dob.obj=dob.obj.gsub(/^_# /,"#{letter[letter_small]}. ")
+ dob.indent='1'
+ letter_small+=1
+ end
+ end
+ end
+ dob
+ end.flatten
+ end
+ def leading_zeros_fixed_width_number(possible_seg_name)
+ if possible_seg_name.to_s =~/^([0-9]+?\.|[0-9]+)$/m #!~/[.,:-]+/
+ possible_seg_name=possible_seg_name.to_s.
+ gsub(/\.$/,'')
+ nl=possible_seg_name.to_s.length
+ zero='0'
+ zeros_fixed_width=number_of_segments?.to_s.length
+ zero_width=(zeros_fixed_width - nl)
+ zero_width == 0 \
+ ? possible_seg_name.to_s
+ : zero*zero_width +
+ possible_seg_name.to_s
+ end
+ end
+ def auto_numbering_exceptions(chosen_seg_names_,md,dob)
+ number_make=case dob.lv.to_i
+ when 1
+ @num_exc={
+ t1: @num_exc[:t1] += 1,
+ t2: 0,
+ t3: 0,
+ t4: 0
+ }
+ Mx[:segname_prefix_auto_num_other] + '_' \
+ + @num_exc[:t1].to_s
+ when 2
+ @num_exc={
+ t1: @num_exc[:t1],
+ t2: @num_exc[:t2] += 1,
+ t3: 0,
+ t4: 0
+ }
+ Mx[:segname_prefix_auto_num_other] + '_' \
+ + @num_exc[:t1].to_s + '_' \
+ + @num_exc[:t2].to_s
+ when 3
+ @num_exc={
+ t1: @num_exc[:t1],
+ t2: @num_exc[:t2],
+ t3: @num_exc[:t3] += 1,
+ t4: 0
+ }
+ Mx[:segname_prefix_auto_num_other] + '_' \
+ + @num_exc[:t1].to_s + '_' \
+ + @num_exc[:t2].to_s + '_' \
+ + @num_exc[:t3].to_s
+ when 4
+ @num_exc[:t4] += 1
+ @num_exc={
+ t1: @num_exc[:t1],
+ t2: @num_exc[:t2],
+ t3: @num_exc[:t3],
+ t4: @num_exc[:t4] += 1
+ }
+ Mx[:segname_prefix_auto_num_other] + '_' \
+ + @num_exc[:t1].to_s + '_' \
+ + @num_exc[:t2].to_s + '_' \
+ + @num_exc[:t3].to_s + '_' \
+ + @num_exc[:t4].to_s
+ end
+ end
+ def check_that_seg_names_are_unique(chosen_seg_names_,chosen_seg_name,type,md,dob)
+ begin
+ chosen_seg_names_ << chosen_seg_name
+ chosen_seg_names_=chosen_seg_names(chosen_seg_names_,chosen_seg_name,dob,md,type)
+ if chosen_seg_names_.compact.uniq.length \
+ == chosen_seg_names_.compact.length
+ #check that all auto given seg names are unique
+ chosen_seg_names_=chosen_seg_names(chosen_seg_names_,chosen_seg_name,dob,md,type)
+ chosen_seg_name
+ else
+ SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:green).
+ mark(
+ "duplicated auto segment name: #{type} #{chosen_seg_name}\n" \
+ + "#{chosen_seg_names_}\n" \
+ + " manually name level 1 segments '1~given_name'\n" \
+ + 'filename: ' + md.fns + "\n" \
+ + 'heading text: "' + dob.obj + '"' + "\n" \
+ + 'duplication: "' + chosen_seg_name + '" (level: ' + dob.lv + '; numbering type: ' + type.to_s + ')'
+ )
+ chosen_seg_name=auto_numbering_exceptions(chosen_seg_names_,md,dob)
+ check_that_seg_names_are_unique(chosen_seg_names_,chosen_seg_name,:exception,md,dob)
+ end
+ rescue
+ end
+ end
+ def auto_seg_name(possible_seg_name,heading_num_is,dob,type)
+ prefix=case type
+ when :auto then Mx[:segname_prefix_auto_num_provide]
+ when :extract then Mx[:segname_prefix_auto_num_extract]
+ else '_'*dob.lv.to_i #should not occur
+ end
+ if possible_seg_name =~/^[0-9]+?\.$/m #!~/[.,:-]+/
+ possible_seg_name=possible_seg_name.
+ gsub(/\.$/,'')
+ end
+ @chosen_seg_name=
+ if dob.lv=='4' \
+ and possible_seg_name.to_s =~/^[0-9]+(?:[.,:-][0-9]){3}/m
+ possible_seg_name=possible_seg_name.to_s.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ prefix + possible_seg_name
+ elsif dob.lv=='3' \
+ and possible_seg_name.to_s =~/^[0-9]+(?:[.,:-][0-9]){2}/m
+ possible_seg_name=possible_seg_name.to_s.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ prefix + possible_seg_name
+ elsif dob.lv=='2' \
+ and possible_seg_name.to_s =~/^[0-9]+(?:[.,:-][0-9]){1}/m
+ possible_seg_name=possible_seg_name.to_s.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ prefix + possible_seg_name
+ elsif dob.lv=='1' \
+ and possible_seg_name.to_s =~/^[0-9]+[:,-]?$/m
+ if possible_seg_name.to_i <= heading_num_is.to_i
+ prefix + leading_zeros_fixed_width_number(possible_seg_name)
+ else
+ possible_seg_name=possible_seg_name.to_s.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ prefix + possible_seg_name
+ end
+ else
+ @chosen_seg_name=auto_numbering_exceptions(@chosen_seg_names,md,dob)
+ end
+ check_that_seg_names_are_unique(@chosen_seg_names,@chosen_seg_name,type,@md,dob)
+ end
+ def set_name_and_tags(dob,possible_seg_name)
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(possible_seg_name)
+ dob.name=possible_seg_name
+ dob.tags=set_tags(dob.tags,dob.name)
+ @md.seg_names << possible_seg_name
+ elsif (@md.opt.act[:verbose_plus][:set]==:on \
+ or @md.opt.act[:maintenance][:set]==:on)
+ puts 'warn, there may be a conflicting numbering scheme'
+ end
+ end
+ def name_para_seg_filename(data) #segment naming, remaining
+ # paragraph name/numbering rules
+ # manual naming overrides, manual naming may be
+ # alpha-numeric characters mixed,
+ # numeric only (a number), if
+ # all segments have been named,
+ # the numbers used are over 1000 or
+ # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
+ # [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
+ # auto-naming takes the form of giving numbers to segments
+ # the rules for which are as follows
+ # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
+ # otherwise the level 4 segment number from the embedded document structure info is used
+ # if there is none a sequential number is designated, preceded by an underscore
+ @tuned_file,@unique_auto_name=[],[]
+ tags={}
+ @art_filename_auto=0
+ @counter=1
+ if not @md.seg_autoname_safe \
+ and (@md.opt.act[:verbose_plus][:set]==:on \
+ || @md.opt.act[:maintenance][:set]==:on)
+ puts 'manual segment names, numbers used as names, risk warning (segmented html)'
+ end
+ ocn_html_seg=[]
+ @num_exc={ t1: 0, t2: 0, t3: 0, t4: 0 }
+ data.each do |dob|
+ if dob.is==:heading \
+ && dob.ln \
+ and dob.ln.to_s =~/^[4-7]/
+ heading_num_is=/^\d+:(\d+);\d/m.match(dob.node)[1]
+ if dob.ln==4 \
+ and not dob.name \
+ and not @md.set_heading_seg
+ @md.set_heading_seg=true
+ end
+ if dob.name !~/^\S+/ \
+ and dob.ln.to_s =~/^[5-7]/ \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9])+)/m
+ #heading starts with a recognised numeric
+ #or word followed by a recognised numeric construct,
+ #use that as name
+ if dob.ln==7 \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9]){3})/m
+ possible_seg_name=$1.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ possible_seg_name=
+ auto_seg_name(possible_seg_name,heading_num_is,dob,:extract)
+ set_name_and_tags(dob,possible_seg_name)
+ elsif dob.ln==6 \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9]){2})/m
+ possible_seg_name=$1.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ possible_seg_name=
+ auto_seg_name(possible_seg_name,heading_num_is,dob,:extract)
+ set_name_and_tags(dob,possible_seg_name)
+ elsif dob.ln==5 \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+(?:[.,:-][0-9]){1})/m
+ possible_seg_name=$1.
+ gsub(/(?:[:,-]|\W)/,'.').
+ gsub(/\.$/,'')
+ possible_seg_name=
+ auto_seg_name(possible_seg_name,heading_num_is,dob,:extract)
+ set_name_and_tags(dob,possible_seg_name)
+ end
+ end
+ if dob.ln==4
+ if dob.name !~/^\S+/ \
+ and dob.obj =~/^\s*(?:\S+\s+)?([0-9]+)/m
+ #heading starts with a recognised numeric
+ #or word followed by a recognised numeric construct,
+ #use that as name
+ possible_seg_name=$1
+ possible_seg_name=
+ auto_seg_name(possible_seg_name,heading_num_is,dob,:extract)
+ set_name_and_tags(dob,possible_seg_name)
+ end
+ if dob.name
+ #extract segment name from embedded document structure info
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(dob.name)
+ dob.tags=set_tags(dob.tags,dob.name)
+ @md.seg_names << dob.name
+ end
+ else
+ #if no segment name,
+ #provide a numerical one
+ @art_filename_auto+=1
+ possible_seg_name=
+ auto_seg_name(@art_filename_auto,heading_num_is,dob,:auto)
+ if @md.seg_names.is_a?(Array) \
+ and not @md.seg_names.include?(possible_seg_name)
+ dob.name=possible_seg_name
+ dob.tags=set_tags(dob.tags,dob.name)
+ @md.seg_names << possible_seg_name
+ else puts 'segment name (numbering) error'
+ end
+ end
+ if not dob.name #should not occur
+ puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
+ end
+ end
+ end
+ if (dob.is ==:heading \
+ || dob.is ==:heading_insert) \
+ && dob.ln==4
+ @seg=dob.name
+ end
+ @tuned_file << if dob.is==:heading \
+ && (@md.pagenew || @md.pagebreak || @md.pageline)
+ m=dob.ln.to_s
+ dob_tmp=[]
+ if @md.pagenew.inspect =~/#{m}/
+ dob_tmp <<
+ SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) <<
+ dob
+ elsif @md.pagebreak.inspect =~/#{m}/
+ dob_tmp <<
+ SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) <<
+ dob
+ elsif @md.pageline.inspect =~/#{m}/
+ dob_tmp <<
+ SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) <<
+ dob
+ end
+ unless dob_tmp.length > 0; dob
+ else dob_tmp
+ end
+ else dob
+ end
+ if defined? dob.ocn \
+ and dob.ocn
+ @segname=((dob.is==:heading || dob.is==:heading_insert) && dob.ln==4 && (defined? dob.name)) \
+ ? (dob.name)
+ : @segname
+ tags["#{dob.ocn}"]={ segname: @segname }
+ ocn_html_seg[dob.ocn]=if (dob.is==:heading || dob.is==:heading_insert)
+ if dob.ln =~/[0-3]/
+ {
+ seg: nil,
+ level: dob.ln,
+ }
+ #elsif dob.ln =~/[4-6]/
+ else
+ {
+ seg: @seg,
+ level: dob.ln,
+ }
+ end
+ else
+ {
+ seg: @seg,
+ level: nil,
+ }
+ end
+ end
+ dob.tags=dob.tags.uniq if defined? dob.tags
+ if defined? dob.tags \
+ and dob.tags.length > 0
+ #@segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \
+ #? (dob.name) \
+ #: @segname
+ dob.tags.each do |y|
+ tags[y]={ ocn: dob.ocn.to_s, segname: @segname }
+ end
+ end
+ dob
+ end
+ ocn_html_seg.each_with_index do |ocn,i|
+ if ocn \
+ and ocn[:level].to_s=~/[1-3]/
+ (1..4).each do |x|
+ if ocn_html_seg[i+x] \
+ and ocn_html_seg[i+x][:level]==4
+ ocn[:seg]=ocn_html_seg[i+x][:seg]
+ end
+ end
+ end
+ end
+ if @md.seg_names.length > 0
+ @md.set_heading_seg=true
+ end
+ tuned_file=@tuned_file.flatten
+ [tuned_file,tags,ocn_html_seg]
+ end
+ def set_heading_top(data) #% make sure no false positives
+ unless @md.set_heading_top
+ if (@md.opt.act[:verbose_plus][:set]==:on \
+ or @md.opt.act[:maintenance][:set]==:on)
+ puts "\tdocument contains no top level heading, (will have to manufacture one)"
+ end
+ @tuned_file=[]
+ data.each do |t_o|
+ unless @md.set_heading_top
+ if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \
+ and t_o !~/\A\s*\Z/m
+ @md.set_heading_top=true
+ if defined? @md.title \
+ and @md.title \
+ and defined? @md.title.full \
+ and defined? @md.creator \
+ and @md.creator
+ head=@md.title.main \
+ ? ([@lv='1',@obj=@md.title.main])
+ : ([@lv='1',@obj='[no title provided]'])
+ @tuned_file << head
+ end
+ end
+ end
+ @tuned_file << t_o
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_heading_seg(data) #% make sure no false positives
+ unless @md.set_heading_seg
+ if (@md.opt.act[:verbose_plus][:set]==:on \
+ or @md.opt.act[:maintenance][:set]==:on)
+ puts "\tdocument contains no segment level, (will have to manufacture one)"
+ end
+ @tuned_file=[]
+ data.each do |dob|
+ unless @md.set_heading_seg
+ if defined? dob.ln and dob.ln.to_s !~/^[0-3]/m \
+ and dob.obj !~/\A\s*\Z/m \
+ and dob.is !=:layout
+ @md.set_heading_seg=true
+ head=@md.title.main \
+ ? (dob.ln,dob.name,dob.obj=4,'seg',@md.title.main)
+ : (dob.ln,dob.name,dob.obj=4,'seg','[segment]')
+ @tuned_file << head
+ end
+ end
+ @tuned_file << dob
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_header_title(data) #% make sure no false positives
+ unless @md.set_header_title
+ if (@md.opt.act[:verbose_plus][:set]==:on \
+ or @md.opt.act[:maintenance][:set]==:on)
+ puts "\t no document title provided, (will have to manufacture one)"
+ end
+ @tuned_file=[]
+ data.each do |t_o|
+ unless @md.set_header_title
+ if t_o !~/^%{1,2}\s/m \
+ and t_o !~/\A\s*\Z/m
+ @tuned_file <<
+ "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
+ @md.title.main=@md.heading_seg_first
+ @md.set_header_title=true
+ end
+ end
+ @tuned_file << t_o
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ end
+end
+__END__