# encoding: utf-8
=begin

* Name: SiSU

* Description: a framework for document structuring, publishing and search

* Author: Ralph Amissah

* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah, All Rights Reserved.

* License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

  You should have received a copy of the GNU General Public License along with this program. If not, see .

  If you have Internet connection, the latest version of the GPL should be available at these locations:

* SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

* Hompages:

* Download:

* Git

* Ralph Amissah

** Description: system environment, resource control and configuration details

=end
module SiSU_AO_Numbering
  # Numbering passes over a list of document objects ("dob"): heading
  # auto-numbering, minor (list) numbering, segment naming and the building of
  # the tag / object-number lookup maps. Entry point is #numbering_song.
  class Numbering
    attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment
    # cached count of lv '1' headings; reset by #numbering_song when it finishes
    @@segments_count=0
    # md::   document metadata object (read and updated by the passes below)
    # data:: array of document objects to be processed
    def initialize(md,data)
      @md,@data=md,data
      @obj=@type=@ocn=@lv=@name=@index=@comment=nil
    end
    # Number of headings in @data whose #lv is '1'. The result is cached in
    # @@segments_count and recounted only while the cached value is 0.
    def number_of_segments?
      if @@segments_count==0
        @@segments_count=@data.count do |dob|
          dob.is == :heading && dob.lv == '1'
        end
      end
      @@segments_count
    end
    # Runs all numbering passes in order over @data.
    # Returns [data, tags_map, ocn_html_seg_map]; the cached segment count is
    # cleared afterwards whether or not a pass raised.
    def numbering_song
      begin
        data=@data
        data=number_plaintext_para(data)
        data=auto_number_heading_ie_title(data.compact)                          #tr issue
        data=ocn(data.compact)                                                   #watch
        data=xml(data.compact)
        data=minor_numbering(data.compact)
        data,tags_map,ocn_html_seg_map=name_para_seg_filename(data)
        data=set_heading_top(data) unless @md.set_heading_top
        [data,tags_map,ocn_html_seg_map]
      ensure
        @@segments_count=0
      end
    end
    # Prepends +tag+ (stripped of every character outside a-z 0-9 . _ -) to
    # +tags+ and returns the flattened list. Empty and all-digit tags are
    # ignored and +tags+ is returned unchanged.
    def set_tags(tags,tag)
      tags=if not tag.empty? \
      and tag !~/^\d+$/
        tag=tag.gsub(/[^a-z0-9._-]/,'')
        [tag,tags].flatten
      else tags
      end
    end
    # Whitespace normalisation of object text. For objects that are not of
    # :block, :comment or :layout and have ocn_ set, internal newlines are
    # joined with spaces; for every non-Array obj, leading whitespace on each
    # line is removed and a trailing whitespace character becomes "\n".
    def number_plaintext_para(data)
      @tuned_file=[]
      data.each do |dob|
        if (dob.of !=:block \
        && dob.of !=:comment \
        && dob.of !=:layout) \
        && dob.ocn_ #and dob.obj !~ /#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ #FIX
          dob.obj=dob.obj.gsub(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
        end
        unless dob.obj.is_a?(Array)
          dob.obj=dob.obj.gsub(/^\s+/,'').
            gsub(/\s$/,"\n")
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # Prefixes a sub-heading with +title_no+ unless its text already carries
    # a number ("1." or "chapter 3" style). When @md.toc_lev_limit is below
    # +num+ the line is additionally marked bold with '!_ '. Returns +dob+.
    def number_sub_heading(dob,num,title_no)
      unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix
        dob.obj=case dob.name
        when /-/          then dob.obj.gsub(/^/,"#{title_no} ")
        when /^#/         then dob.obj.gsub(/^/,"#{title_no} ")
        when /^[a-z_\.]+/ then dob.obj.gsub(/^/,"#{title_no} ")
        else
          dob.name=title_no if dob.name=~/^$/ #where title contains title number
          # main, where title number is to be provided #watch changed placement
          # (a title_no without digits leaves the text as it is; previously
          # this arm evaluated to nil and wiped dob.obj)
          if title_no =~/\d+/
            dob.obj.gsub(/^/,"#{title_no} ")
          else dob.obj
          end
        end
        if @md.toc_lev_limit \
        and @md.toc_lev_limit < num
          dob.obj=dob.obj.gsub(/^/,'!_ ') #bold line, watch
        end
      end
      dob
    end
    # Turns heading text into a tag: runs of spaces become '_', quotes are
    # dropped, '/' becomes '-', and the font-attribute / bullet markers held
    # in Mx are removed.
    def heading_tag_clean(heading_tag)
      heading_tag=heading_tag.
        gsub(/[ ]+/,'_').
        gsub(/["']/,'').
        gsub(/[\/]/,'-').
        gsub(/#{Mx[:fa_bold_o]}|#{Mx[:fa_bold_c]}/,'').
        gsub(/#{Mx[:fa_italics_o]}|#{Mx[:fa_italics_c]}/,'').
        gsub(/#{Mx[:fa_underscore_o]}|#{Mx[:fa_underscore_c]}/,'').
        gsub(/#{Mx[:fa_cite_o]}|#{Mx[:fa_cite_c]}/,'').
        gsub(/#{Mx[:fa_insert_o]}|#{Mx[:fa_insert_c]}/,'').
        gsub(/#{Mx[:fa_strike_o]}|#{Mx[:fa_strike_c]}/,'').
        gsub(/#{Mx[:fa_superscript_o]}|#{Mx[:fa_superscript_c]}/,'').
        gsub(/#{Mx[:fa_subscript_o]}|#{Mx[:fa_subscript_c]}/,'').
        gsub(/#{Mx[:fa_hilite_o]}|#{Mx[:fa_hilite_c]}/,'').
        gsub(/#{Mx[:gl_bullet]}/,'')
    end
    # Auto-numbers headings (also does some segment naming). The level given
    # by @md.make.num_top is numbered 1, 2, ...; the next two levels become
    # 1.1 and 1.1.1 style sub-numbers. Tags are added to each heading as it
    # is numbered. Returns the flattened object list.
    def auto_number_heading_ie_title(data)
      @tuned_file=[]
      if defined? @md.make.num_top \
      and @md.make.num_top \
      and @md.make.num_top !~/^$/
        input||=@md.make.num_top
      end
      num_top=(input ? input.to_i : nil)
      t_no1=t_no2=t_no3=0
      if num_top
        no1=num_top; no2=(num_top + 1); no3=(num_top + 2)
      end
      chapter_number_counter=0
      data=data.compact
      data.each do |dob| #@md.seg_names << [additions to segment names]
        title_no=nil
        if dob.is ==:heading \
        && dob.autonum_ \
        and defined? @md.make.num_top \
        and @md.make.num_top !~/^$/
          if dob.lv=='1' \
          and dob.obj =~/^#\s|\s#(?:\s|$)/
            # a bare '#' in a level '1' heading is a placeholder for the running chapter number
            chapter_number_counter +=1
            dob.obj=dob.obj.gsub(/^#\s/,"#{chapter_number_counter} ").
              gsub(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1")
          end
          if dob.ln==no1
            @subnumber=1
            @subnumber=0 if dob.ln==no1
          end
          if dob.ln.to_s =~/^[0-6]/ \
          and not dob.use_ ==:dummy \
          and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix
            if dob.ln==no1
              t_no1+=1; t_no2=0; t_no3=0
              title_no="#{t_no1}"
              if @md.seg_names.is_a?(Array) \
              and not @md.seg_names.include?(title_no)
                if dob.ln==no1
                  dob.name="#{title_no}" if not dob.name
                  dob.tags=set_tags(dob.tags,title_no)
                  tag=dob.obj.
                    gsub(/(Article|Clause|Section|Chapter)\s+/, "\\1_#{title_no}").
                    downcase
                  tag=heading_tag_clean(tag)
                  dob.tags=set_tags(dob.tags,tag)
                  dob.obj=if dob.obj =~/(Article|Clause|Section)\s+/
                    dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} ")
                  else
                    dob.obj.gsub(/^/,"#{title_no}. ") #fix stop later
                  end
                end
                # NOTE(review): this sits inside `if dob.ln==no1`, so the
                # `dob.ln !=no1` test below looks unreachable - confirm intent
                if dob.ln !=no1 \
                and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
                  dob.name ="#{title_no}" if not dob.name
                  dob.tags=set_tags(dob.tags,title_no)
                  dob.obj=dob.obj.gsub(/^/,"#{title_no}. ")
                end
                @md.seg_names << title_no
              end
              # NOTE(review): same enclosing `if dob.ln==no1`; looks unreachable as well
              if dob.ln!=no1 \
              and dob.name!~/^[a-z_\.]+$/ \
              and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
                dob.tags=set_tags(dob.tags,title_no)
                dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ")
              end
            end
            if dob.ln==no1 #watch because here you change dob.name
              dob.tags=set_tags(dob.tags,"h#{title_no}")
            end
            if dob.ln==no2 #watch because here you change dob.name
              t_no2+=1; t_no3=0
              title_no="#{t_no1}.#{t_no2}"
              dob.tags=set_tags(dob.tags,"h#{title_no}")
              dob=number_sub_heading(dob,no2,title_no)
            end
            if dob.ln==no3 #watch because here you change dob.name
              t_no3+=1
              title_no="#{t_no1}.#{t_no2}.#{t_no3}"
              dob.tags=set_tags(dob.tags,"h#{title_no}")
              dob=number_sub_heading(dob,no3,title_no)
            end
          elsif dob.ln.to_s =~/^[0-6]/ \
          and dob.name =~ /^[\w-]+-/ # endnotes, watch2005
            dob.tags=set_tags(dob.tags,dob.name)
            # NOTE(review): the result of this gsub is discarded, dob.name is not changed
            dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
          end
        elsif dob.is ==:heading \
        and dob.autonum_ \
        and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
          #here lies a bug, as is nil when run from -Dv --update, FIX
          if (dob.name.nil? or dob.name.empty?) \
          and dob.ln.to_s =~/^[0-9]/ \
          and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
            dob.name=$1
            dob.tags=set_tags(dob.tags,dob.name)
          end
          if @md.toc_lev_limit
            # intentionally empty in the original
          end
        elsif defined? dob.name \
        and dob.name
          dob.tags=set_tags(dob.tags,dob.name)
        end
        dob.tags=dob.tags.uniq if defined? dob.tags
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # Delegates to SiSU_AO_DocumentStructureExtract::OCN (and auto segment
    # numbering increment). Note: replaces the reader generated by
    # attr_accessor :ocn.
    def ocn(data)
      @tuned_file=SiSU_AO_DocumentStructureExtract::OCN.new(@md,data).ocn
      @tuned_file
    end
    # Delegates to SiSU_AO_DocumentStructureExtract::XML#dom.
    def xml(data)
      @tuned_file=SiSU_AO_DocumentStructureExtract::XML.new(@md,data).dom
      @tuned_file
    end
    # Minor ("baby") numbering of paragraphs: a leading '# ' (or '#1', which
    # restarts the count) becomes "1. ", "2. ", ...; a leading '_# ' becomes
    # "a. ", "b. ", ... and sets indent to '1'. Both counters restart at
    # every numbered heading.
    def minor_numbering(data)
      @tuned_file=[]
      number_small,letter_small=0,0
      # NOTE(review): only 26 labels; a 27th '_# ' item in a row gets an empty label
      letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
      data.each do |dob|
        if dob.of ==:heading \
        || dob.of ==:heading_insert \
        || dob.of ==:para \
        || dob.of ==:block
          if dob.is ==:heading \
          and dob.ln.to_s=~/^[0-9]/ #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
            number_small,letter_small=0,0
          elsif dob.is ==:para
            if dob.obj =~/^#[ 1]/ \
            and dob.obj !~/^#\s+(?:~#)?$/
              letter_small=0
              number_small=0 if dob.obj =~ /^#1/
              number_small+=1
              dob.obj=dob.obj.gsub(/^#[ 1]/,"#{number_small}. ")
            end
            if dob.obj =~/^_# /
              dob.obj=dob.obj.gsub(/^_# /,"#{letter[letter_small]}. ")
              dob.indent='1'
              letter_small+=1
            end
          end
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # For a String made of digits (optionally ending in '.'), returns
    # Mx[:auto_seg_prefix] followed by the digits left-padded with zeros to
    # the width of #number_of_segments?. Returns nil for any other input.
    def leading_zeros_fixed_width_number(possible_seg_name)
      # non-String input used to fall through to Kernel#=~ (always nil), which
      # no longer exists from Ruby 3.2; the explicit check keeps the nil result
      if possible_seg_name.is_a?(String) \
      && possible_seg_name =~/^([0-9]+?\.|[0-9]+)$/m #!~/[.,:-]+/
        possible_seg_name=possible_seg_name.
          gsub(/\.$/,'')
        zeros_fixed_width=number_of_segments?.to_s.length
        # rjust pads exactly as '0'*n did, and leaves names that are already
        # wider than the target untouched instead of raising on a negative n
        Mx[:auto_seg_prefix] + possible_seg_name.rjust(zeros_fixed_width,'0')
      end
    end
    # Derives a segment name from +possible_seg_name+:
    # * digits not greater than +heading_num_is+ -> prefixed, zero padded
    # * other digit/punctuation runs             -> separators become '.'
    # * anything else (including non-String)     -> its to_s
    def auto_seg_name(possible_seg_name,heading_num_is)
      # name_para_seg_filename passes an Integer counter here; Integer has no
      # =~ from Ruby 3.2 on, so return what the old fall-through produced
      return possible_seg_name.to_s unless possible_seg_name.is_a?(String)
      if possible_seg_name =~/^[0-9]+?\.$/m #!~/[.,:-]+/
        possible_seg_name=possible_seg_name.
          gsub(/\.$/,'')
      end
      if possible_seg_name =~/^[0-9]+$/m \
      and possible_seg_name.to_i <= heading_num_is.to_i
        leading_zeros_fixed_width_number(possible_seg_name)
      elsif possible_seg_name =~/^[\d.,:-]+$/m
        possible_seg_name=possible_seg_name.
          gsub(/(?:[:,-]|\W)/,'.').
          gsub(/\.$/,'')
        #Mx[:auto_seg_prefix] + possible_seg_name
      else
        possible_seg_name.to_s
      end
    end
    # Segment naming, remaining. Returns [objects, tags, ocn_html_seg] where
    # tags maps ocn strings and tag names to their segment name, and
    # ocn_html_seg is indexed by ocn and holds { seg:, level: } hashes.
    def name_para_seg_filename(data)
      # paragraph name/numbering rules
      # manual naming overrides, manual naming may be
      #   alpha-numeric characters mixed,
      #   numeric only (a number), if
      #     all segments have been named,
      #     the numbers used are over 1000 or
      #     it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
      #     [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
      # auto-naming takes the form of giving numbers to segments
      # the rules for which are as follows
      #   if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
      #   otherwise the level 4 segment number from the embedded document structure info is used
      #   if there is none a sequential number is designated, preceded by an underscore
      @tuned_file,@unique_auto_name=[],[]
      tags={}
      art_filename_auto=1
      @counter=1
      if not @md.seg_autoname_safe \
      and (@md.opt.act[:verbose_plus][:set]==:on \
      || @md.opt.act[:maintenance][:set]==:on)
        puts 'manual segment names, numbers used as names, risk warning (segmented html)'
      end
      ocn_html_seg=[]
      data.each do |dob|
        if dob.is==:heading \
        && dob.ln \
        and dob.ln.to_s =~/^[456]/
          # NOTE(review): raises if dob.node is nil or does not match - confirm node is always set here
          heading_num_is=/^\d+:(\d+);\d/m.match(dob.node)[1]
          if dob.ln==4 \
          and not dob.name \
          and not @md.set_heading_seg
            @md.set_heading_seg=true
          end
          if dob.name !~/^\S+/ \
          and dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numeric construct, use that as name
            possible_seg_name=$1
            possible_seg_name=
              auto_seg_name(possible_seg_name,heading_num_is)
            possible_seg_name=possible_seg_name.
              gsub(/(?:[:,-]|\W)/,'.').
              gsub(/\.$/,'')
            if @md.seg_names.is_a?(Array) \
            and not @md.seg_names.include?(possible_seg_name)
              dob.name=possible_seg_name
              dob.tags=set_tags(dob.tags,dob.name)
              @md.seg_names << possible_seg_name
            elsif (@md.opt.act[:verbose_plus][:set]==:on \
            or @md.opt.act[:maintenance][:set]==:on)
              puts 'warn, there may be a conflicting numbering scheme'
            end
          end
          if dob.ln==4 \
          and dob.name #extract segment name from embedded document structure info
            if @md.seg_names.is_a?(Array) \
            and not @md.seg_names.include?(dob.name)
              dob.tags=set_tags(dob.tags,dob.name)
              @md.seg_names << dob.name
            end
          end
          if dob.ln==4 \
          and not dob.name #if still no segment name, provide a numerical one
            possible_seg_name=
              auto_seg_name(art_filename_auto,heading_num_is)
            if @md.seg_names.is_a?(Array) \
            and not @md.seg_names.include?(possible_seg_name)
              dob.name=possible_seg_name
              dob.tags=set_tags(dob.tags,dob.name)
              @md.seg_names << possible_seg_name
            else puts 'segment name (numbering) error'
            end
            art_filename_auto+=1
          end
          if dob.ln==4 \
          and not dob.name #should not occur
            puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
          end
        end
        if (dob.is ==:heading \
        || dob.is ==:heading_insert) \
        && dob.ln==4
          @seg=dob.name
        end
        # headings whose level appears in pagenew / pagebreak / pageline are
        # preceded by the matching break object
        @tuned_file << if dob.is==:heading \
        && (@md.pagenew || @md.pagebreak || @md.pageline)
          m=dob.ln.to_s
          dob_tmp=[]
          if @md.pagenew.inspect =~/#{m}/
            dob_tmp <<
              SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) <<
              dob
          elsif @md.pagebreak.inspect =~/#{m}/
            dob_tmp <<
              SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) <<
              dob
          elsif @md.pageline.inspect =~/#{m}/
            dob_tmp <<
              SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) <<
              dob
          end
          if dob_tmp.length > 0
            dob_tmp
          else dob
          end
        else dob
        end
        if defined? dob.ocn \
        and dob.ocn
          if (dob.is==:heading || dob.is==:heading_insert) \
          && dob.ln==4 \
          && (defined? dob.name)
            @segname=dob.name
          end
          tags["#{dob.ocn}"]={ segname: @segname }
          ocn_html_seg[dob.ocn]=if (dob.is==:heading || dob.is==:heading_insert)
            # NOTE(review): dob.ln is compared with Integers elsewhere in this
            # method, and Integer =~ Regexp was always nil (and raises from
            # Ruby 3.2), so this arm is only taken for a String level. The
            # is_a? check keeps that outcome; confirm whether dob.ln.to_s was meant
            if dob.ln.is_a?(String) \
            && dob.ln =~/[0-3]/
              { seg: nil, level: dob.ln, }
            #elsif dob.ln =~/[4-6]/
            else
              { seg: @seg, level: dob.ln, }
            end
          else
            { seg: @seg, level: nil, }
          end
        end
        dob.tags=dob.tags.uniq if defined? dob.tags
        if defined? dob.tags \
        and dob.tags.length > 0
          #@segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \
          #? (dob.name) \
          #: @segname
          dob.tags.each do |y|
            tags[y]={ ocn: dob.ocn.to_s, segname: @segname }
          end
        end
        dob
      end
      # level 1-3 entries take the segment of a level 4 entry found within the next four ocn
      ocn_html_seg.each_with_index do |ocn,i|
        if ocn \
        and ocn[:level].to_s=~/[1-3]/
          (1..4).each do |x|
            if ocn_html_seg[i+x] \
            and ocn_html_seg[i+x][:level]==4
              ocn[:seg]=ocn_html_seg[i+x][:seg]
            end
          end
        end
      end
      if @md.seg_names.length > 0
        @md.set_heading_seg=true
      end
      tuned_file=@tuned_file.flatten
      [tuned_file,tags,ocn_html_seg]
    end
    # Make sure no false positives: when @md.set_heading_top is not set, a
    # manufactured top level heading (level '1' plus the main title, or
    # '[no title provided]') is inserted before the first entry that is
    # neither metadata nor blank. Returns the flattened list, or nil when
    # @md.set_heading_top was already set.
    # NOTE(review): `t_o !~ regex` assumes the entries respond to =~ - confirm
    # what the caller passes.
    def set_heading_top(data)
      unless @md.set_heading_top
        if (@md.opt.act[:verbose_plus][:set]==:on \
        or @md.opt.act[:maintenance][:set]==:on)
          puts "\tdocument contains no top level heading, (will have to manufacture one)"
        end
        @tuned_file=[]
        data.each do |t_o|
          unless @md.set_heading_top
            if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \
            and t_o !~/\A\s*\Z/m
              @md.set_heading_top=true
              if defined? @md.title \
              and @md.title \
              and defined? @md.title.full \
              and defined? @md.creator \
              and @md.creator
                head=if @md.title.main
                  [@lv='1',@obj=@md.title.main]
                else
                  [@lv='1',@obj='[no title provided]']
                end
                @tuned_file << head
              end
            end
          end
          @tuned_file << t_o
        end
        @tuned_file=@tuned_file.flatten
      end
    end
    # Make sure no false positives: when @md.set_heading_seg is not set, the
    # first object that is not level 0-3, not blank and not :layout is
    # turned into a level 4 heading named 'seg' carrying the main title (or
    # '[segment]'); the assigned values are also pushed ahead of it.
    # Returns the flattened list, or nil when @md.set_heading_seg was
    # already set.
    def set_heading_seg(data)
      unless @md.set_heading_seg
        if (@md.opt.act[:verbose_plus][:set]==:on \
        or @md.opt.act[:maintenance][:set]==:on)
          puts "\tdocument contains no segment level, (will have to manufacture one)"
        end
        @tuned_file=[]
        data.each do |dob|
          unless @md.set_heading_seg
            if defined? dob.ln and dob.ln.to_s !~/^[0-3]/m \
            and dob.obj !~/\A\s*\Z/m \
            and dob.is !=:layout
              @md.set_heading_seg=true
              # head is the value of the multiple assignment, i.e. the array of assigned values
              head=if @md.title.main
                (dob.ln,dob.name,dob.obj=4,'seg',@md.title.main)
              else
                (dob.ln,dob.name,dob.obj=4,'seg','[segment]')
              end
              @tuned_file << head
            end
          end
          @tuned_file << dob
        end
        @tuned_file=@tuned_file.flatten
      end
    end
    # Make sure no false positives: when @md.set_header_title is not set, a
    # title line built from @md.heading_seg_first is inserted before the
    # first entry that is neither a '%' comment line nor blank, and
    # @md.title.main is set to the same text. Returns the flattened list, or
    # nil when @md.set_header_title was already set.
    def set_header_title(data)
      unless @md.set_header_title
        if (@md.opt.act[:verbose_plus][:set]==:on \
        or @md.opt.act[:maintenance][:set]==:on)
          puts "\t no document title provided, (will have to manufacture one)"
        end
        @tuned_file=[]
        data.each do |t_o|
          unless @md.set_header_title
            if t_o !~/^%{1,2}\s/m \
            and t_o !~/\A\s*\Z/m
              @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
              @md.title.main=@md.heading_seg_first
              @md.set_header_title=true
            end
          end
          @tuned_file << t_o
        end
        @tuned_file=@tuned_file.flatten
      end
    end
  end
end
__END__