From 506e32633838b4daf9ab566c9da083329212f219 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 26 Jan 2014 02:22:02 -0500 Subject: v5 v6: made true, branches: v6 development; v5 stable; v4 closed --- lib/sisu/v4/dal_numbering.rb | 473 ------------------------------------------- 1 file changed, 473 deletions(-) delete mode 100644 lib/sisu/v4/dal_numbering.rb (limited to 'lib/sisu/v4/dal_numbering.rb') diff --git a/lib/sisu/v4/dal_numbering.rb b/lib/sisu/v4/dal_numbering.rb deleted file mode 100644 index c2ac6785..00000000 --- a/lib/sisu/v4/dal_numbering.rb +++ /dev/null @@ -1,473 +0,0 @@ -# encoding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - - * Author: Ralph Amissah - - * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008, 2009, 2010, 2011, 2012, 2013 Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Download: - - - * Git - - - - * Ralph Amissah - - - - ** Description: system environment, resource control and configuration details - -=end -module SiSU_DAL_Numbering - class Numbering - attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment - def initialize(md,data) - @md,@data=md,data - @obj=@type=@ocn=@lv=@name=@index=@comment=nil - end - def numbering_song - data=@data - data=number_plaintext_para(data) - data=auto_number_heading_ie_title(data.compact) #tr issue - data=ocn(data.compact) #watch - data=xml(data.compact) - data=minor_numbering(data.compact) - data,tags_map,ocn_html_seg_map=name_para_seg_filename(data) - data=set_heading_top(data) unless @md.set_heading_top - [data,tags_map,ocn_html_seg_map] - end - def number_plaintext_para(data) - @tuned_file=[] - data.each do |dob| - if (dob.of !=:block \ - && dob.of !=:comment \ - && dob.of !=:layout) \ - && dob.ocn_ #and dob.obj !~ /#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ #FIX - dob.obj=dob.obj.gsub(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks - end - unless dob.obj.is_a?(Array) - dob.obj=dob.obj.gsub(/^\s+/,''). - gsub(/\s$/,"\n") - end - @tuned_file << dob - end - @tuned_file=@tuned_file.flatten - end - def number_sub_heading(dob,num,title_no) - unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix - dob.obj=case dob.name - when /-/; dob.obj.gsub(/^/,"#{title_no} ") - when /^#/; dob.obj.gsub(/^/,"#{title_no} ") - when /^[a-z_\.]+/; dob.obj.gsub(/^/,"#{title_no} ") - else - dob.name=title_no if dob.name=~/^$/ #where title contains title number - dob.obj.gsub(/^/,"#{title_no} ") if title_no =~/\d+/ #main, where title number is to be provided #watch changed placement - end - if @md.toc_lev_limit \ - and @md.toc_lev_limit < num - dob.obj=dob.obj.gsub(/^/,'!_ ') #bold line, watch - end - end - dob - end - def heading_tag_clean(heading_tag) - heading_tag=heading_tag.gsub(/[ ]+/,'_'). - gsub(/["']/,''). - gsub(/[\/]/,'-'). - gsub(/#{Mx[:fa_bold_o]}|#{Mx[:fa_bold_c]}/,''). - gsub(/#{Mx[:fa_italics_o]}|#{Mx[:fa_italics_c]}/,''). - gsub(/#{Mx[:fa_underscore_o]}|#{Mx[:fa_underscore_c]}/,''). - gsub(/#{Mx[:fa_cite_o]}|#{Mx[:fa_cite_c]}/,''). - gsub(/#{Mx[:fa_insert_o]}|#{Mx[:fa_insert_c]}/,''). - gsub(/#{Mx[:fa_strike_o]}|#{Mx[:fa_strike_c]}/,''). - gsub(/#{Mx[:fa_superscript_o]}|#{Mx[:fa_superscript_c]}/,''). - gsub(/#{Mx[:fa_subscript_o]}|#{Mx[:fa_subscript_c]}/,''). - gsub(/#{Mx[:fa_hilite_o]}|#{Mx[:fa_hilite_c]}/,''). - gsub(/#{Mx[:gl_bullet]}/,'') - end - def auto_number_heading_ie_title(data) #also does some segment naming - @tuned_file=[] - if defined? @md.make.num_top \ - and @md.make.num_top \ - and @md.make.num_top !~/^$/ - input||=@md.make.num_top - end - num_top=(input ? input.to_i : nil) - t_no1=t_no2=t_no3=0 - if num_top - no1=num_top; no2=(num_top + 1); no3=(num_top + 2) - end - chapter_number_counter=0 - data=data.compact - data.each do |dob| #@md.seg_names << [additions to segment names] - title_no=nil - dob=SiSU_DAL_DocumentStructureExtract::Structure.new(@md,dob).structure_markup #must happen earlier, node info etc. require - if dob.is ==:heading \ - && dob.autonum_ \ - and defined? @md.make.num_top \ - and @md.make.num_top !~/^$/ - if dob.lv=='1' \ - and dob.obj =~/^#\s|\s#(?:\s|$)/ - chapter_number_counter +=1 - dob.obj=dob.obj.gsub(/^#\s/,"#{chapter_number_counter} "). - gsub(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1") - end - if dob.ln==no1 - @subnumber=1 - @subnumber=0 if dob.ln==no1 - end - if dob.ln.to_s =~/^[1-6]/ \ - and not dob.toc_ \ - and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix - if dob.ln==no1 - t_no1+=1; t_no2=0; t_no3=0 - title_no="#{t_no1}" - if @md.seg_names.is_a?(Array) \ - and not @md.seg_names.include?(title_no) - if dob.ln==no1 - dob.name="#{title_no}" if not dob.name - dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs - tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase - tag=heading_tag_clean(tag) - dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs - dob.obj=(dob.obj =~/(Article|Clause|Section)\s+/) \ - ? (dob.obj.gsub(/(Article|Clause|Section)\s+/,"\\1 #{title_no} ")) - : (dob.obj.gsub(/^/,"#{title_no}. ")) #fix stop later - end - if dob.ln !=no1 \ - and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review - dob.name ="#{title_no}" if not dob.name - dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs - dob.obj=dob.obj.gsub(/^/,"#{title_no}. ") - end - @md.seg_names << title_no - end - if dob.ln!=no1 \ - and dob.name!~/^[a-z_\.]+$/ \ - and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on - dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs - dob.obj=dob.obj.gsub(/^/i,"#{title_no}. ") - end - end - if dob.ln==no1 #watch because here you change dob.name - dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs - end - if dob.ln==no2 #watch because here you change dob.name - t_no2+=1; t_no3=0 - title_no="#{t_no1}.#{t_no2}" - dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs - dob=number_sub_heading(dob,no2,title_no) - end - if dob.ln==no3 #watch because here you change dob.name - t_no3+=1 - title_no="#{t_no1}.#{t_no2}.#{t_no3}" - dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs - dob=number_sub_heading(dob,no3,title_no) - end - elsif dob.ln.to_s =~/^[1-6]/ \ - and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005 - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs - dob.name.gsub(/^([a-z_\.]+)-$/,'\1') - end - elsif dob.is ==:heading \ - and dob.autonum_ \ - and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 - #here lies a bug, as is nil when run from -Dv --update, FIX - if (dob.name.nil? or dob.name.empty?) \ - and dob.ln.to_s =~/^[1-9]/ \ - and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d - dob.name=$1 - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs - end - if @md.toc_lev_limit - end - elsif defined? dob.name \ - and dob.name - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs - end - dob.tags=dob.tags.uniq if defined? dob.tags - @tuned_file << dob - end - @tuned_file=@tuned_file.flatten - end - def ocn(data) #and auto segment numbering increment - @tuned_file=SiSU_DAL_DocumentStructureExtract::OCN.new(@md,data).ocn - @tuned_file - end - def xml(data) - @tuned_file=SiSU_DAL_DocumentStructureExtract::XML.new(@md,data).dom - @tuned_file - end - def minor_numbering(data) #and auto segment numbering increment - @tuned_file=[] - number_small,letter_small=0,0 - letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) - data.each do |dob| - if dob.of ==:heading \ - || dob.of ==:heading_insert \ - || dob.of ==:para \ - || dob.of ==:block - if dob.is ==:heading \ - and dob.ln.to_s=~/^[1-9]/ #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) - number_small,letter_small=0,0 - elsif dob.is ==:para - if dob.obj =~/^#[ 1]/ \ - and dob.obj !~/^#\s+(?:~#)?$/ - letter_small=0 - number_small=0 if dob.obj =~ /^#1/ - number_small+=1 - dob.obj=dob.obj.gsub(/^#[ 1]/,"#{number_small}. ") - end - if dob.obj =~/^_# / - dob.obj=dob.obj.gsub(/^_# /,"#{letter[letter_small]}. ") - dob.indent='1' - letter_small+=1 - end - end - end - @tuned_file << dob - end - @tuned_file=@tuned_file.flatten - end - def name_para_seg_filename(data) #segment naming, remaining - # paragraph name/numbering rules - # manual naming overrides, manual naming may be - # alpha-numeric characters mixed, - # numeric only (a number), if - # all segments have been named, - # the numbers used are over 1000 or - # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented) - # [for now a warning is printed for such documents on use of maintenance or very-verbose flag] - # auto-naming takes the form of giving numbers to segments - # the rules for which are as follows - # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.) - # otherwise the level 4 segment number from the embedded document structure info is used - # if there is none a sequential number is designated, preceded by an underscore - @tuned_file,@unique_auto_name=[],[] - tags={} - art_filename_auto=1 - @counter=1 - if not @md.seg_autoname_safe and @md.opt.cmd =~/[MV]/ - puts 'manual segment names, numbers used as names, risk warning (segmented html)' - end - ocn_html_seg=[] - data.each do |dob| - if dob.is==:heading \ - && dob.ln \ - and dob.ln.to_s =~/^[456]/ - if dob.ln==4 \ - and not dob.name \ - and not @md.set_heading_seg - @md.set_heading_seg=true - end - if dob.name !~/^\S+/ \ - and dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name - possible_seg_name=$1 - possible_seg_name=possible_seg_name.gsub(/(?:[:,-]|\W)/,'.'). - gsub(/\.$/,'') - if @md.seg_names.is_a?(Array) \ - and not @md.seg_names.include?(possible_seg_name) - dob.name=possible_seg_name - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ - @md.seg_names << possible_seg_name - else puts 'warn, there may be a conflicting numbering scheme' if @md.opt.cmd =~/[VM]/ - end - end - if dob.ln==4 \ - and dob.name #extract segment name from embedded document structure info - if @md.seg_names.is_a?(Array) \ - and not @md.seg_names.include?(dob.name) - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ - @md.seg_names << dob.name - end - end - if dob.ln==4 \ - and not dob.name #if still no segment name, provide a numerical one - pf='_' #pg='' #may use e.g. '' or '~' or '_' - segn_auto="#{pf}#{art_filename_auto.to_s}" - if @md.seg_names.is_a?(Array) \ - and not @md.seg_names.include?(segn_auto) - dob.name=segn_auto - dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs - @md.seg_names << segn_auto - else puts 'segment name (numbering) error' - end - art_filename_auto+=1 - end - if dob.ln==4 \ - and not dob.name #should not occur - puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}" - end - end - if (dob.is ==:heading \ - || dob.is ==:heading_insert) \ - && dob.ln==4 - @seg=dob.name - end - @tuned_file << if dob.is==:heading \ - && (@md.pagenew || @md.pagebreak || @md.pageline) - m=dob.ln.to_s - dob_tmp=[] - if @md.pagenew.inspect =~/#{m}/ - dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) << dob - elsif @md.pagebreak.inspect =~/#{m}/ - dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) << dob - elsif @md.pageline.inspect =~/#{m}/ - dob_tmp << SiSU_DAL_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) << dob - end - unless dob_tmp.length > 0; dob - else dob_tmp - end - else dob - end - if defined? dob.ocn \ - and dob.ocn - @segname=((dob.is==:heading || dob.is==:heading_insert) && dob.ln==4 && (defined? dob.name)) \ - ? (dob.name) - : @segname - tags["#{dob.ocn}"]={ segname: @segname } - ocn_html_seg[dob.ocn]=if (dob.is==:heading || dob.is==:heading_insert) - if dob.ln =~/[1-3]/ - { seg: nil, level: dob.ln } - else #elsif dob.ln =~/[4-6]/ - { seg: @seg, level: dob.ln } - end - else - { seg: @seg, level: nil } - end - end - dob.tags=dob.tags.uniq if defined? dob.tags - if defined? dob.tags \ - and dob.tags.length > 0 - #@segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \ - #? (dob.name) \ - #: @segname - dob.tags.each do |y| - tags[y]={ ocn: dob.ocn.to_s, segname: @segname } - end - end - dob - end - ocn_html_seg.each_with_index do |ocn,i| - if ocn \ - and ocn[:level].to_s=~/[1-3]/ - (1..4).each do |x| - if ocn_html_seg[i+x] and ocn_html_seg[i+x][:level]==4 - ocn[:seg]=ocn_html_seg[i+x][:seg] - end - end - end - end - if @md.seg_names.length > 0 - @md.set_heading_seg=true - end - tuned_file=@tuned_file.flatten - [tuned_file,tags,ocn_html_seg] - end - def set_heading_top(data) #% make sure no false positives - unless @md.set_heading_top - puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/ - @tuned_file=[] - data.each do |t_o| - unless @md.set_heading_top - if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \ - and t_o !~/\A\s*\Z/m - @md.set_heading_top=true - if defined? @md.title \ - and @md.title \ - and defined? @md.title.full \ - and defined? @md.creator \ - and @md.creator - head=@md.title.main ? ([@lv='1',@obj=@md.title.main]) : ([@lv='1',@obj='[no title provided]']) - @tuned_file << head - end - end - end - @tuned_file << t_o - end - @tuned_file=@tuned_file.flatten - end - end - def set_heading_seg(data) #% make sure no false positives - unless @md.set_heading_seg - puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/ - @tuned_file=[] - data.each do |dob| - unless @md.set_heading_seg - if defined? dob.ln and dob.ln.to_s !~/^[123]/m \ - and dob.obj !~/\A\s*\Z/m \ - and dob.is !=:layout - @md.set_heading_seg=true - head=@md.title.main \ - ? (dob.ln,dob.name,dob.obj=4,'seg',@md.title.main) - : (dob.ln,dob.name,dob.obj=4,'seg','[segment]') - @tuned_file << head - end - end - @tuned_file << dob - end - @tuned_file=@tuned_file.flatten - end - end - def set_header_title(data) #% make sure no false positives - unless @md.set_header_title - puts "\t no document title provided, (will have to manufacture one)" if @md.opt.cmd =~/[MV]/ - @tuned_file=[] - data.each do |t_o| - unless @md.set_header_title - if t_o !~/^%{1,2}\s/m \ - and t_o !~/\A\s*\Z/m - @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}" - @md.title.main=@md.heading_seg_first - @md.set_header_title=true - end - end - @tuned_file << t_o - end - @tuned_file=@tuned_file.flatten - end - end - end -end -__END__ -- cgit v1.2.3