aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v1/dal_numbering.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v1/dal_numbering.rb')
-rw-r--r--lib/sisu/v1/dal_numbering.rb368
1 files changed, 368 insertions, 0 deletions
diff --git a/lib/sisu/v1/dal_numbering.rb b/lib/sisu/v1/dal_numbering.rb
new file mode 100644
index 00000000..05123bbc
--- /dev/null
+++ b/lib/sisu/v1/dal_numbering.rb
@@ -0,0 +1,368 @@
+# coding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/copyleft/gpl.html>
+
+ <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
+
+ * SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ * Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+ * Download:
+ <http://www.jus.uio.no/sisu/SiSU/download.html>
+
+ * Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+ ** Description: system environment, resource control and configuration details
+
+=end
+module SiSU_numbering
+ class Numbering
+ def initialize(md,data)
+ @md,@data=md,data
+ end
+ def numbering_song
+ data=@data
+ data=number_plaintext_para(data)
+ data=name_endnote_seg(data) #tr issue
+ data=auto_number_heading_ie_title(data) #tr issue
+ data=ocn(data) #watch
+ data=minor_numbering(data)
+ data=name_para_seg_filename(data)
+ data=set_heading_seg(data) unless @md.set_heading_seg
+ data=set_heading_top(data) unless @md.set_heading_top
+ data=set_header_title(data) unless @md.set_header_title
+ data
+ end
+ def number_plaintext_para(data)
+ @tuned_file=[]
+ data.each do |para|
+ if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/
+ para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
+ end
+ para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u
+ para.gsub!(/^\s+|\s$/,"\n")
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def name_endnote_seg(data)
+ tuned_file=data
+ if @md.flag_auto_endnotes \
+ and @md.flag_separate_endnotes_make
+ tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}"
+ end
+ tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON
+ tuned_file=tuned_file.flatten
+ end
+ def owner_details_seg
+ data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details"
+ end
+ def number_sub_heading(para,num,title_no) #rewrite the level-num heading marker in para so the heading carries title_no; para is modified in place and returned
+ case para
+ when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/; para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ") #marker named "-": the whole marker is replaced by the title number
+ when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ") #marker named "#": the whole marker is replaced by the title number
+ when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/
+ para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) #named marker: name kept, title number placed before the title text, a :name#title_no tag appended
+ when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/
+ para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}") #unnamed marker where title already contains the title number: only name the marker
+ else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, unnamed marker where title number is to be provided: name the marker and prefix the title
+ end
+ if @md.toc_lev_limit \
+ and @md.toc_lev_limit < num
+ para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #num is past toc_lev_limit: a level 5-9 marker is replaced by '!_ ' (bold line), watch
+ end
+ para
+ end
+ def auto_number_heading_ie_title(data) #also does some segment naming
+ @tuned_file=[]
+ if @md.markup =~/num_top/ \
+ or @md.num_top # watch, 2003w23
+ input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup
+ input||=@md.num_top if @md.num_top !~/^$/
+ end
+ num_top=input.to_i
+ t_no1=t_no2=t_no3=t_no4=0
+ no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3)
+ t_not=0
+ data.each do |para| #@md.seg_names << [additions to segment names]
+ if (@md.markup =~/num_top/ \
+ or (@md.num_top \
+ and @md.num_top !~/^$/)) \
+ and para !~/^#{Rx[:meta]}/
+ if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \
+ and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/)
+ t_not+=1 #; t_no2=0; t_no3=0
+ para.gsub!(/^(#{Mx[:lv_o]}(?:#{no1}|#{no2}|#{no3}|#{no4})):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
+ end
+ if para =~/#{Mx[:lv_o]}#{no1}:/
+ @subnumber=1
+ @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/
+ end
+ if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \
+ and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \
+ and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/
+ if para =~/^#{Mx[:lv_o]}#{no1}:/
+ t_no1+=1; t_no2=0; t_no3=0
+ title_no="#{t_no1}"
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(title_no)
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329)
+ para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. })
+ unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ")
+ end
+ @md.seg_names << title_no
+ #else puts "warning segment name #{title_no} already exists"
+ end
+ unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,
+ %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}})
+ end
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch
+ para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ")
+ end
+ if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/
+ t_no2+=1; t_no3=0
+ title_no="#{t_no1}.#{t_no2}"
+ para=number_sub_heading(para,no2,title_no)
+ end
+ if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/
+ t_no3+=1
+ title_no="#{t_no1}.#{t_no2}.#{t_no3}"
+ para=number_sub_heading(para,no3,title_no)
+ end
+ elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ")
+ para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}")
+ para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}")
+ end
+ elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
+ if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
+ name_num=$1
+ para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}")
+ end
+ if @md.toc_lev_limit
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def ocn(data) #and auto segment numbering increment
+ @tuned_file=[]
+ object_array=SiSU_document_structure::OCN.new(@md,data).ocn
+ object_array.each do |o|
+ @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor
+ else o.txt
+ end
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def minor_numbering(data) #number "#"-prefixed lines 1. 2. 3. and "_# "-prefixed lines a. b. c.; paragraphs are modified in place, the flattened array is returned
+ @tuned_file=[]
+ number_small,letter_small=0,0
+ letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) #labels used for "_# " lines, indexed by letter_small
+ data.each do |para|
+ if para =~/\w|\S|<|\(/ #paragraph has some non-whitespace content
+ if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|<table|<\/table>|<td|<\/td>|<th|<\/th>|<tr>|<\/tr>|<hr width|<:4-endnotes>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here; paragraphs matching any of these markers are left untouched (&nbsp; added with Tune.code)
+ if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
+ end
+ if para =~/^#[ 1]/ #line starts with "# " or "#1": numbered item
+ letter_small=0
+ number_small=0 if para =~ /^#1/ #"#1" restarts the count
+ number_small+=1
+ para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004
+ end
+ if para =~/^_# / #lettered sub-item
+ para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004
+ letter_small+=1
+ end
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def name_para_seg_filename(data)
+ # paragraph name/numbering rules
+ # manual naming overrides, manual naming may be
+ # alpha-numeric characters mixed,
+ # numeric only (a number), if
+ # all segments have been named,
+ # the numbers used are over 1000 or
+ # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
+ # [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
+ # auto-naming takes the form of giving numbers to segments
+ # the rules for which are as follows
+ # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
+ # otherwise the level 4 segment number from the embedded document structure info is used
+ # if there is none a sequential number is designated, preceded by an underscore
+ @tuned_file=[]
+ art_filename_auto=1
+ @counter=1
+ @unique_auto_name=[]
+ if not @md.seg_autoname_safe and @md.cmd =~/[MV]/
+ puts 'manual segment names, numbers used as names, risk warning (segmented html)'
+ end
+ data.each do |para|
+ para=SiSU_document_structure::Structure.new(@md,para).structure_markup
+ if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/
+ if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \
+ and not @md.set_heading_seg
+ @md.set_heading_seg=true
+ end
+ if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
+ pattern=$1
+ pattern.gsub!(/(?:[:,-]|\W)/,'.')
+ pattern.gsub!(/\.$/,'')
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(pattern)
+ para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}")
+ @md.seg_names << pattern
+ else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/
+ end
+ end
+ if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info
+ pattern=$1
+ pattern.gsub!(/(?:[:,-]|\W)/,'.')
+ pattern.gsub!(/\.$/,'')
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(pattern)
+ para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}")
+ @md.seg_names << pattern
+ else
+ para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}")
+ @md.seg_names << "~#{pattern}"
+ end
+ end
+ if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(art_filename_auto)
+ para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}})
+ @md.seg_names << art_filename_auto
+ else puts 'segment name (numbering) error'
+ end
+ art_filename_auto+=1
+ end
+ end
+ @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \
+ and (@md.pagenew or @md.pagebreak)
+ m=$1 #watch ref~
+ para_tmp=[]
+ if @md.pagenew.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page_new]}\n" << para
+ elsif @md.pagebreak.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page]}\n" << para
+ end
+ para_result=unless para_tmp.length > 0; para
+ else para_tmp
+ end
+ else para
+ end
+ end
+ if @md.seg_names.length > 0
+ @md.set_heading_seg=true
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def set_heading_top(data) #% make sure no false positives
+ unless @md.set_heading_top
+ puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/
+ @tuned_file=[]
+ data.each do |para|
+ unless @md.set_heading_top
+ if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \
+ and para !~/\A\s*\Z/m
+ @md.set_heading_top=true
+ head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}"
+ else "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]"
+ end
+ @tuned_file << head
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_heading_seg(data) #% make sure no false positives
+ unless @md.set_heading_seg
+ puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/
+ @tuned_file=[]
+ data.each do |para|
+ unless @md.set_heading_seg
+ if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \
+ and para !~/\A\s*\Z/m \
+ and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/
+ @md.set_heading_seg=true
+ head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]"
+ else "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]"
+ end
+ @tuned_file << head
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_header_title(data) #% make sure no false positives
+ unless @md.set_header_title
+ puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/
+ @tuned_file=[]
+ data.each do |para|
+ unless @md.set_header_title
+ if para !~/^%{1,2}\s/m \
+ and para !~/\A\s*\Z/m
+ @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
+ @md.title=@md.heading_seg_first
+ @md.set_header_title=true
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ end
+end
+__END__