From 3655940354be602e6e69e1e8445d157bcb89c965 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 7 Sep 2008 23:16:47 -0400 Subject: book index (markup adjustment, dal module); dal numbering moved to separate module; version date --- lib/sisu/v0/dal.rb | 335 +---------------------------------------------------- 1 file changed, 6 insertions(+), 329 deletions(-) (limited to 'lib/sisu/v0/dal.rb') diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb index 25b7528e..aa4758b7 100644 --- a/lib/sisu/v0/dal.rb +++ b/lib/sisu/v0/dal.rb @@ -65,6 +65,8 @@ module SiSU_DAL require "#{SiSU_lib}/param" require "#{SiSU_lib}/dal_syntax" require "#{SiSU_lib}/dal_doc_str" + require "#{SiSU_lib}/dal_idx" + require "#{SiSU_lib}/dal_numbering" require "#{SiSU_lib}/i18n" require "#{SiSU_lib}/shared_sem" include SiSU_Env @@ -211,7 +213,8 @@ module SiSU_DAL data=character_check(data) data=images(data) data=SiSU_document_structure::Tables.new(@md,data).tables - data=numbering_song(data) #tr issue + data=SiSU_numbering::Numbering.new(@md,data).numbering_song + data=SiSU_book_index::Book_index.new(data).indexing_song if @md.book_index data=endnotes(data) data=object_digest(data) meta=metadata(data) @@ -449,7 +452,7 @@ module SiSU_DAL end def substitutions_and_insertions?(data) data_expand=[] - if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content preceeds it) + if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content precedes it) data[0].gsub!(/^#!\s*\/usr\/bin\/sisu/,'') data[0].gsub!(/^#!\s*\/usr\/bin\/env sisu/,'') end @@ -507,332 +510,6 @@ module SiSU_DAL end end end - def numbering_song(data) - data=number_plaintext_para(data) - data=name_endnote_seg(data) #tr issue - data=auto_number_heading_ie_title(data) #tr issue - data=ocn(data) #watch - data=minor_numbering(data) - data=name_para_seg_filename(data) - data=set_heading_seg(data) unless @md.set_heading_seg - data=set_heading_top(data) unless @md.set_heading_top - data=set_header_title(data) unless @md.set_header_title - data - end - def number_plaintext_para(data) - @tuned_file=[] - data.each do |para| - if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ - para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks - end - para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u - para.gsub!(/^\s+|\s$/,"\n") - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - def name_endnote_seg(data) - @tuned_file=[] - data.each do |para| - para.gsub!(/<:3>\s*<:ee>/, <<-WOK -#{@@endnote['special_align']}


\r -#{@@endnote['seg_name_3']}

-#{@@endnote['special_align_close']} - WOK - ) - para.gsub!(/<:2>\s*<:ee>/, <<-WOK -#{@@endnote['special_align']}


\r -#{@@endnote['seg_name_2']}

-#{@@endnote['special_align_close']} - WOK - ) - para.gsub!(/<:1>\s*<:ee>/, <<-WOK -#{@@endnote['special_align']}


\r -#{@@endnote['seg_name_1']}

-#{@@endnote['special_align_close']} - WOK - ) - @tuned_file << para - end - # debug 2003w46 adding revision control info - if @md.flag_auto_endnotes \ - and @md.flag_separate_endnotes_make - @tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}" - end - @tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON - @tuned_file=@tuned_file.flatten - end - def owner_details_seg - data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details" - end - def number_sub_heading(para,num,title_no) - case para - when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/; para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ") - when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ") - when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/ - para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) - when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/ - para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}") #where title contains title number - else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided - end - if @md.toc_lev_limit \ - and @md.toc_lev_limit < num - para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch - end - para - end - def auto_number_heading_ie_title(data) #also does some segment naming - @tuned_file=[] - if @md.markup =~/num_top/ \ - or @md.num_top # watch, 2003w23 - input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup - input||=@md.num_top if @md.num_top !~/^$/ - end - num_top=input.to_i - t_no1=t_no2=t_no3=t_no4=0 - no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3) - t_not=0 - data.each do |para| #@md.seg_names << [additions to segment names] - if (@md.markup =~/num_top/ \ - or (@md.num_top \ - and @md.num_top !~/^$/)) \ - and para !~/^#{Rx[:meta]}/ - if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \ - and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/) - t_not+=1 #; t_no2=0; t_no3=0 - para.gsub!(/^(#{Mx[:lv_o]}#{no1}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") - para.gsub!(/^(#{Mx[:lv_o]}#{no2}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") - para.gsub!(/^(#{Mx[:lv_o]}#{no3}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") - para.gsub!(/^(#{Mx[:lv_o]}#{no4}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2") - end - if para =~/#{Mx[:lv_o]}#{no1}:/ - @subnumber=1 - @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/ - end - if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \ - and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \ - and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ - if para =~/^#{Mx[:lv_o]}#{no1}:/ - t_no1+=1; t_no2=0; t_no3=0 - title_no="#{t_no1}" - if not @md.seg_names.nil? \ - and not @md.seg_names.include?(title_no) - para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329) - para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. }) - unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review - para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ") - end - @md.seg_names << title_no - #else puts "warning segment name #{title_no} already exists" - end - unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required - para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i, - %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}}) - end - para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch - para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ") - end - if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/ - t_no2+=1; t_no3=0 - title_no="#{t_no1}.#{t_no2}" - para=number_sub_heading(para,no2,title_no) - end - if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/ - t_no3+=1 - title_no="#{t_no1}.#{t_no2}.#{t_no3}" - para=number_sub_heading(para,no3,title_no) - end - elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005 - para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ") - para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}") - para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}") - end - elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4 - if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d - name_num=$1 - para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}") - end - if @md.toc_lev_limit - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - def ocn(data) #and auto segment numbering increment - @tuned_file=[] - object_array=SiSU_document_structure::OCN.new(@md,data).ocn - object_array.each do |o| - @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor - else o.txt - end - end - @tuned_file=@tuned_file.flatten - end - def minor_numbering(data) #and auto segment numbering increment - @tuned_file=[] - number_small,letter_small=0,0 - letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z ) - data.each do |para| - if para =~/\w|\S|<|\(/ - if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|||||<\/tr>|


|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here #  added with Tune.code #ยก - if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later) - end - if para =~/^#[ 1]/ - letter_small=0 - number_small=0 if para =~ /^#1/ - number_small+=1 - para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004 - end - if para =~/^_# / - para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004 - letter_small+=1 - end - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - def name_para_seg_filename(data) - # paragraph name/numbering rules - # manual naming overrides, manual naming may be - # alpha-numeric characters mixed, - # numeric only (a number), if - # all segments have been named, - # the numbers used are over 1000 or - # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented) - # [for now a warning is printed for such documents on use of maintenance or very-verbose flag] - # auto-naming takes the form of giving numbers to segments - # the rules for which are as follows - # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.) - # otherwise the level 4 segment number from the embedded document structure info is used - # if there is none a sequential number is designated, preceded by an underscore - @tuned_file=[] - art_filename_auto=1 - @counter=1 - @unique_auto_name=[] - if not @md.seg_autoname_safe and @md.cmd =~/[MV]/ - puts 'manual segment names, numbers used as names, risk warning (segmented html)' - end - data.each do |para| - para=SiSU_document_structure::Structure.new(@md,para).structure_markup - if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/ - if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \ - and not @md.set_heading_seg - @md.set_heading_seg=true - end - if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name - pattern=$1 - pattern.gsub!(/(?:[:,-]|\W)/,'.') - pattern.gsub!(/\.$/,'') - if not @md.seg_names.nil? \ - and not @md.seg_names.include?(pattern) - para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}") - @md.seg_names << pattern - else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/ - end - end - if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info - pattern=$1 - pattern.gsub!(/(?:[:,-]|\W)/,'.') - pattern.gsub!(/\.$/,'') - if not @md.seg_names.nil? \ - and not @md.seg_names.include?(pattern) - para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}") - @md.seg_names << pattern - else - para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}") - @md.seg_names << "~#{pattern}" - end - end - if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one - if not @md.seg_names.nil? \ - and not @md.seg_names.include?(art_filename_auto) - para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}}) - @md.seg_names << art_filename_auto - else puts 'segment name (numbering) error' - end - art_filename_auto+=1 - end - end - @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \ - and (@md.pagenew or @md.pagebreak) - m=$1 #watch ref~ - para_tmp=[] - if @md.pagenew.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page_new]}\n" << para - elsif @md.pagebreak.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page]}\n" << para - end - para_result=unless para_tmp.length > 0; para - else para_tmp - end - else para - end - end - if @md.seg_names.length > 0 - @md.set_heading_seg=true - end - @tuned_file=@tuned_file.flatten - end - def set_heading_top(data) #% make sure no false positives - unless @md.set_heading_top - puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/ - @tuned_file=[] - data.each do |para| - unless @md.set_heading_top - if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \ - and para !~/\A\s*\Z/m - @md.set_heading_top=true - head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}" - else "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]" - end - @tuned_file << head - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - end - def set_heading_seg(data) #% make sure no false positives - unless @md.set_heading_seg - puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/ - @tuned_file=[] - data.each do |para| - unless @md.set_heading_seg - if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \ - and para !~/\A\s*\Z/m \ - and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/ - @md.set_heading_seg=true - head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]" - else "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]" - end - @tuned_file << head - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - end - def set_header_title(data) #% make sure no false positives - unless @md.set_header_title - puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/ - @tuned_file=[] - data.each do |para| - unless @md.set_header_title - if para !~/^%{1,2}\s/m \ - and para !~/\A\s*\Z/m - @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}" - @md.title=@md.heading_seg_first - @md.set_header_title=true - end - end - @tuned_file << para - end - @tuned_file=@tuned_file.flatten - end - end def endnotes(data) @tuned_file=[] endnote_no,endnote_ref=1,1 @@ -1058,7 +735,7 @@ module SiSU_DAL para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch para_plus_en=para.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m) para_tail=if para =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m - /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1] + /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.*?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1] else '' end para_plus_en << para_tail -- cgit v1.2.3