aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/sisu/v0/constants.rb1
-rw-r--r--lib/sisu/v0/dal.rb335
-rw-r--r--lib/sisu/v0/dal_idx.rb201
-rw-r--r--lib/sisu/v0/dal_numbering.rb374
-rw-r--r--lib/sisu/v0/dal_syntax.rb2
-rw-r--r--lib/sisu/v0/param.rb7
-rw-r--r--lib/sisu/v0/plaintext.rb2
7 files changed, 589 insertions, 333 deletions
diff --git a/lib/sisu/v0/constants.rb b/lib/sisu/v0/constants.rb
index f352ab7c..3f050df7 100644
--- a/lib/sisu/v0/constants.rb
+++ b/lib/sisu/v0/constants.rb
@@ -90,6 +90,7 @@ Mx[:gl_bullet]= "#{Mx[:gl_o]}●#{Mx[:gl_c]}"
#non substantive text sort: <-#> <~#>
Mx[:pa_non_object_dummy_heading]="#{Mx[:pa_o]}-##{Mx[:pa_c]}" #unnumbered paragraph, delete when not required [used in dummy headings, eg. for segmented html] (place marker at end of paragraph)
Mx[:pa_non_object_no_heading]="#{Mx[:pa_o]}~##{Mx[:pa_c]}" #unnumbered paragraph (place marker at end of paragraph)
+Mx[:idx_o]='▢ '; Mx[:idx_c]='▢ ' # opening and closing delimiters placed around book index entries (matched and stripped in dal_idx.rb)
Mx[:nbsp]= '▭ '
Mx[:br_line]= "#{Mx[:mk_o]}br#{Mx[:mk_c]}"
Mx[:br_paragraph]= "#{Mx[:mk_o]}br#{Mx[:mk_c]}"
diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb
index 25b7528e..aa4758b7 100644
--- a/lib/sisu/v0/dal.rb
+++ b/lib/sisu/v0/dal.rb
@@ -65,6 +65,8 @@ module SiSU_DAL
require "#{SiSU_lib}/param"
require "#{SiSU_lib}/dal_syntax"
require "#{SiSU_lib}/dal_doc_str"
+ require "#{SiSU_lib}/dal_idx"
+ require "#{SiSU_lib}/dal_numbering"
require "#{SiSU_lib}/i18n"
require "#{SiSU_lib}/shared_sem"
include SiSU_Env
@@ -211,7 +213,8 @@ module SiSU_DAL
data=character_check(data)
data=images(data)
data=SiSU_document_structure::Tables.new(@md,data).tables
- data=numbering_song(data) #tr issue
+ data=SiSU_numbering::Numbering.new(@md,data).numbering_song
+ data=SiSU_book_index::Book_index.new(data).indexing_song if @md.book_index
data=endnotes(data)
data=object_digest(data)
meta=metadata(data)
@@ -449,7 +452,7 @@ module SiSU_DAL
end
def substitutions_and_insertions?(data)
data_expand=[]
- if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content preceeds it)
+ if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content precedes it)
data[0].gsub!(/^#!\s*\/usr\/bin\/sisu/,'')
data[0].gsub!(/^#!\s*\/usr\/bin\/env sisu/,'')
end
@@ -507,332 +510,6 @@ module SiSU_DAL
end
end
end
- def numbering_song(data)
- data=number_plaintext_para(data)
- data=name_endnote_seg(data) #tr issue
- data=auto_number_heading_ie_title(data) #tr issue
- data=ocn(data) #watch
- data=minor_numbering(data)
- data=name_para_seg_filename(data)
- data=set_heading_seg(data) unless @md.set_heading_seg
- data=set_heading_top(data) unless @md.set_heading_top
- data=set_header_title(data) unless @md.set_header_title
- data
- end
- def number_plaintext_para(data)
- @tuned_file=[]
- data.each do |para|
- if para !~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/
- para.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
- end
- para.gsub!(/^/,"\n") unless para =~/#{Mx[:tc_p]}/u
- para.gsub!(/^\s+|\s$/,"\n")
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- def name_endnote_seg(data)
- @tuned_file=[]
- data.each do |para|
- para.gsub!(/<:3>\s*<:ee>/, <<-WOK
-#{@@endnote['special_align']} <p /><br />\r
-#{@@endnote['seg_name_3']} <p />
-#{@@endnote['special_align_close']}
- WOK
- )
- para.gsub!(/<:2>\s*<:ee>/, <<-WOK
-#{@@endnote['special_align']} <p /><br />\r
-#{@@endnote['seg_name_2']} <p />
-#{@@endnote['special_align_close']}
- WOK
- )
- para.gsub!(/<:1>\s*<:ee>/, <<-WOK
-#{@@endnote['special_align']} <p /><br />\r
-#{@@endnote['seg_name_1']} <p />
-#{@@endnote['special_align_close']}
- WOK
- )
- @tuned_file << para
- end
- # debug 2003w46 adding revision control info
- if @md.flag_auto_endnotes \
- and @md.flag_separate_endnotes_make
- @tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}"
- end
- @tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON
- @tuned_file=@tuned_file.flatten
- end
- def owner_details_seg
- data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details"
- end
- def number_sub_heading(para,num,title_no)
- case para
- when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/; para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ")
- when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ")
- when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/
- para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}})
- when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/
- para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}") #where title contains title number
- else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided
- end
- if @md.toc_lev_limit \
- and @md.toc_lev_limit < num
- para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch
- end
- para
- end
- def auto_number_heading_ie_title(data) #also does some segment naming
- @tuned_file=[]
- if @md.markup =~/num_top/ \
- or @md.num_top # watch, 2003w23
- input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup
- input||=@md.num_top if @md.num_top !~/^$/
- end
- num_top=input.to_i
- t_no1=t_no2=t_no3=t_no4=0
- no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3)
- t_not=0
- data.each do |para| #@md.seg_names << [additions to segment names]
- if (@md.markup =~/num_top/ \
- or (@md.num_top \
- and @md.num_top !~/^$/)) \
- and para !~/^#{Rx[:meta]}/
- if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \
- and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/)
- t_not+=1 #; t_no2=0; t_no3=0
- para.gsub!(/^(#{Mx[:lv_o]}#{no1}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
- para.gsub!(/^(#{Mx[:lv_o]}#{no2}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
- para.gsub!(/^(#{Mx[:lv_o]}#{no3}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
- para.gsub!(/^(#{Mx[:lv_o]}#{no4}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
- end
- if para =~/#{Mx[:lv_o]}#{no1}:/
- @subnumber=1
- @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/
- end
- if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \
- and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \
- and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/
- if para =~/^#{Mx[:lv_o]}#{no1}:/
- t_no1+=1; t_no2=0; t_no3=0
- title_no="#{t_no1}"
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(title_no)
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329)
- para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. })
- unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ")
- end
- @md.seg_names << title_no
- #else puts "warning segment name #{title_no} already exists"
- end
- unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,
- %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}})
- end
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch
- para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ")
- end
- if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/
- t_no2+=1; t_no3=0
- title_no="#{t_no1}.#{t_no2}"
- para=number_sub_heading(para,no2,title_no)
- end
- if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/
- t_no3+=1
- title_no="#{t_no1}.#{t_no2}.#{t_no3}"
- para=number_sub_heading(para,no3,title_no)
- end
- elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005
- para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ")
- para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}")
- para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}")
- end
- elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
- if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
- name_num=$1
- para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}")
- end
- if @md.toc_lev_limit
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- def ocn(data) #and auto segment numbering increment
- @tuned_file=[]
- object_array=SiSU_document_structure::OCN.new(@md,data).ocn
- object_array.each do |o|
- @tuned_file <<= if o.ocn; "#{o.txt} #{Mx[:id_o]}~#{o.ocn};#{o.lv};#{o.type}#{Mx[:id_c]}" #main ocn descriptor
- else o.txt
- end
- end
- @tuned_file=@tuned_file.flatten
- end
- def minor_numbering(data) #and auto segment numbering increment
- @tuned_file=[]
- number_small,letter_small=0,0
- letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
- data.each do |para|
- if para =~/\w|\S|<|\(/
- if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|<table|<\/table>|<td|<\/td>|<th|<\/th>|<tr>|<\/tr>|<hr width|<:4-endnotes>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here #&nbsp; added with Tune.code #¡
- if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
- end
- if para =~/^#[ 1]/
- letter_small=0
- number_small=0 if para =~ /^#1/
- number_small+=1
- para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004
- end
- if para =~/^_# /
- para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004
- letter_small+=1
- end
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- def name_para_seg_filename(data)
- # paragraph name/numbering rules
- # manual naming overrides, manual naming may be
- # alpha-numeric characters mixed,
- # numeric only (a number), if
- # all segments have been named,
- # the numbers used are over 1000 or
- # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
- # [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
- # auto-naming takes the form of giving numbers to segments
- # the rules for which are as follows
- # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
- # otherwise the level 4 segment number from the embedded document structure info is used
- # if there is none a sequential number is designated, preceded by an underscore
- @tuned_file=[]
- art_filename_auto=1
- @counter=1
- @unique_auto_name=[]
- if not @md.seg_autoname_safe and @md.cmd =~/[MV]/
- puts 'manual segment names, numbers used as names, risk warning (segmented html)'
- end
- data.each do |para|
- para=SiSU_document_structure::Structure.new(@md,para).structure_markup
- if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/
- if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \
- and not @md.set_heading_seg
- @md.set_heading_seg=true
- end
- if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
- pattern=$1
- pattern.gsub!(/(?:[:,-]|\W)/,'.')
- pattern.gsub!(/\.$/,'')
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(pattern)
- para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}")
- @md.seg_names << pattern
- else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/
- end
- end
- if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info
- pattern=$1
- pattern.gsub!(/(?:[:,-]|\W)/,'.')
- pattern.gsub!(/\.$/,'')
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(pattern)
- para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}")
- @md.seg_names << pattern
- else
- para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}")
- @md.seg_names << "~#{pattern}"
- end
- end
- if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one
- if not @md.seg_names.nil? \
- and not @md.seg_names.include?(art_filename_auto)
- para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}})
- @md.seg_names << art_filename_auto
- else puts 'segment name (numbering) error'
- end
- art_filename_auto+=1
- end
- end
- @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \
- and (@md.pagenew or @md.pagebreak)
- m=$1 #watch ref~
- para_tmp=[]
- if @md.pagenew.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page_new]}\n" << para
- elsif @md.pagebreak.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page]}\n" << para
- end
- para_result=unless para_tmp.length > 0; para
- else para_tmp
- end
- else para
- end
- end
- if @md.seg_names.length > 0
- @md.set_heading_seg=true
- end
- @tuned_file=@tuned_file.flatten
- end
- def set_heading_top(data) #% make sure no false positives
- unless @md.set_heading_top
- puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/
- @tuned_file=[]
- data.each do |para|
- unless @md.set_heading_top
- if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \
- and para !~/\A\s*\Z/m
- @md.set_heading_top=true
- head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}"
- else "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]"
- end
- @tuned_file << head
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- end
- def set_heading_seg(data) #% make sure no false positives
- unless @md.set_heading_seg
- puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/
- @tuned_file=[]
- data.each do |para|
- unless @md.set_heading_seg
- if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \
- and para !~/\A\s*\Z/m \
- and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/
- @md.set_heading_seg=true
- head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]"
- else "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]"
- end
- @tuned_file << head
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- end
- def set_header_title(data) #% make sure no false positives
- unless @md.set_header_title
- puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/
- @tuned_file=[]
- data.each do |para|
- unless @md.set_header_title
- if para !~/^%{1,2}\s/m \
- and para !~/\A\s*\Z/m
- @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
- @md.title=@md.heading_seg_first
- @md.set_header_title=true
- end
- end
- @tuned_file << para
- end
- @tuned_file=@tuned_file.flatten
- end
- end
def endnotes(data)
@tuned_file=[]
endnote_no,endnote_ref=1,1
@@ -1058,7 +735,7 @@ module SiSU_DAL
para.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch
para_plus_en=para.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m)
para_tail=if para =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m
- /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.+?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1]
+ /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+(.*?#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]})/m.match(para)[1]
else ''
end
para_plus_en << para_tail
diff --git a/lib/sisu/v0/dal_idx.rb b/lib/sisu/v0/dal_idx.rb
new file mode 100644
index 00000000..5e07396a
--- /dev/null
+++ b/lib/sisu/v0/dal_idx.rb
@@ -0,0 +1,201 @@
+# coding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+ #___#
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008 Ralph Amissah All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007 Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/copyleft/gpl.html>
+
+ <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
+
+ * SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ * Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+ * Download:
+ <http://www.jus.uio.no/sisu/SiSU/download.html>
+
+ * Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+ ** Description: book index, extraction of index markers from the text and construction of the index
+
+=end
+module SiSU_book_index
+  # Collects the book-index markers embedded in paragraphs (text wrapped in
+  # Mx[:idx_o] ... Mx[:idx_c]), prints the resulting index to the screen and
+  # returns the paragraphs with the markers removed.
+  class Book_index
+    # data:: array of paragraph strings (each one is matched against the index regexes)
+    def initialize(data)
+      @data=data
+    end
+    # Entry point: sets up the regexes shared by the other methods, builds and
+    # prints the index, then strips the index markers from every paragraph.
+    # Returns the cleaned array of paragraphs.
+    def indexing_song
+      @rgx_idx=/#{Mx[:idx_o]}(?:.+?)#{Mx[:idx_c]}\s*/ # an index marker plus trailing whitespace, used for removal
+      #@rgx_idx=/\s*#{Mx[:idx_o]}(?:.+?)#{Mx[:idx_c]}\s*/
+      @rgx_idx_ocn_init=/#{Mx[:idx_o]}(.+?)#{Mx[:idx_c]}\s*#{Mx[:id_o]}~(\d+)\S+?#{Mx[:id_c]}/ # marker text (1) and the number following ~ in the id marker (2)
+      @rgx_idx_ocn=/(.+?)~(\d+)/ # splits the intermediate "terms~number" string
+      @data=extract_book_index(@data)
+      @data=clean_index(@data)
+      @data
+    end
+    # Scans every paragraph for an index marker that is directly followed by an
+    # id marker, builds the index from the matches and prints it.
+    # Returns the non-nil paragraphs (the text itself is not modified here).
+    def extract_book_index(data)
+      tuned_file=[]
+      idx_array=[]
+      data.each do |para|
+        next unless para
+        found=@rgx_idx_ocn_init.match(para)
+        idx_array << "#{found[1]}~#{found[2]}" if found # "terms~number"
+        tuned_file << para
+      end
+      unless idx_array.empty?
+        idx_array=construct_idx_array(idx_array)
+        screen_print(construct_book_index(idx_array)) unless idx_array.empty?
+      end
+      tuned_file
+    end
+    # Turns each "terms~number" string into one hash per ';'-separated term:
+    #   { :rough_idx => term, :ocn => number }
+    # Terms are stripped of surrounding whitespace and empty terms are dropped,
+    # so later lookups never see a key that differs only by whitespace.
+    def construct_idx_array(idx_array)
+      idx_lst=[]
+      idx_array.each do |idx|
+        idx_list,ocn=@rgx_idx_ocn.match(idx)[1,2]
+        idx_list.scan(/[^;]+/).each do |term|
+          term=term.strip
+          idx_lst << { :rough_idx => term, :ocn => ocn } unless term.empty?
+        end
+      end
+      idx_lst
+    end
+    # Builds the index structure from the hashes made by construct_idx_array.
+    #
+    # Each :rough_idx is split on ':' and '|'; the first part is the main term,
+    # any further parts are sub-terms of it. A part may end in "+n", which
+    # records a range "ocn-(ocn+n)" alongside the number.
+    #
+    # Result (also kept in @the_idx before sorting): an array of
+    #   [main_term, { 'a1' => [locations], 'b1' => { sub_term => [locations] } }]
+    # pairs sorted by main term, where a location is
+    #   { :ocn => number } or { :ocn => number, :range => "from-to" }.
+    def construct_book_index(idx_array)
+      @the_idx={}
+      idx_array.each do |idx|
+        ocn=idx[:ocn]
+        terms=idx[:rough_idx].scan(/[^|:]+/).map { |t| t.strip }.reject { |t| t.empty? }
+        next if terms.empty?
+        # the hash key is computed once, from the already stripped main term;
+        # stripping after the key had been stored made later lookups miss it
+        use=split_range(terms[0])[0]
+        @the_idx[use]||={}
+        terms.each_with_index do |term,pos|
+          name,extent=split_range(term)
+          entry={ :ocn => ocn }
+          entry[:range]="#{ocn}-#{ocn.to_i+extent.to_i}" if extent
+          if pos == 0 # main term, identified by position rather than by comparing text
+            (@the_idx[use]['a1']||=[]) << entry
+          else
+            @the_idx[use]['b1']||={}
+            (@the_idx[use]['b1'][name]||=[]) << entry
+          end
+        end
+      end
+      #p @the_idx.sort; p '-----'
+      @the_idx.sort
+    end
+    # Prints the sorted index: each main term on its own line followed by its
+    # locations, then each sub-term (sorted) on a tab-indented line.
+    def screen_print(the_idx)
+      the_idx.each do |i|
+        i.each do |x|
+          case x
+          when String
+            print "\n" + x + ', '
+          when Array
+            p 'array error? -->'
+            print x
+          when Hash
+            print_locations(x['a1']) if x['a1'].is_a?(Array)
+            if x['b1']
+              # every key of 'b1' is a sub-term, so none is filtered out by name
+              x['b1'].sort.each do |k,y|
+                print "\n\t" + k + ', '
+                print_locations(y)
+              end
+            end
+          end
+        end
+      end
+    end
+    # Removes every index marker (and the whitespace after it) from each
+    # paragraph, in place, and returns the paragraphs as a new array.
+    def clean_index(data)
+      tuned_file=[]
+      data.each do |para|
+        para.gsub!(@rgx_idx,'')
+        tuned_file << para
+      end
+      tuned_file
+    end
+    private
+    # Splits "term+n" into ["term", "n"]; returns [term, nil] when there is no "+n" part.
+    def split_range(term)
+      m=/(.+?)\+(\d+)/.match(term)
+      m ? [m[1].strip,m[2]] : [term,nil]
+    end
+    # Prints a list of location hashes, preferring the range over the single number.
+    def print_locations(locations)
+      locations.each do |loc|
+        if loc[:range]
+          print loc[:range] + ', '
+        elsif loc[:ocn]
+          print loc[:ocn] + ', '
+        else p 'error'
+        end
+      end
+    end
+  end
+end
+
+__END__
+
diff --git a/lib/sisu/v0/dal_numbering.rb b/lib/sisu/v0/dal_numbering.rb
new file mode 100644
index 00000000..e14b87d7
--- /dev/null
+++ b/lib/sisu/v0/dal_numbering.rb
@@ -0,0 +1,374 @@
+# coding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+ #___#
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008 Ralph Amissah All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007 Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/copyleft/gpl.html>
+
+ <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
+
+ * SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ * Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+ * Download:
+ <http://www.jus.uio.no/sisu/SiSU/download.html>
+
+ * Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+ ** Description: numbering, paragraph tidying, heading auto-numbering, object citation numbers and segment naming
+
+=end
+module SiSU_numbering
+ class Numbering
+    # md::   object queried by the numbering steps (markup, num_top, seg_names, ...)
+    # data:: array of paragraphs handed to numbering_song
+    def initialize(md,data)
+      @md=md
+      @data=data
+    end
+    # Runs the numbering steps over @data in a fixed order and returns the
+    # resulting paragraphs. The three set_* steps are only run while the
+    # corresponding flag on @md is still unset.
+    def numbering_song
+      #name_endnote_seg (originally between the first two steps) stays disabled #tr issue
+      steps=[
+        :number_plaintext_para,
+        :auto_number_heading_ie_title, #tr issue
+        :ocn,                          #watch
+        :minor_numbering,
+        :name_para_seg_filename,
+      ]
+      data=steps.inject(@data) { |paras,step| send(step,paras) }
+      unless @md.set_heading_seg
+        data=set_heading_seg(data)
+      end
+      unless @md.set_heading_top
+        data=set_heading_top(data)
+      end
+      unless @md.set_header_title
+        data=set_header_title(data)
+      end
+      data
+    end
+    # Normalises line breaks of every paragraph (in place) and returns the
+    # paragraphs as a flattened array, also stored in @tuned_file.
+    def number_plaintext_para(data)
+      @tuned_file=data.map do |text|
+        #messy, but idea is that tables should retain breaks
+        unless text =~/#{Mx[:gr_o]}(?:code|group|alt|poem|verse)#{Mx[:gr_c]}|#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/
+          text.gsub!(/(.+)\n/,'\1 ')
+        end
+        # prefix each line with a newline, except where the Mx[:tc_p] marker is present
+        if text !~/#{Mx[:tc_p]}/u
+          text.gsub!(/^/,"\n")
+        end
+        text.gsub!(/^\s+|\s$/,"\n")
+        text
+      end.flatten
+    end
+ # Builds the trailing endnote lines: an "Endnotes" heading line when both
+ # @md.flag_auto_endnotes and @md.flag_separate_endnotes_make are set,
+ # followed by the Mx[:br_endnotes] marker. Returns them as a flat array
+ # (also stored in @tuned_file).
+ # NOTE(review): the `data` argument is never read, so the paragraphs passed in
+ # are not part of the returned array; the only call visible in this file (in
+ # numbering_song) is commented out - confirm the intended behaviour before re-enabling.
+ def name_endnote_seg(data)
+ @tuned_file=[]
+ if @md.flag_auto_endnotes \
+ and @md.flag_separate_endnotes_make
+ @tuned_file << "\n#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}Endnotes #{Mx[:gl_o]}-##{Mx[:gl_c]} #{Mx[:id_o]}~0;0:0;u0#{Mx[:id_c]}"
+ end
+ @tuned_file << "\n#{Mx[:br_endnotes]}" #DECIDE ON
+ @tuned_file=@tuned_file.flatten
+ end
+    # Appends an "Owner Details" heading line (marked 4:owner.details) to the
+    # paragraphs held in @data and returns @data.
+    # The method takes no argument, so the paragraphs have to come from the
+    # instance; the bare name `data` was undefined here and raised NameError.
+    def owner_details_seg
+      @data << "#{Mx[:lv_o]}4:owner.details#{Mx[:lv_c]}Owner Details"
+    end
+ # Inserts the heading number `title_no` into the heading paragraph `para`
+ # whose level marker carries level `num`. `para` is modified in place and returned.
+ # The first matching case decides how the number is applied:
+ #   num:-        marker replaced by the number
+ #   num:#        marker replaced by the number
+ #   num:name     name kept, number placed before the title, :name# marker appended
+ #   num: + title already starting with the number    number used as the marker name only
+ #   otherwise    number used as the marker name and placed before the title
+ def number_sub_heading(para,num,title_no)
+ case para
+ when /#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/; para.gsub!(/#{Mx[:lv_o]}#{num}:-#{Mx[:lv_c]}/,"#{title_no} ")
+ when /^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/; para.gsub!(/^#{Mx[:lv_o]}#{num}:##{Mx[:lv_c]}/,"#{title_no} ")
+ when /^#{Mx[:lv_o]}#{num}:[a-z_\.]+#{Mx[:lv_c]}/
+ para.gsub!(/^#{Mx[:lv_o]}#{num}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,%{#{Mx[:lv_o]}#{num}:\\1#{Mx[:lv_c]} #{title_no} \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}})
+ when /^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}\s*#{title_no}/
+ para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]}") #where title contains title number
+ else para.gsub!(/^#{Mx[:lv_o]}#{num}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{num}:#{title_no}#{Mx[:lv_c]} #{title_no} ") #main, where title number is to be provided
+ end
+ # when the heading level is beyond @md.toc_lev_limit, a level 5-9 marker is replaced by '!_ '
+ if @md.toc_lev_limit \
+ and @md.toc_lev_limit < num
+ para.gsub!(/^#{Mx[:lv_o]}[5-9]:\S*?#{Mx[:lv_c]}/,'!_ ') #bold line, watch
+ end
+ para
+ end
+ # Auto-numbers headings and names some segments. Numbering starts at the
+ # heading level given by num_top (read from "num_top=N" in @md.markup, else
+ # from @md.num_top) and covers that level and the two levels below it
+ # (1, 1.1, 1.1.1). Paragraphs are modified in place; returns them as a
+ # flattened array, also stored in @tuned_file.
+ # Numbers used as segment names are appended to @md.seg_names.
+ def auto_number_heading_ie_title(data) #also does some segment naming
+ @tuned_file=[]
+ if @md.markup =~/num_top/ \
+ or @md.num_top # watch, 2003w23
+ input="#{@md.markup}"[/num_top\=([1-6])/,1] if @md.markup
+ input||=@md.num_top if @md.num_top !~/^$/
+ end
+ # input is nil when no num_top was found; nil.to_i gives 0
+ num_top=input.to_i
+ # running counters for the three numbered levels (t_no4 is assigned but not used below)
+ t_no1=t_no2=t_no3=t_no4=0
+ no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3)
+ t_not=0
+ data.each do |para| #@md.seg_names << [additions to segment names]
+ if (@md.markup =~/num_top/ \
+ or (@md.num_top \
+ and @md.num_top !~/^$/)) \
+ and para !~/^#{Rx[:meta]}/
+ # NOTE(review): the alternation below reads "^#{no3}#{no4}" with no "|" between
+ # the two levels, so it matches no3 directly followed by no4 - confirm intent
+ if (para =~/^(?:#{no1}|^#{no2}|^#{no3}#{no4})~#/ \
+ and para !~/^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}/)
+ t_not+=1 #; t_no2=0; t_no3=0
+ para.gsub!(/^(#{Mx[:lv_o]}#{no1}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
+ para.gsub!(/^(#{Mx[:lv_o]}#{no2}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
+ para.gsub!(/^(#{Mx[:lv_o]}#{no3}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
+ para.gsub!(/^(#{Mx[:lv_o]}#{no4}):#(#{Mx[:lv_c]})/,"\\1:ps#{t_not}\\2")
+ end
+ # NOTE(review): both assignments run under the same condition, so @subnumber
+ # always ends up 0 here; it is not read anywhere in this method
+ if para =~/#{Mx[:lv_o]}#{no1}:/
+ @subnumber=1
+ @subnumber=0 if para =~/#{Mx[:lv_o]}#{no1}:/
+ end
+ # headings eligible for numbering: level 1-6 markers, excluding names ending in "-",
+ # the endnotes segment, named headings whose title already starts with digits,
+ # and paragraphs carrying a ~# or -# marker
+ if para =~/^#{Mx[:lv_o]}[1-6]:[\w-]*#{Mx[:lv_c]}/ \
+ and para !~ /(?:#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}|#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^#{Mx[:lv_o]}[1-6]:[a-z_\.]+#{Mx[:lv_c]}\s*[\d.]+)\s/ \
+ and para !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/
+ # top numbered level: bump the first counter, reset the two below it
+ if para =~/^#{Mx[:lv_o]}#{no1}:/
+ t_no1+=1; t_no2=0; t_no3=0
+ title_no="#{t_no1}"
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(title_no)
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*(\S+)#/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]} \\1 #{title_no} ") #shift placement of auto-number to after first word, e.g. Article # not # Article, added on occasion of ABF (20040329)
+ para.gsub!(/^#{no1}\{\s+(Article|Clause|Section)\s+#/i,%{#{no1}~#{title_no} \\1 #{title_no}. })
+ unless para =~/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}\s*[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:#{title_no}#{Mx[:lv_c]}#{title_no}. ")
+ end
+ @md.seg_names << title_no
+ #else puts "warning segment name #{title_no} already exists"
+ end
+ unless para =~/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)#{Mx[:lv_c]}\s*(.+)/i,
+ %{#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}#{title_no}. \\2 #{Mx[:fa_o]}:name##{title_no}#{Mx[:fa_c]}})
+ end
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:##{Mx[:lv_c]}/,"#{title_no}. ") #watch
+ para.gsub!(/^#{no1}~#\s*/,"#{title_no}. ")
+ end
+ # second numbered level: "t_no1.t_no2"
+ if para =~/^#{Mx[:lv_o]}#{no2}:\S*?#{Mx[:lv_c]}/
+ t_no2+=1; t_no3=0
+ title_no="#{t_no1}.#{t_no2}"
+ para=number_sub_heading(para,no2,title_no)
+ end
+ # third numbered level: "t_no1.t_no2.t_no3"
+ if para =~/^#{Mx[:lv_o]}#{no3}:\S*?#{Mx[:lv_c]}/
+ t_no3+=1
+ title_no="#{t_no1}.#{t_no2}.#{t_no3}"
+ para=number_sub_heading(para,no3,title_no)
+ end
+ # names ending in "-" are not numbered; only the trailing "-" is dropped from the name
+ elsif para =~/^#{Mx[:lv_o]}[1-6]:[\w-]+-#{Mx[:lv_c]}/ # endnotes, watch2005
+ para.gsub!(/^#{Mx[:lv_o]}#{no1}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no1}:\\1#{Mx[:lv_c]}") #"#{no1}~\\1 ")
+ para.gsub!(/^#{Mx[:lv_o]}#{no2}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no2}:\\1#{Mx[:lv_c]}")
+ para.gsub!(/^#{Mx[:lv_o]}#{no3}:([a-z_\.]+)-#{Mx[:lv_c]}/,"#{Mx[:lv_o]}#{no3}:\\1#{Mx[:lv_c]}")
+ end
+ elsif @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
+ # an unnamed heading whose title starts with digits/dots takes that number as its name
+ if para =~/^#{Mx[:lv_o]}[1-9]:#{Mx[:lv_c]}([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
+ name_num=$1
+ para.gsub!(/^#{Mx[:lv_o]}([1-9]:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{name_num}#{Mx[:lv_c]}")
+ end
+ # NOTE(review): empty branch, does nothing
+ if @md.toc_lev_limit
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+    # Passes the paragraphs through SiSU_document_structure::OCN and turns each
+    # returned object back into text: objects that carry an ocn get an id
+    # descriptor "~ocn;lv;type" appended, the others contribute their text as is.
+    # Returns the flattened result, also stored in @tuned_file.
+    def ocn(data) #and auto segment numbering increment
+      @tuned_file=[]
+      objects=SiSU_document_structure::OCN.new(@md,data).ocn
+      described=objects.map do |obj|
+        if obj.ocn
+          "#{obj.txt} #{Mx[:id_o]}~#{obj.ocn};#{obj.lv};#{obj.type}#{Mx[:id_c]}" #main ocn descriptor
+        else
+          obj.txt
+        end
+      end
+      @tuned_file=described.flatten
+    end
+ # Expands the minor list markers at the start of a paragraph, in place:
+ #   "# " or "#1"  becomes a running number "N. " ("#1" restarts the count at 1)
+ #   "_# "         becomes an :i1 marker followed by a running letter "a. ", "b. ", ...
+ # Both counters are reset by any level 1-9 heading; the letter counter is also
+ # reset by every numbered item. Paragraphs matching the long exclusion regex
+ # (comments, metadata, endnotes, tables, grouped text, ...) are left untouched.
+ # Returns the paragraphs as a flattened array, also stored in @tuned_file.
+ def minor_numbering(data) #and auto segment numbering increment
+ @tuned_file=[]
+ number_small,letter_small=0,0
+ # only 26 letters are available; past "z" the lookup returns nil, which interpolates as an empty string
+ letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
+ data.each do |para|
+ if para =~/\w|\S|<|\(/
+ if para !~/^%% |#{Rx[:meta]}|^0~|^#{Mx[:lv_o]}endnotes:|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}:p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|#{Mx[:gr_o]}(?:alt|code|group|poem|table)#{Mx[:gr_c]}|^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$|<table|<\/table>|<td|<\/td>|<th|<\/th>|<tr>|<\/tr>|<hr width|<:4-endnotes>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here #&nbsp; added with Tune.code #¡
+ if para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/; number_small,letter_small=0,0 #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
+ end
+ if para =~/^#[ 1]/
+ letter_small=0
+ number_small=0 if para =~ /^#1/
+ number_small+=1
+ para.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004
+ end
+ if para =~/^_# /
+ para.gsub!(/^_# /,"#{Mx[:fa_o]}:i1#{Mx[:fa_c]} #{letter[letter_small]}. ") #change 2004
+ letter_small+=1
+ end
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ def name_para_seg_filename(data) # writes segment names into unnamed level 4-6 heading markers and puts page markers before headings; returns the flattened paragraph array
+ # paragraph name/numbering rules
+ # manual naming overrides, manual naming may be
+ # alpha-numeric characters mixed,
+ # numeric only (a number), if
+ # all segments have been named,
+ # the numbers used are over 1000 or
+ # it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
+ # [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
+ # auto-naming takes the form of giving numbers to segments
+ # the rules for which are as follows
+ # if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
+ # otherwise the level 4 segment number from the embedded document structure info is used
+ # if there is none a sequential number is designated, preceded by an underscore
+ @tuned_file=[] # output accumulator, rebuilt on every call
+ art_filename_auto=1 # next sequential fallback name for a level 4 heading
+ @counter=1 # not referenced again within this method
+ @unique_auto_name=[] # not referenced again within this method
+ if not @md.seg_autoname_safe and @md.cmd =~/[MV]/ # warning is printed only when cmd contains M or V
+ puts 'manual segment names, numbers used as names, risk warning (segmented html)'
+ end
+ data.each do |para|
+ para=SiSU_document_structure::Structure.new(@md,para).structure_markup # each paragraph is first rewritten by Structure#structure_markup (defined elsewhere)
+ if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}/ # level 4-6 heading marker with nothing between the colon and the closing delimiter
+ if para=~/^#{Mx[:lv_o]}[4]:#{Mx[:lv_c]}/ \
+ and not @md.set_heading_seg
+ @md.set_heading_seg=true # record that a level 4 heading has been seen
+ end
+ if para =~/^#{Mx[:lv_o]}[456]:#{Mx[:lv_c]}(?:\s*\S+)?\s+([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
+ pattern=$1
+ pattern.gsub!(/(?:[:,-]|\W)/,'.') # every non-word character becomes "."
+ pattern.gsub!(/\.$/,'') # drop a trailing "."
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(pattern)
+ para.gsub!(/^#{Mx[:lv_o]}([456]):#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1:#{pattern}#{Mx[:lv_c]}") # write the name into the heading marker
+ @md.seg_names << pattern
+ else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/ # name already taken (or seg_names is nil): the heading is left unnamed by this step
+ end
+ end
+ if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}.+?;4:(\d+);/m #extract segment name from embedded document structure info
+ pattern=$1
+ pattern.gsub!(/(?:[:,-]|\W)/,'.') # same normalisation as above
+ pattern.gsub!(/\.$/,'')
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(pattern)
+ para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1#{pattern}#{Mx[:lv_c]}")
+ @md.seg_names << pattern
+ else
+ para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,"#{Mx[:lv_o]}\\1~#{pattern}#{Mx[:lv_c]}") # name already taken: use it with a "~" prefix instead
+ @md.seg_names << "~#{pattern}" # NOTE(review): this else branch also runs when @md.seg_names is nil, where << would raise NoMethodError
+ end
+ end
+ if para =~/^#{Mx[:lv_o]}4:#{Mx[:lv_c]}/ #if still not segment name, provide a numerical one
+ if not @md.seg_names.nil? \
+ and not @md.seg_names.include?(art_filename_auto) # NOTE(review): art_filename_auto is an Integer while the names stored above are Strings, so this cannot match those entries
+ para.gsub!(/^#{Mx[:lv_o]}(4:)#{Mx[:lv_c]}/,%{#{Mx[:lv_o]}\\1~#{art_filename_auto}#{Mx[:lv_c]}})
+ @md.seg_names << art_filename_auto
+ else puts 'segment name (numbering) error'
+ end
+ art_filename_auto+=1 # advances whether or not the number was used
+ end
+ end
+ @tuned_file << if para =~/^#{Mx[:lv_o]}([1-6]):\S*?#{Mx[:lv_c]}/m \
+ and (@md.pagenew or @md.pagebreak) # headings get a page marker only when one of these settings is present
+ m=$1 #watch ref~ (m is the heading level digit captured above)
+ para_tmp=[]
+ if @md.pagenew.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page_new]}\n" << para # level digit appears in pagenew.inspect: page-new marker goes before the heading
+ elsif @md.pagebreak.inspect =~/#{m}/; para_tmp << "#{Mx[:br_page]}\n" << para # otherwise a page-break marker, if pagebreak.inspect contains the digit
+ end
+ para_result=unless para_tmp.length > 0; para # neither setting contains this level: the heading is appended alone
+ else para_tmp
+ end
+ else para # not a heading, or no page settings: paragraph passes through
+ end
+ end
+ if @md.seg_names.length > 0 # NOTE(review): no nil check here, unlike the tests on seg_names above
+ @md.set_heading_seg=true
+ end
+ @tuned_file=@tuned_file.flatten # value of this assignment is the method's return value
+ end
+ def set_heading_top(data) #% make sure no false positives
+ unless @md.set_heading_top
+ puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/
+ @tuned_file=[]
+ data.each do |para|
+ unless @md.set_heading_top
+ if para !~/^(?:#{Rx[:meta]}|@\S+:|0~\S+)\s/m \
+ and para !~/\A\s*\Z/m
+ @md.set_heading_top=true
+ head=if @md.title ; "#{Mx[:lv_o]}1:#{Mx[:lv_c]} #{@md.title}"
+ else "#{Mx[:lv_o]}1:#{Mx[:lv_c]} [no title provided]"
+ end
+ @tuned_file << head
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_heading_seg(data) #% make sure no false positives
+ unless @md.set_heading_seg
+ puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/
+ @tuned_file=[]
+ data.each do |para|
+ unless @md.set_heading_seg
+ if para !~/^(?:#{Rx[:meta]}|#{Mx[:lv_o]}[123]:\S*?#{Mx[:lv_c]})/m \
+ and para !~/\A\s*\Z/m \
+ and para !~/#{Mx[:br_page]}|#{Mx[:br_page_new]}/
+ @md.set_heading_seg=true
+ head=if @md.title ; "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [#{@md.title}]"
+ else "#{Mx[:lv_o]}4:seg#{Mx[:lv_c]} [segment]"
+ end
+ @tuned_file << head
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ def set_header_title(data) #% make sure no false positives
+ unless @md.set_header_title
+ puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/
+ @tuned_file=[]
+ data.each do |para|
+ unless @md.set_header_title
+ if para !~/^%{1,2}\s/m \
+ and para !~/\A\s*\Z/m
+ @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
+ @md.title=@md.heading_seg_first
+ @md.set_header_title=true
+ end
+ end
+ @tuned_file << para
+ end
+ @tuned_file=@tuned_file.flatten
+ end
+ end
+ end
+end
+__END__
diff --git a/lib/sisu/v0/dal_syntax.rb b/lib/sisu/v0/dal_syntax.rb
index acdec0e4..80635f36 100644
--- a/lib/sisu/v0/dal_syntax.rb
+++ b/lib/sisu/v0/dal_syntax.rb
@@ -326,7 +326,7 @@ module SiSU_Syntax
line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>|\d+)\^(\S+?)\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript single word, watch digit added
line.gsub!(/<[:e]\s+(.+?)!?>/,"#{Mx[:en_a_o]}\\1#{Mx[:en_a_c]}") #not tested
line.gsub!(/^\s*_\*\s*/,"#{Mx[:gl_bullet]}") #bullets, shortcut
- #line.gsub!(/^\s*_(\*+)\s*/,"#{Mx[:gl_bullet]}") #bullets, shortcut
+ line.gsub!(/=\{(.+?)\}/,"#{Mx[:idx_o]}\\1#{Mx[:idx_c]}") #
line.gsub!(/^\s*_([1-9])\*\s*/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}#{Mx[:gl_bullet]}") #bullets, shortcut
#line.gsub!(/^\s*_([1-9])(\*+)\s*/,"#{Mx[:fa_o]}:i\\1#{Mx[:fa_c]}#{Mx[:fa_o]}\\2#{Mx[:fa_c_o]}") #bullets, shortcut
line.gsub!(/^\s*_([1-9])\s+/,"#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}") #indent
diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb
index b211f5c1..7994487d 100644
--- a/lib/sisu/v0/param.rb
+++ b/lib/sisu/v0/param.rb
@@ -119,13 +119,13 @@ module SiSU_Param
@doc={ :lv=>[] }
@doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','',''
@@publisher='SiSU scribe'
- attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag
+ attr_accessor :cmd,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:sfx,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:dc_title,:html_title,:subtitle,:subtitle_tex,:creator_home,:dc_creator,:translator,:illustrator,:prepared_by,:digitized_by,:dc_subject,:dc_description,:dc_publisher,:dc_contributor,:dc_date,:dc_date_created,:dc_date_issued,:dc_date_available,:dc_date_valid,:dc_date_modified,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:dc_type,:dc_format,:dc_identifier,:dc_source,:dc_language,:language_original,:dc_relation,:dc_coverage,:dc_rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:flag_auto_heading_num,:markup,:markup_instruction,:markup_version,:markup_declared,:make_bold,:make_italic,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:creator_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:man_section,:man_name,:man_synopsis,:ec,:opt,:sem_tag,:book_index
def initialize(fns_array,opt)
@env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@sfx=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@dc_title=@html_title=@subtitle=@subtitle_tex=@creator_home=@dc_creator=@translator=@illustrator=@prepared_by=@digitized_by=@dc_subject=@dc_description=@dc_publisher=@dc_contributor=@dc_date=@dc_date_created=@dc_date_issued=@dc_date_available=@dc_date_valid=@dc_date_modified=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@dc_type=@dc_format=@dc_identifier=@dc_source=@dc_language=@language_original=@dc_relation=@dc_coverage=@dc_rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_auto_heading_num=@make_bold=@make_italic=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@creator_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@man_synopsis=nil
@man_section=1
@man_name='man page "name/whatis" information not provided, set in header @man: name=[whatis information]'
@data,@fns,@cmd,@mod,@opt=fns_array,opt.fns,opt.cmd,opt.mod,opt #@data used as data
- @flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo=false,false,false,false,false,false
+ @flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo,@book_index=false,false,false,false,false,false,false
@seg_autoname_safe=true
@sem_tag=false
@markup_instruction,@markup_declared,@image='','','' #check which other values should be set to empty rather than nil
@@ -576,6 +576,9 @@ module SiSU_Param
end
end
end
+ if not @book_index and para =~/^=\{(.+?)\}\s*$/
+ @book_index=true
+ end
unless @code_flag
case para
when /~\{\s+.+?\}~/m #% processing
diff --git a/lib/sisu/v0/plaintext.rb b/lib/sisu/v0/plaintext.rb
index 4dd808fc..fb4b1115 100644
--- a/lib/sisu/v0/plaintext.rb
+++ b/lib/sisu/v0/plaintext.rb
@@ -321,7 +321,7 @@ WOK
para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3')
para.gsub!(/<a href=".+?">(.+?)<\/a>/m,'\1')
para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links
- para.gsub!(/&nbsp;/,' ') # decide on
+ para.gsub!(/&nbsp;|#{Mx[:nbsp]}/,' ') # decide on
para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")
para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
#para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')