diff options
Diffstat (limited to 'lib/sisu/develop/ao_doc_str.rb')
-rw-r--r-- | lib/sisu/develop/ao_doc_str.rb | 2165 |
1 files changed, 2165 insertions, 0 deletions
diff --git a/lib/sisu/develop/ao_doc_str.rb b/lib/sisu/develop/ao_doc_str.rb new file mode 100644 index 00000000..ae029002 --- /dev/null +++ b/lib/sisu/develop/ao_doc_str.rb @@ -0,0 +1,2165 @@ +# encoding: utf-8 +=begin + +* Name: SiSU + +** Description: documents, structuring, processing, publishing, search +*** document abstraction + +** Author: Ralph Amissah + <ralph@amissah.com> + <ralph.amissah@gmail.com> + +** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah, + All Rights Reserved. + +** License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + <http://www.fsf.org/licensing/licenses/gpl.html> + <http://www.gnu.org/licenses/gpl.html> + + <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html> + +** SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + +** Homepages: + <http://www.jus.uio.no/sisu> + <http://www.sisudoc.org> + +** Git + <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary> + <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/ao_doc_str.rb;hb=HEAD> + +=end +module SiSU_AO_DocumentStructureExtract + class Instantiate < SiSU_Param::Parameters::Instructions + @@flag={ + ocn: :on, + code: :off, + lngsyn: :txt, + poem: :off, + block: :off, + box: :off, + group: :off, + alt: :off, + quote: :off, + table: :off, + table_to: :off, + } + def initialize + @@counter=@@column=@@columns=0 + @@line_mode='' + end + end + class Build + @@flag={ + ocn: :on, + code: :off, + lngsyn: :txt, + poem: :off, + block: :off, + box: :off, + group: :off, + alt: :off, + quote: :off, + table: :off, + table_to: :off, + } + def initialize(md,data) + @md,@data=md,data + SiSU_AO_DocumentStructureExtract::Instantiate.new + @pb=SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) + @pbn=SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) + @pbl=SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) + end + def ln_get(lv) + case lv + when /A/ then 0 + when /B/ then 1 + when /C/ then 2 + when /D/ then 3 + when /1/ then 4 + when /2/ then 5 + when /3/ then 6 + when /4/ then 7 + when /5/ then 8 + when /6/ then 9 + end + end + def image_test(str) + str=~/\{\s*\S+?\.png.+?\}https?:\/\/\S+/ \ + ? true + : false + end + def bullet_test(str) + (str=~/\*/) \ + ? true + : false + end + def quotes? + @@flag[:quote]==:open \ + ? 
true + : false + end + def hang_and_indent_test(str) + hang_indent=if str=~/^_([1-9])[^_]/ + [$1,$1] + elsif str=~/^__([1-9])/ + [0,$1] + elsif str=~/^_([0-9])_([0-9])/ + [$1,$2] + else + [0,0] + end + hang,indent=hang_indent[0],hang_indent[1] + [hang,indent] + end + def hang_and_indent_def_test(str1,str2) + hang_indent=if str1=~/^_([1-9])[^_]/ + [$1,$1] + elsif str1=~/^__([1-9])/ + [0,$1] + elsif str1=~/^_([0-9])_([0-9])/ + [$1,$2] + else + [0,0] + end + obj=if str2 =~/^(.+?)\s+\\\\(?:\s+|\n)/ + str2.gsub(/^(.+?)(\s+\\\\(?:\s+|\n))/, + "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2") + else + str2.gsub(/^(.+?)\n/, + "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\n") + end + hang,indent=hang_indent[0],hang_indent[1] + [ + hang, + indent, + obj, + ] + end + def endnote_test?(str) + (str=~/~\{.+?\}~|~\[.+?\]~/) \ + ? true + : false + end + def extract_tags(str,nametag=nil) + tags=[] + if str.nil? + else + if str =~/(?:^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/ + str=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i, + "\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}"). + gsub(/ [ ]+/i,' ') + tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten.uniq + str=str.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks? + end + tags=nametag ? 
(tags << nametag) : tags + tags.each do |t| + t.gsub!(/[^a-z0-9._-]/,'') + end + end + [ + str, + tags, + ] + end + def rgx_idx_ocn_seg + @rgx_idx_ocn_seg=/(.+?)\s*[+](\d+)/ + end + def construct_idx_array_and_hash(idxraw) + idx_array_raw=idxraw.scan(/[^;]+/) + idx_hash,idx_array,idx_lst={},[],[] + idx_array_raw.each do |idx| + idx=idx.strip + idx_lst=case idx + when /\S+?\s*:/ + idx_couplet_tmp=[] + idx_couplet=idx.scan(/\s*[^:]+\s*/) + if idx_couplet[1] =~/[|]/ + idx_couplet_tmp << + idx_couplet[0] << + idx_couplet[1].scan(/\s*[^|]+\s*/) + else + idx_couplet_tmp << + idx_couplet[0] << + [idx_couplet[1]] + end + idx_couplet=idx_couplet_tmp + else [idx] + end + term_nodes=[] + idx_lst.each do |term_node| + case term_node + when String + term_node= + term_node[0].chr.capitalize + + term_node[1,term_node.length] + term_node=(term_node =~/.+?[+]\d+/) \ + ? term_node + : (term_node + '+0') + term_nodes << term_node + use,plus=rgx_idx_ocn_seg.match(term_node)[1,2] + @use=use.strip + unless idx_hash[@use] \ + and defined? idx_hash[@use] + idx_hash[@use]= + { sub: [], plus: plus } + end + when Array + subterm_nodes=[] + term_node.each do |subterm_node| + subterm_node=(subterm_node =~/.+?[+]\d+/) \ + ? subterm_node + : (subterm_node + '+0') + subterm_nodes << subterm_node + sub,sub_plus=rgx_idx_ocn_seg.match(subterm_node)[1,2] + unless idx_hash[@use] \ + and defined? 
idx_hash[@use] + idx_hash[@use]= + { sub: [], plus: 0 } + end + idx_hash[@use][:sub] << + { sub.strip => { plus: sub_plus } } + end + term_nodes << subterm_nodes + end + end + idx_array << term_nodes + end + { + hash: idx_hash, + array: idx_array, + } + end + def identify_parts + tuned_file=[] + @tuned_block,@tuned_code=[],[] + @@counter,@verse_count=0,0 + @num_id={ + code_block: 0, + poem: 0, + box: 0, + group: 0, + alt: 0, + quote: 0, + table: 0, + } + @metadata={} + @data.each do |t_o| + if t_o =~/^--([+~-])[#]$/ + h=case $1 + when /[+]/ + @@flag[:ocn]=:on + { + flag: :ocn_on, + } + when /[~]/ + @@flag[:ocn]=:ocn_off_headings_keep + { + flag: :ocn_off, + mod: :headings_keep, + } + when /[-]/ #of particular relevance with level 1~ which is required to precede substantive text & used e.g. in html segmented text + @@flag[:ocn]=:ocn_off_headings_dummy_lev1 + { + flag: :ocn_off, + mod: :headings_exclude, + } + else + @@flag[:ocn]=:on + { + flag: :ocn_on, + } + end + t_o=SiSU_AO_DocumentStructure::ObjectFlag.new.flag_ocn(h) + next + end + t_o=t_o.gsub(/(?:\n\s*\n)+/m,"\n") if @@flag[:code]==:off + unless t_o =~/^(?:@\S+?:|%+)\s/ # extract book index for paragraph if any + idx=if t_o=~/^=\{\s*(.+)\s*\}\s*$\Z/m + m=$1 + m=m.split(/[ ]*\n/).join(' '). + gsub(/\s+([|:;])\s+/,'\1'). 
+ gsub(/\s+([+]\d+)\s+/,'\1') + t_o=t_o.gsub(/\n=\{.+?\}\s*$/m,'') + idx_array_and_hash=construct_idx_array_and_hash(m) + idx_array_and_hash[:hash] + else nil + end + end + if t_o !~/^(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block)\{|^\}(?:code|poem|alt|group|block)|^(?:table\{|\{table)[ ~]/ \ + and t_o !~/^```[ ]+(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block|table)|^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$|^`:quote_(?:open|close)`/ \ + and @@flag[:code]==:off \ + and @@flag[:poem]==:off \ + and @@flag[:group]==:off \ + and @@flag[:block]==:off \ + and @@flag[:alt]==:off \ + and @@flag[:box]==:off \ + and @@flag[:table]==:off + t_o=case t_o + when /^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/ #metadata, header + if t_o=~/^#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}\s*(.+)/m + tag,obj=$1,$2 + @metadata[tag]=obj + end + t_o=nil + when /^%+\s/ #comment + t_o=if t_o=~/^%+\s+(.+)/ + h={ obj: $1 } + SiSU_AO_DocumentStructure::ObjectComment.new.comment(h) + else nil + end + when /^:?([A-D1-6])\~/ #heading / lv + lv=$1 + ln=ln_get(lv) + t_o=if t_o=~/^:?[A-D1-6]\~\s+(.+)/m + obj=$1 + note=endnote_test?(obj) + obj,tags=extract_tags(obj) + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep + unless obj =~ /[~-][#]\s*$/ + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + and t_o =~/^1\~\S*\s+/m + obj << ' -#' + elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep + obj << ' ~#' + end + end + end + h={ + lv: lv, + ln: ln, + obj: obj, + idx: idx, + tags: tags, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) + elsif t_o=~/^:?[A-D1-6]\~(\S+?)-\s+(.+)/m + name,obj=$1,$2 + note=endnote_test?(obj) + obj,tags=extract_tags(obj) + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep + unless obj =~ /[~-][#]\s*$/ + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + and t_o =~/^1\~\S*\s+/m + obj << ' -#' + elsif 
@@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep + obj << ' ~#' + end + end + end + h={ + lv: lv, + name: name, + obj: obj, + idx: idx, + autonum_: false, + tags: tags, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) + elsif t_o=~/^:?[A-D1-6]\~(\S+)\s+(.+)/m + name,obj=$1,$2 + note=endnote_test?(obj) + obj,tags=extract_tags(obj,name) + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep + unless obj =~ /[~-][#]\s*$/ + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + and t_o =~/^1\~\S*\s+/m + obj << ' -#' + elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep + obj << ' ~#' + end + end + end + h={ + lv: lv, + name: name, + obj: obj, + idx: idx, + tags: tags, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) + else nil + end + when /^_(?:[1-9]!?|[1-9]?\*)\s+/ #indented and/or bullet paragraph + t_o=if t_o=~/^(_(?:[1-9]?\*|[1-9]!?)\s+)(.+)/m + tst,obj=$1,$2 + if t_o=~/^_[1-9]!\s+.+/m + hang,indent,obj=hang_and_indent_def_test(tst,obj) + else + hang,indent=hang_and_indent_test(tst) + end + bullet=bullet_test(tst) + image=image_test(obj) + note=endnote_test?(obj) + obj,tags=extract_tags(obj) + unless obj=~/\A\s*\Z/m + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep + unless obj =~ /[~-][#]\s*$/ + obj << ' ~#' + end + end + h={ + bullet_: bullet, + hang: hang, + indent: indent, + obj: obj, + idx: idx, + note_: note, + image_: image, + tags: tags, + quote: quotes?, + } + SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) + end + else nil + end + when /^_[0-9]?_[0-9]!?\s+/ #hanging indent paragraph + t_o=if t_o=~/^(_[0-9]?_[0-9]!?\s+)(.+)/m + tst,obj=$1,$2 + if t_o=~/^_[0-9]?_[0-9]!\s+.+/m + hang,indent,obj=hang_and_indent_def_test(tst,obj) + else + hang,indent=hang_and_indent_test(tst) + end + image=image_test(obj) + note=endnote_test?(obj) + obj,tags=extract_tags(obj) + unless 
obj=~/\A\s*\Z/m + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep + unless obj =~ /[~-][#]\s*$/ + obj << ' ~#' + end + end + h={ + hang: hang, + indent: indent, + obj: obj, + idx: idx, + note_: note, + image_: image, + tags: tags, + quote: quotes?, + } + SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) + end + else nil + end + when /^<(?:br)?:(?:pa?r|o(?:bj|---)?)>\s*$/ #[br:par] #[br:obj] + SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_obj]) + when /^(?:-\\\\-|<:pb>)\s*$/ #[br:pg] + SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page],:markup) + when /^(?:=\\\\=|<:pn>)\s*$/ #[br:pgn] + SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new],:markup) + when /^-\.\.-\s*$/ #[br:pgl] + SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line],:markup) + else #paragraph + image=image_test(t_o) + note=endnote_test?(t_o) + obj,tags=extract_tags(t_o) + if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \ + or @@flag[:ocn]==:ocn_off_headings_keep + unless obj =~ /[~-][#]\s*$/ + obj << ' ~#' + end + end + unless obj=~/\A\s*\Z/m + h={ + bullet_: false, + indent: 0, + hang: 0, + obj: obj, + idx: idx, + note_: note, + image_: image, + tags: tags, + quote: quotes?, + } + t_o=SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) + end + t_o=SiSU_AO_DocumentStructureExtract::Structure.new(@md).structure_markup(t_o) #must happen earlier, node info etc. 
require + end + elsif @@flag[:code]==:off + if t_o =~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{|```[ ]+code(?:\.[a-z][0-9a-z_]+)?)/ + @@flag[:code]=case t_o + when /^code(?:\.[a-z][0-9a-z_]+)?\{/ then :curls + when /^```[ ]+code/ then :tics + else @@flag[:code] #error + end + @@flag[:lngsyn]=if t_o =~/^(?:code\.[a-z][0-9a-z_]+\{|```[ ]+code\.[a-z_]+)/ + case t_o + when /^code\.([a-z][0-9a-z_]+)\{/ + :"#{$1}" + when /^```[ ]+code\.([a-z][0-9a-z_]+)/ + :"#{$1}" + else :txt + end + else :txt + end + @@counter=1 + @codeblock_numbered= + (t_o =~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{#|```[ ]+code(?:\.[a-z][0-9a-z_]+)?\s[#])/) \ + ? true + : false + @num_id[:code_block] +=1 + h={ + is_for: :code, + obj: '', + sym: :code_block_open, + num: @num_id[:code_block], + syntax: @@flag[:lngsyn], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + elsif t_o =~/^(?:poem\{|```[ ]+poem)/ + @@flag[:poem]=case t_o + when /^poem\{/ then :curls + when /^```[ ]+poem/ then :tics + else @@flag[:poem] #error + end + @num_id[:poem] +=1 + h={ + is_for: :poem, + obj: '', + sym: :poem_open, + num: @num_id[:poem], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + tuned_file << t_o + elsif t_o =~/^(?:box(?:\.[a-z_]+)?\{|```[ ]+box(?:\.[a-z_]+)?)/ + @@flag[:box]=case t_o + when /^box\{/ then :curls + when /^```[ ]+box/ then :tics + else @@flag[:box] #error + end + @num_id[:box] +=1 + h={ + is_for: :box, + obj: '', + sym: :box_open, + num: @num_id[:box], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + tuned_file << t_o + elsif t_o =~/^(?:group\{|```[ ]+group)/ + @@flag[:group]=case t_o + when /^group\{/ then :curls + when /^```[ ]+group/ then :tics + else @@flag[:group] #error + end + @num_id[:group] +=1 + h={ + is_for: :group, + obj: '', + sym: :group_open, + num: @num_id[:group], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + tuned_file << t_o + elsif t_o =~/^(?:block\{|```[ ]+block)/ + @@flag[:block]=case t_o + when /^block\{/ 
then :curls + when /^```[ ]+block/ then :tics + else @@flag[:block] #error + end + @num_id[:block] +=1 + h={ + is_for: :block, + obj: '', + sym: :block_open, + num: @num_id[:block], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + tuned_file << t_o + elsif t_o =~/^(?:alt\{|```[ ]+alt)/ + @@flag[:alt]=case t_o + when /^alt\{/ then :curls + when /^```[ ]+alt/ then :tics + else @@flag[:alt] #error + end + @num_id[:alt] +=1 + h={ + is_for: :alt, + obj: '', + sym: :alt_open, + num: @num_id[:alt], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + tuned_file << t_o + elsif t_o =~/^`:quote_open`/ + @@flag[:quote]=:open + @num_id[:quote] +=1 + h={ + is_for: :quote, + obj: '', + sym: :quote_open, + num: @num_id[:quote], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + #tuned_file << t_o #% find second source, entered twice, should be once so closed off here + elsif t_o =~/^(?:table\{|```[ ]+table|\{table)[ ~]/ + @num_id[:table] +=1 + h={ + is_for: :table, + obj: '', + sym: :table_open, + num: @num_id[:table], + } + ins_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + tuned_file << ins_o + if t_o=~/^table\{(?:~h)?\s+/ + @@flag[:table]=:curls + @rows='' + case t_o + when /table\{~h\s+c(\d+);\s+(.+)/ + cols=$1 + col=$2.scan(/\d+/) + heading=true + when /table\{\s+c(\d+);\s+(.+)/ + cols=$1 + col=$2.scan(/\d+/) + heading=false + end + @h={ + head_: heading, + cols: cols, + widths: col, + idx: idx, + } + elsif t_o=~/^```[ ]+table(?:~h)?\s+c\d+/ + @@flag[:table]=:tics + @rows='' + case t_o + when /^```[ ]+table~h\s+c(\d+);\s+(.+)/ + cols=$1 + col=$2.scan(/\d+/) + heading=true + when /^```[ ]+table\s+c(\d+);\s+(.+)/ + cols=$1 + col=$2.scan(/\d+/) + heading=false + end + @h={ + head_: heading, + cols: cols, + widths: col, + idx: idx, + } + elsif t_o=~/^\{table(?:~h)?(?:\s+\d+;?)?\}\n.+\Z/m + m1,m2,hd=nil,nil,nil + tbl=/^\{table(?:~h)?(?:\s+\d+;?)?\}\n(.+)\Z/m.match(t_o)[1] + hd=((t_o =~/^\{table~h/) ? 
true : false) + tbl,tags=extract_tags(tbl) + rws=tbl.split(/\n/) + rows='' + cols=nil + rws.each do |r| + cols=(cols ? cols : (r.scan('|').length) +1) + r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") + rows += r + Mx[:tc_c] + end + col=[] + if t_o =~/^\{table(?:~h)?\s+(\d+);?\}/ #width of col 1 given as %, usually when wider than rest that are even + c1=$1.to_i + width=(100 - c1)/(cols - 1) + col=[ c1 ] + (cols - 1).times { col << width } + else #all columns of equal width + width=100.00/cols + cols.times { col << width } + end + h={ + head_: hd, + cols: cols, + widths: col, + obj: rows, + idx: idx, + tags: tags, + num: @num_id[:table], + } + t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ + unless h.nil? + tuned_file << t_o + h={ + is_for: :table, + obj: '', + sym: :table_close, + num: @num_id[:table], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + t_o + elsif t_o=~/^```[ ]+table(?:~h)?\s+/ + m1,m2,hd=nil,nil,nil + h=case t_o + when /^```[ ]+table~h\s+(.+?)\n(.+)\Z/m #two table representations should be consolidated as one + m1,tbl,hd=$1,$2,true + when /^```[ ]+table\s+(.+?)\n(.+)\Z/m #two table representations should be consolidated as one + m1,tbl,hd=$1,$2,false + else nil + end + tbl,tags=extract_tags(tbl) + col=m1.scan(/\d+/) + rws=tbl.split(/\n/) + rows='' + rws.each do |r| + r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") + rows += r + Mx[:tc_c] + end + h={ + head_: hd, + cols: col.length, + widths: col, + obj: rows, + idx: idx, + tags: tags, + num: @num_id[:table], + } + t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ + unless h.nil? 
+ tuned_file << t_o + h={ + is_for: :table, + obj: '', + sym: :table_close, + num: @num_id[:table], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + t_o + elsif t_o=~/^\{table(?:~h)?\s+/ + m1,m2,hd=nil,nil,nil + h=case t_o + when /\{table~h\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one + m1,tbl,hd=$1,$2,true + when /\{table\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one + m1,tbl,hd=$1,$2,false + else nil + end + tbl,tags=extract_tags(tbl) + col=m1.scan(/\d+/) + rws=tbl.split(/\n/) + rows='' + rws.each do |r| + r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") + rows += r + Mx[:tc_c] + end + h={ + head_: hd, + cols: col.length, + widths: col, + obj: rows, + idx: idx, + tags: tags, + num: @num_id[:table], + } + t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ + unless h.nil? + tuned_file << t_o + h={ + is_for: :table, + obj: '', + sym: :table_close, + num: @num_id[:table], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + t_o + end + end + t_o + end + if @@flag[:table]==:curls or @@flag[:table]==:tics + if (@@flag[:table]==:curls \ + and t_o =~/^\}table/) \ + or (@@flag[:table]==:tics \ + and t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:table]=:off + headings,columns,widths,idx=@h[:head_],@h[:cols],@h[:widths],@h[:idx] + @h={ + head_: headings, + cols: columns, + widths: widths, + idx: idx, + obj: @rows, + } + t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(@h) + tuned_file << t_o + @h,@rows=nil,'' + h={ + is_for: :table, + obj: '', + sym: :table_close, + num: @num_id[:table], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + t_o + else + if t_o.is_a?(String) \ + and t_o !~/^(?:table\{|```[ ]+table)/ + t_o=t_o.gsub(/^\n+/m,''). 
+ gsub(/\n+/m,"#{Mx[:tc_p]}") + @rows += t_o + Mx[:tc_c] + end + t_o=nil + end + end + if @@flag[:code]==:curls \ + or @@flag[:code]==:tics + if (@@flag[:code]==:curls \ + && t_o =~/^\}code/) \ + or (@@flag[:code]==:tics \ + && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/m) + @@flag[:code]=:off + if @tuned_code[-1] + @tuned_code[-1]. + gsub!(/\s*(?:#{Mx[:br_line]}|#{Mx[:br_nl]})\s*\Z/m,'') + end + obj=@tuned_code.join("\n") + tags=[] + h={ + obj: obj, + idx: idx, + syntax: @@flag[:lngsyn], + tags: tags, + num: @num_id[:code_block], + number_: @codeblock_numbered, + } + @@flag[:lngsyn]=:txt + t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.code(h) + @tuned_code=[] + tuned_file << t_o + h={ + is_for: :code, + obj: '', + sym: :code_close, + num: @num_id[:code_block], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + end + if (@@flag[:code]==:curls \ + || @@flag[:code]==:tics) \ + and t_o.is_a?(String) + sub_array=t_o.dup + "#{Mx[:br_nl]}" + @line_mode=[] + sub_array.scan(/.+/) {|w| @line_mode << w if w =~/[\S]+/} + t_o=SiSU_AO_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(:code).join + @tuned_code << t_o + t_o=nil + end + elsif (@@flag[:poem]==:curls \ + || @@flag[:poem]==:tics) \ + or (@@flag[:box]==:curls \ + || @@flag[:box]==:tics) \ + or (@@flag[:group]==:curls \ + || @@flag[:group]==:tics) \ + or (@@flag[:block]==:curls \ + || @@flag[:block]==:tics) \ + or (@@flag[:alt]==:curls \ + || @@flag[:alt]==:tics) \ + or (@@flag[:quote]==:open \ + && t_o =~/`:quote_close`/m) #not + if (@@flag[:poem]==:curls \ + && t_o =~/^\}poem$/m) \ + or (@@flag[:poem]==:tics \ + && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:poem]=:off + h={ + is_for: :poem, + obj: '', + idx: idx, + sym: :poem_close, + num: @num_id[:poem], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + elsif (@@flag[:box]==:curls \ + && t_o =~/^\}box/) \ + or (@@flag[:box]==:tics \ + && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + 
@@flag[:box]=:off + obj,tags=extract_tags(@tuned_block.join("\n")) + h={ + obj: obj, + idx: idx, + tags: tags, + num: @num_id[:box], + } + @tuned_block=[] + t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.box(h) + tuned_file << t_o + h={ + is_for: :box, + obj: '', + idx: idx, + sym: :box_close, + num: @num_id[:box], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + elsif (@@flag[:group]==:curls \ + && t_o =~/^\}group/) \ + or (@@flag[:group]==:tics \ + && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:group]=:off + obj,tags=extract_tags(@tuned_block.join("\n")) + h={ + obj: obj, + idx: idx, + tags: tags, + num: @num_id[:group], + } + @tuned_block=[] + t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.group(h) + tuned_file << t_o + h={ + is_for: :group, + obj: '', + sym: :group_close, + num: @num_id[:group], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + elsif (@@flag[:block]==:curls \ + && t_o =~/^\}block/) \ + or (@@flag[:block]==:tics \ + && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:block]=:off + obj,tags=extract_tags(@tuned_block.join("\n")) + h={ + obj: obj, + idx: idx, + tags: tags, + num: @num_id[:block], + } + @tuned_block=[] + t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.block(h) + tuned_file << t_o + h={ + is_for: :block, + obj: '', + sym: :block_close, + num: @num_id[:block], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + elsif (@@flag[:alt]==:curls \ + && t_o =~/^\}alt/) \ + or (@@flag[:alt]==:tics \ + && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:alt]=:off + obj,tags=extract_tags(@tuned_block.join("\n")) + h={ + obj: obj, + idx: idx, + tags: tags, + num: @num_id[:alt], + } + t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.alt(h) + @tuned_block=[] + tuned_file << t_o + h={ + is_for: :alt, + obj: '', + sym: :alt_close, + num: @num_id[:alt], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + elsif @@flag[:quote]==:open \ + 
and t_o =~/`:quote_close`/m + @@flag[:quote]=:off + h={ + is_for: :quote, + idx: idx, + obj: '', + sym: :quote_close, + num: @num_id[:quote], + } + t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) + elsif @@flag[:quote]==:open + t_o,tags=extract_tags(t_o) + h={ + indent: 1, + obj: t_o, + idx: idx, + note_: note, + image_: image, + tags: tags, + quote: quotes?, + } + SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) + end + if (@@flag[:poem]==:curls \ + || @@flag[:poem]==:tics) \ + or (@@flag[:group]==:curls \ + || @@flag[:group]==:tics) \ + or (@@flag[:alt]==:curls \ + || @@flag[:alt]==:tics) \ + and t_o =~/\S/ \ + and t_o !~/^(?:\}(?:verse|code|box|alt|group|block)|(?:verse|code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|alt|group|block)\{)/ \ + and t_o !~/^```[ ]+(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block)|^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/ # fix logic + sub_array=t_o.dup + @line_mode=sub_array.scan(/.+/) + type=if @@flag[:poem]==:curls or @@flag[:poem]==:tics + t_o=SiSU_AO_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(type).join + poem=t_o.split(/\n\n/) + poem.each do |v| + v=v.gsub(/\n/m,"#{Mx[:br_nl]}\n") + obj,tags=extract_tags(v) + h={ + obj: obj, + tags: tags, + num: @num_id[:poem], + } + t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.verse(h) + tuned_file << t_o + end + :poem + else :group + end + end + @verse_count+=1 if @@flag[:poem]==:curls or @@flag[:poem]==:tics + end + if @@flag[:code]==:off + if @@flag[:poem]==:curls or @@flag[:poem]==:tics \ + or @@flag[:box]==:curls or @@flag[:box]==:tics \ + or @@flag[:group]==:curls or @@flag[:group]==:tics \ + or @@flag[:alt]==:curls or @@flag[:alt]==:tics \ + or (@@flag[:quote]==:open and t_o =~/`:quote_close`/m) + if t_o.is_a?(String) + t_o=t_o.gsub(/\n/m,"#{Mx[:br_nl]}"). + gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}"). 
+ gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") + t_o=t_o + Mx[:br_nl] if t_o =~/\S+/ + elsif t_o.is==:group \ + || t_o.is==:block \ + || t_o.is==:alt \ + || t_o.is==:box \ + || t_o.is==:verse + t_o.obj=t_o.obj.gsub(/\n/m,"#{Mx[:br_nl]}"). + gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}"). + gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") + end + @tuned_block << t_o if t_o =~/\S+/ + else tuned_file << t_o + end + else tuned_file << t_o + end + end + if @md.flag_endnotes + tuned_file << @pb + h={ + ln: 1, + lc: 1, + obj: 'Endnotes', + autonum_: false, + } + tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) + h={ + ln: 4, + lc: 2, + obj: 'Endnotes', + name: 'endnotes', + autonum_: false, + } + tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) + h={ + obj: 'Endnotes' + } + end + if @md.book_idx + tuned_file << @pb + h={ + ln: 1, + lc: 1, + obj: 'Index', + autonum_: false, + } + tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) + h={ + ln: 4, + lc: 2, + obj: 'Index', + name: 'book_index', + autonum_: false, + } + tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) + h={ + obj: 'Index' + } + end + tuned_file << @pb + h={ + ln: 1, + lc: 1, + obj: 'Metadata', + autonum_: false, + ocn_: false, + } + tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) + h={ + ln: 4, + lc: 2, + obj: 'SiSU Metadata, document information', + name: 'metadata', + autonum_: false, + ocn_: false, + } + tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) + h={ + obj: 'eof', + } + meta=SiSU_AO_DocumentStructure::ObjectMetadata.new.metadata(@metadata) + [tuned_file,meta] + end + def table_rows_and_columns_array(table_str) + table=[] + table_str.split(/#{Mx[:tc_c]}/).each do |table_row| + table_row_with_columns=table_row.split(/#{Mx[:tc_p]}/) + table << table_row_with_columns + end + table + end + def meta_heading(h) + h={ + lv: h[:lv], + ln: h[:ln], + name: h[:name], + obj: 
h[:obj], + ocn: '0', + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) + end + def meta_para(str) + h={ + obj: str, + ocn_: false, + } + SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) + end + def build_lines(type=:none) + lines,lines_new=@data,[] + lines.each do |line| + line=if line =~/\S/ \ + and line !~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{|\}code)/ \ + and line !~/^(?:```[ ]+code(?:\.[a-z][0-9a-z_]+)?|```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$)/ \ + and not line.is_a?(Hash) #watch + @@counter+=1 if @@flag[:code]==:curls or @@flag[:code]==:tics + line=line.gsub(/\s\s/,"#{Mx[:nbsp]*2}"). + gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") + line=line.gsub(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type==:code # REMOVE try sort for texpdf special case + line=if line =~/(?:https?|file|ftp):\/\/\S+$/ + line.gsub(/\s*$/," #{Mx[:br_nl]}") + else line.gsub(/\s*$/,"#{Mx[:br_nl]}") #unless type=='code' + end + elsif line =~/^\s*$/ + line.gsub(/\s*$/,"#{Mx[:br_nl]}") + else line + end + lines_new << line + end + lines_new + end + end + class Structure # this must happen early + def initialize(md) + @md=md + end + def structure(data) + data.compact.each do |dob| + structure_markup(dob) + end + end + def structure_markup(dob) #build structure where structure provided only in meta header + dob=if dob.is==:para \ + && (((dob.hang !~/[1-9]/) && (dob.indent !~/[1-9]/)) \ + || (dob.hang != dob.indent)) \ + and not dob.bullet_ + dob=case dob.obj + when /^#{@md.lv0}/ + h={ + is: :heading, + lv: 'A', + ln: 0, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) + when /^#{@md.lv1}/ + h={ + is: :heading, + lv: 'B', + ln: 1, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) + when /^#{@md.lv2}/ + h={ + is: :heading, + lv: 'C', + ln: 2, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) + when /^#{@md.lv3}/ + h={ + is: :heading, + lv: 'D', + ln: 3, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) + when /^#{@md.lv4}/ + h={ + 
is: :heading, + lv: '1', + ln: 4, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) + when /^#{@md.lv5}/ + h={ + is: :heading, + lv: '2', + ln: 5, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) + when /^#{@md.lv6}/ + h={ + is: :heading, + lv: '3', + ln: 6, + } + SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) + else dob + end + else dob + end + dob + end + end + class OCN + def initialize(md,data) + @md,@data=md,data + end + def structure_info + def lv + %w[A~ B~ C~ D~ 1 2 3 4] + end + def possible_parents(child) + case child + when /A~/ then 'none' + when /B~/ then 'A~' + when /C~/ then 'B~' + when /D~/ then 'C~' + when /1/ then 'A~, B~, C~, D~' + when /2/ then '1' + when /3/ then '2' + when /4/ then '3' + end + end + def possible_children(parent) + case parent + when /A~/ then 'B~, 1' + when /B~/ then 'C~, 1' + when /C~/ then 'D~, 1' + when /D~/ then '1' + when /1/ then '2' + when /2/ then '3' + when /3/ then '4' + when /4/ then 'none' + end + end + self + end + def document_structure_check_info(node,node_parent,status=:ok) + node_ln=/^([0-7])/.match(node)[1].to_i + node_parent_ln=/^([0-7])/.match(node_parent)[1].to_i + if status==:error \ + or @md.opt.act[:maintenance][:set]==:on + puts %{node: #{node}, parent node: #{node_parent} #{status.upcase}} + if status==:error + node_ln=/^([0-7])/.match(node)[1].to_i + node_parent_ln=/^([0-7])/.match(node_parent)[1].to_i + STDERR.puts %{current level: #{structure_info.lv[node_ln]} (possible parent levels: #{structure_info.possible_parents(structure_info.lv[node_ln])}) +parent level: #{structure_info.lv[node_parent_ln]} (possible child levels: #{structure_info.possible_children(structure_info.lv[node_parent_ln])}) +SKIPPED processing file: +[#{@md.opt.lng}] "#{@md.fns}"} + if @md.opt.act[:no_stop][:set]==:on + $process_document = :skip + else exit + end + end + end + end + def warning_incorrect_parent_level_or_level(txt) + puts %{ERROR. 
# Check that the document header provides the mandatory @title: and
# @creator: :author: fields. For each missing field a message is written to
# STDERR and the document is skipped (:no_stop option on) or the process exits.
# A title/creator that is absent (nil) is reported as missing rather than
# raising NoMethodError.
def required_headers_present?
  title=@md.respond_to?(:title) ? @md.title : nil
  unless title.respond_to?(:full) \
  && title.full
    STDERR.puts %{required header missing:

@title:
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"
}
    ocn_skip_or_exit
  end
  creator=@md.respond_to?(:creator) ? @md.creator : nil
  unless creator.respond_to?(:author) \
  && creator.author
    STDERR.puts %{required header missing:

@creator:
 :author: anonymous?
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"
}
    ocn_skip_or_exit
  end
end

# Walk @data, assign object citation numbers (ocn), node ids, parents and
# per-type sequence ids to every substantive object, and validate the heading
# hierarchy on the way. Returns the processed objects (@o_array).
#
# Headings are level 0-3 (A~ to D~) and 4-7 (1~ to 4~). A heading's parent is
# the nearest heading node recorded so far at a shallower level; a 1~ heading
# may hang off any of A~..D~, every other level must hang off the level
# directly above it, otherwise the structure error is reported.
def ocn #and auto segment numbering increment
  required_headers_present?
  @o_array=[]
  @lev_occurences={ a: 0, b: 0, c: 0, d: 0, l1: 0, l2: 0, l3: 0, l4: 0 }
  regex_exclude_ocn_and_node = /#{Rx[:meta]}|^@\S+?:\s|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^\^~ |<:e[:_]\d+?>|^<:\#|<:- |<[:!]!4|<hr width|#{Mx[:br_endnotes]}|\A\s*\Z/mi
  st={                                  # running state shared with the helpers
    ocn: 0,                             # object citation number
    ocnh: 0,                            # numbered headings seen
    ocnu: 0,                            # un-numbered objects seen
    heading_count: Array.new(8,0),      # numbered headings per level
    nodes: ['0:0;0']+Array.new(7),      # latest node id per level
    collapsed: [0]+Array.new(7),        # collapsed depth per level
    kind_count: Hash.new(0),            # sequence counters for t/c/g/i/p ids
    node: 0,
    parent: nil,
    odv: 0,
    osp: 0,
  }
  @data.each do |dob|
    if (dob.obj !~ regex_exclude_ocn_and_node || dob.is==:code) \
    && dob.of !=:comment \
    && dob.of !=:layout \
    && dob.of !=:meta \
    && dob.ocn_
      break if ocn_number_object(dob,st)==:halt
    end
    if [:code,:verse,:alt,:box,:group,:block].include?(dob.is)
      dob.obj=dob.obj.gsub(/\n+/,"\n") #newlines taken out
    end
    @o_array << dob
  end
  unless @lev_occurences[:a] == 1
    STDERR.puts %{The number of level A~ in this document: #{@lev_occurences[:a]}
There must be one level A~ (no more and no less)
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"}
    ocn_skip_or_exit
  end
  unless @lev_occurences[:l1] > 0
    STDERR.puts %{The number of level 1~ in this document: #{@lev_occurences[:l1]}
There must be at least one level 1~ (and as many as required)
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"}
    ocn_skip_or_exit
  end
  @o_array
end

# Mark the document as skipped when the :no_stop option is on, else exit.
def ocn_skip_or_exit
  if @md.opt.act[:no_stop][:set]==:on
    $process_document = :skip
  else exit
  end
end

# Number a single eligible object and write the results onto it.
# Returns :halt when processing of the document has to stop, else nil.
def ocn_number_object(dob,st)
  ln=collapsed_level=ocn_flag=nil
  numbered=!(dob.obj =~/~#|-#/)         # ~# / -# mark objects that get no ocn
  # level number from the level mark; the position in the list is the number
  @ln=ln=%w[A B C D 1 2 3 4 5 6].index(dob.lv) if dob.is==:heading
  st[:ocn]+=1 if numbered
  if dob.is==:heading && ln
    st[:ocnh]+=1 if numbered
    if ln <= 7                          # levels 5~ and 6~ are not placed in the tree
      collapsed_level=ocn_place_heading(dob,ln,numbered,st)
      ocn_flag=numbered
    end
  else
    return :halt if ocn_text_out_of_place?(dob)
    ocn_flag=numbered
    prefix=case dob.is
    when :table then 't'
    when :code then 'c'
    when :group, :box, :block, :alt, :verse then 'g' #group, poem
    when :image then 'i'
    else 'p' #paragraph
    end
    st[:kind_count][prefix]+=1
    st[:osp]="#{prefix}#{st[:kind_count][prefix]}"
    st[:parent]=st[:node]
  end
  if dob.is==:heading
    dob.ln,dob.node,dob.lc=ln,st[:node],collapsed_level
    if ocn_flag==true
      dob.ocn=st[:ocn]
    else
      st[:ocnu]+=1
      heading_use=:ok
      if dob.obj=~/#{Mx[:pa_non_object_no_heading]}/
        dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_no_heading]}/,'')
      elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/
        dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_dummy_heading]}/,'')
        heading_use=:dummy
      end
      dob.ocn,dob.use_=nil,heading_use
    end
  elsif ocn_flag==true
    dob.ocn=st[:ocn]
  else
    st[:ocnu]+=1
    dob.obj=dob.obj.gsub(/#{Mx[:fa_o]}[~-]##{Mx[:fa_c]}/,'') if dob.obj
    st[:odv],st[:osp]="u#{st[:ocnu]}","u#{st[:ocnu]}"
    dob.ocn=nil
  end
  dob.ocn_,dob.odv,dob.osp,dob.parent=ocn_flag,st[:odv],st[:osp],st[:parent]
  nil
end

# Record heading +dob+ at level +k+ (0..7): count it, build its node id, find
# and validate its parent, and update the current node/parent in +st+.
# Returns the collapsed depth of the heading.
def ocn_place_heading(dob,k,numbered,st)
  nodes,collapsed=st[:nodes],st[:collapsed]
  @lev_occurences[[:a,:b,:c,:d,:l1,:l2,:l3,:l4][k]] += 1
  if numbered
    st[:heading_count][k]+=1
    nodes[k]="#{k}:#{st[:heading_count][k]};#{st[:ocn]}"
  else
    nodes[k]="#{k}:0;0"
  end
  if k==0
    document_structure_check_info(nodes[0],nodes[0])
    collapsed[0]=0
    parent='ROOT'
  else
    # nearest shallower level with a recorded node (level 0 always has one)
    j=(k-1).downto(0).find { |i| nodes[i] }
    if j==k-1 || k==4                   # 1~ may follow any of A~ B~ C~ D~
      document_structure_check_info(nodes[k],nodes[j])
    else
      marks=%w[A B C D 1 2 3 4]
      warning_incorrect_parent_level_or_level(dob.obj)
      puts %{parent is level ##{j} (#{marks[j]}~) & this level ##{k} (#{marks[k]}~)
either parent should be level ##{k-1} (#{marks[k-1]}~)
or this level should be ##{j+1} (#{marks[j+1]}~) rather than ##{k} (#{marks[k]}~)}
      document_structure_check_info(nodes[k],nodes[j],:error)
    end
    collapsed[k]=collapsed[j]+1
    parent=nodes[j]
  end
  st[:node],st[:osp],st[:parent]=nodes[k],"h#{st[:ocnh]}",parent
  collapsed[k]
end

# Substantive (non-heading) objects are only allowed below a level 1~ (or
# deeper) heading. Reports a violation and returns true when the caller must
# stop processing this document (the process exits unless :no_stop is on);
# returns false when the object is correctly placed.
def ocn_text_out_of_place?(dob)
  unless @lev_occurences[:l1] > 0
    STDERR.puts %{Substantive text objects must follow a level 1~ heading and there are none at this point in processing: #{@lev_occurences[:l1]}
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"}
    puts dob.obj #.gsub(/^(.{1,80})/,'"\1"')
    ocn_skip_or_exit
    return true
  end
  unless @ln && @ln >= 4
    lev=@ln ? %w[A B C D 1 2 3 4 5 6][@ln] : nil
    STDERR.puts %{Substantive text objects must follow a level 1~ 2~ or 3~ heading: #{lev}~
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"}
    puts dob.obj.gsub(/^(.{1,80})/,'"\1"')
    ocn_skip_or_exit
    return true
  end
  false
end

private :ocn_skip_or_exit, :ocn_number_object, :ocn_place_heading, :ocn_text_out_of_place?
# Build the structure object for one opening or closing level tag.
#
# o - Hash with :lv (level mark, e.g. 'A' or '1'), :status (:open or anything
#     else for a closing tag) and, for opening tags, :node (used as the id)
#
# Returns the value of ObjectStructure#xml_dom for the assembled hash.
def tags(o)
  tag=if o[:status]==:open
    %{<#{o[:lv]} id="#{o[:node]}">}
  else "</#{o[:lv]}>"
  end
  h={
    tag: tag,
    node: o[:node],
    lv: o[:lv],
    ln: %w[A B C D 1 2 3 4 5 6].index(o[:lv]), # level number == position of the mark
    status: o[:status],
  }
  SiSU_AO_DocumentStructure::ObjectStructure.new.xml_dom(h) #downstream code utilise else ignore like comments
end

# Opening tag object for heading +o+; +tag+ is the list of level marks
# indexed by level number.
def tag_open(o,tag)
  tags({ lv: tag[o.ln], node: o.node, status: :open })
end

# Closing tag objects needed before a heading of level +lev+ can be opened
# (lev 0 closes everything), innermost level first.
#
# hs - open-level state as maintained by structure_build:
#      [current depth, A open, B open, C open, D open]
#
# Returns an Array (possibly empty) of structure objects.
def tag_close(lev,hs)
  xml_levels_to_close(lev,hs).map do |level|
    tags({ lv: @s[level], status: :close })
  end
end

# Print the opening tag of heading +o+, indented by its level.
def puts_tag_open(o,tag)
  puts %{#{spaces*o.ln}<#{tag[o.ln]} id="#{o.node}">}
end

# Print the closing tags that tag_close(lev,hs) produces, indented by level.
def puts_tag_close(lev,hs)
  xml_levels_to_close(lev,hs).each do |level|
    puts "#{spaces*level}</#{@s[level]}>"
  end
  nil
end

# Level numbers whose elements must be closed, deepest first.
# Levels 4-6 (1~ 2~ 3~) nest strictly, so every one between the current depth
# and +lev+ is open. Levels 1-3 (B C D) are optional, so they are closed only
# when flagged open; the flag for level n is hs[n+1], because hs[1] is the
# flag for level A. Reading hs[n] instead closed a level that was never
# opened (e.g. a stray </C> for a document shaped A > B > 1).
def xml_levels_to_close(lev,hs)
  depth=hs[0]
  return [] unless depth.is_a?(Integer) && depth.between?(0,6)
  levels=depth.downto(1).select do |level|
    lev <= level && (level >= 4 || hs[level+1])
  end
  levels << 0 if lev <= 0
  levels
end
private :xml_levels_to_close