# encoding: utf-8 =begin * Name: SiSU ** Description: documents, structuring, processing, publishing, search *** document abstraction ** Author: Ralph Amissah [ralph@amissah.com] [ralph.amissah@gmail.com] ** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Ralph Amissah, All Rights Reserved. ** License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see [http://www.gnu.org/licenses/]. If you have Internet connection, the latest version of the GPL should be available at these locations: [http://www.fsf.org/licensing/licenses/gpl.html] [http://www.gnu.org/licenses/gpl.html] ** SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system ** Hompages: [http://www.jus.uio.no/sisu] [http://www.sisudoc.org] ** Git [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/ao_doc_str.rb;hb=HEAD] =end module SiSU_AO_DocumentStructureExtract require_relative 'ao_persist' # ao_persist.rb class Instantiate < SiSU_Param::Parameters::Instructions def initialize @@counter=@@column=@@columns=0 @@line_mode='' end end class Build def initialize(md,data) @md,@data=md,data SiSU_AO_DocumentStructureExtract::Instantiate.new @pb=SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page]) @pbn=SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new]) @pbl=SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line]) @per=SiSU_AO_Persist::PersistDocStructExt.new @make=SiSU_Env::ProcessingSettings.new(@md) end def ln_get(lv) case lv when /A/ then 0 when /B/ then 1 when /C/ then 2 when /D/ then 3 when /1/ then 4 when /2/ then 5 when /3/ then 6 when /4/ then 7 when /5/ then 8 when /6/ then 9 end end def image_test(str) str=~/\{\s*\S+?\.png.+?\}https?:\/\/\S+/ \ ? true : false end def bullet_test(str) (str=~/\*/) \ ? true : false end def quotes? @per.quote==:open \ ? true : false end def hang_and_indent_test(str) hang_indent=if str=~/^_([1-9])[^_]/ [$1,$1] elsif str=~/^__([1-9])/ [0,$1] elsif str=~/^_([0-9])_([0-9])/ [$1,$2] else [0,0] end hang,indent=hang_indent[0],hang_indent[1] [hang,indent] end def hang_and_indent_def_test(str1,str2) hang_indent=if str1=~/^_([1-9])[^_]/ [$1,$1] elsif str1=~/^__([1-9])/ [0,$1] elsif str1=~/^_([0-9])_([0-9])/ [$1,$2] else [0,0] end obj=if str2 =~/^(.+?)\s+\\\\(?:\s+|\n)/ str2.gsub(/^(.+?)(\s+\\\\(?:\s+|\n))/, "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2") else str2.gsub(/^(.+?)\n/, "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\n") end hang,indent=hang_indent[0],hang_indent[1] [ hang, indent, obj, ] end def endnote_test?(str) (str=~/~\{.+?\}~|~\[.+?\]~/) \ ? true : false end def extract_tags(str,nametag=nil) tags=[] if str.nil? else if str =~/(?:^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/ str=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i, "\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}"). gsub(/ [ ]+/i,' ') tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten.uniq str=str.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks? end tags=nametag ? (tags << nametag) : tags tags.each do |t| t.gsub!(/[^a-z0-9._-]/,'') end end [ str, tags, ] end def rgx_idx_ocn_seg @rgx_idx_ocn_seg=/(.+?)\s*[+](\d+)/ end def construct_idx_array_and_hash(idxraw) idx_array_raw=idxraw.scan(/[^;]+/) idx_hash,idx_array,idx_lst={},[],[] idx_array_raw.each do |idx| idx=idx.strip idx_lst=case idx when /\S+?\s*:/ idx_couplet_tmp=[] idx_couplet=idx.scan(/\s*[^:]+\s*/) if idx_couplet[1] =~/[|]/ idx_couplet_tmp << idx_couplet[0] << idx_couplet[1].scan(/\s*[^|]+\s*/) else idx_couplet_tmp << idx_couplet[0] << [idx_couplet[1]] end idx_couplet=idx_couplet_tmp else [idx] end term_nodes=[] idx_lst.each do |term_node| case term_node when String term_node= term_node[0].chr.capitalize + term_node[1,term_node.length] term_node=(term_node =~/.+?[+]\d+/) \ ? term_node : (term_node + '+0') term_nodes << term_node use,plus=rgx_idx_ocn_seg.match(term_node)[1,2] @use=use.strip unless idx_hash[@use] \ and defined? idx_hash[@use] idx_hash[@use]= { sub: [], plus: plus } end when Array subterm_nodes=[] term_node.each do |subterm_node| subterm_node=(subterm_node =~/.+?[+]\d+/) \ ? subterm_node : (subterm_node + '+0') subterm_nodes << subterm_node sub,sub_plus=rgx_idx_ocn_seg.match(subterm_node)[1,2] unless idx_hash[@use] \ and defined? idx_hash[@use] idx_hash[@use]= { sub: [], plus: 0 } end idx_hash[@use][:sub] << { sub.strip => { plus: sub_plus } } end term_nodes << subterm_nodes end end idx_array << term_nodes end { hash: idx_hash, array: idx_array, } end def extract_structure_loop(data,tuned_file) data.each do |t_o| if t_o =~/^--([+~-])[#]$/ h=case $1 when /[+]/ @per.ocn=:on { flag: :ocn_on, } when /[~]/ @per.ocn=:ocn_off_headings_keep { flag: :ocn_off, mod: :headings_keep, } when /[-]/ #of particular relevance with level 1~ which is required to precede substantive text & used e.g. in html segmented text @per.ocn=:ocn_off_headings_dummy_lev1 { flag: :ocn_off, mod: :headings_exclude, } else @per.ocn=:on { flag: :ocn_on, } end t_o=SiSU_AO_DocumentStructure::ObjectFlag.new.flag_ocn(h) next end if t_o =~/^:[~](#{SiSU_is.language_list_regex?}|-)$/ # work with for identifying language of objects lng=$1 h=case lng when /(?:#{SiSU_is.language_list_regex?})/ @per.lng=:on @per.lng_is=lng.to_sym { flag: :lng_on, act: lng.to_sym, } else # ^:~- if @per.lng==:on @per.lng=:off @per.lng_is=:doc_default { flag: :lng_off, act: :doc_default, } end end t_o=SiSU_AO_DocumentStructure::ObjectFlag.new.flag_lng(h) next end t_o=t_o.gsub(/(?:\n\s*\n)+/m,"\n") if @per.code==:off unless t_o =~/^(?:@\S+?:|%+)\s/ # extract book index for paragraph if any idx=if t_o=~/^=\{\s*(.+)\s*\}\s*$\Z/m m=$1 m=m.split(/[ ]*\n/).join(' '). gsub(/\s+([|:;])\s+/,'\1'). gsub(/\s+([+]\d+)\s+/,'\1') t_o=t_o.gsub(/\n=\{.+?\}\s*$/m,'') idx_array_and_hash=construct_idx_array_and_hash(m) idx_array_and_hash[:hash] else nil end end if t_o !~/^(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block)\{|^\}(?:code|poem|alt|group|block)|^(?:table\(.+?\)\{|\{table\()|^(?:table\{|\{table)[ ~]/ \ and t_o !~/^```[ ]+(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block|table)|^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$|^`:quote_(?:open|close)`/ \ and @per.code==:off \ and @per.poem==:off \ and @per.group==:off \ and @per.block==:off \ and @per.alt==:off \ and @per.box==:off \ and @per.table==:off t_o=case t_o when /^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/ #metadata, header if t_o=~/^#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}\s*(.+)/m tag,obj=$1,$2 @metadata[tag]=obj end t_o=nil when /^%+\s/ #comment t_o=if t_o=~/^%+\s+(.+)/ h={ obj: $1 } SiSU_AO_DocumentStructure::ObjectComment.new.comment(h) else nil end when /^:?([A-D1-6])\~/ #heading / lv lv=$1 ln=ln_get(lv) t_o=if t_o=~/^:?[A-D1-6]\~\s+(.+)/m obj=$1 note=endnote_test?(obj) obj,tags=extract_tags(obj) if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ if @per.ocn==:ocn_off_headings_dummy_lev1 \ and t_o =~/^1\~\S*\s+/m obj << ' -#' elsif @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep obj << ' ~#' end end end h={ lv: lv, ln: ln, obj: obj, idx: idx, tags: tags, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) elsif t_o=~/^:?[A-D1-6]\~(\S+?)-\s+(.+)/m name,obj=$1,$2 note=endnote_test?(obj) obj,tags=extract_tags(obj) if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ if @per.ocn==:ocn_off_headings_dummy_lev1 \ and t_o =~/^1\~\S*\s+/m obj << ' -#' elsif @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep obj << ' ~#' end end end h={ lv: lv, name: name, obj: obj, idx: idx, autonum_: false, tags: tags, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) elsif t_o=~/^:?[A-D1-6]\~(\S+)\s+(.+)/m name,obj=$1,$2 note=endnote_test?(obj) obj,tags=extract_tags(obj,name) if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ if @per.ocn==:ocn_off_headings_dummy_lev1 \ and t_o =~/^1\~\S*\s+/m obj << ' -#' elsif @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep obj << ' ~#' end end end h={ lv: lv, name: name, obj: obj, idx: idx, tags: tags, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) else nil end when /^_(?:[1-9]!?|[1-9]?\*)\s+/ #indented and/or bullet paragraph t_o=if t_o=~/^(_(?:[1-9]?\*|[1-9]!?)\s+)(.+)/m tst,obj=$1,$2 if t_o=~/^_[1-9]!\s+.+/m hang,indent,obj=hang_and_indent_def_test(tst,obj) else hang,indent=hang_and_indent_test(tst) end bullet=bullet_test(tst) image=image_test(obj) note=endnote_test?(obj) obj,tags=extract_tags(obj) unless obj=~/\A\s*\Z/m if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ obj << ' ~#' end end h={ bullet_: bullet, hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags, quote: quotes?, } SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end else nil end when /^_[0-9]?_[0-9]!?\s+/ #hanging indent paragraph t_o=if t_o=~/^(_[0-9]?_[0-9]!?\s+)(.+)/m tst,obj=$1,$2 if t_o=~/^_[0-9]?_[0-9]!\s+.+/m hang,indent,obj=hang_and_indent_def_test(tst,obj) else hang,indent=hang_and_indent_test(tst) end image=image_test(obj) note=endnote_test?(obj) obj,tags=extract_tags(obj) unless obj=~/\A\s*\Z/m if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ obj << ' ~#' end end h={ hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags, quote: quotes?, } SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end else nil end when /^<(?:br)?:(?:pa?r|o(?:bj|---)?)>\s*$/ #[br:par] #[br:obj] SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_obj]) when /^(?:-\\\\-|<:pb>)\s*$/ #[br:pg] SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page],:markup) when /^(?:=\\\\=|<:pn>)\s*$/ #[br:pgn] SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new],:markup) when /^-\.\.-\s*$/ #[br:pgl] SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line],:markup) else #paragraph image=image_test(t_o) note=endnote_test?(t_o) obj,tags=extract_tags(t_o) if @per.ocn==:ocn_off_headings_dummy_lev1 \ or @per.ocn==:ocn_off_headings_keep unless obj =~ /[~-][#]\s*$/ obj << ' ~#' end end unless obj=~/\A\s*\Z/m h={ bullet_: false, indent: 0, hang: 0, obj: obj, idx: idx, note_: note, image_: image, tags: tags, quote: quotes?, } t_o=SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end t_o=SiSU_AO_DocumentStructureExtract::Structure.new(@md).structure_markup(t_o) #must happen earlier, node info etc. require end elsif @per.code==:off if t_o =~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{|```[ ]+code(?:\.[a-z][0-9a-z_]+)?)/ @per.code=case t_o when /^code(?:\.[a-z][0-9a-z_]+)?\{/ then :curls when /^```[ ]+code/ then :tics else @per.code #error end @per.lngsyn=if t_o =~/^(?:code\.[a-z][0-9a-z_]+\{|```[ ]+code\.[a-z_]+)/ case t_o when /^code\.([a-z][0-9a-z_]+)\{/ :"#{$1}" when /^```[ ]+code\.([a-z][0-9a-z_]+)/ :"#{$1}" else :txt end else :txt end @@counter=1 @codeblock_numbered= (t_o =~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{#|```[ ]+code(?:\.[a-z][0-9a-z_]+)?\s[#])/) \ ? true : false @num_id[:code_block] +=1 h={ is_for: :code, obj: '', sym: :code_block_open, num: @num_id[:code_block], syntax: @per.lngsyn, } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif t_o =~/^(?:poem\{|```[ ]+poem)/ @per.poem=case t_o when /^poem\{/ then :curls when /^```[ ]+poem/ then :tics else @per.poem #error end @num_id[:poem] +=1 h={ is_for: :poem, obj: '', sym: :poem_open, num: @num_id[:poem], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^(?:box(?:\.[a-z_]+)?\{|```[ ]+box(?:\.[a-z_]+)?)/ @per.box=case t_o when /^box\{/ then :curls when /^```[ ]+box/ then :tics else @per.box #error end @num_id[:box] +=1 h={ is_for: :box, obj: '', sym: :box_open, num: @num_id[:box], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^(?:group\{|```[ ]+group)/ @per.group=case t_o when /^group\{/ then :curls when /^```[ ]+group/ then :tics else @per.group #error end @num_id[:group] +=1 h={ is_for: :group, obj: '', sym: :group_open, num: @num_id[:group], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^(?:block\{|```[ ]+block)/ @per.block=case t_o when /^block\{/ then :curls when /^```[ ]+block/ then :tics else @per.block #error end @num_id[:block] +=1 h={ is_for: :block, obj: '', sym: :block_open, num: @num_id[:block], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^(?:alt\{|```[ ]+alt)/ @per.alt=case t_o when /^alt\{/ then :curls when /^```[ ]+alt/ then :tics else @per.alt #error end @num_id[:alt] +=1 h={ is_for: :alt, obj: '', sym: :alt_open, num: @num_id[:alt], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << t_o elsif t_o =~/^`:quote_open`/ @per.quote=:open @num_id[:quote] +=1 h={ is_for: :quote, obj: '', sym: :quote_open, num: @num_id[:quote], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) #tuned_file << t_o #% find second source, entered twice, should be once so closed off here elsif t_o =~/^(?:table\(.+?\)\{|```[ ]+table\(.+?\)|\{table\(.+?\))/ @num_id[:table] +=1 h={ is_for: :table, obj: '', sym: :table_open, num: @num_id[:table], } ins_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << ins_o if t_o=~/^table\((?:.*?\bh;\s+)?.+?\)\{/ @per.table=:curls @rows='' case t_o when /table\(.*?\bh;\s+c(\d+):\s+(.+?)\)\{/ cols=$1 col=$2.scan(/\d+/) heading=true when /table\(.*?c(\d+):\s+(.+?)\)\{/ cols=$1 col=$2.scan(/\d+/) heading=false end @h={ head_: heading, cols: cols, widths: col, idx: idx, } elsif t_o=~/^```[ ]+table\((?:.*?\bh;)?\s+c\d+:/ @per.table=:tics @rows='' case t_o when /^```[ ]+table\(.*?\bh;\s+c(\d+):\s+(.+?)\)/ cols=$1 col=$2.scan(/\d+/) heading=true when /^```[ ]+table\(\s*c(\d+):\s+(.+?)\)/ cols=$1 col=$2.scan(/\d+/) heading=false end @h={ head_: heading, cols: cols, widths: col, idx: idx, } elsif t_o=~/^\{table\((?:.*?\bh;\s+)?(?:\s+\d+,?)?\)\s*\}\n.+\Z/m m1,m2,hd=nil,nil,nil tbl=/^\{table\((?:.*?\bh;\s+)?(?:\s+\d+,?)*\)\s*\}\n(.+)\Z/m.match(t_o)[1] # fix hd=((t_o =~/^\{table\(.*?\bh;\s+/) ? true : false) tbl,tags=extract_tags(tbl) rws=tbl.split(/\n/) rows='' cols=nil rws.each do |r| cols=(cols ? cols : (r.scan('|').length) +1) r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") rows += r + Mx[:tc_c] end col=[] if t_o =~/^\{table\((?:.*?\bh;\s+)?\s+c(\d+):.*?\)\s*\}/ #width of col 1 given as %, usually when wider than rest that are even c1=$1.to_i width=(100 - c1)/(cols - 1) col=[ c1 ] (cols - 1).times { col << width } else #all columns of equal width width=100.00/cols cols.times { col << width } end h={ head_: hd, cols: cols, widths: col, obj: rows, idx: idx, tags: tags, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ unless h.nil? tuned_file << t_o h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o elsif t_o=~/^```[ ]+table\((?:.*?\bh;)?\s+/ m1,m2,hd=nil,nil,nil h=case t_o when /^```[ ]+table\(.*?\bh;\s+(.+?)\)\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,true when /^```[ ]+table\((.+?)\)\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,false else nil end tbl,tags=extract_tags(tbl) col=m1.scan(/\d+/) rws=tbl.split(/\n/) rows='' rws.each do |r| r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") rows += r + Mx[:tc_c] end h={ head_: hd, cols: col.length, widths: col, obj: rows, idx: idx, tags: tags, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ unless h.nil? tuned_file << t_o h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o elsif t_o=~/^\{table\((?:.*?\bh;)?/ m1,m2,hd=nil,nil,nil h=case t_o when /\{table\(.*?\bh;\s+(.+?)\)\s*\}\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,true when /\{table\((.+?)\)\s*\}\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,false else nil end tbl,tags=extract_tags(tbl) col=m1.scan(/\d+/) rws=tbl.split(/\n/) rows='' rws.each do |r| r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") rows += r + Mx[:tc_c] end h={ head_: hd, cols: col.length, widths: col, obj: rows, idx: idx, tags: tags, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ unless h.nil? tuned_file << t_o h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o end ## depreciated markup, code should work for new markup after removal { elsif t_o =~/^(?:table\{|```[ ]+table|\{table)[ ~]/ puts "WARNING document using depreciated markup for tables" puts "use table([table attributes]) instead:" puts "table(){" puts "``` table()" puts "{table()}" @num_id[:table] +=1 h={ is_for: :table, obj: '', sym: :table_open, num: @num_id[:table], } ins_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) tuned_file << ins_o if t_o=~/^table\{(?:~h)?\s+/ @per.table=:curls @rows='' case t_o when /table\{~h\s+c(\d+);\s+(.+)/ cols=$1 col=$2.scan(/\d+/) heading=true when /table\{\s+c(\d+);\s+(.+)/ cols=$1 col=$2.scan(/\d+/) heading=false end @h={ head_: heading, cols: cols, widths: col, idx: idx, } elsif t_o=~/^```[ ]+table(?:~h)?\s+c\d+/ @per.table=:tics @rows='' case t_o when /^```[ ]+table~h\s+c(\d+);\s+(.+)/ cols=$1 col=$2.scan(/\d+/) heading=true when /^```[ ]+table\s+c(\d+);\s+(.+)/ cols=$1 col=$2.scan(/\d+/) heading=false end @h={ head_: heading, cols: cols, widths: col, idx: idx, } elsif t_o=~/^\{table(?:~h)?(?:\s+\d+;?)?\}\n.+\Z/m m1,m2,hd=nil,nil,nil tbl=/^\{table(?:~h)?(?:\s+\d+;?)?\}\n(.+)\Z/m.match(t_o)[1] hd=((t_o =~/^\{table~h/) ? true : false) tbl,tags=extract_tags(tbl) rws=tbl.split(/\n/) rows='' cols=nil rws.each do |r| cols=(cols ? cols : (r.scan('|').length) +1) r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") rows += r + Mx[:tc_c] end col=[] if t_o =~/^\{table(?:~h)?\s+(\d+);?\}/ #width of col 1 given as %, usually when wider than rest that are even c1=$1.to_i width=(100 - c1)/(cols - 1) col=[ c1 ] (cols - 1).times { col << width } else #all columns of equal width width=100.00/cols cols.times { col << width } end h={ head_: hd, cols: cols, widths: col, obj: rows, idx: idx, tags: tags, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ unless h.nil? tuned_file << t_o h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o elsif t_o=~/^```[ ]+table(?:~h)?\s+/ m1,m2,hd=nil,nil,nil h=case t_o when /^```[ ]+table~h\s+(.+?)\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,true when /^```[ ]+table\s+(.+?)\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,false else nil end tbl,tags=extract_tags(tbl) col=m1.scan(/\d+/) rws=tbl.split(/\n/) rows='' rws.each do |r| r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") rows += r + Mx[:tc_c] end h={ head_: hd, cols: col.length, widths: col, obj: rows, idx: idx, tags: tags, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ unless h.nil? tuned_file << t_o h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o elsif t_o=~/^\{table(?:~h)?\s+/ m1,m2,hd=nil,nil,nil h=case t_o when /\{table~h\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,true when /\{table\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one m1,tbl,hd=$1,$2,false else nil end tbl,tags=extract_tags(tbl) col=m1.scan(/\d+/) rws=tbl.split(/\n/) rows='' rws.each do |r| r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") rows += r + Mx[:tc_c] end h={ head_: hd, cols: col.length, widths: col, obj: rows, idx: idx, tags: tags, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \ unless h.nil? tuned_file << t_o h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o ## } depreciated markup, code should (continue to) work for new markup after removal, # when removing depreciated markup check only pass-through for new table attributes format # table(.+?){ ``` table(.+?) {table(.+?)} formats end end t_o end if @per.table==:curls or @per.table==:tics if (@per.table==:curls \ and t_o =~/^\}table/) \ or (@per.table==:tics \ and t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.table=:off headings,columns,widths,idx=@h[:head_],@h[:cols],@h[:widths],@h[:idx] @h={ head_: headings, cols: columns, widths: widths, idx: idx, obj: @rows, } t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(@h) tuned_file << t_o @h,@rows=nil,'' h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) t_o else if t_o.is_a?(String) \ and t_o !~/^(?:table\{|```[ ]+table)/ t_o=t_o.gsub(/^\n+/m,''). gsub(/\n+/m,"#{Mx[:tc_p]}") @rows += t_o + Mx[:tc_c] end t_o=nil end end if @per.code==:curls \ or @per.code==:tics if (@per.code==:curls \ && t_o =~/^\}code/) \ or (@per.code==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/m) @per.code=:off if @tuned_code[-1] @tuned_code[-1]. gsub!(/\s*(?:#{Mx[:br_line]}|#{Mx[:br_nl]})\s*\Z/m,'') end obj=@tuned_code.join("\n") tags=[] h={ obj: obj, idx: idx, syntax: @per.lngsyn, tags: tags, num: @num_id[:code_block], number_: @codeblock_numbered, } @per.lngsyn=:txt t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.code(h) @tuned_code=[] tuned_file << t_o h={ is_for: :code, obj: '', sym: :code_close, num: @num_id[:code_block], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) end if (@per.code==:curls \ || @per.code==:tics) \ and t_o.is_a?(String) sub_array=t_o.dup + "#{Mx[:br_nl]}" @line_mode=[] sub_array.scan(/.+/) {|w| @line_mode << w if w =~/[\S]+/} t_o=SiSU_AO_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(:code).join @tuned_code << t_o t_o=nil end elsif (@per.poem==:curls \ || @per.poem==:tics) \ or (@per.box==:curls \ || @per.box==:tics) \ or (@per.group==:curls \ || @per.group==:tics) \ or (@per.block==:curls \ || @per.block==:tics) \ or (@per.alt==:curls \ || @per.alt==:tics) \ or (@per.quote==:open \ && t_o =~/`:quote_close`/m) #not if (@per.poem==:curls \ && t_o =~/^\}poem$/m) \ or (@per.poem==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.poem=:off h={ is_for: :poem, obj: '', idx: idx, sym: :poem_close, num: @num_id[:poem], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif (@per.box==:curls \ && t_o =~/^\}box/) \ or (@per.box==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.box=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, idx: idx, tags: tags, num: @num_id[:box], } @tuned_block=[] t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.box(h) tuned_file << t_o h={ is_for: :box, obj: '', idx: idx, sym: :box_close, num: @num_id[:box], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif (@per.group==:curls \ && t_o =~/^\}group/) \ or (@per.group==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.group=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, idx: idx, tags: tags, num: @num_id[:group], } @tuned_block=[] t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.group(h) tuned_file << t_o h={ is_for: :group, obj: '', sym: :group_close, num: @num_id[:group], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif (@per.block==:curls \ && t_o =~/^\}block/) \ or (@per.block==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.block=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, idx: idx, tags: tags, num: @num_id[:block], } @tuned_block=[] t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.block(h) tuned_file << t_o h={ is_for: :block, obj: '', sym: :block_close, num: @num_id[:block], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif (@per.alt==:curls \ && t_o =~/^\}alt/) \ or (@per.alt==:tics \ && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) @per.alt=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, idx: idx, tags: tags, num: @num_id[:alt], } t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.alt(h) @tuned_block=[] tuned_file << t_o h={ is_for: :alt, obj: '', sym: :alt_close, num: @num_id[:alt], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif @per.quote==:open \ and t_o =~/`:quote_close`/m @per.quote=:off h={ is_for: :quote, idx: idx, obj: '', sym: :quote_close, num: @num_id[:quote], } t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h) elsif @per.quote==:open t_o,tags=extract_tags(t_o) h={ indent: 1, obj: t_o, idx: idx, note_: note, image_: image, tags: tags, quote: quotes?, } SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end if (@per.poem==:curls \ || @per.poem==:tics) \ or (@per.group==:curls \ || @per.group==:tics) \ or (@per.block==:curls \ || @per.block==:tics) \ or (@per.alt==:curls \ || @per.alt==:tics) \ and t_o =~/\S/ \ and t_o !~/^(?:\}(?:verse|code|box|alt|group|block)|(?:verse|code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|alt|group|block)\{)/ \ and t_o !~/^```[ ]+(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block)|^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/ # fix logic sub_array=t_o.dup @line_mode=sub_array.scan(/.+/) type=if @per.poem==:curls or @per.poem==:tics t_o=SiSU_AO_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(type).join poem=t_o.split(/\n\n/) poem.each do |v| v=v.gsub(/\n/m,"#{Mx[:br_nl]}\n") obj,tags=extract_tags(v) h={ obj: obj, tags: tags, num: @num_id[:poem], } t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.verse(h) tuned_file << t_o end :poem else :group end end @verse_count+=1 if @per.poem==:curls or @per.poem==:tics end if @per.code==:off if @per.poem==:curls or @per.poem==:tics \ or @per.box==:curls or @per.box==:tics \ or @per.group==:curls or @per.group==:tics \ or @per.block==:curls or @per.block==:tics \ or @per.alt==:curls or @per.alt==:tics \ or (@per.quote==:open and t_o =~/`:quote_close`/m) if t_o.is_a?(String) t_o=t_o.gsub(/\n/m,"#{Mx[:br_nl]}"). gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}"). gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") t_o=t_o + Mx[:br_nl] if t_o =~/\S+/ elsif t_o.is==:group \ || t_o.is==:block \ || t_o.is==:alt \ || t_o.is==:box \ || t_o.is==:verse t_o.obj=t_o.obj.gsub(/\n/m,"#{Mx[:br_nl]}"). gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}"). gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") end @tuned_block << t_o if t_o =~/\S+/ else tuned_file << t_o end else tuned_file << t_o end end tuned_file end def identify_parts tuned_file=[] @tuned_block,@tuned_code=[],[] @@counter,@verse_count=0,0 @num_id={ code_block: 0, poem: 0, box: 0, block: 0, group: 0, alt: 0, quote: 0, table: 0, } @metadata={} if @md.flag_auto_biblio \ or @md.flag_biblio @data,bibliography=SiSU_AO_Appendices::Bibliography.new(@md,@data).biblio_extraction end if @md.flag_glossary @data,glossary=SiSU_AO_Appendices::Glossary.new(@md,@data).glossary_extraction end tuned_file=extract_structure_loop(@data,tuned_file) if @md.flag_endnotes tuned_file << @pb h={ ln: 1, lc: 1, obj: 'Endnotes', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Endnotes', name: 'endnotes', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Endnotes' } end if @md.flag_glossary tuned_file << @pb h={ ln: 1, lc: 1, obj: 'Glossary', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Glossary', name: 'glossary', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Glossary' } if glossary.length > 0 tuned_file=extract_structure_loop(glossary,tuned_file) end end if @md.flag_auto_biblio tuned_file << @pb h={ ln: 1, lc: 1, obj: 'References', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Bibliography', name: 'biblio', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Bibliography' } citenumber=0 bibliography.each do |cite| citenumber +=1 if cite.is_a?(Hash) h={ obj: cite[:obj], #obj: %{[#{citenumber}] } + cite[:obj], tags: [cite[:id]], hang: 0, indent: 2, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end elsif @md.flag_biblio tuned_file << @pb h={ ln: 1, lc: 1, obj: 'References', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Bibliography', name: 'biblio', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Bibliography' } if not bibliography.nil? \ and bibliography.length > 0 tuned_file=extract_structure_loop(bibliography,tuned_file) else tuned_file, citations = SiSU_AO_Appendices::Citations.new(@md,tuned_file).songsheet # ao_appendices.rb citenumber=0 citations.compact.each do |c| citenumber +=1 if c.is_a?(Hash) if c[:is]==:book h={ obj: %{#{c[:author]}. /{#{c[:publication]}}/ (#{c[:year]})}, #obj: %{[#{citenumber}] *{#{c[:author]}}* /{#{c[:publication]}}/ (#{c[:year]})}, hang: 0, indent: 2, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) elsif c[:is]==:article h={ obj: %{#{c[:author]}. /{"#{c[:title]}"}/ #{c[:publication]} editor #{c[:editor]} (#{c[:year]})}, #obj: %{[#{citenumber}] *{#{c[:author]}}* /{"#{c[:title]}"}/ #{c[:publication]} editor #{c[:editor]} (#{c[:year]})}, hang: 0, indent: 2, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end end end end if @md.book_idx tuned_file << @pb h={ ln: 1, lc: 1, obj: 'Index', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'Index', name: 'book_index', autonum_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ obj: 'Index' } end tuned_file << @pb if @make.build.metadata? h={ ln: 1, lc: 1, obj: 'Metadata', autonum_: false, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) h={ ln: 4, lc: 2, obj: 'SiSU Metadata, document information', name: 'metadata', autonum_: false, ocn_: false, } tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h) end h={ obj: 'eof', } meta=SiSU_AO_DocumentStructure::ObjectMetadata.new.metadata(@metadata) [tuned_file,meta,bibliography,glossary] end def table_rows_and_columns_array(table_str) table=[] table_str.split(/#{Mx[:tc_c]}/).each do |table_row| table_row_with_columns=table_row.split(/#{Mx[:tc_p]}/) table << table_row_with_columns end table end def meta_heading(h) h={ lv: h[:lv], ln: h[:ln], name: h[:name], obj: h[:obj], ocn: '0', } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h) end def meta_para(str) h={ obj: str, ocn_: false, } SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h) end def build_lines(type=:none) lines=@data lines.each.map do |line| line=if line =~/\S/ \ and line !~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{|\}code)/ \ and line !~/^(?:```[ ]+code(?:\.[a-z][0-9a-z_]+)?|```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$)/ \ and not line.is_a?(Hash) #watch @@counter+=1 if @per.code==:curls or @per.code==:tics line=line.gsub(/\s\s/,"#{Mx[:nbsp]*2}"). gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") line=line.gsub(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type==:code # REMOVE try sort for texpdf special case line=if line =~/(?:https?|file|ftp):\/\/\S+$/ line.gsub(/\s*$/," #{Mx[:br_nl]}") else line.gsub(/\s*$/,"#{Mx[:br_nl]}") #unless type=='code' end elsif line =~/^\s*$/ line.gsub(/\s*$/,"#{Mx[:br_nl]}") else line end line end end end class Structure # this must happen early def initialize(md) @md=md end def structure(data) data.compact.each do |dob| structure_markup(dob) end end def structure_markup(dob) #build structure where structure provided only in meta header dob=if dob.is==:para \ && (((dob.hang !~/[1-9]/) && (dob.indent !~/[1-9]/)) \ || (dob.hang != dob.indent)) \ and not dob.bullet_ dob=case dob.obj when /^#{@md.lv0}/ h={ is: :heading, lv: 'A', ln: 0, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv1}/ h={ is: :heading, lv: 'B', ln: 1, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv2}/ h={ is: :heading, lv: 'C', ln: 2, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv3}/ h={ is: :heading, lv: 'D', ln: 3, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv4}/ h={ is: :heading, lv: '1', ln: 4, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv5}/ h={ is: :heading, lv: '2', ln: 5, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) when /^#{@md.lv6}/ h={ is: :heading, lv: '3', ln: 6, } SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob) else dob end else dob end dob end end class OCN def initialize(md,data,fnx,process) @md,@data,@fnx,@process=md,data,fnx,process end def structure_info def lv %w[A~ B~ C~ D~ 1 2 3 4] end def possible_parents(child) case child when /A~/ then 'none' when /B~/ then 'A~' when /C~/ then 'B~' when /D~/ then 'C~' when /1/ then 'A~, B~, C~, D~' when /2/ then '1' when /3/ then '2' when /4/ then '3' end end def possible_children(parent) case parent when /A~/ then 'B~, 1' when /B~/ then 'C~, 1' when /C~/ then 'D~, 1' when /D~/ then '1' when /1/ then '2' when /2/ then '3' when /3/ then '4' when /4/ then 'none' end end self end def document_structure_check_info(node,node_parent,status=:ok) node_ln=/^([0-7])/.match(node)[1].to_i node_parent_ln=/^([0-7])/.match(node_parent)[1].to_i if status==:error \ or @md.opt.act[:maintenance][:set]==:on puts %{node: #{node}, parent node: #{node_parent} #{status.upcase}} if status==:error node_ln=/^([0-7])/.match(node)[1].to_i node_parent_ln=/^([0-7])/.match(node_parent)[1].to_i STDERR.puts %{current level: #{structure_info.lv[node_ln]} (possible parent levels: #{structure_info.possible_parents(structure_info.lv[node_ln])}) parent level: #{structure_info.lv[node_parent_ln]} (possible child levels: #{structure_info.possible_children(structure_info.lv[node_parent_ln])}) SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} if @md.opt.act[:no_stop][:set]==:on $process_document = :skip else exit end end end end def warning_incorrect_parent_level_or_level(txt) puts %{ERROR. There is an error in markup of heading levels either here or in the parent heading. The current header reads: "#{txt}" has incorrect level and/or parent level --} end def required_headers_present? if @process == :complete unless (defined? @md.title \ and @md.title.full) STDERR.puts %{required header missing: @title: SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}" } if @md.opt.act[:no_stop][:set]==:on $process_document = :skip else exit end end unless (defined? @md.creator.author \ and @md.creator.author) STDERR.puts %{required header missing: @creator: :author: anonymous? SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}" } if @md.opt.act[:no_stop][:set]==:on $process_document = :skip else exit end end end end def ocn #and auto segment numbering increment required_headers_present? data=@data @o_array=[] node=ocn=ocn_dv=ocn_sp=ocnh=ocnh0=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocnh7=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnu=0 # h heading, o other, t table, g group, i image regex_exclude_ocn_and_node = /#{Rx[:meta]}|^@\S+?:\s|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^\^~ |<:e[:_]\d+?>|^<:\#|<:- |<[:!]!4|
if dob.is==:heading @ln=ln=case dob.lv when 'A' then 0 when 'B' then 1 when 'C' then 2 when 'D' then 3 when '1' then 4 when '2' then 5 when '3' then 6 when '4' then 7 when '5' then 8 when '6' then 9 end end if not dob.obj =~/~#|-#/ ocn+=1 end if @process == :complete \ or (@fnx == @md.opt.fns \ && @md.opt.fns =~/.sst$/) if dob.is==:heading \ and (ln.to_s =~/^[0-9]/ \ or ln.to_s =~@md.lv0 \ or ln.to_s =~@md.lv1 \ or ln.to_s =~@md.lv2 \ or ln.to_s =~@md.lv3 \ or ln.to_s =~@md.lv4 \ or ln.to_s =~@md.lv5 \ or ln.to_s =~@md.lv6 \ or ln.to_s =~@md.lv7) if not dob.obj =~/~#|-#/ ocnh+=1 end if ln==0 \ or ln=~@md.lv0 @lev_occurences[:a] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh0+=1 #heading node0="0:#{ocnh0};#{ocn}" else #document_structure_check_info(node0,node0,:error) #fix ocn_flag=false node0="0:0;0" end document_structure_check_info(node0,node0) @collapsed_lv0=0 collapsed_level=@collapsed_lv0 node,ocn_sp,parent=node0,"h#{ocnh}",'ROOT' elsif ln==1 \ or ln=~@md.lv1 @lev_occurences[:b] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh1+=1 #heading node1="1:#{ocnh1};#{ocn}" else #document_structure_check_info(node0,node0,:error) #fix ocn_flag=false node1="1:0;0" end parent=if node0 document_structure_check_info(node1,node0) @collapsed_lv1=@collapsed_lv0+1 node0 else warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node0,node0,:error) node0 end collapsed_level=@collapsed_lv1 node,ocn_sp,parent=node1,"h#{ocnh}",node0 #FIX elsif ln==2 \ or ln=~@md.lv2 @lev_occurences[:c] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh2+=1 node2="2:#{ocnh2};#{ocn}" else #document_structure_check_info(node0,node0,:error) #fix ocn_flag=false node2="2:0;0" end parent=if node1 document_structure_check_info(node2,node1) @collapsed_lv2=@collapsed_lv1+1 node1 else warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node2,node0,:error) node0 end collapsed_level=@collapsed_lv2 node,ocn_sp=node2,"h#{ocnh}" elsif ln==3 \ or ln=~@md.lv3 @lev_occurences[:d] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh3+=1 node3="3:#{ocnh3};#{ocn}" else #document_structure_check_info(node0,node0,:error) #fix ocn_flag=false node3="3:0;0" end parent=if node2 document_structure_check_info(node3,node2) @collapsed_lv3=@collapsed_lv2+1 node2 elsif node1 warning_incorrect_parent_level_or_level(dob.obj) puts %{parent is :A~ & this level #{dob.lv} either parent should be level :B~ or this level should be level :B~ rather than #{dob.lv}} document_structure_check_info(node3,node1,:error) @collapsed_lv3=@collapsed_lv1+1 node1 else document_structure_check_info(node3,node0,:error) warning_incorrect_parent_level_or_level(dob.obj) node0 end collapsed_level=@collapsed_lv3 node,ocn_sp=node3,"h#{ocnh}" elsif ln==4 \ or ln=~@md.lv4 @lev_occurences[:l1] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh4+=1 node4="4:#{ocnh4};#{ocn}" else ocn_flag=false node4="4:0;0" end parent=if node3 document_structure_check_info(node4,node3) @collapsed_lv4=@collapsed_lv3+1 node3 elsif node2 document_structure_check_info(node4,node2) @collapsed_lv4=@collapsed_lv2+1 node2 elsif node1 document_structure_check_info(node4,node1) @collapsed_lv4=@collapsed_lv1+1 node1 elsif node0 document_structure_check_info(node4,node0) @collapsed_lv4=@collapsed_lv0+1 node0 else warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node4,node0,:error) node0 end collapsed_level=@collapsed_lv4 node,ocn_sp=node4,"h#{ocnh}" elsif ln==5 \ or ln=~@md.lv5 @lev_occurences[:l2] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh5+=1 node5="5:#{ocnh5};#{ocn}" else ocn_flag=false node5="5:0;0" end parent=if node4 document_structure_check_info(node5,node4) @collapsed_lv5=@collapsed_lv4+1 node4 elsif node3 warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node5,node3,:error) @collapsed_lv5=@collapsed_lv3+1 node3 elsif node2 warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node5,node2,:error) @collapsed_lv5=@collapsed_lv2+1 node2 elsif node1 warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node5,node1,:error) @collapsed_lv5=@collapsed_lv1+1 node1 else warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node5,node0,:error) node0 end collapsed_level=@collapsed_lv5 node,ocn_sp=node5,"h#{ocnh}" elsif ln==6 \ or ln=~@md.lv6 @lev_occurences[:l3] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh6+=1 node6="6:#{ocnh6};#{ocn}" else ocn_flag=false node6="6:0;0" end parent=if node5 document_structure_check_info(node6,node5) @collapsed_lv6=@collapsed_lv5+1 node5 elsif node4 warning_incorrect_parent_level_or_level(dob.obj) puts "parent is level #4 (1~) & this level ##{dob.ln} (#{dob.lv}~) either parent should be level #5 (2~) or this level should be #5 (2~) rather ##{dob.ln} (#{dob.lv}~)" document_structure_check_info(node6,node4,:error) @collapsed_lv6=@collapsed_lv4+1 node4 elsif node3 warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node6,node3,:error) @collapsed_lv6=@collapsed_lv3+1 node3 elsif node2 warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node6,node2,:error) @collapsed_lv6=@collapsed_lv2+1 node2 elsif node1 warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node6,node1,:error) @collapsed_lv6=@collapsed_lv1+1 node1 else warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node6,node0,:error) node0 end collapsed_level=@collapsed_lv6 node,ocn_sp=node6,"h#{ocnh}" elsif ln==7 \ or ln=~@md.lv7 @lev_occurences[:l4] += 1 if not dob.obj =~/~#|-#/ ocn_flag=true ocnh7+=1 node7="7:#{ocnh7};#{ocn}" else ocn_flag=false node7="7:0;0" end parent=if node6 document_structure_check_info(node7,node6) @collapsed_lv7=@collapsed_lv6+1 node5 elsif node5 warning_incorrect_parent_level_or_level(dob.obj) puts "parent is level #5 (2~) & this level ##{dob.ln} (#{dob.lv}~) either parent should be level #6 (3~) or this level should be #6 (3~) rather ##{dob.ln} (#{dob.lv}~)" document_structure_check_info(node7,node5,:error) @collapsed_lv6=@collapsed_lv5+1 node5 elsif node4 warning_incorrect_parent_level_or_level(dob.obj) puts "parent is level #4 (1~) & this level ##{dob.ln} (#{dob.lv}~) either parent should be level 6~ or this level should be #6 (3~) rather ##{dob.ln} (#{dob.lv}~)" document_structure_check_info(node7,node4,:error) @collapsed_lv6=@collapsed_lv4+1 node4 elsif node3 warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node7,node3,:error) @collapsed_lv6=@collapsed_lv3+1 node3 elsif node2 warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node7,node2,:error) @collapsed_lv6=@collapsed_lv2+1 node2 elsif node1 warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node7,node1,:error) @collapsed_lv6=@collapsed_lv1+1 node1 else warning_incorrect_parent_level_or_level(dob.obj) document_structure_check_info(node7,node0,:error) node0 end collapsed_level=@collapsed_lv7 node,ocn_sp=node7,"h#{ocnh}" end else unless @lev_occurences[:l1] > 0 STDERR.puts %{Substantive text objects must follow a level 1~ heading and there are none at this point in processing: #{@lev_occurences[:l1]} SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} puts dob.obj #.gsub(/^(.{1,80})/,'"\1"') exit end unless @ln >= 4 lev=case @ln when 0 then 'A' when 1 then 'B' when 2 then 'C' when 3 then 'D' when 4 then '1' when 5 then '2' when 6 then '3' when 7 then '4' when 8 then '5' when 9 then '6' end STDERR.puts %{Substantive text objects must follow a level 1~ 2~ or 3~ heading: #{lev}~ SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} puts dob.obj.gsub(/^(.{1,80})/,'"\1"') if @md.opt.act[:no_stop][:set]==:on $process_document = :skip break else exit end end if not dob.obj =~/~#|-#/ ocn_flag=true else ocn_flag=false end ocno+=1 if dob.is==:table ocnt+=1 ocn_sp,parent="t#{ocnt}",node elsif dob.is==:code ocnc+=1 ocn_sp,parent="c#{ocnc}",node elsif dob.is==:group \ || dob.is==:box \ || dob.is==:block \ || dob.is==:alt \ || dob.is==:verse ocng+=1 #group, poem ocn_sp,parent="g#{ocng}",node elsif dob.is==:image #check ocni+=1 ocn_sp,parent="i#{ocni}",node else ocnp+=1 #paragraph ocn_sp,parent="p#{ocnp}",node end end end if dob.is==:heading if ocn_flag==true dob.ln,dob.node,dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent,dob.lc= ln, node, ocn, ocn_flag, ocn_dv,ocn_sp, parent, collapsed_level else ocnu+=1 heading_use=:ok if dob.obj=~/#{Mx[:pa_non_object_no_heading]}/ dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_no_heading]}/,'') heading_use=:ok elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/ dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_dummy_heading]}/,'') heading_use=:dummy end dob.ln,dob.node,dob.ocn,dob.ocn_,dob.use_, dob.odv,dob.osp,dob.parent,dob.lc= ln, node, nil, ocn_flag,heading_use,ocn_dv, ocn_sp, parent, collapsed_level end else if dob.of !=:meta \ && dob.of !=:comment \ && dob.of !=:layout if ocn_flag == true dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent= ocn, ocn_flag,ocn_dv, ocn_sp, parent else ocnu+=1 dob.obj=dob.obj.gsub(/#{Mx[:fa_o]}[~-]##{Mx[:fa_c]}/,'') if dob.obj ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}" dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent= nil, ocn_flag,ocn_dv, ocn_sp, parent end end end h else dob end if dob.is==:code \ || dob.is==:verse \ || dob.is==:alt \ || dob.is==:box \ || dob.is==:group \ || dob.is==:block dob.obj=dob.obj.gsub(/\n+/,"\n") #newlines taken out end @o_array << dob end if @process == :complete \ or (@fnx == @md.opt.fns \ && @md.opt.fns =~/.sst$/) unless @lev_occurences[:a] == 1 STDERR.puts %{The number of level A~ in this document: #{@lev_occurences[:a]} There must be one level A~ (no more and no less) SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} if @md.opt.act[:no_stop][:set]==:on $process_document = :skip else exit end end unless @lev_occurences[:l1] > 0 STDERR.puts %{The number of level 1~ in this document: #{@lev_occurences[:l1]} There must be at least one level 1~ (and as many as required) SKIPPED processing file: [#{@md.opt.lng}] "#{@md.fns}"} if @md.opt.act[:no_stop][:set]==:on $process_document = :skip else exit end end end @o_array end end class XML def initialize(md,data) @data,@md=data,md end def dom @s=[ 'A', 'B', 'C', 'D', '1', '2', '3' ] tuned_file=structure_build tuned_file end def spaces Ax[:spaces] end def structure_build data=@data tuned_file=[] hs=[0,false,false,false] t={ lv: @s[0], status: :open, } tuned_file << tags(t) if @md.opt.act[:verbose_plus][:set]==:on puts "\nXML sisu structure outline --->\n" puts "<#{@s[0]}>" end data.each_with_index do |o,i| if o.is==:heading \ || o.is==:heading_insert case o.ln when 0 tuned_file << tag_close(o.ln,hs) tuned_file << tag_open(o,@s) if @md.opt.act[:verbose_plus][:set]==:on puts_tag_close(o.ln,hs) puts_tag_open(o,@s) end hs=[0,true,false,false,false] when 1 tuned_file << tag_close(o.ln,hs) tuned_file << tag_open(o,@s) if @md.opt.act[:verbose_plus][:set]==:on puts_tag_close(o.ln,hs) puts_tag_open(o,@s) end hs=[1,true,true,false,false] when 2 tuned_file << tag_close(o.ln,hs) tuned_file << tag_open(o,@s) if @md.opt.act[:verbose_plus][:set]==:on puts_tag_close(o.ln,hs) puts_tag_open(o,@s) end hs=[2,true,true,true,false] when 3 tuned_file << tag_close(o.ln,hs) tuned_file << tag_open(o,@s) if @md.opt.act[:verbose_plus][:set]==:on puts_tag_close(o.ln,hs) puts_tag_open(o,@s) end hs=[3,true,true,true,true] when 4 tuned_file << tag_close(o.ln,hs) tuned_file << tag_open(o,@s) if @md.opt.act[:verbose_plus][:set]==:on puts_tag_close(o.ln,hs) puts_tag_open(o,@s) end hs[0]=4 when 5 tuned_file << tag_close(o.ln,hs) tuned_file << tag_open(o,@s) if @md.opt.act[:verbose_plus][:set]==:on puts_tag_close(o.ln,hs) puts_tag_open(o,@s) end hs[0]=5 when 6 tuned_file << tag_close(o.ln,hs) tuned_file << tag_open(o,@s) if @md.opt.act[:verbose_plus][:set]==:on puts_tag_close(o.ln,hs) puts_tag_open(o,@s) end hs[0]=6 end end tuned_file << o end if @md.opt.act[:verbose_plus][:set]==:on puts_tag_close(0,hs) end tuned_file << tag_close(0,hs) tuned_file=tuned_file.flatten end def tags(o) tag=(o[:status]==:open) \ ? %{<#{o[:lv]} id="#{o[:node]}">} : "" ln=case o[:lv] when 'A' then 0 when 'B' then 1 when 'C' then 2 when 'D' then 3 when '1' then 4 when '2' then 5 when '3' then 6 when '4' then 7 when '5' then 8 when '6' then 9 end h={ tag: tag, node: o[:node], lv: o[:lv], ln: ln, status: o[:status], } SiSU_AO_DocumentStructure::ObjectStructure.new.xml_dom(h) #downstream code utilise else ignore like comments end def tag_open(o,tag) t={ lv: tag[o.ln], node: o.node, status: :open } t_o=tags(t) t_o end def tag_close(lev,hs) ary=[] case hs[0] when 0 if (lev <= 0) and hs[0] t={ lv: @s[0], status: :close, } ary << tags(t) end when 1 if (lev <= 1) and hs[1] t={ lv: @s[1], status: :close, } ary << tags(t) end if (lev==0) t={ lv: @s[0], status: :close, } ary << tags(t) end when 2 if (lev <= 2) and hs[2] t={ lv: @s[2], status: :close, } ary << tags(t) end if (lev <= 1) and hs[1] t={ lv: @s[1], status: :close, } ary << tags(t) end if (lev==0) t={ lv: @s[0], status: :close, } ary << tags(t) end when 3 if (lev <= 3) and hs[3] t={ lv: @s[3], status: :close, } ary << tags(t) end if (lev <= 2) and hs[2] t={ lv: @s[2], status: :close, } ary << tags(t) end if (lev <= 1) and hs[1] t={ lv: @s[1], status: :close, } ary << tags(t) end if (lev==0) t={ lv: @s[0], status: :close, } ary << tags(t) end when 4 if (lev <= 4) t={ lv: @s[4], status: :close, } ary << tags(t) end if (lev <= 3) and hs[3] t={ lv: @s[3], status: :close, } ary << tags(t) end if (lev <= 2) and hs[2] t={ lv: @s[2], status: :close, } ary << tags(t) end if (lev <= 1) and hs[1] t={ lv: @s[1], status: :close, } ary << tags(t) end if (lev==0) t={ lv: @s[0], status: :close, } ary << tags(t) end when 5 if (lev <= 5) t={ lv: @s[5], status: :close, } ary << tags(t) end if (lev <= 4) t={ lv: @s[4], status: :close, } ary << tags(t) end if (lev <= 3) and hs[3] t={ lv: @s[3], status: :close, } ary << tags(t) end if (lev <= 2) and hs[2] t={ lv: @s[2], status: :close, } ary << tags(t) end if (lev <= 1) and hs[1] t={ lv: @s[1], status: :close, } ary << tags(t) end if (lev==0) t={ lv: @s[0], status: :close, } ary << tags(t) end when 6 if (lev <= 6) t={ lv: @s[6], status: :close, } ary << tags(t) end if (lev <= 5) t={ lv: @s[5], status: :close, } ary << tags(t) end if (lev <= 4) t={ lv: @s[4], status: :close, } ary << tags(t) end if (lev <= 3) and hs[3] t={ lv: @s[3], status: :close, } ary << tags(t) end if (lev <= 2) and hs[2] t={ lv: @s[2], status: :close, } ary << tags(t) end if (lev <= 1) and hs[1] t={ lv: @s[1], status: :close, } ary << tags(t) end if (lev==0) t={ lv: @s[0], status: :close, } ary << tags(t) end end ary end def puts_tag_open(o,tag) puts %{#{spaces*o.ln}<#{tag[o.ln]} id="#{o.node}">} end def puts_tag_close(lev,hs) case hs[0] when 0 #puts "#{spaces*0}" if (lev <= 0) and hs[0] puts "" if (lev==0) when 1 puts "#{spaces*1}" if (lev <= 1) and hs[1] puts "" if (lev==0) when 2 puts "#{spaces*2}" if (lev <= 2) and hs[2] puts "#{spaces*1}" if (lev <= 1) and hs[1] puts "" if (lev==0) when 3 puts "#{spaces*3}" if (lev <= 3) and hs[3] puts "#{spaces*2}" if (lev <= 2) and hs[2] puts "#{spaces*1}" if (lev <= 1) and hs[1] puts "" if (lev==0) when 4 puts "#{spaces*4}" if (lev <= 4) puts "#{spaces*3}" if (lev <= 3) and hs[3] puts "#{spaces*2}" if (lev <= 2) and hs[2] puts "#{spaces*1}" if (lev <= 1) and hs[1] puts "" if (lev==0) when 5 puts "#{spaces*5}" if (lev <= 5) puts "#{spaces*4}" if (lev <= 4) puts "#{spaces*3}" if (lev <= 3) and hs[3] puts "#{spaces*2}" if (lev <= 2) and hs[2] puts "#{spaces*1}" if (lev <= 1) and hs[1] puts "" if (lev==0) when 6 puts "#{spaces*6}" if (lev <= 6) puts "#{spaces*5}" if (lev <= 5) puts "#{spaces*4}" if (lev <= 4) puts "#{spaces*3}" if (lev <= 3) and hs[3] puts "#{spaces*2}" if (lev <= 2) and hs[2] puts "#{spaces*1}" if (lev <= 1) and hs[1] puts "" if (lev==0) end end end end __END__