# encoding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah,
All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Git
* Ralph Amissah
** Description: document abstraction
=end
module SiSU_AO_DocumentStructureExtract
# Holds a default flag table and, on instantiation, resets this class's
# counters (@@counter, @@column, @@columns) to 0 and @@line_mode to ''.
# NOTE(review): initialize does not call super, so the parent initializer
# is not run here - presumably intentional; confirm.
class Instantiate < SiSU_Param::Parameters::Instructions
  # Default markup state: ocn on, plain-text syntax, every block type off.
  @@flag={
    ocn:      :on,
    code:     :off,
    lngsyn:   :txt,
    poem:     :off,
    block:    :off,
    box:      :off,
    group:    :off,
    alt:      :off,
    quote:    :off,
    table:    :off,
    table_to: :off,
  }
  def initialize
    @@counter=0
    @@column=0
    @@columns=0
    @@line_mode=''
  end
end
class Build
# Class-level parsing state used by identify_parts, build_lines and quotes?.
# - :ocn      :on, or one of the "ocn off" modes set by identify_parts
# - :code, :poem, :block, :box, :group, :alt, :table
#             :off outside such a block; identify_parts sets :curls or
#             :tics (depending on the opening markup) while inside one
# - :lngsyn   syntax symbol of the current code block (:txt by default)
# - :quote    set to :open / :off by identify_parts
# - :table_to not referenced in the visible part of this file
@@flag={
ocn: :on,
code: :off,
lngsyn: :txt,
poem: :off,
block: :off,
box: :off,
group: :off,
alt: :off,
quote: :off,
table: :off,
table_to: :off,
}
# Stores the document parameters and raw data, instantiates
# Instantiate (which resets its class-level counters) and prepares the
# three page-break layout objects (@pb, @pbn, @pbl).
#
# md::   document metadata/parameters object
# data:: collection of markup objects to process
def initialize(md,data)
  @md=md
  @data=data
  SiSU_AO_DocumentStructureExtract::Instantiate.new
  page_break=lambda do |kind|
    SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[kind])
  end
  @pb=page_break.call(:br_page)
  @pbn=page_break.call(:br_page_new)
  @pbl=page_break.call(:br_page_line)
end
# Translates a heading level marker into its numeric depth.
#
# lv:: level marker containing one of A B C D 1 2 3 4 5 6
# Returns 0..9 for the first marker (in that order) found in lv,
# or nil when none is present.
def ln_get(lv)
  %w[A B C D 1 2 3 4 5 6].index { |marker| /#{marker}/ === lv }
end
# True when str contains a "{ file.png ... }http(s)://..." image link.
# Only the .png extension is recognised by this pattern.
def image_test(str)
  !!(str =~ /\{\s*\S+?\.png.+?\}https?:\/\/\S+/)
end
# True when str contains an asterisk (the bullet marker).
def bullet_test(str)
  !!(str =~ /\*/)
end
# True while the shared :quote flag is :open.
def quotes?
  @@flag[:quote]==:open
end
# Reads the hang/indent values from a paragraph's leading markup.
#
#   _N   -> [N, N]     (hang and indent equal)
#   __N  -> [0, N]
#   _N_M -> [N, M]
#   else -> [0, 0]
#
# Matched digits are returned as strings; the fallback zeros are Integers.
def hang_and_indent_test(str)
  case str
  when /^_([1-9])[^_]/
    [Regexp.last_match(1),Regexp.last_match(1)]
  when /^__([1-9])/
    [0,Regexp.last_match(1)]
  when /^_([0-9])_([0-9])/
    [Regexp.last_match(1),Regexp.last_match(2)]
  else
    [0,0]
  end
end
# Handles a "definition" style paragraph (markup ending in "!"): works out
# hang/indent from the leading markup and wraps the defined term in bold.
#
# str1:: leading markup (e.g. "_1! "), parsed by hang_and_indent_test
# str2:: paragraph text; the term is the text before " \\ " when that
#        separator is present, otherwise the first line
# Returns [hang, indent, text_with_bold_term].
def hang_and_indent_def_test(str1,str2)
  # same markup rules as ordinary indented paragraphs, so reuse the helper
  # rather than keeping a second copy of the regex chain
  hang,indent=hang_and_indent_test(str1)
  obj=if str2 =~/^(.+?)\s+\\\\(?:\s+|\n)/
    # term separated from its definition by a double backslash
    str2.gsub(/^(.+?)(\s+\\\\(?:\s+|\n))/,
      "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2")
  else
    # otherwise the whole first line is the term
    str2.gsub(/^(.+?)\n/,
      "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\n")
  end
  [
    hang,
    indent,
    obj,
  ]
end
# True when str contains inline endnote markup: ~{ ... }~ or ~[ ... ]~
def endnote_test?(str)
  !!(str =~ /~\{.+?\}~|~\[.+?\]~/)
end
# Pulls "*~tagname" markers out of a text object.
#
# str::     text to scan; nil is passed through untouched
# nametag:: optional extra tag appended to the list (ignored when str is nil)
# Returns [text_without_tag_markers, tags].
#
# Every tag is reduced in place to the characters a-z 0-9 . _ - ; because
# this uses gsub!, a String passed as nametag is modified for the caller too.
def extract_tags(str,nametag=nil)
  tags=[]
  unless str.nil?
    if str =~/(?:^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/
      # rewrite the markers with internal delimiters, then collect and remove them
      marked=str.gsub(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i,
        "\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}")
      str=marked.gsub(/ [ ]+/i,' ')
      tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten.uniq
      str=str.gsub(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks?
    end
    tags << nametag if nametag
    tags.each { |tag| tag.gsub!(/[^a-z0-9._-]/,'') }
  end
  [str,tags]
end
# Pattern splitting an index term such as "term +3" into the term text
# (capture 1) and the number after "+" (capture 2). The pattern is also
# stored in @rgx_idx_ocn_seg.
def rgx_idx_ocn_seg
  @rgx_idx_ocn_seg=%r{(.+?)\s*[+](\d+)}
end
# Parses a book-index string into both a nested array and a hash.
#
# idxraw:: index markup; entries separated by ";", a main term optionally
#          followed by ":" and "|"-separated sub-terms, each term optionally
#          suffixed "+N" (defaults to "+0" when absent)
#
# Returns { hash: ..., array: ... } where
#   array is one entry per index item: ["Main+N", ["sub+N", ...]] or ["Main+N"]
#   hash  maps main term => { sub: [ { "sub" => { plus: "N" } }, ... ], plus: "N" }
#
# The first character of each main term is upper-cased. Blank segments
# (e.g. "a; ;b") are skipped and a main term with nothing after its colon
# ("term:") yields an empty sub-term list; both previously raised
# NoMethodError on nil.
def construct_idx_array_and_hash(idxraw)
  idx_hash,idx_array={},[]
  idxraw.scan(/[^;]+/).each do |idx|
    idx=idx.strip
    next if idx.empty? # nothing to index in a blank segment
    idx_lst=case idx
    when /\S+?\s*:/
      main_term,sub_raw=idx.scan(/\s*[^:]+\s*/)
      sub_terms=if sub_raw =~/[|]/
        sub_raw.scan(/\s*[^|]+\s*/)
      else
        [sub_raw].compact # drop the nil left by a dangling "term:"
      end
      [main_term,sub_terms]
    else [idx]
    end
    term_nodes=[]
    idx_lst.each do |term_node|
      case term_node
      when String
        # upper-case only the first character, leave the rest untouched
        term_node=
          term_node[0].chr.capitalize +
          term_node[1,term_node.length]
        term_node=(term_node =~/.+?[+]\d+/) \
        ? term_node
        : (term_node + '+0')
        term_nodes << term_node
        use,plus=rgx_idx_ocn_seg.match(term_node)[1,2]
        @use=use.strip # remembered so the following sub-terms attach to it
        idx_hash[@use] ||= { sub: [], plus: plus }
      when Array
        subterm_nodes=[]
        term_node.each do |subterm_node|
          subterm_node=(subterm_node =~/.+?[+]\d+/) \
          ? subterm_node
          : (subterm_node + '+0')
          subterm_nodes << subterm_node
          sub,sub_plus=rgx_idx_ocn_seg.match(subterm_node)[1,2]
          idx_hash[@use] ||= { sub: [], plus: 0 }
          idx_hash[@use][:sub] <<
            { sub.strip => { plus: sub_plus } }
        end
        term_nodes << subterm_nodes
      end
    end
    idx_array << term_nodes
  end
  {
    hash: idx_hash,
    array: idx_array,
  }
end
# Walks every raw markup object in @data and turns it into a document
# structure object (heading, paragraph, comment, layout break, table,
# code/poem/box/group/block/alt/quote block ...), collecting the results.
#
# State carried between objects lives in the class-level @@flag table (which
# block type, if any, is currently open and whether it was opened with curly
# or back-tick markup) and in @tuned_block / @tuned_code / @rows, which
# accumulate the content of the block currently open.
#
# After the loop, "Endnotes" (if @md.flag_endnotes), "Index" (if
# @md.book_idx) and "Metadata" headings are appended.
#
# Returns [tuned_file, meta]: the array of built objects (may contain nil
# entries) and the metadata object built from the collected header fields.
def identify_parts
  tuned_file=[]
  @tuned_block,@tuned_code=[],[]
  @@counter,@verse_count=0,0
  # running number per block type, used as :num on open/close objects
  @num_id={
    code_block: 0,
    poem: 0,
    box: 0,
    block: 0, # was missing: "@num_id[:block] +=1" below then raised NoMethodError on nil
    group: 0,
    alt: 0,
    quote: 0,
    table: 0,
  }
  @metadata={}
  @data.each do |t_o|
    # --- object citation numbering switches: --+# --~# ---#
    if t_o =~/^--([+~-])[#]$/
      h=case $1
      when /[+]/
        @@flag[:ocn]=:on
        {
          flag: :ocn_on,
        }
      when /[~]/
        @@flag[:ocn]=:ocn_off_headings_keep
        {
          flag: :ocn_off,
          mod: :headings_keep,
        }
      when /[-]/ #of particular relevance with level 1~ which is required to precede substantive text & used e.g. in html segmented text
        @@flag[:ocn]=:ocn_off_headings_dummy_lev1
        {
          flag: :ocn_off,
          mod: :headings_exclude,
        }
      else
        @@flag[:ocn]=:on
        {
          flag: :ocn_on,
        }
      end
      # the flag object is built but not stored: "next" skips to the following object
      t_o=SiSU_AO_DocumentStructure::ObjectFlag.new.flag_ocn(h)
      next
    end
    # collapse blank-line runs, except inside code blocks
    t_o=t_o.gsub(/(?:\n\s*\n)+/m,"\n") if @@flag[:code]==:off
    unless t_o =~/^(?:@\S+?:|%+)\s/ # extract book index for paragraph if any
      idx=if t_o=~/^=\{\s*(.+)\s*\}\s*$\Z/m
        m=$1
        m=m.split(/[ ]*\n/).join(' ').
          gsub(/\s+([|:;])\s+/,'\1').
          gsub(/\s+([+]\d+)\s+/,'\1')
        t_o=t_o.gsub(/\n=\{.+?\}\s*$/m,'')
        idx_array_and_hash=construct_idx_array_and_hash(m)
        idx_array_and_hash[:hash]
      else nil
      end
    end
    # --- 1. ordinary objects: not a block opener/closer and no block open
    if t_o !~/^(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block)\{|^\}(?:code|poem|alt|group|block)|^(?:table\{|\{table)[ ~]/ \
    and t_o !~/^```[ ]+(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block|table)|^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$|^`:quote_(?:open|close)`/ \
    and @@flag[:code]==:off \
    and @@flag[:poem]==:off \
    and @@flag[:group]==:off \
    and @@flag[:block]==:off \
    and @@flag[:alt]==:off \
    and @@flag[:box]==:off \
    and @@flag[:table]==:off
      t_o=case t_o
      when /^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/ #metadata, header
        if t_o=~/^#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}\s*(.+)/m
          tag,obj=$1,$2
          @metadata[tag]=obj
        end
        t_o=nil
      when /^%+\s/ #comment
        t_o=if t_o=~/^%+\s+(.+)/
          h={ obj: $1 }
          SiSU_AO_DocumentStructure::ObjectComment.new.comment(h)
        else nil
        end
      when /^:?([A-D1-6])\~/ #heading / lv
        lv=$1
        ln=ln_get(lv)
        t_o=if t_o=~/^:?[A-D1-6]\~\s+(.+)/m
          # heading without a name tag
          obj=$1
          note=endnote_test?(obj)
          obj,tags=extract_tags(obj)
          if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
          or @@flag[:ocn]==:ocn_off_headings_keep
            unless obj =~ /[~-][#]\s*$/
              if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
              and t_o =~/^1\~\S*\s+/m
                obj << ' -#'
              elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
              or @@flag[:ocn]==:ocn_off_headings_keep
                obj << ' ~#'
              end
            end
          end
          h={
            lv: lv,
            ln: ln,
            obj: obj,
            idx: idx,
            tags: tags,
          }
          SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h)
        elsif t_o=~/^:?[A-D1-6]\~(\S+?)-\s+(.+)/m
          # heading with "name-" form: auto numbering switched off
          name,obj=$1,$2
          note=endnote_test?(obj)
          obj,tags=extract_tags(obj)
          if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
          or @@flag[:ocn]==:ocn_off_headings_keep
            unless obj =~ /[~-][#]\s*$/
              if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
              and t_o =~/^1\~\S*\s+/m
                obj << ' -#'
              elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
              or @@flag[:ocn]==:ocn_off_headings_keep
                obj << ' ~#'
              end
            end
          end
          h={
            lv: lv,
            name: name,
            obj: obj,
            idx: idx,
            autonum_: false,
            tags: tags,
          }
          SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h)
        elsif t_o=~/^:?[A-D1-6]\~(\S+)\s+(.+)/m
          # heading with a name tag; the name is also added to the tags
          name,obj=$1,$2
          note=endnote_test?(obj)
          obj,tags=extract_tags(obj,name)
          if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
          or @@flag[:ocn]==:ocn_off_headings_keep
            unless obj =~ /[~-][#]\s*$/
              if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
              and t_o =~/^1\~\S*\s+/m
                obj << ' -#'
              elsif @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
              or @@flag[:ocn]==:ocn_off_headings_keep
                obj << ' ~#'
              end
            end
          end
          h={
            lv: lv,
            name: name,
            obj: obj,
            idx: idx,
            tags: tags,
          }
          SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h)
        else nil
        end
      when /^_(?:[1-9]!?|[1-9]?\*)\s+/ #indented and/or bullet paragraph
        t_o=if t_o=~/^(_(?:[1-9]?\*|[1-9]!?)\s+)(.+)/m
          tst,obj=$1,$2
          if t_o=~/^_[1-9]!\s+.+/m
            hang,indent,obj=hang_and_indent_def_test(tst,obj)
          else
            hang,indent=hang_and_indent_test(tst)
          end
          bullet=bullet_test(tst)
          image=image_test(obj)
          note=endnote_test?(obj)
          obj,tags=extract_tags(obj)
          unless obj=~/\A\s*\Z/m
            if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
            or @@flag[:ocn]==:ocn_off_headings_keep
              unless obj =~ /[~-][#]\s*$/
                obj << ' ~#'
              end
            end
            h={
              bullet_: bullet,
              hang: hang,
              indent: indent,
              obj: obj,
              idx: idx,
              note_: note,
              image_: image,
              tags: tags,
              quote: quotes?,
            }
            SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h)
          end
        else nil
        end
      when /^_[0-9]?_[0-9]!?\s+/ #hanging indent paragraph
        t_o=if t_o=~/^(_[0-9]?_[0-9]!?\s+)(.+)/m
          tst,obj=$1,$2
          if t_o=~/^_[0-9]?_[0-9]!\s+.+/m
            hang,indent,obj=hang_and_indent_def_test(tst,obj)
          else
            hang,indent=hang_and_indent_test(tst)
          end
          image=image_test(obj)
          note=endnote_test?(obj)
          obj,tags=extract_tags(obj)
          unless obj=~/\A\s*\Z/m
            if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
            or @@flag[:ocn]==:ocn_off_headings_keep
              unless obj =~ /[~-][#]\s*$/
                obj << ' ~#'
              end
            end
            h={
              hang: hang,
              indent: indent,
              obj: obj,
              idx: idx,
              note_: note,
              image_: image,
              tags: tags,
              quote: quotes?,
            }
            SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h)
          end
        else nil
        end
      when /^<(?:br)?:(?:pa?r|o(?:bj|---)?)>\s*$/ #[br:par] #[br:obj]
        SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_obj])
      when /^(?:-\\\\-|<:pb>)\s*$/ #[br:pg]
        SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page],:markup)
      when /^(?:=\\\\=|<:pn>)\s*$/ #[br:pgn]
        SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_new],:markup)
      when /^-\.\.-\s*$/ #[br:pgl]
        SiSU_AO_DocumentStructure::ObjectLayout.new.break(Hx[:br_page_line],:markup)
      else #paragraph
        image=image_test(t_o)
        note=endnote_test?(t_o)
        obj,tags=extract_tags(t_o)
        if @@flag[:ocn]==:ocn_off_headings_dummy_lev1 \
        or @@flag[:ocn]==:ocn_off_headings_keep
          unless obj =~ /[~-][#]\s*$/
            obj << ' ~#'
          end
        end
        unless obj=~/\A\s*\Z/m
          h={
            bullet_: false,
            indent: 0,
            hang: 0,
            obj: obj,
            idx: idx,
            note_: note,
            image_: image,
            tags: tags,
            quote: quotes?,
          }
          t_o=SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h)
        end
        t_o=SiSU_AO_DocumentStructureExtract::Structure.new(@md).structure_markup(t_o) #must happen earlier, node info etc. require
      end
    # --- 2. block openers (only looked for while no code block is open)
    elsif @@flag[:code]==:off
      if t_o =~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{|```[ ]+code(?:\.[a-z][0-9a-z_]+)?)/
        @@flag[:code]=case t_o
        when /^code(?:\.[a-z][0-9a-z_]+)?\{/ then :curls
        when /^```[ ]+code/ then :tics
        else @@flag[:code] #error
        end
        # optional ".syntax" suffix on the opener selects the language symbol
        @@flag[:lngsyn]=if t_o =~/^(?:code\.[a-z][0-9a-z_]+\{|```[ ]+code\.[a-z_]+)/
          case t_o
          when /^code\.([a-z][0-9a-z_]+)\{/
            :"#{$1}"
          when /^```[ ]+code\.([a-z][0-9a-z_]+)/
            :"#{$1}"
          else :txt
          end
        else :txt
        end
        @@counter=1
        # a "#" directly after the opener requests numbered code lines
        @codeblock_numbered=
          (t_o =~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{#|```[ ]+code(?:\.[a-z][0-9a-z_]+)?\s[#])/) \
          ? true
          : false
        @num_id[:code_block] +=1
        h={
          is_for: :code,
          obj: '',
          sym: :code_block_open,
          num: @num_id[:code_block],
          syntax: @@flag[:lngsyn],
        }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
      elsif t_o =~/^(?:poem\{|```[ ]+poem)/
        @@flag[:poem]=case t_o
        when /^poem\{/ then :curls
        when /^```[ ]+poem/ then :tics
        else @@flag[:poem] #error
        end
        @num_id[:poem] +=1
        h={ is_for: :poem, obj: '', sym: :poem_open, num: @num_id[:poem], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
        tuned_file << t_o
      elsif t_o =~/^(?:box(?:\.[a-z_]+)?\{|```[ ]+box(?:\.[a-z_]+)?)/
        @@flag[:box]=case t_o
        when /^box\{/ then :curls
        when /^```[ ]+box/ then :tics
        else @@flag[:box] #error
        end
        @num_id[:box] +=1
        h={ is_for: :box, obj: '', sym: :box_open, num: @num_id[:box], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
        tuned_file << t_o
      elsif t_o =~/^(?:group\{|```[ ]+group)/
        @@flag[:group]=case t_o
        when /^group\{/ then :curls
        when /^```[ ]+group/ then :tics
        else @@flag[:group] #error
        end
        @num_id[:group] +=1
        h={ is_for: :group, obj: '', sym: :group_open, num: @num_id[:group], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
        tuned_file << t_o
      elsif t_o =~/^(?:block\{|```[ ]+block)/
        @@flag[:block]=case t_o
        when /^block\{/ then :curls
        when /^```[ ]+block/ then :tics
        else @@flag[:block] #error
        end
        @num_id[:block] +=1
        h={ is_for: :block, obj: '', sym: :block_open, num: @num_id[:block], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
        tuned_file << t_o
      elsif t_o =~/^(?:alt\{|```[ ]+alt)/
        @@flag[:alt]=case t_o
        when /^alt\{/ then :curls
        when /^```[ ]+alt/ then :tics
        else @@flag[:alt] #error
        end
        @num_id[:alt] +=1
        h={ is_for: :alt, obj: '', sym: :alt_open, num: @num_id[:alt], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
        tuned_file << t_o
      elsif t_o =~/^`:quote_open`/
        @@flag[:quote]=:open
        @num_id[:quote] +=1
        h={ is_for: :quote, obj: '', sym: :quote_open, num: @num_id[:quote], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
        #tuned_file << t_o #% find second source, entered twice, should be once so closed off here
      elsif t_o =~/^(?:table\{|```[ ]+table|\{table)[ ~]/
        @num_id[:table] +=1
        h={ is_for: :table, obj: '', sym: :table_open, num: @num_id[:table], }
        ins_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
        tuned_file << ins_o
        if t_o=~/^table\{(?:~h)?\s+/
          # curly table: rows arrive as following objects until "}table"
          @@flag[:table]=:curls
          @rows=''
          case t_o
          when /table\{~h\s+c(\d+);\s+(.+)/
            cols=$1
            col=$2.scan(/\d+/)
            heading=true
          when /table\{\s+c(\d+);\s+(.+)/
            cols=$1
            col=$2.scan(/\d+/)
            heading=false
          end
          @h={
            head_: heading,
            cols: cols,
            widths: col,
            idx: idx,
          }
        elsif t_o=~/^```[ ]+table(?:~h)?\s+c\d+/
          # back-tick table with "cN;" column spec: rows arrive as following objects
          @@flag[:table]=:tics
          @rows=''
          case t_o
          when /^```[ ]+table~h\s+c(\d+);\s+(.+)/
            cols=$1
            col=$2.scan(/\d+/)
            heading=true
          when /^```[ ]+table\s+c(\d+);\s+(.+)/
            cols=$1
            col=$2.scan(/\d+/)
            heading=false
          end
          @h={
            head_: heading,
            cols: cols,
            widths: col,
            idx: idx,
          }
        elsif t_o=~/^\{table(?:~h)?(?:\s+\d+;?)?\}\n.+\Z/m
          # "{table}" form, whole table in this object; widths are derived
          m1,m2,hd=nil,nil,nil
          tbl=/^\{table(?:~h)?(?:\s+\d+;?)?\}\n(.+)\Z/m.match(t_o)[1]
          hd=((t_o =~/^\{table~h/) ? true : false)
          tbl,tags=extract_tags(tbl)
          rws=tbl.split(/\n/)
          rows=''
          cols=nil
          rws.each do |r|
            # column count is taken from the first row only
            cols=(cols ? cols : (r.scan('|').length) +1)
            r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}")
            rows += r + Mx[:tc_c]
          end
          col=[]
          if t_o =~/^\{table(?:~h)?\s+(\d+);?\}/ #width of col 1 given as %, usually when wider than rest that are even
            c1=$1.to_i
            width=(100 - c1)/(cols - 1)
            col=[ c1 ]
            (cols - 1).times { col << width }
          else #all columns of equal width
            width=100.00/cols
            cols.times { col << width }
          end
          h={
            head_: hd,
            cols: cols,
            widths: col,
            obj: rows,
            idx: idx,
            tags: tags,
            num: @num_id[:table],
          }
          t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \
            unless h.nil?
          tuned_file << t_o
          h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], }
          t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
          t_o
        elsif t_o=~/^```[ ]+table(?:~h)?\s+/
          # back-tick table with explicit widths, whole table in this object
          m1,m2,hd=nil,nil,nil
          h=case t_o
          when /^```[ ]+table~h\s+(.+?)\n(.+)\Z/m #two table representations should be consolidated as one
            m1,tbl,hd=$1,$2,true
          when /^```[ ]+table\s+(.+?)\n(.+)\Z/m #two table representations should be consolidated as one
            m1,tbl,hd=$1,$2,false
          else nil
          end
          tbl,tags=extract_tags(tbl)
          col=m1.scan(/\d+/)
          rws=tbl.split(/\n/)
          rows=''
          rws.each do |r|
            r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}")
            rows += r + Mx[:tc_c]
          end
          h={
            head_: hd,
            cols: col.length,
            widths: col,
            obj: rows,
            idx: idx,
            tags: tags,
            num: @num_id[:table],
          }
          t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \
            unless h.nil?
          tuned_file << t_o
          h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], }
          t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
          t_o
        elsif t_o=~/^\{table(?:~h)?\s+/
          # "{table w1 w2 ...}" form with explicit widths, whole table in this object
          m1,m2,hd=nil,nil,nil
          h=case t_o
          when /\{table~h\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one
            m1,tbl,hd=$1,$2,true
          when /\{table\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one
            m1,tbl,hd=$1,$2,false
          else nil
          end
          tbl,tags=extract_tags(tbl)
          col=m1.scan(/\d+/)
          rws=tbl.split(/\n/)
          rows=''
          rws.each do |r|
            r=r.gsub(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}")
            rows += r + Mx[:tc_c]
          end
          h={
            head_: hd,
            cols: col.length,
            widths: col,
            obj: rows,
            idx: idx,
            tags: tags,
            num: @num_id[:table],
          }
          t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(h) \
            unless h.nil?
          tuned_file << t_o
          h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], }
          t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
          t_o
        end
      end
      t_o
    end
    # --- 3. inside a multi-object table: gather rows until the closer
    if @@flag[:table]==:curls or @@flag[:table]==:tics
      if (@@flag[:table]==:curls \
      and t_o =~/^\}table/) \
      or (@@flag[:table]==:tics \
      and t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/)
        @@flag[:table]=:off
        headings,columns,widths,idx=@h[:head_],@h[:cols],@h[:widths],@h[:idx]
        @h={
          head_: headings,
          cols: columns,
          widths: widths,
          idx: idx,
          obj: @rows,
        }
        t_o=SiSU_AO_DocumentStructure::ObjectTable.new.table(@h)
        tuned_file << t_o
        @h,@rows=nil,''
        h={ is_for: :table, obj: '', sym: :table_close, num: @num_id[:table], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
        t_o
      else
        # a row object: its lines become cells; the opener line itself is skipped
        if t_o.is_a?(String) \
        and t_o !~/^(?:table\{|```[ ]+table)/
          t_o=t_o.gsub(/^\n+/m,'').
            gsub(/\n+/m,"#{Mx[:tc_p]}")
          @rows += t_o + Mx[:tc_c]
        end
        t_o=nil
      end
    end
    # --- 4. inside a code block, or inside/closing one of the text blocks
    if @@flag[:code]==:curls \
    or @@flag[:code]==:tics
      if (@@flag[:code]==:curls \
      && t_o =~/^\}code/) \
      or (@@flag[:code]==:tics \
      && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/m)
        # code block closes: emit the accumulated code, then the close marker
        @@flag[:code]=:off
        if @tuned_code[-1]
          @tuned_code[-1].
            gsub!(/\s*(?:#{Mx[:br_line]}|#{Mx[:br_nl]})\s*\Z/m,'')
        end
        obj=@tuned_code.join("\n")
        tags=[]
        h={
          obj: obj,
          idx: idx,
          syntax: @@flag[:lngsyn],
          tags: tags,
          num: @num_id[:code_block],
          number_: @codeblock_numbered,
        }
        @@flag[:lngsyn]=:txt
        t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.code(h)
        @tuned_code=[]
        tuned_file << t_o
        h={ is_for: :code, obj: '', sym: :code_close, num: @num_id[:code_block], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
      end
      if (@@flag[:code]==:curls \
      || @@flag[:code]==:tics) \
      and t_o.is_a?(String)
        # still inside the code block: keep the processed lines for later
        sub_array=t_o.dup + "#{Mx[:br_nl]}"
        @line_mode=[]
        sub_array.scan(/.+/) {|w| @line_mode << w if w =~/[\S]+/}
        t_o=SiSU_AO_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(:code).join
        @tuned_code << t_o
        t_o=nil
      end
    elsif (@@flag[:poem]==:curls \
    || @@flag[:poem]==:tics) \
    or (@@flag[:box]==:curls \
    || @@flag[:box]==:tics) \
    or (@@flag[:group]==:curls \
    || @@flag[:group]==:tics) \
    or (@@flag[:block]==:curls \
    || @@flag[:block]==:tics) \
    or (@@flag[:alt]==:curls \
    || @@flag[:alt]==:tics) \
    or (@@flag[:quote]==:open \
    && t_o =~/`:quote_close`/m) #not
      if (@@flag[:poem]==:curls \
      && t_o =~/^\}poem$/m) \
      or (@@flag[:poem]==:tics \
      && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/)
        @@flag[:poem]=:off
        h={
          is_for: :poem,
          obj: '',
          idx: idx,
          sym: :poem_close,
          num: @num_id[:poem],
        }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
      elsif (@@flag[:box]==:curls \
      && t_o =~/^\}box/) \
      or (@@flag[:box]==:tics \
      && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/)
        @@flag[:box]=:off
        obj,tags=extract_tags(@tuned_block.join("\n"))
        h={
          obj: obj,
          idx: idx,
          tags: tags,
          num: @num_id[:box],
        }
        @tuned_block=[]
        t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.box(h)
        tuned_file << t_o
        h={
          is_for: :box,
          obj: '',
          idx: idx,
          sym: :box_close,
          num: @num_id[:box],
        }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
      elsif (@@flag[:group]==:curls \
      && t_o =~/^\}group/) \
      or (@@flag[:group]==:tics \
      && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/)
        @@flag[:group]=:off
        obj,tags=extract_tags(@tuned_block.join("\n"))
        h={
          obj: obj,
          idx: idx,
          tags: tags,
          num: @num_id[:group],
        }
        @tuned_block=[]
        t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.group(h)
        tuned_file << t_o
        h={ is_for: :group, obj: '', sym: :group_close, num: @num_id[:group], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
      elsif (@@flag[:block]==:curls \
      && t_o =~/^\}block/) \
      or (@@flag[:block]==:tics \
      && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/)
        @@flag[:block]=:off
        obj,tags=extract_tags(@tuned_block.join("\n"))
        h={
          obj: obj,
          idx: idx,
          tags: tags,
          num: @num_id[:block],
        }
        @tuned_block=[]
        t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.block(h)
        tuned_file << t_o
        h={ is_for: :block, obj: '', sym: :block_close, num: @num_id[:block], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
      elsif (@@flag[:alt]==:curls \
      && t_o =~/^\}alt/) \
      or (@@flag[:alt]==:tics \
      && t_o =~/^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/)
        @@flag[:alt]=:off
        obj,tags=extract_tags(@tuned_block.join("\n"))
        h={
          obj: obj,
          idx: idx,
          tags: tags,
          num: @num_id[:alt],
        }
        t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.alt(h)
        @tuned_block=[]
        tuned_file << t_o
        h={ is_for: :alt, obj: '', sym: :alt_close, num: @num_id[:alt], }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
      elsif @@flag[:quote]==:open \
      and t_o =~/`:quote_close`/m
        @@flag[:quote]=:off
        h={
          is_for: :quote,
          idx: idx,
          obj: '',
          sym: :quote_close,
          num: @num_id[:quote],
        }
        t_o=SiSU_AO_DocumentStructure::ObjectLayout.new.open_close(h)
      elsif @@flag[:quote]==:open
        # NOTE(review): note/image are not assigned on this path (nil here) and
        # the paragraph built below is not assigned to t_o - confirm intent
        t_o,tags=extract_tags(t_o)
        h={
          indent: 1,
          obj: t_o,
          idx: idx,
          note_: note,
          image_: image,
          tags: tags,
          quote: quotes?,
        }
        SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(h)
      end
      # content of a still-open poem/group/alt block
      # ("and"/"or" share one precedence level and group left to right, so this
      # reads as (poem or group or alt) and non-blank and not-a-marker)
      if (@@flag[:poem]==:curls \
      || @@flag[:poem]==:tics) \
      or (@@flag[:group]==:curls \
      || @@flag[:group]==:tics) \
      or (@@flag[:alt]==:curls \
      || @@flag[:alt]==:tics) \
      and t_o =~/\S/ \
      and t_o !~/^(?:\}(?:verse|code|box|alt|group|block)|(?:verse|code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|alt|group|block)\{)/ \
      and t_o !~/^```[ ]+(?:code(?:\.[a-z][0-9a-z_]+)?|box(?:\.[a-z_]+)?|poem|alt|group|block)|^```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/ # fix logic
        sub_array=t_o.dup
        @line_mode=sub_array.scan(/.+/)
        type=if @@flag[:poem]==:curls or @@flag[:poem]==:tics
          # "type" is still nil when passed here; build_lines only tests for :code
          t_o=SiSU_AO_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(type).join
          poem=t_o.split(/\n\n/)
          poem.each do |v|
            # each blank-line separated chunk becomes one verse object
            v=v.gsub(/\n/m,"#{Mx[:br_nl]}\n")
            obj,tags=extract_tags(v)
            h={
              obj: obj,
              tags: tags,
              num: @num_id[:poem],
            }
            t_o=SiSU_AO_DocumentStructure::ObjectBlockTxt.new.verse(h)
            tuned_file << t_o
          end
          :poem
        else :group
        end
      end
      @verse_count+=1 if @@flag[:poem]==:curls or @@flag[:poem]==:tics
    end
    # --- 5. store the result: block content is buffered, everything else emitted
    if @@flag[:code]==:off
      if @@flag[:poem]==:curls or @@flag[:poem]==:tics \
      or @@flag[:box]==:curls or @@flag[:box]==:tics \
      or @@flag[:group]==:curls or @@flag[:group]==:tics \
      or @@flag[:alt]==:curls or @@flag[:alt]==:tics \
      or (@@flag[:quote]==:open and t_o =~/`:quote_close`/m)
        if t_o.is_a?(String)
          # preserve line breaks and runs of spaces inside the block
          t_o=t_o.gsub(/\n/m,"#{Mx[:br_nl]}").
            gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}").
            gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}")
          t_o=t_o + Mx[:br_nl] if t_o =~/\S+/
        elsif t_o.is==:group \
        || t_o.is==:block \
        || t_o.is==:alt \
        || t_o.is==:box \
        || t_o.is==:verse
          t_o.obj=t_o.obj.gsub(/\n/m,"#{Mx[:br_nl]}").
            gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}").
            gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}")
        end
        @tuned_block << t_o if t_o =~/\S+/
      else tuned_file << t_o
      end
    else tuned_file << t_o
    end
  end
  # --- trailing sections appended after the document body
  if @md.flag_endnotes
    tuned_file << @pb
    h={
      ln: 1,
      lc: 1,
      obj: 'Endnotes',
      autonum_: false,
    }
    tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h)
    h={
      ln: 4,
      lc: 2,
      obj: 'Endnotes',
      name: 'endnotes',
      autonum_: false,
    }
    tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h)
    h={
      obj: 'Endnotes'
    }
  end
  if @md.book_idx
    tuned_file << @pb
    h={
      ln: 1,
      lc: 1,
      obj: 'Index',
      autonum_: false,
    }
    tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h)
    h={
      ln: 4,
      lc: 2,
      obj: 'Index',
      name: 'book_index',
      autonum_: false,
    }
    tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h)
    h={
      obj: 'Index'
    }
  end
  tuned_file << @pb
  h={
    ln: 1,
    lc: 1,
    obj: 'Metadata',
    autonum_: false,
    ocn_: false,
  }
  tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h)
  h={
    ln: 4,
    lc: 2,
    obj: 'SiSU Metadata, document information',
    name: 'metadata',
    autonum_: false,
    ocn_: false,
  }
  tuned_file << SiSU_AO_DocumentStructure::ObjectHeading.new.heading_insert(h)
  h={
    obj: 'eof',
  }
  meta=SiSU_AO_DocumentStructure::ObjectMetadata.new.metadata(@metadata)
  [tuned_file,meta]
end
# Splits a serialised table string into rows (on the Mx[:tc_c] marker) and
# each row into its cells (on the Mx[:tc_p] marker).
#
# Returns an array of rows, each an array of cell strings.
def table_rows_and_columns_array(table_str)
  table_str.split(/#{Mx[:tc_c]}/).map do |row|
    row.split(/#{Mx[:tc_p]}/)
  end
end
# Builds a heading object from the :lv, :ln, :name and :obj entries of h,
# with :ocn fixed to '0'. Any other keys in h are dropped.
def meta_heading(h)
  attrs=%i[lv ln name obj].each_with_object({}) do |key,picked|
    picked[key]=h[key]
  end
  attrs[:ocn]='0'
  SiSU_AO_DocumentStructure::ObjectHeading.new.heading(attrs)
end
# Builds a paragraph object for str with :ocn_ set to false.
def meta_para(str)
  para_attrs={ obj: str, ocn_: false }
  SiSU_AO_DocumentStructure::ObjectPara.new.paragraph(para_attrs)
end
# Prepares each line in @data for inclusion in a text block.
#
# type:: :code prefixes every line start with the "codeline" marker;
#        any other value leaves line starts alone
#
# Non-blank lines that are not code-block open/close markers get double
# spaces turned into non-breaking spaces and a trailing Mx[:br_nl] (preceded
# by a space when the line ends in a URL). Blank lines get Mx[:br_nl]. Other
# entries are passed through unchanged. While a code block is open the
# class-level line counter is incremented per processed line.
#
# Returns the new array of lines.
def build_lines(type=:none)
  @data.map do |line|
    if line =~/\S/ \
    && line !~/^(?:code(?:\.[a-z][0-9a-z_]+)?\{|\}code)/ \
    && line !~/^(?:```[ ]+code(?:\.[a-z][0-9a-z_]+)?|```(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$)/ \
    && !line.is_a?(Hash) #watch
      @@counter+=1 if @@flag[:code]==:curls || @@flag[:code]==:tics
      spaced=line.gsub(/\s\s/,"#{Mx[:nbsp]*2}")
      spaced=spaced.gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}")
      spaced=spaced.gsub(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type==:code # REMOVE try sort for texpdf special case
      if spaced =~/(?:https?|file|ftp):\/\/\S+$/
        # keep a space between a trailing URL and the line-break marker
        spaced.gsub(/\s*$/," #{Mx[:br_nl]}")
      else
        spaced.gsub(/\s*$/,"#{Mx[:br_nl]}") #unless type=='code'
      end
    elsif line =~/^\s*$/
      line.gsub(/\s*$/,"#{Mx[:br_nl]}")
    else
      line
    end
  end
end
end
# Promotes plain paragraphs to headings when their text starts with one of
# the level patterns supplied by the document metadata (@md.lv0 .. @md.lv6).
class Structure # this must happen early
  # md:: document metadata object providing lv0..lv6
  def initialize(md)
    @md=md
  end
  # Runs structure_markup over every non-nil entry of data.
  # Returns the nil-stripped copy of data that was iterated.
  def structure(data)
    present=data.compact
    present.each { |dob| structure_markup(dob) }
    present
  end
  # Returns a heading object built from dob when dob is an un-bulleted
  # paragraph (either not indented, or with hang differing from indent)
  # whose text matches one of the metadata level patterns; the first
  # matching level wins. Otherwise returns dob unchanged.
  def structure_markup(dob) #build structure where structure provided only in meta header
    candidate=dob.is==:para \
      && (((dob.hang !~/[1-9]/) && (dob.indent !~/[1-9]/)) \
      || (dob.hang != dob.indent))
    return dob unless candidate && !dob.bullet_
    text=dob.obj
    # [metadata accessor, heading level marker, numeric level], checked in order
    [
      [:lv0,'A',0],
      [:lv1,'B',1],
      [:lv2,'C',2],
      [:lv3,'D',3],
      [:lv4,'1',4],
      [:lv5,'2',5],
      [:lv6,'3',6],
    ].each do |accessor,lv,ln|
      next unless /^#{@md.public_send(accessor)}/ === text
      h={
        is: :heading,
        lv: lv,
        ln: ln,
      }
      return SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h,dob)
    end
    dob
  end
end
class OCN
# md::   document metadata/parameters object
# data:: collection of document objects to be numbered
def initialize(md,data)
  @md=md
  @data=data
end
# Entry point for the heading-level helper methods below; returns self so
# callers can write structure_info.lv, structure_info.possible_parents(..)
# and structure_info.possible_children(..).
#
# The helpers used to be defined by nested "def"s inside this method, which
# re-defined them on every call and left them undefined until the first
# call; they are now ordinary methods.
def structure_info
  self
end
# Heading level names indexed by numeric level 0..7.
def lv
  %w[A~ B~ C~ D~ 1 2 3 4]
end
# Describes which level(s) may be the parent of the given level name.
# Returns nil for a name matching none of the known levels.
def possible_parents(child)
  case child
  when /A~/ then 'none'
  when /B~/ then 'A~'
  when /C~/ then 'B~'
  when /D~/ then 'C~'
  when /1/ then 'A~, B~, C~, D~'
  when /2/ then '1'
  when /3/ then '2'
  when /4/ then '3'
  end
end
# Describes which level(s) may follow as children of the given level name.
# Returns nil for a name matching none of the known levels.
def possible_children(parent)
  case parent
  when /A~/ then 'B~, 1'
  when /B~/ then 'C~, 1'
  when /C~/ then 'D~, 1'
  when /D~/ then '1'
  when /1/ then '2'
  when /2/ then '3'
  when /3/ then '4'
  when /4/ then 'none'
  end
end
# Reports on the relationship between a heading node and its parent node.
#
# node, node_parent:: node strings whose first character is the numeric
#                     level 0-7 (e.g. "4:1;12"); both are parsed up front,
#                     so a value not starting with 0-7 raises
# status::            :ok (default) or :error
#
# Prints the node pair when status is :error or maintenance mode is on.
# On :error it additionally writes the permitted parent/child levels to
# STDERR and then either marks the document to be skipped (no_stop on) or
# exits the program.
def document_structure_check_info(node,node_parent,status=:ok)
  node_ln=/^([0-7])/.match(node)[1].to_i
  node_parent_ln=/^([0-7])/.match(node_parent)[1].to_i
  failed=(status==:error)
  return unless failed || @md.opt.act[:maintenance][:set]==:on
  puts "node: #{node}, parent node: #{node_parent} #{status.upcase}"
  return unless failed
  info=structure_info
  level=info.lv[node_ln]
  level_parents=info.possible_parents(info.lv[node_ln])
  parent_level=info.lv[node_parent_ln]
  parent_children=info.possible_children(info.lv[node_parent_ln])
  STDERR.puts [
    "current level: #{level} (possible parent levels: #{level_parents})",
    "parent level: #{parent_level} (possible child levels: #{parent_children})",
    "SKIPPED processing file:",
    "[#{@md.opt.lng}] \"#{@md.fns}\"",
  ].join("\n")
  if @md.opt.act[:no_stop][:set]==:on
    $process_document = :skip
  else exit
  end
end
# Prints a multi-line error notice quoting the heading text whose level (or
# whose parent's level) is wrong.
def warning_incorrect_parent_level_or_level(txt)
  notice=[
    'ERROR. There is an error in markup of heading levels either here or in the parent heading.',
    'The current header reads:',
    %{"#{txt}"},
    'has incorrect level and/or parent level',
    '--',
  ].join("\n")
  puts notice
end
# Verifies that the document has a full title and a creator author.
#
# For each missing header a notice is written to STDERR and the document
# is either flagged to be skipped ($process_document = :skip, when the
# no_stop option is on) or the program exits.
def required_headers_present?
  skip_or_exit=lambda do
    if @md.opt.act[:no_stop][:set]==:on
      $process_document = :skip
    else exit
    end
  end
  unless defined?(@md.title) && @md.title.full
    STDERR.puts "required header missing:\n" \
      "@title:\n" \
      "SKIPPED processing file:\n" \
      "[#{@md.opt.lng}] \"#{@md.fns}\"\n"
    skip_or_exit.call
  end
  unless defined?(@md.creator.author) && @md.creator.author
    STDERR.puts "required header missing:\n" \
      "@creator:\n" \
      ":author: anonymous?\n" \
      "SKIPPED processing file:\n" \
      "[#{@md.opt.lng}] \"#{@md.fns}\"\n"
    skip_or_exit.call
  end
end
def ocn #and auto segment numbering increment
required_headers_present?
data=@data
@o_array=[]
node=ocn=ocn_dv=ocn_sp=ocnh=ocnh0=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocnh7=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnu=0 # h heading, o other, t table, g group, i image
regex_exclude_ocn_and_node = /#{Rx[:meta]}|^@\S+?:\s|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^\^~ |<:e[:_]\d+?>|^<:\#|<:- |<[:!]!4|
if dob.is==:heading
@ln=ln=case dob.lv
when 'A' then 0
when 'B' then 1
when 'C' then 2
when 'D' then 3
when '1' then 4
when '2' then 5
when '3' then 6
when '4' then 7
when '5' then 8
when '6' then 9
end
end
if not dob.obj =~/~#|-#/
ocn+=1
end
if dob.is==:heading \
and (ln.to_s =~/^[0-9]/ \
or ln.to_s =~@md.lv0 \
or ln.to_s =~@md.lv1 \
or ln.to_s =~@md.lv2 \
or ln.to_s =~@md.lv3 \
or ln.to_s =~@md.lv4 \
or ln.to_s =~@md.lv5 \
or ln.to_s =~@md.lv6 \
or ln.to_s =~@md.lv7)
if not dob.obj =~/~#|-#/
ocnh+=1
end
if ln==0 \
or ln=~@md.lv0
@lev_occurences[:a] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh0+=1 #heading
node0="0:#{ocnh0};#{ocn}"
else
#document_structure_check_info(node0,node0,:error) #fix
ocn_flag=false
node0="0:0;0"
end
document_structure_check_info(node0,node0)
@collapsed_lv0=0
collapsed_level=@collapsed_lv0
node,ocn_sp,parent=node0,"h#{ocnh}",'ROOT'
elsif ln==1 \
or ln=~@md.lv1
@lev_occurences[:b] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh1+=1 #heading
node1="1:#{ocnh1};#{ocn}"
else
#document_structure_check_info(node0,node0,:error) #fix
ocn_flag=false
node1="1:0;0"
end
parent=if node0
document_structure_check_info(node1,node0)
@collapsed_lv1=@collapsed_lv0+1
node0
else
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node0,node0,:error)
node0
end
collapsed_level=@collapsed_lv1
node,ocn_sp,parent=node1,"h#{ocnh}",node0 #FIX
elsif ln==2 \
or ln=~@md.lv2
@lev_occurences[:c] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh2+=1
node2="2:#{ocnh2};#{ocn}"
else
#document_structure_check_info(node0,node0,:error) #fix
ocn_flag=false
node2="2:0;0"
end
parent=if node1
document_structure_check_info(node2,node1)
@collapsed_lv2=@collapsed_lv1+1
node1
else
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node2,node0,:error)
node0
end
collapsed_level=@collapsed_lv2
node,ocn_sp=node2,"h#{ocnh}"
elsif ln==3 \
or ln=~@md.lv3
@lev_occurences[:d] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh3+=1
node3="3:#{ocnh3};#{ocn}"
else
#document_structure_check_info(node0,node0,:error) #fix
ocn_flag=false
node3="3:0;0"
end
parent=if node2
document_structure_check_info(node3,node2)
@collapsed_lv3=@collapsed_lv2+1
node2
elsif node1
warning_incorrect_parent_level_or_level(dob.obj)
puts %{parent is :A~ & this level #{dob.lv}
either parent should be level :B~
or this level should be level :B~ rather than #{dob.lv}}
document_structure_check_info(node3,node1,:error)
@collapsed_lv3=@collapsed_lv1+1
node1
else
document_structure_check_info(node3,node0,:error)
warning_incorrect_parent_level_or_level(dob.obj)
node0
end
collapsed_level=@collapsed_lv3
node,ocn_sp=node3,"h#{ocnh}"
elsif ln==4 \
or ln=~@md.lv4
@lev_occurences[:l1] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh4+=1
node4="4:#{ocnh4};#{ocn}"
else
ocn_flag=false
node4="4:0;0"
end
parent=if node3
document_structure_check_info(node4,node3)
@collapsed_lv4=@collapsed_lv3+1
node3
elsif node2
document_structure_check_info(node4,node2)
@collapsed_lv4=@collapsed_lv2+1
node2
elsif node1
document_structure_check_info(node4,node1)
@collapsed_lv4=@collapsed_lv1+1
node1
elsif node0
document_structure_check_info(node4,node0)
@collapsed_lv4=@collapsed_lv0+1
node0
else
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node4,node0,:error)
node0
end
collapsed_level=@collapsed_lv4
node,ocn_sp=node4,"h#{ocnh}"
elsif ln==5 \
or ln=~@md.lv5
@lev_occurences[:l2] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh5+=1
node5="5:#{ocnh5};#{ocn}"
else
ocn_flag=false
node5="5:0;0"
end
parent=if node4
document_structure_check_info(node5,node4)
@collapsed_lv5=@collapsed_lv4+1
node4
elsif node3
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node5,node3,:error)
@collapsed_lv5=@collapsed_lv3+1
node3
elsif node2
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node5,node2,:error)
@collapsed_lv5=@collapsed_lv2+1
node2
elsif node1
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node5,node1,:error)
@collapsed_lv5=@collapsed_lv1+1
node1
else
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node5,node0,:error)
node0
end
collapsed_level=@collapsed_lv5
node,ocn_sp=node5,"h#{ocnh}"
elsif ln==6 \
or ln=~@md.lv6
@lev_occurences[:l3] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh6+=1
node6="6:#{ocnh6};#{ocn}"
else
ocn_flag=false
node6="6:0;0"
end
parent=if node5
document_structure_check_info(node6,node5)
@collapsed_lv6=@collapsed_lv5+1
node5
elsif node4
warning_incorrect_parent_level_or_level(dob.obj)
puts "parent is level #4 (1~) & this level ##{dob.ln} (#{dob.lv}~)
either parent should be level #5 (2~)
or this level should be #5 (2~) rather ##{dob.ln} (#{dob.lv}~)"
document_structure_check_info(node6,node4,:error)
@collapsed_lv6=@collapsed_lv4+1
node4
elsif node3
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node6,node3,:error)
@collapsed_lv6=@collapsed_lv3+1
node3
elsif node2
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node6,node2,:error)
@collapsed_lv6=@collapsed_lv2+1
node2
elsif node1
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node6,node1,:error)
@collapsed_lv6=@collapsed_lv1+1
node1
else
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node6,node0,:error)
node0
end
collapsed_level=@collapsed_lv6
node,ocn_sp=node6,"h#{ocnh}"
elsif ln==7 \
or ln=~@md.lv7
@lev_occurences[:l4] += 1
if not dob.obj =~/~#|-#/
ocn_flag=true
ocnh7+=1
node7="7:#{ocnh7};#{ocn}"
else
ocn_flag=false
node7="7:0;0"
end
parent=if node6
document_structure_check_info(node7,node6)
@collapsed_lv7=@collapsed_lv6+1
node5
elsif node5
warning_incorrect_parent_level_or_level(dob.obj)
puts "parent is level #5 (2~) & this level ##{dob.ln} (#{dob.lv}~)
either parent should be level #6 (3~)
or this level should be #6 (3~) rather ##{dob.ln} (#{dob.lv}~)"
document_structure_check_info(node7,node5,:error)
@collapsed_lv6=@collapsed_lv5+1
node5
elsif node4
warning_incorrect_parent_level_or_level(dob.obj)
puts "parent is level #4 (1~) & this level ##{dob.ln} (#{dob.lv}~)
either parent should be level 6~
or this level should be #6 (3~) rather ##{dob.ln} (#{dob.lv}~)"
document_structure_check_info(node7,node4,:error)
@collapsed_lv6=@collapsed_lv4+1
node4
elsif node3
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node7,node3,:error)
@collapsed_lv6=@collapsed_lv3+1
node3
elsif node2
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node7,node2,:error)
@collapsed_lv6=@collapsed_lv2+1
node2
elsif node1
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node7,node1,:error)
@collapsed_lv6=@collapsed_lv1+1
node1
else
warning_incorrect_parent_level_or_level(dob.obj)
document_structure_check_info(node7,node0,:error)
node0
end
collapsed_level=@collapsed_lv7
node,ocn_sp=node7,"h#{ocnh}"
end
else
unless @lev_occurences[:l1] > 0
STDERR.puts %{Substantive text objects must follow a level 1~ heading and there are none at this point in processing: #{@lev_occurences[:l1]}
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"}
puts dob.obj #.gsub(/^(.{1,80})/,'"\1"')
exit
end
unless @ln >= 4
lev=case @ln
when 0 then 'A'
when 1 then 'B'
when 2 then 'C'
when 3 then 'D'
when 4 then '1'
when 5 then '2'
when 6 then '3'
when 7 then '4'
when 8 then '5'
when 9 then '6'
end
STDERR.puts %{Substantive text objects must follow a level 1~ 2~ or 3~ heading: #{lev}~
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"}
puts dob.obj.gsub(/^(.{1,80})/,'"\1"')
if @md.opt.act[:no_stop][:set]==:on
$process_document = :skip
break
else exit
end
end
if not dob.obj =~/~#|-#/
ocn_flag=true
else
ocn_flag=false
end
ocno+=1
if dob.is==:table
ocnt+=1
ocn_sp,parent="t#{ocnt}",node
elsif dob.is==:code
ocnc+=1
ocn_sp,parent="c#{ocnc}",node
elsif dob.is==:group \
|| dob.is==:box \
|| dob.is==:block \
|| dob.is==:alt \
|| dob.is==:verse
ocng+=1 #group, poem
ocn_sp,parent="g#{ocng}",node
elsif dob.is==:image #check
ocni+=1
ocn_sp,parent="i#{ocni}",node
else ocnp+=1 #paragraph
ocn_sp,parent="p#{ocnp}",node
end
end
if dob.is==:heading
if ocn_flag==true
dob.ln,dob.node,dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent,dob.lc=
ln, node, ocn, ocn_flag, ocn_dv,ocn_sp, parent, collapsed_level
else
ocnu+=1
heading_use=:ok
if dob.obj=~/#{Mx[:pa_non_object_no_heading]}/
dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_no_heading]}/,'')
heading_use=:ok
elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/
dob.obj=dob.obj.gsub(/#{Mx[:pa_non_object_dummy_heading]}/,'')
heading_use=:dummy
end
dob.ln,dob.node,dob.ocn,dob.ocn_,dob.use_, dob.odv,dob.osp,dob.parent,dob.lc=
ln, node, nil, ocn_flag,heading_use,ocn_dv, ocn_sp, parent, collapsed_level
end
else
if dob.of !=:meta \
&& dob.of !=:comment \
&& dob.of !=:layout
if ocn_flag == true
dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent=
ocn, ocn_flag,ocn_dv, ocn_sp, parent
else
ocnu+=1
dob.obj=dob.obj.gsub(/#{Mx[:fa_o]}[~-]##{Mx[:fa_c]}/,'') if dob.obj
ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}"
dob.ocn,dob.ocn_,dob.odv,dob.osp,dob.parent=
nil, ocn_flag,ocn_dv, ocn_sp, parent
end
end
end
h
else dob
end
if dob.is==:code \
|| dob.is==:verse \
|| dob.is==:alt \
|| dob.is==:box \
|| dob.is==:group \
|| dob.is==:block
dob.obj=dob.obj.gsub(/\n+/,"\n") #newlines taken out
end
@o_array << dob
end
unless @lev_occurences[:a] == 1
STDERR.puts %{The number of level A~ in this document: #{@lev_occurences[:a]}
There must be one level A~ (no more and no less)
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"}
if @md.opt.act[:no_stop][:set]==:on
$process_document = :skip
else exit
end
end
unless @lev_occurences[:l1] > 0
STDERR.puts %{The number of level 1~ in this document: #{@lev_occurences[:l1]}
There must be at least one level 1~ (and as many as required)
SKIPPED processing file:
[#{@md.opt.lng}] "#{@md.fns}"}
if @md.opt.act[:no_stop][:set]==:on
$process_document = :skip
else exit
end
end
@o_array
end
end
class XML
  # Element names indexed by heading level number (0..6).
  LEVEL_TAGS=%w[A B C D 1 2 3].freeze
  # Heading level name => level number, stored in the :ln field built by #tags.
  LEVEL_NUMBERS={
    'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3,
    '1' => 4, '2' => 5, '3' => 6, '4' => 7, '5' => 8, '6' => 9,
  }.freeze

  # md::   document metadata; this class only reads
  #        md.opt.act[:verbose_plus][:set]
  # data:: enumerable of document objects responding to #is; objects that
  #        are headings are also asked for #ln and #node
  def initialize(md,data)
    @data,@md=data,md
    # Set here (rather than only in #dom) so that every public method can be
    # called directly without first going through #dom.
    @s=LEVEL_TAGS
  end

  # Returns the document objects interleaved with open/close structure tags.
  def dom
    structure_build
  end

  # Indentation unit used for the verbose outline.
  def spaces
    Ax[:spaces]
  end

  # Walks @data and, around every heading of level 0..6, inserts the close
  # tags for the levels that end there followed by the heading's open tag.
  # Returns a flat array of tag objects and the original data objects.
  #
  # hs is the running state: hs[0] is the level of the most recent heading,
  # hs[1..3] tell whether levels 1..3 (B, C, D) are currently open.
  def structure_build
    tuned_file=[]
    hs=[0,false,false,false]
    tuned_file << tags({ lv: @s[0], status: :open })
    if verbose_plus?
      puts "\nXML sisu structure outline --->\n"
      puts "<#{@s[0]}>"
    end
    @data.each do |o|
      heading=(o.is==:heading || o.is==:heading_insert)
      if heading && (0...@s.length).include?(o.ln)
        tuned_file << tag_close(o.ln,hs)
        tuned_file << tag_open(o,@s)
        if verbose_plus?
          puts_tag_close(o.ln,hs)
          puts_tag_open(o,@s)
        end
        track_open_levels(o.ln,hs)
      end
      tuned_file << o
    end
    puts_tag_close(0,hs) if verbose_plus?
    tuned_file << tag_close(0,hs)
    tuned_file.flatten
  end

  # Builds one tag object from o, a hash with :lv (level name), :status
  # (:open, anything else is treated as close) and optionally :node.
  def tags(o)
    tag=if o[:status]==:open
      %{<#{o[:lv]} id="#{o[:node]}">}
    else
      "</#{o[:lv]}>" # a proper end-tag; previously emitted without "</"
    end
    h={
      tag:    tag,
      node:   o[:node],
      lv:     o[:lv],
      ln:     LEVEL_NUMBERS[o[:lv]], # nil for an unknown level name
      status: o[:status],
    }
    # original note: downstream code utilise else ignore like comments
    SiSU_AO_DocumentStructure::ObjectStructure.new.xml_dom(h)
  end

  # Open tag for heading o; tag is the level-number => element-name table.
  def tag_open(o,tag)
    tags({ lv: tag[o.ln], node: o.node, status: :open })
  end

  # Close tags (deepest first) for every level that ends when a heading of
  # level lev starts, given state hs. Returns an array, possibly empty.
  def tag_close(lev,hs)
    levels_to_close(lev,hs).map do |level|
      tags({ lv: @s[level], status: :close })
    end
  end

  # Prints the open tag of heading o, indented by its level.
  def puts_tag_open(o,tag)
    puts %{#{spaces*o.ln}<#{tag[o.ln]} id="#{o.node}">}
  end

  # Prints the same close tags that #tag_close would return, indented by
  # level. Returns nil.
  def puts_tag_close(lev,hs)
    levels_to_close(lev,hs).each do |level|
      puts "#{spaces*level}</#{@s[level]}>"
    end
    nil
  end

  private

  def verbose_plus?
    @md.opt.act[:verbose_plus][:set]==:on
  end

  # Level numbers to close, deepest first, when a heading of level lev
  # arrives. A level at or below lev in depth is closed if it is the root
  # (0), one of the numbered levels (4..6, which carry no flag), or a
  # flagged-open level 1..3. An unknown current level closes nothing.
  def levels_to_close(lev,hs)
    current=hs[0]
    return [] unless (0...@s.length).include?(current)
    current.downto(0).select do |level|
      next false unless lev <= level
      level.zero? || level > 3 || hs[level]
    end
  end

  # Records a heading of the given level in hs. For levels 0..3 the level
  # itself becomes open and every deeper level 1..3 is marked closed, while
  # shallower flags are kept; previously the flags were shifted by one slot,
  # which produced close tags for levels that had never been opened.
  def track_open_levels(level,hs)
    hs[0]=level
    return hs if level > 3
    (1..3).each do |k|
      hs[k]=true if k==level
      hs[k]=false if k > level
    end
    hs
  end
end
end
__END__