From 9ccfd45405e11016bb28fc6d8ff290dff8f5c83d Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 25 Aug 2013 01:20:49 -0400 Subject: v4: merge v5, syntax additions, block text related (version bump to 4.2.*) * syntax add * switch ocn off and on for a block of content, using a line containing only the marker: off: "--~#"; off, omitting headings where possible: "---#"; on: "--+#" * alternative open and close for blocks using a line starting with "```", e.g. open code block: "``` code"; close code block: "```"; start poem: "``` poem"; end poem: "```" * remove need for an empty line between opening & closing of a text block [note: further refinements will be required; use of tildes "~~" was considered but is more problematic; backticks are not used elsewhere] --- lib/sisu/v4/dal_doc_str.rb | 288 ++++++++++++++++++++++++++++++++------------- 1 file changed, 205 insertions(+), 83 deletions(-) (limited to 'lib/sisu/v4/dal_doc_str.rb') diff --git a/lib/sisu/v4/dal_doc_str.rb b/lib/sisu/v4/dal_doc_str.rb index 1eeae2da..6c588a8a 100644 --- a/lib/sisu/v4/dal_doc_str.rb +++ b/lib/sisu/v4/dal_doc_str.rb @@ -61,15 +61,32 @@ =end module SiSU_DAL_DocumentStructureExtract class Instantiate < SiSU_Param::Parameters::Instructions - @@flag={} #Beware!! + @@flag={ + ocn: :on, + code: :off, + poem: :off, + block: :off, + group: :off, + alt: :off, + table: :off, + table_to: :off, + } def initialize - @@flag['table_to']=false @@counter=@@column=@@columns=0 @@line_mode='' end end class Build - @@flag={} #Beware!! 
+ @@flag={ + ocn: :on, + code: :off, + poem: :off, + block: :off, + group: :off, + alt: :off, + table: :off, + table_to: :off, + } def initialize(md,data) @md,@data=md,data SiSU_DAL_DocumentStructureExtract::Instantiate.new @@ -157,14 +174,33 @@ module SiSU_DAL_DocumentStructureExtract @@counter,@verse_count=0,0 @metadata={} @data.each do |t_o| - t_o=t_o.gsub(/(?:\n\s*\n)+/m,"\n") unless @@flag['code'] + if t_o =~/^--([+~-])[#]$/ + h=case $1 + when /[+]/ + @@flag[:ocn]=:on + {flag: :ocn_on} + when /[~]/ + @@flag[:ocn]=:off_headings_substantive + {flag: :ocn_off, mod: :headings_substantive} + when /[-]/ + @@flag[:ocn]=:off_headings_exclude + {flag: :ocn_off, mod: :headings_exclude} + else + @@flag[:ocn]=:on + {flag: :ocn_on} + end + t_o=SiSU_DAL_DocumentStructure::ObjectFlag.new.flag_ocn(h) + next + end + t_o=t_o.gsub(/(?:\n\s*\n)+/m,"\n") if @@flag[:code]==:off if t_o !~/^(?:code|poem|alt|group|block)\{|^\}(?:code|poem|alt|group|block)|^(?:table\{|\{table)[ ~]/ \ - and not @@flag['code'] \ - and not @@flag['poem'] \ - and not @@flag['group'] \ - and not @@flag['block'] \ - and not @@flag['alt'] \ - and not @@flag['table'] + and t_o !~/^[`]{3}\s+(?:code|poem|alt|group|block)|^[`]{3}(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/ \ + and @@flag[:code]==:off \ + and @@flag[:poem]==:off \ + and @@flag[:group]==:off \ + and @@flag[:block]==:off \ + and @@flag[:alt]==:off \ + and @@flag[:table]==:off unless t_o =~/^(?:@\S+?:|%+)\s/ # extract book index for paragraph if any idx=if t_o=~/^=\{(.+)\}\s*$\Z/m; m=$1 t_o=t_o.gsub(/\n=\{.+\}\s*$\Z/m,'') @@ -192,18 +228,48 @@ module SiSU_DAL_DocumentStructureExtract obj=$1 note=endnote_test?(obj) obj,tags=extract_tags(obj) + if @@flag[:ocn]==:off_headings_exclude \ + or @@flag[:ocn]==:off_headings_substantive + unless obj =~ /[~-][#]\s*$/ + if @@flag[:ocn]==:off_headings_exclude + obj << ' -#' + elsif @@flag[:ocn]==:off_headings_substantive + obj << ' ~#' + end + end + end h={ lv: lv, ln: ln, obj: obj, idx: idx, tags: tags } 
SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h) elsif t_o=~/^:?[A-C1-6]\~(\S+?)-\s+(.+)/m name,obj=$1,$2 note=endnote_test?(obj) obj,tags=extract_tags(obj) + if @@flag[:ocn]==:off_headings_exclude \ + or @@flag[:ocn]==:off_headings_substantive + unless obj =~ /[~-][#]\s*$/ + if @@flag[:ocn]==:off_headings_exclude + obj << ' -#' + elsif @@flag[:ocn]==:off_headings_substantive + obj << ' ~#' + end + end + end h={ lv: lv, name: name, obj: obj, idx: idx, autonum_: false, tags: tags} SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h) elsif t_o=~/^:?[A-C1-6]\~(\S+)\s+(.+)/m name,obj=$1,$2 note=endnote_test?(obj) obj,tags=extract_tags(obj,name) + if @@flag[:ocn]==:off_headings_exclude \ + or @@flag[:ocn]==:off_headings_substantive + unless obj =~ /[~-][#]\s*$/ + if @@flag[:ocn]==:off_headings_exclude + obj << ' -#' + elsif @@flag[:ocn]==:off_headings_substantive + obj << ' ~#' + end + end + end h={ lv: lv, name: name, obj: obj, idx: idx, tags: tags } SiSU_DAL_DocumentStructure::ObjectHeading.new.heading(h) else nil @@ -221,6 +287,12 @@ module SiSU_DAL_DocumentStructureExtract note=endnote_test?(obj) obj,tags=extract_tags(obj) unless obj=~/\A\s*\Z/m + if @@flag[:ocn]==:off_headings_exclude \ + or @@flag[:ocn]==:off_headings_substantive + unless obj =~ /[~-][#]\s*$/ + obj << ' ~#' + end + end h={ bullet_: bullet, hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags } SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h) end @@ -238,6 +310,12 @@ module SiSU_DAL_DocumentStructureExtract note=endnote_test?(obj) obj,tags=extract_tags(obj) unless obj=~/\A\s*\Z/m + if @@flag[:ocn]==:off_headings_exclude \ + or @@flag[:ocn]==:off_headings_substantive + unless obj =~ /[~-][#]\s*$/ + obj << ' ~#' + end + end h={ hang: hang, indent: indent, obj: obj, idx: idx, note_: note, image_: image, tags: tags } SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h) end @@ -255,44 +333,84 @@ module SiSU_DAL_DocumentStructureExtract 
image=image_test(t_o) note=endnote_test?(t_o) obj,tags=extract_tags(t_o) + if @@flag[:ocn]==:off_headings_exclude \ + or @@flag[:ocn]==:off_headings_substantive + unless obj =~ /[~-][#]\s*$/ + obj << ' ~#' + end + end unless obj=~/\A\s*\Z/m h={ bullet_: false, indent: 0, hang: 0, obj: obj, idx: idx, note_: note, image_: image, tags: tags } SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h) end end - elsif not @@flag['code'] - if t_o =~/^code\{/ - @@flag['code']=true + elsif @@flag[:code]==:off + if t_o =~/^(?:code\{|[`]{3}\s+code)/ + @@flag[:code]=case t_o + when /^code\{/; :curls + when /^[`]{3}\s+code/; :tics + else @@flag[:code] #error + end @@counter=1 - @codeblock_numbered=(t_o =~/^code\{#/) ? true : false + @codeblock_numbered=(t_o =~/^(?:code\{#|[`]{3}\s+code\s[#])/) ? true : false h={ obj: 'code block start' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) - elsif t_o =~/^poem\{/ - @@flag['poem']=true + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) + elsif t_o =~/^(?:poem\{|[`]{3}\s+poem)/ + @@flag[:poem]=case t_o + when /^poem\{/; :curls + when /^[`]{3}\s+poem/; :tics + else @@flag[:poem] #error + end h={ obj: 'poem start' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) tuned_file << t_o - elsif t_o =~/^group\{/ - @@flag['group']=true + elsif t_o =~/^(?:group\{|[`]{3}\s+group)/ + @@flag[:group]=case t_o + when /^group\{/; :curls + when /^[`]{3}\s+group/; :tics + else @@flag[:group] #error + end h={ obj: 'group text start' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) tuned_file << t_o - elsif t_o =~/^block\{/ - 
@@flag['block']=true + elsif t_o =~/^(?:block\{|[`]{3}\s+block)/ + @@flag[:block]=case t_o + when /^block\{/; :curls + when /^[`]{3}\s+block/; :tics + else @@flag[:block] #error + end h={ obj: 'block text start' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) tuned_file << t_o - elsif t_o =~/^alt\{/ - @@flag['alt']=true + elsif t_o =~/^(?:alt\{|[`]{3}\s+alt)/ + @@flag[:alt]=case t_o + when /^alt\{/; :curls + when /^[`]{3}\s+alt/; :tics + else @@flag[:alt] #error + end h={ obj: 'alt text start' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) tuned_file << t_o elsif t_o =~/^(?:table\{|\{table)[ ~]/ h={ obj: 'table start' } #introduce a counter - ins=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #ins=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) + ins=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) tuned_file << ins if t_o=~/^table\{(?:~h)?\s+/ - @@flag['table']=true + @@flag[:table]=:curls + @rows='' + case t_o + when /table\{~h\s+c(\d+);\s+(.+)/ + cols=$1 + col=$2.scan(/\d+/) + heading=true + when /table\{\s+c(\d+);\s+(.+)/ + cols=$1 + col=$2.scan(/\d+/) + heading=false + end + @h={ head_: heading, cols: cols, widths: col, idx: idx } + elsif t_o=~/^[`]{3}\s+table(?:~h)?\s+/ + @@flag[:table]=:tics @rows='' case t_o when /table\{~h\s+c(\d+);\s+(.+)/ @@ -361,21 +479,23 @@ module SiSU_DAL_DocumentStructureExtract end t_o end - if @@flag['table'] - if @@flag['table'] \ - and t_o =~/^\}table/ #two table representations should be consolidated as one - @@flag['table']=false + if @@flag[:table]==:curls or @@flag[:table]==:tics + if (@@flag[:table]==:curls \ + and t_o =~/^\}table/) \ + or (@@flag[:table]==:tics \ + and t_o 
=~/^[`]{3}(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:table]=:off headings,columns,widths,idx=@h[:head_],@h[:cols],@h[:widths],@h[:idx] @h={ head_: headings, cols: columns, widths: widths, idx: idx, obj: @rows } t_o=SiSU_DAL_DocumentStructure::ObjectTable.new.table(@h) tuned_file << t_o @h,@rows=nil,'' h={ obj: 'table end' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) t_o else if t_o.is_a?(String) \ - and t_o !~/^table\{/ + and t_o !~/^(?:table\{|[`]{3}\s+table)/ t_o=t_o.gsub(/^\n+/m,''). #check added for ruby 1.9.2 not needed in 1.8 series (tested in v2) gsub(/\n+/m,"#{Mx[:tc_p]}") @rows += t_o + Mx[:tc_c] @@ -383,9 +503,10 @@ module SiSU_DAL_DocumentStructureExtract t_o=nil end end - if @@flag['code'] - if t_o =~/^\}code/ - @@flag['code']=false + if @@flag[:code]==:curls or @@flag[:code]==:tics + if (@@flag[:code]==:curls and t_o =~/^\}code/) \ + or (@@flag[:code]==:tics and t_o =~/^[`]{3}(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:code]=:off obj=@tuned_code.join("\n") tags=[] h={ obj: obj, tags: tags, number_: @codeblock_numbered } @@ -393,66 +514,66 @@ module SiSU_DAL_DocumentStructureExtract @tuned_code=[] tuned_file << t_o h={ obj: 'code block end' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) end - if @@flag['code'] \ + if (@@flag[:code]==:curls or @@flag[:code]==:tics) \ and t_o.is_a?(String) sub_array=t_o.dup + "#{Mx[:br_nl]}" - @line_mode=sub_array.scan(/.+/) @line_mode=[] sub_array.scan(/.+/) {|w| @line_mode << w if w =~/[\S]+/} - t_o=SiSU_DAL_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines('code').join + t_o=SiSU_DAL_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(:code).join @tuned_code << t_o 
t_o=nil end - elsif @@flag['poem'] \ - or @@flag['group'] \ - or @@flag['block'] \ - or @@flag['alt'] - if @@flag['poem'] \ - and t_o =~/^\}poem/ - @@flag['poem']=false + elsif (@@flag[:poem]==:curls or @@flag[:poem]==:tics) \ + or (@@flag[:group]==:curls or @@flag[:group]==:tics) \ + or (@@flag[:block]==:curls or @@flag[:block]==:tics) \ + or (@@flag[:alt]==:curls or @@flag[:alt]==:tics) + if (@@flag[:poem]==:curls and t_o =~/^\}poem/) \ + or (@@flag[:poem]==:tics and t_o =~/^[`]{3}(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:poem]=:off h={ obj: 'poem end' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) - elsif ( @@flag['group'] \ - and t_o =~/^\}group/ ) - @@flag['group']=false + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) + elsif (@@flag[:group]==:curls and t_o =~/^\}group/) \ + or (@@flag[:group]==:tics and t_o =~/^[`]{3}(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:group]=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, tags: tags } @tuned_block=[] t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.group(h) tuned_file << t_o h={ obj: 'group text end' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) - elsif ( @@flag['block'] \ - and t_o =~/^\}block/ ) - @@flag['block']=false + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) + elsif (@@flag[:block]==:curls and t_o =~/^\}block/) \ + or (@@flag[:block]==:tics and t_o =~/^[`]{3}(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:block]=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, tags: tags } @tuned_block=[] t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.block(h) tuned_file << t_o h={ obj: 'block text end' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) - 
elsif ( @@flag['alt'] \ - and t_o =~/^\}alt/ ) - @@flag['alt']=false + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) + elsif (@@flag[:alt]==:curls and t_o =~/^\}alt/) \ + or (@@flag[:alt]==:tics and t_o =~/^[`]{3}(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/) + @@flag[:alt]=:off obj,tags=extract_tags(@tuned_block.join("\n")) h={ obj: obj, tags: tags } t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.alt(h) @tuned_block=[] tuned_file << t_o h={ obj: 'alt text end' } #introduce a counter - t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) #t_o=SiSU_DAL_DocumentStructure::ObjectLayout.new.insert(h) + t_o=SiSU_DAL_DocumentStructure::ObjectComment.new.comment(h) end - if @@flag['poem'] \ - or @@flag['group'] \ - or @@flag['alt'] \ + if (@@flag[:poem]==:curls or @@flag[:poem]==:tics \ + or @@flag[:group]==:curls or @@flag[:group]==:tics \ + or @@flag[:alt]==:curls or @@flag[:alt]==:tics) \ and t_o =~/\S/ \ - and t_o !~/^(?:\}(?:verse|code|alt|group|block)|(?:verse|code|alt|group|block)\{)/ # fix logic + and t_o !~/^(?:\}(?:verse|code|alt|group|block)|(?:verse|code|alt|group|block)\{)/ \ + and t_o !~/^[`]{3}\s+(?:code|poem|alt|group|block)|^[`]{3}(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$/ # fix logic sub_array=t_o.dup @line_mode=sub_array.scan(/.+/) - type=if @@flag['poem'] + type=if @@flag[:poem]==:curls or @@flag[:poem]==:tics t_o=SiSU_DAL_DocumentStructureExtract::Build.new(@md,@line_mode).build_lines(type).join poem=t_o.split(/\n\n/) poem.each do |v| @@ -462,16 +583,16 @@ module SiSU_DAL_DocumentStructureExtract t_o=SiSU_DAL_DocumentStructure::ObjectBlockTxt.new.verse(h) tuned_file << t_o end - 'poem' - else 'group' + :poem + else :group end end - @verse_count+=1 if @@flag['poem'] + @verse_count+=1 if @@flag[:poem]==:curls or @@flag[:poem]==:tics end - if not @@flag['code'] - if @@flag['poem'] \ - or @@flag['group'] \ - or @@flag['alt'] + if @@flag[:code]==:off + if @@flag[:poem]==:curls or @@flag[:poem]==:tics \ + or @@flag[:group]==:curls or 
@@flag[:group]==:tics \ + or @@flag[:alt]==:curls or @@flag[:alt]==:tics if t_o.is_a?(String) t_o=t_o.gsub(/\n/m,"#{Mx[:br_nl]}"). gsub(/[ ][ ]/m,"#{Mx[:nbsp]*2}"). @@ -532,16 +653,17 @@ module SiSU_DAL_DocumentStructureExtract h={ obj: str, ocn_: false } SiSU_DAL_DocumentStructure::ObjectPara.new.paragraph(h) end - def build_lines(type='') + def build_lines(type=:none) lines,lines_new=@data,[] lines.each do |line| line=if line =~/\S/ \ - and line !~/^code\{|^\}code/ \ - and not line.is_a?(Hash) - @@counter+=1 if @@flag['code'] + and line !~/^(?:code\{|\}code)/ \ + and line !~/^(?:[`]{3}\s+code|[`]{3}(?:\s+[~-][#]|\s+\~\{.+?\}\~)?\s*$)/ \ + and not line.is_a?(Hash) #watch + @@counter+=1 if @@flag[:code]==:curls or @@flag[:code]==:tics line=line.gsub(/\s\s/,"#{Mx[:nbsp]*2}"). gsub(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") - line=line.gsub(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type=='code' # REMOVE try sort for texpdf special case + line=line.gsub(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type==:code # REMOVE try sort for texpdf special case line=if line =~/(?:https?|file|ftp):\/\/\S+$/ line.gsub(/\s*$/," #{Mx[:br_nl]}") else line.gsub(/\s*$/,"#{Mx[:br_nl]}") #unless type=='code' @@ -564,7 +686,7 @@ module SiSU_DAL_DocumentStructureExtract @dob end def structure_markup #build structure where structure provided only in meta header - @dob=if @dob.is ==:para \ + @dob=if @dob.is==:para \ && (((@dob.hang !~/[1-9]/) && (@dob.indent !~/[1-9]/)) \ || (@dob.hang != @dob.indent)) \ and not @dob.bullet_ @@ -606,7 +728,7 @@ module SiSU_DAL_DocumentStructureExtract parent=node1=node2=node3=node4=node5=node6=nil data.each do |dob| h={} - if (dob.obj !~ regex_exclude_ocn_and_node || dob.is ==:code) \ + if (dob.obj !~ regex_exclude_ocn_and_node || dob.is==:code) \ && (dob.of !=:comment \ && dob.of !=:layout \ && dob.of !=:meta) \ @@ -759,8 +881,8 @@ module SiSU_DAL_DocumentStructureExtract puts "<#{@s[0]}>" end data.each_with_index do |o,i| - if o.is ==:heading \ - || o.is 
==:heading_insert + if o.is==:heading \ + || o.is==:heading_insert case o.ln when 1 tuned_file << tag_close(o.ln,hs) -- cgit v1.2.3