about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu/v0/odf.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v0/odf.rb')
-rw-r--r-- lib/sisu/v0/odf.rb | 129
1 files changed, 64 insertions, 65 deletions
diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb
index 09c67ff6..05a6272c 100644
--- a/lib/sisu/v0/odf.rb
+++ b/lib/sisu/v0/odf.rb
@@ -128,7 +128,7 @@ module SiSU_ODF
end
# Used for extraction of endnotes from paragraphs
def extract_endnotes(para='')
- notes=para.scan(/#{Mx[:en_a_o]}(\d+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/)
+ notes=para.scan(/#{Mx[:en_a_o]}(\d+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}#{Mx[:en_a_c]}/)[1]
@n=[]
notes.each do |n| #high cost to deal with <br> appropriately within odf, consider
n=n.dup.to_s
@@ -222,13 +222,13 @@ module SiSU_ODF
end
def image(para)
para.gsub!(@serial,'')
- m=para.scan(/(\{\s*(.+?)\}((?:https?|file|ftp)\S+|image))/)
+ m=para.scan(/(#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}((?:https?|file|ftp)\S+|image))/)
if m; m.each do |i|
cont,url=i[1],i[2]
cont.gsub!(/([)(\]\[])/,"\\\\\\1")
cont.gsub!(/([+?])/,"\\\\\\1") # incorrect handling of +
url.gsub!(/([+?])/,"\\\\\\1")
- para.sub!(/\{\s*#{cont}\}#{url}/m,image_odf(i)) #watch
+ para.sub!(/#{Mx[:lnk_o]}\s*#{cont}\s*#{Mx[:lnk_c]}#{url}/m,image_odf(i)) #watch
para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix
end
m=nil
@@ -242,14 +242,14 @@ module SiSU_ODF
end
def text_link(para)
para.gsub!(@serial,'')
- m=para.scan(/(\{([^}]+?)\}((?:https?|file|ftp)\S+?))([;.,]?(?=\s|$))/) #sort
+ m=para.scan(/(#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}((?:https?|file|ftp)\S+?))([;.,]?(?=\s|$))/) #sort
if m
m.each do |i|
txt,url,trail=i[1],i[2]
txt.gsub!(/([)(\]\[])/,"\\\\\\1")
- txt.gsub!(/([+?])/,"\\\\\\1") # problems with +
+ txt.gsub!(/([+?*])/,"\\\\\\1") # problems with +
url.gsub!(/([+?])/,"\\\\\\1") # problems with +
- para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url,trail)) #make sure trailing ']' are not caught in url
+ para.gsub!(/#{Mx[:lnk_o]}\s*#{txt}#{Mx[:lnk_c]}#{url}/m,text_link_odf(txt,url,trail)) #make sure trailing ']' are not caught in url
para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix
end
m=nil
@@ -260,14 +260,14 @@ module SiSU_ODF
para.gsub!(@serial,'')
para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,
'<text:a xlink:type="simple" xlink:href="\1">\1</text:a>\2') #http ftp matches escaped, no decoration
- para.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
+ para.gsub!(/((?:^|\s)#{Mx[:lnk_c]})((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
'\1<text:a xlink:type="simple" xlink:href="\2">\2</text:a>\3') #special case \{ e.g. \}http://url
para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
%{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
#para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, also works
#%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration
para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/,
- %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}})
+ %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}}) if para !~/http:\/\// # improve upon, document crash where url contains '@' symbol
para=case para
when /^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/m
m=$1
@@ -283,31 +283,31 @@ module SiSU_ODF
end
def footnote(para)
@astx||=10000
- para.gsub!(/#{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]}|#{Mx[:en_a_c]})/,'\1')
+ para.gsub!(/#{Mx[:id_o]}#@dp#{Mx[:id_c]}(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/,'\1')
#para.gsub!(/<br \/><:i[1-9]>/,'<br />')
- if para =~/#{Mx[:en_a_o]}\d+\s+/
+ if para =~/#{Mx[:en_a_o]}\d+\s+/
para=para.gsub(/#{Mx[:en_a_o]}(\d+)\s+(.+?)#{Mx[:en_a_c]}/,'<text:note text:id="ftn\1" text:note-class="footnote"><text:note-citation>\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>')
end
- if para=~/#{Mx[:en_b_o]}[*+]\d+\s/ #editor notes, squre bracket series
- asterisk=para.scan(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)#{Mx[:en_b_c]}/)
+ if para=~/#{Mx[:en_a_o]}[*+]+\s/
+ asterisk=para.scan(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/)
asterisk.each do |x|
a=x[0].gsub(/([*+])/,"\\\\\\1")
- para=para.gsub(/#{Mx[:en_b_o]}(#{a})\s+(.+?)#{Mx[:en_b_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>})
+ para=para.gsub(/#{Mx[:en_a_o]}(#{a})\s+(.+?)#{Mx[:en_a_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>})
@astx+=1
end
end
- if para=~/#{Mx[:en_a_o]}[*+]+\s/
- asterisk=para.scan(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/)
+ if para=~/#{Mx[:en_b_o]}[*+]\d+\s/
+ asterisk=para.scan(/#{Mx[:en_b_o]}([*+]\d+)\s+(.+?)#{Mx[:en_b_c]}/)
asterisk.each do |x|
a=x[0].gsub(/([*+])/,"\\\\\\1")
- para=para.gsub(/#{Mx[:en_a_o]}(#{a})\s+(.+?)#{Mx[:en_a_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>})
+ para=para.gsub(/#{Mx[:en_b_o]}(#{a})\s+(.+?)#{Mx[:en_b_c]}/,%{<text:note text:id="ftn#{@astx.to_s}" text:note-class="footnote"><text:note-citation text:label="\\1">\\1</text:note-citation><text:note-body><text:p text:style-name="Footnote"> \\2</text:p><text:p text:style-name="Footnote"/></text:note-body></text:note>})
@astx+=1
end
end
para
end
def group_clean(para)
- para.gsub!(/&amp;nbsp;|&nbsp;/,'&#160;')
+ para.gsub!(/&amp;nbsp;|&nbsp;|#{Mx[:nbsp]}/,'&#160;')
para.gsub!(/</,'&lt;'); para.gsub!(/>/,'&gt;')
para.gsub!(/&lt;(text:span text:style-name="T[1-5]"|\/text:span)&gt;/,'<\1>') #works, not ideal
para.gsub!(/#{Mx[:br_line]}/,'<br />')
@@ -358,18 +358,36 @@ module SiSU_ODF
end
def table(para) #
if para =~/#{Mx[:gr_o]}Th?.+/ # tables come as single block
- table=SiSU_ODF_format::Table.new(@md,para)
+ txt_obj={:table =>para}
+ table=SiSU_ODF_format::Table.new(@md,txt_obj)
para=table.table_split
end
end
- def odf_structure(para='',lv='',ocn='',hname='') #% Used to extract the structure of a document
+ def odf_structure(md,t_o)
+ @md,@t_o=md,t_o
+ @md,@t_o=md,t_o
+ if t_o.class == Hash
+ para =t_o[:txt] || nil
+ lv =t_o[:lv] || nil
+ ocn =t_o[:ocn] || nil
+ hname =t_o[:h_name] || nil
+ #@h_name =t_o[:h_name] || nil
+ #elsif t_o.class == Array
+ # @txt =txt[0]
+ #elsif t_o.class == String
+ # @txt =txt
+ else
+ #@one,@two,@three=one,two,three
+ p t_o.class
+ p caller
+ end
lv=lv.to_i
n=lv - 1
n3=lv + 2
lv=nil if lv == 0
para=unless para=~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/
- para=if para =~/\{\s*\S+?\.(?:png|jpg|gif)\s.+?\}(?:(?:https?|file|ftp):\S+|image)/; image(para)
- elsif para =~/\{.+?\}(?:(?:https?|file|ftp):\S+|image)/; text_link(para)
+ para=if para =~/#{Mx[:lnk_o]}\s*\S+?\.(?:png|jpg|gif)\s.+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\S+|image)/; image(para)
+ elsif para =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\S+|image)/; text_link(para)
else para
end
else para
@@ -421,15 +439,15 @@ module SiSU_ODF
bullet=image_src('bullet_09.png')
cp("#{bullet}/bullet_09.png","#{@env.path.odf}/Pictures/.") #if image_src('bullet_09.png')
data.each do |para|
+ para.gsub!(/#{Mx[:id_o]}~0;0:0;x\d+#{Mx[:id_c]}/,'') # if book index? remove
#p para if para =~safe_characters and @md.cmd =~/V/ #KEEP
#para.gsub!(/&lt;(~\d+;(?:\w|[0-6]:)\d+;\w\d+)&gt;&lt;(#@dp:#@dp)&gt;/,'<\1><\2>')
+ para='' if para =~/#{Mx[:lv_o]}\d+:.*?#{Mx[:lv_c]}.+?#{Mx[:pa_non_object_dummy_heading]}/
para_array=[]
+ para.gsub!(/</,'&lt;'); para.gsub!(/>/,'&gt;')
word=para.scan(/\S+|\n/)
if word
word.each do |w| # _ - / # | : ! ^ ~
- unless w =~/#{Mx[:id_o]}~\S+?;\S+?;\S+?#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}|#{Mx[:gr_o]}.+?#{Mx[:gr_c]}|<[:!][^<>]+?>/
- w.gsub!(/^<([^<>][^<>][^<>][^<>]+?)>$/,'&lt;\1&gt;') #refix
- end
unless para =~/^(?:#{Rx[:meta]}|%+ )/m
w.gsub!(/&#(?:126|152);/,'~') #126 usual
if w !~/&\S{1,7};/ \
@@ -468,10 +486,10 @@ module SiSU_ODF
para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<text:span text:style-name="T3">\1</text:span>')
para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<text:span text:style-name="T4">\1</text:span>')
para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<text:span text:style-name="T5">\1</text:span>')
- para.gsub!(/`/,"'")
para.gsub!(/­/u,'-')
+ para.gsub!(/ /u, ' ') # space identify
+ para.gsub!(/ /u, ' ') # space identify
para.gsub!(/·/u,'*')
- para.gsub!(/[“”]/u,'""')
para.gsub!(/[­–—]/u,'-') #— – chk
para.gsub!(/ < /i,'&#060;')
para.gsub!(/\\copy(?:right)?\b/,'&#169;')
@@ -485,7 +503,7 @@ module SiSU_ODF
para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'') # remove empty lines
para.gsub!(/<a href=".+?">(.+?)<\/a>/,'\1')
para.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links
-# para.gsub!(/<a href=".+?">(.+?)<\/a>/im,'\1')
+ #para.gsub!(/<a href=".+?">(.+?)<\/a>/im,'\1')
#para.gsub!(/&nbsp;/,' ') # decide on
#para.gsub!(/\{(\S+?\.(?:png|jpg)) .+?\}(?:http:\/\/\S+|image)/," [ \\1 ]") #"[ #{@env.url.images_local}\/\\1 ]")
#para.gsub!(/<!TZ.+/,'')
@@ -515,51 +533,35 @@ module SiSU_ODF
and para=~/\S+/
para=case @sto.format
when /^(1):(\S*)/
- odf_structure(para,$1,@sto.ocn,$2)
+ txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2}
+ odf_structure(@md,txt_obj)
para
when /^(2):(\S*)/
- odf_structure(para,$1,@sto.ocn,$2)
+ txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2}
+ odf_structure(@md,txt_obj)
para
when /^(3):(\S*)/
- odf_structure(para,$1,@sto.ocn,$2)
+ txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2}
+ odf_structure(@md,txt_obj)
para
when /^(4):(\S+)/ # work on see Split_text_object
- odf_structure(para,$1,@sto.ocn,$2)
+ txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2}
+ odf_structure(@md,txt_obj)
# work on see SiSU_text_parts::Split_text_object
para
when /^(5):(\S*)/
- odf_structure(para,$1,@sto.ocn,$2)
+ txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2}
+ odf_structure(@md,txt_obj)
para
when /^(6):(\S*)/
- odf_structure(para,$1,@sto.ocn,$2)
+ txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2}
+ odf_structure(@md,txt_obj)
para
- #@sto.lev_para_ocn.heading_body6
- #when /^(i1)$/
- # #formatMono.gsubBody
- # #para=@sto[:lev_para_ocn].scrIndent1
- #when /^(i2)$/
- # formatMono.gsubBody
- # para=@sto[:lev_para_ocn].scrIndent2
- #when /^(center)$/
- # para.gsub!(/(.+)/,
- # %{<center>(\\1)</center>})
- # para=@sto[:lev_para_ocn].scrPara
- #when /^(b|bold)$/
- # para.gsub!(/(.+)/,
- # %{<b>(\\1)</b>})
- # para=@sto[:lev_para_ocn].scrPara
- #when /null/ # see whether u can improve
- # if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/)
- # #formatMono.gsubBody
- # #para=@sto[:lev_para_ocn].scrPara
- # end
- else odf_structure(para,nil,nil,nil) #watch may be problematic
+ else
+ txt_obj={:txt =>para}
+ odf_structure(@md,txt_obj) #watch may be problematic
para
end
- elsif para =~/(.*)<!#!>(.*)/
- one,two=$1,$2
- format_text=SiSU_ODF_format::Format_text_object.new(one,two)
- para=format_text.seg_no_paranum
end
if para =~/<a name="n\d+">/ \
and para =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/ # -endnote
@@ -568,17 +570,14 @@ module SiSU_ODF
if (para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/)
# i don't get the condition for no paranum
end
- if para =~/<:center>/
- one,two=/(.*)<:center>(.*)/.match(para)[1,2]
- format_text=SiSU_ODF_format::Format_text_object.new(one,two)
- para=format_text.center
- end
else
if para =~ /^(4)~(\S+)/
- odf_structure(para,$1,@sto.ocn,$2)
+ txt_obj={:txt =>para,:lv =>$1,:ocn =>@sto.ocn,:h_name=>$2}
+ odf_structure(@md,txt_obj)
para
elsif para =~/#{Mx[:id_o]}~(\d+);m\d+;[mdv]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
- odf_structure(para,nil,nil,nil) #watch may be problematic
+ txt_obj={:txt =>para}
+ odf_structure(@md,txt_obj) #watch may be problematic
para
end
end