From 6a13e63f51e69dacefe580b9084006ae66d3fafc Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 5 Sep 2014 22:54:22 -0400 Subject: v5 v6: xml xhtml outputs, & issues --- data/doc/sisu/CHANGELOG_v5 | 2 + data/doc/sisu/CHANGELOG_v6 | 2 + lib/sisu/v5/shared_metadata.rb | 15 +++++- lib/sisu/v5/xhtml.rb | 2 +- lib/sisu/v5/xhtml_epub2.rb | 2 + lib/sisu/v5/xml_format.rb | 20 ++++++-- lib/sisu/v5/xml_odf_odt.rb | 2 + lib/sisu/v5/xml_shared.rb | 107 +++-------------------------------------- lib/sisu/v6/shared_metadata.rb | 15 +++++- lib/sisu/v6/xhtml.rb | 2 +- lib/sisu/v6/xhtml_epub2.rb | 2 + lib/sisu/v6/xml_format.rb | 20 ++++++-- lib/sisu/v6/xml_odf_odt.rb | 2 + lib/sisu/v6/xml_shared.rb | 107 +++-------------------------------------- 14 files changed, 84 insertions(+), 216 deletions(-) diff --git a/data/doc/sisu/CHANGELOG_v5 b/data/doc/sisu/CHANGELOG_v5 index 17889091..443f07bf 100644 --- a/data/doc/sisu/CHANGELOG_v5 +++ b/data/doc/sisu/CHANGELOG_v5 @@ -38,6 +38,8 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_5.6.5.orig.tar.xz sisu_5.6.5.orig.tar.xz sisu_5.6.5-1.dsc +* xml xhtml outputs, & issues + * shared_metadata, requires xml_shared * link/path fixes, diff --git a/data/doc/sisu/CHANGELOG_v6 b/data/doc/sisu/CHANGELOG_v6 index 6c043373..22b9d305 100644 --- a/data/doc/sisu/CHANGELOG_v6 +++ b/data/doc/sisu/CHANGELOG_v6 @@ -28,6 +28,8 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_6.2.6.orig.tar.xz sisu_6.2.6.orig.tar.xz sisu_6.2.6-1.dsc +* xml xhtml outputs, & issues + * shared_metadata, requires xml_shared * link/path fixes, diff --git a/lib/sisu/v5/shared_metadata.rb b/lib/sisu/v5/shared_metadata.rb index 405e7939..e6721964 100644 --- a/lib/sisu/v5/shared_metadata.rb +++ b/lib/sisu/v5/shared_metadata.rb @@ -74,6 +74,14 @@ module SiSU_Metadata language=l[:n] tr=SiSU_Translate::Source.new(@md,language) @attrib='md' + def meta_content_clean(content='') + content=if not content.nil? + content=content.tr('"',"'"). + gsub(/&/,'&') + content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) + else content + end + end if @display_heading @tag,@inf=%{Document Metadata},'' meta << self.meta_para @@ -115,7 +123,7 @@ module SiSU_Metadata end if defined? @md.rights.all \ and @md.rights.all=~/\S+/ - @tag,@inf,@class=tr.rights,@md.rights.all,'dc' #15 + @tag,@inf,@class=tr.rights,meta_content_clean(@md.rights.all),'dc' #15 meta << self.meta_para end if defined? @md.classify.subject \ @@ -810,7 +818,8 @@ module SiSU_Metadata end def xml_docbook def meta_para - inf_xml=char_enc(@inf).utf8 + inf_xml=char_enc(@inf).amp + inf_xml=char_enc(inf_xml).utf8 inf_xml=char_enc(inf_xml).br < @@ -843,6 +852,7 @@ WOK end def xml_dom def meta_para + inf_xml=char_enc(inf_xml).amp inf_xml=char_enc(@inf).utf8 inf_xml=char_enc(inf_xml).br <\1'). #footnote/endnote clean - gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'\1') #footnote/endnote clean + gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'\1') util=SiSU_TextUtils::Wrap.new(dob.obj,70) wrapped=util.line_wrap @@xml[:body] << if defined? dob.ocn diff --git a/lib/sisu/v5/xhtml_epub2.rb b/lib/sisu/v5/xhtml_epub2.rb index e0aa70cb..daa00f39 100644 --- a/lib/sisu/v5/xhtml_epub2.rb +++ b/lib/sisu/v5/xhtml_epub2.rb @@ -71,6 +71,8 @@ module SiSU_XHTML_EPUB2 include SiSU_Particulars require_relative 'defaults' # defaults.rb include SiSU_Viz + require_relative 'xml_shared' # xml_shared.rb + include SiSU_XML_Munge require_relative 'xhtml_table' # xhtml_table.rb require_relative 'xhtml_epub2_format' # xhtml_epub2_format.rb include SiSU_XHTML_EPUB2_Format diff --git a/lib/sisu/v5/xml_format.rb b/lib/sisu/v5/xml_format.rb index fb2cff88..4a7264c0 100644 --- a/lib/sisu/v5/xml_format.rb +++ b/lib/sisu/v5/xml_format.rb @@ -191,15 +191,26 @@ module SiSU_XML_Format end if defined? @md.rights.all \ and @md.rights.all=~/\S+/ # DublinCore 15 - rights - @rdf_rights=%{ dc.rights="#{@md.rights.all}"\n} - @rights=%{\n} + rights=meta_content_clean(@md.rights.all) + copyright=meta_content_clean(@md.rights.copyright.all) + @rdf_rights=%{ dc.rights="#{rights}"\n} + @rights=%{\n} end - @copyright=%{\n} if @md.rights.copyright.all # possibly redundant see dc.rights + @copyright=%{\n} \ + if @md.rights.copyright.all # possibly redundant see dc.rights @owner=%{\n} if @md.owner @keywords=%{\n} if @md.keywords @vz=SiSU_Viz::Defaults.new #margin,paragraph,table,banner,url,png,txt,color,font,nav_txt,nav_png,credits,js,php @index='index' end + def meta_content_clean(content='') + content=if not content.nil? + content=content.tr('"',"'"). + gsub(/&/,'&') + content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) + else content + end + end def table_close ' ' end @@ -558,7 +569,8 @@ WOK def prefix_a end def rights - rights=@md.rights.copyright.all.gsub(/^\s*Copyright\s+\(C\)/,'Copyright © ') + copyright=meta_content_clean(@md.rights.copyright.all) + rights=copyright(/^\s*Copyright\s+\(C\)/,'Copyright © ') %{

Rights: #{rights}

} end diff --git a/lib/sisu/v5/xml_odf_odt.rb b/lib/sisu/v5/xml_odf_odt.rb index 4e540fde..0c9dc7aa 100644 --- a/lib/sisu/v5/xml_odf_odt.rb +++ b/lib/sisu/v5/xml_odf_odt.rb @@ -71,6 +71,8 @@ module SiSU_XML_ODF_ODT include SiSU_XML_ODF_ODT_Format require_relative 'shared_metadata' # shared_metadata.rb require_relative 'txt_shared' # txt_shared.rb + require_relative 'xml_shared' # xml_shared.rb + include SiSU_XML_Munge @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 class Source begin diff --git a/lib/sisu/v5/xml_shared.rb b/lib/sisu/v5/xml_shared.rb index bd0e383b..d059cd7b 100644 --- a/lib/sisu/v5/xml_shared.rb +++ b/lib/sisu/v5/xml_shared.rb @@ -232,104 +232,6 @@ module SiSU_XML_Munge if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn dob.obj=dob.obj.gsub(/ /u,' '). # space identify gsub(/ /u,' ') # space identify - else - dob.obj=dob.obj.gsub(/¢/u,'¢'). # ¢ - gsub(/£/u,'£'). # £ - gsub(/¥/u,'¥'). # ¥ - gsub(/§/u,'§'). # § - gsub(/©/u,'©'). # © - gsub(/ª/u,'ª'). # ª - gsub(/«/u,'«'). # « - gsub(/®/u,'®'). # ® - gsub(/°/u,'°'). # ° - gsub(/±/u,'±'). # ± - gsub(/²/u,'²'). # ² - gsub(/³/u,'³'). # ³ - gsub(/µ/u,'µ'). # µ - gsub(/¶/u,'¶'). # ¶ - gsub(/¹/u,'¹'). # ¹ - gsub(/º/u,'º'). # º - gsub(/»/u,'»'). # » - gsub(/¼/u,'¼'). # ¼ - gsub(/½/u,'½'). # ½ - gsub(/¾/u,'¾'). # ¾ - gsub(/×/u,'×'). # × - gsub(/÷/u,'÷'). # ÷ - gsub(/¿/u,'¿'). # ¿ - gsub(/À/u,'À'). # À - gsub(/Á/u,'Á'). # Á - gsub(/Â/u,'Â'). # Â - gsub(/Ã/u,'Ã'). # Ã - gsub(/Ä/u,'Ä'). # Ä - gsub(/Å/u,'Å'). # Å - gsub(/Æ/u,'Æ'). # Æ - gsub(/Ç/u,'Ç'). # Ç - gsub(/È/u,'È'). # È - gsub(/É/u,'É'). # É - gsub(/Ê/u,'Ê'). # Ê - gsub(/Ë/u,'Ë'). # Ë - gsub(/Ì/u,'Ì'). # Ì - gsub(/Í/u,'Í'). # Í - gsub(/Î/u,'Î'). # Î - gsub(/Ï/u,'Ï'). # Ï - gsub(/Ð/u,'Ð'). # Ð - gsub(/Ñ/u,'Ñ'). # Ñ - gsub(/Ò/u,'Ò'). # Ò - gsub(/Ó/u,'Ó'). # Ó - gsub(/Ô/u,'Ô'). # Ô - gsub(/Õ/u,'Õ'). # Õ - gsub(/Ö/u,'Ö'). # Ö - gsub(/Ø/u,'Ø'). # Ø - gsub(/Ù/u,'Ù'). # Ù - gsub(/Ú/u,'Ú'). # Ú - gsub(/Û/u,'Û'). # Û - gsub(/Ü/u,'Ü'). # Ü - gsub(/Ý/u,'Ý'). # Ý - gsub(/Þ/u,'Þ'). # Þ - gsub(/ß/u,'ß'). # ß - gsub(/à/u,'à'). # à - gsub(/á/u,'á'). # á - gsub(/â/u,'â'). # â - gsub(/ã/u,'ã'). # ã - gsub(/ä/u,'ä'). # ä - gsub(/å/u,'å'). # å - gsub(/æ/u,'æ'). # æ - gsub(/ç/u,'ç'). # ç - gsub(/è/u,'è'). # è - gsub(/é/u,'´'). # é - gsub(/ê/u,'ˆ'). # ê - gsub(/ë/u,'ë'). # ë - gsub(/ì/u,'ì'). # ì - gsub(/í/u,'´'). # í - gsub(/î/u,'î'). # î - gsub(/ï/u,'ï'). # ï - gsub(/ð/u,'ð'). # ð - gsub(/ñ/u,'ñ'). # ñ - gsub(/ò/u,'ò'). # ò - gsub(/ó/u,'ó'). # ó - gsub(/ô/u,'ô'). # ô - gsub(/õ/u,'õ'). # õ - gsub(/ö/u,'ö'). # ö - gsub(/ø/u,'ø'). # ø - gsub(/ù/u,'ù'). # ú - gsub(/ú/u,'ú'). # û - gsub(/û/u,'û'). # ü - gsub(/ü/u,'ü'). # ý - gsub(/þ/u,'þ'). # þ - gsub(/ÿ/u,'ÿ'). # ÿ - gsub(/‘/u,'&#lsquo;'). # ‘ # ‘ - gsub(/’/u,'&#rsquo;'). # ’ # ’ - gsub(/“/u,'“'). # “ # “ - gsub(/”/u,'”'). # ” # ” - gsub(/–/u,'–'). # – # – - gsub(/—/u,'—'). # — # — - gsub(/∝/u,'∝'). # ∝ # ∝ - gsub(/∞/u,'∞'). # ∞ # ∞ - gsub(/™/u,'™'). # ™ # ™ - gsub(/✠/u,'✠'). # ✠ - #gsub(/✠/u '†'). # † # † incorrect replacement † - gsub(/ /u,' '). # space identify - gsub(/ /u,' ') # space identify end end self @@ -411,7 +313,8 @@ module SiSU_XML_Munge %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}). gsub(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}"). gsub(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}"). - gsub(/ |#{Mx[:nbsp]}/m,' ') + gsub(/ |#{Mx[:nbsp]}/m,' '). + gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&\1') # pattern not to match dob end def markup_light(dob='') @@ -426,7 +329,8 @@ module SiSU_XML_Munge gsub(/&([^;]{1,5})/,'&\1'). #sort, rough estimate, revisit #WATCH found in node not sax gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/, "#{@md.file.output_path.xml.rel_image}\/\\1"). - gsub(/ |#{Mx[:nbsp]}/,' ') + gsub(/ |#{Mx[:nbsp]}/,' '). + gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&\1') # pattern not to match wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 dob.obj=tidywords(wordlist).join(' ').strip dob @@ -698,7 +602,8 @@ module SiSU_XML_Tags #Format end def meta_content_clean(content='') content=if not content.nil? - content=content.tr('"',"'") + content=content.tr('"',"'"). + gsub(/&/,'&') content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) else content end diff --git a/lib/sisu/v6/shared_metadata.rb b/lib/sisu/v6/shared_metadata.rb index bef13903..b51fc8f4 100644 --- a/lib/sisu/v6/shared_metadata.rb +++ b/lib/sisu/v6/shared_metadata.rb @@ -74,6 +74,14 @@ module SiSU_Metadata language=l[:n] tr=SiSU_Translate::Source.new(@md,language) @attrib='md' + def meta_content_clean(content='') + content=if not content.nil? + content=content.tr('"',"'"). + gsub(/&/,'&') + content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) + else content + end + end if @display_heading @tag,@inf=%{Document Metadata},'' meta << self.meta_para @@ -115,7 +123,7 @@ module SiSU_Metadata end if defined? @md.rights.all \ and @md.rights.all=~/\S+/ - @tag,@inf,@class=tr.rights,@md.rights.all,'dc' #15 + @tag,@inf,@class=tr.rights,meta_content_clean(@md.rights.all),'dc' #15 meta << self.meta_para end if defined? @md.classify.subject \ @@ -810,7 +818,8 @@ module SiSU_Metadata end def xml_docbook def meta_para - inf_xml=char_enc(@inf).utf8 + inf_xml=char_enc(@inf).amp + inf_xml=char_enc(inf_xml).utf8 inf_xml=char_enc(inf_xml).br < @@ -843,6 +852,7 @@ WOK end def xml_dom def meta_para + inf_xml=char_enc(inf_xml).amp inf_xml=char_enc(@inf).utf8 inf_xml=char_enc(inf_xml).br <\1'). #footnote/endnote clean - gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'\1') #footnote/endnote clean + gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'\1') util=SiSU_TextUtils::Wrap.new(dob.obj,70) wrapped=util.line_wrap @@xml[:body] << if defined? dob.ocn diff --git a/lib/sisu/v6/xhtml_epub2.rb b/lib/sisu/v6/xhtml_epub2.rb index 26399ef7..44495ff3 100644 --- a/lib/sisu/v6/xhtml_epub2.rb +++ b/lib/sisu/v6/xhtml_epub2.rb @@ -71,6 +71,8 @@ module SiSU_XHTML_EPUB2 include SiSU_Particulars require_relative 'defaults' # defaults.rb include SiSU_Viz + require_relative 'xml_shared' # xml_shared.rb + include SiSU_XML_Munge require_relative 'xhtml_table' # xhtml_table.rb require_relative 'xhtml_epub2_format' # xhtml_epub2_format.rb include SiSU_XHTML_EPUB2_Format diff --git a/lib/sisu/v6/xml_format.rb b/lib/sisu/v6/xml_format.rb index 38cc7f85..e649fa51 100644 --- a/lib/sisu/v6/xml_format.rb +++ b/lib/sisu/v6/xml_format.rb @@ -191,15 +191,26 @@ module SiSU_XML_Format end if defined? @md.rights.all \ and @md.rights.all=~/\S+/ # DublinCore 15 - rights - @rdf_rights=%{ dc.rights="#{@md.rights.all}"\n} - @rights=%{\n} + rights=meta_content_clean(@md.rights.all) + copyright=meta_content_clean(@md.rights.copyright.all) + @rdf_rights=%{ dc.rights="#{rights}"\n} + @rights=%{\n} end - @copyright=%{\n} if @md.rights.copyright.all # possibly redundant see dc.rights + @copyright=%{\n} \ + if @md.rights.copyright.all # possibly redundant see dc.rights @owner=%{\n} if @md.owner @keywords=%{\n} if @md.keywords @vz=SiSU_Viz::Defaults.new #margin,paragraph,table,banner,url,png,txt,color,font,nav_txt,nav_png,credits,js,php @index='index' end + def meta_content_clean(content='') + content=if not content.nil? + content=content.tr('"',"'"). + gsub(/&/,'&') + content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) + else content + end + end def table_close ' ' end @@ -558,7 +569,8 @@ WOK def prefix_a end def rights - rights=@md.rights.copyright.all.gsub(/^\s*Copyright\s+\(C\)/,'Copyright © ') + copyright=meta_content_clean(@md.rights.copyright.all) + rights=copyright(/^\s*Copyright\s+\(C\)/,'Copyright © ') %{

Rights: #{rights}

} end diff --git a/lib/sisu/v6/xml_odf_odt.rb b/lib/sisu/v6/xml_odf_odt.rb index 967812db..e6cfafe5 100644 --- a/lib/sisu/v6/xml_odf_odt.rb +++ b/lib/sisu/v6/xml_odf_odt.rb @@ -71,6 +71,8 @@ module SiSU_XML_ODF_ODT include SiSU_XML_ODF_ODT_Format require_relative 'shared_metadata' # shared_metadata.rb require_relative 'txt_shared' # txt_shared.rb + require_relative 'xml_shared' # xml_shared.rb + include SiSU_XML_Munge @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 class Source begin diff --git a/lib/sisu/v6/xml_shared.rb b/lib/sisu/v6/xml_shared.rb index 35d1132d..3ffda8f3 100644 --- a/lib/sisu/v6/xml_shared.rb +++ b/lib/sisu/v6/xml_shared.rb @@ -232,104 +232,6 @@ module SiSU_XML_Munge if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn dob.obj=dob.obj.gsub(/ /u,' '). # space identify gsub(/ /u,' ') # space identify - else - dob.obj=dob.obj.gsub(/¢/u,'¢'). # ¢ - gsub(/£/u,'£'). # £ - gsub(/¥/u,'¥'). # ¥ - gsub(/§/u,'§'). # § - gsub(/©/u,'©'). # © - gsub(/ª/u,'ª'). # ª - gsub(/«/u,'«'). # « - gsub(/®/u,'®'). # ® - gsub(/°/u,'°'). # ° - gsub(/±/u,'±'). # ± - gsub(/²/u,'²'). # ² - gsub(/³/u,'³'). # ³ - gsub(/µ/u,'µ'). # µ - gsub(/¶/u,'¶'). # ¶ - gsub(/¹/u,'¹'). # ¹ - gsub(/º/u,'º'). # º - gsub(/»/u,'»'). # » - gsub(/¼/u,'¼'). # ¼ - gsub(/½/u,'½'). # ½ - gsub(/¾/u,'¾'). # ¾ - gsub(/×/u,'×'). # × - gsub(/÷/u,'÷'). # ÷ - gsub(/¿/u,'¿'). # ¿ - gsub(/À/u,'À'). # À - gsub(/Á/u,'Á'). # Á - gsub(/Â/u,'Â'). # Â - gsub(/Ã/u,'Ã'). # Ã - gsub(/Ä/u,'Ä'). # Ä - gsub(/Å/u,'Å'). # Å - gsub(/Æ/u,'Æ'). # Æ - gsub(/Ç/u,'Ç'). # Ç - gsub(/È/u,'È'). # È - gsub(/É/u,'É'). # É - gsub(/Ê/u,'Ê'). # Ê - gsub(/Ë/u,'Ë'). # Ë - gsub(/Ì/u,'Ì'). # Ì - gsub(/Í/u,'Í'). # Í - gsub(/Î/u,'Î'). # Î - gsub(/Ï/u,'Ï'). # Ï - gsub(/Ð/u,'Ð'). # Ð - gsub(/Ñ/u,'Ñ'). # Ñ - gsub(/Ò/u,'Ò'). # Ò - gsub(/Ó/u,'Ó'). # Ó - gsub(/Ô/u,'Ô'). # Ô - gsub(/Õ/u,'Õ'). # Õ - gsub(/Ö/u,'Ö'). # Ö - gsub(/Ø/u,'Ø'). # Ø - gsub(/Ù/u,'Ù'). # Ù - gsub(/Ú/u,'Ú'). # Ú - gsub(/Û/u,'Û'). # Û - gsub(/Ü/u,'Ü'). # Ü - gsub(/Ý/u,'Ý'). # Ý - gsub(/Þ/u,'Þ'). # Þ - gsub(/ß/u,'ß'). # ß - gsub(/à/u,'à'). # à - gsub(/á/u,'á'). # á - gsub(/â/u,'â'). # â - gsub(/ã/u,'ã'). # ã - gsub(/ä/u,'ä'). # ä - gsub(/å/u,'å'). # å - gsub(/æ/u,'æ'). # æ - gsub(/ç/u,'ç'). # ç - gsub(/è/u,'è'). # è - gsub(/é/u,'´'). # é - gsub(/ê/u,'ˆ'). # ê - gsub(/ë/u,'ë'). # ë - gsub(/ì/u,'ì'). # ì - gsub(/í/u,'´'). # í - gsub(/î/u,'î'). # î - gsub(/ï/u,'ï'). # ï - gsub(/ð/u,'ð'). # ð - gsub(/ñ/u,'ñ'). # ñ - gsub(/ò/u,'ò'). # ò - gsub(/ó/u,'ó'). # ó - gsub(/ô/u,'ô'). # ô - gsub(/õ/u,'õ'). # õ - gsub(/ö/u,'ö'). # ö - gsub(/ø/u,'ø'). # ø - gsub(/ù/u,'ù'). # ú - gsub(/ú/u,'ú'). # û - gsub(/û/u,'û'). # ü - gsub(/ü/u,'ü'). # ý - gsub(/þ/u,'þ'). # þ - gsub(/ÿ/u,'ÿ'). # ÿ - gsub(/‘/u,'&#lsquo;'). # ‘ # ‘ - gsub(/’/u,'&#rsquo;'). # ’ # ’ - gsub(/“/u,'“'). # “ # “ - gsub(/”/u,'”'). # ” # ” - gsub(/–/u,'–'). # – # – - gsub(/—/u,'—'). # — # — - gsub(/∝/u,'∝'). # ∝ # ∝ - gsub(/∞/u,'∞'). # ∞ # ∞ - gsub(/™/u,'™'). # ™ # ™ - gsub(/✠/u,'✠'). # ✠ - #gsub(/✠/u '†'). # † # † incorrect replacement † - gsub(/ /u,' '). # space identify - gsub(/ /u,' ') # space identify end end self @@ -411,7 +313,8 @@ module SiSU_XML_Munge %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}). gsub(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}"). gsub(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}"). - gsub(/ |#{Mx[:nbsp]}/m,' ') + gsub(/ |#{Mx[:nbsp]}/m,' '). + gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&\1') # pattern not to match dob end def markup_light(dob='') @@ -426,7 +329,8 @@ module SiSU_XML_Munge gsub(/&([^;]{1,5})/,'&\1'). #sort, rough estimate, revisit #WATCH found in node not sax gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/, "#{@md.file.output_path.xml.rel_image}\/\\1"). - gsub(/ |#{Mx[:nbsp]}/,' ') + gsub(/ |#{Mx[:nbsp]}/,' '). + gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&\1') # pattern not to match wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 dob.obj=tidywords(wordlist).join(' ').strip dob @@ -698,7 +602,8 @@ module SiSU_XML_Tags #Format end def meta_content_clean(content='') content=if not content.nil? - content=content.tr('"',"'") + content=content.tr('"',"'"). + gsub(/&/,'&') content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) else content end -- cgit v1.2.3