v5 v6: xml xhtml outputs, & issues
author Ralph Amissah <ralph@amissah.com>
Sat, 6 Sep 2014 02:54:22 +0000 (22:54 -0400)
committer Ralph Amissah <ralph@amissah.com>
Sat, 6 Sep 2014 03:14:30 +0000 (23:14 -0400)
14 files changed:
data/doc/sisu/CHANGELOG_v5
data/doc/sisu/CHANGELOG_v6
lib/sisu/v5/shared_metadata.rb
lib/sisu/v5/xhtml.rb
lib/sisu/v5/xhtml_epub2.rb
lib/sisu/v5/xml_format.rb
lib/sisu/v5/xml_odf_odt.rb
lib/sisu/v5/xml_shared.rb
lib/sisu/v6/shared_metadata.rb
lib/sisu/v6/xhtml.rb
lib/sisu/v6/xhtml_epub2.rb
lib/sisu/v6/xml_format.rb
lib/sisu/v6/xml_odf_odt.rb
lib/sisu/v6/xml_shared.rb

index 17889091ff24deaca6c3ec9a144996b6227ac852..443f07bf6aa9cf11325fbe88e030777decb6d0df 100644 (file)
@@ -38,6 +38,8 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_5.6.5.orig.tar.xz
   sisu_5.6.5.orig.tar.xz
   sisu_5.6.5-1.dsc
 
+* xml xhtml outputs, &amp; issues
+
 * shared_metadata, requires xml_shared
 
 * link/path fixes,
index 6c04337372683d2bc9db60a45535b5dcd046ad7e..22b9d3052401bf28cd240248d22f1c09f2805b40 100644 (file)
@@ -28,6 +28,8 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_6.2.6.orig.tar.xz
   sisu_6.2.6.orig.tar.xz
   sisu_6.2.6-1.dsc
 
+* xml xhtml outputs, &amp; issues
+
 * shared_metadata, requires xml_shared
 
 * link/path fixes,
index 405e793998698e2d6d2cde7044a14d37a71ed802..e6721964edf7c96a61b6e7347ad0423e379a58ba 100644 (file)
@@ -74,6 +74,14 @@ module SiSU_Metadata
       language=l[:n]
       tr=SiSU_Translate::Source.new(@md,language)
       @attrib='md'
+      def meta_content_clean(content='') # sanitise a metadata string before it is placed in markup; returns nil unchanged
+        content=if not content.nil?
+          content=content.tr('"',"'"). # every double quote becomes a single quote
+            gsub(/&/,'&amp;') # every ampersand is escaped, including any that already begin an entity
+          content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) # NOTE(review): char_enc.utf8 is defined outside this hunk - confirm what it rewrites
+        else content # nil input falls through as nil
+        end
+      end
       if @display_heading
         @tag,@inf=%{<b><u>Document Metadata</u></b>},''
         meta << self.meta_para
@@ -115,7 +123,7 @@ module SiSU_Metadata
       end
       if defined? @md.rights.all \
       and @md.rights.all=~/\S+/
-        @tag,@inf,@class=tr.rights,@md.rights.all,'dc' #15
+        @tag,@inf,@class=tr.rights,meta_content_clean(@md.rights.all),'dc' #15
         meta << self.meta_para
       end
       if defined? @md.classify.subject \
@@ -810,7 +818,8 @@ module SiSU_Metadata
     end
     def xml_docbook
       def meta_para
-        inf_xml=char_enc(@inf).utf8
+        inf_xml=char_enc(@inf).amp
+        inf_xml=char_enc(inf_xml).utf8
         inf_xml=char_enc(inf_xml).br
         <<WOK
 #{Ax[:tab]}<#{@tag}>
@@ -843,6 +852,7 @@ WOK
     end
     def xml_dom
       def meta_para
+        inf_xml=char_enc(inf_xml).amp
         inf_xml=char_enc(@inf).utf8
         inf_xml=char_enc(inf_xml).br
         <<WOK
@@ -861,6 +871,7 @@ WOK
     end
     def xhtml_scroll
       def meta_para
+        inf_xml=char_enc(inf_xml).amp
         inf_xml=char_enc(@inf).utf8
         inf_xml=char_enc(inf_xml).br
         <<WOK
index 138ccf6dc83878403ab2ac0dcabdaf06cca2c05d..60049043a92284f2a326f8fb36f5744b15457749 100644 (file)
@@ -234,7 +234,7 @@ WOK
           end
           extract_endnotes(dob)
           dob.obj=dob.obj.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'<en>\1</en>'). #footnote/endnote clean
-            gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'<en>\1</en>') #footnote/endnote clean
+            gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'<en>\1</en>')
           util=SiSU_TextUtils::Wrap.new(dob.obj,70)
           wrapped=util.line_wrap
           @@xml[:body] << if defined? dob.ocn
index e0aa70cb00ce5586b09943ec1bc5d6c86446436c..daa00f39e1bea15ce7730c2c34f62680b1f82d55 100644 (file)
@@ -71,6 +71,8 @@ module SiSU_XHTML_EPUB2
     include SiSU_Particulars
   require_relative 'defaults'                           # defaults.rb
     include SiSU_Viz
+  require_relative 'xml_shared'                         # xml_shared.rb
+    include SiSU_XML_Munge
   require_relative 'xhtml_table'                        # xhtml_table.rb
   require_relative 'xhtml_epub2_format'                 # xhtml_epub2_format.rb
     include SiSU_XHTML_EPUB2_Format
index fb2cff8828e51cd066a2068e63cf976a7dfdddb6..4a7264c09e78fd255e4085c2ad256e093aabac8c 100644 (file)
@@ -191,15 +191,26 @@ module SiSU_XML_Format
       end
       if defined? @md.rights.all \
       and @md.rights.all=~/\S+/                                               # DublinCore 15 - rights
-        @rdf_rights=%{    dc.rights="#{@md.rights.all}"\n}
-        @rights=%{<meta name="dc.rights" content="#{@md.rights.all}" />\n}
+        rights=meta_content_clean(@md.rights.all)
+        copyright=meta_content_clean(@md.rights.copyright.all)
+        @rdf_rights=%{    dc.rights="#{rights}"\n}
+        @rights=%{<meta name="dc.rights" content="#{rights}" />\n}
       end
-      @copyright=%{<meta name="copyright" content="#{@md.rights.copyright.all}" />\n} if @md.rights.copyright.all # possibly redundant see dc.rights
+      @copyright=%{<meta name="copyright" content="#{copyright}" />\n} \
+        if @md.rights.copyright.all # possibly redundant see dc.rights
       @owner=%{<meta name="owner" content="#{@md.owner}" />\n} if @md.owner
       @keywords=%{<meta name="keywords" content="#{@md.keywords}" />\n} if @md.keywords
       @vz=SiSU_Viz::Defaults.new #margin,paragraph,table,banner,url,png,txt,color,font,nav_txt,nav_png,credits,js,php
       @index='index'
     end
+    def meta_content_clean(content='') # sanitise a metadata string before it is placed in markup; returns nil unchanged
+      content=if not content.nil?
+        content=content.tr('"',"'"). # every double quote becomes a single quote
+           gsub(/&/,'&amp;') # every ampersand is escaped, including any that already begin an entity
+        content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) # NOTE(review): char_enc.utf8 is defined outside this hunk - confirm what it rewrites
+      else content # nil input falls through as nil
+      end
+    end
     def table_close
       '</font> </td></tr></table>'
     end
@@ -558,7 +569,8 @@ WOK
     def prefix_a
     end
     def rights
-      rights=@md.rights.copyright.all.gsub(/^\s*Copyright\s+\(C\)/,'Copyright <sup>&copy;</sup>&nbsp;')
+      copyright=meta_content_clean(@md.rights.copyright.all)
+      rights=copyright(/^\s*Copyright\s+\(C\)/,'Copyright <sup>&copy;</sup>&nbsp;')
       %{<p class="small_left">Rights: #{rights}</p>
 <p />}
     end
index 4e540fde0d7828b7a4fbaaeb125bf60b9b46e123..0c9dc7aaa21e8ab1e02fa74157d35c8ec063c6f1 100644 (file)
@@ -71,6 +71,8 @@ module SiSU_XML_ODF_ODT
     include SiSU_XML_ODF_ODT_Format
   require_relative 'shared_metadata'                    # shared_metadata.rb
   require_relative 'txt_shared'                         # txt_shared.rb
+  require_relative 'xml_shared'                         # xml_shared.rb
+    include SiSU_XML_Munge
   @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0
   class Source
     begin
index bd0e383b4d89b594f4a4168db4e446fc800639f7..d059cd7babcffac0b7b3fb57c9a81c3fa4940cc2 100644 (file)
@@ -232,104 +232,6 @@ module SiSU_XML_Munge
         if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn
           dob.obj=dob.obj.gsub(/ /u,' ').           # space identify
             gsub(/ /u,' ')           # space identify
-        else
-          dob.obj=dob.obj.gsub(/¢/u,'&cent;').      # &#162;
-            gsub(/£/u,'&pound;').     # &#163;
-            gsub(/¥/u,'&yen;').       # &#165;
-            gsub(/§/u,'&sect;').      # &#167;
-            gsub(/©/u,'&copy;').      # &#169;
-            gsub(/ª/u,'&ordf;').      # &#170;
-            gsub(/«/u,'&laquo;').     # &#171;
-            gsub(/®/u,'&reg;').       # &#174;
-            gsub(/°/u,'&deg;').       # &#176;
-            gsub(/±/u,'&plusmn;').    # &#177;
-            gsub(/²/u,'&sup2;').      # &#178;
-            gsub(/³/u,'&sup3;').      # &#179;
-            gsub(/µ/u,'&micro;').     # &#181;
-            gsub(/¶/u,'&para;').      # &#182;
-            gsub(/¹/u,'&sup1;').      # &#185;
-            gsub(/º/u,'&ordm;').      # &#186;
-            gsub(/»/u,'&raquo;').     # &#187;
-            gsub(/¼/u,'&frac14;').    # &#188;
-            gsub(/½/u,'&frac12;').    # &#189;
-            gsub(/¾/u,'&frac34;').    # &#190;
-            gsub(/×/u,'&times;').     # &#215;
-            gsub(/÷/u,'&divide;').    # &#247;
-            gsub(/¿/u,'&iquest;').    # &#191;
-            gsub(/À/u,'&Agrave;').    # &#192;
-            gsub(/Á/u,'&Aacute;').    # &#193;
-            gsub(/Â/u,'&Acirc;').     # &#194;
-            gsub(/Ã/u,'&Atilde;').    # &#195;
-            gsub(/Ä/u,'&Auml;').      # &#196;
-            gsub(/Å/u,'&Aring;').     # &#197;
-            gsub(/Æ/u,'&AElig;').     # &#198;
-            gsub(/Ç/u,'&Ccedil;').    # &#199;
-            gsub(/È/u,'&Egrave;').    # &#200;
-            gsub(/É/u,'&Eacute;').    # &#201;
-            gsub(/Ê/u,'&Ecirc;').     # &#202;
-            gsub(/Ë/u,'&Euml;').      # &#203;
-            gsub(/Ì/u,'&Igrave;').    # &#204;
-            gsub(/Í/u,'&Iacute;').    # &#205;
-            gsub(/Î/u,'&Icirc;').     # &#206;
-            gsub(/Ï/u,'&Iuml;').      # &#207;
-            gsub(/Ð/u,'&ETH;').       # &#208;
-            gsub(/Ñ/u,'&Ntilde;').    # &#209;
-            gsub(/Ò/u,'&Ograve;').    # &#210;
-            gsub(/Ó/u,'&Oacute;').    # &#211;
-            gsub(/Ô/u,'&Ocirc;').     # &#212;
-            gsub(/Õ/u,'&Otilde;').    # &#213;
-            gsub(/Ö/u,'&Ouml;').      # &#214;
-            gsub(/Ø/u,'&Oslash;').    # &#216;
-            gsub(/Ù/u,'&Ugrave;').    # &#217;
-            gsub(/Ú/u,'&Uacute;').    # &#218;
-            gsub(/Û/u,'&Ucirc;').     # &#219;
-            gsub(/Ü/u,'&Uuml;').      # &#220;
-            gsub(/Ý/u,'&Yacute;').    # &#221;
-            gsub(/Þ/u,'&THORN;').     # &#222;
-            gsub(/ß/u,'&szlig;').     # &#223;
-            gsub(/à/u,'&agrave;').    # &#224;
-            gsub(/á/u,'&aacute;').    # &#225;
-            gsub(/â/u,'&acirc;').     # &#226;
-            gsub(/ã/u,'&atilde;').    # &#227;
-            gsub(/ä/u,'&auml;').      # &#228;
-            gsub(/å/u,'&aring;').     # &#229;
-            gsub(/æ/u,'&aelig;').     # &#230;
-            gsub(/ç/u,'&ccedil;').    # &#231;
-            gsub(/è/u,'&egrave;').    # &#232;
-            gsub(/é/u,'&acute;').     # &#233;
-            gsub(/ê/u,'&circ;').      # &#234;
-            gsub(/ë/u,'&euml;').      # &#235;
-            gsub(/ì/u,'&igrave;').    # &#236;
-            gsub(/í/u,'&acute;').     # &#237;
-            gsub(/î/u,'&icirc;').     # &#238;
-            gsub(/ï/u,'&iuml;').      # &#239;
-            gsub(/ð/u,'&eth;').       # &#240;
-            gsub(/ñ/u,'&ntilde;').    # &#241;
-            gsub(/ò/u,'&ograve;').    # &#242;
-            gsub(/ó/u,'&oacute;').    # &#243;
-            gsub(/ô/u,'&ocirc;').     # &#244;
-            gsub(/õ/u,'&otilde;').    # &#245;
-            gsub(/ö/u,'&ouml;').      # &#246;
-            gsub(/ø/u,'&oslash;').    # &#248;
-            gsub(/ù/u,'&ugrave;').    # &#250;
-            gsub(/ú/u,'&uacute;').    # &#251;
-            gsub(/û/u,'&ucirc;').     # &#252;
-            gsub(/ü/u,'&uuml;').      # &#253;
-            gsub(/þ/u,'&thorn;').     # &#254;
-            gsub(/ÿ/u,'&yuml;').      # &#255;
-            gsub(/‘/u,'&#lsquo;').    # &lsquo;  # &#8216;
-            gsub(/’/u,'&#rsquo;').    # &rsquo;  # &#8217;
-            gsub(/“/u,'&ldquo;').     # &ldquo;  # &#8220;
-            gsub(/”/u,'&rdquo;').     # &rdquo;  # &#8221;
-            gsub(/–/u,'&ndash;').     # &ndash;  # &#8211;
-            gsub(/—/u,'&mdash;').     # &mdash;  # &#8212;
-            gsub(/∝/u,'&prop;').      # &prop;   # &#8733;
-            gsub(/∞/u,'&infin;').     # &infin;  # &#8734;
-            gsub(/™/u,'&trade;').     # &trade;  # &#8482;
-            gsub(/✠/u,'&#10016;').    # &#10016;
-            #gsub(/✠/u '&dagger;').    # &dagger; # &#8224; incorrect replacement †
-            gsub(/ /u,' ').           # space identify
-            gsub(/ /u,' ')           # space identify
         end
       end
       self
@@ -411,7 +313,8 @@ module SiSU_XML_Munge
           %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}).
         gsub(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}").
         gsub(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}").
-        gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;')
+        gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;').
+        gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
       dob
     end
     def markup_light(dob='')
@@ -426,7 +329,8 @@ module SiSU_XML_Munge
         gsub(/&([^;]{1,5})/,'&amp;\1'). #sort, rough estimate, revisit #WATCH found in node not sax
         gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
           "<image.path>#{@md.file.output_path.xml.rel_image}\/\\1</image.path>").
-        gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;')
+        gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;').
+        gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
       wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
       dob.obj=tidywords(wordlist).join(' ').strip
       dob
@@ -698,7 +602,8 @@ module SiSU_XML_Tags #Format
     end
     def meta_content_clean(content='')
       content=if not content.nil?
-        content=content.tr('"',"'")
+        content=content.tr('"',"'").
+           gsub(/&/,'&amp;')
         content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
       else content
       end
index bef13903a9a3ea460e17e89fc28d55142d6fe629..b51fc8f4ef42fd71ab9d2eeee60fa415e8732886 100644 (file)
@@ -74,6 +74,14 @@ module SiSU_Metadata
       language=l[:n]
       tr=SiSU_Translate::Source.new(@md,language)
       @attrib='md'
+      def meta_content_clean(content='') # sanitise a metadata string before it is placed in markup; returns nil unchanged
+        content=if not content.nil?
+          content=content.tr('"',"'"). # every double quote becomes a single quote
+            gsub(/&/,'&amp;') # every ampersand is escaped, including any that already begin an entity
+          content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) # NOTE(review): char_enc.utf8 is defined outside this hunk - confirm what it rewrites
+        else content # nil input falls through as nil
+        end
+      end
       if @display_heading
         @tag,@inf=%{<b><u>Document Metadata</u></b>},''
         meta << self.meta_para
@@ -115,7 +123,7 @@ module SiSU_Metadata
       end
       if defined? @md.rights.all \
       and @md.rights.all=~/\S+/
-        @tag,@inf,@class=tr.rights,@md.rights.all,'dc' #15
+        @tag,@inf,@class=tr.rights,meta_content_clean(@md.rights.all),'dc' #15
         meta << self.meta_para
       end
       if defined? @md.classify.subject \
@@ -810,7 +818,8 @@ module SiSU_Metadata
     end
     def xml_docbook
       def meta_para
-        inf_xml=char_enc(@inf).utf8
+        inf_xml=char_enc(@inf).amp
+        inf_xml=char_enc(inf_xml).utf8
         inf_xml=char_enc(inf_xml).br
         <<WOK
 #{Ax[:tab]}<#{@tag}>
@@ -843,6 +852,7 @@ WOK
     end
     def xml_dom
       def meta_para
+        inf_xml=char_enc(inf_xml).amp
         inf_xml=char_enc(@inf).utf8
         inf_xml=char_enc(inf_xml).br
         <<WOK
@@ -861,6 +871,7 @@ WOK
     end
     def xhtml_scroll
       def meta_para
+        inf_xml=char_enc(inf_xml).amp
         inf_xml=char_enc(@inf).utf8
         inf_xml=char_enc(inf_xml).br
         <<WOK
index 17f3a3356ed344929bba038307e1a4ddecd269a6..3bb3306655b5ac3fe1b558b05d16ed677757afe6 100644 (file)
@@ -234,7 +234,7 @@ WOK
           end
           extract_endnotes(dob)
           dob.obj=dob.obj.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'<en>\1</en>'). #footnote/endnote clean
-            gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'<en>\1</en>') #footnote/endnote clean
+            gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'<en>\1</en>')
           util=SiSU_TextUtils::Wrap.new(dob.obj,70)
           wrapped=util.line_wrap
           @@xml[:body] << if defined? dob.ocn
index 26399ef7129299ec40c01d31189da537b4452356..44495ff37d31d97a8555dc1442eb82476ed6350e 100644 (file)
@@ -71,6 +71,8 @@ module SiSU_XHTML_EPUB2
     include SiSU_Particulars
   require_relative 'defaults'                           # defaults.rb
     include SiSU_Viz
+  require_relative 'xml_shared'                         # xml_shared.rb
+    include SiSU_XML_Munge
   require_relative 'xhtml_table'                        # xhtml_table.rb
   require_relative 'xhtml_epub2_format'                 # xhtml_epub2_format.rb
     include SiSU_XHTML_EPUB2_Format
index 38cc7f853ba1ca78201a60e92f918b42a92fd12b..e649fa51287881f440124e96d2b940c92000ae7a 100644 (file)
@@ -191,15 +191,26 @@ module SiSU_XML_Format
       end
       if defined? @md.rights.all \
       and @md.rights.all=~/\S+/                                               # DublinCore 15 - rights
-        @rdf_rights=%{    dc.rights="#{@md.rights.all}"\n}
-        @rights=%{<meta name="dc.rights" content="#{@md.rights.all}" />\n}
+        rights=meta_content_clean(@md.rights.all)
+        copyright=meta_content_clean(@md.rights.copyright.all)
+        @rdf_rights=%{    dc.rights="#{rights}"\n}
+        @rights=%{<meta name="dc.rights" content="#{rights}" />\n}
       end
-      @copyright=%{<meta name="copyright" content="#{@md.rights.copyright.all}" />\n} if @md.rights.copyright.all # possibly redundant see dc.rights
+      @copyright=%{<meta name="copyright" content="#{copyright}" />\n} \
+        if @md.rights.copyright.all # possibly redundant see dc.rights
       @owner=%{<meta name="owner" content="#{@md.owner}" />\n} if @md.owner
       @keywords=%{<meta name="keywords" content="#{@md.keywords}" />\n} if @md.keywords
       @vz=SiSU_Viz::Defaults.new #margin,paragraph,table,banner,url,png,txt,color,font,nav_txt,nav_png,credits,js,php
       @index='index'
     end
+    def meta_content_clean(content='') # sanitise a metadata string before it is placed in markup; returns nil unchanged
+      content=if not content.nil?
+        content=content.tr('"',"'"). # every double quote becomes a single quote
+           gsub(/&/,'&amp;') # every ampersand is escaped, including any that already begin an entity
+        content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) # NOTE(review): char_enc.utf8 is defined outside this hunk - confirm what it rewrites
+      else content # nil input falls through as nil
+      end
+    end
     def table_close
       '</font> </td></tr></table>'
     end
@@ -558,7 +569,8 @@ WOK
     def prefix_a
     end
     def rights
-      rights=@md.rights.copyright.all.gsub(/^\s*Copyright\s+\(C\)/,'Copyright <sup>&copy;</sup>&nbsp;')
+      copyright=meta_content_clean(@md.rights.copyright.all)
+      rights=copyright(/^\s*Copyright\s+\(C\)/,'Copyright <sup>&copy;</sup>&nbsp;')
       %{<p class="small_left">Rights: #{rights}</p>
 <p />}
     end
index 967812dba8c319f1968e1ac7b51b281c5a78ed8a..e6cfafe514bd56b757254cca565e779374de5a84 100644 (file)
@@ -71,6 +71,8 @@ module SiSU_XML_ODF_ODT
     include SiSU_XML_ODF_ODT_Format
   require_relative 'shared_metadata'                    # shared_metadata.rb
   require_relative 'txt_shared'                         # txt_shared.rb
+  require_relative 'xml_shared'                         # xml_shared.rb
+    include SiSU_XML_Munge
   @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0
   class Source
     begin
index 35d1132d114081f6b782a829bfb032288f37e613..3ffda8f3362ef1c3e07886de5a3001619f9456f4 100644 (file)
@@ -232,104 +232,6 @@ module SiSU_XML_Munge
         if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn
           dob.obj=dob.obj.gsub(/ /u,' ').           # space identify
             gsub(/ /u,' ')           # space identify
-        else
-          dob.obj=dob.obj.gsub(/¢/u,'&cent;').      # &#162;
-            gsub(/£/u,'&pound;').     # &#163;
-            gsub(/¥/u,'&yen;').       # &#165;
-            gsub(/§/u,'&sect;').      # &#167;
-            gsub(/©/u,'&copy;').      # &#169;
-            gsub(/ª/u,'&ordf;').      # &#170;
-            gsub(/«/u,'&laquo;').     # &#171;
-            gsub(/®/u,'&reg;').       # &#174;
-            gsub(/°/u,'&deg;').       # &#176;
-            gsub(/±/u,'&plusmn;').    # &#177;
-            gsub(/²/u,'&sup2;').      # &#178;
-            gsub(/³/u,'&sup3;').      # &#179;
-            gsub(/µ/u,'&micro;').     # &#181;
-            gsub(/¶/u,'&para;').      # &#182;
-            gsub(/¹/u,'&sup1;').      # &#185;
-            gsub(/º/u,'&ordm;').      # &#186;
-            gsub(/»/u,'&raquo;').     # &#187;
-            gsub(/¼/u,'&frac14;').    # &#188;
-            gsub(/½/u,'&frac12;').    # &#189;
-            gsub(/¾/u,'&frac34;').    # &#190;
-            gsub(/×/u,'&times;').     # &#215;
-            gsub(/÷/u,'&divide;').    # &#247;
-            gsub(/¿/u,'&iquest;').    # &#191;
-            gsub(/À/u,'&Agrave;').    # &#192;
-            gsub(/Á/u,'&Aacute;').    # &#193;
-            gsub(/Â/u,'&Acirc;').     # &#194;
-            gsub(/Ã/u,'&Atilde;').    # &#195;
-            gsub(/Ä/u,'&Auml;').      # &#196;
-            gsub(/Å/u,'&Aring;').     # &#197;
-            gsub(/Æ/u,'&AElig;').     # &#198;
-            gsub(/Ç/u,'&Ccedil;').    # &#199;
-            gsub(/È/u,'&Egrave;').    # &#200;
-            gsub(/É/u,'&Eacute;').    # &#201;
-            gsub(/Ê/u,'&Ecirc;').     # &#202;
-            gsub(/Ë/u,'&Euml;').      # &#203;
-            gsub(/Ì/u,'&Igrave;').    # &#204;
-            gsub(/Í/u,'&Iacute;').    # &#205;
-            gsub(/Î/u,'&Icirc;').     # &#206;
-            gsub(/Ï/u,'&Iuml;').      # &#207;
-            gsub(/Ð/u,'&ETH;').       # &#208;
-            gsub(/Ñ/u,'&Ntilde;').    # &#209;
-            gsub(/Ò/u,'&Ograve;').    # &#210;
-            gsub(/Ó/u,'&Oacute;').    # &#211;
-            gsub(/Ô/u,'&Ocirc;').     # &#212;
-            gsub(/Õ/u,'&Otilde;').    # &#213;
-            gsub(/Ö/u,'&Ouml;').      # &#214;
-            gsub(/Ø/u,'&Oslash;').    # &#216;
-            gsub(/Ù/u,'&Ugrave;').    # &#217;
-            gsub(/Ú/u,'&Uacute;').    # &#218;
-            gsub(/Û/u,'&Ucirc;').     # &#219;
-            gsub(/Ü/u,'&Uuml;').      # &#220;
-            gsub(/Ý/u,'&Yacute;').    # &#221;
-            gsub(/Þ/u,'&THORN;').     # &#222;
-            gsub(/ß/u,'&szlig;').     # &#223;
-            gsub(/à/u,'&agrave;').    # &#224;
-            gsub(/á/u,'&aacute;').    # &#225;
-            gsub(/â/u,'&acirc;').     # &#226;
-            gsub(/ã/u,'&atilde;').    # &#227;
-            gsub(/ä/u,'&auml;').      # &#228;
-            gsub(/å/u,'&aring;').     # &#229;
-            gsub(/æ/u,'&aelig;').     # &#230;
-            gsub(/ç/u,'&ccedil;').    # &#231;
-            gsub(/è/u,'&egrave;').    # &#232;
-            gsub(/é/u,'&acute;').     # &#233;
-            gsub(/ê/u,'&circ;').      # &#234;
-            gsub(/ë/u,'&euml;').      # &#235;
-            gsub(/ì/u,'&igrave;').    # &#236;
-            gsub(/í/u,'&acute;').     # &#237;
-            gsub(/î/u,'&icirc;').     # &#238;
-            gsub(/ï/u,'&iuml;').      # &#239;
-            gsub(/ð/u,'&eth;').       # &#240;
-            gsub(/ñ/u,'&ntilde;').    # &#241;
-            gsub(/ò/u,'&ograve;').    # &#242;
-            gsub(/ó/u,'&oacute;').    # &#243;
-            gsub(/ô/u,'&ocirc;').     # &#244;
-            gsub(/õ/u,'&otilde;').    # &#245;
-            gsub(/ö/u,'&ouml;').      # &#246;
-            gsub(/ø/u,'&oslash;').    # &#248;
-            gsub(/ù/u,'&ugrave;').    # &#250;
-            gsub(/ú/u,'&uacute;').    # &#251;
-            gsub(/û/u,'&ucirc;').     # &#252;
-            gsub(/ü/u,'&uuml;').      # &#253;
-            gsub(/þ/u,'&thorn;').     # &#254;
-            gsub(/ÿ/u,'&yuml;').      # &#255;
-            gsub(/‘/u,'&#lsquo;').    # &lsquo;  # &#8216;
-            gsub(/’/u,'&#rsquo;').    # &rsquo;  # &#8217;
-            gsub(/“/u,'&ldquo;').     # &ldquo;  # &#8220;
-            gsub(/”/u,'&rdquo;').     # &rdquo;  # &#8221;
-            gsub(/–/u,'&ndash;').     # &ndash;  # &#8211;
-            gsub(/—/u,'&mdash;').     # &mdash;  # &#8212;
-            gsub(/∝/u,'&prop;').      # &prop;   # &#8733;
-            gsub(/∞/u,'&infin;').     # &infin;  # &#8734;
-            gsub(/™/u,'&trade;').     # &trade;  # &#8482;
-            gsub(/✠/u,'&#10016;').    # &#10016;
-            #gsub(/✠/u '&dagger;').    # &dagger; # &#8224; incorrect replacement †
-            gsub(/ /u,' ').           # space identify
-            gsub(/ /u,' ')           # space identify
         end
       end
       self
@@ -411,7 +313,8 @@ module SiSU_XML_Munge
           %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}).
         gsub(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}").
         gsub(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}").
-        gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;')
+        gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;').
+        gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
       dob
     end
     def markup_light(dob='')
@@ -426,7 +329,8 @@ module SiSU_XML_Munge
         gsub(/&([^;]{1,5})/,'&amp;\1'). #sort, rough estimate, revisit #WATCH found in node not sax
         gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
           "<image.path>#{@md.file.output_path.xml.rel_image}\/\\1</image.path>").
-        gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;')
+        gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;').
+        gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
       wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
       dob.obj=tidywords(wordlist).join(' ').strip
       dob
@@ -698,7 +602,8 @@ module SiSU_XML_Tags #Format
     end
     def meta_content_clean(content='')
       content=if not content.nil?
-        content=content.tr('"',"'")
+        content=content.tr('"',"'").
+           gsub(/&/,'&amp;')
         content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
       else content
       end