about summary refs log tree commit diff homepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/sisu/v5/shared_metadata.rb 15
-rw-r--r-- lib/sisu/v5/xhtml.rb 2
-rw-r--r-- lib/sisu/v5/xhtml_epub2.rb 2
-rw-r--r-- lib/sisu/v5/xml_format.rb 20
-rw-r--r-- lib/sisu/v5/xml_odf_odt.rb 2
-rw-r--r-- lib/sisu/v5/xml_shared.rb 107
-rw-r--r-- lib/sisu/v6/shared_metadata.rb 15
-rw-r--r-- lib/sisu/v6/xhtml.rb 2
-rw-r--r-- lib/sisu/v6/xhtml_epub2.rb 2
-rw-r--r-- lib/sisu/v6/xml_format.rb 20
-rw-r--r-- lib/sisu/v6/xml_odf_odt.rb 2
-rw-r--r-- lib/sisu/v6/xml_shared.rb 107
12 files changed, 80 insertions, 216 deletions
diff --git a/lib/sisu/v5/shared_metadata.rb b/lib/sisu/v5/shared_metadata.rb
index 405e7939..e6721964 100644
--- a/lib/sisu/v5/shared_metadata.rb
+++ b/lib/sisu/v5/shared_metadata.rb
@@ -74,6 +74,14 @@ module SiSU_Metadata
language=l[:n]
tr=SiSU_Translate::Source.new(@md,language)
@attrib='md'
+ def meta_content_clean(content='')
+ content=if not content.nil?
+ content=content.tr('"',"'").
+ gsub(/&/,'&amp;')
+ content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
+ else content
+ end
+ end
if @display_heading
@tag,@inf=%{<b><u>Document Metadata</u></b>},''
meta << self.meta_para
@@ -115,7 +123,7 @@ module SiSU_Metadata
end
if defined? @md.rights.all \
and @md.rights.all=~/\S+/
- @tag,@inf,@class=tr.rights,@md.rights.all,'dc' #15
+ @tag,@inf,@class=tr.rights,meta_content_clean(@md.rights.all),'dc' #15
meta << self.meta_para
end
if defined? @md.classify.subject \
@@ -810,7 +818,8 @@ module SiSU_Metadata
end
def xml_docbook
def meta_para
- inf_xml=char_enc(@inf).utf8
+ inf_xml=char_enc(@inf).amp
+ inf_xml=char_enc(inf_xml).utf8
inf_xml=char_enc(inf_xml).br
<<WOK
#{Ax[:tab]}<#{@tag}>
@@ -843,6 +852,7 @@ WOK
end
def xml_dom
def meta_para
+ inf_xml=char_enc(inf_xml).amp
inf_xml=char_enc(@inf).utf8
inf_xml=char_enc(inf_xml).br
<<WOK
@@ -861,6 +871,7 @@ WOK
end
def xhtml_scroll
def meta_para
+ inf_xml=char_enc(inf_xml).amp
inf_xml=char_enc(@inf).utf8
inf_xml=char_enc(inf_xml).br
<<WOK
diff --git a/lib/sisu/v5/xhtml.rb b/lib/sisu/v5/xhtml.rb
index 138ccf6d..60049043 100644
--- a/lib/sisu/v5/xhtml.rb
+++ b/lib/sisu/v5/xhtml.rb
@@ -234,7 +234,7 @@ WOK
end
extract_endnotes(dob)
dob.obj=dob.obj.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'<en>\1</en>'). #footnote/endnote clean
- gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'<en>\1</en>') #footnote/endnote clean
+ gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'<en>\1</en>')
util=SiSU_TextUtils::Wrap.new(dob.obj,70)
wrapped=util.line_wrap
@@xml[:body] << if defined? dob.ocn
diff --git a/lib/sisu/v5/xhtml_epub2.rb b/lib/sisu/v5/xhtml_epub2.rb
index e0aa70cb..daa00f39 100644
--- a/lib/sisu/v5/xhtml_epub2.rb
+++ b/lib/sisu/v5/xhtml_epub2.rb
@@ -71,6 +71,8 @@ module SiSU_XHTML_EPUB2
include SiSU_Particulars
require_relative 'defaults' # defaults.rb
include SiSU_Viz
+ require_relative 'xml_shared' # xml_shared.rb
+ include SiSU_XML_Munge
require_relative 'xhtml_table' # xhtml_table.rb
require_relative 'xhtml_epub2_format' # xhtml_epub2_format.rb
include SiSU_XHTML_EPUB2_Format
diff --git a/lib/sisu/v5/xml_format.rb b/lib/sisu/v5/xml_format.rb
index fb2cff88..4a7264c0 100644
--- a/lib/sisu/v5/xml_format.rb
+++ b/lib/sisu/v5/xml_format.rb
@@ -191,15 +191,26 @@ module SiSU_XML_Format
end
if defined? @md.rights.all \
and @md.rights.all=~/\S+/ # DublinCore 15 - rights
- @rdf_rights=%{ dc.rights="#{@md.rights.all}"\n}
- @rights=%{<meta name="dc.rights" content="#{@md.rights.all}" />\n}
+ rights=meta_content_clean(@md.rights.all)
+ copyright=meta_content_clean(@md.rights.copyright.all)
+ @rdf_rights=%{ dc.rights="#{rights}"\n}
+ @rights=%{<meta name="dc.rights" content="#{rights}" />\n}
end
- @copyright=%{<meta name="copyright" content="#{@md.rights.copyright.all}" />\n} if @md.rights.copyright.all # possibly redundant see dc.rights
+ @copyright=%{<meta name="copyright" content="#{copyright}" />\n} \
+ if @md.rights.copyright.all # possibly redundant see dc.rights
@owner=%{<meta name="owner" content="#{@md.owner}" />\n} if @md.owner
@keywords=%{<meta name="keywords" content="#{@md.keywords}" />\n} if @md.keywords
@vz=SiSU_Viz::Defaults.new #margin,paragraph,table,banner,url,png,txt,color,font,nav_txt,nav_png,credits,js,php
@index='index'
end
+ def meta_content_clean(content='')
+ content=if not content.nil?
+ content=content.tr('"',"'").
+ gsub(/&/,'&amp;')
+ content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
+ else content
+ end
+ end
def table_close
'</font> </td></tr></table>'
end
@@ -558,7 +569,8 @@ WOK
def prefix_a
end
def rights
- rights=@md.rights.copyright.all.gsub(/^\s*Copyright\s+\(C\)/,'Copyright <sup>&copy;</sup>&nbsp;')
+ copyright=meta_content_clean(@md.rights.copyright.all)
+ rights=copyright(/^\s*Copyright\s+\(C\)/,'Copyright <sup>&copy;</sup>&nbsp;')
%{<p class="small_left">Rights: #{rights}</p>
<p />}
end
diff --git a/lib/sisu/v5/xml_odf_odt.rb b/lib/sisu/v5/xml_odf_odt.rb
index 4e540fde..0c9dc7aa 100644
--- a/lib/sisu/v5/xml_odf_odt.rb
+++ b/lib/sisu/v5/xml_odf_odt.rb
@@ -71,6 +71,8 @@ module SiSU_XML_ODF_ODT
include SiSU_XML_ODF_ODT_Format
require_relative 'shared_metadata' # shared_metadata.rb
require_relative 'txt_shared' # txt_shared.rb
+ require_relative 'xml_shared' # xml_shared.rb
+ include SiSU_XML_Munge
@@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0
class Source
begin
diff --git a/lib/sisu/v5/xml_shared.rb b/lib/sisu/v5/xml_shared.rb
index bd0e383b..d059cd7b 100644
--- a/lib/sisu/v5/xml_shared.rb
+++ b/lib/sisu/v5/xml_shared.rb
@@ -232,104 +232,6 @@ module SiSU_XML_Munge
if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn
dob.obj=dob.obj.gsub(/ /u,' '). # space identify
gsub(/ /u,' ') # space identify
- else
- dob.obj=dob.obj.gsub(/¢/u,'&cent;'). # &#162;
- gsub(/£/u,'&pound;'). # &#163;
- gsub(/¥/u,'&yen;'). # &#165;
- gsub(/§/u,'&sect;'). # &#167;
- gsub(/©/u,'&copy;'). # &#169;
- gsub(/ª/u,'&ordf;'). # &#170;
- gsub(/«/u,'&laquo;'). # &#171;
- gsub(/®/u,'&reg;'). # &#174;
- gsub(/°/u,'&deg;'). # &#176;
- gsub(/±/u,'&plusmn;'). # &#177;
- gsub(/²/u,'&sup2;'). # &#178;
- gsub(/³/u,'&sup3;'). # &#179;
- gsub(/µ/u,'&micro;'). # &#181;
- gsub(/¶/u,'&para;'). # &#182;
- gsub(/¹/u,'&sup1;'). # &#185;
- gsub(/º/u,'&ordm;'). # &#186;
- gsub(/»/u,'&raquo;'). # &#187;
- gsub(/¼/u,'&frac14;'). # &#188;
- gsub(/½/u,'&frac12;'). # &#189;
- gsub(/¾/u,'&frac34;'). # &#190;
- gsub(/×/u,'&times;'). # &#215;
- gsub(/÷/u,'&divide;'). # &#247;
- gsub(/¿/u,'&iquest;'). # &#191;
- gsub(/À/u,'&Agrave;'). # &#192;
- gsub(/Á/u,'&Aacute;'). # &#193;
- gsub(/Â/u,'&Acirc;'). # &#194;
- gsub(/Ã/u,'&Atilde;'). # &#195;
- gsub(/Ä/u,'&Auml;'). # &#196;
- gsub(/Å/u,'&Aring;'). # &#197;
- gsub(/Æ/u,'&AElig;'). # &#198;
- gsub(/Ç/u,'&Ccedil;'). # &#199;
- gsub(/È/u,'&Egrave;'). # &#200;
- gsub(/É/u,'&Eacute;'). # &#201;
- gsub(/Ê/u,'&Ecirc;'). # &#202;
- gsub(/Ë/u,'&Euml;'). # &#203;
- gsub(/Ì/u,'&Igrave;'). # &#204;
- gsub(/Í/u,'&Iacute;'). # &#205;
- gsub(/Î/u,'&Icirc;'). # &#206;
- gsub(/Ï/u,'&Iuml;'). # &#207;
- gsub(/Ð/u,'&ETH;'). # &#208;
- gsub(/Ñ/u,'&Ntilde;'). # &#209;
- gsub(/Ò/u,'&Ograve;'). # &#210;
- gsub(/Ó/u,'&Oacute;'). # &#211;
- gsub(/Ô/u,'&Ocirc;'). # &#212;
- gsub(/Õ/u,'&Otilde;'). # &#213;
- gsub(/Ö/u,'&Ouml;'). # &#214;
- gsub(/Ø/u,'&Oslash;'). # &#216;
- gsub(/Ù/u,'&Ugrave;'). # &#217;
- gsub(/Ú/u,'&Uacute;'). # &#218;
- gsub(/Û/u,'&Ucirc;'). # &#219;
- gsub(/Ü/u,'&Uuml;'). # &#220;
- gsub(/Ý/u,'&Yacute;'). # &#221;
- gsub(/Þ/u,'&THORN;'). # &#222;
- gsub(/ß/u,'&szlig;'). # &#223;
- gsub(/à/u,'&agrave;'). # &#224;
- gsub(/á/u,'&aacute;'). # &#225;
- gsub(/â/u,'&acirc;'). # &#226;
- gsub(/ã/u,'&atilde;'). # &#227;
- gsub(/ä/u,'&auml;'). # &#228;
- gsub(/å/u,'&aring;'). # &#229;
- gsub(/æ/u,'&aelig;'). # &#230;
- gsub(/ç/u,'&ccedil;'). # &#231;
- gsub(/è/u,'&egrave;'). # &#232;
- gsub(/é/u,'&acute;'). # &#233;
- gsub(/ê/u,'&circ;'). # &#234;
- gsub(/ë/u,'&euml;'). # &#235;
- gsub(/ì/u,'&igrave;'). # &#236;
- gsub(/í/u,'&acute;'). # &#237;
- gsub(/î/u,'&icirc;'). # &#238;
- gsub(/ï/u,'&iuml;'). # &#239;
- gsub(/ð/u,'&eth;'). # &#240;
- gsub(/ñ/u,'&ntilde;'). # &#241;
- gsub(/ò/u,'&ograve;'). # &#242;
- gsub(/ó/u,'&oacute;'). # &#243;
- gsub(/ô/u,'&ocirc;'). # &#244;
- gsub(/õ/u,'&otilde;'). # &#245;
- gsub(/ö/u,'&ouml;'). # &#246;
- gsub(/ø/u,'&oslash;'). # &#248;
- gsub(/ù/u,'&ugrave;'). # &#250;
- gsub(/ú/u,'&uacute;'). # &#251;
- gsub(/û/u,'&ucirc;'). # &#252;
- gsub(/ü/u,'&uuml;'). # &#253;
- gsub(/þ/u,'&thorn;'). # &#254;
- gsub(/ÿ/u,'&yuml;'). # &#255;
- gsub(/‘/u,'&#lsquo;'). # &lsquo; # &#8216;
- gsub(/’/u,'&#rsquo;'). # &rsquo; # &#8217;
- gsub(/“/u,'&ldquo;'). # &ldquo; # &#8220;
- gsub(/”/u,'&rdquo;'). # &rdquo; # &#8221;
- gsub(/–/u,'&ndash;'). # &ndash; # &#8211;
- gsub(/—/u,'&mdash;'). # &mdash; # &#8212;
- gsub(/∝/u,'&prop;'). # &prop; # &#8733;
- gsub(/∞/u,'&infin;'). # &infin; # &#8734;
- gsub(/™/u,'&trade;'). # &trade; # &#8482;
- gsub(/✠/u,'&#10016;'). # &#10016;
- #gsub(/✠/u '&dagger;'). # &dagger; # &#8224; incorrect replacement †
- gsub(/ /u,' '). # space identify
- gsub(/ /u,' ') # space identify
end
end
self
@@ -411,7 +313,8 @@ module SiSU_XML_Munge
%{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}).
gsub(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}").
gsub(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}").
- gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;')
+ gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;').
+ gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
dob
end
def markup_light(dob='')
@@ -426,7 +329,8 @@ module SiSU_XML_Munge
gsub(/&([^;]{1,5})/,'&amp;\1'). #sort, rough estimate, revisit #WATCH found in node not sax
gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
"<image.path>#{@md.file.output_path.xml.rel_image}\/\\1</image.path>").
- gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;')
+ gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;').
+ gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
dob.obj=tidywords(wordlist).join(' ').strip
dob
@@ -698,7 +602,8 @@ module SiSU_XML_Tags #Format
end
def meta_content_clean(content='')
content=if not content.nil?
- content=content.tr('"',"'")
+ content=content.tr('"',"'").
+ gsub(/&/,'&amp;')
content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
else content
end
diff --git a/lib/sisu/v6/shared_metadata.rb b/lib/sisu/v6/shared_metadata.rb
index bef13903..b51fc8f4 100644
--- a/lib/sisu/v6/shared_metadata.rb
+++ b/lib/sisu/v6/shared_metadata.rb
@@ -74,6 +74,14 @@ module SiSU_Metadata
language=l[:n]
tr=SiSU_Translate::Source.new(@md,language)
@attrib='md'
+ def meta_content_clean(content='')
+ content=if not content.nil?
+ content=content.tr('"',"'").
+ gsub(/&/,'&amp;')
+ content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
+ else content
+ end
+ end
if @display_heading
@tag,@inf=%{<b><u>Document Metadata</u></b>},''
meta << self.meta_para
@@ -115,7 +123,7 @@ module SiSU_Metadata
end
if defined? @md.rights.all \
and @md.rights.all=~/\S+/
- @tag,@inf,@class=tr.rights,@md.rights.all,'dc' #15
+ @tag,@inf,@class=tr.rights,meta_content_clean(@md.rights.all),'dc' #15
meta << self.meta_para
end
if defined? @md.classify.subject \
@@ -810,7 +818,8 @@ module SiSU_Metadata
end
def xml_docbook
def meta_para
- inf_xml=char_enc(@inf).utf8
+ inf_xml=char_enc(@inf).amp
+ inf_xml=char_enc(inf_xml).utf8
inf_xml=char_enc(inf_xml).br
<<WOK
#{Ax[:tab]}<#{@tag}>
@@ -843,6 +852,7 @@ WOK
end
def xml_dom
def meta_para
+ inf_xml=char_enc(inf_xml).amp
inf_xml=char_enc(@inf).utf8
inf_xml=char_enc(inf_xml).br
<<WOK
@@ -861,6 +871,7 @@ WOK
end
def xhtml_scroll
def meta_para
+ inf_xml=char_enc(inf_xml).amp
inf_xml=char_enc(@inf).utf8
inf_xml=char_enc(inf_xml).br
<<WOK
diff --git a/lib/sisu/v6/xhtml.rb b/lib/sisu/v6/xhtml.rb
index 17f3a335..3bb33066 100644
--- a/lib/sisu/v6/xhtml.rb
+++ b/lib/sisu/v6/xhtml.rb
@@ -234,7 +234,7 @@ WOK
end
extract_endnotes(dob)
dob.obj=dob.obj.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'<en>\1</en>'). #footnote/endnote clean
- gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'<en>\1</en>') #footnote/endnote clean
+ gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'<en>\1</en>')
util=SiSU_TextUtils::Wrap.new(dob.obj,70)
wrapped=util.line_wrap
@@xml[:body] << if defined? dob.ocn
diff --git a/lib/sisu/v6/xhtml_epub2.rb b/lib/sisu/v6/xhtml_epub2.rb
index 26399ef7..44495ff3 100644
--- a/lib/sisu/v6/xhtml_epub2.rb
+++ b/lib/sisu/v6/xhtml_epub2.rb
@@ -71,6 +71,8 @@ module SiSU_XHTML_EPUB2
include SiSU_Particulars
require_relative 'defaults' # defaults.rb
include SiSU_Viz
+ require_relative 'xml_shared' # xml_shared.rb
+ include SiSU_XML_Munge
require_relative 'xhtml_table' # xhtml_table.rb
require_relative 'xhtml_epub2_format' # xhtml_epub2_format.rb
include SiSU_XHTML_EPUB2_Format
diff --git a/lib/sisu/v6/xml_format.rb b/lib/sisu/v6/xml_format.rb
index 38cc7f85..e649fa51 100644
--- a/lib/sisu/v6/xml_format.rb
+++ b/lib/sisu/v6/xml_format.rb
@@ -191,15 +191,26 @@ module SiSU_XML_Format
end
if defined? @md.rights.all \
and @md.rights.all=~/\S+/ # DublinCore 15 - rights
- @rdf_rights=%{ dc.rights="#{@md.rights.all}"\n}
- @rights=%{<meta name="dc.rights" content="#{@md.rights.all}" />\n}
+ rights=meta_content_clean(@md.rights.all)
+ copyright=meta_content_clean(@md.rights.copyright.all)
+ @rdf_rights=%{ dc.rights="#{rights}"\n}
+ @rights=%{<meta name="dc.rights" content="#{rights}" />\n}
end
- @copyright=%{<meta name="copyright" content="#{@md.rights.copyright.all}" />\n} if @md.rights.copyright.all # possibly redundant see dc.rights
+ @copyright=%{<meta name="copyright" content="#{copyright}" />\n} \
+ if @md.rights.copyright.all # possibly redundant see dc.rights
@owner=%{<meta name="owner" content="#{@md.owner}" />\n} if @md.owner
@keywords=%{<meta name="keywords" content="#{@md.keywords}" />\n} if @md.keywords
@vz=SiSU_Viz::Defaults.new #margin,paragraph,table,banner,url,png,txt,color,font,nav_txt,nav_png,credits,js,php
@index='index'
end
+ def meta_content_clean(content='')
+ content=if not content.nil?
+ content=content.tr('"',"'").
+ gsub(/&/,'&amp;')
+ content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
+ else content
+ end
+ end
def table_close
'</font> </td></tr></table>'
end
@@ -558,7 +569,8 @@ WOK
def prefix_a
end
def rights
- rights=@md.rights.copyright.all.gsub(/^\s*Copyright\s+\(C\)/,'Copyright <sup>&copy;</sup>&nbsp;')
+ copyright=meta_content_clean(@md.rights.copyright.all)
+ rights=copyright(/^\s*Copyright\s+\(C\)/,'Copyright <sup>&copy;</sup>&nbsp;')
%{<p class="small_left">Rights: #{rights}</p>
<p />}
end
diff --git a/lib/sisu/v6/xml_odf_odt.rb b/lib/sisu/v6/xml_odf_odt.rb
index 967812db..e6cfafe5 100644
--- a/lib/sisu/v6/xml_odf_odt.rb
+++ b/lib/sisu/v6/xml_odf_odt.rb
@@ -71,6 +71,8 @@ module SiSU_XML_ODF_ODT
include SiSU_XML_ODF_ODT_Format
require_relative 'shared_metadata' # shared_metadata.rb
require_relative 'txt_shared' # txt_shared.rb
+ require_relative 'xml_shared' # xml_shared.rb
+ include SiSU_XML_Munge
@@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0
class Source
begin
diff --git a/lib/sisu/v6/xml_shared.rb b/lib/sisu/v6/xml_shared.rb
index 35d1132d..3ffda8f3 100644
--- a/lib/sisu/v6/xml_shared.rb
+++ b/lib/sisu/v6/xml_shared.rb
@@ -232,104 +232,6 @@ module SiSU_XML_Munge
if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn
dob.obj=dob.obj.gsub(/ /u,' '). # space identify
gsub(/ /u,' ') # space identify
- else
- dob.obj=dob.obj.gsub(/¢/u,'&cent;'). # &#162;
- gsub(/£/u,'&pound;'). # &#163;
- gsub(/¥/u,'&yen;'). # &#165;
- gsub(/§/u,'&sect;'). # &#167;
- gsub(/©/u,'&copy;'). # &#169;
- gsub(/ª/u,'&ordf;'). # &#170;
- gsub(/«/u,'&laquo;'). # &#171;
- gsub(/®/u,'&reg;'). # &#174;
- gsub(/°/u,'&deg;'). # &#176;
- gsub(/±/u,'&plusmn;'). # &#177;
- gsub(/²/u,'&sup2;'). # &#178;
- gsub(/³/u,'&sup3;'). # &#179;
- gsub(/µ/u,'&micro;'). # &#181;
- gsub(/¶/u,'&para;'). # &#182;
- gsub(/¹/u,'&sup1;'). # &#185;
- gsub(/º/u,'&ordm;'). # &#186;
- gsub(/»/u,'&raquo;'). # &#187;
- gsub(/¼/u,'&frac14;'). # &#188;
- gsub(/½/u,'&frac12;'). # &#189;
- gsub(/¾/u,'&frac34;'). # &#190;
- gsub(/×/u,'&times;'). # &#215;
- gsub(/÷/u,'&divide;'). # &#247;
- gsub(/¿/u,'&iquest;'). # &#191;
- gsub(/À/u,'&Agrave;'). # &#192;
- gsub(/Á/u,'&Aacute;'). # &#193;
- gsub(/Â/u,'&Acirc;'). # &#194;
- gsub(/Ã/u,'&Atilde;'). # &#195;
- gsub(/Ä/u,'&Auml;'). # &#196;
- gsub(/Å/u,'&Aring;'). # &#197;
- gsub(/Æ/u,'&AElig;'). # &#198;
- gsub(/Ç/u,'&Ccedil;'). # &#199;
- gsub(/È/u,'&Egrave;'). # &#200;
- gsub(/É/u,'&Eacute;'). # &#201;
- gsub(/Ê/u,'&Ecirc;'). # &#202;
- gsub(/Ë/u,'&Euml;'). # &#203;
- gsub(/Ì/u,'&Igrave;'). # &#204;
- gsub(/Í/u,'&Iacute;'). # &#205;
- gsub(/Î/u,'&Icirc;'). # &#206;
- gsub(/Ï/u,'&Iuml;'). # &#207;
- gsub(/Ð/u,'&ETH;'). # &#208;
- gsub(/Ñ/u,'&Ntilde;'). # &#209;
- gsub(/Ò/u,'&Ograve;'). # &#210;
- gsub(/Ó/u,'&Oacute;'). # &#211;
- gsub(/Ô/u,'&Ocirc;'). # &#212;
- gsub(/Õ/u,'&Otilde;'). # &#213;
- gsub(/Ö/u,'&Ouml;'). # &#214;
- gsub(/Ø/u,'&Oslash;'). # &#216;
- gsub(/Ù/u,'&Ugrave;'). # &#217;
- gsub(/Ú/u,'&Uacute;'). # &#218;
- gsub(/Û/u,'&Ucirc;'). # &#219;
- gsub(/Ü/u,'&Uuml;'). # &#220;
- gsub(/Ý/u,'&Yacute;'). # &#221;
- gsub(/Þ/u,'&THORN;'). # &#222;
- gsub(/ß/u,'&szlig;'). # &#223;
- gsub(/à/u,'&agrave;'). # &#224;
- gsub(/á/u,'&aacute;'). # &#225;
- gsub(/â/u,'&acirc;'). # &#226;
- gsub(/ã/u,'&atilde;'). # &#227;
- gsub(/ä/u,'&auml;'). # &#228;
- gsub(/å/u,'&aring;'). # &#229;
- gsub(/æ/u,'&aelig;'). # &#230;
- gsub(/ç/u,'&ccedil;'). # &#231;
- gsub(/è/u,'&egrave;'). # &#232;
- gsub(/é/u,'&acute;'). # &#233;
- gsub(/ê/u,'&circ;'). # &#234;
- gsub(/ë/u,'&euml;'). # &#235;
- gsub(/ì/u,'&igrave;'). # &#236;
- gsub(/í/u,'&acute;'). # &#237;
- gsub(/î/u,'&icirc;'). # &#238;
- gsub(/ï/u,'&iuml;'). # &#239;
- gsub(/ð/u,'&eth;'). # &#240;
- gsub(/ñ/u,'&ntilde;'). # &#241;
- gsub(/ò/u,'&ograve;'). # &#242;
- gsub(/ó/u,'&oacute;'). # &#243;
- gsub(/ô/u,'&ocirc;'). # &#244;
- gsub(/õ/u,'&otilde;'). # &#245;
- gsub(/ö/u,'&ouml;'). # &#246;
- gsub(/ø/u,'&oslash;'). # &#248;
- gsub(/ù/u,'&ugrave;'). # &#250;
- gsub(/ú/u,'&uacute;'). # &#251;
- gsub(/û/u,'&ucirc;'). # &#252;
- gsub(/ü/u,'&uuml;'). # &#253;
- gsub(/þ/u,'&thorn;'). # &#254;
- gsub(/ÿ/u,'&yuml;'). # &#255;
- gsub(/‘/u,'&#lsquo;'). # &lsquo; # &#8216;
- gsub(/’/u,'&#rsquo;'). # &rsquo; # &#8217;
- gsub(/“/u,'&ldquo;'). # &ldquo; # &#8220;
- gsub(/”/u,'&rdquo;'). # &rdquo; # &#8221;
- gsub(/–/u,'&ndash;'). # &ndash; # &#8211;
- gsub(/—/u,'&mdash;'). # &mdash; # &#8212;
- gsub(/∝/u,'&prop;'). # &prop; # &#8733;
- gsub(/∞/u,'&infin;'). # &infin; # &#8734;
- gsub(/™/u,'&trade;'). # &trade; # &#8482;
- gsub(/✠/u,'&#10016;'). # &#10016;
- #gsub(/✠/u '&dagger;'). # &dagger; # &#8224; incorrect replacement †
- gsub(/ /u,' '). # space identify
- gsub(/ /u,' ') # space identify
end
end
self
@@ -411,7 +313,8 @@ module SiSU_XML_Munge
%{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}).
gsub(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}").
gsub(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}").
- gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;')
+ gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;').
+ gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
dob
end
def markup_light(dob='')
@@ -426,7 +329,8 @@ module SiSU_XML_Munge
gsub(/&([^;]{1,5})/,'&amp;\1'). #sort, rough estimate, revisit #WATCH found in node not sax
gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
"<image.path>#{@md.file.output_path.xml.rel_image}\/\\1</image.path>").
- gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;')
+ gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;').
+ gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
dob.obj=tidywords(wordlist).join(' ').strip
dob
@@ -698,7 +602,8 @@ module SiSU_XML_Tags #Format
end
def meta_content_clean(content='')
content=if not content.nil?
- content=content.tr('"',"'")
+ content=content.tr('"',"'").
+ gsub(/&/,'&amp;')
content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
else content
end