about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu/v5/xml_shared.rb
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2014-09-05 23:24:51 -0400
committer Ralph Amissah <ralph@amissah.com> 2014-09-05 23:24:51 -0400
commit a8187c8aa978e99dfbbcae1c6729e16c3438414e (patch)
tree db1ec0a09e972d2122059e665b59ad413a8fb648 /lib/sisu/v5/xml_shared.rb
parent debian/changelog (5.6.4-1) (diff)
parent v5 v6: ao_doc_str, if faulty document structure, stop; if --no-stop option, skip (diff)
Merge tag 'sisu_5.6.5' into debian/sid
SiSU 5.6.5
Diffstat (limited to 'lib/sisu/v5/xml_shared.rb')
-rw-r--r-- lib/sisu/v5/xml_shared.rb 107
1 files changed, 6 insertions, 101 deletions
diff --git a/lib/sisu/v5/xml_shared.rb b/lib/sisu/v5/xml_shared.rb
index bd0e383b..d059cd7b 100644
--- a/lib/sisu/v5/xml_shared.rb
+++ b/lib/sisu/v5/xml_shared.rb
@@ -232,104 +232,6 @@ module SiSU_XML_Munge
if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn
dob.obj=dob.obj.gsub(/ /u,' '). # space identify
gsub(/ /u,' ') # space identify
- else
- dob.obj=dob.obj.gsub(/¢/u,'&cent;'). # &#162;
- gsub(/£/u,'&pound;'). # &#163;
- gsub(/¥/u,'&yen;'). # &#165;
- gsub(/§/u,'&sect;'). # &#167;
- gsub(/©/u,'&copy;'). # &#169;
- gsub(/ª/u,'&ordf;'). # &#170;
- gsub(/«/u,'&laquo;'). # &#171;
- gsub(/®/u,'&reg;'). # &#174;
- gsub(/°/u,'&deg;'). # &#176;
- gsub(/±/u,'&plusmn;'). # &#177;
- gsub(/²/u,'&sup2;'). # &#178;
- gsub(/³/u,'&sup3;'). # &#179;
- gsub(/µ/u,'&micro;'). # &#181;
- gsub(/¶/u,'&para;'). # &#182;
- gsub(/¹/u,'&sup1;'). # &#185;
- gsub(/º/u,'&ordm;'). # &#186;
- gsub(/»/u,'&raquo;'). # &#187;
- gsub(/¼/u,'&frac14;'). # &#188;
- gsub(/½/u,'&frac12;'). # &#189;
- gsub(/¾/u,'&frac34;'). # &#190;
- gsub(/×/u,'&times;'). # &#215;
- gsub(/÷/u,'&divide;'). # &#247;
- gsub(/¿/u,'&iquest;'). # &#191;
- gsub(/À/u,'&Agrave;'). # &#192;
- gsub(/Á/u,'&Aacute;'). # &#193;
- gsub(/Â/u,'&Acirc;'). # &#194;
- gsub(/Ã/u,'&Atilde;'). # &#195;
- gsub(/Ä/u,'&Auml;'). # &#196;
- gsub(/Å/u,'&Aring;'). # &#197;
- gsub(/Æ/u,'&AElig;'). # &#198;
- gsub(/Ç/u,'&Ccedil;'). # &#199;
- gsub(/È/u,'&Egrave;'). # &#200;
- gsub(/É/u,'&Eacute;'). # &#201;
- gsub(/Ê/u,'&Ecirc;'). # &#202;
- gsub(/Ë/u,'&Euml;'). # &#203;
- gsub(/Ì/u,'&Igrave;'). # &#204;
- gsub(/Í/u,'&Iacute;'). # &#205;
- gsub(/Î/u,'&Icirc;'). # &#206;
- gsub(/Ï/u,'&Iuml;'). # &#207;
- gsub(/Ð/u,'&ETH;'). # &#208;
- gsub(/Ñ/u,'&Ntilde;'). # &#209;
- gsub(/Ò/u,'&Ograve;'). # &#210;
- gsub(/Ó/u,'&Oacute;'). # &#211;
- gsub(/Ô/u,'&Ocirc;'). # &#212;
- gsub(/Õ/u,'&Otilde;'). # &#213;
- gsub(/Ö/u,'&Ouml;'). # &#214;
- gsub(/Ø/u,'&Oslash;'). # &#216;
- gsub(/Ù/u,'&Ugrave;'). # &#217;
- gsub(/Ú/u,'&Uacute;'). # &#218;
- gsub(/Û/u,'&Ucirc;'). # &#219;
- gsub(/Ü/u,'&Uuml;'). # &#220;
- gsub(/Ý/u,'&Yacute;'). # &#221;
- gsub(/Þ/u,'&THORN;'). # &#222;
- gsub(/ß/u,'&szlig;'). # &#223;
- gsub(/à/u,'&agrave;'). # &#224;
- gsub(/á/u,'&aacute;'). # &#225;
- gsub(/â/u,'&acirc;'). # &#226;
- gsub(/ã/u,'&atilde;'). # &#227;
- gsub(/ä/u,'&auml;'). # &#228;
- gsub(/å/u,'&aring;'). # &#229;
- gsub(/æ/u,'&aelig;'). # &#230;
- gsub(/ç/u,'&ccedil;'). # &#231;
- gsub(/è/u,'&egrave;'). # &#232;
- gsub(/é/u,'&acute;'). # &#233;
- gsub(/ê/u,'&circ;'). # &#234;
- gsub(/ë/u,'&euml;'). # &#235;
- gsub(/ì/u,'&igrave;'). # &#236;
- gsub(/í/u,'&acute;'). # &#237;
- gsub(/î/u,'&icirc;'). # &#238;
- gsub(/ï/u,'&iuml;'). # &#239;
- gsub(/ð/u,'&eth;'). # &#240;
- gsub(/ñ/u,'&ntilde;'). # &#241;
- gsub(/ò/u,'&ograve;'). # &#242;
- gsub(/ó/u,'&oacute;'). # &#243;
- gsub(/ô/u,'&ocirc;'). # &#244;
- gsub(/õ/u,'&otilde;'). # &#245;
- gsub(/ö/u,'&ouml;'). # &#246;
- gsub(/ø/u,'&oslash;'). # &#248;
- gsub(/ù/u,'&ugrave;'). # &#250;
- gsub(/ú/u,'&uacute;'). # &#251;
- gsub(/û/u,'&ucirc;'). # &#252;
- gsub(/ü/u,'&uuml;'). # &#253;
- gsub(/þ/u,'&thorn;'). # &#254;
- gsub(/ÿ/u,'&yuml;'). # &#255;
- gsub(/‘/u,'&#lsquo;'). # &lsquo; # &#8216;
- gsub(/’/u,'&#rsquo;'). # &rsquo; # &#8217;
- gsub(/“/u,'&ldquo;'). # &ldquo; # &#8220;
- gsub(/”/u,'&rdquo;'). # &rdquo; # &#8221;
- gsub(/–/u,'&ndash;'). # &ndash; # &#8211;
- gsub(/—/u,'&mdash;'). # &mdash; # &#8212;
- gsub(/∝/u,'&prop;'). # &prop; # &#8733;
- gsub(/∞/u,'&infin;'). # &infin; # &#8734;
- gsub(/™/u,'&trade;'). # &trade; # &#8482;
- gsub(/✠/u,'&#10016;'). # &#10016;
- #gsub(/✠/u '&dagger;'). # &dagger; # &#8224; incorrect replacement †
- gsub(/ /u,' '). # space identify
- gsub(/ /u,' ') # space identify
end
end
self
@@ -411,7 +313,8 @@ module SiSU_XML_Munge
%{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}).
gsub(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}").
gsub(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}").
- gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;')
+ gsub(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;').
+ gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
dob
end
def markup_light(dob='')
@@ -426,7 +329,8 @@ module SiSU_XML_Munge
gsub(/&([^;]{1,5})/,'&amp;\1'). #sort, rough estimate, revisit #WATCH found in node not sax
gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
"<image.path>#{@md.file.output_path.xml.rel_image}\/\\1</image.path>").
- gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;')
+ gsub(/&nbsp;|#{Mx[:nbsp]}/,'&#160;').
+ gsub(/;&([^#]|(?:[^gl][^t]|[^a][^m][^p]|[^n][^b][^s][^p])[^;])/,';&amp;\1') # pattern not to match
wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
dob.obj=tidywords(wordlist).join(' ').strip
dob
@@ -698,7 +602,8 @@ module SiSU_XML_Tags #Format
end
def meta_content_clean(content='')
content=if not content.nil?
- content=content.tr('"',"'")
+ content=content.tr('"',"'").
+ gsub(/&/,'&amp;')
content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
else content
end