From 2a738f528cb87793ff7f8312099666af1e21f44c Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 3 Sep 2008 22:25:03 -0400 Subject: xml character encoding adjusted; xml image match; odf issue with '@' symbol in url --- lib/sisu/v0/shared_xml.rb | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'lib/sisu/v0/shared_xml.rb') diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 9203f0df..228a5c14 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -166,6 +166,8 @@ module SiSU_XML_munge #¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷ ##para.gsub!(//, '&#;') ##para.gsub!(//, '&;') + para.gsub!(//u, '>') # '>' # > para.gsub!(/¢/u, '¢') # '¢' # ¢ para.gsub!(/£/u, '£') # '£' # £ para.gsub!(/¥/u, '¥') # '¥' # ¥ @@ -250,10 +252,23 @@ module SiSU_XML_munge para.gsub!(/ü/u, 'ý') # 'ü' # ý para.gsub!(/þ/u, 'þ') # 'þ' # þ para.gsub!(/ÿ/u, 'ÿ') # 'ÿ' # ÿ + para.gsub!(/‘/u, '‘') # '‘' # ‘ + para.gsub!(/’/u, '’') # '’' # ’ + para.gsub!(/–/u, '–') # – # – + para.gsub!(/—/u, '—') # — # — + para.gsub!(/∝/u, '∝') # ∝ # ∝ + para.gsub!(/∞/u, '∞') # ∞ # ∞ + para.gsub!(/™/u, '™') # ™ # ™ + para.gsub!(/✠/u, '†') # † # † incorrect replacement! † + para.gsub!(/ /u, ' ') # space identify + para.gsub!(/ /u, ' ') # space identify end end def html(para='') if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn + para.gsub!(/ /u, ' ') # space identify + para.gsub!(/ /u, ' ') # space identify + else para.gsub!(/¢/u, '¢') # ¢ para.gsub!(/£/u, '£') # £ para.gsub!(/¥/u, '¥') # ¥ @@ -338,6 +353,16 @@ module SiSU_XML_munge para.gsub!(/ü/u, 'ü') # ý para.gsub!(/þ/u, 'þ') # þ para.gsub!(/ÿ/u, 'ÿ') # ÿ + para.gsub!(/‘/u, '&#lsquo;') # ‘ # ‘ + para.gsub!(/’/u, '&#rsquo;') # ’ # ’ + para.gsub!(/–/u, '–') # – # – + para.gsub!(/—/u, '—') # — # — + para.gsub!(/∝/u, '∝') # ∝ # ∝ + para.gsub!(/∞/u, '∞') # ∞ # ∞ + para.gsub!(/™/u, '™') # ™ # ™ + para.gsub!(/✠/u, '†') # † # † incorrect replacement † + para.gsub!(/ /u, ' ') # space identify + para.gsub!(/ /u, ' ') # space identify end end self @@ -381,6 +406,10 @@ module SiSU_XML_munge %{[\\1] \\4}) para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/, %{\\1}) + para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/, + %{[\\1] \\4}) + para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}image/, + %{\\1}) para.gsub!(/(^|#{Mx[:gl_c]}|\s)#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/, '\1\2\4') #watch, compare html_tune para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, -- cgit v1.2.3