about summary refs log tree commit diff homepage
path: root/lib/sisu/v0/shared_xml.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v0/shared_xml.rb')
-rw-r--r-- lib/sisu/v0/shared_xml.rb 66
1 files changed, 50 insertions, 16 deletions
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb
index 3c34e67f..41e8c393 100644
--- a/lib/sisu/v0/shared_xml.rb
+++ b/lib/sisu/v0/shared_xml.rb
@@ -166,6 +166,8 @@ module SiSU_XML_munge
#¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷
##para.gsub!(//, '&#;')
##para.gsub!(//, '&;')
+ para.gsub!(/</u, '&#60;') # '&lt;' # &#060;
+ para.gsub!(/>/u, '&#62;') # '&gt;' # &#062;
para.gsub!(/¢/u, '&#162;') # '&cent;' # &#162;
para.gsub!(/£/u, '&#163;') # '&pound;' # &#163;
para.gsub!(/¥/u, '&#165;') # '&yen;' # &#165;
@@ -250,10 +252,25 @@ module SiSU_XML_munge
para.gsub!(/ü/u, '&#253;') # '&uuml;' # &#253;
para.gsub!(/þ/u, '&#254;') # '&thorn;' # &#254;
para.gsub!(/ÿ/u, '&#255;') # '&yuml;' # &#255;
+ para.gsub!(/‘/u, '&#8216;') # '&lsquo;' # &#8216;
+ para.gsub!(/’/u, '&#8217;') # '&rsquo;' # &#8217;
+ para.gsub!(/“/u, '&#8220;') # &ldquo; # &#8220;
+ para.gsub!(/”/u, '&#8221;') # &rdquo; # &#8221;
+ para.gsub!(/–/u, '&#8211;') # &ndash; # &#8211;
+ para.gsub!(/—/u, '&#8212;') # &mdash; # &#8212;
+ para.gsub!(/∝/u, '&#8733;') # &prop; # &#8733;
+ para.gsub!(/∞/u, '&#8734;') # &infin; # &#8734;
+ para.gsub!(/™/u, '&#8482;') # &trade; # &#8482;
+ para.gsub!(/✠/u, '&#10016;') # &cross; # &#10016;
+ para.gsub!(/ /u, ' ') # space identify
+ para.gsub!(/ /u, ' ') # space identify
end
end
def html(para='')
if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn
+ para.gsub!(/ /u, ' ') # space identify
+ para.gsub!(/ /u, ' ') # space identify
+ else
para.gsub!(/¢/u, '&cent;') # &#162;
para.gsub!(/£/u, '&pound;') # &#163;
para.gsub!(/¥/u, '&yen;') # &#165;
@@ -338,17 +355,32 @@ module SiSU_XML_munge
para.gsub!(/ü/u, '&uuml;') # &#253;
para.gsub!(/þ/u, '&thorn;') # &#254;
para.gsub!(/ÿ/u, '&yuml;') # &#255;
+ para.gsub!(/‘/u, '&lsquo;') # &lsquo; # &#8216;
+ para.gsub!(/’/u, '&rsquo;') # &rsquo; # &#8217;
+ para.gsub!(/“/u, '&ldquo;') # &ldquo; # &#8220;
+ para.gsub!(/”/u, '&rdquo;') # &rdquo; # &#8221;
+ para.gsub!(/–/u, '&ndash;') # &ndash; # &#8211;
+ para.gsub!(/—/u, '&mdash;') # &mdash; # &#8212;
+ para.gsub!(/∝/u, '&prop;') # &prop; # &#8733;
+ para.gsub!(/∞/u, '&infin;') # &infin; # &#8734;
+ para.gsub!(/™/u, '&trade;') # &trade; # &#8482;
+ para.gsub!(/✠/u, '&#10016;') # &#10016;
+ #para.gsub!(/✠/u, '&dagger;') # &dagger; # &#8224; incorrect replacement †
+ para.gsub!(/ /u, ' ') # space identify
+ para.gsub!(/ /u, ' ') # space identify
end
end
self
end
def tidywords(wordlist)
wordlist.each do |x|
+ #imperfect solution: a word is left as is when it already contains something shaped like an entity (&...;), so this will not catch all possible cases
x.gsub!(/&/,'&amp;') unless x =~/&\S+;/
+ x.gsub!(/&([A-Z])/,'&amp;\1')
end
end
def markup(para='')
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />')
para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'')
@@ -377,23 +409,25 @@ module SiSU_XML_munge
para.gsub!(/#{Mx[:br_page]}\s*/,'')
para.gsub!(/#{Mx[:br_page_new]}\s*/,'')
para.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); para.gsub!(/<[-~]#>/,'')
- para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,
- %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4})
- para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,
- %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1})
- para.gsub!(/(^|#{Mx[:gl_c]}|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/,
+ para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/,
+ %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4})
+ para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/,
+ %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1})
+ para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/,
+ %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1" width="\\2" height="\\3" />[\\1] \\4})
+ para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}image/,
+ %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\1"/>\\1})
+ para.gsub!(/(^|#{Mx[:gl_c]}|\s)#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/,
'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune
para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
%{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3})
para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,
- '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later
- para.gsub!(/&nbsp;/,'&#160;')
- #para.gsub!(/&nbsp;/,' ') #clean
+ '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later
else
para.gsub!(/(^|[^}])_</m,'\1&lt;'); para.gsub!(/(^|[^}])_>/m,'\1&gt;') #code-block: angle brackets special characters
para.gsub!(/(^|[^}])_</m,'\1&lt;'); para.gsub!(/(^|[^}])_>/m,'\1&gt;')
- para.gsub!(/&nbsp;/,'&#160;')
end
+ para.gsub!(/&nbsp;|#{Mx[:nbsp]}/m,'&#160;')
para
end
def markup_light(para='')
@@ -406,11 +440,11 @@ module SiSU_XML_munge
para.gsub!(/<[-~]#>/,'')
para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1&amp; ') #sort
para.gsub!(/&([^;]{1,5})/,'&amp;\1') #sort, rough estimate, revisit #WATCH found in node not sax
- para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,
+ para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,
"<image.path>#{@dir.url.images_local}\/\\1</image.path>")
- para.gsub!(/&nbsp;/,'&#160;')
+ para.gsub!(/&nbsp;|#{Mx[:nbsp]}/,'&#160;')
#para.gsub!(/&nbsp;/,' ') #clean
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para
end
@@ -429,11 +463,11 @@ module SiSU_XML_munge
para.gsub!(/<[-~]#>/,'')
para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1&amp; ') #sort
para.gsub!(/&([^;]{1,5})/,'&amp;\1') #sort, rough estimate, revisit #WATCH found in node not sax
- para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,
+ para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,
"<image.path>#{@dir.url.images_local}\/\\1</image.path>")
- para.gsub!(/&nbsp;/,'&#160;')
+ para.gsub!(/&nbsp;|#{Mx[:nbsp]}/,'&#160;')
#para.gsub!(/&nbsp;/,' ') #clean
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para
end