about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2008-09-03 22:40:23 -0400
committer Ralph Amissah <ralph@amissah.com> 2008-09-03 22:40:23 -0400
commit fc826abef5f83543f2cf00938b6d656e3b2c2821 (patch)
tree c0907f76b1bca04fd62a1c492953db677ab05be7
parent xml character encoding adjusted; xml image match; odf issue with '@' symbol i... (diff)
dal, left and right quotes (single & double) kept; previously converted to ordinary quotes (watch)
-rw-r--r-- lib/sisu/v0/dal.rb 4
-rw-r--r-- lib/sisu/v0/shared_xml.rb 12
2 files changed, 9 insertions, 7 deletions
diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb
index a8bf1cd0..e75309f9 100644
--- a/lib/sisu/v0/dal.rb
+++ b/lib/sisu/v0/dal.rb
@@ -240,13 +240,9 @@ module SiSU_DAL
para.gsub!(/(#{Mx[:en_a_o]})\s*/,'\1 '); para.gsub!(/(~\{\s*)\s+/,'\1 ')
para.gsub!(/ \/\//,"#{Mx[:br_line]}") #added 2004w29
para.gsub!(/<br>/,"#{Mx[:br_line]}") #needed by xml, xhtml etc.
- #para.gsub!(/<p>/,'<p />') #consider
- para.gsub!(/`/,"'")
para.gsub!(/\t/,' ')
para.gsub!(/\342\200\231/u,"'") #if para =~/’/ #Avoid #&lsquo; &rsquo; #&ldquo; &rdquo;
para.gsub!(/�/u,' ') #watch, replace with char code
- para.gsub!(/[“”]/u,'""')
- para.gsub!(/[­–—]/u,'-') #— – chk
para.gsub!(/·/u,'*')
para.gsub!(/\\copy(?:right)?\b/,'&#169;')
para.gsub!(/\\trademark\b|\\tm\b/,'&#174;')
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb
index 228a5c14..7ecc52bb 100644
--- a/lib/sisu/v0/shared_xml.rb
+++ b/lib/sisu/v0/shared_xml.rb
@@ -254,6 +254,8 @@ module SiSU_XML_munge
para.gsub!(/ÿ/u, '&#255;') # '&yuml;' # &#255;
para.gsub!(/‘/u, '&#8216;') # '&lsquo;' # &#8216;
para.gsub!(/’/u, '&#8217;') # '&rsquo;' # &#8217;
+ para.gsub!(/“/u, '&#8220;') # &ldquo; # &#8220;
+ para.gsub!(/”/u, '&#8221;') # &rdquo; # &#8221;
para.gsub!(/–/u, '&#8211;') # &ndash; # &#8211;
para.gsub!(/—/u, '&#8212;') # &mdash; # &#8212;
para.gsub!(/∝/u, '&#8733;') # &prop; # &#8733;
@@ -355,6 +357,8 @@ module SiSU_XML_munge
para.gsub!(/ÿ/u, '&yuml;') # &#255;
para.gsub!(/‘/u, '&#lsquo;') # &lsquo; # &#8216;
para.gsub!(/’/u, '&#rsquo;') # &rsquo; # &#8217;
+ para.gsub!(/“/u, '&ldquo;') # &ldquo; # &#8220;
+ para.gsub!(/”/u, '&rdquo;') # &rdquo; # &#8221;
para.gsub!(/–/u, '&ndash;') # &ndash; # &#8211;
para.gsub!(/—/u, '&mdash;') # &mdash; # &#8212;
para.gsub!(/∝/u, '&prop;') # &prop; # &#8733;
@@ -369,11 +373,13 @@ module SiSU_XML_munge
end
def tidywords(wordlist)
wordlist.each do |x|
+ #imperfect solution, will not catch all possible cases: a word with no '&...;' sequence gets every '&' rewritten to '&amp;'; then, in any word, '&' directly followed by a capital letter A-Z is rewritten to '&amp;' plus that letter (words are modified in place)
x.gsub!(/&/,'&amp;') unless x =~/&\S+;/
+ x.gsub!(/&([A-Z])/,'&amp;\1')
end
end
def markup(para='')
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />')
para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'')
@@ -439,7 +445,7 @@ module SiSU_XML_munge
"<image.path>#{@dir.url.images_local}\/\\1</image.path>")
para.gsub!(/&nbsp;/,'&#160;')
#para.gsub!(/&nbsp;/,' ') #clean
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para
end
@@ -462,7 +468,7 @@ module SiSU_XML_munge
"<image.path>#{@dir.url.images_local}\/\\1</image.path>")
para.gsub!(/&nbsp;/,'&#160;')
#para.gsub!(/&nbsp;/,' ') #clean
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para
end