about summary refs log tree commit diff homepage
path: root/lib/sisu/v0/shared_xml.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v0/shared_xml.rb')
-rw-r--r-- lib/sisu/v0/shared_xml.rb 12
1 files changed, 9 insertions, 3 deletions
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb
index 228a5c14..7ecc52bb 100644
--- a/lib/sisu/v0/shared_xml.rb
+++ b/lib/sisu/v0/shared_xml.rb
@@ -254,6 +254,8 @@ module SiSU_XML_munge
para.gsub!(/ÿ/u, 'ÿ') # 'ÿ' # ÿ
para.gsub!(/‘/u, '‘') # '‘' # ‘
para.gsub!(/’/u, '’') # '’' # ’
+ para.gsub!(/“/u, '“') # “ # “
+ para.gsub!(/”/u, '”') # ” # ”
para.gsub!(/–/u, '–') # – # –
para.gsub!(/—/u, '—') # — # —
para.gsub!(/∝/u, '∝') # ∝ # ∝
@@ -355,6 +357,8 @@ module SiSU_XML_munge
para.gsub!(/ÿ/u, 'ÿ') # ÿ
para.gsub!(/‘/u, '&#lsquo;') # ‘ # ‘
para.gsub!(/’/u, '&#rsquo;') # ’ # ’
+ para.gsub!(/“/u, '“') # “ # “
+ para.gsub!(/”/u, '”') # ” # ”
para.gsub!(/–/u, '–') # – # –
para.gsub!(/—/u, '—') # — # —
para.gsub!(/∝/u, '∝') # ∝ # ∝
@@ -369,11 +373,13 @@ module SiSU_XML_munge
end
def tidywords(wordlist)
wordlist.each do |x|
+ # Imperfect heuristic: the first substitution is skipped for any word that already contains an entity-shaped run (&...;), so not every bare ampersand is caught.
x.gsub!(/&/,'&') unless x =~/&\S+;/
+ x.gsub!(/&([A-Z])/,'&\1')
end
end
def markup(para='')
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />')
para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'')
@@ -439,7 +445,7 @@ module SiSU_XML_munge
"<image.path>#{@dir.url.images_local}\/\\1</image.path>")
para.gsub!(/&nbsp;/,'&#160;')
#para.gsub!(/&nbsp;/,' ') #clean
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para
end
@@ -462,7 +468,7 @@ module SiSU_XML_munge
"<image.path>#{@dir.url.images_local}\/\\1</image.path>")
para.gsub!(/&nbsp;/,'&#160;')
#para.gsub!(/&nbsp;/,' ') #clean
- wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
+ wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para
end