From fc826abef5f83543f2cf00938b6d656e3b2c2821 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 3 Sep 2008 22:40:23 -0400 Subject: dal, left and right quotes (single & double) kept; previously converted to ordinary quotes (watch) --- lib/sisu/v0/dal.rb | 4 ---- lib/sisu/v0/shared_xml.rb | 12 +++++++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb index a8bf1cd0..e75309f9 100644 --- a/lib/sisu/v0/dal.rb +++ b/lib/sisu/v0/dal.rb @@ -240,13 +240,9 @@ module SiSU_DAL para.gsub!(/(#{Mx[:en_a_o]})\s*/,'\1 '); para.gsub!(/(~\{\s*)\s+/,'\1 ') para.gsub!(/ \/\//,"#{Mx[:br_line]}") #added 2004w29 para.gsub!(/
/,"#{Mx[:br_line]}") #needed by xml, xhtml etc. - #para.gsub!(/

/,'

') #consider - para.gsub!(/`/,"'") para.gsub!(/\t/,' ') para.gsub!(/\342\200\231/u,"'") #if para =~/’/ #Avoid #‘ ’ #“ ” para.gsub!(/�/u,' ') #watch, replace with char code - para.gsub!(/[“”]/u,'""') - para.gsub!(/[­–—]/u,'-') #— – chk para.gsub!(/·/u,'*') para.gsub!(/\\copy(?:right)?\b/,'©') para.gsub!(/\\trademark\b|\\tm\b/,'®') diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 228a5c14..7ecc52bb 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -254,6 +254,8 @@ module SiSU_XML_munge para.gsub!(/ÿ/u, 'ÿ') # 'ÿ' # ÿ para.gsub!(/‘/u, '‘') # '‘' # ‘ para.gsub!(/’/u, '’') # '’' # ’ + para.gsub!(/“/u, '“') # “ # “ + para.gsub!(/”/u, '”') # ” # ” para.gsub!(/–/u, '–') # – # – para.gsub!(/—/u, '—') # — # — para.gsub!(/∝/u, '∝') # ∝ # ∝ @@ -355,6 +357,8 @@ module SiSU_XML_munge para.gsub!(/ÿ/u, 'ÿ') # ÿ para.gsub!(/‘/u, '&#lsquo;') # ‘ # ‘ para.gsub!(/’/u, '&#rsquo;') # ’ # ’ + para.gsub!(/“/u, '“') # “ # “ + para.gsub!(/”/u, '”') # ” # ” para.gsub!(/–/u, '–') # – # – para.gsub!(/—/u, '—') # — # — para.gsub!(/∝/u, '∝') # ∝ # ∝ @@ -369,11 +373,13 @@ module SiSU_XML_munge end def tidywords(wordlist) wordlist.each do |x| + #imperfect solution will not catch all possible cases x.gsub!(/&/,'&') unless x =~/&\S+;/ + x.gsub!(/&([A-Z])/,'&\1') end end def markup(para='') - wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 + wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') @@ -439,7 +445,7 @@ module SiSU_XML_munge "#{@dir.url.images_local}\/\\1") para.gsub!(/ /,' ') #para.gsub!(/ /,' ') #clean - wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 + wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para end @@ -462,7 +468,7 @@ module SiSU_XML_munge "#{@dir.url.images_local}\/\\1") para.gsub!(/ /,' ') #para.gsub!(/ /,' ') #clean - wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 + wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip para end -- cgit v1.2.3