From 11907e10c73883e5dcdaba11a093ef01c7ee2de8 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sun, 27 Jan 2013 16:26:29 -0500 Subject: v4: check xml representation of characters (& < > in particular) --- data/doc/sisu/CHANGELOG_v4 | 1 + lib/sisu/v4/epub_format.rb | 63 ++++++++++++++---------- lib/sisu/v4/shared_metadata.rb | 109 +++++------------------------------------ 3 files changed, 48 insertions(+), 125 deletions(-) diff --git a/data/doc/sisu/CHANGELOG_v4 b/data/doc/sisu/CHANGELOG_v4 index 120127e8..bb4ff6a1 100644 --- a/data/doc/sisu/CHANGELOG_v4 +++ b/data/doc/sisu/CHANGELOG_v4 @@ -27,6 +27,7 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_4.0.3.orig.tar.xz * v4: epub some fixing, more documents render in more readers/viewers * ncx fix, navpoint_close + * check xml representation of characters (& < > in particular) * v4: remove markup-sample of the first edition of FaiF as redundant, the markup for the second edition being available (& partly in response to diff --git a/lib/sisu/v4/epub_format.rb b/lib/sisu/v4/epub_format.rb index 84d32000..dd3273d0 100644 --- a/lib/sisu/v4/epub_format.rb +++ b/lib/sisu/v4/epub_format.rb @@ -1217,6 +1217,18 @@ module SiSU_EPUB_Format WOK end end + module SanitizeXML + def self.xml(x) + if x.is_a?(String) + x.gsub(/&/,'&'). + gsub(//,'>'). + #gsub(//,'>'). + gsub(/\\\\/,'
'). + gsub(/<br(?: \/)?>/,'
') + else x + end + end + end class HeadInformation include SiSU_Viz attr_reader :md,:rdf,:vz @@ -1347,10 +1359,12 @@ output_epub_cont_seg.close end def head depth=@md.lvs[1] + @md.lvs[2] + @md.lvs[3] + @md.lvs[4] + title=SanitizeXML.xml(@md.title.full) + author=SanitizeXML.xml(@md.author) <<-WOK - #{@md.title.full} by #{@md.author} + #{title} by #{author} @@ -1365,16 +1379,18 @@ output_epub_cont_seg.close WOK end def doc_title + txt=SanitizeXML.xml(@md.title.full) <<-WOK - #{@md.title.full} + #{txt} WOK end def doc_author + txt=SanitizeXML.xml(@md.author) <<-WOK - #{@md.author} + #{txt} WOK end @@ -1466,12 +1482,10 @@ output_epub_cont_seg.close m=(m.empty?) \ ? (surname + other_names) : (m + '; ' + surname + ', ' + other_names) - m=m.gsub(//,'>'). - gsub(/<br(?: \/)?>/,';') + m=SanitizeXML.xml(m) end x=@md.creator.author.dup - x=x.gsub(//,'>'). - gsub(/<br(?: \/)?>/,'
') + x=SanitizeXML.xml(x) %{\n #{x}} else '' end @@ -1488,12 +1502,10 @@ output_epub_cont_seg.close m=(m.empty?) \ ? (surname + other_names) : (m + '; ' + surname + ', ' + other_names) - m=m.gsub(//,'>'). - gsub(/<br(?: \/)?>/,';') + m=SanitizeXML.xml(m) end x=@md.creator.editor.dup - x=x.gsub(//,'>'). - gsub(/<br(?: \/)?>/,'
') + x=SanitizeXML.xml(x) %{\n #{x}} else '' end @@ -1510,12 +1522,10 @@ output_epub_cont_seg.close m=(m.empty?) \ ? (surname + other_names) : (m + '; ' + surname + ', ' + other_names) - m=m.gsub(//,'>'). - gsub(/<br(?: \/)?>/,';') + m=SanitizeXML.xml(m) end x=@md.creator.translator.dup - x=x.gsub(//,'>'). - gsub(/<br(?: \/)?>/,'
') + x=SanitizeXML.xml(x) %{\n #{x}} else '' end @@ -1532,28 +1542,24 @@ output_epub_cont_seg.close m=(m.empty?) \ ? (surname + other_names) : (m + '; ' + surname + ', ' + other_names) - m=m.gsub(//,'>'). - gsub(/<br(?: \/)?>/,';') + m=SanitizeXML.xml(m) end x=@md.creator.illustrator.dup - x=x.gsub(//,'>'). - gsub(/<br(?: \/)?>/,'
') + x=SanitizeXML.xml(x) %{\n #{x}} else '' end date_published=if defined? @md.date.published \ and @md.date.published =~/\S+/ x=@md.date.published.dup - x=x.gsub(//,'>'). - gsub(/<br(?: \/)?>/,'
') + x=SanitizeXML.xml(x) %{\n #{x}} else '' end subject=if defined? @md.classify.subject \ and @md.classify.subject =~/\S+/ x=@md.classify.subject.dup - x=x.gsub(//,'>'). - gsub(/<br(?: \/)?>/,'
') + x=SanitizeXML.xml(x) %{\n #{x}} else '' end @@ -1565,7 +1571,7 @@ output_epub_cont_seg.close end rights=if defined? @md.rights.all \ and @md.rights.all =~/\S+/ - rights=@md.rights.all.gsub(/
/,'
') + rights=SanitizeXML.xml(@md.rights.all) %{\n #{rights}} else '' end @@ -1750,9 +1756,8 @@ output_epub_cont_seg.close end def rights def all - rghts=@md.rights.all.gsub(/
/,'
') - rghts=rghts.gsub(/^\s*Copyright\s+\(C\)/,'Copyright © ') - %{

Rights: #{rghts}

} + rights=SanitizeXML.xml(@md.rights.all) + %{

Rights: #{rights}

} end self end @@ -2068,15 +2073,19 @@ output_epub_cont_seg.close #{@vz.table_close}} end def toc_head_copy_at + @txt=SanitizeXML.xml(@txt) %{

#{@txt}

\n} end def center + @txt=SanitizeXML.xml(@txt) %{

#{@txt}

\n} end def bold + @txt=SanitizeXML.xml(@txt) %{

#{@txt}

\n} end def center_bold + @txt=SanitizeXML.xml(@txt) %{

#{@txt}

\n} end end diff --git a/lib/sisu/v4/shared_metadata.rb b/lib/sisu/v4/shared_metadata.rb index 44c7243e..8b660208 100644 --- a/lib/sisu/v4/shared_metadata.rb +++ b/lib/sisu/v4/shared_metadata.rb @@ -725,107 +725,19 @@ module SiSU_Metadata end def char_enc(str) @s=str + def amp + if @s \ + and @s.is_a?(String) + @s=@s.gsub(/&/u,'&') + end + @s + end def utf8 if @s \ and @s.is_a?(String) @s=@s.gsub(//u,Mx[:br_paragraph]). - gsub(//um,'>'). # '>' # > - gsub(/¢/um,'¢'). # '¢' # ¢ - gsub(/£/um,'£'). # '£' # £ - gsub(/¥/um,'¥'). # '¥' # ¥ - gsub(/§/um,'§'). # '§' # § - gsub(/©/um,'©'). # '©' # © - gsub(/ª/um,'ª'). # 'ª' # ª - gsub(/«/um,'«'). # '«' # « - gsub(/®/um,'®'). # '®' # ® - gsub(/°/um,'°'). # '°' # ° - gsub(/±/um,'±'). # '±' # ± - gsub(/²/um,'²'). # '²' # ² - gsub(/³/um,'³'). # '³' # ³ - gsub(/µ/um,'µ'). # 'µ' # µ - gsub(/¶/um,'¶'). # '¶' # ¶ - gsub(/¹/um,'¹'). # '¹' # ¹ - gsub(/º/um,'º'). # 'º' # º - gsub(/»/um,'»'). # '»' # » - gsub(/¼/um,'¼'). # '¼' # ¼ - gsub(/½/um,'½'). # '½' # ½ - gsub(/¾/um,'¾'). # '¾' # ¾ - gsub(/×/um,'×'). # '×' # × - gsub(/÷/um,'÷'). # '÷' # ÷ - gsub(/¿/um,'¿'). # '¿' # ¿ - gsub(/À/um,'À'). # 'À' # À - gsub(/Á/um,'Á'). # 'Á' # Á - gsub(/Â/um,'Â'). # 'Â' # Â - gsub(/Ã/um,'Ã'). # 'Ã' # Ã - gsub(/Ä/um,'Ä'). # 'Ä' # Ä - gsub(/Å/um,'Å'). # 'Å' # Å - gsub(/Æ/um,'Æ'). # 'Æ' # Æ - gsub(/Ç/um,'Ç'). # 'Ç' # Ç - gsub(/È/um,'È'). # 'È' # È - gsub(/É/um,'É'). # 'É' # É - gsub(/Ê/um,'Ê'). # 'Ê' # Ê - gsub(/Ë/um,'Ë'). # 'Ë' # Ë - gsub(/Ì/um,'Ì'). # 'Ì' # Ì - gsub(/Í/um,'Í'). # 'Í' # Í - gsub(/Î/um,'Î'). # 'Î' # Î - gsub(/Ï/um,'Ï'). # 'Ï' # Ï - gsub(/Ð/um,'Ð'). # 'Ð' # Ð - gsub(/Ñ/um,'Ñ'). # 'Ñ' # Ñ - gsub(/Ò/um,'Ò'). # 'Ò' # Ò - gsub(/Ó/um,'Ó'). # 'Ó' # Ó - gsub(/Ô/um,'Ô'). # 'Ô' # Ô - gsub(/Õ/um,'Õ'). # 'Õ' # Õ - gsub(/Ö/um,'Ö'). # 'Ö' # Ö - gsub(/Ø/um,'Ø'). # 'Ø' # Ø - gsub(/Ù/um,'Ù'). # 'Ù' # Ù - gsub(/Ú/um,'Ú'). # 'Ú' # Ú - gsub(/Û/um,'Û'). # 'Û' # Û - gsub(/Ü/um,'Ü'). # 'Ü' # Ü - gsub(/Ý/um,'Ý'). # 'Ý' # Ý - gsub(/Þ/um,'Þ'). # 'Þ' # Þ - gsub(/ß/um,'ß'). # 'ß' # ß - gsub(/à/um,'à'). # 'à' # à - gsub(/á/um,'á'). # 'á' # á - gsub(/â/um,'â'). # 'â' # â - gsub(/ã/um,'ã'). # 'ã' # ã - gsub(/ä/um,'ä'). # 'ä' # ä - gsub(/å/um,'å'). # 'å' # å - gsub(/æ/um,'æ'). # 'æ' # æ - gsub(/ç/um,'ç'). # 'ç' # ç - gsub(/è/um,'è'). # 'è' # è - gsub(/é/um,'é'). # '´' # é - gsub(/ê/um,'ê'). # 'ˆ' # ê - gsub(/ë/um,'ë'). # 'ë' # ë - gsub(/ì/um,'ì'). # 'ì' # ì - gsub(/í/um,'í'). # '´' # í - gsub(/î/um,'î'). # 'î' # î - gsub(/ï/um,'ï'). # 'ï' # ï - gsub(/ð/um,'ð'). # 'ð' # ð - gsub(/ñ/um,'ñ'). # 'ñ' # ñ - gsub(/ò/um,'ò'). # 'ò' # ò - gsub(/ó/um,'ó'). # 'ó' # ó - gsub(/ô/um,'ô'). # 'ô' # ô - gsub(/õ/um,'õ'). # 'õ' # õ - gsub(/ö/um,'ö'). # 'ö' # ö - gsub(/ø/um,'ø'). # 'ø' # ø - gsub(/ù/um,'ú'). # 'ù' # ú - gsub(/ú/um,'û'). # 'ú' # û - gsub(/û/um,'ü'). # 'û' # ü - gsub(/ü/um,'ý'). # 'ü' # ý - gsub(/þ/um,'þ'). # 'þ' # þ - gsub(/ÿ/um,'ÿ'). # 'ÿ' # ÿ - gsub(/‘/um,'‘'). # '‘' # ‘ - gsub(/’/um,'’'). # '’' # ’ - gsub(/“/um,'“'). # “ # “ - gsub(/”/um,'”'). # ” # ” - gsub(/–/um,'–'). # – # – - gsub(/—/um,'—'). # — # — - gsub(/∝/um,'∝'). # ∝ # ∝ - gsub(/∞/um,'∞'). # ∞ # ∞ - gsub(/™/um,'™'). # ™ # ™ - gsub(/✠/um,'✠'). # ✗ # ✠ + gsub(//um,'>'). + #gsub(//um,'>'). gsub(/ /um,' '). # space identify gsub(/ /um,' '). # space identify gsub(/#{Mx[:br_paragraph]}/u,'
') @@ -888,7 +800,8 @@ WOK end def xhtml_display def meta_para - inf_xml=char_enc(@inf).utf8 + inf_xml=char_enc(@inf).amp + inf_xml=char_enc(inf_xml).utf8 %{

#{@tag}: #{inf_xml}

} -- cgit v1.2.3