about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2013-01-27 16:26:29 -0500
committer Ralph Amissah <ralph@amissah.com> 2013-01-27 16:28:29 -0500
commit 11907e10c73883e5dcdaba11a093ef01c7ee2de8 (patch)
tree 65660c532f372936f79544f4d7c2705cfcc56c7d
parent v4 v3: epub, toc.ncx fix, navpoint_close (diff)
v4: check xml representation of characters (& < > in particular)
-rw-r--r-- data/doc/sisu/CHANGELOG_v4 1
-rw-r--r-- lib/sisu/v4/epub_format.rb 63
-rw-r--r-- lib/sisu/v4/shared_metadata.rb 109
3 files changed, 48 insertions, 125 deletions
diff --git a/data/doc/sisu/CHANGELOG_v4 b/data/doc/sisu/CHANGELOG_v4
index 120127e8..bb4ff6a1 100644
--- a/data/doc/sisu/CHANGELOG_v4
+++ b/data/doc/sisu/CHANGELOG_v4
@@ -27,6 +27,7 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_4.0.3.orig.tar.xz
* v4: epub some fixing, more documents render in more readers/viewers
* ncx fix, navpoint_close
+ * check xml representation of characters (& < > in particular)
* v4: remove markup-sample of the first edition of FaiF as redundant, the
markup for the second edition being available (& partly in response to
diff --git a/lib/sisu/v4/epub_format.rb b/lib/sisu/v4/epub_format.rb
index 84d32000..dd3273d0 100644
--- a/lib/sisu/v4/epub_format.rb
+++ b/lib/sisu/v4/epub_format.rb
@@ -1217,6 +1217,18 @@ module SiSU_EPUB_Format
WOK
end
end
+ module SanitizeXML
+ def self.xml(x)
+ if x.is_a?(String)
+ x.gsub(/&/,'&amp;').
+ gsub(/</,'&lt;').gsub(/>/,'&gt;').
+ #gsub(/</,'&#60;').gsub(/>/,'&#62;').
+ gsub(/\\\\/,'<br />').
+ gsub(/&lt;br(?: \/)?&gt;/,'<br />')
+ else x
+ end
+ end
+ end
class HeadInformation
include SiSU_Viz
attr_reader :md,:rdf,:vz
@@ -1347,10 +1359,12 @@ output_epub_cont_seg.close
end
def head
depth=@md.lvs[1] + @md.lvs[2] + @md.lvs[3] + @md.lvs[4]
+ title=SanitizeXML.xml(@md.title.full)
+ author=SanitizeXML.xml(@md.author)
<<-WOK
<!-- four required metadata items (for all NCX documents,
(including the relaxed constraints of OPS 2.0) -->
- <title>#{@md.title.full} by #{@md.author}</title>
+ <title>#{title} by #{author}</title>
<link href="css/xhtml.css" rel="stylesheet" type="text/css" id="main-css" />
<meta name="dtb:uid" content="urn:uuid:#{@md.dgst[1]}" />
<!-- <meta name="epub-creator" content="#{@md.publisher}" /> -->
@@ -1365,16 +1379,18 @@ output_epub_cont_seg.close
WOK
end
def doc_title
+ txt=SanitizeXML.xml(@md.title.full)
<<-WOK
<docTitle>
- <text>#{@md.title.full}</text>
+ <text>#{txt}</text>
</docTitle>
WOK
end
def doc_author
+ txt=SanitizeXML.xml(@md.author)
<<-WOK
<docAuthor>
- <text>#{@md.author}</text>
+ <text>#{txt}</text>
</docAuthor>
WOK
end
@@ -1466,12 +1482,10 @@ output_epub_cont_seg.close
m=(m.empty?) \
? (surname + other_names)
: (m + '; ' + surname + ', ' + other_names)
- m=m.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,';')
+ m=SanitizeXML.xml(m)
end
x=@md.creator.author.dup
- x=x.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,'<br />')
+ x=SanitizeXML.xml(x)
%{\n <dc:creator opf:file-as="#{m}" opf:role="aut">#{x}</dc:creator>}
else ''
end
@@ -1488,12 +1502,10 @@ output_epub_cont_seg.close
m=(m.empty?) \
? (surname + other_names)
: (m + '; ' + surname + ', ' + other_names)
- m=m.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,';')
+ m=SanitizeXML.xml(m)
end
x=@md.creator.editor.dup
- x=x.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,'<br />')
+ x=SanitizeXML.xml(x)
%{\n <dc:creator opf:file-as="#{m}" opf:role="edt">#{x}</dc:creator>}
else ''
end
@@ -1510,12 +1522,10 @@ output_epub_cont_seg.close
m=(m.empty?) \
? (surname + other_names)
: (m + '; ' + surname + ', ' + other_names)
- m=m.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,';')
+ m=SanitizeXML.xml(m)
end
x=@md.creator.translator.dup
- x=x.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,'<br />')
+ x=SanitizeXML.xml(x)
%{\n <dc:creator opf:file-as="#{m}" opf:role="trl">#{x}</dc:creator>}
else ''
end
@@ -1532,28 +1542,24 @@ output_epub_cont_seg.close
m=(m.empty?) \
? (surname + other_names)
: (m + '; ' + surname + ', ' + other_names)
- m=m.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,';')
+ m=SanitizeXML.xml(m)
end
x=@md.creator.illustrator.dup
- x=x.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,'<br />')
+ x=SanitizeXML.xml(x)
%{\n <dc:creator opf:file-as="#{m}" opf:role="ill">#{x}</dc:creator>}
else ''
end
date_published=if defined? @md.date.published \
and @md.date.published =~/\S+/
x=@md.date.published.dup
- x=x.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,'<br />')
+ x=SanitizeXML.xml(x)
%{\n <dc:date opf:event="published">#{x}</dc:date>}
else ''
end
subject=if defined? @md.classify.subject \
and @md.classify.subject =~/\S+/
x=@md.classify.subject.dup
- x=x.gsub(/</,'&lt;').gsub(/>/,'&gt;').
- gsub(/&lt;br(?: \/)?&gt;/,'<br />')
+ x=SanitizeXML.xml(x)
%{\n <dc:subject>#{x}</dc:subject>}
else ''
end
@@ -1565,7 +1571,7 @@ output_epub_cont_seg.close
end
rights=if defined? @md.rights.all \
and @md.rights.all =~/\S+/
- rights=@md.rights.all.gsub(/<br>/,'<br />')
+ rights=SanitizeXML.xml(@md.rights.all)
%{\n <dc:rights>#{rights}</dc:rights>}
else ''
end
@@ -1750,9 +1756,8 @@ output_epub_cont_seg.close
end
def rights
def all
- rghts=@md.rights.all.gsub(/<br>/,'<br />')
- rghts=rghts.gsub(/^\s*Copyright\s+\(C\)/,'Copyright <sup>&copy;</sup>&nbsp;')
- %{<p class="small_left">Rights: #{rghts}</p>}
+ rights=SanitizeXML.xml(@md.rights.all)
+ %{<p class="small_left">Rights: #{rights}</p>}
end
self
end
@@ -2068,15 +2073,19 @@ output_epub_cont_seg.close
#{@vz.table_close}}
end
def toc_head_copy_at
+ @txt=SanitizeXML.xml(@txt)
%{<p class="center">#{@txt}</p>\n}
end
def center
+ @txt=SanitizeXML.xml(@txt)
%{<p class="center">#{@txt}</p>\n}
end
def bold
+ @txt=SanitizeXML.xml(@txt)
%{<p class="bold">#{@txt}</p>\n}
end
def center_bold
+ @txt=SanitizeXML.xml(@txt)
%{<p class="centerbold">#{@txt}</p>\n}
end
end
diff --git a/lib/sisu/v4/shared_metadata.rb b/lib/sisu/v4/shared_metadata.rb
index 44c7243e..8b660208 100644
--- a/lib/sisu/v4/shared_metadata.rb
+++ b/lib/sisu/v4/shared_metadata.rb
@@ -725,107 +725,19 @@ module SiSU_Metadata
end
def char_enc(str)
@s=str
+ def amp
+ if @s \
+ and @s.is_a?(String)
+ @s=@s.gsub(/&/u,'&amp;')
+ end
+ @s
+ end
def utf8
if @s \
and @s.is_a?(String)
@s=@s.gsub(/<br(?: \/)?>/u,Mx[:br_paragraph]).
- gsub(/</um,'&#60;'). # '&lt;' # &#060;
- gsub(/</um,'&#60;'). # '&lt;' # &#060;
- gsub(/>/um,'&#62;'). # '&gt;' # &#062;
- gsub(/¢/um,'&#162;'). # '&cent;' # &#162;
- gsub(/£/um,'&#163;'). # '&pound;' # &#163;
- gsub(/¥/um,'&#165;'). # '&yen;' # &#165;
- gsub(/§/um,'&#167;'). # '&sect;' # &#167;
- gsub(/©/um,'&#169;'). # '&copy;' # &#169;
- gsub(/ª/um,'&#170;'). # '&ordf;' # &#170;
- gsub(/«/um,'&#171;'). # '&laquo;' # &#171;
- gsub(/®/um,'&#174;'). # '&reg;' # &#174;
- gsub(/°/um,'&#176;'). # '&deg;' # &#176;
- gsub(/±/um,'&#177;'). # '&plusmn;' # &#177;
- gsub(/²/um,'&#178;'). # '&sup2;' # &#178;
- gsub(/³/um,'&#179;'). # '&sup3;' # &#179;
- gsub(/µ/um,'&#181;'). # '&micro;' # &#181;
- gsub(/¶/um,'&#182;'). # '&para;' # &#182;
- gsub(/¹/um,'&#185;'). # '&sup1;' # &#185;
- gsub(/º/um,'&#186;'). # '&ordm;' # &#186;
- gsub(/»/um,'&#187;'). # '&raquo;' # &#187;
- gsub(/¼/um,'&#188;'). # '&frac14;' # &#188;
- gsub(/½/um,'&#189;'). # '&frac12;' # &#189;
- gsub(/¾/um,'&#190;'). # '&frac34;' # &#190;
- gsub(/×/um,'&#215;'). # '&times;' # &#215;
- gsub(/÷/um,'&#247;'). # '&divide;' # &#247;
- gsub(/¿/um,'&#191;'). # '&iquest;' # &#191;
- gsub(/À/um,'&#192;'). # '&Agrave;' # &#192;
- gsub(/Á/um,'&#193;'). # '&Aacute;' # &#193;
- gsub(/Â/um,'&#194;'). # '&Acirc;' # &#194;
- gsub(/Ã/um,'&#195;'). # '&Atilde;' # &#195;
- gsub(/Ä/um,'&#196;'). # '&Auml;' # &#196;
- gsub(/Å/um,'&#197;'). # '&Aring;' # &#197;
- gsub(/Æ/um,'&#198;'). # '&AElig;' # &#198;
- gsub(/Ç/um,'&#199;'). # '&Ccedil;' # &#199;
- gsub(/È/um,'&#200;'). # '&Egrave;' # &#200;
- gsub(/É/um,'&#201;'). # '&Eacute;' # &#201;
- gsub(/Ê/um,'&#202;'). # '&Ecirc;' # &#202;
- gsub(/Ë/um,'&#203;'). # '&Euml;' # &#203;
- gsub(/Ì/um,'&#204;'). # '&Igrave;' # &#204;
- gsub(/Í/um,'&#205;'). # '&Iacute;' # &#205;
- gsub(/Î/um,'&#206;'). # '&Icirc;' # &#206;
- gsub(/Ï/um,'&#207;'). # '&Iuml;' # &#207;
- gsub(/Ð/um,'&#208;'). # '&ETH;' # &#208;
- gsub(/Ñ/um,'&#209;'). # '&Ntilde;' # &#209;
- gsub(/Ò/um,'&#210;'). # '&Ograve;' # &#210;
- gsub(/Ó/um,'&#211;'). # '&Oacute;' # &#211;
- gsub(/Ô/um,'&#212;'). # '&Ocirc;' # &#212;
- gsub(/Õ/um,'&#213;'). # '&Otilde;' # &#213;
- gsub(/Ö/um,'&#214;'). # '&Ouml;' # &#214;
- gsub(/Ø/um,'&#216;'). # '&Oslash;' # &#216;
- gsub(/Ù/um,'&#217;'). # '&Ugrave;' # &#217;
- gsub(/Ú/um,'&#218;'). # '&Uacute;' # &#218;
- gsub(/Û/um,'&#219;'). # '&Ucirc;' # &#219;
- gsub(/Ü/um,'&#220;'). # '&Uuml;' # &#220;
- gsub(/Ý/um,'&#221;'). # '&Yacute;' # &#221;
- gsub(/Þ/um,'&#222;'). # '&THORN;' # &#222;
- gsub(/ß/um,'&#223;'). # '&szlig;' # &#223;
- gsub(/à/um,'&#224;'). # '&agrave;' # &#224;
- gsub(/á/um,'&#225;'). # '&aacute;' # &#225;
- gsub(/â/um,'&#226;'). # '&acirc;' # &#226;
- gsub(/ã/um,'&#227;'). # '&atilde;' # &#227;
- gsub(/ä/um,'&#228;'). # '&auml;' # &#228;
- gsub(/å/um,'&#229;'). # '&aring;' # &#229;
- gsub(/æ/um,'&#230;'). # '&aelig;' # &#230;
- gsub(/ç/um,'&#231;'). # '&ccedil;' # &#231;
- gsub(/è/um,'&#232;'). # '&egrave;' # &#232;
- gsub(/é/um,'&#233;'). # '&acute;' # &#233;
- gsub(/ê/um,'&#234;'). # '&circ;' # &#234;
- gsub(/ë/um,'&#235;'). # '&euml;' # &#235;
- gsub(/ì/um,'&#236;'). # '&igrave;' # &#236;
- gsub(/í/um,'&#237;'). # '&acute;' # &#237;
- gsub(/î/um,'&#238;'). # '&icirc;' # &#238;
- gsub(/ï/um,'&#239;'). # '&iuml;' # &#239;
- gsub(/ð/um,'&#240;'). # '&eth;' # &#240;
- gsub(/ñ/um,'&#241;'). # '&ntilde;' # &#241;
- gsub(/ò/um,'&#242;'). # '&ograve;' # &#242;
- gsub(/ó/um,'&#243;'). # '&oacute;' # &#243;
- gsub(/ô/um,'&#244;'). # '&ocirc;' # &#244;
- gsub(/õ/um,'&#245;'). # '&otilde;' # &#245;
- gsub(/ö/um,'&#246;'). # '&ouml;' # &#246;
- gsub(/ø/um,'&#248;'). # '&oslash;' # &#248;
- gsub(/ù/um,'&#250;'). # '&ugrave;' # &#250;
- gsub(/ú/um,'&#251;'). # '&uacute;' # &#251;
- gsub(/û/um,'&#252;'). # '&ucirc;' # &#252;
- gsub(/ü/um,'&#253;'). # '&uuml;' # &#253;
- gsub(/þ/um,'&#254;'). # '&thorn;' # &#254;
- gsub(/ÿ/um,'&#255;'). # '&yuml;' # &#255;
- gsub(/‘/um,'&#8216;'). # '&lsquo;' # &#8216;
- gsub(/’/um,'&#8217;'). # '&rsquo;' # &#8217;
- gsub(/“/um,'&#8220;'). # &ldquo; # &#8220;
- gsub(/”/um,'&#8221;'). # &rdquo; # &#8221;
- gsub(/–/um,'&#8211;'). # &ndash; # &#8211;
- gsub(/—/um,'&#8212;'). # &mdash; # &#8212;
- gsub(/∝/um,'&#8733;'). # &prop; # &#8733;
- gsub(/∞/um,'&#8734;'). # &infin; # &#8734;
- gsub(/™/um,'&#8482;'). # &trade; # &#8482;
- gsub(/✠/um,'&#10016;'). # &cross; # &#10016;
+ gsub(/</um,'&lt;').gsub(/>/um,'&gt;').
+ #gsub(/</um,'&#60;').gsub(/>/um,'&#62;').
gsub(/ /um,' '). # space identify
gsub(/ /um,' '). # space identify
gsub(/#{Mx[:br_paragraph]}/u,'<br />')
@@ -888,7 +800,8 @@ WOK
end
def xhtml_display
def meta_para
- inf_xml=char_enc(@inf).utf8
+ inf_xml=char_enc(@inf).amp
+ inf_xml=char_enc(inf_xml).utf8
%{<p class="norm">
<b>#{@tag}</b>: #{inf_xml}
</p>}