diff options
Diffstat (limited to 'lib/sisu/v3dv/shared_xml.rb')
-rw-r--r-- | lib/sisu/v3dv/shared_xml.rb | 713 |
1 files changed, 713 insertions, 0 deletions
diff --git a/lib/sisu/v3dv/shared_xml.rb b/lib/sisu/v3dv/shared_xml.rb new file mode 100644 index 00000000..78611ccc --- /dev/null +++ b/lib/sisu/v3dv/shared_xml.rb @@ -0,0 +1,713 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see <http://www.gnu.org/licenses/>. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + <http://www.fsf.org/licensing/licenses/gpl.html> + <http://www.gnu.org/licenses/gpl.html> + + <http://www.jus.uio.no/sisu/gpl.fsf/toc.html> + <http://www.jus.uio.no/sisu/gpl.fsf/doc.html> + <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt> + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + <http://www.jus.uio.no/sisu> + <http://www.sisudoc.org> + + * Download: + <http://www.jus.uio.no/sisu/SiSU/download.html> + + * Ralph Amissah + <ralph@amissah.com> + <ralph.amissah@gmail.com> + + ** Description: common file for xml generation +=end +module SiSU_XML_Munge + class Trans + require_relative 'defaults' # defaults.rb + def initialize(md) + @md=md + @sys=SiSU_Env::SystemCall.new + @dir=SiSU_Env::InfoEnv.new(@md.fns) + @brace_url=SiSU_Viz::Skin.new.url_decoration + if @md.sem_tag + @ab ||=semantic_tags.default + end + end + def semantic_tags + def default + { + pub: 'publication', + conv: 'convention', + vol: 'volume', + pg: 'page', + cty: 'city', + org: 'organization', + uni: 'university', + dept: 'department', + fac: 'faculty', + inst: 'institute', + co: 'company', + com: 'company', + conv: 'convention', + dt: 'date', + y: 'year', + m: 'month', + d: 'day', + ti: 'title', + au: 'author', + ed: 'editor', #editor? + v: 'version', #edition + n: 'name', + fn: 'firstname', + mn: 'middlename', + ln: 'lastname', + in: 'initials', + qt: 'quote', + ct: 'cite', + ref: 'reference', + ab: 'abreviation', + def: 'define', + desc: 'description', + trans: 'translate', + } + end + self + end + def char_enc #character encode + def utf8(dob='') + if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn + str=if defined? dob.obj; dob.obj + elsif dob.class==String; dob + end + if str + #¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûü + #¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷ + str.gsub!(/</um,'<') # '<' # < + str.gsub!(/>/um,'>') # '>' # > + str.gsub!(/¢/um,'¢') # '¢' # ¢ + str.gsub!(/£/um,'£') # '£' # £ + str.gsub!(/¥/um,'¥') # '¥' # ¥ + str.gsub!(/§/um,'§') # '§' # § + str.gsub!(/©/um,'©') # '©' # © + str.gsub!(/ª/um,'ª') # 'ª' # ª + str.gsub!(/«/um,'«') # '«' # « + str.gsub!(/®/um,'®') # '®' # ® + str.gsub!(/°/um,'°') # '°' # ° + str.gsub!(/±/um,'±') # '±' # ± + str.gsub!(/²/um,'²') # '²' # ² + str.gsub!(/³/um,'³') # '³' # ³ + str.gsub!(/µ/um,'µ') # 'µ' # µ + str.gsub!(/¶/um,'¶') # '¶' # ¶ + str.gsub!(/¹/um,'¹') # '¹' # ¹ + str.gsub!(/º/um,'º') # 'º' # º + str.gsub!(/»/um,'»') # '»' # » + str.gsub!(/¼/um,'¼') # '¼' # ¼ + str.gsub!(/½/um,'½') # '½' # ½ + str.gsub!(/¾/um,'¾') # '¾' # ¾ + str.gsub!(/×/um,'×') # '×' # × + str.gsub!(/÷/um,'÷') # '÷' # ÷ + str.gsub!(/¿/um,'¿') # '¿' # ¿ + str.gsub!(/À/um,'À') # 'À' # À + str.gsub!(/Á/um,'Á') # 'Á' # Á + str.gsub!(/Â/um,'Â') # 'Â' #  + str.gsub!(/Ã/um,'Ã') # 'Ã' # à + str.gsub!(/Ä/um,'Ä') # 'Ä' # Ä + str.gsub!(/Å/um,'Å') # 'Å' # Å + str.gsub!(/Æ/um,'Æ') # 'Æ' # Æ + str.gsub!(/Ç/um,'Ç') # 'Ç' # Ç + str.gsub!(/È/um,'È') # 'È' # È + str.gsub!(/É/um,'É') # 'É' # É + str.gsub!(/Ê/um,'Ê') # 'Ê' # Ê + str.gsub!(/Ë/um,'Ë') # 'Ë' # Ë + str.gsub!(/Ì/um,'Ì') # 'Ì' # Ì + str.gsub!(/Í/um,'Í') # 'Í' # Í + str.gsub!(/Î/um,'Î') # 'Î' # Î + str.gsub!(/Ï/um,'Ï') # 'Ï' # Ï + str.gsub!(/Ð/um,'Ð') # 'Ð' # Ð + str.gsub!(/Ñ/um,'Ñ') # 'Ñ' # Ñ + str.gsub!(/Ò/um,'Ò') # 'Ò' # Ò + str.gsub!(/Ó/um,'Ó') # 'Ó' # Ó + str.gsub!(/Ô/um,'Ô') # 'Ô' # Ô + str.gsub!(/Õ/um,'Õ') # 'Õ' # Õ + str.gsub!(/Ö/um,'Ö') # 'Ö' # Ö + str.gsub!(/Ø/um,'Ø') # 'Ø' # Ø + str.gsub!(/Ù/um,'Ù') # 'Ù' # Ù + str.gsub!(/Ú/um,'Ú') # 'Ú' # Ú + str.gsub!(/Û/um,'Û') # 'Û' # Û + str.gsub!(/Ü/um,'Ü') # 'Ü' # Ü + str.gsub!(/Ý/um,'Ý') # 'Ý' # Ý + str.gsub!(/Þ/um,'Þ') # 'Þ' # Þ + str.gsub!(/ß/um,'ß') # 'ß' # ß + str.gsub!(/à/um,'à') # 'à' # à + str.gsub!(/á/um,'á') # 'á' # á + str.gsub!(/â/um,'â') # 'â' # â + str.gsub!(/ã/um,'ã') # 'ã' # ã + str.gsub!(/ä/um,'ä') # 'ä' # ä + str.gsub!(/å/um,'å') # 'å' # å + str.gsub!(/æ/um,'æ') # 'æ' # æ + str.gsub!(/ç/um,'ç') # 'ç' # ç + str.gsub!(/è/um,'è') # 'è' # è + str.gsub!(/é/um,'é') # '´' # é + str.gsub!(/ê/um,'ê') # 'ˆ' # ê + str.gsub!(/ë/um,'ë') # 'ë' # ë + str.gsub!(/ì/um,'ì') # 'ì' # ì + str.gsub!(/í/um,'í') # '´' # í + str.gsub!(/î/um,'î') # 'î' # î + str.gsub!(/ï/um,'ï') # 'ï' # ï + str.gsub!(/ð/um,'ð') # 'ð' # ð + str.gsub!(/ñ/um,'ñ') # 'ñ' # ñ + str.gsub!(/ò/um,'ò') # 'ò' # ò + str.gsub!(/ó/um,'ó') # 'ó' # ó + str.gsub!(/ô/um,'ô') # 'ô' # ô + str.gsub!(/õ/um,'õ') # 'õ' # õ + str.gsub!(/ö/um,'ö') # 'ö' # ö + str.gsub!(/ø/um,'ø') # 'ø' # ø + str.gsub!(/ù/um,'ú') # 'ù' # ú + str.gsub!(/ú/um,'û') # 'ú' # û + str.gsub!(/û/um,'ü') # 'û' # ü + str.gsub!(/ü/um,'ý') # 'ü' # ý + str.gsub!(/þ/um,'þ') # 'þ' # þ + str.gsub!(/ÿ/um,'ÿ') # 'ÿ' # ÿ + str.gsub!(/‘/um,'‘') # '‘' # ‘ + str.gsub!(/’/um,'’') # '’' # ’ + str.gsub!(/“/um,'“') # “ # “ + str.gsub!(/”/um,'”') # ” # ” + str.gsub!(/–/um,'–') # – # – + str.gsub!(/—/um,'—') # — # — + str.gsub!(/∝/um,'∝') # ∝ # ∝ + str.gsub!(/∞/um,'∞') # ∞ # ∞ + str.gsub!(/™/um,'™') # ™ # ™ + str.gsub!(/✠/um,'✠') # ✗ # ✠ + str.gsub!(/ /um,' ') # space identify + str.gsub!(/ /um,' ') # space identify + end + dob=if defined? dob.obj + dob.obj=str + dob + elsif dob.class==String; dob + end + dob + end + end + def html(dob='') + if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn + dob.obj.gsub!(/ /u,' ') # space identify + dob.obj.gsub!(/ /u,' ') # space identify + else + dob.obj.gsub!(/¢/u,'¢') # ¢ + dob.obj.gsub!(/£/u,'£') # £ + dob.obj.gsub!(/¥/u,'¥') # ¥ + dob.obj.gsub!(/§/u,'§') # § + dob.obj.gsub!(/©/u,'©') # © + dob.obj.gsub!(/ª/u,'ª') # ª + dob.obj.gsub!(/«/u,'«') # « + dob.obj.gsub!(/®/u,'®') # ® + dob.obj.gsub!(/°/u,'°') # ° + dob.obj.gsub!(/±/u,'±') # ± + dob.obj.gsub!(/²/u,'²') # ² + dob.obj.gsub!(/³/u,'³') # ³ + dob.obj.gsub!(/µ/u,'µ') # µ + dob.obj.gsub!(/¶/u,'¶') # ¶ + dob.obj.gsub!(/¹/u,'¹') # ¹ + dob.obj.gsub!(/º/u,'º') # º + dob.obj.gsub!(/»/u,'»') # » + dob.obj.gsub!(/¼/u,'¼') # ¼ + dob.obj.gsub!(/½/u,'½') # ½ + dob.obj.gsub!(/¾/u,'¾') # ¾ + dob.obj.gsub!(/×/u,'×') # × + dob.obj.gsub!(/÷/u,'÷') # ÷ + dob.obj.gsub!(/¿/u,'¿') # ¿ + dob.obj.gsub!(/À/u,'À') # À + dob.obj.gsub!(/Á/u,'Á') # Á + dob.obj.gsub!(/Â/u,'Â') #  + dob.obj.gsub!(/Ã/u,'Ã') # à + dob.obj.gsub!(/Ä/u,'Ä') # Ä + dob.obj.gsub!(/Å/u,'Å') # Å + dob.obj.gsub!(/Æ/u,'Æ') # Æ + dob.obj.gsub!(/Ç/u,'Ç') # Ç + dob.obj.gsub!(/È/u,'È') # È + dob.obj.gsub!(/É/u,'É') # É + dob.obj.gsub!(/Ê/u,'Ê') # Ê + dob.obj.gsub!(/Ë/u,'Ë') # Ë + dob.obj.gsub!(/Ì/u,'Ì') # Ì + dob.obj.gsub!(/Í/u,'Í') # Í + dob.obj.gsub!(/Î/u,'Î') # Î + dob.obj.gsub!(/Ï/u,'Ï') # Ï + dob.obj.gsub!(/Ð/u,'Ð') # Ð + dob.obj.gsub!(/Ñ/u,'Ñ') # Ñ + dob.obj.gsub!(/Ò/u,'Ò') # Ò + dob.obj.gsub!(/Ó/u,'Ó') # Ó + dob.obj.gsub!(/Ô/u,'Ô') # Ô + dob.obj.gsub!(/Õ/u,'Õ') # Õ + dob.obj.gsub!(/Ö/u,'Ö') # Ö + dob.obj.gsub!(/Ø/u,'Ø') # Ø + dob.obj.gsub!(/Ù/u,'Ù') # Ù + dob.obj.gsub!(/Ú/u,'Ú') # Ú + dob.obj.gsub!(/Û/u,'Û') # Û + dob.obj.gsub!(/Ü/u,'Ü') # Ü + dob.obj.gsub!(/Ý/u,'Ý') # Ý + dob.obj.gsub!(/Þ/u,'Þ') # Þ + dob.obj.gsub!(/ß/u,'ß') # ß + dob.obj.gsub!(/à/u,'à') # à + dob.obj.gsub!(/á/u,'á') # á + dob.obj.gsub!(/â/u,'â') # â + dob.obj.gsub!(/ã/u,'ã') # ã + dob.obj.gsub!(/ä/u,'ä') # ä + dob.obj.gsub!(/å/u,'å') # å + dob.obj.gsub!(/æ/u,'æ') # æ + dob.obj.gsub!(/ç/u,'ç') # ç + dob.obj.gsub!(/è/u,'è') # è + dob.obj.gsub!(/é/u,'´') # é + dob.obj.gsub!(/ê/u,'ˆ') # ê + dob.obj.gsub!(/ë/u,'ë') # ë + dob.obj.gsub!(/ì/u,'ì') # ì + dob.obj.gsub!(/í/u,'´') # í + dob.obj.gsub!(/î/u,'î') # î + dob.obj.gsub!(/ï/u,'ï') # ï + dob.obj.gsub!(/ð/u,'ð') # ð + dob.obj.gsub!(/ñ/u,'ñ') # ñ + dob.obj.gsub!(/ò/u,'ò') # ò + dob.obj.gsub!(/ó/u,'ó') # ó + dob.obj.gsub!(/ô/u,'ô') # ô + dob.obj.gsub!(/õ/u,'õ') # õ + dob.obj.gsub!(/ö/u,'ö') # ö + dob.obj.gsub!(/ø/u,'ø') # ø + dob.obj.gsub!(/ù/u,'ù') # ú + dob.obj.gsub!(/ú/u,'ú') # û + dob.obj.gsub!(/û/u,'û') # ü + dob.obj.gsub!(/ü/u,'ü') # ý + dob.obj.gsub!(/þ/u,'þ') # þ + dob.obj.gsub!(/ÿ/u,'ÿ') # ÿ + dob.obj.gsub!(/‘/u,'&#lsquo;') # ‘ # ‘ + dob.obj.gsub!(/’/u,'&#rsquo;') # ’ # ’ + dob.obj.gsub!(/“/u,'“') # “ # “ + dob.obj.gsub!(/”/u,'”') # ” # ” + dob.obj.gsub!(/–/u,'–') # – # – + dob.obj.gsub!(/—/u,'—') # — # — + dob.obj.gsub!(/∝/u,'∝') # ∝ # ∝ + dob.obj.gsub!(/∞/u,'∞') # ∞ # ∞ + dob.obj.gsub!(/™/u,'™') # ™ # ™ + dob.obj.gsub!(/✠/u,'✠') # ✠ + #par.obja.gsub!(/✠/u '†') # † # † incorrect replacement † + dob.obj.gsub!(/ /u,' ') # space identify + dob.obj.gsub!(/ /u,' ') # space identify + end + end + self + end + def tidywords(wordlist) + wordlist.each do |x| + #imperfect solution will not catch all possible cases + x.gsub!(/&/,'&') unless x =~/&\S+;/ + x.gsub!(/&([A-Z])/,'&\1') + end + end + def markup(dob='') + wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 + dob.obj=tidywords(wordlist).join(' ').strip + unless dob.is=='table' + dob.obj.gsub!(/#{Mx[:br_line]}/u,'<br />') + dob.obj.gsub!(/#{Mx[:br_paragraph]}/u,'<br />') + dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />') + end + dob.obj.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') + dob.obj.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') + dob.obj.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< '); dob.obj.gsub!(/\s+>(\s+|$)/,' >\1') + #dob.obj.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'<em>\1</em>') #reinstate + dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/m,'<b>\1</b>') + dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/m,'<i>\1</i>') + dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>') + dob.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<sup>\1</sup>') + dob.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<sub>\1</sub>') + dob.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'<ins>\1</ins>') + dob.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'<cite>\1</cite>') + dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>') + dob.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'<tt>\1</tt>') + dob.obj.gsub!(/<:pb>\s*/,'') #Fix + dob.obj.gsub!(/<+[-~]#>+/,'') + if dob.is !~/^code/ + #embeds a red-bullet image --> + dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<b>\1</b>') + dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<i>\1</i>') + dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>') + dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'<del>\1</del>') + dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br />') unless dob.is=='table' + dob.obj.gsub!(/#{Mx[:br_page]}\s*/,'') + dob.obj.gsub!(/#{Mx[:br_page_new]}\s*/,'') + dob.obj.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); dob.obj.gsub!(/<[-~]#>/,'') + dob.obj.gsub!(/href="#{Xx[:segment]}/m,'href="') + dob.obj.gsub!(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\.\.\/\S+?)#{Mx[:rel_c]}/, + '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>') + dob.obj.gsub!(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}:(\S+?)#{Mx[:rel_c]}/, + '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="../\2">\1</link>') + dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\S+?)#{Mx[:rel_c]}/, + '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="#\2">\1</link>') + dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, + %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@md.file.output_path.xml.rel_image}/\\1" width="\\2" height="\\3" />[\\1] \\4}) + dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, + %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@md.file.output_path.xml.rel_image}/\\1"/>\\1}) + dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/, + %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@md.file.output_path.xml.rel_image}/\\1" width="\\2" height="\\3" />[\\1] \\4}) + dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}image/, + %{<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@md.file.output_path.xml.rel_image}/\\1"/>\\1}) + dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, + '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>') #watch, compare html_tune + dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, + %{#{@brace_url.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\1">\\1</link>#{@brace_url.xml_close}}) + dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, + '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>') #escaped urls not linked, deal with later + else + dob.obj.gsub!(/</m,'<'); dob.obj.gsub!(/>/m,'>') + end + if dob.of=='block' + dob.obj.gsub!(/#{Mx[:gl_bullet]}/,'● ') + end + dob.obj.gsub!(/#{Mx[:url_o]}([a-zA-Z0-9._-]+\@\S+?\.[a-zA-Z0-9._-]+)#{Mx[:url_c]}/, + %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}) + dob.obj.gsub!(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}") + dob.obj.gsub!(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}") + dob.obj.gsub!(/ |#{Mx[:nbsp]}/m,' ') + dob + end + def markup_light(dob='') + dob.obj.gsub!(/\/\{(.+?)\}\//,'<i>\1</i>') + dob.obj.gsub!(/[*!]\{(.+?)\}[*!]/,'<b>\1</b>') + dob.obj.gsub!(/_\{(.+?)\}_/,'<u>\1</u>') + dob.obj.gsub!(/-\{(.+?)\}-/,'<del>\1</del>') + dob.obj.gsub!(/<br(\s*\/)?>/,'<br />') + dob.obj.gsub!(/<:pb>\s*/,'') + dob.obj.gsub!(/<[-~]#>/,'') + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort + dob.obj.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax + dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/, + "<image.path>#{@md.file.output_path.xml.rel_image}\/\\1</image.path>") + dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ') + wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 + dob.obj=tidywords(wordlist).join(' ').strip + dob + end + def markup_fictionbook(dob='') + dob.obj.gsub!(/~\{([\d*+]+).+?\}~/,'[\1]') + dob.obj.gsub!(/\/\{(.+?)\}\//,'<i>\1</i>') + dob.obj.gsub!(/[*!]\{(.+?)\}[*!]/,'<b>\1</b>') + dob.obj.gsub!(/_\{(.+?)\}_/,'<u>\1</u>') + dob.obj.gsub!(/-\{(.+?)\}-/,'<del>\1</del>') + dob.obj.gsub!(/<br(?:\s*\/)?>/,'<br />') + dob.obj.gsub!(/<:pb>\s*/,'') + dob.obj.gsub!(/<[-~]#>/,'') + #temporary --> + dob.obj.gsub!(/<:\S+?>/,'') + #<-- temporary + dob.obj.gsub!(/<[-~]#>/,'') + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort + dob.obj.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax + dob.obj.gsub!(/(#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/, + "<image.path>#{@md.file.output_path.xml.rel_image}\/\\1</image.path>") + dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ') + wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 + dob.obj=tidywords(wordlist).join(' ').strip + dob + end + def markup_group(dob='') + dob.obj.gsub!(/</,'<'); dob.obj.gsub!(/>/,'>') + dob.obj.gsub!(/<:?br(?:\s+\/)?>/,'<br />') + dob.obj.gsub!(/<(link xmlns:xlink=".+?")>/,'<\1>') + dob.obj.gsub!(/<(\/link)>/,'<\1>') + dob.obj.gsub!(/<(\/?en)>/,'<\1>') + dob + end + def markup_block(dob='') + dob.obj.gsub!(/</,'<'); dob.obj.gsub!(/>/,'>') + dob.obj.gsub!(/<:?br(?:\s+\/)?>/,'<br />') + dob.obj.gsub!(/<(link xmlns:xlink=".+?")>/,'<\1>') + dob.obj.gsub!(/<(\/link)>/,'<\1>') + dob.obj.gsub!(/<(\/?en)>/,'<\1>') + dob + end + def xml_sem_block_paired(matched) # colon depth: many, recurs + matched.gsub!(/\b(au):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:au]} depth="many">\\2</sem:#{@ab[:au]}>}) + matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:vol]} depth="many">\\2</sem:#{@ab[:vol]}>}) + matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:pub]} depth="many">\\2</sem:#{@ab[:pub]}>}) + matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:ref]} depth="many">\\2</sem:#{@ab[:ref]}>}) + matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:desc]} depth="many">\\2</sem:#{@ab[:desc]}>}) + matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:conv]} depth="many">\\2</sem:#{@ab[:conv]}>}) + matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:ct]} depth="many">\\2</sem:#{@ab[:ct]}>}) + matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:cty]} depth="many">\\2</sem:#{@ab[:cty]}>}) + matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:org]} depth="many">\\2</sem:#{@ab[:org]}>}) + matched.gsub!(/\b(dt):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:dt]} depth="many">\\2</sem:#{@ab[:dt]}>}) + matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:n]} depth="many">\\2</sem:#{@ab[:n]}>}) + matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'<sem:\1 depth="many">\2</sem:\1>') + end + def xml_semantic_tags(dob) + if @md.sem_tag + dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } + dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } + dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } + dob.obj.gsub!(/:\{(.+?)\}:au\b/m, %{<sem:#{@ab[:au]} depth="one">\\1</sem:#{@ab[:au]}>}) + dob.obj.gsub!(/:\{(.+?)\}:n\b/m, %{<sem:#{@ab[:n]} depth="one">\\1</sem:#{@ab[:n]}>}) + dob.obj.gsub!(/:\{(.+?)\}:ti\b/m, %{<sem:#{@ab[:ti]} depth="one">\\1</sem:#{@ab[:ti]}>}) + dob.obj.gsub!(/:\{(.+?)\}:ref\b/m, %{<sem:#{@ab[:ref]} depth="one">\\1</sem:#{@ab[:ref]}>}) + dob.obj.gsub!(/:\{(.+?)\}:desc\b/m, %{<sem:#{@ab[:desc]} depth="one">\\1</sem:#{@ab[:desc]}>}) + dob.obj.gsub!(/:\{(.+?)\}:cty\b/m, %{<sem:#{@ab[:cty]} depth="one">\\1</sem:#{@ab[:cty]}>}) + dob.obj.gsub!(/:\{(.+?)\}:org\b/m, %{<sem:#{@ab[:org]} depth="one">\\1</sem:#{@ab[:org]}>}) + dob.obj.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="one">\1</sem:\2>') + dob.obj.gsub!(/;\{([^}]+(?![;]))\};ti\b/m, %{<sem:#{@ab[:ti]} depth="zero">\\1</sem:#{@ab[:ti]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};qt\b/m, %{<sem:#{@ab[:qt]} depth="zero">\\1</sem:#{@ab[:qt]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};ref\b/m, %{<sem:#{@ab[:ref]} depth="zero">\\1</sem:#{@ab[:ref]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};ed\b/m, %{<sem:#{@ab[:ed]} depth="zero">\\1</sem:#{@ab[:ed]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};v\b/m, %{<sem:#{@ab[:v]} depth="zero">\\1</sem:#{@ab[:v]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};desc\b/m, %{<sem:#{@ab[:desc]} depth="zero">\\1</sem:#{@ab[:desc]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};def\b/m, %{<sem:#{@ab[:def]} depth="zero">\\1</sem:#{@ab[:def]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};trans\b/m, %{<sem:#{@ab[:trans]} depth="zero">\\1</sem:#{@ab[:trans]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};y\b/m, %{<sem:#{@ab[:y]} depth="zero">\\1</sem:#{@ab[:y]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};ab\b/m, %{<sem:#{@ab[:ab]} depth="zero">\\1</sem:#{@ab[:ab]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};pg\b/m, %{<sem:#{@ab[:pg]} depth="zero">\\1</sem:#{@ab[:pg]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};fn?\b/m, %{<sem:#{@ab[:fn]} depth="zero">\\1</sem:#{@ab[:fn]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};mn?\b/m, %{<sem:#{@ab[:mn]} depth="zero">\\1</sem:#{@ab[:mn]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};ln?\b/m, %{<sem:#{@ab[:ln]} depth="zero">\\1</sem:#{@ab[:ln]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};in\b/m, %{<sem:#{@ab[:in]} depth="zero">\\1</sem:#{@ab[:in]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};uni\b/m, %{<sem:#{@ab[:uni]} depth="zero">\\1</sem:#{@ab[:uni]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};fac\b/m, %{<sem:#{@ab[:fac]} depth="zero">\\1</sem:#{@ab[:fac]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};inst\b/m, %{<sem:#{@ab[:inst]} depth="zero">\\1</sem:#{@ab[:inst]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};dept\b/m, %{<sem:#{@ab[:dpt]} depth="zero">\\1</sem:#{@ab[:dept]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};org\b/m, %{<sem:#{@ab[:org]} depth="zero">\\1</sem:#{@ab[:org]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};com?\b/m, %{<sem:#{@ab[:com]} depth="zero">\\1</sem:#{@ab[:com]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};cty\b/m, %{<sem:#{@ab[:cty]} depth="zero">\\1</sem:#{@ab[:cty]}>}) + dob.obj.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="zero">\1</sem:\2>') + end + dob + end + end +end +module SiSU_XML_Tags #Format + require_relative 'param' # param.rb + include SiSU_Param + include SiSU_Viz + class RDF + def initialize(md='',seg_name=[],tracker=0) + @full_title=@subtitle=@author=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@copyright=@owner=@keywords='' + @md=md + @rdfurl=%{ rdf:about="http://www.jus.uio.no/lm/toc"\n} + if defined? @md.title.full \ + and @md.title.full # DublinCore 1 - title + @rdf_title=%{ dc.title="#{seg_name}#{@md.title.full}"\n} + @full_title=%{ <meta name="dc.title" content="#{@md.title.full}" />\n} + end + if defined? @md.creator.author \ + and @md.creator.author=~/\S+/ # DublinCore 2 - creator/author (author) + @rdf_author=%{ dc.author="#{@md.creator.author}"\n} + content=meta_content_clean(@md.creator.author) + @author=%{ <meta name="dc.author" content="#{content}" />\n} + end + if defined? @md.classify.subject \ + and @md.classify.subject=~/\S+/ # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) + @rdf_subject=%{ dc.subject="#{@md.classify.subject}"\n} + content=meta_content_clean(@md.classify.subject) + @subject=%{ <meta name="dc.subject" content="#{content}" />\n} + end + if defined? @md.notes.description \ + and @md.notes.description=~/\S+/ # DublinCore 4 - description + @rdf_description=%{ dc.description="#{@md.notes.description}"\n} + content=meta_content_clean(@md.notes.description) + @description=%{ <meta name="dc.description" content="#{content}" />\n} + end + if defined? @md.publisher \ + and @md.publisher # DublinCore 5 - publisher (current copy published by) + @rdf_publisher=%{ dc.publisher="#{@md.publisher}"\n} + content=meta_content_clean(@md.publisher) + @publisher=%{ <meta name="dc.publisher" content="#{content}" />\n} + end + if defined? @md.creator.contributor \ + and @md.creator.contributor=~/\S+/ # DublinCore 6 - contributor + @rdf_contributor=%{ dc.contributor="#{@md.creator.contributor}"\n} + content=meta_content_clean(@md.creator.contributor) + @contributor=%{ <meta name="dc.contributor" content="#{content}" />\n} + end + if defined? @md.date.published \ + and @md.date.published=~/\S+/ # DublinCore 7 - date year-mm-dd + @rdf_date=%{ dc.date="#{@md.date.published}"\n} + @date=%{ <meta name="dc.date" content="#{@md.date.published}" #{@md.date_scheme} />\n} # fix @md.date_scheme + end + if defined? @md.date.created \ + and @md.date.created=~/\S+/ # DublinCore 7 - date.created year-mm-dd + @rdf_date_created=%{ dc.date.created="#{@md.date.created}"\n} + @date_created=%{ <meta name="dc.date.created" content="#{@md.date.created}" #{@md.date_scheme} />\n} + end + if defined? @md.date.issued \ + and @md.date.issued=~/\S+/ # DublinCore 7 - date.issued year-mm-dd + @rdf_date_issued=%{ dc.date.issued="#{@md.date.issued}"\n} + @date_issued=%{ <meta name="dc.date.issued" content="#{@md.date.issued}" #{@md.date_scheme} />\n} + end + if defined? @md.date.available \ + and @md.date.available=~/\S+/ # DublinCore 7 - date.available year-mm-dd + @rdf_date_available=%{ dc.date.available="#{@md.date.available}"\n} + @date_available=%{ <meta name="dc.date.available" content="#{@md.date.available}" #{@md.date_scheme} />\n} + end + if defined? @md.date.valid \ + and @md.date.valid=~/\S+/ # DublinCore 7 - date.valid year-mm-dd + @rdf_date_valid=%{ dc.date.valid="#{@md.date.valid}"\n} + @date_valid=%{ <meta name="dc.date.valid" content="#{@md.date.valid}" #{@md.date_scheme} />\n} + end + if defined? @md.date.modified \ + and @md.date.modified=~/\S+/ # DublinCore 7 - date.modified year-mm-dd + @rdf_date_modified=%{ dc.date.modified="#{@md.date.modified}"\n} + @date_modified=%{ <meta name="dc.date.modified" content="#{@md.date.modified}" #{@md.date_scheme} />\n} + end + if defined? @md.type \ + and @md.type # DublinCore 8 - type (genre eg. report, convention etc) + @rdf_type=%{ dc.type="#{@md.type}"\n} + content=meta_content_clean(@md.type) + @type=%{ <meta name="dc.type" content="#{content}" />\n} + end + if defined? @md.classify.format \ + and @md.classify.format=~/\S+/ # DublinCore 9 - format (use your mime type) + @rdf_format=%{ dc.format="#{@md.classify.format}"\n} + content=meta_content_clean(@md.classify.format) + @format=%{ <meta name="dc.format" content="#{content}" />\n} + end + if defined? @md.classify.identifier \ + and @md.classify.identifier=~/\S+/ # DublinCore 10 - identifier (your identifier, could use urn which is free) + @rdf_identifier=%{ dc.identifier="#{@md.classify.identifier}"\n} + content=meta_content_clean(@md.classify.identifier) + @identifier=%{ <meta name="dc.identifier" content="#{content}" />\n} + end + if defined? @md.original.source \ + and @md.original.source=~/\S+/ # DublinCore 11 - source (document source) + @rdf_source=%{ dc.source="#{@md.original.source}"\n} + content=meta_content_clean(@md.original.source) + @source=%{ <meta name="dc.source" content="#{content}" />\n} + end + if defined? @md.title.language \ + and @md.title.language=~/\S+/ # DublinCore 12 - language (English) + @rdf_language=%{ dc.language="#{@md.title.language}"\n} + @language=%{ <meta name="dc.language" content="#{@md.title.language}" />\n} + end + if defined? @md.original.language \ + and @md.original.language=~/\S+/ + @rdf_language_original=%{ dc.language="#{@md.original.language}"\n} + @language_original=%{ <meta name="dc.language" content="#{@md.original.language}" />\n} + end + if defined? @md.classify.relation \ + and @md.classify.relation=~/\S+/ # DublinCore 13 - relation + @rdf_relation=%{ dc.relation="#{@md.classify.relation}"\n} + content=meta_content_clean(@md.classify.relation) + @relation=%{ <meta name="dc.relation" content="#{content}" />\n} + end + if defined? @md.classify.coverage \ + and @md.classify.coverage=~/\S+/ # DublinCore 14 - coverage + @rdf_coverage=%{ dc.coverage="#{@md.classify.coverage}"\n} + content=meta_content_clean(@md.classify.coverage) + @coverage=%{ <meta name="dc.coverage" content="#{content}" />\n} + end + if defined? @md.rights.all \ + and @md.rights.all # DublinCore 15 - rights + @rdf_rights=%{ dc.rights="#{@md.rights.all}"\n} + content=meta_content_clean(@md.rights.all) + @rights=%{ <meta name="dc.rights" content="#{content}" />\n} + end + content=meta_content_clean(@md.keywords) + @keywords=%{ <meta name="keywords" content="#{content}" />\n} if @md.keywords + @vz=SiSU_Env::GetInit.instance.skin + end + def meta_content_clean(content='') + content + unless content.nil? + content.tr!('"',"'") + content=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content) + end + content + end + def rdfseg #segHead + rdftoc + end + def comment_xml(extra='') + generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version] + lastdone="Last Generated on: #{Time.now}" + rubyv="Ruby version: #{@md.ruby_version}" + sc=if @md.sc_info + "Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}" + else '' + end + if extra.empty? +<<WOK +<!-- Document processing information: + * #{generator} + * #{rubyv} + * #{sc} + * #{lastdone} + * SiSU http://www.jus.uio.no/sisu +--> +WOK + else +<<WOK +<!-- Document processing information: + * #{extra} + * #{generator} + * #{rubyv} + * #{sc} + * #{lastdone} + * SiSU http://www.jus.uio.no/sisu +--> +WOK + end + end + def comment_xml_sax + desc='SiSU XML, SAX type representation' + comment_xml(desc) + end + def comment_xml_node + desc='SiSU XML, Node type representation' + comment_xml(desc) + end + def comment_xml_dom + desc='SiSU XML, DOM type representation' + comment_xml(desc) + end + def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better +<<WOK +#{@full_title}#{@subtitle}#{@author}#{@subject}#{@description}#{@publisher}#{@contributor}#{@date}#{@date_created}#{@date_issued}#{@date_available}#{@date_valid}#{@date_modified}#{@type}#{@format}#{@identifier}#{@source}#{@language}#{@relation}#{@coverage}#{@rights}#{@copyright}#{@owner} +#{@vz.txt_generator} +#{@vz.png_ico} +WOK + end + end +end +module SiSU_Tables + require_relative 'xml_tables' # xml_tables.rb +end +__END__ |