# encoding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997 - 2011, Ralph Amissah, All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: common file for xml generation
=end
module SiSU_XML_munge
class Trans
require_relative 'defaults' # defaults.rb
# Build a translator bound to one document.
#
# md - document metadata object; this method reads md.fns and md.sem_tag.
#
# Collaborators (system call helper, environment info, URL decoration) are
# created eagerly; the semantic tag-name table (@ab) is only populated when
# the document has semantic tagging switched on.
def initialize(md)
  @md = md
  @sys = SiSU_Env::System_call.new
  @dir = SiSU_Env::Info_env.new(@md.fns)
  @brace_url = SiSU_Viz::Skin.new.url_decoration
  @ab ||= semantic_tags.default if @md.sem_tag
end
# Namespace-style accessor: calling it (re)defines the helper method
# `default` and returns self, so callers write `semantic_tags.default`.
#
# `default` returns a fresh Hash mapping the short semantic codes (Symbols)
# to their long descriptive names (Strings).
def semantic_tags
  def default
    {
      :pub   => 'publication',
      :conv  => 'convention',   # listed once; a repeated key only triggers a Ruby warning
      :vol   => 'volume',
      :pg    => 'page',
      :cty   => 'city',
      :org   => 'organization',
      :uni   => 'university',
      :dept  => 'department',
      :fac   => 'faculty',
      :inst  => 'institute',
      :co    => 'company',
      :com   => 'company',
      :dt    => 'date',
      :y     => 'year',
      :m     => 'month',
      :d     => 'day',
      :ti    => 'title',
      :au    => 'author',
      :ed    => 'editor', #editor?
      :v     => 'version', #edition
      :n     => 'name',
      :fn    => 'firstname',
      :mn    => 'middlename',
      :ln    => 'lastname',
      :in    => 'initials',
      :qt    => 'quote',
      :ct    => 'cite',
      :ref   => 'reference',
      :ab    => 'abreviation',  # spelling kept as-is: the value is runtime data
      :def   => 'define',
      :desc  => 'description',
      :trans => 'translate',
    }
  end
  self
end
# Namespace-style method: calling it defines the nested helpers `utf8` and
# `html` and returns self, so callers write `char_enc.utf8(x)` or
# `char_enc.html(x)`.
#
# NOTE(review): as written, most rules below replace a character with the
# identical character (a no-op). The trailing comments suggest the
# replacements were meant to be character-entity forms - TODO confirm against
# the upstream source before relying on this table.
def char_enc #character encode
# utf8(dob): only acts when @sys.locale matches /utf-?8/i; otherwise the
# method returns nil. `dob` may be a String or an object responding to `obj`;
# the substitutions are applied in place to that string and `dob` is returned.
def utf8(dob='')
if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn
str=if defined? dob.obj; dob.obj
elsif dob.class==String; dob
end
if str
#¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûü
#¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷
# NOTE(review): the pattern on the next line is empty, and an empty pattern
# matches at every position, so '>' is inserted between all characters.
# Looks like a damaged pair of rules for '<' and '>' - TODO confirm.
str.gsub!(//um,'>') # '>' # >
str.gsub!(/¢/um,'¢') # '¢' # ¢
str.gsub!(/£/um,'£') # '£' # £
str.gsub!(/¥/um,'¥') # '¥' # ¥
str.gsub!(/§/um,'§') # '§' # §
str.gsub!(/©/um,'©') # '©' # ©
str.gsub!(/ª/um,'ª') # 'ª' # ª
str.gsub!(/«/um,'«') # '«' # «
str.gsub!(/®/um,'®') # '®' # ®
str.gsub!(/°/um,'°') # '°' # °
str.gsub!(/±/um,'±') # '±' # ±
str.gsub!(/²/um,'²') # '²' # ²
str.gsub!(/³/um,'³') # '³' # ³
str.gsub!(/µ/um,'µ') # 'µ' # µ
str.gsub!(/¶/um,'¶') # '¶' # ¶
str.gsub!(/¹/um,'¹') # '¹' # ¹
str.gsub!(/º/um,'º') # 'º' # º
str.gsub!(/»/um,'»') # '»' # »
str.gsub!(/¼/um,'¼') # '¼' # ¼
str.gsub!(/½/um,'½') # '½' # ½
str.gsub!(/¾/um,'¾') # '¾' # ¾
str.gsub!(/×/um,'×') # '×' # ×
str.gsub!(/÷/um,'÷') # '÷' # ÷
str.gsub!(/¿/um,'¿') # '¿' # ¿
str.gsub!(/À/um,'À') # 'À' # À
str.gsub!(/Á/um,'Á') # 'Á' # Á
str.gsub!(/Â/um,'Â') # 'Â' # Â
str.gsub!(/Ã/um,'Ã') # 'Ã' # Ã
str.gsub!(/Ä/um,'Ä') # 'Ä' # Ä
str.gsub!(/Å/um,'Å') # 'Å' # Å
str.gsub!(/Æ/um,'Æ') # 'Æ' # Æ
str.gsub!(/Ç/um,'Ç') # 'Ç' # Ç
str.gsub!(/È/um,'È') # 'È' # È
str.gsub!(/É/um,'É') # 'É' # É
str.gsub!(/Ê/um,'Ê') # 'Ê' # Ê
str.gsub!(/Ë/um,'Ë') # 'Ë' # Ë
str.gsub!(/Ì/um,'Ì') # 'Ì' # Ì
str.gsub!(/Í/um,'Í') # 'Í' # Í
str.gsub!(/Î/um,'Î') # 'Î' # Î
str.gsub!(/Ï/um,'Ï') # 'Ï' # Ï
str.gsub!(/Ð/um,'Ð') # 'Ð' # Ð
str.gsub!(/Ñ/um,'Ñ') # 'Ñ' # Ñ
str.gsub!(/Ò/um,'Ò') # 'Ò' # Ò
str.gsub!(/Ó/um,'Ó') # 'Ó' # Ó
str.gsub!(/Ô/um,'Ô') # 'Ô' # Ô
str.gsub!(/Õ/um,'Õ') # 'Õ' # Õ
str.gsub!(/Ö/um,'Ö') # 'Ö' # Ö
str.gsub!(/Ø/um,'Ø') # 'Ø' # Ø
str.gsub!(/Ù/um,'Ù') # 'Ù' # Ù
str.gsub!(/Ú/um,'Ú') # 'Ú' # Ú
str.gsub!(/Û/um,'Û') # 'Û' # Û
str.gsub!(/Ü/um,'Ü') # 'Ü' # Ü
str.gsub!(/Ý/um,'Ý') # 'Ý' # Ý
str.gsub!(/Þ/um,'Þ') # 'Þ' # Þ
str.gsub!(/ß/um,'ß') # 'ß' # ß
str.gsub!(/à/um,'à') # 'à' # à
str.gsub!(/á/um,'á') # 'á' # á
str.gsub!(/â/um,'â') # 'â' # â
str.gsub!(/ã/um,'ã') # 'ã' # ã
str.gsub!(/ä/um,'ä') # 'ä' # ä
str.gsub!(/å/um,'å') # 'å' # å
str.gsub!(/æ/um,'æ') # 'æ' # æ
str.gsub!(/ç/um,'ç') # 'ç' # ç
str.gsub!(/è/um,'è') # 'è' # è
str.gsub!(/é/um,'é') # '´' # é
str.gsub!(/ê/um,'ê') # 'ˆ' # ê
str.gsub!(/ë/um,'ë') # 'ë' # ë
str.gsub!(/ì/um,'ì') # 'ì' # ì
str.gsub!(/í/um,'í') # '´' # í
str.gsub!(/î/um,'î') # 'î' # î
str.gsub!(/ï/um,'ï') # 'ï' # ï
str.gsub!(/ð/um,'ð') # 'ð' # ð
str.gsub!(/ñ/um,'ñ') # 'ñ' # ñ
str.gsub!(/ò/um,'ò') # 'ò' # ò
str.gsub!(/ó/um,'ó') # 'ó' # ó
str.gsub!(/ô/um,'ô') # 'ô' # ô
str.gsub!(/õ/um,'õ') # 'õ' # õ
str.gsub!(/ö/um,'ö') # 'ö' # ö
str.gsub!(/ø/um,'ø') # 'ø' # ø
# NOTE(review): the next four rules chain into each other (ù becomes ú,
# ú becomes û, û becomes ü, ü becomes ý), so all four inputs end up as ý.
# Each replacement looks shifted by one character - TODO confirm and correct.
str.gsub!(/ù/um,'ú') # 'ù' # ú
str.gsub!(/ú/um,'û') # 'ú' # û
str.gsub!(/û/um,'ü') # 'û' # ü
str.gsub!(/ü/um,'ý') # 'ü' # ý
str.gsub!(/þ/um,'þ') # 'þ' # þ
str.gsub!(/ÿ/um,'ÿ') # 'ÿ' # ÿ
str.gsub!(/‘/um,'‘') # '‘' # ‘
str.gsub!(/’/um,'’') # '’' # ’
str.gsub!(/“/um,'“') # “ # “
str.gsub!(/”/um,'”') # ” # ”
str.gsub!(/–/um,'–') # – # –
str.gsub!(/—/um,'—') # — # —
str.gsub!(/∝/um,'∝') # ∝ # ∝
str.gsub!(/∞/um,'∞') # ∞ # ∞
str.gsub!(/™/um,'™') # ™ # ™
str.gsub!(/✠/um,'✠') # ✗ # ✠
str.gsub!(/ /um,' ') # space identify
str.gsub!(/ /um,' ') # space identify
end
# Write the processed string back when dob is an object with `obj`;
# a plain String was modified in place already.
dob=if defined? dob.obj
dob.obj=str
dob
elsif dob.class==String; dob
end
dob
end
end
# html(dob): operates on dob.obj directly, so dob must respond to `obj`
# (the '' default would not). With a UTF-8 locale only the two "space
# identify" rules run; otherwise the full table below is applied. The return
# value is that of the last gsub! executed (nil when it changed nothing).
def html(dob='')
if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn
dob.obj.gsub!(/ /u,' ') # space identify
dob.obj.gsub!(/ /u,' ') # space identify
else
dob.obj.gsub!(/¢/u,'¢') # ¢
dob.obj.gsub!(/£/u,'£') # £
dob.obj.gsub!(/¥/u,'¥') # ¥
dob.obj.gsub!(/§/u,'§') # §
dob.obj.gsub!(/©/u,'©') # ©
dob.obj.gsub!(/ª/u,'ª') # ª
dob.obj.gsub!(/«/u,'«') # «
dob.obj.gsub!(/®/u,'®') # ®
dob.obj.gsub!(/°/u,'°') # °
dob.obj.gsub!(/±/u,'±') # ±
dob.obj.gsub!(/²/u,'²') # ²
dob.obj.gsub!(/³/u,'³') # ³
dob.obj.gsub!(/µ/u,'µ') # µ
dob.obj.gsub!(/¶/u,'¶') # ¶
dob.obj.gsub!(/¹/u,'¹') # ¹
dob.obj.gsub!(/º/u,'º') # º
dob.obj.gsub!(/»/u,'»') # »
dob.obj.gsub!(/¼/u,'¼') # ¼
dob.obj.gsub!(/½/u,'½') # ½
dob.obj.gsub!(/¾/u,'¾') # ¾
dob.obj.gsub!(/×/u,'×') # ×
dob.obj.gsub!(/÷/u,'÷') # ÷
dob.obj.gsub!(/¿/u,'¿') # ¿
dob.obj.gsub!(/À/u,'À') # À
dob.obj.gsub!(/Á/u,'Á') # Á
dob.obj.gsub!(/Â/u,'Â') # Â
dob.obj.gsub!(/Ã/u,'Ã') # Ã
dob.obj.gsub!(/Ä/u,'Ä') # Ä
dob.obj.gsub!(/Å/u,'Å') # Å
dob.obj.gsub!(/Æ/u,'Æ') # Æ
dob.obj.gsub!(/Ç/u,'Ç') # Ç
dob.obj.gsub!(/È/u,'È') # È
dob.obj.gsub!(/É/u,'É') # É
dob.obj.gsub!(/Ê/u,'Ê') # Ê
dob.obj.gsub!(/Ë/u,'Ë') # Ë
dob.obj.gsub!(/Ì/u,'Ì') # Ì
dob.obj.gsub!(/Í/u,'Í') # Í
dob.obj.gsub!(/Î/u,'Î') # Î
dob.obj.gsub!(/Ï/u,'Ï') # Ï
dob.obj.gsub!(/Ð/u,'Ð') # Ð
dob.obj.gsub!(/Ñ/u,'Ñ') # Ñ
dob.obj.gsub!(/Ò/u,'Ò') # Ò
dob.obj.gsub!(/Ó/u,'Ó') # Ó
dob.obj.gsub!(/Ô/u,'Ô') # Ô
dob.obj.gsub!(/Õ/u,'Õ') # Õ
dob.obj.gsub!(/Ö/u,'Ö') # Ö
dob.obj.gsub!(/Ø/u,'Ø') # Ø
dob.obj.gsub!(/Ù/u,'Ù') # Ù
dob.obj.gsub!(/Ú/u,'Ú') # Ú
dob.obj.gsub!(/Û/u,'Û') # Û
dob.obj.gsub!(/Ü/u,'Ü') # Ü
dob.obj.gsub!(/Ý/u,'Ý') # Ý
dob.obj.gsub!(/Þ/u,'Þ') # Þ
dob.obj.gsub!(/ß/u,'ß') # ß
dob.obj.gsub!(/à/u,'à') # à
dob.obj.gsub!(/á/u,'á') # á
dob.obj.gsub!(/â/u,'â') # â
dob.obj.gsub!(/ã/u,'ã') # ã
dob.obj.gsub!(/ä/u,'ä') # ä
dob.obj.gsub!(/å/u,'å') # å
dob.obj.gsub!(/æ/u,'æ') # æ
dob.obj.gsub!(/ç/u,'ç') # ç
dob.obj.gsub!(/è/u,'è') # è
# NOTE(review): é, ê and í below are replaced by a bare accent character
# (´ or ˆ) rather than by the accented letter - looks wrong, TODO confirm.
dob.obj.gsub!(/é/u,'´') # é
dob.obj.gsub!(/ê/u,'ˆ') # ê
dob.obj.gsub!(/ë/u,'ë') # ë
dob.obj.gsub!(/ì/u,'ì') # ì
dob.obj.gsub!(/í/u,'´') # í
dob.obj.gsub!(/î/u,'î') # î
dob.obj.gsub!(/ï/u,'ï') # ï
dob.obj.gsub!(/ð/u,'ð') # ð
dob.obj.gsub!(/ñ/u,'ñ') # ñ
dob.obj.gsub!(/ò/u,'ò') # ò
dob.obj.gsub!(/ó/u,'ó') # ó
dob.obj.gsub!(/ô/u,'ô') # ô
dob.obj.gsub!(/õ/u,'õ') # õ
dob.obj.gsub!(/ö/u,'ö') # ö
dob.obj.gsub!(/ø/u,'ø') # ø
dob.obj.gsub!(/ù/u,'ù') # ú
dob.obj.gsub!(/ú/u,'ú') # û
dob.obj.gsub!(/û/u,'û') # ü
dob.obj.gsub!(/ü/u,'ü') # ý
dob.obj.gsub!(/þ/u,'þ') # þ
dob.obj.gsub!(/ÿ/u,'ÿ') # ÿ
# NOTE(review): the two quote replacements below have no leading '&', so the
# output is the literal text "lsquo;" / "rsquo;" - TODO confirm intent.
dob.obj.gsub!(/‘/u,'lsquo;') # ‘ # ‘
dob.obj.gsub!(/’/u,'rsquo;') # ’ # ’
dob.obj.gsub!(/“/u,'“') # “ # “
dob.obj.gsub!(/”/u,'”') # ” # ”
dob.obj.gsub!(/–/u,'–') # – # –
dob.obj.gsub!(/—/u,'—') # — # —
dob.obj.gsub!(/∝/u,'∝') # ∝ # ∝
dob.obj.gsub!(/∞/u,'∞') # ∞ # ∞
dob.obj.gsub!(/™/u,'™') # ™ # ™
dob.obj.gsub!(/✠/u,'✠') # ✠
#par.obja.gsub!(/✠/u '†') # † # † incorrect replacement †
dob.obj.gsub!(/ /u,' ') # space identify
dob.obj.gsub!(/ /u,' ') # space identify
end
end
self
end
# Escape stray ampersands in a list of word tokens, in place.
#
# wordlist - Array of mutable Strings (tokens produced by String#scan).
#
# A token that already looks like it contains an entity (`&...;`) is left
# alone by the first rule; the second rule still escapes an ampersand that is
# directly followed by a capital letter (e.g. "AT&T;").
#
# Returns the same Array, with its Strings modified in place.
def tidywords(wordlist)
  wordlist.each do |x|
    #imperfect solution will not catch all possible cases
    # Replacing '&' with '&' was a no-op; the guard against existing
    # entities only makes sense when the bare ampersand is escaped.
    x.gsub!(/&/,'&amp;') unless x =~/&\S+;/
    x.gsub!(/&([A-Z])/,'&amp;\1')
  end
end
# Main markup pass over one document object.
#
# dob - object exposing `obj` (the text, read and reassigned), `is` and `of`
#       (compared against 'table', /^code/ and 'block' below).
#
# Steps, in order: tokenise dob.obj (entities, non-space runs, newlines), run
# the tokens through tidywords and rejoin them with single spaces; then apply
# a fixed sequence of in-place substitutions keyed on the Mx / Dx / Xx marker
# tables (defined outside this method). Returns dob.
#
# NOTE(review): most replacement strings here are just '\1' or a newline,
# i.e. the markers are removed without anything being emitted in their place,
# and several rules appear twice. This looks like replacement markup has been
# lost from the string literals - TODO confirm against the upstream source
# before changing anything; the order of the rules matters.
def markup(dob='')
wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
dob.obj=tidywords(wordlist).join(' ').strip
# Line/paragraph break markers are only rewritten outside tables.
unless dob.is=='table'
dob.obj.gsub!(/#{Mx[:br_line]}/u,'
')
dob.obj.gsub!(/#{Mx[:br_paragraph]}/u,'
')
dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
')
end
dob.obj.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'')
# Marked-up entity names / numeric codes become '&name;' / '&#nnn;'.
dob.obj.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;')
dob.obj.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;')
dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< '); dob.obj.gsub!(/\s+>(\s+|$)/,' >\1')
#dob.obj.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'\1') #reinstate
# Font-attribute markers: as written, each rule keeps only the enclosed text.
dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/m,'\1')
dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/m,'\1')
dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1')
dob.obj.gsub!(/<:pb>\s*/,'') #Fix
dob.obj.gsub!(/<+[-~]#>+/,'')
# Everything except code objects gets the link / image / url handling below.
if dob.is !~/^code/
#embeds a red-bullet image -->
dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') unless dob.is=='table'
dob.obj.gsub!(/#{Mx[:br_page]}\s*/,'')
dob.obj.gsub!(/#{Mx[:br_page_new]}\s*/,'')
dob.obj.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); dob.obj.gsub!(/<[-~]#>/,'')
dob.obj.gsub!(/href="#{Xx[:segment]}/m,'href="')
# Link forms, most specific first: relative '../' targets, ':'-prefixed
# targets, any other relative target, then image links (with and without
# explicit WxH dimensions), then ordinary links and bare urls.
dob.obj.gsub!(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\.\.\/\S+?)#{Mx[:rel_c]}/,
'\1')
dob.obj.gsub!(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}:(\S+?)#{Mx[:rel_c]}/,
'\1')
dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\S+?)#{Mx[:rel_c]}/,
'\1')
dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
%{[\\1] \\4})
dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
%{\\1})
dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/,
%{[\\1] \\4})
dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}image/,
%{\\1})
dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
'\1') #watch, compare html_tune
# Bare urls are wrapped in the skin's url decoration.
dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
%{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}})
dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,
'\1') #escaped urls not linked, deal with later
else
# NOTE(review): the two rules below are identical as written - TODO confirm.
dob.obj.gsub!(/(^|[^}])_/m,'\1>') #code-block: angle brackets special characters
dob.obj.gsub!(/(^|[^}])_/m,'\1>')
end
if dob.of=='block'
dob.obj.gsub!(/#{Mx[:gl_bullet]}/,'● ')
end
dob.obj.gsub!(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}")
dob.obj.gsub!(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}")
dob.obj.gsub!(/ |#{Mx[:nbsp]}/m,' ')
dob
end
# Lighter markup pass: handles the brace-style inline markers ( /{..}/ ,
# *{..}* or !{..}! , _{..}_ , -{..}- ), page-break and numbering-control
# tags, ampersands and image links, then re-tokenises through tidywords.
#
# dob - object exposing `obj` (text, modified in place and reassigned).
# Returns dob.
#
# NOTE(review): as written the inline-marker rules keep only the enclosed
# text, and the two ampersand rules replace '&' with '&'. This looks like
# replacement markup/entities have been lost - TODO confirm against upstream.
def markup_light(dob='')
dob.obj.gsub!(/\/\{(.+?)\}\//,'\1')
dob.obj.gsub!(/[*!]\{(.+?)\}[*!]/,'\1')
dob.obj.gsub!(/_\{(.+?)\}_/,'\1')
dob.obj.gsub!(/-\{(.+?)\}-/,'\1')
dob.obj.gsub!(/
/,'
')
dob.obj.gsub!(/<:pb>\s*/,'')
dob.obj.gsub!(/<[-~]#>/,'')
dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort
dob.obj.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax
# Image link: the whole link (including the character matched just before
# it) is replaced by the local images url plus the image file name.
dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
"#{@dir.url.images_local}\/\\1")
dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ')
wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
dob.obj=tidywords(wordlist).join(' ').strip
dob
end
# Markup pass used for FictionBook output.
#
# dob - object exposing `obj` (text, modified in place and reassigned).
# Returns dob.
#
# Handles, in order: endnote markers ( ~{n ...}~ becomes [n] ), the
# brace-style inline markers, page-break / numbering-control tags, any
# remaining <:...> tags, ampersands, image links and non-breaking spaces;
# finally the text is re-tokenised through tidywords.
#
# NOTE(review): several replacements are '\1' or replace '&' with '&'; they
# are kept exactly as found. Replacement markup may have been lost from these
# literals - TODO confirm against upstream.
def markup_fictionbook(dob='')
  dob.obj.gsub!(/~\{([\d*+]+).+?\}~/,'[\1]')
  dob.obj.gsub!(/\/\{(.+?)\}\//,'\1')
  dob.obj.gsub!(/[*!]\{(.+?)\}[*!]/,'\1')
  dob.obj.gsub!(/_\{(.+?)\}_/,'\1')
  dob.obj.gsub!(/-\{(.+?)\}-/,'\1')
  dob.obj.gsub!(/\n/,"\n")
  dob.obj.gsub!(/<:pb>\s*/,'')
  dob.obj.gsub!(/<[-~]#>/,'')
  #temporary -->
  dob.obj.gsub!(/<:\S+?>/,'')
  #<-- temporary
  dob.obj.gsub!(/<[-~]#>/,'')
  dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort
  dob.obj.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax
  # Fix: the pattern used to start with an unclosed "(", which makes the
  # interpolated regexp fail to compile (RegexpError) when this line runs.
  # Without it, group 1 is the image file name that the replacement expects.
  dob.obj.gsub!(/#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
    "#{@dir.url.images_local}\/\\1")
  dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ')
  wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
  dob.obj=tidywords(wordlist).join(' ').strip
  dob
end
# Make the text of a grouped object safe for XML output.
#
# dob - object exposing `obj` (text, modified in place).
# Returns dob.
#
# Two phases:
#   1. every angle bracket is escaped to &lt; / &gt;
#   2. a small whitelist of tags is switched back on: line breaks
#      (<br>, <br />, <:br>), <link xmlns:xlink="..."> ... </link>, and
#      <en> ... </en>.
#
# NOTE(review): this body is a reconstruction. The statement that escaped the
# brackets was not valid Ruby as found, and the whitelist patterns matched
# raw "<...>" which phase 1 would already have removed. The "<br />"
# replacement in particular is inferred - confirm against upstream.
def markup_group(dob='')
  dob.obj.gsub!(/</,'&lt;'); dob.obj.gsub!(/>/,'&gt;')
  dob.obj.gsub!(/&lt;:?br(?:\s+\/)?&gt;/,'<br />')
  dob.obj.gsub!(/&lt;(link xmlns:xlink=".+?")&gt;/,'<\1>')
  dob.obj.gsub!(/&lt;(\/link)&gt;/,'<\1>')
  dob.obj.gsub!(/&lt;(\/?en)&gt;/,'<\1>')
  dob
end
# Make the text of a block object safe for XML output.
#
# dob - object exposing `obj` (text, modified in place).
# Returns dob.
#
# Two phases:
#   1. every angle bracket is escaped to &lt; / &gt;
#   2. a small whitelist of tags is switched back on: line breaks
#      (<br>, <br />, <:br>), <link xmlns:xlink="..."> ... </link>, and
#      <en> ... </en>.
#
# NOTE(review): this body is a reconstruction. The statement that escaped the
# brackets was not valid Ruby as found, and the whitelist patterns matched
# raw "<...>" which phase 1 would already have removed. The "<br />"
# replacement in particular is inferred - confirm against upstream.
def markup_block(dob='')
  dob.obj.gsub!(/</,'&lt;'); dob.obj.gsub!(/>/,'&gt;')
  dob.obj.gsub!(/&lt;:?br(?:\s+\/)?&gt;/,'<br />')
  dob.obj.gsub!(/&lt;(link xmlns:xlink=".+?")&gt;/,'<\1>')
  dob.obj.gsub!(/&lt;(\/link)&gt;/,'<\1>')
  dob.obj.gsub!(/&lt;(\/?en)&gt;/,'<\1>')
  dob
end
# Resolve "paired" semantic blocks of the form  tag:{ ... }:tag  inside one
# already-matched fragment (colon depth: many, recurs).
#
# matched - mutable String; modified in place.
#
# The named tags are handled first, then any remaining  word:{...}:word
# pair. As written, every rule keeps only the enclosed text.
#
# Returns the processed String. (Previously the method returned the value of
# its last gsub!, which is nil when that final pattern no longer matches -
# and a nil result makes the calling gsub! block replace the whole fragment
# with an empty string.)
def xml_sem_block_paired(matched) # colon depth: many, recurs
  matched.gsub!(/\b(au):\{(.+?)\}:\1\b/m, %{\\2})
  matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{\\2})
  matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{\\2})
  matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{\\2})
  matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{\\2})
  matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{\\2})
  matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m, %{\\2})
  matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{\\2})
  matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{\\2})
  matched.gsub!(/\b(dt):\{(.+?)\}:\1\b/m, %{\\2})
  matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m, %{\\2})
  matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'\2')
  matched
end
# Resolve semantic markup in dob.obj when the document enables it
# (@md.sem_tag); otherwise dob is returned untouched.
#
# dob - object exposing `obj` (text, modified in place).
# Returns dob.
#
# Order of work:
#   1. paired blocks  tag:{...}:tag  - three passes, each handing the matched
#      fragment to xml_sem_block_paired;
#   2. colon form  :{...}:tag  - the listed tags first, then any tag;
#   3. semicolon form  ;{...};tag  - the listed tags first, then any tag.
# Every rule here replaces the match with the enclosed text (group 1).
def xml_semantic_tags(dob)
  return dob unless @md.sem_tag

  3.times do
    dob.obj.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) }
  end

  # Tags are regexp fragments, applied in this exact order.
  %w[au n ti ref desc cty org].each do |tag|
    dob.obj.gsub!(/:\{(.+?)\}:#{tag}\b/m, %{\\1})
  end
  dob.obj.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'\1')

  # "fn?", "mn?", "ln?" and "com?" carry an optional last letter.
  %w[ti qt ref ed v desc def trans y ab pg fn? mn? ln? in uni fac inst dept org com? cty].each do |tag|
    dob.obj.gsub!(/;\{([^}]+(?![;]))\};#{tag}\b/m, %{\\1})
  end
  dob.obj.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'\1')

  dob
end
end
end
module SiSU_XML_tags #Format
require_relative 'param' # param.rb
include SiSU_Param
include SiSU_Viz
class RDF
# Collect document metadata into instance variables, field by field
# (Dublin Core numbering is noted on each test below).
#
# md       - document metadata object (title, creator, classify, notes,
#            date, original, rights, publisher, type, keywords are read).
# seg_name - interpolated in front of the full title in @rdf_title.
# tracker  - accepted but not used in this method.
#
# For every field that is defined and non-blank, two strings are set: an
# @rdf_* string of the form  dc.<field>="<value>"  and a second instance
# variable named after the field. Where meta_content_clean is called, note
# that it works on the metadata string itself (it uses tr! in place), after
# the @rdf_* string has already been built.
#
# NOTE(review): the second string of each pair is just %{ \n} as written, and
# the cleaned `content` value is never interpolated into anything. It looks
# like element markup has been lost from these literals - TODO confirm
# against the upstream source.
def initialize(md='',seg_name=[],tracker=0)
# All of these start out referring to one and the same empty String object.
@full_title=@subtitle=@author=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@copyright=@owner=@keywords=''
@md=md
@rdfurl=%{ rdf:about="http://www.jus.uio.no/lm/toc"\n}
if defined? @md.title.full \
and @md.title.full # DublinCore 1 - title
@rdf_title=%{ dc.title="#{seg_name}#{@md.title.full}"\n}
@full_title=%{ \n}
end
if defined? @md.creator.author \
and @md.creator.author=~/\S+/ # DublinCore 2 - creator/author (author)
@rdf_author=%{ dc.author="#{@md.creator.author}"\n}
content=meta_content_clean(@md.creator.author)
@author=%{ \n}
end
if defined? @md.classify.subject \
and @md.classify.subject=~/\S+/ # DublinCore 3 - subject (us library of congress, eric or udc, or schema???)
@rdf_subject=%{ dc.subject="#{@md.classify.subject}"\n}
content=meta_content_clean(@md.classify.subject)
@subject=%{ \n}
end
if defined? @md.notes.description \
and @md.notes.description=~/\S+/ # DublinCore 4 - description
@rdf_description=%{ dc.description="#{@md.notes.description}"\n}
content=meta_content_clean(@md.notes.description)
@description=%{ \n}
end
if defined? @md.publisher \
and @md.publisher # DublinCore 5 - publisher (current copy published by)
@rdf_publisher=%{ dc.publisher="#{@md.publisher}"\n}
content=meta_content_clean(@md.publisher)
@publisher=%{ \n}
end
if defined? @md.creator.contributor \
and @md.creator.contributor=~/\S+/ # DublinCore 6 - contributor
@rdf_contributor=%{ dc.contributor="#{@md.creator.contributor}"\n}
content=meta_content_clean(@md.creator.contributor)
@contributor=%{ \n}
end
if defined? @md.date.published \
and @md.date.published=~/\S+/ # DublinCore 7 - date year-mm-dd
@rdf_date=%{ dc.date="#{@md.date.published}"\n}
@date=%{ \n} # fix @md.date_scheme
end
if defined? @md.date.created \
and @md.date.created=~/\S+/ # DublinCore 7 - date.created year-mm-dd
@rdf_date_created=%{ dc.date.created="#{@md.date.created}"\n}
@date_created=%{ \n}
end
if defined? @md.date.issued \
and @md.date.issued=~/\S+/ # DublinCore 7 - date.issued year-mm-dd
@rdf_date_issued=%{ dc.date.issued="#{@md.date.issued}"\n}
@date_issued=%{ \n}
end
if defined? @md.date.available \
and @md.date.available=~/\S+/ # DublinCore 7 - date.available year-mm-dd
@rdf_date_available=%{ dc.date.available="#{@md.date.available}"\n}
@date_available=%{ \n}
end
if defined? @md.date.valid \
and @md.date.valid=~/\S+/ # DublinCore 7 - date.valid year-mm-dd
@rdf_date_valid=%{ dc.date.valid="#{@md.date.valid}"\n}
@date_valid=%{ \n}
end
if defined? @md.date.modified \
and @md.date.modified=~/\S+/ # DublinCore 7 - date.modified year-mm-dd
@rdf_date_modified=%{ dc.date.modified="#{@md.date.modified}"\n}
@date_modified=%{ \n}
end
if defined? @md.type \
and @md.type # DublinCore 8 - type (genre eg. report, convention etc)
@rdf_type=%{ dc.type="#{@md.type}"\n}
content=meta_content_clean(@md.type)
@type=%{ \n}
end
if defined? @md.classify.format \
and @md.classify.format=~/\S+/ # DublinCore 9 - format (use your mime type)
@rdf_format=%{ dc.format="#{@md.classify.format}"\n}
content=meta_content_clean(@md.classify.format)
@format=%{ \n}
end
if defined? @md.classify.identifier \
and @md.classify.identifier=~/\S+/ # DublinCore 10 - identifier (your identifier, could use urn which is free)
@rdf_identifier=%{ dc.identifier="#{@md.classify.identifier}"\n}
content=meta_content_clean(@md.classify.identifier)
@identifier=%{ \n}
end
if defined? @md.original.source \
and @md.original.source=~/\S+/ # DublinCore 11 - source (document source)
@rdf_source=%{ dc.source="#{@md.original.source}"\n}
content=meta_content_clean(@md.original.source)
@source=%{ \n}
end
if defined? @md.title.language \
and @md.title.language=~/\S+/ # DublinCore 12 - language (English)
@rdf_language=%{ dc.language="#{@md.title.language}"\n}
@language=%{ \n}
end
# Language of the original document; also written as dc.language.
if defined? @md.original.language \
and @md.original.language=~/\S+/
@rdf_language_original=%{ dc.language="#{@md.original.language}"\n}
@language_original=%{ \n}
end
if defined? @md.classify.relation \
and @md.classify.relation=~/\S+/ # DublinCore 13 - relation
@rdf_relation=%{ dc.relation="#{@md.classify.relation}"\n}
content=meta_content_clean(@md.classify.relation)
@relation=%{ \n}
end
if defined? @md.classify.coverage \
and @md.classify.coverage=~/\S+/ # DublinCore 14 - coverage
@rdf_coverage=%{ dc.coverage="#{@md.classify.coverage}"\n}
content=meta_content_clean(@md.classify.coverage)
@coverage=%{ \n}
end
if defined? @md.rights.all \
and @md.rights.all # DublinCore 15 - rights
@rdf_rights=%{ dc.rights="#{@md.rights.all}"\n}
content=meta_content_clean(@md.rights.all)
@rights=%{ \n}
end
# Keywords are cleaned unconditionally (meta_content_clean accepts nil).
content=meta_content_clean(@md.keywords)
@keywords=%{ \n} if @md.keywords
@vz=SiSU_Env::Get_init.instance.skin
end
# Prepare a metadata value for use inside a double-quoted attribute.
#
# content - String (modified in place) or nil.
#
# Double quotes are turned into single quotes, then the value is passed
# through SiSU_XML_munge::Trans#char_enc.utf8.
#
# Returns nil for nil input; otherwise the processed String. The encoder
# returns nil when the system locale is not UTF-8; in that case the
# quote-normalised content is returned instead of nil, so the caller does
# not lose the value.
def meta_content_clean(content='')
  return content if content.nil?
  content.tr!('"',"'")
  encoded=SiSU_XML_munge::Trans.new(@md).char_enc.utf8(content)
  encoded.nil? ? content : encoded
end
# Segment-header variant: simply returns whatever rdftoc returns
# (rdftoc is defined elsewhere in this class).
def rdfseg #segHead
rdftoc
end
# Assemble the pieces of the "generated by" comment placed in XML output.
#
# extra - optional description line; the sibling helpers comment_xml_sax /
#         comment_xml_node / comment_xml_dom pass one in.
#
# Locals prepared here: generator (only set when @md.sisu_version[:version]
# is truthy), lastdone (current time), rubyv (from @md.ruby_version) and
# sc (source-file line, or '' when @md.sc_info is falsy).
#
# NOTE(review): the two branches below look like heredocs terminated by WOK
# whose opening line and body are missing, so as written none of the locals
# above is used and the method is not valid Ruby. Restore from the upstream
# source - TODO.
def comment_xml(extra='')
generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version]
lastdone="Last Generated on: #{Time.now}"
rubyv="Ruby version: #{@md.ruby_version}"
sc=if @md.sc_info
"Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}"
else ''
end
if extra.empty?
<
WOK
else
<
WOK
end
end
# Generated-file comment for the SAX-type XML representation.
def comment_xml_sax
  comment_xml('SiSU XML, SAX type representation')
end
# Generated-file comment for the Node-type XML representation.
def comment_xml_node
  comment_xml('SiSU XML, Node type representation')
end
# Generated-file comment for the DOM-type XML representation.
def comment_xml_dom
  comment_xml('SiSU XML, DOM type representation')
end
def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better
<