# coding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009 Ralph Amissah All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: common file for xml generation
=end
module SiSU_text_parts
require "#{SiSU_lib}/shared_structure"
class Split_text_object < SiSU_Structure::Split_text_object
require "#{SiSU_lib}/param"
require "#{SiSU_lib}/xml_format"
include SiSU_Viz
include SiSU_XML_format
@@alt_id_count=0
@@dp=nil
# Splits @para into a heading/format marker (@format) and its text (@text),
# then builds the scroll formatter for the paragraph in @lev_para_ocn.
#
# Reads:   @para, @dp, @md, @ocn and the Mx markup-delimiter table.
# Writes:  @format, @text, @lev_para_ocn.
# Returns: self, so the call can be chained.
#
# NOTE(review): the lazy trailing groups in /<:(.+?)>\s*(\S.+?)/ and /(.+?)/
# capture only the shortest possible text (two characters and one character
# respectively) - confirm that these patterns are complete.
def lev_segname_para
  segname=nil # stays nil unless the heading carries an explicit segment name
  if @para =~/^#{Mx[:lv_o]}(?:\d:|<:.+?>).+/
    # Heading forms, most specific first. Two further branches that repeated
    # the first two patterns with a lazy final group were removed: anything
    # they could match is already claimed by the greedy patterns below, so
    # they could never run.
    if /^#{Mx[:lv_o]}([1-6]):(\S+?)#{Mx[:lv_c]}\s*(\S.+)/m.match(@para)
      @format,segname,@text=$1,$2,$3
    elsif /^#{Mx[:lv_o]}([1-6]):#{Mx[:lv_c]}\s*(\S.+)/m.match(@para)
      @format,@text=$1,$2
    elsif /<:(.+?)>\s*(\S.+?)/m.match(@para)
      @format,@text=$1,$2
    end
  else
    if /(.+?)/m.match(@para)
      @text=$1
    end
    # paragraphs that do not end in an object-citation marker (and are not empty)
    if @para !~/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$|^$/ #added 2002w06
      @text=/(.+?)/m.match(@para)[1]
    end
    # "<digit>~<name> text" form: the digit becomes the format
    if /^(\d)~\S*\s+(.+)/m.match(@para)
      @format,@text=$1,$2
    end
  end
  @format="#@format:#{segname}" # segname interpolates as "" when nil
  #follow this search beneath for heading_body1-6
  @lev_para_ocn=if @para =~/.+#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
    SiSU_XML_format::Format_scroll.new(@md,@format,@text,@ocn)
  else
    # NOTE(review): this is a double-quoted string, not a regexp, so \w and \d
    # are plain "w" and "d" here - confirm the placeholder is intended.
    SiSU_XML_format::Format_scroll.new(@md,@format,@text,"#{Mx[:id_o]}~0;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}")
  end
  self
end
end
end
module SiSU_XML_munge
class Trans
require "#{SiSU_lib}/defaults"
# Prepares the transformer for one document.
#
# md - document parameter object; this method reads md.fns and md.sem_tag.
#
# Collects the system-call helper, the environment info for the source file,
# the digest pattern and the url decoration, and, when semantic tagging is
# switched on, the default semantic tag table (kept if @ab is already set).
def initialize(md)
  @md=md
  @sys=SiSU_Env::System_call.new
  @dir=SiSU_Env::Info_env.new(@md.fns)
  @dp=SiSU_Env::Info_env.new.digest.pattern
  @url_brace=SiSU_Viz::Skin.new.url_decoration
  #@ab ||=SiSU_Viz::Skin.new.semantic_tags.default
  @ab ||=semantic_tags.default if @md.sem_tag
end
# Accessor used as `semantic_tags.default`: calling it defines the nested
# #default method and returns self.
def semantic_tags
  # Returns a new Hash mapping the short semantic-tag symbols to their full
  # names. The table previously listed :conv twice (same value); the repeat
  # has been dropped, which leaves the resulting Hash unchanged and avoids
  # Ruby's duplicated-key warning.
  def default
    {
      :pub   => 'publication',
      :conv  => 'convention',
      :vol   => 'volume',
      :pg    => 'page',
      :cty   => 'city',
      :org   => 'organization',
      :uni   => 'university',
      :dept  => 'department',
      :fac   => 'faculty',
      :inst  => 'institute',
      :co    => 'company',
      :com   => 'company',
      :dt    => 'date',
      :y     => 'year',
      :m     => 'month',
      :d     => 'day',
      :ti    => 'title',
      :au    => 'author',
      :ed    => 'editor', #editor?
      :v     => 'version', #edition
      :n     => 'name',
      :fn    => 'firstname',
      :mn    => 'middlename',
      :ln    => 'lastname',
      :in    => 'initials',
      :qt    => 'quote',
      :ct    => 'cite',
      :ref   => 'reference',
      :ab    => 'abreviation', # NOTE(review): spelling kept as is; it may be relied on by consumers - confirm before changing
      :def   => 'define',
      :desc  => 'description',
      :trans => 'translate',
    }
  end
  self
end
# Character-encoding helpers, used as char_enc.utf8(para) / char_enc.html(para).
# Calling char_enc defines the two nested methods below and returns self.
def char_enc #character encode
# Applies a fixed sequence of in-place substitutions to para, but only when
# @sys.locale matches /utf-?8/i; otherwise para is left untouched.
# The return value is that of the last gsub! (nil when it changed nothing) or
# nil when the locale test fails, so callers should rely on para being mutated.
# NOTE(review): as written, almost every pair below replaces a character with
# itself; presumably the replacements were meant to be character entities -
# confirm against the upstream file.
def utf8(para='')
if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn
#¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûü
#¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷
##para.gsub!(//, '')
##para.gsub!(//, '&;')
# NOTE(review): an empty pattern matches at every position, so this inserts
# '>' between all characters - almost certainly not what was intended.
para.gsub!(//u, '>') # '>' # >
para.gsub!(/¢/u, '¢') # '¢' # ¢
para.gsub!(/£/u, '£') # '£' # £
para.gsub!(/¥/u, '¥') # '¥' # ¥
para.gsub!(/§/u, '§') # '§' # §
para.gsub!(/©/u, '©') # '©' # ©
para.gsub!(/ª/u, 'ª') # 'ª' # ª
para.gsub!(/«/u, '«') # '«' # «
para.gsub!(/®/u, '®') # '®' # ®
para.gsub!(/°/u, '°') # '°' # °
para.gsub!(/±/u, '±') # '±' # ±
para.gsub!(/²/u, '²') # '²' # ²
para.gsub!(/³/u, '³') # '³' # ³
para.gsub!(/µ/u, 'µ') # 'µ' # µ
para.gsub!(/¶/u, '¶') # '¶' # ¶
para.gsub!(/¹/u, '¹') # '¹' # ¹
para.gsub!(/º/u, 'º') # 'º' # º
para.gsub!(/»/u, '»') # '»' # »
para.gsub!(/¼/u, '¼') # '¼' # ¼
para.gsub!(/½/u, '½') # '½' # ½
para.gsub!(/¾/u, '¾') # '¾' # ¾
para.gsub!(/×/u, '×') # '×' # ×
para.gsub!(/÷/u, '÷') # '÷' # ÷
para.gsub!(/¿/u, '¿') # '¿' # ¿
para.gsub!(/À/u, 'À') # 'À' # À
para.gsub!(/Á/u, 'Á') # 'Á' # Á
para.gsub!(/Â/u, 'Â') # 'Â' # Â
para.gsub!(/Ã/u, 'Ã') # 'Ã' # Ã
para.gsub!(/Ä/u, 'Ä') # 'Ä' # Ä
para.gsub!(/Å/u, 'Å') # 'Å' # Å
para.gsub!(/Æ/u, 'Æ') # 'Æ' # Æ
para.gsub!(/Ç/u, 'Ç') # 'Ç' # Ç
para.gsub!(/È/u, 'È') # 'È' # È
para.gsub!(/É/u, 'É') # 'É' # É
para.gsub!(/Ê/u, 'Ê') # 'Ê' # Ê
para.gsub!(/Ë/u, 'Ë') # 'Ë' # Ë
para.gsub!(/Ì/u, 'Ì') # 'Ì' # Ì
para.gsub!(/Í/u, 'Í') # 'Í' # Í
para.gsub!(/Î/u, 'Î') # 'Î' # Î
para.gsub!(/Ï/u, 'Ï') # 'Ï' # Ï
para.gsub!(/Ð/u, 'Ð') # 'Ð' # Ð
para.gsub!(/Ñ/u, 'Ñ') # 'Ñ' # Ñ
para.gsub!(/Ò/u, 'Ò') # 'Ò' # Ò
para.gsub!(/Ó/u, 'Ó') # 'Ó' # Ó
para.gsub!(/Ô/u, 'Ô') # 'Ô' # Ô
para.gsub!(/Õ/u, 'Õ') # 'Õ' # Õ
para.gsub!(/Ö/u, 'Ö') # 'Ö' # Ö
para.gsub!(/Ø/u, 'Ø') # 'Ø' # Ø
para.gsub!(/Ù/u, 'Ù') # 'Ù' # Ù
para.gsub!(/Ú/u, 'Ú') # 'Ú' # Ú
para.gsub!(/Û/u, 'Û') # 'Û' # Û
para.gsub!(/Ü/u, 'Ü') # 'Ü' # Ü
para.gsub!(/Ý/u, 'Ý') # 'Ý' # Ý
para.gsub!(/Þ/u, 'Þ') # 'Þ' # Þ
para.gsub!(/ß/u, 'ß') # 'ß' # ß
para.gsub!(/à/u, 'à') # 'à' # à
para.gsub!(/á/u, 'á') # 'á' # á
para.gsub!(/â/u, 'â') # 'â' # â
para.gsub!(/ã/u, 'ã') # 'ã' # ã
para.gsub!(/ä/u, 'ä') # 'ä' # ä
para.gsub!(/å/u, 'å') # 'å' # å
para.gsub!(/æ/u, 'æ') # 'æ' # æ
para.gsub!(/ç/u, 'ç') # 'ç' # ç
para.gsub!(/è/u, 'è') # 'è' # è
para.gsub!(/é/u, 'é') # '´' # é
para.gsub!(/ê/u, 'ê') # 'ˆ' # ê
para.gsub!(/ë/u, 'ë') # 'ë' # ë
para.gsub!(/ì/u, 'ì') # 'ì' # ì
para.gsub!(/í/u, 'í') # '´' # í
para.gsub!(/î/u, 'î') # 'î' # î
para.gsub!(/ï/u, 'ï') # 'ï' # ï
para.gsub!(/ð/u, 'ð') # 'ð' # ð
para.gsub!(/ñ/u, 'ñ') # 'ñ' # ñ
para.gsub!(/ò/u, 'ò') # 'ò' # ò
para.gsub!(/ó/u, 'ó') # 'ó' # ó
para.gsub!(/ô/u, 'ô') # 'ô' # ô
para.gsub!(/õ/u, 'õ') # 'õ' # õ
para.gsub!(/ö/u, 'ö') # 'ö' # ö
para.gsub!(/ø/u, 'ø') # 'ø' # ø
# NOTE(review): the next four replacements are each shifted by one letter
# (ù->ú, ú->û, û->ü, ü->ý); run in sequence they turn all four into ý.
para.gsub!(/ù/u, 'ú') # 'ù' # ú
para.gsub!(/ú/u, 'û') # 'ú' # û
para.gsub!(/û/u, 'ü') # 'û' # ü
para.gsub!(/ü/u, 'ý') # 'ü' # ý
para.gsub!(/þ/u, 'þ') # 'þ' # þ
para.gsub!(/ÿ/u, 'ÿ') # 'ÿ' # ÿ
para.gsub!(/‘/u, '‘') # '‘' # ‘
para.gsub!(/’/u, '’') # '’' # ’
para.gsub!(/“/u, '“') # “ # “
para.gsub!(/”/u, '”') # ” # ”
para.gsub!(/–/u, '–') # – # –
para.gsub!(/—/u, '—') # — # —
para.gsub!(/∝/u, '∝') # ∝ # ∝
para.gsub!(/∞/u, '∞') # ∞ # ∞
para.gsub!(/™/u, '™') # ™ # ™
para.gsub!(/✠/u, '✠') # ✗ # ✠
para.gsub!(/ /u, ' ') # space identify
para.gsub!(/ /u, ' ') # space identify
end
end
# Counterpart of utf8 for HTML output: when @sys.locale matches /utf-?8/i only
# the two space substitutions run; for any other locale the full table below
# is applied to para in place. Returns the value of the last gsub! executed.
def html(para='')
if @sys.locale =~/utf-?8/i # instead ucs for utf8 #require 'iconv' ? Iñtërnâtiônàlizætiøn
para.gsub!(/ /u, ' ') # space identify
para.gsub!(/ /u, ' ') # space identify
else
para.gsub!(/¢/u, '¢') # ¢
para.gsub!(/£/u, '£') # £
para.gsub!(/¥/u, '¥') # ¥
para.gsub!(/§/u, '§') # §
para.gsub!(/©/u, '©') # ©
para.gsub!(/ª/u, 'ª') # ª
para.gsub!(/«/u, '«') # «
para.gsub!(/®/u, '®') # ®
para.gsub!(/°/u, '°') # °
para.gsub!(/±/u, '±') # ±
para.gsub!(/²/u, '²') # ²
para.gsub!(/³/u, '³') # ³
para.gsub!(/µ/u, 'µ') # µ
para.gsub!(/¶/u, '¶') # ¶
para.gsub!(/¹/u, '¹') # ¹
para.gsub!(/º/u, 'º') # º
para.gsub!(/»/u, '»') # »
para.gsub!(/¼/u, '¼') # ¼
para.gsub!(/½/u, '½') # ½
para.gsub!(/¾/u, '¾') # ¾
para.gsub!(/×/u, '×') # ×
para.gsub!(/÷/u, '÷') # ÷
para.gsub!(/¿/u, '¿') # ¿
para.gsub!(/À/u, 'À') # À
para.gsub!(/Á/u, 'Á') # Á
para.gsub!(/Â/u, 'Â') # Â
para.gsub!(/Ã/u, 'Ã') # Ã
para.gsub!(/Ä/u, 'Ä') # Ä
para.gsub!(/Å/u, 'Å') # Å
para.gsub!(/Æ/u, 'Æ') # Æ
para.gsub!(/Ç/u, 'Ç') # Ç
para.gsub!(/È/u, 'È') # È
para.gsub!(/É/u, 'É') # É
para.gsub!(/Ê/u, 'Ê') # Ê
para.gsub!(/Ë/u, 'Ë') # Ë
para.gsub!(/Ì/u, 'Ì') # Ì
para.gsub!(/Í/u, 'Í') # Í
para.gsub!(/Î/u, 'Î') # Î
para.gsub!(/Ï/u, 'Ï') # Ï
para.gsub!(/Ð/u, 'Ð') # Ð
para.gsub!(/Ñ/u, 'Ñ') # Ñ
para.gsub!(/Ò/u, 'Ò') # Ò
para.gsub!(/Ó/u, 'Ó') # Ó
para.gsub!(/Ô/u, 'Ô') # Ô
para.gsub!(/Õ/u, 'Õ') # Õ
para.gsub!(/Ö/u, 'Ö') # Ö
para.gsub!(/Ø/u, 'Ø') # Ø
para.gsub!(/Ù/u, 'Ù') # Ù
para.gsub!(/Ú/u, 'Ú') # Ú
para.gsub!(/Û/u, 'Û') # Û
para.gsub!(/Ü/u, 'Ü') # Ü
para.gsub!(/Ý/u, 'Ý') # Ý
para.gsub!(/Þ/u, 'Þ') # Þ
para.gsub!(/ß/u, 'ß') # ß
para.gsub!(/à/u, 'à') # à
para.gsub!(/á/u, 'á') # á
para.gsub!(/â/u, 'â') # â
para.gsub!(/ã/u, 'ã') # ã
para.gsub!(/ä/u, 'ä') # ä
para.gsub!(/å/u, 'å') # å
para.gsub!(/æ/u, 'æ') # æ
para.gsub!(/ç/u, 'ç') # ç
para.gsub!(/è/u, 'è') # è
# NOTE(review): é and í are replaced by a bare acute accent and ê by a bare
# circumflex, which drops the letter - looks like the wrong replacement.
para.gsub!(/é/u, '´') # é
para.gsub!(/ê/u, 'ˆ') # ê
para.gsub!(/ë/u, 'ë') # ë
para.gsub!(/ì/u, 'ì') # ì
para.gsub!(/í/u, '´') # í
para.gsub!(/î/u, 'î') # î
para.gsub!(/ï/u, 'ï') # ï
para.gsub!(/ð/u, 'ð') # ð
para.gsub!(/ñ/u, 'ñ') # ñ
para.gsub!(/ò/u, 'ò') # ò
para.gsub!(/ó/u, 'ó') # ó
para.gsub!(/ô/u, 'ô') # ô
para.gsub!(/õ/u, 'õ') # õ
para.gsub!(/ö/u, 'ö') # ö
para.gsub!(/ø/u, 'ø') # ø
para.gsub!(/ù/u, 'ù') # ú
para.gsub!(/ú/u, 'ú') # û
para.gsub!(/û/u, 'û') # ü
para.gsub!(/ü/u, 'ü') # ý
para.gsub!(/þ/u, 'þ') # þ
para.gsub!(/ÿ/u, 'ÿ') # ÿ
# NOTE(review): the two quote replacements emit the literal text 'lsquo;' and
# 'rsquo;' - presumably a leading '&' is missing; confirm.
para.gsub!(/‘/u, 'lsquo;') # ‘ # ‘
para.gsub!(/’/u, 'rsquo;') # ’ # ’
para.gsub!(/“/u, '“') # “ # “
para.gsub!(/”/u, '”') # ” # ”
para.gsub!(/–/u, '–') # – # –
para.gsub!(/—/u, '—') # — # —
para.gsub!(/∝/u, '∝') # ∝ # ∝
para.gsub!(/∞/u, '∞') # ∞ # ∞
para.gsub!(/™/u, '™') # ™ # ™
para.gsub!(/✠/u, '✠') # ✠
#para.gsub!(/✠/u, '†') # † # † incorrect replacement †
para.gsub!(/ /u, ' ') # space identify
para.gsub!(/ /u, ' ') # space identify
end
end
self
end
# Tidies ampersands in every word of wordlist, mutating each string in place.
#
# wordlist - Array of Strings (as produced by String#scan in the callers).
#
# Words that do not already contain something shaped like an entity
# (&...;) have '&' substituted, then '&' followed by a capital letter is
# substituted. Returns wordlist itself.
# NOTE(review): as written both substitutions put back exactly what they
# matched; presumably the replacement was meant to be an escaped ampersand -
# confirm against the upstream file.
def tidywords(wordlist)
  wordlist.each do |word|
    #imperfect solution will not catch all possible cases
    word.gsub!(/&/,'&') if word !~/&\S+;/
    word.gsub!(/&([A-Z])/,'&\1')
  end
end
# Main markup pass for one paragraph on its way to XML output.
#
# para - String holding the paragraph in internal (Mx-delimited) markup.
#
# The text is first split into entity/word/newline tokens, run through
# tidywords and re-joined with single spaces; from that point on para is a
# new String, so the caller's object is not modified. An ordered chain of
# in-place substitutions then follows; paragraphs that start with the code
# group marker take the short else-branch instead of the link/image handling.
# Returns the transformed String.
# NOTE(review): many replacements below are just '\1' (or a bare newline),
# i.e. the matched wrapper is dropped and nothing is put in its place;
# presumably markup belongs in those strings - confirm against upstream.
def markup(para='')
wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
para=tidywords(wordlist).join(' ').strip
para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
')
para.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'')
para.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;')
para.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;')
para.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1')
#para.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'\1') #reinstate
# font attributes: each delimited span is replaced by its content
para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1')
para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1')
para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1')
para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1')
para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1')
para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1')
para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
para.gsub!(/<:pb>\s*/,'') #Fix
para.gsub!(/<+[-~]#>+/,'')
para.gsub!(/#{Mx[:id_o]}0;\w\d+;[um]\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}/,'')
# everything except code blocks
if para !~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/
#embeds a red-bullet image -->
# NOTE(review): the next four lines repeat the bold/italics/underscore/strike
# substitutions already applied above and appear redundant as written.
para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1')
para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1')
para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
')
para.gsub!(/#{Mx[:br_page]}\s*/,'')
para.gsub!(/#{Mx[:br_page_new]}\s*/,'')
para.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); para.gsub!(/<[-~]#>/,'')
# image links: with and without explicit WxH size, pointing at a url or at "image"
para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/,
%{[\\1] \\4})
para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}(https?:\/\/\S+)/,
%{\\1})
para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/,
%{[\\1] \\4})
para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?#{Mx[:lnk_c]}image/,
%{\\1})
# text links, then bare urls (wrapped in the skin's url decoration)
para.gsub!(/(^|#{Mx[:gl_c]}|\s)#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/,
'\1\2\4') #watch, compare html_tune
para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,
%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3})
para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,
'\1\2') #escaped urls not linked, deal with later
else
# NOTE(review): the two substitutions below are identical as written; the
# trailing comment suggests one each for the two angle brackets - confirm.
para.gsub!(/(^|[^}])_/m,'\1>') #code-block: angle brackets special characters
para.gsub!(/(^|[^}])_/m,'\1>')
end
para.gsub!(/ |#{Mx[:nbsp]}/m,' ')
para
end
# Light-weight markup pass: unwraps the inline font markers, drops page-break
# and no-number markers, points image links at the local image url and
# finally re-tokenises the text through tidywords.
#
# para - String; it is modified in place by the substitutions, while the
#        value returned is a new, stripped String built from the tokens.
def markup_light(para='')
  # /{..}/  *{..}* or !{..}!  _{..}_  -{..}-  are reduced to their content
  [/\/\{(.+?)\}\//, /[*!]\{(.+?)\}[*!]/, /_\{(.+?)\}_/, /-\{(.+?)\}-/].each do |wrapper|
    para.gsub!(wrapper,'\1')
  end
  # NOTE(review): as written this replaces a newline with a newline
  para.gsub!(/
/,'
')
  [/<:pb>\s*/, /<[-~]#>/].each { |marker| para.gsub!(marker,'') }
  para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort
  para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax
  image_link=/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/
  para.gsub!(image_link,"#{@dir.url.images_local}\/\\1")
  para.gsub!(/ |#{Mx[:nbsp]}/,' ')
  #para.gsub!(/ /,' ') #clean
  #\n needed for tables, check though added 2005w17
  tokens=para.scan(/&[#0-9a-z]+;|\S+|\n/)
  tidywords(tokens).join(' ').strip
end
# Markup pass for the fictionbook output: footnotes are cut down to their
# bracketed mark, the inline font markers are unwrapped, page-break,
# no-number and any remaining <:...> markers are dropped, image links are
# pointed at the local image url and the text is re-tokenised via tidywords.
#
# para - String; it is modified in place by the substitutions, while the
#        value returned is a new, stripped String built from the tokens.
def markup_fictionbook(para='')
  para.gsub!(/~\{([\d*+]+).+?\}~/,'[\1]')
  # /{..}/  *{..}* or !{..}!  _{..}_  -{..}-  are reduced to their content
  [/\/\{(.+?)\}\//, /[*!]\{(.+?)\}[*!]/, /_\{(.+?)\}_/, /-\{(.+?)\}-/].each do |wrapper|
    para.gsub!(wrapper,'\1')
  end
  # NOTE(review): as written this replaces a newline with a newline
  para.gsub!(/
/,'
')
  # the third pattern is marked temporary in the original; the no-number
  # marker is removed again after it, as removing <:...> can expose a new one
  [/<:pb>\s*/, /<[-~]#>/, /<:\S+?>/, /<[-~]#>/].each { |marker| para.gsub!(marker,'') }
  para.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort
  para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax
  image_link=/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/
  para.gsub!(image_link,"#{@dir.url.images_local}\/\\1")
  para.gsub!(/ |#{Mx[:nbsp]}/,' ')
  #para.gsub!(/ /,' ') #clean
  #\n needed for tables, check though added 2005w17
  tokens=para.scan(/&[#0-9a-z]+;|\S+|\n/)
  tidywords(tokens).join(' ').strip
end
# Markup pass for grouped text: a series of in-place substitutions on para
# (angle brackets, line breaks, link and en tags), returning para.
# NOTE(review): the first statement of the body is damaged as it stands - its
# opening regexp literal is never closed, so the method does not parse - and
# several later replacements put back exactly what they matched. Presumably
# escaped angle brackets were lost; restore from the upstream file.
def markup_group(para='')
para.gsub!(/,'<'); para.gsub!(/>/,'>')
para.gsub!(/<:?br(?:\s+\/)?>/,'
')
para.gsub!(/<(link xmlns:xlink=".+?")>/,'<\1>')
para.gsub!(/<(\/link)>/,'<\1>')
para.gsub!(/<(\/?en)>/,'<\1>')
para
end
# Resolves paired semantic markup of the form tag:{ ... }:tag inside matched.
#
# matched - String (typically one regexp match handed over by
#           xml_semantic_tags); it is modified in place.
#
# The known tags are processed first, in a fixed order, followed by a
# catch-all for any other lower-case tag name. Every form is replaced by the
# text between the braces.
#
# Returns matched itself. The method used to return the result of its last
# gsub!, which is nil whenever that final catch-all found nothing (for
# instance because an earlier, tag-specific pass had already handled the
# text); used as the value of a gsub block, that nil wiped out the match.
def xml_sem_block_paired(matched) # colon depth: many, recurs
  %w[au vol pub ref desc conv ct cty org dt n].each do |tag|
    matched.gsub!(/\b(#{tag}):\{(.+?)\}:\1\b/m, %{\\2}) # sem :
  end
  matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'\2') # sem :
  matched
end
# Applies the semantic-tag markup rules to para when @md.sem_tag is set;
# otherwise para is returned untouched.
#
# para - String, modified in place.
#
# Three families of markup are handled, in this order:
#   tag:{ ... }:tag   paired form, delegated to xml_sem_block_paired
#   :{ ... }:tag      flat colon form
#   ;{ ... };tag      semicolon form
# As written, every form is replaced by the text between the braces.
#
# Returns para.
def xml_semantic_tags(para)
  if @md.sem_tag
    paired=/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m
    # The paired pass is run three times, presumably to unwrap nested pairs.
    # The helper edits the match in place; falling back to the match itself
    # keeps the text should the helper hand back nil (which is what gsub!
    # returns when it changed nothing).
    3.times do
      para.gsub!(paired) {|c| xml_sem_block_paired(c) || c } # sem :
    end
    #colon one / single / flat / shallow
    %w[au n ti ref desc cty org].each do |tag|
      para.gsub!(/:\{(.+?)\}:#{tag}\b/m, %{\\1}) # sem :
    end
    para.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'\1') # sem :
    #semicolon zero / none
    # entries ending in "?" are regexp fragments (fn? matches ";f" and ";fn")
    %w[ti qt ref ed v desc def trans y ab pg fn? mn? ln? in uni fac inst dept org com? cty].each do |tag|
      para.gsub!(/;\{([^}]+(?![;]))\};#{tag}\b/m, %{\\1}) # sem ;
    end
    para.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'\1') # sem ;
  end
  para
end
end
end
module SiSU_XML_tags #Format
require "#{SiSU_lib}/param"
include SiSU_Param
include SiSU_Viz
class RDF
# Prepares the Dublin Core / RDF header fragments for one document.
#
# md       - document parameter object; its metadata readers (full_title,
#            author, subject, ... keywords) plus sfx and pdf are read here.
# seg_name - interpolated in front of the title in @rdf_title.
#            NOTE(review): the default is an Array, which interpolates as
#            "[]" - confirm callers always pass a String.
# tracker  - not used in this method.
#
# For each metadata field that is set, an @rdf_<field> attribute string and a
# matching plain instance variable are filled in.
# NOTE(review): as written the plain variables only receive whitespace plus a
# newline and the local `content` is never used afterwards; presumably the
# meta-tag markup that consumed it has been lost - confirm against upstream.
# NOTE(review): meta_content_clean changes the string it is given in place, so
# the md fields passed to it are modified as a side effect.
def initialize(md='',seg_name=[],tracker=0)
@full_title=@subtitle=@author=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@copyright=@owner=@keywords=''
#seg_name=%{#{@@seg_name[@@tracker]} - } if @@seg_name[@@tracker]
@md=md
@sfx,@pdf=@md.sfx,@md.pdf
@rdfurl=%{ rdf:about="http://www.jus.uio.no/lm/toc"\n}
if @md.full_title # DublinCore 1 - title
@rdf_title=%{ dc.title="#{seg_name}#{@md.full_title}"\n}
@full_title=%{ \n}
#@full_title=%{ \n}
end
if @md.author # DublinCore 2 - creator/author (author)
@rdf_author=%{ dc.author="#{@md.author}"\n}
content=meta_content_clean(@md.author)
@author=%{ \n}
end
if @md.subject # DublinCore 3 - subject (us library of congress, eric or udc, or schema???)
@rdf_subject=%{ dc.subject="#{@md.subject}"\n}
content=meta_content_clean(@md.subject)
@subject=%{ \n}
end
if @md.description # DublinCore 4 - description
@rdf_description=%{ dc.description="#{@md.description}"\n}
content=meta_content_clean(@md.description)
@description=%{ \n}
end
if @md.publisher # DublinCore 5 - publisher (current copy published by)
@rdf_publisher=%{ dc.publisher="#{@md.publisher}"\n}
content=meta_content_clean(@md.publisher)
@publisher=%{ \n}
end
if @md.contributor # DublinCore 6 - contributor
@rdf_contributor=%{ dc.contributor="#{@md.contributor}"\n}
content=meta_content_clean(@md.contributor)
@contributor=%{ \n}
end
if @md.date # DublinCore 7 - date year-mm-dd
@rdf_date=%{ dc.date="#{@md.date}"\n}
@date=%{ \n}
end
if @md.date_created # DublinCore 7 - date.created year-mm-dd
@rdf_date_created=%{ dc.date.created="#{@md.date_created}"\n}
@date_created=%{ \n}
end
if @md.date_issued # DublinCore 7 - date.issued year-mm-dd
@rdf_date_issued=%{ dc.date.issued="#{@md.date_issued}"\n}
@date_issued=%{ \n}
end
if @md.date_available # DublinCore 7 - date.available year-mm-dd
@rdf_date_available=%{ dc.date.available="#{@md.date_available}"\n}
@date_available=%{ \n}
end
if @md.date_valid # DublinCore 7 - date.valid year-mm-dd
@rdf_date_valid=%{ dc.date.valid="#{@md.date_valid}"\n}
@date_valid=%{ \n}
end
if @md.date_modified # DublinCore 7 - date.modified year-mm-dd
@rdf_date_modified=%{ dc.date.modified="#{@md.date_modified}"\n}
@date_modified=%{ \n}
end
if @md.type # DublinCore 8 - type (genre eg. report, convention etc)
@rdf_type=%{ dc.type="#{@md.type}"\n}
content=meta_content_clean(@md.type)
@type=%{ \n}
end
if @md.format # DublinCore 9 - format (use your mime type)
@rdf_format=%{ dc.format="#{@md.format}"\n}
content=meta_content_clean(@md.format)
@format=%{ \n}
end
if @md.identifier # DublinCore 10 - identifier (your identifier, could use urn which is free)
@rdf_identifier=%{ dc.identifier="#{@md.identifier}"\n}
content=meta_content_clean(@md.identifier)
@identifier=%{ \n}
end
if @md.source # DublinCore 11 - source (document source)
@rdf_source=%{ dc.source="#{@md.source}"\n}
content=meta_content_clean(@md.source)
@source=%{ \n}
end
if @md.language \
and @md.language[:name] # DublinCore 12 - language (English)
@rdf_language=%{ dc.language="#{@md.language[:name]}"\n}
@language=%{ \n}
end
# original language: also emitted under the dc.language attribute name
if @md.language_original \
and @md.language_original[:name]
@rdf_language_original=%{ dc.language="#{@md.language_original[:name]}"\n}
@language_original=%{ \n}
end
if @md.relation # DublinCore 13 - relation
@rdf_relation=%{ dc.relation="#{@md.relation}"\n}
content=meta_content_clean(@md.relation)
@relation=%{ \n}
end
if @md.coverage # DublinCore 14 - coverage
@rdf_coverage=%{ dc.coverage="#{@md.coverage}"\n}
content=meta_content_clean(@md.coverage)
@coverage=%{ \n}
end
if @md.rights # DublinCore 15 - rights
@rdf_rights=%{ dc.rights="#{@md.rights}"\n}
content=meta_content_clean(@md.rights)
@rights=%{ \n}
end
# keywords are cleaned unconditionally; only the assignment is guarded
content=meta_content_clean(@md.keywords)
@keywords=%{ \n} if @md.keywords
@vz=SiSU_Env::Get_init.instance.skin
end
# Makes a metadata value safe for use inside a double-quoted attribute by
# turning every double quote into a single quote.
#
# content - String or nil. A String is changed in place (tr!).
#
# Returns the object that was passed in (nil stays nil).
def meta_content_clean(content='')
  return content if content.nil?
  content.tr!('"',"'")
  content
end
# RDF/Dublin Core header for the table of contents.
# NOTE(review): as it stands every line of the body is a comment, so the
# method returns nil; the '#<' ... '#WOK' lines look like the remains of a
# heredoc whose content is missing - confirm against the upstream file.
def rdftoc #tocHead #values strung together, because some empty, and resulting output (line breaks) is much better
#
#
#
#Dublin Core
#### XML only :-( KEEP
#<
# \n
#\n
#WOK
end
# RDF header for a segment: simply returns whatever rdftoc returns.
def rdfseg #segHead
rdftoc
end
# Builds the informational comment placed at the head of generated XML.
#
# extra - optional description String; selects the second branch when given.
#
# Prepares four pieces of text: the generator line (only when
# @md.sisu_version[:version] is set, otherwise nil), the generation time,
# the Ruby version and, when @md.sc_info is set, the source-control details
# (otherwise an empty string).
# NOTE(review): both branches of the final if/else now read just '<' and
# 'WOK', which looks like the remains of a heredoc whose content is missing;
# as written this does not parse - restore from the upstream file.
def comment_xml(extra='')
generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version]
lastdone="Last Generated on: #{Time.now}"
rubyv="Ruby version: #{@md.ruby_version}"
sc=if @md.sc_info
"Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}"
else ''
end
if extra.empty?
<
WOK
else
<
WOK
end
end
# Header comment for the SAX-style output: comment_xml with the SAX description.
def comment_xml_sax
  comment_xml('SiSU XML, SAX type representation')
end
# Header comment for the Node-style output: comment_xml with the Node description.
def comment_xml_node
  comment_xml('SiSU XML, Node type representation')
end
# Header comment for the DOM-style output: comment_xml with the DOM description.
def comment_xml_dom
  comment_xml('SiSU XML, DOM type representation')
end
def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better
#{@vz.js_head}
<