# encoding: utf-8
=begin
 * Name: SiSU
 * Description: a framework for document structuring, publishing and search
 * Author: Ralph Amissah
 * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.
 * License: GPL 3 or later:
   SiSU, a framework for document structuring, publishing and search
   Copyright (C) Ralph Amissah
   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.
   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.
   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.
   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   
   
   
   
   
 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system
 * Homepages:
   
   
 * Download:
   
 * Ralph Amissah
   
   
 ** Description: common file for xml generation
=end
module SiSU_XML_Munge
  class Trans
    require_relative 'defaults'                         # defaults.rb
    # Prepares a translator for one document.
    #
    # md:: document metadata object; this method reads md.fns and md.sem_tag.
    def initialize(md)
      @md=md
      @sys=SiSU_Env::System_call.new
      @dir=SiSU_Env::Info_env.new(@md.fns)
      @brace_url=SiSU_Viz::Skin.new.url_decoration
      # the semantic tag table is only loaded for documents that ask for semantic tags
      @ab ||=semantic_tags.default if @md.sem_tag
    end
    # Accessor for the semantic tag table. Calling it (re)defines the nested
    # #default method and returns self, so callers write semantic_tags.default.
    def semantic_tags
      # Returns a Hash mapping each short semantic tag (Symbol) to its long name.
      # Every key appears once; a repeated literal key makes Ruby warn and
      # silently keeps only the last value.
      def default
        {
          pub:   'publication',
          conv:  'convention',
          vol:   'volume',
          pg:    'page',
          cty:   'city',
          org:   'organization',
          uni:   'university',
          dept:  'department',
          fac:   'faculty',
          inst:  'institute',
          co:    'company',
          com:   'company',
          dt:    'date',
          y:     'year',
          m:     'month',
          d:     'day',
          ti:    'title',
          au:    'author',
          ed:    'editor', #editor?
          v:     'version', #edition
          n:     'name',
          fn:    'firstname',
          mn:    'middlename',
          ln:    'lastname',
          in:    'initials',
          qt:    'quote',
          ct:    'cite',
          ref:   'reference',
          ab:    'abreviation', # sic - spelling kept, the value may be used downstream; TODO confirm before correcting
          def:   'define',
          desc:  'description',
          trans: 'translate',
        }
      end
      self
    end
    def char_enc #character encode
      # Rewrites the angle brackets and a fixed set of Latin-1, typographic and
      # symbol characters as numeric character references, e.g. "é" -> "&#233;".
      #
      # Nothing is done, and nil is returned, unless @sys.locale reports UTF-8.
      #
      # dob:: either a String, or an object exposing #obj / #obj= around a String;
      #       the text is modified in place.
      # Returns dob (nil when dob is neither of the above).
      def utf8(dob='')
        if @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn
          str=if defined? dob.obj; dob.obj
          elsif dob.class==String; dob
          end
          if str
            encodable='<>' \
              '¢£¥§©ª«®°±²³µ¶¹º»¼½¾×÷¿' \
              'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞß' \
              'àáâãäåæçèéêëìíîïðñòóôõöøùúûüþÿ' \
              '‘’“”–—∝∞™✠'
            # One pass over the text: each listed character becomes "&#<codepoint>;".
            # A single pass is deliberate - substituting one pattern after another lets
            # the output of an earlier substitution be rewritten again by a later one.
            str.gsub!(/[#{Regexp.escape(encodable)}]/u) {|char| format('&#%d;',char.ord) }
            # NOTE(review): in the two lines below pattern and replacement both render
            # as a blank; presumably a non-breaking (or other special) space is
            # involved - confirm the intended characters against version control.
            str.gsub!(/ /um,' ')       # space identify
            str.gsub!(/ /um,' ')       # space identify
          end
          dob=if defined? dob.obj
            dob.obj=str
            dob
          elsif dob.class==String; dob
          end
          dob
        end
      end
      # Prepares dob.obj (modified in place) for HTML output.
      #
      # When @sys.locale is not UTF-8, each character in the table below is
      # replaced by its named HTML entity ("é" -> "&eacute;"); the cross "✠" has
      # no named entity and becomes a numeric reference. Under a UTF-8 locale the
      # characters are left as they are. The two "space identify" substitutions
      # run in both cases.
      #
      # dob:: object exposing #obj (a String).
      # Returns dob.
      def html(dob='')
        unless @sys.locale =~/utf-?8/i # instead ucs for utf8 # String#encode Iñtërnâtiônàlizætiøn
          entity={
            '¢'=>'&cent;',   '£'=>'&pound;',  '¥'=>'&yen;',    '§'=>'&sect;',
            '©'=>'&copy;',   'ª'=>'&ordf;',   '«'=>'&laquo;',  '®'=>'&reg;',
            '°'=>'&deg;',    '±'=>'&plusmn;', '²'=>'&sup2;',   '³'=>'&sup3;',
            'µ'=>'&micro;',  '¶'=>'&para;',   '¹'=>'&sup1;',   'º'=>'&ordm;',
            '»'=>'&raquo;',  '¼'=>'&frac14;', '½'=>'&frac12;', '¾'=>'&frac34;',
            '×'=>'&times;',  '÷'=>'&divide;', '¿'=>'&iquest;',
            'À'=>'&Agrave;', 'Á'=>'&Aacute;', 'Â'=>'&Acirc;',  'Ã'=>'&Atilde;',
            'Ä'=>'&Auml;',   'Å'=>'&Aring;',  'Æ'=>'&AElig;',  'Ç'=>'&Ccedil;',
            'È'=>'&Egrave;', 'É'=>'&Eacute;', 'Ê'=>'&Ecirc;',  'Ë'=>'&Euml;',
            'Ì'=>'&Igrave;', 'Í'=>'&Iacute;', 'Î'=>'&Icirc;',  'Ï'=>'&Iuml;',
            'Ð'=>'&ETH;',    'Ñ'=>'&Ntilde;', 'Ò'=>'&Ograve;', 'Ó'=>'&Oacute;',
            'Ô'=>'&Ocirc;',  'Õ'=>'&Otilde;', 'Ö'=>'&Ouml;',   'Ø'=>'&Oslash;',
            'Ù'=>'&Ugrave;', 'Ú'=>'&Uacute;', 'Û'=>'&Ucirc;',  'Ü'=>'&Uuml;',
            'Ý'=>'&Yacute;', 'Þ'=>'&THORN;',  'ß'=>'&szlig;',
            'à'=>'&agrave;', 'á'=>'&aacute;', 'â'=>'&acirc;',  'ã'=>'&atilde;',
            'ä'=>'&auml;',   'å'=>'&aring;',  'æ'=>'&aelig;',  'ç'=>'&ccedil;',
            'è'=>'&egrave;', 'é'=>'&eacute;', 'ê'=>'&ecirc;',  'ë'=>'&euml;',
            'ì'=>'&igrave;', 'í'=>'&iacute;', 'î'=>'&icirc;',  'ï'=>'&iuml;',
            'ð'=>'&eth;',    'ñ'=>'&ntilde;', 'ò'=>'&ograve;', 'ó'=>'&oacute;',
            'ô'=>'&ocirc;',  'õ'=>'&otilde;', 'ö'=>'&ouml;',   'ø'=>'&oslash;',
            'ù'=>'&ugrave;', 'ú'=>'&uacute;', 'û'=>'&ucirc;',  'ü'=>'&uuml;',
            'þ'=>'&thorn;',  'ÿ'=>'&yuml;',
            '‘'=>'&lsquo;',  '’'=>'&rsquo;',  '“'=>'&ldquo;',  '”'=>'&rdquo;',
            '–'=>'&ndash;',  '—'=>'&mdash;',  '∝'=>'&prop;',   '∞'=>'&infin;',
            '™'=>'&trade;',  '✠'=>'&#10016;',
          }
          # single pass: each matched character is looked up in the table
          dob.obj.gsub!(/[#{Regexp.escape(entity.keys.join)}]/u,entity)
        end
        # NOTE(review): in the two lines below pattern and replacement both render
        # as a blank; presumably a non-breaking (or other special) space is
        # involved - confirm the intended characters against version control.
        dob.obj.gsub!(/ /u,' ')           # space identify
        dob.obj.gsub!(/ /u,' ')           # space identify
        dob
      end
      self
    end
    # Escapes bare ampersands in each word of wordlist, in place.
    #
    # A word that already contains something shaped like an entity (&...;) is
    # not given the blanket treatment, so existing entities are not
    # double-escaped; in every word an ampersand directly followed by a capital
    # letter (as in "AT&T") is escaped.
    #
    # wordlist:: Array of mutable Strings.
    # Returns wordlist.
    def tidywords(wordlist)
      wordlist.each do |word|
        #imperfect solution will not catch all possible cases
        word.gsub!(/&/,'&amp;') unless word =~/&\S+;/
        word.gsub!(/&([A-Z])/,'&amp;\1')
      end
    end
    # Converts the internal markup of one document object to its XML text form.
    #
    # dob.obj is re-split into words, passed through tidywords and re-joined with
    # single spaces; the delimiter pairs defined in Mx (bold, italics, ...) are
    # then removed around their content. Objects whose type (dob.is) does not
    # start with "code" additionally have their link, image and url markers
    # rewritten; code objects instead get their angle brackets escaped so that
    # the code text is kept literally.
    #
    # NOTE(review): many replacement strings below are just the captured text
    # or a bare newline, which suggests the XML tags they once emitted are
    # missing from this copy - compare with version control before relying on
    # the output format.
    #
    # dob:: document object exposing #obj / #obj=, #is and #of.
    # Returns dob.
    def markup(dob='')
      wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
      dob.obj=tidywords(wordlist).join(' ').strip
      unless dob.is=='table'
        dob.obj.gsub!(/#{Mx[:br_line]}/u,'
')
        dob.obj.gsub!(/#{Mx[:br_paragraph]}/u,'
')
        dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
')
      end
      dob.obj.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'')
      dob.obj.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;')
      dob.obj.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;')
      dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+)<\s+/,'\1< '); dob.obj.gsub!(/\s+>(\s+|$)/,' >\1')
      #dob.obj.gsub!(/#{Mx[:fa_emphasis_o]}(.+?)#{Mx[:fa_emphasis_c]}/,'\1') #reinstate
      dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/m,'\1')
      dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/m,'\1')
      dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
      dob.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1')
      dob.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1')
      dob.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1')
      dob.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1')
      dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
      dob.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1')
      dob.obj.gsub!(/<:pb>\s*/,'') #Fix
      dob.obj.gsub!(/<+[-~]#>+/,'')
      if dob.is !~/^code/
        #embeds a red-bullet image -->
        dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1')
        dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1')
        dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
        dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
        dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
') unless dob.is=='table'
        dob.obj.gsub!(/#{Mx[:br_page]}\s*/,'')
        dob.obj.gsub!(/#{Mx[:br_page_new]}\s*/,'')
        dob.obj.gsub!(/#{Mx[:pa_non_object_no_heading]}|#{Mx[:pa_non_object_dummy_heading]}/,''); dob.obj.gsub!(/<[-~]#>/,'')
        dob.obj.gsub!(/href="#{Xx[:segment]}/m,'href="')
        dob.obj.gsub!(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\.\.\/\S+?)#{Mx[:rel_c]}/,
          '\1')
        dob.obj.gsub!(/#{Mx[:lnk_o]}([^#{Mx[:lnk_o]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{Mx[:rel_c]}]+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}:(\S+?)#{Mx[:rel_c]}/,
          '\1')
        dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\S+?)#{Mx[:rel_c]}/,
          '\1')
        dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
          %{[\\1] \\4})
        dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
          %{\\1})
        dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))[ ]+(\d+)x(\d+)(\s+[^}]+)?#{Mx[:lnk_c]}image/,
          %{[\\1] \\4})
        dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}[ ]*(\S+?\.(?:jpg|png|gif))([ ]+[^}]+)?#{Mx[:lnk_c]}image/,
          %{\\1})
        dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
          '\1') #watch, compare html_tune
        dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,
          %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}})
        dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,
          '\1') #escaped urls not linked, deal with later
      else
        # code objects: keep the text literal by escaping the angle brackets
        # (an empty pattern here would match between every character)
        dob.obj.gsub!(/</m,'&lt;'); dob.obj.gsub!(/>/m,'&gt;')
      end
      if dob.of=='block'
        dob.obj.gsub!(/#{Mx[:gl_bullet]}/,'● ')
      end
      dob.obj.gsub!(/#{Mx[:url_o]}([a-zA-Z0-9._-]+\@\S+?\.[a-zA-Z0-9._-]+)#{Mx[:url_c]}/,
        %{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}})
      dob.obj.gsub!(/#{Dx[:url_o]}/,"#{Dx[:url_o_xml]}")
      dob.obj.gsub!(/#{Dx[:url_c]}/,"#{Dx[:url_c_xml]}")
      dob.obj.gsub!(/ |#{Mx[:nbsp]}/m,' ')
      dob
    end
    # Lightweight conversion of one document object's text, modified in place.
    #
    # Removes the /{ }/, *{ }* or !{ }!, _{ }_ and -{ }- wrappers around their
    # content, drops <:pb> and <-#> / <~#> markers, replaces an image marker by
    # the relative image path taken from @md plus the file name, and finally
    # re-splits the text into words, runs them through tidywords and re-joins
    # them with single spaces.
    #
    # NOTE(review): several substitutions below replace a match with an
    # identical string (newline for newline, '&\1' for '&...'), so as written
    # they change nothing; they look like entity or tag replacements whose
    # right-hand side is missing from this copy - confirm against version control.
    #
    # dob:: document object exposing #obj / #obj=.
    # Returns dob.
    def markup_light(dob='')
      dob.obj.gsub!(/\/\{(.+?)\}\//,'\1')
      dob.obj.gsub!(/[*!]\{(.+?)\}[*!]/,'\1')
      dob.obj.gsub!(/_\{(.+?)\}_/,'\1')
      dob.obj.gsub!(/-\{(.+?)\}-/,'\1')
      dob.obj.gsub!(/
/,'
')
      dob.obj.gsub!(/<:pb>\s*/,'')
      dob.obj.gsub!(/<[-~]#>/,'')
      # collapses the whitespace that follows a free-standing ampersand to one space
      dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort
      dob.obj.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax
      # the pattern also consumes the character in front of the opening link marker
      dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
        "#{@md.file.output_path.xml.rel_image}\/\\1")
      dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ')
      wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
      dob.obj=tidywords(wordlist).join(' ').strip
      dob
    end
    # Conversion of one document object's text for FictionBook output, modified
    # in place.
    #
    # Footnote bodies ~{n ...}~ are reduced to "[n]"; the /{ }/, *{ }* or !{ }!,
    # _{ }_ and -{ }- wrappers are removed around their content; <:...> and
    # <-#> / <~#> markers are dropped; an image marker is replaced by the
    # relative image path taken from @md plus the file name; finally the text is
    # re-split into words, run through tidywords and re-joined with single spaces.
    #
    # NOTE(review): several substitutions below replace a match with an
    # identical string (newline for newline, '&\1' for '&...'), so as written
    # they change nothing; they look like entity or tag replacements whose
    # right-hand side is missing from this copy - confirm against version control.
    #
    # dob:: document object exposing #obj / #obj=.
    # Returns dob.
    def markup_fictionbook(dob='')
      dob.obj.gsub!(/~\{([\d*+]+).+?\}~/,'[\1]')
      dob.obj.gsub!(/\/\{(.+?)\}\//,'\1')
      dob.obj.gsub!(/[*!]\{(.+?)\}[*!]/,'\1')
      dob.obj.gsub!(/_\{(.+?)\}_/,'\1')
      dob.obj.gsub!(/-\{(.+?)\}-/,'\1')
      dob.obj.gsub!(/
/,'
')
      dob.obj.gsub!(/<:pb>\s*/,'')
      dob.obj.gsub!(/<[-~]#>/,'')
      #temporary -->
      dob.obj.gsub!(/<:\S+?>/,'')
      #<-- temporary
      dob.obj.gsub!(/<[-~]#>/,'')
      dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s)&\s+/,'\1& ') #sort
      dob.obj.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax
      # group 1 is the image file name; the pattern must have balanced
      # parentheses or building the Regexp raises on every call
      dob.obj.gsub!(/#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif))[ ]+.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,
        "#{@md.file.output_path.xml.rel_image}\/\\1")
      dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ')
      wordlist=dob.obj.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17
      dob.obj=tidywords(wordlist).join(' ').strip
      dob
    end
    # Prepares the text of a grouped object for XML output, modifying dob.obj in
    # place: every angle bracket is escaped first, then the few tags that are
    # allowed through - line breaks, <link xmlns:xlink="...">, </link>, <en>
    # and </en> - are turned back from their escaped form. A line-break tag
    # becomes a newline.
    #
    # dob:: document object exposing #obj.
    # Returns dob.
    def markup_group(dob='')
      dob.obj.gsub!(/</,'&lt;'); dob.obj.gsub!(/>/,'&gt;')
      dob.obj.gsub!(/&lt;:?br(?:\s+\/)?&gt;/,"\n")
      dob.obj.gsub!(/&lt;(link xmlns:xlink=".+?")&gt;/,'<\1>')
      dob.obj.gsub!(/&lt;(\/link)&gt;/,'<\1>')
      dob.obj.gsub!(/&lt;(\/?en)&gt;/,'<\1>')
      dob
    end
    # Prepares the text of a block object for XML output, modifying dob.obj in
    # place: every angle bracket is escaped first, then the few tags that are
    # allowed through - line breaks, <link xmlns:xlink="...">, </link>, <en>
    # and </en> - are turned back from their escaped form. A line-break tag
    # becomes a newline.
    #
    # dob:: document object exposing #obj.
    # Returns dob.
    def markup_block(dob='')
      dob.obj.gsub!(/</,'&lt;'); dob.obj.gsub!(/>/,'&gt;')
      dob.obj.gsub!(/&lt;:?br(?:\s+\/)?&gt;/,"\n")
      dob.obj.gsub!(/&lt;(link xmlns:xlink=".+?")&gt;/,'<\1>')
      dob.obj.gsub!(/&lt;(\/link)&gt;/,'<\1>')
      dob.obj.gsub!(/&lt;(\/?en)&gt;/,'<\1>')
      dob
    end
    # Unwraps paired semantic markers of the form tag:{ content }:tag in matched
    # (modified in place), first for the listed tags and then for any remaining
    # tag name.
    #
    # NOTE(review): every replacement here is just the captured content, which
    # makes the per-tag lines equivalent to the final generic one; presumably
    # each once emitted a tag-specific element - confirm against version control.
    #
    # matched:: mutable String, typically the text matched by the caller's gsub!.
    # Returns matched. The string is returned explicitly because gsub! yields nil
    # when nothing changed, and a nil result used as a gsub! block value would
    # delete the matched text.
    def xml_sem_block_paired(matched) # colon depth: many, recurs
      matched.gsub!(/\b(au):\{(.+?)\}:\1\b/m,  %{\\2})
      matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{\\2})
      matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{\\2})
      matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{\\2})
      matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{\\2})
      matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{\\2})
      matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m,  %{\\2})
      matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{\\2})
      matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{\\2})
      matched.gsub!(/\b(dt):\{(.+?)\}:\1\b/m,  %{\\2})
      matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m,   %{\\2})
      matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'\2')
      matched
    end
    # Resolves semantic markup in dob.obj (modified in place) when the document
    # metadata enables semantic tags; otherwise dob is handed back untouched.
    #
    # Paired markers tag:{ ... }:tag are passed to xml_sem_block_paired, three
    # times over so that markers nested up to three deep are reached; the
    # single-sided forms :{ ... }:tag and ;{ ... };tag are then reduced to their
    # content, listed tags first and any other tag name last.
    #
    # dob:: document object exposing #obj.
    # Returns dob.
    def xml_semantic_tags(dob)
      return dob unless @md.sem_tag
      text=dob.obj
      3.times do
        text.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|pair| xml_sem_block_paired(pair) }
      end
      # colon form  :{ ... }:tag
      text.gsub!(/:\{(.+?)\}:au\b/m,             %{\\1})
      text.gsub!(/:\{(.+?)\}:n\b/m,              %{\\1})
      text.gsub!(/:\{(.+?)\}:ti\b/m,             %{\\1})
      text.gsub!(/:\{(.+?)\}:ref\b/m,            %{\\1})
      text.gsub!(/:\{(.+?)\}:desc\b/m,           %{\\1})
      text.gsub!(/:\{(.+?)\}:cty\b/m,            %{\\1})
      text.gsub!(/:\{(.+?)\}:org\b/m,            %{\\1})
      text.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'\1')
      # semicolon form  ;{ ... };tag
      text.gsub!(/;\{([^}]+(?![;]))\};ti\b/m,    %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};qt\b/m,    %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};ref\b/m,   %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};ed\b/m,    %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};v\b/m,     %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};desc\b/m,  %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};def\b/m,   %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};trans\b/m, %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};y\b/m,     %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};ab\b/m,    %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};pg\b/m,    %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};fn?\b/m,   %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};mn?\b/m,   %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};ln?\b/m,   %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};in\b/m,    %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};uni\b/m,   %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};fac\b/m,   %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};inst\b/m,  %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};dept\b/m,  %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};org\b/m,   %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};com?\b/m,  %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};cty\b/m,   %{\\1})
      text.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'\1')
      dob
    end
  end
end
module SiSU_XML_Tags #Format
  require_relative 'param'                              # param.rb
    include SiSU_Param
  include SiSU_Viz
  class RDF
    # Builds the Dublin Core style metadata strings for one document.
    #
    # md::       document metadata object; the fields read are visible in the
    #            conditions below (title, creator, classify, notes, publisher,
    #            date, type, original, rights, keywords).
    # seg_name:: interpolated in front of the full title in the dc.title line.
    # tracker::  accepted but not referenced in this method.
    #
    # For every metadata field that is present, an @rdf_* variable receives a
    # line of the form `dc.<field>="<value>"` and a second variable (@author,
    # @subject, ...) receives a string that, as written here, holds only two
    # spaces and a newline.
    #
    # NOTE(review): `content` is computed through meta_content_clean in most
    # branches but never interpolated, and the second string of each pair is
    # blank; this looks like template text missing from this copy - confirm
    # against version control before relying on these values.
    def initialize(md='',seg_name=[],tracker=0)
      # chained assignment: all of these start out referring to one and the same empty String
      @full_title=@subtitle=@author=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@copyright=@owner=@keywords=''
      @md=md
      @rdfurl=%{  rdf:about="http://www.jus.uio.no/lm/toc"\n}
      if defined? @md.title.full \
      and @md.title.full                          # DublinCore 1 - title
        @rdf_title=%{    dc.title="#{seg_name}#{@md.title.full}"\n}
        @full_title=%{  \n}
      end
      if defined? @md.creator.author \
      and @md.creator.author=~/\S+/                                            # DublinCore 2 - creator/author (author)
        @rdf_author=%{    dc.author="#{@md.creator.author}"\n}
        content=meta_content_clean(@md.creator.author)
        @author=%{  \n}
      end
      if defined? @md.classify.subject \
      and @md.classify.subject=~/\S+/                                          # DublinCore 3 - subject (us library of congress, eric or udc, or schema???)
        @rdf_subject=%{    dc.subject="#{@md.classify.subject}"\n}
        content=meta_content_clean(@md.classify.subject)
        @subject=%{  \n}
      end
      if defined? @md.notes.description \
      and @md.notes.description=~/\S+/                                         # DublinCore 4 - description
        @rdf_description=%{    dc.description="#{@md.notes.description}"\n}
        content=meta_content_clean(@md.notes.description)
        @description=%{  \n}
      end
      if defined? @md.publisher \
      and @md.publisher                                                        # DublinCore 5 - publisher (current copy published by)
        @rdf_publisher=%{    dc.publisher="#{@md.publisher}"\n}
        content=meta_content_clean(@md.publisher)
        @publisher=%{  \n}
      end
      if defined? @md.creator.contributor \
      and @md.creator.contributor=~/\S+/                                      # DublinCore 6 - contributor
        @rdf_contributor=%{    dc.contributor="#{@md.creator.contributor}"\n}
        content=meta_content_clean(@md.creator.contributor)
        @contributor=%{  \n}
      end
      if defined? @md.date.published \
      and @md.date.published=~/\S+/                                           # DublinCore 7 - date year-mm-dd
        @rdf_date=%{    dc.date="#{@md.date.published}"\n}
        @date=%{  \n} # fix @md.date_scheme
      end
      if defined? @md.date.created \
      and @md.date.created=~/\S+/                                             # DublinCore 7 - date.created year-mm-dd
        @rdf_date_created=%{    dc.date.created="#{@md.date.created}"\n}
        @date_created=%{  \n}
      end
      if defined? @md.date.issued \
      and @md.date.issued=~/\S+/                                              # DublinCore 7 - date.issued year-mm-dd
        @rdf_date_issued=%{    dc.date.issued="#{@md.date.issued}"\n}
        @date_issued=%{  \n}
      end
      if defined? @md.date.available \
      and @md.date.available=~/\S+/                                           # DublinCore 7 - date.available year-mm-dd
        @rdf_date_available=%{    dc.date.available="#{@md.date.available}"\n}
        @date_available=%{  \n}
      end
      if defined? @md.date.valid \
      and @md.date.valid=~/\S+/                                               # DublinCore 7 - date.valid year-mm-dd
        @rdf_date_valid=%{    dc.date.valid="#{@md.date.valid}"\n}
        @date_valid=%{  \n}
      end
      if defined? @md.date.modified \
      and @md.date.modified=~/\S+/                                            # DublinCore 7 - date.modified year-mm-dd
        @rdf_date_modified=%{    dc.date.modified="#{@md.date.modified}"\n}
        @date_modified=%{  \n}
      end
      if defined? @md.type \
      and @md.type                                                            # DublinCore 8 - type (genre eg. report, convention etc)
        @rdf_type=%{    dc.type="#{@md.type}"\n}
        content=meta_content_clean(@md.type)
        @type=%{  \n}
      end
      if defined? @md.classify.format \
      and @md.classify.format=~/\S+/                                          # DublinCore 9 - format (use your mime type)
        @rdf_format=%{    dc.format="#{@md.classify.format}"\n}
        content=meta_content_clean(@md.classify.format)
        @format=%{  \n}
      end
      if defined? @md.classify.identifier \
      and @md.classify.identifier=~/\S+/                                       # DublinCore 10 - identifier (your identifier, could use urn which is free)
        @rdf_identifier=%{    dc.identifier="#{@md.classify.identifier}"\n}
        content=meta_content_clean(@md.classify.identifier)
        @identifier=%{  \n}
      end
      if defined? @md.original.source \
      and @md.original.source=~/\S+/                                           # DublinCore 11 - source (document source)
        @rdf_source=%{    dc.source="#{@md.original.source}"\n}
        content=meta_content_clean(@md.original.source)
        @source=%{  \n}
      end
      if defined? @md.title.language \
      and @md.title.language=~/\S+/                                            # DublinCore 12 - language (English)
        @rdf_language=%{    dc.language="#{@md.title.language}"\n}
        @language=%{  \n}
      end
      if defined? @md.original.language \
      and @md.original.language=~/\S+/
        @rdf_language_original=%{    dc.language="#{@md.original.language}"\n}
        @language_original=%{  \n}
      end
      if defined? @md.classify.relation \
      and @md.classify.relation=~/\S+/                                         # DublinCore 13 - relation
        @rdf_relation=%{    dc.relation="#{@md.classify.relation}"\n}
        content=meta_content_clean(@md.classify.relation)
        @relation=%{  \n}
      end
      if defined? @md.classify.coverage \
      and @md.classify.coverage=~/\S+/                                         # DublinCore 14 - coverage
        @rdf_coverage=%{    dc.coverage="#{@md.classify.coverage}"\n}
        content=meta_content_clean(@md.classify.coverage)
        @coverage=%{  \n}
      end
      if defined? @md.rights.all \
      and @md.rights.all                                                      # DublinCore 15 - rights
        @rdf_rights=%{    dc.rights="#{@md.rights.all}"\n}
        content=meta_content_clean(@md.rights.all)
        @rights=%{  \n}
      end
      # keywords are cleaned unconditionally; meta_content_clean passes nil through
      content=meta_content_clean(@md.keywords)
      @keywords=%{  \n} if @md.keywords
      @vz=SiSU_Env::Get_init.instance.skin
    end
    # Makes a metadata value safe to place inside a double-quoted attribute.
    #
    # Double quotes are turned into single quotes (in place, so the caller's
    # string is modified) and the text is passed through
    # SiSU_XML_Munge::Trans#char_enc.utf8.
    #
    # content:: String, or nil.
    # Returns the cleaned string; nil is passed through. When the encoder
    # returns nil the quote-cleaned content is returned rather than discarded.
    def meta_content_clean(content='')
      return content if content.nil?
      content.tr!('"',"'")
      encoded=SiSU_XML_Munge::Trans.new(@md).char_enc.utf8(content)
      encoded || content
    end
    # Segment-head variant of the RDF block; delegates to rdftoc (not defined
    # in this part of the file) and returns whatever it returns.
    def rdfseg #segHead
      rdftoc
    end
    # Assembles the processing-information lines for a generated XML file.
    #
    # extra:: optional description of the output type, supplied by
    #         comment_xml_sax / comment_xml_node / comment_xml_dom.
    #
    # The locals built here are: generator (only set when a SiSU version is
    # known), lastdone (current time), rubyv (Ruby version) and sc (source
    # control details, or an empty string when @md.sc_info is not set).
    def comment_xml(extra='')
      generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})"  if @md.sisu_version[:version]
      lastdone="Last Generated on: #{Time.now}"
      rubyv="Ruby version: #{@md.ruby_version}"
      sc=if @md.sc_info
        "Source file: #{@md.sc_filename} version: #{@md.sc_number} of: #{@md.sc_date}"
      else ''
      end
      # NOTE(review): both branches below hold only `<` and `WOK`, which look like
      # the remains of two `<<WOK` heredocs whose bodies are missing; as written
      # nothing is built from the locals above and the method cannot be parsed -
      # restore the heredoc text from version control.
      if extra.empty?
<
WOK
     else
<
WOK
     end
    end
    # Processing-information comment labelled for the SAX type representation.
    def comment_xml_sax
      comment_xml('SiSU XML, SAX type representation')
    end
    # Processing-information comment labelled for the Node type representation.
    def comment_xml_node
      comment_xml('SiSU XML, Node type representation')
    end
    # Processing-information comment labelled for the DOM type representation.
    def comment_xml_dom
      comment_xml('SiSU XML, DOM type representation')
    end
    def metatag_html #values strung together, because some empty, and resulting output (line breaks) is much better
<