diff options
| author | Ralph Amissah <ralph@amissah.com> | 2008-02-24 21:05:47 +0000 | 
|---|---|---|
| committer | Ralph Amissah <ralph@amissah.com> | 2008-02-24 21:05:47 +0000 | 
| commit | ec9282e23d3262a2746f9837e0bc57e9aa7a48d5 (patch) | |
| tree | f1fdab5e27bad144a1e2472a8242348a45d03914 | |
| parent | make possible to strip 0.66 markup in dal commit (diff) | |
enable conversion from sst to various forms of input xml, and back again to sst, including 0.66 tags
sst_to_s_xml (sax/dom/node): semantic xml mockup, a naive conversion of known abbreviated tags;
still in flux, e.g.
  sisu --to-sax autonomy_markup0.sst
back to sst markup with:
  sisu --from-sxml autonomy_markup0.sxs.xml
sst_to_s_xml_dom: fix footnote/endnote conversion (the plain endnote replacement referenced capture \2 where only \1 exists)
| -rw-r--r-- | lib/sisu/v0/shared_xml.rb | 89 | ||||
| -rw-r--r-- | lib/sisu/v0/sst_from_xml.rb | 7 | ||||
| -rw-r--r-- | lib/sisu/v0/sst_to_s_xml_dom.rb | 14 | ||||
| -rw-r--r-- | lib/sisu/v0/sst_to_s_xml_node.rb | 11 | ||||
| -rw-r--r-- | lib/sisu/v0/sst_to_s_xml_sax.rb | 10 | 
5 files changed, 121 insertions, 10 deletions
| diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index fd27c664..abc6cc1a 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -1,3 +1,4 @@ +# coding: utf-8  =begin   * Name: SiSU @@ -154,10 +155,44 @@ module SiSU_XML_munge    class Trans      require "#{SiSU_lib}/defaults"      def initialize(md) +      @md=md        @sys=SiSU_Env::System_call.new -      @dir=SiSU_Env::Info_env.new(md.fns) +      @dir=SiSU_Env::Info_env.new(@md.fns)        @dp=SiSU_Env::Info_env.new.digest.pattern        @url_brace=SiSU_Viz::Skin.new.url_decoration +      if @md.sem_tag +        @ab ||=semantic_tags.default +      end +    end +    def semantic_tags +      def default +        { +          :pub =>  'publication', +          :ref =>  'reference', +          :desc => 'description', +          :conv => 'convention', +          :vol =>  'volume', +          :pg =>   'page', +          :ct =>   'cite', +          :cty =>  'city', +          :org =>  'organization', +          :d =>    'date', +          :t =>    'title', +          :a =>    'author', +          :n =>    'name', +          :fn =>   'firstname', +          :f =>    'firstname', +          :mn =>   'middlename', +          :m =>    'middlename', +          :ln =>   'lastname', +          :l =>    'lastname', +          :i =>    'initials', +          :q =>    'quote', +          :y =>    'year', +          :ab =>   'abreviation', +        } +      end +      self      end      def char_enc #character encode        def utf8(para='') @@ -348,10 +383,8 @@ module SiSU_XML_munge        end      end      def markup(para='') -      #if para !~/^<:code>/ -        wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 -        para=tidywords(wordlist).join(' ').strip -      #end +      wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 +      para=tidywords(wordlist).join(' ').strip        para.gsub!(/(^|\s+)<\s+/,'\1< '); 
para.gsub!(/\s+>(\s+|$)/,' >\1')        para.gsub!(/<:pb>\s*/,'')        para.gsub!(/<+[-~]#>+/,'') @@ -426,6 +459,52 @@ module SiSU_XML_munge        para.gsub!(/<(\/?en)>/,'<\1>')        para      end +    def xml_sem_block_paired(matched) # colon depth: many, recurs +      matched.gsub!(/\b(a):\{(.+?)\}:\1\b/m,   %{<sem:#{@ab[:a]} depth="many">\\2</sem:#{@ab[:a]}>}) # sem : +      matched.gsub!(/\b(vol):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:vol]} depth="many">\\2</sem:#{@ab[:vol]}>}) # sem : +      matched.gsub!(/\b(pub):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:pub]} depth="many">\\2</sem:#{@ab[:pub]}>}) # sem : +      matched.gsub!(/\b(ref):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:ref]} depth="many">\\2</sem:#{@ab[:ref]}>}) # sem : +      matched.gsub!(/\b(desc):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:desc]} depth="many">\\2</sem:#{@ab[:desc]}>}) # sem : +      matched.gsub!(/\b(conv):\{(.+?)\}:\1\b/m,%{<sem:#{@ab[:conv]} depth="many">\\2</sem:#{@ab[:conv]}>}) # sem : +      matched.gsub!(/\b(ct):\{(.+?)\}:\1\b/m,  %{<sem:#{@ab[:ct]} depth="many">\\2</sem:#{@ab[:ct]}>}) # sem : +      matched.gsub!(/\b(cty):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:cty]} depth="many">\\2</sem:#{@ab[:cty]}>}) # sem : +      matched.gsub!(/\b(org):\{(.+?)\}:\1\b/m, %{<sem:#{@ab[:org]} depth="many">\\2</sem:#{@ab[:org]}>}) # sem : +      matched.gsub!(/\b(d):\{(.+?)\}:\1\b/m,   %{<sem:#{@ab[:d]} depth="many">\\2</sem:#{@ab[:d]}>}) # sem : +      matched.gsub!(/\b(n):\{(.+?)\}:\1\b/m,   %{<sem:#{@ab[:n]} depth="many">\\2</sem:#{@ab[:n]}>}) # sem : +      matched.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m,'<sem:\1 depth="many">\2</sem:\1>') # sem : +    end +    def xml_semantic_tags(para) +      if @md.sem_tag +        para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : +        para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem : +        para.gsub!(/([a-z]+(?:[_:.][a-z]+)*)(?::\{(.+?)\}:\1)/m) {|c| xml_sem_block_paired(c) } # sem 
: +        #colon one / single / flat / shallow +        para.gsub!(/:\{(.+?)\}:a\b/m,        %{<sem:#{@ab[:a]} depth="one">\\1</sem:#{@ab[:a]}>}) # sem : +        para.gsub!(/:\{(.+?)\}:n\b/m,        %{<sem:#{@ab[:n]} depth="one">\\1</sem:#{@ab[:n]}>}) # sem : +        para.gsub!(/:\{(.+?)\}:t\b/m,        %{<sem:#{@ab[:t]} depth="one">\\1</sem:#{@ab[:t]}>}) # sem : +        para.gsub!(/:\{(.+?)\}:ref\b/m,      %{<sem:#{@ab[:ref]} depth="one">\\1</sem:#{@ab[:ref]}>}) # sem : +        para.gsub!(/:\{(.+?)\}:desc\b/m,     %{<sem:#{@ab[:desc]} depth="one">\\1</sem:#{@ab[:desc]}>}) # sem : +        para.gsub!(/:\{(.+?)\}:cty\b/m,      %{<sem:#{@ab[:cty]} depth="one">\\1</sem:#{@ab[:cty]}>}) # sem : +        para.gsub!(/:\{(.+?)\}:org\b/m,      %{<sem:#{@ab[:org]} depth="one">\\1</sem:#{@ab[:org]}>}) # sem : +        para.gsub!(/:\{(.+?)\}:([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="one">\1</sem:\2>') # sem : +        #semicolon zero / none +        para.gsub!(/;\{([^}]+(?![;]))\};t\b/m,   %{<sem:#{@ab[:t]} depth="zero">\\1</sem:#{@ab[:t]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};q\b/m,   %{<sem:#{@ab[:q]} depth="zero">\\1</sem:#{@ab[:q]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};ref\b/m, %{<sem:#{@ab[:ref]} depth="zero">\\1</sem:#{@ab[:ref]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};desc\b/m,%{<sem:#{@ab[:desc]} depth="zero">\\1</sem:#{@ab[:desc]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};y\b/m,   %{<sem:#{@ab[:y]} depth="zero">\\1</sem:#{@ab[:y]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};ab\b/m,  %{<sem:#{@ab[:ab]} depth="zero">\\1</sem:#{@ab[:ab]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};pg\b/m,  %{<sem:#{@ab[:pg]} depth="zero">\\1</sem:#{@ab[:pg]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};fn?\b/m, %{<sem:#{@ab[:fn]} depth="zero">\\1</sem:#{@ab[:fn]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};mn?\b/m, %{<sem:#{@ab[:mn]} depth="zero">\\1</sem:#{@ab[:mn]}>}) # sem ; +        
para.gsub!(/;\{([^}]+(?![;]))\};ln?\b/m, %{<sem:#{@ab[:ln]} depth="zero">\\1</sem:#{@ab[:ln]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};i\b/m,   %{<sem:#{@ab[:i]} depth="zero">\\1</sem:#{@ab[:i]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};org\b/m, %{<sem:#{@ab[:org]} depth="zero">\\1</sem:#{@ab[:org]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};cty\b/m, %{<sem:#{@ab[:cty]} depth="zero">\\1</sem:#{@ab[:cty]}>}) # sem ; +        para.gsub!(/;\{([^}]+(?![;]))\};([a-z]+(?:[_:.][a-z]+)*)/m,'<sem:\2 depth="zero">\1</sem:\2>') # sem ; +      end +      para +    end    end  end  module SiSU_XML_tags #Format diff --git a/lib/sisu/v0/sst_from_xml.rb b/lib/sisu/v0/sst_from_xml.rb index d93e68b8..af43e611 100644 --- a/lib/sisu/v0/sst_from_xml.rb +++ b/lib/sisu/v0/sst_from_xml.rb @@ -1,3 +1,4 @@ +# coding: utf-8  =begin   * Name: SiSU @@ -99,6 +100,10 @@ module SiSU_sst_from_xml        text.gsub!(/<i>(.+?)<\/i>/,'/{\1}/')        text.gsub!(/<b>(.+?)<\/b>/,'*{\1}*')        text.gsub!(/<u>(.+?)<\/u>/,'_{\1}_') +      text.gsub!(/<sem:([a-z_]+)\s+depth=['"]zero['"]>(\s*.+?\s*)<\/sem:\1>/,';{ \2 };\1') +      text.gsub!(/<sem:([a-z_]+)\s+depth=['"]one['"]>(\s*.+?\s*)<\/sem:\1>/,':{ \2 }:\1') +      text.gsub!(/<sem:([a-z_]+)\s+depth=['"]many['"]>(\s*.+?\s*)<\/sem:\1>/,'\1:{ \2 }:\1') +      text.gsub!(/<sem:([a-z_]+)>(\s*.+?\s*)<\/sem:\1>/,'\1:{ \2 }:\1')        text.gsub!(/\s +/,' ')        text.strip!        #text.gsub!(/<header>(.+?)<\/header/,"@#{x.name}: \\1\n\n") @@ -126,7 +131,7 @@ module SiSU_sst_from_xml        sax      end      def dom -      raise "#{__FILE}::#{__LINE__} xml dom representation to sst not yet implemented (experimental simple xml representations sax and node to sst are in place)." +      raise "#{__FILE__}::#{__LINE__} xml dom representation to sst not yet implemented (experimental simple xml representations sax and node to sst are in place)."      end      def xml_to_sisu        unless @opt.files.empty? 
diff --git a/lib/sisu/v0/sst_to_s_xml_dom.rb b/lib/sisu/v0/sst_to_s_xml_dom.rb index a1c81532..f9c190bf 100644 --- a/lib/sisu/v0/sst_to_s_xml_dom.rb +++ b/lib/sisu/v0/sst_to_s_xml_dom.rb @@ -1,3 +1,4 @@ +# coding: utf-8  =begin   * Name: SiSU @@ -161,7 +162,7 @@ module SiSU_simple_xml_model_dom          para.gsub!(/~\{([*+]+)\s+(.+?)\}~/,            '<endnote><symbol>\1</symbol><note>\2</note></endnote> ')          para.gsub!(/~\{(.+?)\}~/, -          '<endnote><note>\2</note></endnote> ') +          '<endnote><note>\1</note></endnote> ')        end        def xml_head(meta)          txt=meta.text @@ -356,7 +357,7 @@ WOK          (0..6).each { |x| @cont[x]=@level[x]=false }          (4..6).each { |x| @xml_contents_close[x]='' }          data.each do |para| -          para=SiSU_sem::Tags.new(para).rm.all +          #para=SiSU_sem::Tags.new(para,@md).rm.all            wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17            para=tidywords(wordlist).join(' ').strip            para.gsub!(/<[-~]#>/,'') @@ -546,10 +547,19 @@ WOK        include SiSU_Env        def initialize(data,md)          @data,@md=data,md +        @trans=SiSU_XML_munge::Trans.new(@md)        end        def xml          @sisu=[]          @data.each do |para| +          if para !~/^\s*(?:%+ |<:code>)/ +            if @md.sem_tag and para =~/[:;]\{|\}[:;]/ +              para=@trans.xml_semantic_tags(para) +            end +            if para =~/[:;]\{|\}[:;]/ +              para=SiSU_sem::Tags.new(para,@md).rm.all +            end +          end            para.gsub!(/\/\{(.+?)\}\//,'<italic>\1</italic>')            para.gsub!(/\*\{(.+?)\}\*/,'<bold>\1</bold>')            para.gsub!(/!\{(.+?)\}!/,'<bold>\1</bold>') diff --git a/lib/sisu/v0/sst_to_s_xml_node.rb b/lib/sisu/v0/sst_to_s_xml_node.rb index b6d7c9b2..a2656e3e 100644 --- a/lib/sisu/v0/sst_to_s_xml_node.rb +++ b/lib/sisu/v0/sst_to_s_xml_node.rb @@ -1,3 +1,4 @@ +# coding: utf-8  =begin   * Name: SiSU @@ -399,7 
+400,6 @@ WOK          @data=@data.join.split("\n\n")          @data=SiSU_document_structure::Code.new(@md,@data).code          @data.each do |para| -          para=SiSU_sem::Tags.new(para).rm.all            data << SiSU_document_structure::Structure.new(@md,para).structure          end          data=Syntax::Markup.new(@md,data).songsheet @@ -416,6 +416,15 @@ WOK          obj.each do |o|            para=o.txt unless o.txt =~/^%% / #comments are lost, consider            if para +            if para !~/^\s*(?:%+ |<:code>)/ +              if @md.sem_tag and para =~/[:;]\{|\}[:;]/ +                para=@trans.xml_semantic_tags(para) +              end +              if para =~/[:;]\{|\}[:;]/ +                para=SiSU_sem::Tags.new(para,@md).rm.all +              end +            end +            para=SiSU_sem::Tags.new(para,@md).rm.all              para=@trans.markup_light(para)              @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8              if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m # for headers diff --git a/lib/sisu/v0/sst_to_s_xml_sax.rb b/lib/sisu/v0/sst_to_s_xml_sax.rb index 5e4eb5ea..4625779d 100644 --- a/lib/sisu/v0/sst_to_s_xml_sax.rb +++ b/lib/sisu/v0/sst_to_s_xml_sax.rb @@ -1,3 +1,4 @@ +# coding: utf-8  =begin   * Name: SiSU @@ -262,7 +263,14 @@ WOK            data << SiSU_document_structure::Structure.new(@md,para).structure          end          data.each do |para| -          para=SiSU_sem::Tags.new(para).rm.all +          if para !~/^\s*(?:%+ |<:code>)/ +            if @md.sem_tag and para =~/[:;]\{|\}[:;]/ +              para=@trans.xml_semantic_tags(para) +            end +            if para =~/[:;]\{|\}[:;]/ +              para=SiSU_sem::Tags.new(para,@md).rm.all +            end +          end            para=@trans.markup_light(para)            @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8            if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m # for headers | 
