diff options
Diffstat (limited to 'lib/sisu/v5/po4a.rb')
| -rw-r--r-- | lib/sisu/v5/po4a.rb | 983 | 
1 files changed, 983 insertions, 0 deletions
| diff --git a/lib/sisu/v5/po4a.rb b/lib/sisu/v5/po4a.rb new file mode 100644 index 00000000..b3bcc81f --- /dev/null +++ b/lib/sisu/v5/po4a.rb @@ -0,0 +1,983 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, +   2007, 2008, 2009, 2010, 2011, 2012, 2013 Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + +   SiSU, a framework for document structuring, publishing and search + +   Copyright (C) Ralph Amissah + +   This program is free software: you can redistribute it and/or modify it +   under the terms of the GNU General Public License as published by the Free +   Software Foundation, either version 3 of the License, or (at your option) +   any later version. + +   This program is distributed in the hope that it will be useful, but WITHOUT +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +   more details. + +   You should have received a copy of the GNU General Public License along with +   this program. If not, see <http://www.gnu.org/licenses/>. 
+ +   If you have Internet connection, the latest version of the GPL should be +   available at these locations: +   <http://www.fsf.org/licensing/licenses/gpl.html> +   <http://www.gnu.org/licenses/gpl.html> + +   <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html> + + * SiSU uses: +   * Standard SiSU markup syntax, +   * Standard SiSU meta-markup syntax, and the +   * Standard SiSU object citation numbering and system + + * Hompages: +   <http://www.jus.uio.no/sisu> +   <http://www.sisudoc.org> + + * Download: +   <http://www.sisudoc.org/sisu/en/SiSU/download.html> + + * Git +   <http://sources.sisudoc.org/gitweb/?p=code/sisu.git;a=summary> +   <http://sources.sisudoc.org/?p=code/sisu.git;a=blob;f=lib/sisu/v5/po4a.rb;hb=HEAD> + + * Ralph Amissah +   <ralph@amissah.com> +   <ralph.amissah@gmail.com> + + ** Description: pot file generation +     linefeed) + +=end +module SiSU_Po4a +  require_relative 'dal'                                # dal.rb +  require_relative 'sysenv'                             # sysenv.rb +    include SiSU_Env +  require_relative 'composite'                          # composite.rb +  require_relative 'shared_metadata'                    # shared_metadata.rb +  require_relative 'po4a_set'                           # po4a_set.rb +  include SiSU_Param +  include SiSU_Viz +  pwd=Dir.pwd +  class Source +    @@opt_src,@@opt_trn,@@opt_src_,@@opt_trn_,@@md_src,@@md_trn=nil,nil,nil,nil,nil,nil +    def initialize(opt,fn=nil) +      @opt,@fn=opt,fn +      #unless @opt.fns =~/(.+?\.(?:-|ssm\.)?sst)$/ +      #  puts "#{@opt.fns} not a processed file type" +      #end +      r=Px[:lng_lst_rgx].gsub(/\|en\|/,'|') +      @lang_regx=%r{(?:#{r})} +      if opt.fns =~/\S+?~#{@lang_regx}\.ss[mti]/ \ +      and opt.f_pth[:lng]!=@opt.lng_base +        @@opt_src_=false +        @@opt_trn=opt +        @@md_trn=SiSU_Param::Parameters.new(opt).get +      else +        @@opt_src_=true +        @@opt_src=opt +        @@md_src=SiSU_Param::Parameters.new(opt).get + 
     end +    end +    def read +      begin +        src={} +        src[:pth]=@opt.f_pth[:pth] +        src[:files]=if @opt.fns =~ /\.(?:(?:-|ssm\.)sst|ssm)$/ +          @opt.fns=@opt.fns.gsub(/\.ssm\.sst$/,'.ssm') +          SiSU_Assemble::CompositeFileList.new(@opt).read +        else +          [@opt.fns] +        end +        md=SiSU_Param::Parameters.new(@opt).get +        src[:files].each do |fn| +          SiSU_DAL::Source.new(@opt,fn).read             # -m +          env=SiSU_Env::InfoEnv.new(@opt.fns) +          m=/((.+?)(?:\~\w\w(?:_\w\w)?)?)\.((?:-|ssm\.)?sst|ssm|ssi)$/ #watch added match for sss +          @fnn,@fnb,@fnt=fn[m,1],fn[m,2],fn[m,3] +          unless @opt.cmd =~/q/ +            path=env.path.output_tell +            tool=(@opt.cmd =~/[MVv]/) \ +            ? "#{env.program.text_editor} #{path}/#{md.fnb}/#{md.fn[:plain]}" +            : @opt.fns +            @opt.cmd=~/[MVvz]/ \ +            ? SiSU_Screen::Ansi.new(@opt.cmd,'Pot po4a',tool).green_hi_blue +            : SiSU_Screen::Ansi.new(@opt.cmd,'Pot po4a',tool).green_title_hi +            SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{path}/#{md.fnb}/#{md.fn[:plain]}").flow if @opt.cmd =~/[MV]/ +          end +          if @opt.fns =~/\S+?~#{@lang_regx}\.ss[mti]/ \ +          or @opt.f_pth[:lng] !=@opt.lng_base +            opt_lang_trn_fn=fn +            @dal_array_lang_translation=SiSU_DAL::Source.new(@opt,opt_lang_trn_fn).get # dal file drawn here +            opt_lang_src_fn=if fn =~/\S+?~\S{2}(?:_\S{2})?\.ss[mti]/ +              fn.gsub(/(\S+?)~\S{2}(?:_\S{2})?(\.ss[mti])/,'\1\2') #check i +            else fn +            end +            transdir,srcdir=Dir.pwd,Dir.pwd +            if Dir.pwd.to_s =~/\/#{@lang_regx}$/ +              transdir=Dir.pwd +              srcdir=transdir.gsub(/\/#{@lang_regx}$/,"/#{@opt.lng_base}") +              if FileTest.directory?(srcdir) +                 Dir.chdir(srcdir) +              end +            else nil +            end +            x=if 
FileTest.file?("#{srcdir}/#{opt_lang_src_fn}") +              @dal_array_lang_src=SiSU_DAL::Source.new(@@opt_src,opt_lang_src_fn).get # dal file drawn here +            else +              puts "no identified source document" +              exit +            end +            Dir.chdir(transdir) if transdir +          else +            @dal_array_lang_src=SiSU_DAL::Source.new(@opt,fn).get # dal file drawn here +            @dal_array_lang_translation=nil +          end +          wrap_width=if defined? md.make.plaintext_wrap \ +          and md.make.plaintext_wrap +            md.make.plaintext_wrap +          elsif defined? env.plaintext_wrap \ +          and env.plaintext_wrap +            env.plaintext_wrap +          else 78 +          end +          SiSU_Po4a::Source::Scroll.new(fn,@dal_array_lang_src,@dal_array_lang_translation,@@md_src,@@md_trn,wrap_width).songsheet +        end +      rescue +        SiSU_Errors::InfoError.new($!,$@,@opt.cmd,@opt.fns).error do +          __LINE__.to_s + ':' + __FILE__ +        end +      ensure +      end +    end +    private +    class Scroll <Source +      require_relative 'defaults'                       # defaults.rb +      require_relative 'po4a_set'                       # po4a_set.rb +      include SiSU_Po4aUtils +      @@endnotes={ para: [], end: [] } +      def initialize(fn,data_src,data_trn,md_src,md_trn,wrap_width) +        @fn,@data_src,@data_trn,@md_src,@md_trn,@wrap_width=fn,data_src,data_trn,md_src,md_trn,wrap_width +        @md=(md_trn.nil?) \ +        ? md_src +        : md_trn +        @brace_url=SiSU_Viz::Defaults.new.url_decoration +        @vz=SiSU_Viz::Defaults.new +        @tab="\t" +        @@endnotes_=(@md.opt.mod.inspect =~/--endnote/) ? true : false    # --footnote +        @br=(@md.opt.mod.inspect =~/--dos/) ? 
"\r\n" : "\n"               # --unix +        @pot={ body: [], open: [], close: [], head: [], metadata: [], tail: [] } +      end +      # Runs pot_markup over the source/translation data and passes the result to publish together with the file name. +      def songsheet +        # Snapshot the file name before pot_markup runs: wrap_endnotes used to overwrite @fn with its note counter. +        fn=@fn +        pot=pot_markup(@data_src,@data_trn) +        publish(fn,pot) +      end +      def extract_endnotes(dob='')                                   #% Used for extraction of endnotes from paragraphs +        notes_a=dob.obj.scan(/#{Mx[:en_a_o]}([\d]+\s+.+?)#{Mx[:en_a_c]}/) +        ##notes_a=dob.obj.scan(/#{Mx[:en_a_o]}([\d*+]+\s+.+?)#{Mx[:en_a_c]}/) +        #notes_b=dob.obj.scan(/#{Mx[:en_b_o]}([\d*+]+\s+.+?)#{Mx[:en_b_c]}/) +        @n=[] +        notes_a.flatten.each do |n| #high cost to deal with <br> appropriately within plaintext, consider +          n=n.dup.to_s +          n=n.gsub(/^([\d]+)\s+/,'^~\1 '). +           #gsub(/^([\d*+]+)\s+/,'^~\1 '). +            gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'<br>') +          @n << n +        end +        notes_a=@n.flatten +      end +      # Emits one pot entry per note in orig_notes (paired with trn_notes when that is an Array of the same length), +      # appends the wrapped entries to @pot[:body] and returns @@endnotes. +      def wrap_endnotes(orig_notes='',trn_notes='') +        nt=@@endnotes_ ? 'endnote' : 'footnote' +        note_num=0 # local note counter; @fn holds the document file name and must not be reused here +        a_l=orig_notes.length +        0.upto(a_l-1) do |i| +          note_num += 1 if orig_notes[i].to_s =~/^\^~([\d*+]+)/ # provides endnote number within paragraph +          d="#{nt} #{note_num}" +          mark="^~ " +          instruct=s_mark='' +          if @md.opt.cmd=~/M/ +            instruct=%{\n# footnotes, the preferred sisu markup for a footnote is~{this is a footnote}~ however, for translation a footnote reference marker in the text~^ with a set of notes following the paragraph starting on a newline with "^~ this is a footnote", is easier to deal with, if possible these should be converted back to~{inline notes}~} +            s_mark="\n# " + %{"\\n\\n#{mark}...\\n\\n"} +          end +          desc="#{d}#{s_mark}#{instruct}" +          orig=(orig_notes[i].to_s =~/^\^~[\d*+]+/) ? 
(orig_notes[i].to_s.gsub(/^\^~[\d*+]+/,'^~')) : orig_notes[i].to_s +          trans=if trn_notes.is_a?(Array) \ +          and trn_notes.length==orig_notes.length +            (trn_notes[i].to_s =~/^\^~[\d*+]+/) ? (trn_notes[i].to_s.gsub(/^\^~[\d*+]+/,'^~')) : trn_notes[i].to_s +          else '' +          end +          util=pot_structure(desc,orig,trans) +          wrap=util.line_wrap +          wrap=if wrap =~ /^\s*\^~[\d*+]+\s+.+?\s*\Z/m +            wrap.gsub(/^\s*(\^~[\d*+]+)\s+(.+?)\s*\Z/m, <<GSUB +\\1 \\2 +GSUB +                      ) +          else +            wrap.gsub(/^(.+)\Z/m, <<GSUB +\\1 +GSUB +                      ) +          end +          @@endnotes[:para] << wrap +          @@endnotes[:end] << '' << wrap +        end +        @@endnotes[:para].each {|e| @pot[:body] << e << @br} +        @@endnotes[:para]=[] +        @@endnotes +      end +      def pot_metadata_src +        @po4a_identify_type='type: SiSU doc' #'type: Plain text' +        meta_src=SiSU_Metadata::Summary.new(@md_src) +        w=[] +        w << [ +          "#. #{@po4a_identify_type} - metadata: title", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.title.main, +          meta_src.metadata_tags.title.sub, +          meta_src.metadata_tags.title.edition, +          meta_src.metadata_tags.title.note, +          meta_src.metadata_tags.title.short, +          meta_src.metadata_tags.title.language, +          meta_src.metadata_tags.title.language_char, +          'msgstr ""', +        ] +        w << [ +          "#. 
#{@po4a_identify_type} - metadata: creator", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.creator.head, +          meta_src.metadata_tags.creator.author, +          meta_src.metadata_tags.creator.contributor, +          meta_src.metadata_tags.creator.illustrator, +          meta_src.metadata_tags.creator.photographer, +          meta_src.metadata_tags.creator.translator, +          meta_src.metadata_tags.creator.audio, +          meta_src.metadata_tags.creator.digitized_by, +          meta_src.metadata_tags.creator.prepared_by, +          'msgstr ""', +        ] +        w << [ +          "#. #{@po4a_identify_type} - metadata: rights", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.rights.head, +          meta_src.metadata_tags.rights.copyright.text, +          meta_src.metadata_tags.rights.copyright.translation, +          meta_src.metadata_tags.rights.copyright.illustrations, +          meta_src.metadata_tags.rights.copyright.photographs, +          meta_src.metadata_tags.rights.copyright.digitization, +          meta_src.metadata_tags.rights.copyright.audio, +          meta_src.metadata_tags.rights.license, +          'msgstr ""', +        ] +        w << [ +          "#. 
#{@po4a_identify_type} - metadata: classify", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.classify.head, +          meta_src.metadata_tags.classify.subject, +          meta_src.metadata_tags.classify.topic_register, +          meta_src.metadata_tags.classify.loc, +          meta_src.metadata_tags.classify.dewey, +          meta_src.metadata_tags.notes.relation, +          meta_src.metadata_tags.notes.type, +          meta_src.metadata_tags.identifier.oclc, +          meta_src.metadata_tags.identifier.isbn, +          'msgstr ""', +        ] +        w << [ +          "#. #{@po4a_identify_type} - metadata: date", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.date.head, +          meta_src.metadata_tags.date.added_to_site, +          meta_src.metadata_tags.date.available, +          meta_src.metadata_tags.date.created, +          meta_src.metadata_tags.date.issued, +          meta_src.metadata_tags.date.modified, +          meta_src.metadata_tags.date.published, +          meta_src.metadata_tags.date.valid, +          'msgstr ""', +        ] +        w << [ +          "#. 
#{@po4a_identify_type} - processing, make instruction", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.processing_tags.make.language, +          meta_src.processing_tags.make.headings, +          meta_src.processing_tags.make.num_top, +          meta_src.processing_tags.make.breaks, +          meta_src.processing_tags.make.emphasis, +          meta_src.processing_tags.make.bold, +          meta_src.processing_tags.make.italics, +          meta_src.processing_tags.make.texpdf_font, +          'msgstr ""', +        ] +        w.each do |y| +          z='' +          y.each do |x| +            if x +              z += x + "\n" if x =~/^#|^msg(?:id|str)/ +              z += %{"#{x}"\n} if x =~/^@\S+?:(?: |$)/ +              z += %{"#{x}"\n} if x =~/^\s+:\S+?: / +            end +          end +          @pot[:metadata] << z << @br +          #puts z unless z.empty? +        end +      end +      def pot_metadata_src_trn +        @po4a_identify_type='type: SiSU doc' +        #@po4a_identify_type='type: Plain text' +        meta_src=SiSU_Metadata::Summary.new(@md_src) +        meta_trn=SiSU_Metadata::Summary.new(@md_trn) +        w=[] +        w << [ +          "#. 
#{@po4a_identify_type} - metadata: title", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.title.main, +          meta_src.metadata_tags.title.sub, +          meta_src.metadata_tags.title.edition, +          meta_src.metadata_tags.title.note, +          meta_src.metadata_tags.title.short, +          meta_src.metadata_tags.title.language, +          meta_src.metadata_tags.title.language_char, +          'msgstr ""', +          meta_trn.metadata_tags.title.main, +          meta_trn.metadata_tags.title.sub, +          meta_trn.metadata_tags.title.edition, +          meta_trn.metadata_tags.title.note, +          meta_trn.metadata_tags.title.short, +          meta_trn.metadata_tags.title.language, +          meta_trn.metadata_tags.title.language_char, +        ] +        w << [ +          "#. #{@po4a_identify_type} - metadata: creator", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.creator.head, +          meta_src.metadata_tags.creator.author, +          meta_src.metadata_tags.creator.contributor, +          meta_src.metadata_tags.creator.illustrator, +          meta_src.metadata_tags.creator.photographer, +          meta_src.metadata_tags.creator.translator, +          meta_src.metadata_tags.creator.audio, +          meta_src.metadata_tags.creator.digitized_by, +          meta_src.metadata_tags.creator.prepared_by, +          'msgstr ""', +          meta_trn.metadata_tags.creator.head, +          meta_trn.metadata_tags.creator.author, +          meta_trn.metadata_tags.creator.contributor, +          meta_trn.metadata_tags.creator.illustrator, +          meta_trn.metadata_tags.creator.photographer, +          meta_trn.metadata_tags.creator.translator, +          meta_trn.metadata_tags.creator.audio, +          meta_trn.metadata_tags.creator.digitized_by, +          meta_trn.metadata_tags.creator.prepared_by, +       
 ] +        w << [ +          "#. #{@po4a_identify_type} - metadata: rights", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.rights.head, +          meta_src.metadata_tags.rights.copyright.text, +          meta_src.metadata_tags.rights.copyright.translation, +          meta_src.metadata_tags.rights.copyright.illustrations, +          meta_src.metadata_tags.rights.copyright.photographs, +          meta_src.metadata_tags.rights.copyright.digitization, +          meta_src.metadata_tags.rights.copyright.audio, +          meta_src.metadata_tags.rights.license, +          'msgstr ""', +          meta_trn.metadata_tags.rights.head, +          meta_trn.metadata_tags.rights.copyright.text, +          meta_trn.metadata_tags.rights.copyright.translation, +          meta_trn.metadata_tags.rights.copyright.illustrations, +          meta_trn.metadata_tags.rights.copyright.photographs, +          meta_trn.metadata_tags.rights.copyright.digitization, +          meta_trn.metadata_tags.rights.copyright.audio, +          meta_trn.metadata_tags.rights.license, +        ] +        w << [ +          "#. 
#{@po4a_identify_type} - metadata: classify", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.classify.head, +          meta_src.metadata_tags.classify.subject, +          meta_src.metadata_tags.classify.topic_register, +          meta_src.metadata_tags.classify.loc, +          meta_src.metadata_tags.classify.dewey, +          meta_src.metadata_tags.notes.relation, +          meta_src.metadata_tags.notes.type, +          meta_src.metadata_tags.identifier.oclc, +          meta_src.metadata_tags.identifier.isbn, +          'msgstr ""', +          meta_trn.metadata_tags.classify.head, +          meta_trn.metadata_tags.classify.subject, +          meta_trn.metadata_tags.classify.topic_register, +          meta_trn.metadata_tags.classify.loc, +          meta_trn.metadata_tags.classify.dewey, +          meta_trn.metadata_tags.notes.relation, +          meta_trn.metadata_tags.notes.type, +          meta_trn.metadata_tags.identifier.oclc, +          meta_trn.metadata_tags.identifier.isbn, +        ] +        w << [ +          "#. 
#{@po4a_identify_type} - metadata: date", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.metadata_tags.date.head, +          meta_src.metadata_tags.date.added_to_site, +          meta_src.metadata_tags.date.available, +          meta_src.metadata_tags.date.created, +          meta_src.metadata_tags.date.issued, +          meta_src.metadata_tags.date.modified, +          meta_src.metadata_tags.date.published, +          meta_src.metadata_tags.date.valid, +          'msgstr ""', +          meta_trn.metadata_tags.date.head, +          meta_trn.metadata_tags.date.added_to_site, +          meta_trn.metadata_tags.date.available, +          meta_trn.metadata_tags.date.created, +          meta_trn.metadata_tags.date.issued, +          meta_trn.metadata_tags.date.modified, +          meta_trn.metadata_tags.date.published, +          meta_trn.metadata_tags.date.valid, +        ] +        w << [ +          "#. #{@po4a_identify_type} - processing, make instruction", +          "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}", +          'msgid ""', +          meta_src.processing_tags.make.language, +          meta_src.processing_tags.make.headings, +          meta_src.processing_tags.make.num_top, +          meta_src.processing_tags.make.breaks, +          meta_src.processing_tags.make.emphasis, +          meta_src.processing_tags.make.bold, +          meta_src.processing_tags.make.italics, +          meta_src.processing_tags.make.texpdf_font, +          'msgstr ""', +          meta_trn.processing_tags.make.language, +          meta_trn.processing_tags.make.headings, +          meta_trn.processing_tags.make.num_top, +          meta_trn.processing_tags.make.breaks, +          meta_trn.processing_tags.make.emphasis, +          meta_trn.processing_tags.make.bold, +          meta_trn.processing_tags.make.italics, +          meta_trn.processing_tags.make.texpdf_font, +        ] +        w.each do |y| +          
z='' +          y.each do |x| +            if x +              z += x + "\n" if x =~/^#|^msg(?:id|str)/ +              z += %{"#{x}"\n} if x =~/^@\S+?:(?: |$)/ +              z += %{"#{x}"\n} if x =~/^\s+:\S+?: / +            end +          end +          @pot[:metadata] << z << @br +          #puts z unless z.empty? +        end +      end +      def pot_structure(desc,orig,trans,indent=0,hang=0) +        SiSU_Po4aUtils::Wrap.new(@md,orig,trans,desc,@wrap_width,indent,hang) +      end +      def pot_structure_heading(dob_src='',notes_s='',dob_trn='',notes_t='')                    #% Used to extract the structure of a document +        lv=n=n3=nil +        lv=dob_src.ln +        n=lv - 1 +        n3=lv + 2 +        util=nil +        fn=(dob_src.name=~/[a-z\d]/i) ? dob_src.name : '' +        mark="#{dob_src.lv}~#{fn} " +        d="#{dob_src.is.to_s} (level #{dob_src.lv})" +        instruct=s_mark='' +        if @md.opt.cmd=~/M/ +          instruct=%{\n# markup for headings is marker at the start of the line/object, indicating the heading level, and if provided an associated name tag, this heading is "#{mark}"} +          s_mark="\n# " + %{"\\n\\n#{mark}...\\n\\n"} +        end +        desc="#{d}#{s_mark}#{instruct}" +        orig="#{s_mark}#{dob_src.obj}" +        trans=(dob_trn=='') ? 
'' : "#{s_mark}#{dob_trn.obj}" +        util=pot_structure(desc,orig,trans) +        wrapped=util.line_wrap +        @pot[:body] << wrapped << @br # main text, contents, body KEEP +        if @@endnotes[:para] \ +        and notes_s.length > 0 \ +        and not @@endnotes_ +          @pot[:body] << @br +          wrap_endnotes(notes_s,notes_t) +        elsif @@endnotes[:para] \ +        and @@endnotes_ +          @pot[:body] << @br*2 +        end +      end +      def pot_structure_para(dob_src='',notes_s='',dob_trn='',notes_t='')                       #% Used to extract the structure of a document +        util=nil +        wrapped=if dob_src.indent =~/[1-9]/ \ +        and dob_src.indent == dob_src.hang +          s_mark=desc=orig=trans='' +          if dob_src.bullet_ +            mark="_#{dob_src.indent}* " +            d="#{dob_src.is.to_s}: indent #{dob_src.indent}, bullet" +            instruct=s_mark='' +            if @md.opt.cmd=~/M/ +              instruct=%{\n# markup for indented bullet text is at the start of the line/object, an underscore followed by the indent level and an asterisk "#{mark}"} +              s_mark="\n# " + %{"\\n\\n#{mark}...\\n\\n"} +            end +            desc="#{d}#{s_mark}#{instruct}" +          else +            mark="_#{dob_src.indent} " +            d="#{dob_src.is.to_s}: indent #{dob_src.indent}" +            instruct=s_mark='' +            if @md.opt.cmd=~/M/ +              instruct=%{\n# markup for indented text is at the start of the line/object, an underscore followed by the indent level "#{mark}"} +              s_mark="\n# " + %{"\\n\\n#{mark}...\\n\\n"} +            end +            desc="#{d}#{s_mark}#{instruct}" +          end +          orig="#{s_mark}#{dob_src.obj}" +          trans=(dob_trn=='') ? 
'' : "#{s_mark}#{dob_trn.obj}" +          util=pot_structure(desc,orig,trans) +        elsif dob_src.hang =~/[0-9]/ \ +        and dob_src.indent != dob_src.hang +          s_mark=desc=orig=trans='' +          mark="_#{dob_src.hang}_#{dob_src.indent} " +          d="#{dob_src.is.to_s}: hang #{dob_src.hang} indent #{dob_src.indent}" +          instruct=s_mark='' +          if @md.opt.cmd=~/M/ +            instruct=%{\n# markup for indented text with a first line indented to a different level from the rest of the paragraph, is at the start of the line/object, an underscore and the first indent level a second underscore and the indent level for the rest of the paragraph, "#{mark1}"} +            s_mark="\n# " + %{"\\n\\n#{mark}...\\n\\n"} +          end +          desc="#{d}#{s_mark}#{instruct}" +          orig="#{s_mark}#{dob_src.obj}" +          trans=(dob_trn=='') ? '' : "#{s_mark}#{dob_trn.obj}" +          util=pot_structure(desc,orig,trans) +        else +          s_mark=desc=orig=trans='' +          if dob_src.bullet_ +            mark='_* ' +            d="#{dob_src.is.to_s}: bullet" +            instruct=s_mark='' +            if @md.opt.cmd=~/M/ +              instruct=%{\n# markup for indented text is at the start of the line/object, an underscore followed by an asterisk "#{mark}"} +              s_mark="\n# " + %{"\\n\\n#{mark}...\\n\\n"} +            end +            desc="#{d}#{s_mark}#{instruct}" +            orig="#{s_mark}#{dob_src.obj}" +            trans=(dob_trn=='') ? '' : "#{s_mark}#{dob_trn.obj}" +          else +            mark='' +            d=dob_src.is.to_s +            instruct=%{\n# regular paragraph, no special markup} +            if @md.opt.cmd=~/M/ +              instruct="\n# " +              s_mark="\n# " + %{"\\n\\n#{mark}...\\n\\n"} +            end +            desc="#{d}#{s_mark}#{instruct}" +            orig=dob_src.obj +            trans=(dob_trn=='') ? 
'' : dob_trn.obj +          end +          util=pot_structure(desc,orig,trans) +        end +        wrapped=util.line_wrap +        @pot[:body] << wrapped << @br # main text, contents, body KEEP +        if @@endnotes[:para] \ +        and notes_s.length > 0 \ +        and not @@endnotes_ +          @pot[:body] << @br +          wrap_endnotes(notes_s,notes_t) +        elsif @@endnotes[:para] \ +        and @@endnotes_ +          @pot[:body] << @br*2 +        end +      end +      def pot_structure_block(dob_src='',notes_s='',dob_trn='',notes_t='')                      #% Used to extract the structure of a document +        mark="block{\\n\\n...\\n\\n}block" +        d=dob_src.is.to_s +        instruct=s_mark='' +        if @md.opt.cmd=~/M/ +          instruct="\n# block text is a text block with an opening and closing marker, the content of which may be wrapped" +          s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"} +        end +        desc="#{d}#{s_mark}#{instruct}" +        orig=dob_src.obj +        trans=(dob_trn=='') ? '' : dob_trn.obj +        util=pot_structure(desc,orig,trans) +        unwrapped=util.no_line_wrap_block +        @pot[:body] << unwrapped << @br +      end +      def pot_structure_group(dob_src='',notes_s='',dob_trn='',notes_t='')                      #% Used to extract the structure of a document +        mark="group{\\n\\n...\\n\\n}group" +        d=dob_src.is.to_s +        instruct=s_mark='' +        if @md.opt.cmd=~/M/ +          instruct="\n# group text is a text block with an opening and closing marker, the content of which may be wrapped" +          s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"} +        end +        desc="#{d}#{s_mark}#{instruct}" +        orig=dob_src.obj +        trans=(dob_trn=='') ? 
'' : dob_trn.obj +        util=pot_structure(desc,orig,trans) +        unwrapped=util.no_line_wrap_block +        @pot[:body] << unwrapped << @br +      end +      def pot_structure_verse(dob_src='',notes_s='',dob_trn='',notes_t='')                      #% Used to extract the structure of a document +        mark="poem{\n\nverse\n\nverse\n\n...\n\n}poem" +        d=dob_src.is.to_s +        instruct=s_mark='' +        if @md.opt.cmd=~/M/ +          instruct="\n# verse are part of the text block described as a poem, the first verse is preceeded by an opening marker, and the last verse by a closing marker, the content of which should remain unwrapped" +          s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"} +        end +        desc="#{d}#{s_mark}#{instruct}" +        orig=dob_src.obj +        trans=(dob_trn=='') ? '' : dob_trn.obj +        util=pot_structure(desc,orig,trans) +        unwrapped=util.no_line_wrap_block +        @pot[:body] << unwrapped << @br +      end +      def pot_structure_code(dob_src='',notes_s='',dob_trn='',notes_t='')                       #% Used to extract the structure of a document +        mark="code{\\n\\n...\\n\\n}code" +        d=dob_src.is.to_s +        instruct=s_mark='' +        if @md.opt.cmd=~/M/ +          instruct="\n# codeblocks are a text block with an opening and closing marker, the content of which should remain unwrapped" +          s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"} +        end +        desc="#{d}#{s_mark}#{instruct}" +        orig=dob_src.obj +        trans=(dob_trn=='') ? 
'' : dob_trn.obj +        util=pot_structure(desc,orig,trans) +        unwrapped=util.no_line_wrap_block +        @pot[:body] << unwrapped << @br +      end +      def pot_structure_table(dob_src='',notes_s='',dob_trn='',notes_t='')                      #% Used to extract the structure of a document +        mark="table{\\n\\n...\\n\\n}table" +        d=dob_src.is.to_s +        instruct=s_mark='' +        if @md.opt.cmd=~/M/ +          instruct="\n# tables are a text block with an opening and closing marker, the content of which should remain unwrapped" +          s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"} +        end +        desc="#{d}#{s_mark}#{instruct}" +        orig=dob_src.obj +        orig=orig.gsub(/#{Mx[:tc_c]}/,"\n") +        trans=(dob_trn=='') ? '' : dob_trn.obj +        trans=trans.gsub(/#{Mx[:tc_c]}/,"\n") +        util=pot_structure(desc,orig,trans) +        unwrapped=util.no_line_wrap_block +        @pot[:body] << unwrapped << @br +      end +      def pot_structure_idx(dob_src='',dob_trn='')                      #% Used to extract the structure of a document +        mark="={ ... }" +        instruct=s_mark='' +        if @md.opt.cmd=~/M/ +          instruct="\n# the book index should be attached unwrapped to the preceding text block (there should be a new line, but no empty line)" +          s_mark="\n# " + %{"\\n#{mark}\\n\\n"} +        end +        d='book-idx' +        desc="#{d}#{s_mark}#{instruct}" +        orig='={' + dob_src.idx + '}' +        trans=if defined? dob_trn.idx \ +        and not dob_trn.idx.nil? \ +        and not dob_trn.idx.empty? +          '={' + dob_trn.idx + '}' +        else '' +        end +        util=pot_structure(desc,orig,trans) +        unwrapped=util.no_line_wrap_block +        @pot[:body] << unwrapped << @br +      end +      def pot_markup(data_src,data_trn) +        #@endnotes,@copen,@pot_contents_close=Array.new(3){[]} +        a_l=if data_trn +        a_l=(data_src.length >= data_trn.length) \ +        ? 
data_src.length +        : data_trn.length +        else +          data_src.length +        end +        s,t=0,0 +        if @md.fns =~ /\.(?:(?:-|ssm\.)?sst|ssm)$/ +          (data_trn.nil?) \ +          ? pot_metadata_src +          : pot_metadata_src_trn +        end +        0.upto(a_l-1) do |i| +          if data_trn +            unless data_src[s] \ +            and data_trn[t] +              break +            end +            if data_src[s].of == :comment \ +            and data_trn[t].of == :comment \ +            and (data_src[s].is == data_trn[t].is) +              s+=1;t+=1 +              next +            end +            if (data_src[s].is == :comment or data_trn[t].is == :comment) \ +            and (data_src[s].is != data_trn[t].is) +              if data_src[s].is == :comment +                puts "src (comment):\n\t" + data_src[s].obj if @md.opt.cmd =~/M/ +                s+=1 +                #next if data_src[s].is == :comment +              elsif data_trn[t].is == :comment +                puts "trans (comment):\n\t" + data_trn[t].obj if @md.opt.cmd =~/M/ +                t+=1 +                #next if data_trn[t].is == :comment +              end +            end +            if (defined? data_src[s].ocn and data_src[s].ocn.is_a?(Fixnum)) \ +            and (defined? data_trn[t].ocn and data_trn[t].ocn.is_a?(Fixnum)) \ +            and (data_src[s].ocn == data_trn[t].ocn) +              @m_s,@m_t=s,t +            elsif (defined? data_src[s].ocn and data_src[s].ocn.is_a?(Fixnum)) \ +            and (defined? 
data_trn[t].ocn and data_trn[t].ocn.is_a?(Fixnum)) \ +            and (data_src[s].ocn != data_trn[t].ocn) +              p '--- OCN ---' +              p 'mis-match' +              p data_src[s].ocn +              p data_src[s].obj +              p data_trn[t].ocn +              p data_trn[t].obj +              p '---' +              p 'previous match' +              p data_src[@m_s].ocn +              p data_src[@m_s].obj +              p data_trn[@m_t].ocn +              p data_trn[@m_t].obj +              exit +            elsif (defined? data_src[s].ocn and defined? data_trn[t].ocn \ +            and data_src[s].ocn.class != data_trn[t].ocn.class) +              p '--- OCN class ---' +              p 'mis-match' +              p data_src[s].ocn if defined? data_src[s].ocn +              p data_src[s].obj +              p data_trn[t].ocn if defined? data_trn[t].ocn +              p data_trn[t].obj +              #p '---' +              #p 'previous match' +              #p data_src[@m_s].ocn +              #p data_src[@m_s].obj +              #p data_trn[@m_t].ocn +              #p data_trn[@m_t].obj +            #elsif (defined? data_src[s].ocn != defined? data_trn[t].ocn) \ +            #and (data_src[s].ocn.nil? != data_trn[t].ocn.nil?) +            #  p '--- missing OCN? ---' +            #  p 'mis-match' +            #  p data_src[s].ocn if defined? data_src[s].ocn +            #  p data_src[s].obj +            #  p data_trn[t].ocn if defined? 
data_trn[t].ocn +            #  p data_trn[t].obj +            else +            end +          end +          notes_s,notes_t='','' +          data_src[s],notes_s=markup(data_src[s]) +          if data_trn +            data_trn[t],notes_t=markup(data_trn[t]) +            #data_src[s],data_trn[t]=pot_data(data_src[s],notes_s,data_trn[t],notes_t) +            pot_data(data_src[s],notes_s,data_trn[t],notes_t) +          else +            #data_src[s],nul=pot_data(data_src[s],notes_s) +            pot_data(data_src[s],notes_s) +          end +          s+=1;t+=1 +        end +        @pot #watch +      end +      def pot_data(dob_src='',notes_s='',dob_trn='',notes_t='') +        if dob_src.obj !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ +          if defined? dob_src.ocn \ +          and dob_src.ocn.to_s =~/\d+/ +            paranum=dob_src.ocn.to_s +            @p_num=SiSU_Po4aUtils::ParagraphNumber.new(paranum) +          end +          case dob_src.is +          when :heading; pot_structure_heading(dob_src,notes_s,dob_trn,notes_t) +          when :para;    pot_structure_para(dob_src,notes_s,dob_trn,notes_t) +          when :group;   pot_structure_group(dob_src,notes_s,dob_trn,notes_t) +          when :block;   pot_structure_block(dob_src,notes_s,dob_trn,notes_t) +          when :verse;   pot_structure_verse(dob_src,notes_s,dob_trn,notes_t) +          when :code;    pot_structure_code(dob_src,notes_s,dob_trn,notes_t) +          when :table;   pot_structure_table(dob_src,notes_s,dob_trn,notes_t) +          end +          if defined? dob_src.idx \ +          and not dob_src.idx.nil? \ +          and not dob_src.idx.empty? +            pot_structure_idx(dob_src,dob_trn) +          end +          dob_src='' if (dob_src.obj =~/<a name="n\d+">/ \ +          and dob_src.obj =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote +          if dob_src ## Clean Prepared Text +            dob_src.obj=dob_src.obj.gsub(/<!.+!>/,' '). 
+              gsub(/<:\S+>/,' ') if dob_src ## Clean Prepared Text +          end +        end +        #[dob_src,dob_trn] +      end +      def markup(dob)                                     # Used for major markup instructions +        dir=SiSU_Env::InfoEnv.new(@md.fns) +        fix=[] +          dob.obj=dob.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/, +              "#{Px[:po_bold_o]}\\1#{Px[:po_bold_c]}"). +            gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/, +              "#{Px[:po_italics_o]}\\1#{Px[:po_italics_c]}"). +            gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/, +              "#{Px[:po_underscore_o]}\\1#{Px[:po_underscore_c]}"). +            gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/, +              "#{Px[:po_subscript_o]}\\1#{Px[:po_subscript_c]}"). +            gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/, +              "#{Px[:po_superscript_o]}\\1#{Px[:po_superscript_c]}"). +            gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/, +              "#{Px[:po_insert_o]}\\1#{Px[:po_insert_c]}"). +            gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/, +              "#{Px[:po_cite_o]}\\1#{Px[:po_cite_c]}"). +            gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/, +              "#{Px[:po_strike_o]}\\1#{Px[:po_strike_c]}"). +            gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/, +              "#{Px[:po_monospace_o]}\\1#{Px[:po_monospace_c]}") +          notes='' +          unless dob.is==:code +            dob.obj=dob.obj.gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,'\1'). +              gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1'). +              gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1 [link: <\2>]'). +              gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/,'\1 [link: local image]'). 
+              gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1') +            #dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,"#{@brace_url.txt_open}\\1#{@brace_url.txt_close}") +            notes=extract_endnotes(dob) +            #% ### footnotes current state - extracted +            dob.obj=dob.obj.gsub(/#{Mx[:en_a_o]}([\d]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'~^').   # endnote marker marked up +            #% ### footnotes current state - keep inline +            #dob.obj.gsub!(/#{Mx[:en_a_o]}[\d]+\s+(.+?)#{Mx[:en_a_c]}/,'~{ \1 }~')     # inline endnote with marker marked up +              gsub(/#{Mx[:en_b_o]}[\d]+\s+(.+?)#{Mx[:en_b_c]}/,'~[ \1 ]~').     # inline endnote with marker marked up +              gsub(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/,'~{\1 \2 }~'). # inline endnote with marker marked up +              gsub(/#{Mx[:en_b_o]}([*+]+)\s+(.+?)#{Mx[:en_b_c]}/,'~[\1 \2 ]~'). # inline endnote with marker marked up +              gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<'). +              gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>'). +              gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&'). +              gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). +              gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'). +              gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'). +              gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). +              gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'). +              gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'). +              gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'). +              gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'). +              gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~'). +              gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©') +          end +          dob.obj=if dob.of==:block                                   # watch +            dob.obj.gsub(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/,"* "). 
+              gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n") +          else dob.obj.gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n") +          end +          if dob.is==:code +            dob.obj=dob.obj.gsub(/(^|[^}])_([<>])/m,'\1\2'). # _> _< +              gsub(/(^|[^}])_([<>])/m,'\1\2') # _<_< +          end +          dob.obj=dob.obj.gsub(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'').                     # remove page breaks +            gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1'). +            gsub(/<a href=".+?">(.+?)<\/a>/m,'\1'). +            gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'').                       # remove name links +            gsub(/ |#{Mx[:nbsp]}/,' ').                                       # decide on +            gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'    [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]") +            gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,'    [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]") +            gsub(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') +        [dob,notes] +      end +      def publish(fn,pot) +        divider='=' +        content=[] +        content << pot[:open] +        content << pot[:head] +        content << pot[:metadata] +        content << pot[:body] +        content << @@endnotes[:end] if @@endnotes_ +        Output.new(fn,content,@md).po4a +        @@endnotes={ para: [], end: [] } +      end +    end +    class Output <Source +      include SiSU_Param +      include SiSU_Env +      def initialize(fn,content,md) +        @fn,@content,@md=fn,content,md +        @file=SiSU_Env::FileOp.new(md,fn) +      end +      def po4a                                                                #%pot output +        file_pot=(@md.opt.f_pth[:lng] ==@md.opt.lng_base) \ +        ? 
@file.write_file.pot +        : @file.write_file.po +        @sisu=[] +        emptyline=0 +        @content.each do |para|                                                # this is a hack +          if para.is_a?(Array) \ +          and para.length > 0 +            para.each do |line| +              if line +                line=line.gsub(/\s+$/m,''). +                  gsub(/^\A[ ]*\Z/m,'') +                if line=~/^\A[ ]*\Z/m +                  emptyline+=1 +                else emptyline=0 +                end +                file_pot.puts line if emptyline < 2                     #remove extra line spaces (fix upstream) +               end +            end +          else file_pot.puts para          #unix plaintext # /^([*=-]|\.){5}/ +          end +        end +        file_pot.close +        SiSU_Po4aUtils::PotNumber.new.reset +        po4a_git +      end +      def po4a_git +        unless @md.opt.cmd =~/M/ +          require_relative 'git'                           # git.rb +          git=SiSU_Git::Source.new(@md.opt) +          git.create_file_structure_git unless FileTest.directory?(@file.output_path.pot_git.dir) +          if @md.opt.f_pth[:lng] ==@md.opt.lng_base +            FileUtils::cp(@file.place_file.pot.dir, @file.output_path.pot_git.dir) +          else # naive, work on --> +            FileUtils::cp(@file.place_file.po.dir, @file.output_path.po_git.dir) #unless FileTest.file?(@file.place_file.po_git.dir) +          end +          git.read +        end +      end +    end +  end +end +__END__ +!\|#\|&*\|-\|/\|_\|{\|}\|~\|&# + +tables are problematic, difficult to reconstitute instruction, check + +metadata, move to top? and work on + +footnotes, different types, asterisk, also do you want to have separate +paragraphs, or breaks within one block? + +where no ocn appropriately use ~# or -# or indeed 1~name- + +comments in document, what to do about them, not sure they are currently +retained in dal, could be quite valuable to keep | 
