# encoding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2011, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: pot file generation linefeed)

=end
module SiSU_po4a
  require_relative 'dal'                                # dal.rb
  require_relative 'sysenv'                             # sysenv.rb
  include SiSU_Env
  require_relative 'shared_metadata'                    # shared_metadata.rb
  require_relative 'po4a_set'                           # po4a_set.rb
  include SiSU_Param
  include SiSU_Viz
  pwd = Dir.pwd

  # Entry point for pot/po generation. One instance is created per processed
  # file; the options and parameters of the most recently seen source
  # ('en' / unsuffixed) and translation documents are remembered in class
  # variables so that a translation run can be paired with its source.
  class Source
    @@opt_src, @@opt_trn, @@opt_src_, @@opt_trn_, @@md_src, @@md_trn = nil, nil, nil, nil, nil, nil

    # opt:: processing options object (reads opt.fns, opt.f_pth, opt.cmd)
    # fn::  optional filename, stored as given
    def initialize(opt, fn = nil)
      @opt, @fn = opt, fn
      #unless @opt.fns =~/(.+?\.(?:-|ssm\.)?sst)$/
      #  puts "#{@opt.fns} not a processed file type"
      #end
      r = Px[:lng_lst].join('|')
      r.gsub!(/\|en\|/, '|')
      @lang_regx = %r{(?:#{r})}
      if (opt.fns =~ /\S+?~#{@lang_regx}\.ss[mti]/) && (opt.f_pth[:lng] != 'en')
        @@opt_src_ = false
        @@opt_trn = opt
        @@md_trn = SiSU_Param::Parameters.new(opt).get
      else
        @@opt_src_ = true
        @@opt_src = opt
        @@md_src = SiSU_Param::Parameters.new(opt).get
      end
    end

    # Processes every file belonging to @opt.fns (a composite master expands
    # to its file list): loads the DAL arrays for source and, where
    # applicable, translation, then hands them to Scroll for output.
    # Any StandardError is reported through SiSU_Errors::Info_error.
    def read
      files = if @opt.fns =~ /\.(?:(?:-|ssm\.)sst|ssm)$/
        @opt.fns = @opt.fns.gsub(/\.ssm\.sst$/, '.ssm')
        SiSU_Assemble::Composite_file_list.new(@opt).read
      else
        [@opt.fns]
      end
      md = SiSU_Param::Parameters.new(@opt).get
      files.each do |fn|
        SiSU_DAL::Source.new(@opt, fn).read                                   # -m
        env = SiSU_Env::Info_env.new(@opt.fns)
        m = /((.+?)(?:\~\w\w(?:_\w\w)?)?)\.((?:-|ssm\.)?sst|ssm|ssi)$/ #watch added match for sss
        @fnn, @fnb, @fnt = fn[m, 1], fn[m, 2], fn[m, 3]
        report_processing(env, md) unless @opt.cmd =~ /q/
        load_dal_arrays(fn)
        wrap_width = plaintext_wrap_width(md, env)
        SiSU_po4a::Source::Scroll.new(fn, @dal_array_lang_src, @dal_array_lang_translation, @@md_src, @@md_trn, wrap_width).songsheet
        SiSU_Env::Info_skin.new(md).select #watch
      end
    rescue
      SiSU_Errors::Info_error.new($!, $@, @opt.cmd, @opt.fns).error
    end

    # NOTE: a bare `private` affects method definitions only; the nested
    # classes below remain reachable as Source::Scroll and Source::Output.
    private

    # Prints the processing banner (and, for cmd M/V, the output path).
    def report_processing(env, md)
      path = env.path.output_tell
      plain = "#{path}/#{md.fnb}/#{md.fn[:plain]}"
      tool = (@opt.cmd =~ /[MVv]/) ? "#{env.program.text_editor} #{plain}" : @opt.fns
      banner = SiSU_Screen::Ansi.new(@opt.cmd, 'Pot po4a', tool)
      (@opt.cmd =~ /[MVvz]/) ? banner.green_hi_blue : banner.green_title_hi
      SiSU_Screen::Ansi.new(@opt.cmd, @opt.fns, plain).flow if @opt.cmd =~ /[MV]/
    end

    # Sets @dal_array_lang_src and @dal_array_lang_translation for +fn+.
    # For a translation, the source document is looked up under the sibling
    # 'en' directory when the current directory ends in a language code;
    # the process exits if no source document is found.
    def load_dal_arrays(fn)
      if (@opt.fns =~ /\S+?~#{@lang_regx}\.ss[mti]/) || (@opt.f_pth[:lng] != 'en')
        @dal_array_lang_translation = SiSU_DAL::Source.new(@opt, fn).get      # dal file drawn here
        # drop a "~xx" / "~xx_YY" language suffix to get the source filename
        src_fn = fn.gsub(/(\S+?)~\S{2}(?:_\S{2})?(\.ss[mti])/, '\1\2')       #check i
        transdir = srcdir = Dir.pwd
        if transdir =~ /\/#{@lang_regx}$/
          srcdir = transdir.gsub(/\/#{@lang_regx}$/, '/en')
          Dir.chdir(srcdir) if FileTest.directory?(srcdir)
        end
        begin
          unless FileTest.file?("#{srcdir}/#{src_fn}")
            puts "no identified source document"
            exit
          end
          # NOTE(review): relies on a source-language Source having been
          # initialised earlier in this process (@@opt_src) - confirm with caller
          @dal_array_lang_src = SiSU_DAL::Source.new(@@opt_src, src_fn).get   # dal file drawn here
        ensure
          # always return to the translation directory, also when loading fails
          Dir.chdir(transdir)
        end
      else
        @dal_array_lang_src = SiSU_DAL::Source.new(@opt, fn).get              # dal file drawn here
        @dal_array_lang_translation = nil
      end
    end

    # Wrap width: document make instruction, else environment setting, else 78.
    def plaintext_wrap_width(md, env)
      if defined?(md.make.plaintext_wrap) && md.make.plaintext_wrap
        md.make.plaintext_wrap
      elsif defined?(env.plaintext_wrap) && env.plaintext_wrap
        env.plaintext_wrap
      else
        78
      end
    end

    # Turns the DAL object arrays of a source document (and optionally its
    # translation) into the list of pot/po entries and publishes them.
    class Scroll < Source
      require_relative 'defaults'                       # defaults.rb
      require_relative 'po4a_set'                       # po4a_set.rb
      include SiSU_po4a_utils
      @@endnotes = { para: [], end: [] }

      # Metadata groups in output order:
      # [label, accessor path of the group on Metadata::Summary, field accessors]
      METADATA_GROUPS = [
        ['metadata: title', 'metadata_tags.title',
         %w[main sub edition note short language language_char]],
        ['metadata: creator', 'metadata_tags.creator',
         %w[head author contributor illustrator photographer translator audio digitized_by prepared_by]],
        ['metadata: rights', 'metadata_tags.rights',
         %w[head copyright.text copyright.translation copyright.illustrations
            copyright.photographs copyright.digitization copyright.audio license]],
        ['metadata: classify', 'metadata_tags.classify',
         %w[head relation subject topic_register type identifier loc dewey oclc isbn]],
        ['metadata: date', 'metadata_tags.date',
         %w[head added_to_site available created issued modified published valid]],
        ['processing, make instruction', 'processing_tags.make',
         %w[language headings num_top breaks emphasis bold italics texpdf_font skin]],
      ].freeze

      # Font-face markers translated from Mx[:fa_*] to Px[:po_*], in this order.
      TEXT_FACES = %w[bold italics underscore subscript superscript insert cite strike monospace].freeze

      # Escaped glyph codes (regex source between Mx[:gl_o] and Mx[:gl_c])
      # and the literal character each is replaced with, in this order.
      GLYPHS = [
        ['(?:#lt|#060)', '<'],
        ['(?:#gt|#062)', '>'],
        ['#(?:038|amp)', '&'],
        ['#033', '!'],
        ['#035', '#'],
        ['#042', '*'],
        ['#045', '-'],
        ['#047', '/'],
        ['#095', '_'],
        ['#123', '{'],
        ['#125', '}'],
        ['#126', '~'],
        ['#169', '©'],
      ].freeze

      # fn::         output filename
      # data_src::   DAL object array of the source document
      # data_trn::   DAL object array of the translation, or nil
      # md_src::     parameters of the source document
      # md_trn::     parameters of the translation, or nil
      # wrap_width:: line wrap width passed on to the wrapper
      def initialize(fn, data_src, data_trn, md_src, md_trn, wrap_width)
        @fn, @data_src, @data_trn, @md_src, @md_trn, @wrap_width = fn, data_src, data_trn, md_src, md_trn, wrap_width
        @md = md_trn.nil? ? md_src : md_trn
        @brace_url = SiSU_Viz::Skin.new.url_decoration
        @vz = SiSU_Env::Get_init.instance.skin
        @tab = "\t"
        @@endnotes_ = (@md.opt.mod.inspect =~ /--endnote/) ? true : false # --footnote
        @br = (@md.opt.mod.inspect =~ /--dos/) ? "\r\n" : "\n"            # --unix
        @pot = { body: [], open: [], close: [], head: [], metadata: [], tail: [] }
      end

      # Builds the pot content and writes it out.
      def songsheet
        publish(@fn, pot_markup(@data_src, @data_trn))
      end

      # Extracts the numbered endnotes embedded in dob.obj and returns them as
      # an array of strings of the form "^~N text" (line breaks become <br>).
      def extract_endnotes(dob = '')
        notes = dob.obj.scan(/#{Mx[:en_a_o]}([\d]+\s+.+?)#{Mx[:en_a_c]}/).flatten
        ##notes_a=dob.obj.scan(/#{Mx[:en_a_o]}([\d*+]+\s+.+?)#{Mx[:en_a_c]}/)
        #notes_b=dob.obj.scan(/#{Mx[:en_b_o]}([\d*+]+\s+.+?)#{Mx[:en_b_c]}/)
        @n = notes.map do |note| #high cost to deal with <br> appropriately within plaintext, consider
          note.to_s.
            gsub(/^([\d]+)\s+/, '^~\1 ').
            gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/, '<br>')
        end
        @n.flatten
      end

      # Adds one pot entry per extracted note to the body (and to the
      # end-of-document list). Translated notes are only paired when
      # trn_notes is an Array of the same length as orig_notes.
      # Returns the @@endnotes hash.
      def wrap_endnotes(orig_notes = '', trn_notes = '')
        nt = @@endnotes_ ? 'endnote' : 'footnote'
        mark = "^~ "
        instruction = %{\n# footnotes, the preferred sisu markup for a footnote is~{this is a footnote}~ however, for translation a footnote reference marker in the text~^ with a set of notes following the paragraph starting on a newline with "^~ this is a footnote", is easier to deal with, if possible these should be converted back to~{inline notes}~}
        paired = trn_notes.is_a?(Array) && trn_notes.length == orig_notes.length
        note_number = 0 # local counter; previously @fn (the filename) was overwritten here
        orig_notes.length.times do |i|
          note_number += 1 if orig_notes[i].to_s =~ /^\^~([\d*+]+)/ # provides endnote number within paragraph
          desc, = pot_description("#{nt} #{note_number}", %{"\\n\\n#{mark}...\\n\\n"}, instruction)
          orig = unnumbered_note(orig_notes[i])
          trans = paired ? unnumbered_note(trn_notes[i]) : ''
          wrap = pot_structure(desc, orig, trans).line_wrap
          wrap = if wrap =~ /^\s*\^~[\d*+]+\s+.+?\s*\Z/m
            wrap.gsub(/^\s*(\^~[\d*+]+)\s+(.+?)\s*\Z/m, "\\1 \\2\n")
          else
            wrap.gsub(/^(.+)\Z/m, "\\1\n")
          end
          @@endnotes[:para] << wrap
          @@endnotes[:end] << '' << wrap
        end
        @@endnotes[:para].each { |e| @pot[:body] << e << @br }
        @@endnotes[:para] = []
        @@endnotes
      end

      # Emits the metadata entries of the source document (msgstr left empty).
      def pot_metadata_src
        @po4a_identify_type = 'type: SiSU doc' #'type: Plain text'
        meta_src = Metadata::Summary.new(@md_src)
        emit_metadata(metadata_entries(meta_src))
      end

      # Emits the metadata entries with the source values under msgid and the
      # translation's values under msgstr.
      def pot_metadata_src_trn
        @po4a_identify_type = 'type: SiSU doc' #@po4a_identify_type='type: Plain text'
        meta_src = Metadata::Summary.new(@md_src)
        meta_trn = Metadata::Summary.new(@md_trn)
        emit_metadata(metadata_entries(meta_src, meta_trn))
      end

      # Returns the wrapper object for one entry.
      def pot_structure(desc, orig, trans, indent = 0, hang = 0)
        SiSU_po4a_utils::Wrap.new(@md, orig, trans, desc, @wrap_width, indent, hang)
      end

      # Heading entry, followed by its notes.
      def pot_structure_heading(dob_src = '', notes_s = '', dob_trn = '', notes_t = '')
        fn = (dob_src.name =~ /[a-z\d]/i) ? dob_src.name : ''
        mark = "#{dob_src.lv}~#{fn} "
        desc, s_mark = pot_description(
          "#{dob_src.is} (level #{dob_src.lv})",
          %{"\\n\\n#{mark}...\\n\\n"},
          %{\n# markup for headings is marker at the start of the line/object, indicating the heading level, and if provided an associated name tag, this heading is "#{mark}"}
        )
        orig = "#{s_mark}#{dob_src.obj}"
        trans = (dob_trn == '') ? '' : "#{s_mark}#{dob_trn.obj}"
        @pot[:body] << pot_structure(desc, orig, trans).line_wrap << @br # main text, contents, body KEEP
        append_notes(notes_s, notes_t)
      end

      # Paragraph entry (indented, hanging indent, bullet or regular),
      # followed by its notes.
      def pot_structure_para(dob_src = '', notes_s = '', dob_trn = '', notes_t = '')
        profile = para_profile(dob_src)
        if profile
          d, mark, instruction = profile
          desc, s_mark = pot_description(d, %{"\\n\\n#{mark}...\\n\\n"}, instruction)
          orig = "#{s_mark}#{dob_src.obj}"
          trans = (dob_trn == '') ? '' : "#{s_mark}#{dob_trn.obj}"
        else
          # regular paragraph: the text is passed through without a marker prefix
          desc = if instruct?
            pot_description(dob_src.is, %{"\\n\\n...\\n\\n"}, "\n# ").first
          else
            "#{dob_src.is}\n# regular paragraph, no special markup"
          end
          orig = dob_src.obj
          trans = (dob_trn == '') ? '' : dob_trn.obj
        end
        @pot[:body] << pot_structure(desc, orig, trans).line_wrap << @br # main text, contents, body KEEP
        append_notes(notes_s, notes_t)
      end

      def pot_structure_block(dob_src = '', notes_s = '', dob_trn = '', notes_t = '')
        pot_structure_unwrapped(
          dob_src, dob_trn, "block{\\n\\n...\\n\\n}block",
          "\n# block text is a text block with an opening and closing marker, the content of which may be wrapped"
        )
      end

      def pot_structure_group(dob_src = '', notes_s = '', dob_trn = '', notes_t = '')
        pot_structure_unwrapped(
          dob_src, dob_trn, "group{\\n\\n...\\n\\n}group",
          "\n# group text is a text block with an opening and closing marker, the content of which may be wrapped"
        )
      end

      def pot_structure_verse(dob_src = '', notes_s = '', dob_trn = '', notes_t = '')
        # newlines in the marker example are escaped, as for the other block
        # types, so that the example stays on its single "# " comment line
        pot_structure_unwrapped(
          dob_src, dob_trn, "poem{\\n\\nverse\\n\\nverse\\n\\n...\\n\\n}poem",
          "\n# verse are part of the text block described as a poem, the first verse is preceeded by an opening marker, and the last verse by a closing marker, the content of which should remain unwrapped"
        )
      end

      def pot_structure_code(dob_src = '', notes_s = '', dob_trn = '', notes_t = '')
        pot_structure_unwrapped(
          dob_src, dob_trn, "code{\\n\\n...\\n\\n}code",
          "\n# codeblocks are a text block with an opening and closing marker, the content of which should remain unwrapped"
        )
      end

      # Table entry; cell separators (Mx[:tc_c]) are turned into newlines in
      # place on the source and translation objects before output.
      def pot_structure_table(dob_src = '', notes_s = '', dob_trn = '', notes_t = '')
        dob_src.obj.gsub!(/#{Mx[:tc_c]}/, "\n")
        dob_trn.obj.gsub!(/#{Mx[:tc_c]}/, "\n") unless dob_trn == ''
        pot_structure_unwrapped(
          dob_src, dob_trn, "table{\\n\\n...\\n\\n}table",
          "\n# tables are a text block with an opening and closing marker, the content of which should remain unwrapped"
        )
      end

      # Book index entry attached to the preceding object.
      def pot_structure_idx(dob_src = '', dob_trn = '')
        mark = "={ ... }"
        desc, = pot_description(
          'book-idx',
          %{"\\n#{mark}\\n\\n"},
          "\n# the book index should be attached unwrapped to the preceding text block (there should be a new line, but no empty line)"
        )
        orig = '={' + dob_src.idx + '}'
        trans = if defined?(dob_trn.idx) && !dob_trn.idx.nil? && !dob_trn.idx.empty?
          '={' + dob_trn.idx + '}'
        else
          ''
        end
        @pot[:body] << pot_structure(desc, orig, trans).no_line_wrap_block << @br
      end

      # Walks source (and translation) object arrays in step, emitting one
      # entry per object. Comments present on one side only are skipped on
      # that side; an object citation number mismatch between two numbered
      # objects is reported and terminates the process. Returns @pot.
      def pot_markup(data_src, data_trn)
        #@endnotes,@copen,@pot_contents_close=Array.new(3){[]}
        a_l = data_trn ? [data_src.length, data_trn.length].max : data_src.length
        s, t = 0, 0
        if @md.fns =~ /\.(?:(?:-|ssm\.)?sst|ssm)$/
          data_trn.nil? ? pot_metadata_src : pot_metadata_src_trn
        end
        a_l.times do
          if data_trn
            break unless data_src[s] && data_trn[t]
            if data_src[s].of == 'comment' \
            && data_trn[t].of == 'comment' \
            && (data_src[s].is == data_trn[t].is)
              s += 1; t += 1
              next
            end
            if (data_src[s].is == 'comment' || data_trn[t].is == 'comment') \
            && (data_src[s].is != data_trn[t].is)
              if data_src[s].is == 'comment'
                puts "src (comment):\n\t" + data_src[s].obj if @md.opt.cmd =~ /M/
                s += 1
                #next if data_src[s].is == 'comment'
              elsif data_trn[t].is == 'comment'
                puts "trans (comment):\n\t" + data_trn[t].obj if @md.opt.cmd =~ /M/
                t += 1
                #next if data_trn[t].is == 'comment'
              end
              # the skipped comment may have been the last object on its side
              break unless data_src[s] && data_trn[t]
            end
            check_ocn_alignment(data_src, data_trn, s, t)
          end
          notes_s, notes_t = '', ''
          data_src[s], notes_s = markup(data_src[s])
          if data_trn
            data_trn[t], notes_t = markup(data_trn[t])
            #data_src[s],data_trn[t]=pot_data(data_src[s],notes_s,data_trn[t],notes_t)
            pot_data(data_src[s], notes_s, data_trn[t], notes_t)
          else
            #data_src[s],nul=pot_data(data_src[s],notes_s)
            pot_data(data_src[s], notes_s)
          end
          s += 1; t += 1
        end
        @pot #watch
      end

      # Dispatches one object to the pot_structure_* method for its type and
      # then strips leftover <!...!> and <:...> markers from the source text.
      # Objects matching metadata / end-of-file / endnote-section markers are
      # ignored.
      def pot_data(dob_src = '', notes_s = '', dob_trn = '', notes_t = '')
        return if dob_src.obj =~ /(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/
        if defined?(dob_src.ocn) && dob_src.ocn.to_s =~ /\d+/
          @p_num = SiSU_po4a_utils::Paragraph_number.new(dob_src.ocn.to_s)
        end
        case dob_src.is
        when 'heading' then pot_structure_heading(dob_src, notes_s, dob_trn, notes_t)
        when 'para'    then pot_structure_para(dob_src, notes_s, dob_trn, notes_t)
        when 'group'   then pot_structure_group(dob_src, notes_s, dob_trn, notes_t)
        when 'block'   then pot_structure_block(dob_src, notes_s, dob_trn, notes_t)
        when 'verse'   then pot_structure_verse(dob_src, notes_s, dob_trn, notes_t)
        when 'code'    then pot_structure_code(dob_src, notes_s, dob_trn, notes_t)
        when 'table'   then pot_structure_table(dob_src, notes_s, dob_trn, notes_t)
        end
        if defined?(dob_src.idx) && !dob_src.idx.nil? && !dob_src.idx.empty?
          pot_structure_idx(dob_src, dob_trn)
        end
        # endnote objects are left as they are (previously the object was
        # replaced by '' and the cleaning below then failed on ''.obj)
        endnote = (dob_src.obj =~ /<a name="n\d+">/) \
          && (dob_src.obj =~ /^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
        unless endnote
          dob_src.obj.gsub!(/<!.+!>/, ' ') ## Clean Prepared Text
          dob_src.obj.gsub!(/<:\S+>/, ' ') ## Clean Prepared Text
        end
        #[dob_src,dob_trn]
      end

      # Rewrites dob.obj in place from internal markers to po markup and
      # returns [dob, notes], notes being the endnotes extracted from the
      # object ('' for code objects, where no extraction takes place).
      def markup(dob)
        TEXT_FACES.each do |face|
          dob.obj.gsub!(/#{Mx[:"fa_#{face}_o"]}(.+?)#{Mx[:"fa_#{face}_c"]}/,
                        "#{Px[:"po_#{face}_o"]}\\1#{Px[:"po_#{face}_c"]}")
        end
        notes = ''
        unless dob.is == 'code'
          dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/, '\1')
          dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, '\1')
          dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, '\1 [link: <\2>]')
          dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/, '\1 [link: local image]')
          dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, '\1')
          #dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,"#{@brace_url.txt_open}\\1#{@brace_url.txt_close}")
          notes = extract_endnotes(dob)
          #% ### footnotes current state - extracted
          dob.obj.gsub!(/#{Mx[:en_a_o]}([\d]+)\s+(?:.+?)#{Mx[:en_a_c]}/, '~^') # endnote marker marked up
          #% ### footnotes current state - keep inline
          #dob.obj.gsub!(/#{Mx[:en_a_o]}[\d]+\s+(.+?)#{Mx[:en_a_c]}/,'~{ \1 }~') # inline endnote with marker marked up
          dob.obj.gsub!(/#{Mx[:en_b_o]}[\d]+\s+(.+?)#{Mx[:en_b_c]}/, '~[ \1 ]~')         # inline endnote with marker marked up
          dob.obj.gsub!(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/, '~{\1 \2 }~')     # inline endnote with marker marked up
          dob.obj.gsub!(/#{Mx[:en_b_o]}([*+]+)\s+(.+?)#{Mx[:en_b_c]}/, '~[\1 \2 ]~')     # inline endnote with marker marked up
          GLYPHS.each do |code, char|
            dob.obj.gsub!(/#{Mx[:gl_o]}#{code}#{Mx[:gl_c]}/, char)
          end
        end
        if dob.of == 'block' # watch
          dob.obj.gsub!(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/, "* ")
          dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/, "\n")
        else
          dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/, "\n\n")
        end
        if dob.is == 'code'
          # applied twice so that adjacent occurrences (_<_<) are both handled
          2.times { dob.obj.gsub!(/(^|[^}])_([<>])/m, '\1\2') } # _> _<
        end
        dob.obj.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/, '') # remove page breaks
        dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, '\1')
        dob.obj.gsub!(/<a href=".+?">(.+?)<\/a>/m, '\1')
        dob.obj.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/, '') # remove name links
        # NOTE(review): the first alternative is a whitespace character in the
        # reviewed source (space or no-break space, not distinguishable there);
        # both are covered - confirm against the canonical file
        dob.obj.gsub!(/[ \u00A0]|#{Mx[:nbsp]}/, ' ') # decide on
        dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/, ' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")
        dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/, ' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")
        dob.obj.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/, '[image: "\1"]')
        [dob, notes]
      end

      # Assembles the sections of +pot+ in output order, writes them through
      # Output and resets the shared endnote store.
      def publish(fn, pot)
        content = [pot[:open], pot[:head], pot[:metadata], pot[:body]]
        content << @@endnotes[:end] if @@endnotes_
        Output.new(fn, content, @md).po4a
        @@endnotes = { para: [], end: [] }
      end

      private

      # True when the command string contains 'M', the flag under which
      # markup instructions are added to entry descriptions.
      def instruct?
        @md.opt.cmd =~ /M/
      end

      # Returns [desc, s_mark]. Without the 'M' flag desc is just +d+ and
      # s_mark is empty; with it, s_mark is a "# " comment line holding
      # +marker+ and desc is d + s_mark + instruction.
      def pot_description(d, marker, instruction)
        return [d.to_s, ''] unless instruct?
        s_mark = "\n# " + marker
        ["#{d}#{s_mark}#{instruction}", s_mark]
      end

      # Note text with the number following the "^~" marker removed.
      def unnumbered_note(note)
        note.to_s.gsub(/^\^~[\d*+]+/, '^~')
      end

      # Emits the notes belonging to the entry just written: with endnotes
      # enabled only spacing is added, otherwise the notes follow directly.
      def append_notes(notes_s, notes_t)
        return unless @@endnotes[:para]
        if @@endnotes_
          @pot[:body] << @br * 2
        elsif notes_s.length > 0
          @pot[:body] << @br
          wrap_endnotes(notes_s, notes_t)
        end
      end

      # Returns [description, marker example, instruction] for indented,
      # hanging-indent and bullet paragraphs; nil for a regular paragraph.
      def para_profile(dob)
        if (dob.indent =~ /[1-9]/) && (dob.indent == dob.hang)
          if dob.bullet_
            mark = "_#{dob.indent}* "
            ["#{dob.is}: indent #{dob.indent}, bullet", mark,
             %{\n# markup for indented bullet text is at the start of the line/object, an underscore followed by the indent level and an asterisk "#{mark}"}]
          else
            mark = "_#{dob.indent} "
            ["#{dob.is}: indent #{dob.indent}", mark,
             %{\n# markup for indented text is at the start of the line/object, an underscore followed by the indent level "#{mark}"}]
          end
        elsif (dob.hang =~ /[0-9]/) && (dob.indent != dob.hang)
          mark = "_#{dob.hang}_#{dob.indent} "
          ["#{dob.is}: hang #{dob.hang} indent #{dob.indent}", mark,
           %{\n# markup for indented text with a first line indented to a different level from the rest of the paragraph, is at the start of the line/object, an underscore and the first indent level a second underscore and the indent level for the rest of the paragraph, "#{mark}"}]
        elsif dob.bullet_
          mark = '_* '
          ["#{dob.is}: bullet", mark,
           %{\n# markup for indented text is at the start of the line/object, an underscore followed by an asterisk "#{mark}"}]
        end
      end

      # Shared body of the block-type entries (block, group, verse, code,
      # table): the object text is emitted without line wrapping.
      def pot_structure_unwrapped(dob_src, dob_trn, mark, instruction)
        desc, = pot_description(dob_src.is, %{"\\n\\n#{mark}\\n\\n"}, instruction)
        trans = (dob_trn == '') ? '' : dob_trn.obj
        @pot[:body] << pot_structure(desc, dob_src.obj, trans).no_line_wrap_block << @br
      end

      # Builds one array per metadata group: the two header comment lines,
      # 'msgid ""', the source values, 'msgstr ""' and, when meta_trn is
      # given, the translation values.
      def metadata_entries(meta_src, meta_trn = nil)
        METADATA_GROUPS.map do |label, group_path, fields|
          entry = [
            "#. #{@po4a_identify_type} - #{label}",
            "#: en/#{@md.fns}:#{SiSU_po4a_utils::Pot_number.new.num}",
            'msgid ""',
          ]
          entry.concat(metadata_values(meta_src, group_path, fields))
          entry << 'msgstr ""'
          entry.concat(metadata_values(meta_trn, group_path, fields)) if meta_trn
          entry
        end
      end

      # Reads each field by following its accessor chain from +summary+.
      def metadata_values(summary, group_path, fields)
        fields.map do |field|
          "#{group_path}.#{field}".split('.').inject(summary) { |obj, meth| obj.public_send(meth) }
        end
      end

      # Appends each group to @pot[:metadata]: comment and msgid/msgstr lines
      # as they are, header values ("@tag:" and indented ":sub:" lines)
      # wrapped in double quotes; nil and any other values are dropped.
      def emit_metadata(entries)
        entries.each do |entry|
          z = ''
          entry.each do |x|
            next unless x
            z += x + "\n" if x =~ /^#|^msg(?:id|str)/
            z += %{"#{x}"\n} if x =~ /^@\S+?:(?: |$)/
            z += %{"#{x}"\n} if x =~ /^\s+:\S+?: /
          end
          @pot[:metadata] << z << @br
          #puts z unless z.empty?
        end
      end

      # Compares the object citation numbers (ocn) of the objects at s and t.
      # Equal integers are remembered as the last good match; unequal
      # integers are reported and the process exits; differing ocn classes
      # are reported only.
      def check_ocn_alignment(data_src, data_trn, s, t)
        src, trn = data_src[s], data_trn[t]
        return unless defined?(src.ocn) && defined?(trn.ocn)
        # Integer rather than Fixnum: the Fixnum constant no longer exists in current Ruby
        if src.ocn.is_a?(Integer) && trn.ocn.is_a?(Integer)
          if src.ocn == trn.ocn
            @m_s, @m_t = s, t
          else
            p '--- OCN ---'
            p 'mis-match'
            p src.ocn
            p src.obj
            p trn.ocn
            p trn.obj
            p '---'
            p 'previous match'
            if @m_s && @m_t # absent when the very first numbered pair mismatches
              p data_src[@m_s].ocn
              p data_src[@m_s].obj
              p data_trn[@m_t].ocn
              p data_trn[@m_t].obj
            end
            exit
          end
        elsif src.ocn.class != trn.ocn.class
          p '--- OCN class ---'
          p 'mis-match'
          p src.ocn
          p src.obj
          p trn.ocn
          p trn.obj
        end
      end
    end

    # Writes the assembled content to the pot file ('en') or po file (any
    # other language) and copies the result into the git work area.
    class Output < Source
      include SiSU_Param
      include SiSU_Env

      def initialize(fn, content, md)
        @fn, @content, @md = fn, content, md
        @file = SiSU_Env::SiSU_file.new(md, fn)
      end

      # Writes @content line by line, stripping trailing whitespace and
      # collapsing runs of empty lines to a single one; then resets the pot
      # entry counter and calls po4a_git.
      def po4a #%pot output
        file_pot = (@md.opt.f_pth[:lng] == 'en') ? @file.write_file.pot : @file.write_file.po
        @sisu = []
        emptyline = 0
        begin
          @content.each do |para| # this is a hack
            if para.class == Array && para.length > 0
              para.each do |line|
                next unless line
                # non-mutating: the same string object (e.g. the line break)
                # is referenced many times within the content arrays
                text = line.gsub(/\s+$/m, '').gsub(/^\A[ ]*\Z/m, '')
                emptyline = (text =~ /^\A[ ]*\Z/m) ? emptyline + 1 : 0
                file_pot.puts text if emptyline < 2 #remove extra line spaces (fix upstream)
              end
            else
              file_pot.puts para #unix plaintext # /^([*=-]|\.){5}/
            end
          end
        ensure
          file_pot.close # also when writing fails part way
        end
        SiSU_po4a_utils::Pot_number.new.reset
        po4a_git
      end

      # Unless cmd contains 'M': makes sure the git file structure exists,
      # copies the pot (or po) output into it and calls git.read.
      def po4a_git
        return if @md.opt.cmd =~ /M/
        require_relative 'git'                           # git.rb
        git = SiSU_Git::Source.new(@md.opt)
        git.create_file_structure_git unless FileTest.directory?(@file.output_path.pot_git.dir)
        if @md.opt.f_pth[:lng] == 'en'
          cp(@file.place_file.pot.dir, @file.output_path.pot_git.dir)
        else # naive, work on -->
          cp(@file.place_file.po.dir, @file.output_path.po_git.dir) #unless FileTest.file?(@file.place_file.po_git.dir)
        end
        git.read
      end
    end
  end
end
__END__
!\|#\|&*\|-\|/\|_\|{\|}\|~\|&#

tables are problematic, difficult to reconstitute instruction, check

metadata, move to top? and work on

footnotes, different types, asterisk, also do you want to have separate
paragraphs, or breaks within one block?

where no ocn appropriately use ~# or -# or indeed 1~name-

comments in document, what to do about them, not sure they are currently
retained in dal, could be quite valuable to keep