d: po4a, continue reorganization (translation request dev stopped)
[software/sisu] / lib / sisu / develop / po4a.rb
1 # encoding: utf-8
2 =begin
3
4 * Name: SiSU
5
6 ** Description: documents, structuring, processing, publishing, search
7 *** pot file generation
8
9 ** Author: Ralph Amissah
10 <ralph@amissah.com>
11 <ralph.amissah@gmail.com>
12
13 ** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
14 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
15 All Rights Reserved.
16
17 ** License: GPL 3 or later:
18
19 SiSU, a framework for document structuring, publishing and search
20
21 Copyright (C) Ralph Amissah
22
23 This program is free software: you can redistribute it and/or modify it
24 under the terms of the GNU General Public License as published by the Free
25 Software Foundation, either version 3 of the License, or (at your option)
26 any later version.
27
28 This program is distributed in the hope that it will be useful, but WITHOUT
29 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
30 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
31 more details.
32
33 You should have received a copy of the GNU General Public License along with
34 this program. If not, see <http://www.gnu.org/licenses/>.
35
36 If you have Internet connection, the latest version of the GPL should be
37 available at these locations:
38 <http://www.fsf.org/licensing/licenses/gpl.html>
39 <http://www.gnu.org/licenses/gpl.html>
40
41 <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
42
43 ** SiSU uses:
44 * Standard SiSU markup syntax,
45 * Standard SiSU meta-markup syntax, and the
46 * Standard SiSU object citation numbering and system
47
48 ** Homepages:
49 <http://www.jus.uio.no/sisu>
50 <http://www.sisudoc.org>
51
52 ** Git
53 <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
54 <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/po4a.rb;hb=HEAD>
55
56 =end
57 module SiSU_Po4a
58 require_relative 'ao' # ao.rb
59 require_relative 'se' # se.rb
60 include SiSU_Env
61 require_relative 'ao_composite' # ao_composite.rb
62 require_relative 'shared_metadata' # shared_metadata.rb
63 require_relative 'po4a_set' # po4a_set.rb
64 include SiSU_Param
65 require_relative 'object_munge' # object_munge.rb
66 class Source
67 include SiSU_Object_Munge
68 @@opt_src,@@opt_trn,@@opt_src_,@@opt_trn_,@@md_src,@@md_trn=
69 nil, nil, nil, nil, nil, nil
70 @@auto_translation_ = :go
# Records the processing options and optional filename, runs instruction
# extraction for the file list of opt.fns, builds the language-code regex
# (with the '|en|' alternative collapsed to '|'), and then files this
# invocation's options and parameters in the class-level "translation" or
# "source" slots.
#
# opt - options object; fns, f_pth[:lng] and lng_base are read here
# fn  - optional filename, stored as @fn
def initialize(opt,fn=nil)
  @opt=opt
  @fn=fn
  source_files=SiSU_Info_Env::InfoEnv.new.source_file_processing_array(@opt.fns)
  SiSU_Param::Parameters::Instructions.new(source_files,@opt).extract
  lang_alternatives=Px[:lng_lst_rgx].gsub(/\|en\|/,'|')
  @lang_regx=%r{(?:#{lang_alternatives})}
  # a translation is a file named "name~LANG.ss?" whose path language
  # differs from the base language
  is_translation=(opt.fns =~/\S+?~#{@lang_regx}\.ss[mti]/) \
    && (opt.f_pth[:lng] != @opt.lng_base)
  if is_translation
    @@opt_src_=false
    @@opt_trn=opt
    @@md_trn=SiSU_Param::Parameters.new(opt).get
  else
    @@opt_src_=true
    @@opt_src=opt
    @@md_src=SiSU_Param::Parameters.new(opt).get
  end
end
# Picks the plain-text wrap width: the document's own make.plaintext_wrap if
# it is available and truthy, otherwise the environment's plaintext_wrap,
# otherwise the fallback of 78.
#
# md  - object that may answer make.plaintext_wrap
# env - object that may answer plaintext_wrap
def wrap_width_set(md,env)
  if (defined? md.make.plaintext_wrap) && md.make.plaintext_wrap
    return md.make.plaintext_wrap
  end
  if (defined? env.plaintext_wrap) && env.plaintext_wrap
    return env.plaintext_wrap
  end
  78
end
# Processes one file: reports progress on screen (unless quiet), obtains the
# abstract-object arrays for the source document and, where the file is a
# translation, for the translated document as well, then hands both to
# Scroll#songsheet.
#
# md         - document parameters (passed through by the caller, unused here)
# env        - environment info; program.text_editor is read for verbose output
# file       - file operations object; output_path.pot.dir is read for output
# wrap_width - wrap width forwarded to Scroll
# fn         - name of the file to process
def process_file(md,env,file,wrap_width,fn)
  unless @opt.act[:quiet][:set]==:on
    verbose=(@opt.act[:verbose][:set]==:on \
      || @opt.act[:verbose_plus][:set]==:on \
      || @opt.act[:maintenance][:set]==:on)
    if verbose
      SiSU_Screen::Ansi.new(
        @opt.act[:color_state][:set],
        'Pot po4a',
        "#{env.program.text_editor} #{file.output_path.pot.dir}/"
      ).green_hi_blue
      SiSU_Screen::Ansi.new(
        @opt.act[:color_state][:set],
        @opt.fns,
        file.output_path.pot.dir
      ).flow
    else
      SiSU_Screen::Ansi.new(
        @opt.act[:color_state][:set],
        'Pot po4a',
        @opt.fns
      ).green_title_hi
    end
  end
  is_translation=(@opt.fns =~/\S+?~#{@lang_regx}\.ss[mti]/) \
    || (@opt.f_pth[:lng] != @opt.lng_base)
  if is_translation
    @ao_arr_lang_trans=
      SiSU_AO::Source.new(@opt,fn,:po4a).get # ao file drawn here
    # source filename: the translation's name with its "~LANG" part removed
    src_fn=(fn =~/\S+?~\S{2}(?:_\S{2})?\.ss[mti]/) \
      ? fn.gsub(/(\S+?)~\S{2}(?:_\S{2})?(\.ss[mti])/,'\1\2')
      : fn
    transdir=Dir.pwd
    srcdir=transdir
    lang_dir_rgx=/\/#{@lang_regx}$/
    if transdir.to_s =~ lang_dir_rgx
      # working inside a language directory: the source document is looked
      # for in the sibling directory named after the base language
      srcdir=transdir.gsub(lang_dir_rgx,"/#{@opt.lng_base}")
      Dir.chdir(srcdir) if FileTest.directory?(srcdir)
    end
    unless FileTest.file?("#{srcdir}/#{src_fn}")
      puts "no identified source document"
      exit
    end
    @ao_arr_lang_src=
      SiSU_AO::Source.new(@@opt_src,src_fn,:po4a).get # ao file drawn here
    Dir.chdir(transdir) if transdir
  else
    @ao_arr_lang_src=
      SiSU_AO::Source.new(@opt,fn,:po4a).get # ao file drawn here
    @ao_arr_lang_trans=nil
  end
  SiSU_Po4a::Source::Scroll.new(
    fn,
    @ao_arr_lang_src,
    @ao_arr_lang_trans,
    @@md_src,
    @@md_trn,
    wrap_width
  ).songsheet
end
# Entry point: works out the list of files to process (the composite file
# list for .ssm / .ssm.sst / .-sst names, otherwise just @opt.fns), derives
# the wrap width, and runs process_file on each file. Any StandardError is
# handed to SiSU_Errors::Rescued together with the current location.
def read
  src={ pth: @opt.f_pth[:pth] }
  src[:files]=
    if @opt.fns =~ /\.(?:(?:-|ssm\.)sst|ssm)$/
      # composite documents are addressed by their .ssm name
      @opt.fns=@opt.fns.gsub(/\.ssm\.sst$/,'.ssm')
      SiSU_Assemble::CompositeFileList.new(@opt).read
    else
      [@opt.fns]
    end
  md=SiSU_Param::Parameters.new(@opt).get
  env=SiSU_Env::InfoEnv.new(@opt.fns)
  file=SiSU_Env::FileOp.new(md)
  wrap_width=wrap_width_set(md,env)
  src[:files].each { |fn| process_file(md,env,file,wrap_width,fn) }
rescue
  SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do
    __LINE__.to_s + ':' + __FILE__
  end
end
204 private
205 class Scroll <Source
206 include SiSU_Po4aUtils
207 @@endnotes={ para: [], end: [] }
# Stores the inputs for one pot run and prepares the empty output buckets.
#
# fn         - filename
# data_src   - source document objects
# data_trn   - translated document objects (nil when there is no translation)
# md_src     - source document parameters
# md_trn     - translation parameters; used as @md unless it is nil
# wrap_width - wrap width handed on to the wrapping utility
def initialize(fn,data_src,data_trn,md_src,md_trn,wrap_width)
  @fn=fn
  @data_src=data_src
  @data_trn=data_trn
  @md_src=md_src
  @md_trn=md_trn
  @wrap_width=wrap_width
  @md=if md_trn.nil?
    md_src
  else
    md_trn
  end
  @tab="\t"
  # true when --endnote was requested, false otherwise (--footnote)
  @@endnotes_=!!(@md.opt.selections.str =~/--endnote/)
  @pot=[:body,:open,:close,:head,:metadata,:tail].
    each_with_object({}) { |bucket,pot| pot[bucket]=[] }
end
# Line break for the output: "\r\n" when the selection string contains
# --dos, "\n" otherwise (--unix).
def br
  if @md.opt.selections.str =~/--dos/
    "\r\n"
  else
    "\n"
  end
end
# Builds the pot content from the source/translation data and publishes it
# under the filename given at construction.
def songsheet
  # read the filename before pot_markup runs: wrap_endnotes reassigns @fn
  # as a note counter
  output_fn=@fn
  publish(output_fn,pot_markup(@data_src,@data_trn))
end
# Returns a SiSU_Po4aUtils::Wrap for one pot entry. Note the argument order
# expected by Wrap: original text and translation come before the
# description.
def pot_structure_wrap(desc,orig,trans,indent=0,hang=0)
  wrap_args=[@md,orig,trans,desc,@wrap_width,indent,hang]
  SiSU_Po4aUtils::Wrap.new(*wrap_args)
end
# Wraps each note of a paragraph as its own pot entry, collects the wrapped
# notes in @@endnotes, appends the per-paragraph notes to @pot[:body] and
# returns @@endnotes.
#
# orig_notes - notes of the source object (indexed 0...length)
# trn_notes  - notes of the translated object; only used when it is an Array
#              of the same length as orig_notes
#
# NOTE(review): @fn is reused here as the running note number, overwriting
# the filename stored by initialize; songsheet copes by reading @fn first.
def wrap_endnotes(orig_notes='',trn_notes='')
  note_kind=@@endnotes_ ? 'endnote' : 'footnote'
  numbered_rgx=/^\^~[\d*+]+/
  # drops the number that follows the "^~" note marker, if there is one
  unnumbered=lambda do |note|
    (note =~ numbered_rgx) ? note.gsub(numbered_rgx,'^~') : note
  end
  @fn=0
  orig_notes.length.times do |i|
    src_note=orig_notes[i].to_s
    @fn += 1 if src_note =~/^\^~([\d*+]+)/ # provides endnote number within paragraph
    d="#{note_kind} #{@fn}"
    mark="^~ "
    instruct=''
    s_mark=''
    if @md.opt.act[:maintenance][:set]==:on
      instruct=%{\n# footnotes, the preferred sisu markup for a footnote is~{this is a footnote}~ } \
        + %{however, for translation a footnote reference marker in the text~^ } \
        + %{with a set of notes following the paragraph starting on a newline with "^~ this is a footnote", } \
        + %{is easier to deal with, if possible these should be converted back to~{inline notes}~}
      s_mark="\n# " + %{"\\n\\n#{mark}...\\n\\n"}
    end
    desc="#{d}#{s_mark}#{instruct}"
    orig=unnumbered.call(src_note)
    trans=if trn_notes.is_a?(Array) \
    && trn_notes.length==orig_notes.length
      unnumbered.call(trn_notes[i].to_s)
    else ''
    end
    wrap=pot_structure_wrap(desc,orig,trans).line_wrap
    wrap=if wrap =~ /^\s*\^~[\d*+]+\s+.+?\s*\Z/m
      wrap.gsub(/^\s*(\^~[\d*+]+)\s+(.+?)\s*\Z/m, <<GSUB
\\1 \\2
GSUB
      )
    else
      wrap.gsub(/^(.+)\Z/m, <<GSUB
\\1
GSUB
      )
    end
    @@endnotes[:para] << wrap
    @@endnotes[:end] << '' << wrap
  end
  @@endnotes[:para].each { |note| @pot[:body] << note << br }
  @@endnotes[:para]=[]
  @@endnotes
end
# Appends one untranslated pot entry per metadata group (title, creator,
# rights, classify, date, make instructions) of the source document to
# @pot[:metadata]. Each entry has a "#." type comment, a "#:" location
# comment, 'msgid ""' followed by the group's header lines, and an empty
# 'msgstr ""'.
def pot_metadata_src
  @po4a_identify_type='type: SiSU doc' #'type: Plain text'
  meta_src=SiSU_Metadata::Summary.new(@md_src)
  # [comment label, extractor returning the group's lines in output order]
  # (notes.relation, notes.type, identifier.oclc and identifier.isbn are
  # not included in the classify group)
  sections=[
    ['metadata: title', ->(m) {
      t=m.metadata_tags.title
      [t.main,t.sub,t.edition,t.note,t.short,t.language,t.language_char]
    }],
    ['metadata: creator', ->(m) {
      c=m.metadata_tags.creator
      [c.head,c.author,c.contributor,c.illustrator,c.photographer,
       c.translator,c.audio,c.digitized_by,c.prepared_by]
    }],
    ['metadata: rights', ->(m) {
      r=m.metadata_tags.rights
      [r.head,r.copyright.text,r.copyright.translation,
       r.copyright.illustrations,r.copyright.photographs,
       r.copyright.digitization,r.copyright.audio,r.license]
    }],
    ['metadata: classify', ->(m) {
      k=m.metadata_tags.classify
      [k.head,k.subject,k.topic_register,k.loc,k.dewey]
    }],
    ['metadata: date', ->(m) {
      dt=m.metadata_tags.date
      [dt.head,dt.added_to_site,dt.available,dt.created,dt.issued,
       dt.modified,dt.published,dt.valid]
    }],
    ['processing, make instruction', ->(m) {
      mk=m.processing_tags.make
      [mk.language,mk.headings,mk.num_top,mk.breaks,mk.emphasis,
       mk.bold,mk.italics,mk.texpdf_font]
    }],
  ]
  w=sections.map do |label,fields|
    [
      "#. #{@po4a_identify_type} - #{label}",
      "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}",
      'msgid ""',
      *fields.call(meta_src),
      'msgstr ""',
    ]
  end
  w.each do |entry|
    z=''
    entry.each do |x|
      next unless x
      # comment and msgid/msgstr lines go out as they are; "@tag:" and
      # indented ":sub-tag:" header lines are emitted as quoted strings
      z += x + "\n" if x =~/^#|^msg(?:id|str)/
      z += %{"#{x}"\n} if x =~/^@\S+?:(?: |$)/
      z += %{"#{x}"\n} if x =~/^\s+:\S+?: /
    end
    @pot[:metadata] << z << br
  end
end
# Appends one pot entry per metadata group (title, creator, rights,
# classify, date, make instructions) to @pot[:metadata], pairing the source
# document's header lines (after 'msgid ""') with the translated document's
# lines for the same group (after 'msgstr ""').
def pot_metadata_src_trn
  @po4a_identify_type='type: SiSU doc'
  #@po4a_identify_type='type: Plain text'
  meta_src=SiSU_Metadata::Summary.new(@md_src)
  meta_trn=SiSU_Metadata::Summary.new(@md_trn)
  # [comment label, extractor returning the group's lines in output order]
  # (notes.relation, notes.type, identifier.oclc and identifier.isbn are
  # not included in the classify group)
  sections=[
    ['metadata: title', ->(m) {
      t=m.metadata_tags.title
      [t.main,t.sub,t.edition,t.note,t.short,t.language,t.language_char]
    }],
    ['metadata: creator', ->(m) {
      c=m.metadata_tags.creator
      [c.head,c.author,c.contributor,c.illustrator,c.photographer,
       c.translator,c.audio,c.digitized_by,c.prepared_by]
    }],
    ['metadata: rights', ->(m) {
      r=m.metadata_tags.rights
      [r.head,r.copyright.text,r.copyright.translation,
       r.copyright.illustrations,r.copyright.photographs,
       r.copyright.digitization,r.copyright.audio,r.license]
    }],
    ['metadata: classify', ->(m) {
      k=m.metadata_tags.classify
      [k.head,k.subject,k.topic_register,k.loc,k.dewey]
    }],
    ['metadata: date', ->(m) {
      dt=m.metadata_tags.date
      [dt.head,dt.added_to_site,dt.available,dt.created,dt.issued,
       dt.modified,dt.published,dt.valid]
    }],
    ['processing, make instruction', ->(m) {
      mk=m.processing_tags.make
      [mk.language,mk.headings,mk.num_top,mk.breaks,mk.emphasis,
       mk.bold,mk.italics,mk.texpdf_font]
    }],
  ]
  w=sections.map do |label,fields|
    [
      "#. #{@po4a_identify_type} - #{label}",
      "#: en/#{@md.fns}:#{SiSU_Po4aUtils::PotNumber.new.num}",
      'msgid ""',
      *fields.call(meta_src),
      'msgstr ""',
      *fields.call(meta_trn),
    ]
  end
  w.each do |entry|
    z=''
    entry.each do |x|
      next unless x
      # comment and msgid/msgstr lines go out as they are; "@tag:" and
      # indented ":sub-tag:" header lines are emitted as quoted strings
      z += x + "\n" if x =~/^#|^msg(?:id|str)/
      z += %{"#{x}"\n} if x =~/^@\S+?:(?: |$)/
      z += %{"#{x}"\n} if x =~/^\s+:\S+?: /
    end
    @pot[:metadata] << z << br
  end
end
# Reads and optionally updates the shared auto-translation state, which it
# also returns. When the po4a_lang_trans action is not :on the state is
# forced to :skip; when it is on, a truthy set_to replaces the state and a
# nil set_to leaves it as it was.
def auto_translate?(set_to=nil)
  requested=(@md.opt.act[:po4a_lang_trans][:set]==:on)
  @@auto_translation_=requested ? (set_to || @@auto_translation_) : :skip
end
# Returns a machine translation of src_txt obtained from the external
# `trans` command, or '' when auto-translation is skipped or fails.
#
# src_txt - text to translate (cleaned with clean_text first)
# markup  - markup flag forwarded to clean_text
#
# Changes from the earlier implementation:
# * every exit path returns a String; the rescue path used to return the
#   diagnostic message (the value of `p`), which callers then used as the
#   translation;
# * the text is passed to `trans` as a single argv entry via IO.popen
#   instead of being interpolated into a shell command line, so quotes,
#   apostrophes and shell metacharacters in document text can neither break
#   nor extend the command; shell escaping is therefore no longer applied;
# * on timeout the child process is signalled so that closing the pipe
#   cannot block.
#
# NOTE(review): the unconditional auto_translate?(:skip) below switches
# translation requests off for as long as po4a_lang_trans is on, so the
# external command is currently never reached; kept as found.
# NOTE(review): the earlier code built trans + ' {[G.Tr]}http://translate.google.com'
# but discarded the result; the attribution is still not appended here -
# confirm whether it should be.
def auto_translation(src_txt,markup=:src) # check for an appropriate request flag
  auto_translate?(:skip)
  begin
    text=clean_text(src_txt,markup).gsub(/\n/,' ')
    trans=''
    unless auto_translate? == :skip
      require 'timeout'
      cmd=['trans','-b','-no-ansi',"en:#{@md.opt.f_pth[:lng_is]}",text]
      IO.popen(cmd) do |io|
        begin
          Timeout.timeout(60) { trans=io.read.to_s.strip }
        rescue Timeout::Error
          # stop the stalled child before the pipe is closed
          Process.kill('TERM',io.pid)
          raise
        end
      end
    end
    trans
  rescue
    auto_translate?(:skip)
    p 'timeout issues with translation, skip remaining'
    ''
  end
end
584 def pot_structure
# Adds the pot entry for a heading to @pot[:body], followed by its notes.
#
# dob_src - source heading object (name, lv, is and obj are read)
# notes_s - notes belonging to the source object
# dob_trn - translated object, or '' when there is none
# notes_t - notes belonging to the translated object
#
# The earlier version also computed three unused locals from dob_src.ln
# (lv, lv - 1, lv + 2); they are removed, so a heading without ln no longer
# raises here.
def heading(dob_src='',notes_s='',dob_trn='',notes_t='') #% used to extract the structure of a document
  name_tag=(dob_src.name=~/[a-z\d]/i) ? dob_src.name : ''
  mark="#{dob_src.lv}~#{name_tag} "
  d="#{dob_src.is.to_s} (level #{dob_src.lv})"
  instruct=''
  s_mark=''
  if @md.opt.act[:maintenance][:set]==:on
    instruct=%{\n# markup for headings is marker at the start of the line/object, } \
      + %{indicating the heading level, and if provided an associated name tag, } \
      + %{this heading is "#{mark}"}
    s_mark="\n# " + %{"\\n\\n#{mark}...\\n\\n"}
  end
  desc="#{d}#{s_mark}#{instruct}"
  orig="#{s_mark}#{dob_src.obj}"
  # no translation is recorded when there is none or it equals the source
  trans=((dob_trn=='') \
  || (dob_src.obj == dob_trn.obj)) \
  ? ''
  : "#{s_mark}#{dob_trn.obj}"
  # auto_translate? returns :go or :skip, both truthy; auto_translation
  # itself returns '' when translation is skipped
  if @md.opt.f_pth[:lng_is] != @md.opt.lng_base \
  && trans.empty? \
  && auto_translate?
    trans=auto_translation(dob_src.obj,:src)
  end
  wrapped=pot_structure_wrap(desc,orig,trans).line_wrap
  @pot[:body] << wrapped << br # main text, contents, body KEEP
  if @@endnotes[:para] \
  && notes_s.length > 0 \
  && !@@endnotes_
    @pot[:body] << br
    wrap_endnotes(notes_s,notes_t)
  elsif @@endnotes[:para] \
  && @@endnotes_
    @pot[:body] << br*2
  end
end
# Adds the pot entry for a paragraph to @pot[:body], followed by its notes.
# Five paragraph shapes are distinguished, each with its own markup marker
# and translator comment: indented bullet, indented text, hanging indent,
# plain bullet and regular paragraph.
#
# dob_src - source paragraph object (indent, hang, bullet_, is and obj are read)
# notes_s - notes belonging to the source object
# dob_trn - translated object, or '' when there is none
# notes_t - notes belonging to the translated object
#
# Fix: the hanging-indent comment interpolated an undefined local (mark1),
# raising NameError whenever maintenance mode met such a paragraph; it now
# uses mark like every other branch.
def para(dob_src='',notes_s='',dob_trn='',notes_t='') #% used to extract the structure of a document
  maintenance=(@md.opt.act[:maintenance][:set]==:on)
  instruct=''
  marked_orig=true # whether orig is prefixed with the marker comment
  if dob_src.indent =~/[1-9]/ \
  && dob_src.indent == dob_src.hang
    if dob_src.bullet_
      mark="_#{dob_src.indent}* "
      d="#{dob_src.is.to_s}: indent #{dob_src.indent}, bullet"
      if maintenance
        instruct=%{\n# markup for indented bullet text is at the start of the line/object, } \
          + %{an underscore followed by the indent level and an asterisk "#{mark}"}
      end
    else
      mark="_#{dob_src.indent} "
      d="#{dob_src.is.to_s}: indent #{dob_src.indent}"
      if maintenance
        instruct=%{\n# markup for indented text is at the start of the line/object, } \
          + %{an underscore followed by the indent level "#{mark}"}
      end
    end
  elsif dob_src.hang =~/[0-9]/ \
  && dob_src.indent != dob_src.hang
    mark="_#{dob_src.hang}_#{dob_src.indent} "
    d="#{dob_src.is.to_s}: hang #{dob_src.hang} indent #{dob_src.indent}"
    if maintenance
      instruct=%{\n# markup for indented text with a first line indented } \
        + %{to a different level from the rest of the paragraph, } \
        + %{is at the start of the line/object, } \
        + %{an underscore and the first indent level } \
        + %{a second underscore and the indent level for the rest of the paragraph, "#{mark}"}
    end
  elsif dob_src.bullet_
    mark='_* '
    d="#{dob_src.is.to_s}: bullet"
    if maintenance
      instruct=%{\n# markup for indented text is at the start of the line/object, } \
        + %{an underscore followed by an asterisk "#{mark}"}
    end
  else
    mark=''
    d=dob_src.is.to_s
    instruct=maintenance ? "\n# " : %{\n# regular paragraph, no special markup}
    marked_orig=false # a regular paragraph's source text is passed on bare
  end
  s_mark=maintenance ? "\n# " + %{"\\n\\n#{mark}...\\n\\n"} : ''
  desc="#{d}#{s_mark}#{instruct}"
  orig=marked_orig ? "#{s_mark}#{dob_src.obj}" : dob_src.obj
  # no translation is recorded when there is none or it equals the source
  trans=((dob_trn=='') \
  || (dob_src.obj == dob_trn.obj)) \
  ? ''
  : "#{s_mark}#{dob_trn.obj}"
  # auto_translate? returns :go or :skip, both truthy; auto_translation
  # itself returns '' when translation is skipped
  if @md.opt.f_pth[:lng_is] != @md.opt.lng_base \
  && trans.empty? \
  && auto_translate?
    trans=auto_translation(dob_src.obj,:src)
  end
  wrapped=pot_structure_wrap(desc,orig,trans).line_wrap
  @pot[:body] << wrapped << br # main text, contents, body KEEP
  if @@endnotes[:para] \
  && notes_s.length > 0 \
  && !@@endnotes_
    @pot[:body] << br
    wrap_endnotes(notes_s,notes_t)
  elsif @@endnotes[:para] \
  && @@endnotes_
    @pot[:body] << br*2
  end
end
# Adds the pot entry for a block{ ... }block object to @pot[:body], using
# the wrapper's no_line_wrap_block output.
#
# dob_src - source object (is and obj are read)
# dob_trn - translated object, or '' when there is none
# notes_s, notes_t - accepted for a uniform signature; not used here
def block(dob_src='',notes_s='',dob_trn='',notes_t='') #% used to extract the structure of a document
  mark="block{\\n\\n...\\n\\n}block"
  s_mark=''
  instruct=''
  if @md.opt.act[:maintenance][:set]==:on
    s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"}
    instruct=%{\n# block text is a text block with an opening and closing marker, } \
      + %{the content of which may be wrapped}
  end
  desc="#{dob_src.is.to_s}#{s_mark}#{instruct}"
  # no translation is recorded when there is none or it equals the source
  trans=if (dob_trn=='') || (dob_src.obj == dob_trn.obj)
    ''
  else
    "#{s_mark}#{dob_trn.obj}"
  end
  if @md.opt.f_pth[:lng_is] != @md.opt.lng_base \
  && trans.empty? \
  && auto_translate?
    trans=auto_translation(dob_src.obj,:src)
  end
  @pot[:body] << pot_structure_wrap(desc,dob_src.obj,trans).no_line_wrap_block << br
end
# Adds the pot entry for a group{ ... }group object to @pot[:body], using
# the wrapper's no_line_wrap_block output.
#
# dob_src - source object (is and obj are read)
# dob_trn - translated object, or '' when there is none
# notes_s, notes_t - accepted for a uniform signature; not used here
def group(dob_src='',notes_s='',dob_trn='',notes_t='') #% used to extract the structure of a document
  mark="group{\\n\\n...\\n\\n}group"
  s_mark=''
  instruct=''
  if @md.opt.act[:maintenance][:set]==:on
    s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"}
    instruct=%{\n# group text is a text block with an opening and closing marker, } \
      + %{the content of which may be wrapped}
  end
  desc="#{dob_src.is.to_s}#{s_mark}#{instruct}"
  # no translation is recorded when there is none or it equals the source
  trans=if (dob_trn=='') || (dob_src.obj == dob_trn.obj)
    ''
  else
    "#{s_mark}#{dob_trn.obj}"
  end
  if @md.opt.f_pth[:lng_is] != @md.opt.lng_base \
  && trans.empty? \
  && auto_translate?
    trans=auto_translation(dob_src.obj,:src)
  end
  @pot[:body] << pot_structure_wrap(desc,dob_src.obj,trans).no_line_wrap_block << br
end
# Adds the pot entry for a verse (part of a poem{ ... }poem block) to
# @pot[:body], using the wrapper's no_line_wrap_block output.
#
# dob_src - source object (is and obj are read)
# dob_trn - translated object, or '' when there is none
# notes_s, notes_t - accepted for a uniform signature; not used here
#
# Fix: the marker shown in the maintenance comment now spells its line
# breaks as literal "\n" text, as the block/group/code/table markers do.
# It previously contained real newlines, which pushed most of the marker
# out of the "# " comment line.
def verse(dob_src='',notes_s='',dob_trn='',notes_t='') #% used to extract the structure of a document
  mark="poem{\\n\\nverse\\n\\nverse\\n\\n...\\n\\n}poem"
  d=dob_src.is.to_s
  instruct=''
  s_mark=''
  if @md.opt.act[:maintenance][:set]==:on
    instruct=%{\n# verse are part of the text block described as a poem, } \
      + %{the first verse is preceeded by an opening marker, } \
      + %{and the last verse by a closing marker, } \
      + %{the content of which should remain unwrapped}
    s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"}
  end
  desc="#{d}#{s_mark}#{instruct}"
  orig=dob_src.obj
  trans=(dob_trn=='') ? '' : dob_trn.obj
  unwrapped=pot_structure_wrap(desc,orig,trans).no_line_wrap_block
  @pot[:body] << unwrapped << br
end
# Adds the pot entry for a code{ ... }code object to @pot[:body], using the
# wrapper's no_line_wrap_block output. The translation, when given, is used
# as it is.
#
# dob_src - source object (is and obj are read)
# dob_trn - translated object, or '' when there is none
# notes_s, notes_t - accepted for a uniform signature; not used here
def code(dob_src='',notes_s='',dob_trn='',notes_t='') #% used to extract the structure of a document
  mark="code{\\n\\n...\\n\\n}code"
  s_mark=''
  instruct=''
  if @md.opt.act[:maintenance][:set]==:on
    s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"}
    instruct=%{\n# codeblocks are a text block with an opening and closing marker, } \
      + %{the content of which should remain unwrapped}
  end
  desc="#{dob_src.is.to_s}#{s_mark}#{instruct}"
  trans=if dob_trn==''
    ''
  else
    dob_trn.obj
  end
  @pot[:body] << pot_structure_wrap(desc,dob_src.obj,trans).no_line_wrap_block << br
end
# Adds the pot entry for a table{ ... }table object to @pot[:body], using
# the wrapper's no_line_wrap_block output. Occurrences of Mx[:tc_c] in both
# source and translation are replaced by newlines first.
#
# dob_src - source object (is and obj are read)
# dob_trn - translated object, or '' when there is none
# notes_s, notes_t - accepted for a uniform signature; not used here
def table(dob_src='',notes_s='',dob_trn='',notes_t='') #% used to extract the structure of a document
  mark="table{\\n\\n...\\n\\n}table"
  s_mark=''
  instruct=''
  if @md.opt.act[:maintenance][:set]==:on
    s_mark="\n# " + %{"\\n\\n#{mark}\\n\\n"}
    instruct=%{\n# tables are a text block with an opening and closing marker, } \
      + %{the content of which should remain unwrapped}
  end
  desc="#{dob_src.is.to_s}#{s_mark}#{instruct}"
  orig=dob_src.obj.gsub(/#{Mx[:tc_c]}/,"\n")
  trans=((dob_trn=='') ? '' : dob_trn.obj).gsub(/#{Mx[:tc_c]}/,"\n")
  @pot[:body] << pot_structure_wrap(desc,orig,trans).no_line_wrap_block << br
end
# Rebuilds "={ ... }" book-index markup from an abstracted index.
#
# idx - collection whose sorted entries are each walked element by element:
#       a String is a main term, a Hash carries :plus (emitted as "+N" when
#       greater than zero) and :sub (a list of collections of
#       [sub-term, {plus: N}] pairs)
#
# Main terms are separated by ';', a ':' follows a term whose next element
# has sub-terms, and '|' follows every sub-term except those of the last
# :sub element. Returns the markup as one String.
def idx_markup(idx)
  struct=['={']
  idx.sort.each do |x|
    x.each_with_index do |y,i0|
      if String === y
        struct << ';' unless struct[-1] =~/=\{/
        struct << y
        following=x[i0+1]
        struct << ':' if following.class == Hash && following[:sub].length > 0
      elsif Hash === y
        struct << "+#{y[:plus]}" if y[:plus].to_i > 0
        next unless y[:sub].length > 0
        last_sub=y[:sub].length - 1
        y[:sub].each_with_index do |z,i1|
          z.each do |a|
            struct << a[0]
            struct << "+#{a[1][:plus]}" if a[1][:plus].to_i > 0
            struct << '|' if i1 < last_sub
          end
        end
      end
    end
  end
  struct << '}'
  struct.join
end
# Adds the pot entry for a book index to @pot[:body], regenerating the
# "={ ... }" markup from the abstracted index of the source object and, when
# it has a non-empty one, of the translated object.
#
# dob_src - source object; idx is read
# dob_trn - translated object, or '' when there is none
def idx(dob_src='',dob_trn='') #% used for book index but broken as original markup lost, already abstracted, fix
  mark="={ ... }"
  s_mark=''
  instruct=''
  if @md.opt.act[:maintenance][:set]==:on
    s_mark="\n# " + %{"\\n#{mark}\\n\\n"}
    instruct=%{\n# the book index should be attached unwrapped to the preceding text block } \
      + %{(there should be a new line, but no empty line)}
  end
  desc="book-idx#{s_mark}#{instruct}"
  orig=pot_structure.idx_markup(dob_src.idx) #'={' + dob_src.idx + '}'
  trn_has_idx=(defined? dob_trn.idx) \
    && !dob_trn.idx.nil? \
    && !dob_trn.idx.empty?
  trans=trn_has_idx ? pot_structure.idx_markup(dob_trn.idx) : '' #'={' + dob_trn.idx + '}'
  @pot[:body] << pot_structure_wrap(desc,orig,trans).no_line_wrap_block << br
end
904 self
905 end
# Walks the source objects (in step with the translated objects when
# data_trn is given), passes each through markup and pot_data, and returns
# @pot. For .sst/.ssm documents the metadata entries are generated first.
#
# data_src - array of source document objects
# data_trn - array of translated document objects, or nil
#
# While walking source and translation together:
# * the walk stops as soon as either side runs out of objects;
# * comments present on both sides are skipped together; a comment present
#   on one side only is skipped on that side (and printed in maintenance
#   mode);
# * when both objects carry integer object citation numbers (ocn) these
#   must agree; on a mismatch both objects and the last matching pair are
#   printed and the program exits.
#
# Fixes: Integer replaces Fixnum, which no longer exists in current Ruby,
# so the ocn comparison raised NameError; and the "previous match"
# diagnostics are only printed when an earlier match exists, where they
# used to index the arrays with nil.
def pot_markup(data_src,data_trn)
  a_l=if data_trn
    [data_src.length,data_trn.length].max
  else
    data_src.length
  end
  s,t=0,0
  if @md.fns =~ /\.(?:(?:-|ssm\.)?sst|ssm)$/
    (data_trn.nil?) \
    ? pot_metadata_src
    : pot_metadata_src_trn
  end
  a_l.times do
    if data_trn
      break unless data_src[s] && data_trn[t]
      if data_src[s].of == :comment \
      && data_trn[t].of == :comment \
      && (data_src[s].is == data_trn[t].is)
        s+=1;t+=1
        next
      end
      if ((data_src[s].is == :comment) \
      || (data_trn[t].is == :comment)) \
      && (data_src[s].is != data_trn[t].is)
        if data_src[s].is == :comment
          if @md.opt.act[:maintenance][:set]==:on
            puts "src (comment):\n\t" \
              + data_src[s].obj
          end
          s+=1
        elsif data_trn[t].is == :comment
          if @md.opt.act[:maintenance][:set]==:on
            puts "trans (comment):\n\t" \
              + data_trn[t].obj
          end
          t+=1
        end
      end
      src_has_int_ocn=(defined? data_src[s].ocn) \
        && data_src[s].ocn.is_a?(Integer)
      trn_has_int_ocn=(defined? data_trn[t].ocn) \
        && data_trn[t].ocn.is_a?(Integer)
      if src_has_int_ocn && trn_has_int_ocn
        if data_src[s].ocn == data_trn[t].ocn
          @m_s,@m_t=s,t # remember the last pair that matched
        else
          p '--- OCN ---'
          p 'mis-match'
          p data_src[s].ocn
          p data_src[s].obj
          p data_trn[t].ocn
          p data_trn[t].obj
          if @m_s && @m_t
            p '---'
            p 'previous match'
            p data_src[@m_s].ocn
            p data_src[@m_s].obj
            p data_trn[@m_t].ocn
            p data_trn[@m_t].obj
          end
          exit
        end
      elsif (defined? data_src[s].ocn) \
      && (defined? data_trn[t].ocn) \
      && data_src[s].ocn.class != data_trn[t].ocn.class
        # reported but not fatal
        p '--- OCN class ---'
        p 'mis-match'
        p data_src[s].ocn if defined? data_src[s].ocn
        p data_src[s].obj
        p data_trn[t].ocn if defined? data_trn[t].ocn
        p data_trn[t].obj
      end
    end
    notes_s,notes_t='',''
    data_src[s],notes_s=markup(data_src[s])
    if data_trn
      data_trn[t],notes_t=markup(data_trn[t])
      pot_data(data_src[s],notes_s,data_trn[t],notes_t)
    else
      pot_data(data_src[s],notes_s)
    end
    s+=1;t+=1
  end
  @pot #watch
end
# Hands one source object (with its translation and the notes of each, when
# given) to the pot_structure handler that matches the object's type, then
# replaces leftover <!...!> and <:...> markers in the source object's text
# with spaces.
#
# dob_src:: source document object (must respond to obj and is)
# notes_s:: notes belonging to the source object
# dob_trn:: translation document object, if any
# notes_t:: notes belonging to the translation object
#
# Nothing is done for objects whose text matches the meta, end-of-file or
# endnotes markers (Rx[:meta], Mx[:br_eof], Mx[:br_endnotes]).
def pot_data(dob_src='',notes_s='',dob_trn='',notes_t='')
  return if dob_src.obj =~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/
  if (defined? dob_src.ocn) \
  && dob_src.ocn.to_s =~/\d+/
    paranum=dob_src.ocn.to_s
    @p_num=SiSU_Po4aUtils::ParagraphNumber.new(paranum)
  end
  case dob_src.is
  when :heading
    pot_structure.heading(dob_src,notes_s,dob_trn,notes_t)
  when :para
    pot_structure.para(dob_src,notes_s,dob_trn,notes_t)
  when :group
    pot_structure.group(dob_src,notes_s,dob_trn,notes_t)
  when :block
    pot_structure.block(dob_src,notes_s,dob_trn,notes_t)
  when :verse
    pot_structure.verse(dob_src,notes_s,dob_trn,notes_t)
  when :code
    pot_structure.code(dob_src,notes_s,dob_trn,notes_t)
  when :table
    pot_structure.table(dob_src,notes_s,dob_trn,notes_t)
  end
  if (defined? dob_src.idx) \
  && !dob_src.idx.nil? \
  && !dob_src.idx.empty?
    pot_structure.idx(dob_src,dob_trn)
  end
  # The original discarded an endnote object by assigning '' to dob_src and
  # then tested `if dob_src`; '' is truthy in Ruby, so the clean-up went on
  # to call obj on a String and raised NoMethodError. An explicit flag skips
  # the clean-up for that case instead.
  endnote=(dob_src.obj =~/<a name="n\d+">/ \
    && dob_src.obj =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
  unless endnote ## Clean Prepared Text
    dob_src.obj=dob_src.obj.gsub(/<!.+!>/,' ').
      gsub(/<:\S+>/,' ')
  end
  #[dob_src,dob_trn]
end
# Runs a document object through objects.textface_marks_po4a with the
# :separate option and returns the resulting object and its notes as a
# two-element array.
def markup(dob)
  marked,notes=objects.textface_marks_po4a(dob,:separate)
  return marked,notes
end
# Collects the parts of the pot hash in output order (:open, :head,
# :metadata, :body, then the collected endnotes when @@endnotes_ is set),
# passes them to Output for writing, and resets the endnote store.
#
# fn::  file name handed on to Output
# pot:: hash holding the :open, :head, :metadata and :body parts
def publish(fn,pot)
  content=[
    pot[:open],
    pot[:head],
    pot[:metadata],
    pot[:body],
  ]
  if @@endnotes_
    content << @@endnotes[:end]
  end
  writer=Output.new(fn,content,@md,@process)
  writer.po4a
  @@endnotes={ para: [], end: [] }
end
1067 end
# Writes prepared pot/po content to file and copies the result into the
# git output tree.
class Output <Source
  include SiSU_Param
  include SiSU_Env
  # fn::      file name, passed to SiSU_Env::FileOp
  # content:: list of content parts; each part is either an array of lines
  #           or something that is written out as it stands
  # md::      document metadata/parameters object
  # process:: processing mode, :complete by default
  def initialize(fn,content,md,process=:complete)
    @fn,@content,@md,@process=fn,content,md,process
    @file=SiSU_Env::FileOp.new(md,fn)
  end
  # Writes @content to the pot file (when the file's language path equals
  # the base language) or otherwise to the po file, resets the pot number
  # counter and then runs po4a_git.
  def po4a #%pot output
    file_pot=(@md.opt.f_pth[:lng] == @md.opt.lng_base) \
    ? @file.write_file.pot
    : @file.write_file.po
    @sisu=[]
    emptyline=0
    begin
      @content.each do |para| # this is a hack
        if para.is_a?(Array) \
        && para.length > 0
          para.each do |line|
            next unless line
            # strip trailing whitespace; a line of only spaces becomes ''
            line=line.gsub(/\s+$/m,'').
              gsub(/^\A[ ]*\Z/m,'')
            if line=~/^\A[ ]*\Z/m
              emptyline+=1
            else emptyline=0
            end
            file_pot.puts line if emptyline < 2 #remove extra line spaces (fix upstream)
          end
        else file_pot.puts para #unix plaintext # /^([*=-]|\.){5}/
        end
      end
    ensure
      # close the handle even when writing raises, so it is not leaked
      file_pot.close
    end
    SiSU_Po4aUtils::PotNumber.new.reset
    po4a_git
  end
  # Unless maintenance mode is on: creates the git file structure when the
  # pot git directory does not exist, copies the pot file (base language)
  # or the po file (other languages) into the git tree, then calls git.read.
  def po4a_git
    return if @md.opt.act[:maintenance][:set]==:on
    require_relative 'git' # git.rb (loaded only when needed)
    git=SiSU_Git::Source.new(@md.opt,@process)
    unless FileTest.directory?(@file.output_path.pot_git.dir)
      git.create_file_structure_git
    end
    if @md.opt.f_pth[:lng] == @md.opt.lng_base
      FileUtils::cp(
        @file.place_file.pot.dir,
        @file.output_path.pot_git.dir
      )
    else # naive, work on -->
      FileUtils::cp(
        @file.place_file.po.dir,
        @file.output_path.po_git.dir
      ) #unless FileTest.file?(@file.place_file.po_git.dir)
    end
    git.read
  end
end
1124 end
1125 end
1126 __END__
1127 &#033;\|&#035;\|&&#042;\|&#045;\|&#047;\|&#095;\|&#123;\|&#125;\|&#126;\|&#
1128
1129 tables are problematic, difficult to reconstitute instruction, check
1130
1131 metadata, move to top? and work on
1132
1133 footnotes, different types, asterisk, also do you want to have separate
1134 paragraphs, or breaks within one block?
1135
1136 where no ocn appropriately use ~# or -# or indeed 1~name-
1137
1138 comments in document, what to do about them, not sure they are currently
1139 retained in dal, could be quite valuable to keep
1140
1141 Translate Shell
1142 http://www.soimort.org/translate-shell/
1143 translate.google.com