diff options
author | Ralph Amissah <ralph@amissah.com> | 2014-11-26 09:12:11 -0500 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2014-12-08 00:18:15 -0500 |
commit | 4db4dcc886b85bf9db43d66025452fb33c94a9dd (patch) | |
tree | 71aab89128b7b7fd35c886e84808c1881a3bf191 /lib/sisu/develop/txt_plain.rb | |
parent | v5 v6: code headers reformatted for viewing as org-mode files (diff) |
c&d: project dir structure, libs moved under new branch names
* libs & version files under new branch names: current & develop
* previously under branch version numbers (v5|v6)
* version .yml files moved
* associated adjustments made as required, notably to:
bin/sisu se* qi* (file headers); breakage potential, testing
required
* [on dir names, want release to (alphabetically) precede
next/development, considered (cur|dev)
(current|(dev|development|progress|next)) (stable|unstable),
alpha sorting fail (release|(next|develop))]
Diffstat (limited to 'lib/sisu/develop/txt_plain.rb')
-rw-r--r-- | lib/sisu/develop/txt_plain.rb | 597 |
1 files changed, 597 insertions, 0 deletions
# encoding: utf-8
# lib/sisu/develop/txt_plain.rb (added as a new file, blob 45cab309)
=begin

* Name: SiSU

** Description: documents, structuring, processing, publishing, search
*** plaintext text generation, stripped plaintext output (unix, linefeed)

** Author: Ralph Amissah
  <ralph@amissah.com>
  <ralph.amissah@gmail.com>

** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
  2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
  All Rights Reserved.

** License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program. If not, see <http://www.gnu.org/licenses/>.

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
  <http://www.fsf.org/licensing/licenses/gpl.html>
  <http://www.gnu.org/licenses/gpl.html>

  <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

** SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

** Homepages:
  <http://www.jus.uio.no/sisu>
  <http://www.sisudoc.org>

** Git
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/txt.rb;hb=HEAD>

=end
# Plaintext output: turns the document objects supplied by SiSU_AO into a
# wrapped plain-text file with underlined headings, ASCII inline decoration
# and endnotes.
module SiSU_Txt_Plain
  require_relative 'ao'                                 # ao.rb
  require_relative 'se'                                 # se.rb
    include SiSU_Env
  require_relative 'shared_metadata'                    # shared_metadata.rb
  require_relative 'generic_parts'                      # generic_parts.rb
  require_relative 'txt_shared'                         # txt_shared.rb
    include SiSU_Param
  # (the original listed @@alt_id_count twice in this multiple assignment)
  @@alt_id_count,@@tablehead,@@number_of_cols=0,0,0
  @@tablefoot=''
  # Entry point: Source.new(opt).read produces the plaintext file for @opt.fns.
  class Source
    # opt: processing options object; this class reads opt.fns, opt.act,
    # opt.f_pth, opt.fno and opt.selections.
    def initialize(opt)
      @opt=opt
      unless @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/
        # was "#{sf} ...": sf is not defined anywhere in this method, so the
        # notice itself raised NameError
        puts "#{@opt.fns} not a processed file type"
      end
    end
    # Loads document parameters and the document object array, reports
    # progress on screen unless quiet, picks the wrap width and hands over to
    # Scroll. Any StandardError is passed to SiSU_Errors::Rescued.
    def read
      md=SiSU_Param::Parameters.new(@opt).get
      env=SiSU_Env::InfoEnv.new(@opt.fns)
      unless @opt.act[:quiet][:set]==:on
        output_file="#{md.file.output_path.txt.dir}/#{md.file.base_filename.txt}"
        chatty=@opt.act[:verbose_plus][:set]==:on \
        || @opt.act[:maintenance][:set]==:on
        verbose=@opt.act[:verbose][:set]==:on || chatty
        tool=verbose \
        ? "#{env.program.text_editor} #{output_file}"
        : "[#{@opt.f_pth[:lng_is]}] #{@opt.fno}"
        banner=SiSU_Screen::Ansi.new(
          @opt.act[:color_state][:set],
          'Plaintext',
          tool
        )
        verbose ? banner.green_hi_blue : banner.green_title_hi
        if chatty
          SiSU_Screen::Ansi.new(
            @opt.act[:color_state][:set],
            @opt.fns,
            output_file
          ).flow
        end
      end
      ao_array=SiSU_AO::Source.new(@opt).get            # ao file drawn here
      # document setting first, then environment setting, then 78 columns
      wrap_width=if defined?(md.make.plaintext_wrap) \
      && md.make.plaintext_wrap
        md.make.plaintext_wrap
      elsif defined?(env.plaintext_wrap) \
      && env.plaintext_wrap
        env.plaintext_wrap
      else 78
      end
      SiSU_Txt_Plain::Source::Scroll.new(md,ao_array,wrap_width).songsheet
    rescue
      SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do
        __LINE__.to_s + ':' + __FILE__
      end
    end
    private # affects method definitions only; the nested classes stay reachable
    # Converts the document objects to plaintext and publishes the result.
    class Scroll <Source
      include SiSU_Parts_Generic
      include SiSU_TextUtils
      # Open/close pair placed around inline-formatted text.
      Marker=Struct.new(:open,:close)
      # Underline character per heading level.
      HeadingRule=Struct.new(:l0,:l1,:l2,:l3,:l4,:l5,:l6,:l7)
      Decoration=Struct.new(
        :heading_underscore,:bold,:italics,:underscore,:cite,:insert,
        :strike,:superscript,:subscript,:hilite,:monospace
      )
      DECORATION=Decoration.new(
        HeadingRule.new('=','*','+','~','-','.','.','.').freeze,
        Marker.new('*','*').freeze,                     # bold
        Marker.new('/','/').freeze,                     # italics
        Marker.new('_','_').freeze,                     # underscore
        Marker.new('"','"').freeze,                     # cite
        Marker.new('+','+').freeze,                     # insert
        Marker.new('-','-').freeze,                     # strike
        Marker.new('^','^').freeze,                     # superscript
        Marker.new('[',']').freeze,                     # subscript
        Marker.new('*','*').freeze,                     # hilite
        Marker.new('#','#').freeze                      # monospace
      ).freeze
      # Inline font-attribute markup, in the order the substitutions are applied.
      INLINE_MARKUP=[
        :bold,:italics,:underscore,:subscript,:superscript,
        :insert,:cite,:strike,:monospace
      ].freeze
      # Escaped glyph codes (regex source) and their plaintext replacement,
      # in the order the substitutions are applied.
      GLYPHS=[
        ['(?:#lt|#060)','<'],
        ['(?:#gt|#062)','>'],
        ['#(?:038|amp)','&'],
        ['#033','!'],
        ['#035','#'],
        ['#042','*'],
        ['#045','-'],
        ['#047','/'],
        ['#095','_'],
        ['#123','{'],
        ['#125','}'],
        ['#126','~'],
        ['#169','©'],
        ['#092','\\'],
      ].freeze
      @@endnotes={ para: [], end: [] }
      # md: document parameters; data: array of document objects;
      # wrap_width: column at which text is wrapped.
      def initialize(md,data,wrap_width)
        @md,@data,@wrap_width=md,data,wrap_width
        @env=SiSU_Env::InfoEnv.new(@md.fns)
        @tab="\t"
        # notes are collected at the end of the document unless --footnote
        # was selected
        @@endnotes_=case md.opt.selections.str
        when /--footnote/ then false
        else                   true
        end
        @plaintext={ body: [], open: [], close: [], head: [], metadata: [], tail: [] }
      end
      # Runs the conversion and writes the output file.
      def songsheet
        plaintext=markup(@data)
        publish(plaintext)
      end
      def break_line
        "\n"
      end
      # Used for extraction of endnotes from paragraphs: scans dob.obj for
      # endnote markup, wraps each note and stores it in @@endnotes (:para
      # for notes printed after the paragraph, :end for notes printed at the
      # end of the document). Returns @@endnotes.
      def extract_endnotes(dob='')
        line_break=/#{Mx[:br_line]}|#{Mx[:br_nl]}/
        notes=dob.obj.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/)
        @n=[]
        notes.flatten.each do |n|                       # a note containing line breaks is split into separate entries
          n=n.dup.to_s
          if n =~line_break
            @n.concat(n.split(line_break).reject { |x| x.empty? })
          else @n << n
          end
        end
        @n.flatten.each do |e|
          indent=(e.to_s =~/^\[[\d*+]+\]:/) ? 4 : 1
          wrap=SiSU_TextUtils::Wrap.new(e.to_s,@wrap_width,indent,1).line_wrap
          wrap=if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m
            # "12 note text" becomes "[12]: note text"
            wrap.gsub(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m,"\\1[\\2]: \\3\n")
          else
            wrap.gsub(/^(.+)\Z/m,"\\1\n")
          end
          @@endnotes[:para] << "-#{wrap}"
          @@endnotes[:end] << '' << wrap
        end
        @@endnotes
      end
      # Appends one wrapped "tag: value" entry to @plaintext[:metadata] for
      # each line of the metadata summary.
      def plaintext_metadata
        array=SiSU_Metadata::Summary.new(@md).plaintext.metadata
        array.each do |meta|
          tag,inf=meta.scan(/^.+?:\s|.+/)
          if tag and inf
            txt=SiSU_TextUtils::Wrap.new(inf,@wrap_width,15,1).line_wrap
            @plaintext[:metadata] << "\n#{@tab}#{tag}#{txt}\n"
          end
        end
      end
      # Appends the closing section (where to find other versions, generator
      # and ruby version, generation time) to @plaintext[:tail].
      def plaintext_tail
        generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version]
        lastdone="Last Generated on: #{Time.now}"
        rubyv="Ruby version: #{@md.ruby_version}"
        sc=if @md.sc_info
          "Source file: #{@md.sc_filename}#{break_line}Version number: #{@md.sc_number}#{break_line}Version date: #{@md.sc_date}#{break_line}"
        else ''
        end
        # NOTE(review): the indentation of the url lines below is reconstructed
        # (whitespace was collapsed in the copy reviewed) - confirm against the
        # repository
        @plaintext[:tail] <<<<WOK
#{break_line}
plaintext (plain text):
  #{@md.file.output_path.txt.url}/#{@md.file.base_filename.txt}#{break_line}
Other versions of this document: #{break_line}
manifest:
  #{@md.file.output_path.manifest.url}/#{@md.file.base_filename.manifest}#{break_line}
at:
  #{@md.file.output_path.base.url}#{break_line}

#{sc}
* #{generator}
* #{rubyv}
* #{lastdone}
* SiSU #{the_url.sisu_txt}
WOK
      end
      # Plaintext decoration table. Answers the same call chains as before,
      # e.g. decorate.bold.open, decorate.heading_underscore.l2, but no longer
      # redefines methods on this class each time it is called.
      def decorate
        DECORATION
      end
      # Used to extract the structure of a document: wraps a heading or
      # paragraph and appends it to @plaintext[:body]; headings are upcased
      # and underlined. p_num is the object number text placed after the
      # object. Pending paragraph notes are appended in footnote mode, and
      # the pending list is always cleared.
      def plaintext_structure(dob='',p_num='')
        lv=(dob.is==:heading) ? dob.ln : nil
        wrapped=if dob.is==:para \
        || dob.is==:heading
          util=if dob.is==:para
            if dob.hang \
            and dob.hang =~/[0-9]/ \
            and dob.indent != dob.hang
              # hanging indent: first line and following lines indented differently
              SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,dob.indent.to_i*2,dob.hang.to_i*2)
            else
              text=dob.bullet_ ? "* #{dob.obj}" : dob.obj
              indent=(dob.indent =~/[1-9]/) ? dob.indent.to_i*2 : 0
              SiSU_TextUtils::Wrap.new(text,@wrap_width,indent)
            end
          else SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,0)
          end
          util.line_wrap
        end
        if lv
          # underline as long as the heading, at most the wrap width
          times=wrapped.length
          times=@wrap_width if times > @wrap_width
          level=(0..7).find { |l| l == lv }
          if level \
          and not (level==4 and dob.use_ == :dummy)     # dummy level 4 headings are not printed
            rule=decorate.heading_underscore[level]
            @plaintext[:body] << (wrapped.upcase << break_line << rule*times + p_num + break_line*2)
          end
        else
          @plaintext[:body] << wrapped + p_num + break_line # main text, contents, body KEEP
        end
        if @@endnotes[:para] \
        and not @@endnotes_
          @@endnotes[:para].each {|e| @plaintext[:body] << e << break_line}
        end
        @@endnotes[:para]=[]
      end
      # Returns the object number text for dob ("\n" followed by the number
      # between the Dx ocn markers), or '' when object numbers are switched
      # off for plaintext or dob has no integer ocn.
      def ocn_display(dob)
        make=SiSU_Env::ProcessingSettings.new(@md)
        return '' unless make.build.plaintext_ocn?
        # Integer rather than Fixnum: Fixnum was removed in ruby 3.2
        return '' unless defined?(dob.ocn) && dob.ocn.is_a?(Integer)
        "\n#{Dx[:ocn_o]}#{dob.ocn}#{Dx[:ocn_c]}"
      end
      # Used for major markup instructions: rewrites each object's text from
      # internal markup to plaintext conventions and fills @plaintext, which
      # is returned.
      def markup(data)
        SiSU_Env::InfoEnv.new(@md.fns)
        @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]}
        (0..7).each { |x| @cont[x]=@level[x]=false }
        (4..7).each { |x| @plaintext_contents_close[x]='' }
        plaintext_tail
        plaintext_metadata
        table_message='[table omitted, see other document formats]'
        data.each do |dob|
          txt=dob.obj.gsub(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#{break_line}#{table_message}"). #fix
            gsub(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'')    # remove dummy headings (used by html) #check also [~-]#
          INLINE_MARKUP.each do |attrib|
            marker=decorate[attrib]
            txt=txt.gsub(/#{Mx[:"fa_#{attrib}_o"]}(.+?)#{Mx[:"fa_#{attrib}_c"]}/,
              "#{marker.open}\\1#{marker.close}")
          end
          dob.obj=txt
          unless dob.is==:code
            dob.obj=dob.obj.gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,'\1').
              gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1').
              gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1 [link: <\2>]').
              gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/,'\1 [link: local image]').
              gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,"#{the_text.url_open}\\1#{the_text.url_close}")
            extract_endnotes(dob)                       # must see the notes before they are reduced to markers
            txt=dob.obj.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]'). # endnote marker marked up
              gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]') # endnote marker marked up
            GLYPHS.each do |code,glyph|
              txt=txt.gsub(/#{Mx[:gl_o]}#{code}#{Mx[:gl_c]}/,glyph)
            end
            dob.obj=txt
          end
          dob.obj=if dob.of==:block # watch
            dob.obj.gsub(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/m,"* ").
              gsub(/\n?#{Mx[:br_line]}\n?|\n?#{Mx[:br_nl]}\n?/m,break_line)
          else dob.obj.gsub(/\n?#{Mx[:br_line]}\n?|\n?#{Mx[:br_nl]}\n?/m,break_line*2)
          end
          if dob.is==:code
            # applied twice, as before, to catch adjacent escapes: _> _< _<_<
            2.times { dob.obj=dob.obj.gsub(/(^|[^}])_([<>])/m,'\1\2') }
          end
          # NOTE(review): the first alternative of the nbsp pattern and the
          # spacing inside the ' [ \1 ]' replacements are copied as they
          # appear in the copy reviewed, where whitespace and entities may
          # have been altered - confirm against the repository
          dob.obj=dob.obj.gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1').
            gsub(/<a href=".+?">(.+?)<\/a>/m,'\1').
            gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,''). # remove name links
            gsub(/ |#{Mx[:nbsp]}/,' ').                 # decide on
            gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]")
            gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,' [ \1 ]').
            gsub(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
          if dob.obj !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/
            p_num=ocn_display(dob)
            case dob.is
            when :heading, :para
              plaintext_structure(dob,p_num)
            when :group, :block, :verse, :code, :table
              @plaintext[:body] << dob.obj + p_num + break_line
            when :break
              sp=' '
              ln='-'
              separator=if dob.obj==Mx[:br_page] \
              or dob.obj==Mx[:br_page_new] \
              or dob.obj==Mx[:br_page_line]
                "#{break_line}#{ln*40}#{break_line*2}"
              elsif dob.obj ==Mx[:br_obj]
                "#{break_line}#{sp*20}* * *#{break_line*2}"
              end                                       # following empty line (break_line) missing, fix
              @plaintext[:body] << separator if separator
            end
            # Endnote objects are left uncleaned. The original assigned '' to
            # dob here and then called dob.obj on it, raising NoMethodError.
            endnote_object=(dob.obj =~/<a name="n\d+">/ \
            and dob.obj =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
            unless endnote_object                       ## Clean Prepared Text
              dob.obj=dob.obj.gsub(/<!.+!>/,' ').
                gsub(/<:\S+>/,' ')
            end
          end
        end
        @plaintext
      end
      # Assembles the sections in output order, writes them through Output
      # and resets the shared endnote store.
      def publish(plaintext)
        divider='='
        content=[]
        content << plaintext[:open]
        content << plaintext[:head]
        content << plaintext[:body]
        content << @@endnotes[:end] if @@endnotes_
        content << "#{break_line}#{divider*@wrap_width}#{break_line}"
        content << plaintext[:metadata]
        content << "#{break_line}#{divider*@wrap_width}#{break_line}" if @md.stmp =~/\w+/ #not used?
        content << plaintext[:tail]
        Output.new(content,@md).plaintext
        @@endnotes={ para: [], end: [] }
      end
    end
    # Writes the assembled content to the plaintext output file.
    class Output <Source
      include SiSU_Param
      include SiSU_Env
      # content: array whose elements are strings or arrays of strings;
      # md: document parameters.
      def initialize(content,md)
        @content,@md=content,md
      end
      # plaintext output: strips trailing whitespace and writes at most one
      # consecutive empty line. The file is closed even if a write raises.
      def plaintext
        file_plaintext=SiSU_Env::FileOp.new(@md).write_file.txt
        @sisu=[]
        emptyline=0
        begin
          @content.each do |para|                       # this is a hack
            if para.is_a?(Array) \
            and para.length > 0
              para.each do |line|
                next unless line
                line=line.gsub(/[ \t]+$/m,'').
                  gsub(/^\A[ ]*\Z/m,'')
                (line=~/^\A\Z/) \
                ? (emptyline+=1)
                : emptyline=0
                file_plaintext.puts line if emptyline < 2 #remove additional empty lines
              end
            else file_plaintext.puts para               #unix plaintext # /^([*=-]|\.){5}/
            end
          end
        ensure
          file_plaintext.close
        end
      end
    end
  end
end
__END__
      bold_o:            '*',  bold_c:            '*',
      #bold_o:            '!',  bold_c:            '!',
      #emphasis_o:        '*',  emphasis_c:        '*',
      italics_o:         '/',  italics_c:         '/',
      underscore_o:      '_',  underscore_c:      '_',
      cite_o:            '"',  cite_c:            '"',
      insert_o:          '+',  insert_c:          '+',
      strike_o:          '-',  strike_c:          '-',
      superscript_o:     '^',  superscript_c:     '^',
      subscript_o:       '[',  subscript_c:       ']',
      hilite_o:          '*',  hilite_c:          '*',
      monospace_o:       '',   monospace_c:       '',
      po_bold_o:         '!{', po_bold_c:         '}!',
      po_italics_o:      '/{', po_italics_c:      '}/',
      po_underscore_o:   '_{', po_underscore_c:   '}_',
      po_cite_o:         '"{', po_cite_c:         '}"',
      po_insert_o:       '+{', po_insert_c:       '}+',
      po_strike_o:       '-{', po_strike_c:       '}-',
      po_superscript_o:  '^{', po_superscript_c:  '}^',
      po_subscript_o:    ',{', po_subscript_c:    '},',
      po_hilite_o:       '*{', po_hilite_c:       '}*',
      po_monospace_o:    '#{', po_monospace_c:    '}#',