aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v3dv/plaintext.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v3dv/plaintext.rb')
-rw-r--r--lib/sisu/v3dv/plaintext.rb430
1 files changed, 430 insertions, 0 deletions
diff --git a/lib/sisu/v3dv/plaintext.rb b/lib/sisu/v3dv/plaintext.rb
new file mode 100644
index 00000000..533e35df
--- /dev/null
+++ b/lib/sisu/v3dv/plaintext.rb
@@ -0,0 +1,430 @@
+# encoding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have an Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/licenses/gpl.html>
+
+ <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
+
+ * SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ * Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+ * Download:
+ <http://www.jus.uio.no/sisu/SiSU/download.html>
+
+ * Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+ ** Description: plaintext text generation, stripped plaintext output (unix,
+ linefeed)
+
+=end
+module SiSU_Plaintext
+ require_relative 'dal' # dal.rb
+ require_relative 'sysenv' # sysenv.rb
+ include SiSU_Env
+ require_relative 'plaintext_format' # plaintext_format.rb
+ include SiSU_Plaintext_format
+ require_relative 'shared_metadata' # shared_metadata.rb
+ require_relative 'shared_txt' # shared_txt.rb
+ include SiSU_Param
+ include SiSU_Viz
+ # Module-level state, initialised when this file is loaded.
+ # NOTE(review): `pwd` is not referenced again in the visible code — confirm whether it is still needed.
+ pwd=Dir.pwd
+ # NOTE(review): @@alt_id_count appears twice on the left-hand side; the repetition is redundant
+ # (or a typo for another counter) — confirm against the other plaintext modules.
+ @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0
+ @@tablefoot=''
+ class Source
+ # Stores the options object and prints a warning when the source filename
+ # does not end in .sst, .-sst or .ssm.sst.
+ #
+ # opt:: options object; must respond to +fns+ (the source filename).
+ def initialize(opt)
+   @opt=opt
+   unless @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/
+     # Report the offending filename. The previous message interpolated a
+     # local (`sf`) that is not defined in this file, so reaching this
+     # branch raised NameError instead of printing the warning.
+     puts "#{@opt.fns} not a processed file type"
+   end
+ end
+ # Drives plaintext generation for the source file named in @opt.
+ # Loads the document parameters (md) and environment (env), reports on
+ # screen through SiSU_Screen::Ansi unless @opt.cmd contains "q", fetches
+ # the document object array from SiSU_DAL::Source, chooses a wrap width
+ # and hands everything to Scroll#songsheet.
+ # Exceptions caught by the bare rescue are passed to
+ # SiSU_Errors::Info_error#error instead of being re-raised here.
+ def read
+ begin
+ md=SiSU_Param::Parameters.new(@opt).get
+ env=SiSU_Env::Info_env.new(@opt.fns)
+ # screen reporting is skipped when the command string contains "q"
+ unless @opt.cmd =~/q/
+ # NOTE(review): `path` is not used again in this method — confirm it can go
+ path=env.path.output_tell
+ tool=(@opt.cmd =~/[MVv]/) \
+ ? "#{env.program.text_editor} #{md.file.output_path.txt.dir}/#{md.file.base_filename.txt}"
+ : "[#{@opt.f_pth[:lng_is]}] #{@opt.fns}"
+ @opt.cmd=~/[MVvz]/ \
+ ? SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool).green_hi_blue
+ : SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool).green_title_hi
+ SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{md.file.output_path.txt.dir}/#{md.file.base_filename.txt}").flow if @opt.cmd =~/[MV]/
+ end
+ dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
+ # wrap width precedence: document make setting, then environment setting, then 78
+ wrap_width=if defined? md.make.plaintext_wrap \
+ and md.make.plaintext_wrap
+ md.make.plaintext_wrap
+ elsif defined? env.plaintext_wrap \
+ and env.plaintext_wrap
+ env.plaintext_wrap
+ else 78
+ end
+ #wrap_width=(defined? md.make.plaintext_wrap) ? md.make.plaintext_wrap : 78
+ SiSU_Plaintext::Source::Scroll.new(dal_array,md,wrap_width).songsheet
+ SiSU_Env::Info_skin.new(md).select #watch
+ rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
+ ensure
+ end
+ end
+ private
+ class Scroll <Source
+ require_relative 'defaults' # defaults.rb
+ require_relative 'shared_txt' # shared_txt.rb
+ include SiSU_text_utils
+ # Class-level endnote store shared by every Scroll instance: :para holds the
+ # notes gathered for the object currently being processed, :end accumulates
+ # all notes for the end-of-document listing. publish replaces it with a
+ # fresh hash once the document has been written.
+ @@endnotes={ para: [], end: [] }
+ # Prepares the rendering state for one document.
+ #
+ # data::       document object array later passed to #markup
+ # md::         document parameters object (provides fns and opt.mod)
+ # wrap_width:: line width handed to the text wrapper
+ def initialize(data,md,wrap_width)
+   @data=data
+   @md=md
+   @wrap_width=wrap_width
+   @env=SiSU_Env::Info_env.new(@md.fns)
+   @brace_url=SiSU_Viz::Skin.new.url_decoration
+   @tab="\t"
+   modifiers=md.opt.mod.inspect
+   # notes go to the end of the document unless --footnote was requested
+   @@endnotes_=(modifiers =~/--footnote/) ? false : true
+   # DOS line endings only on request, unix linefeed otherwise
+   @br=(modifiers =~/--dos/) ? "\r\n" : "\n"
+   @plaintext={ body: [], open: [], close: [], head: [], metadata: [], tail: [] }
+ end
+ # Entry point: converts the stored document objects with #markup and
+ # passes the resulting sections straight to #publish.
+ def songsheet
+   publish(markup(@data))
+ end
+ # Used for extraction of endnotes from paragraphs.
+ # Scans dob.obj for endnote spans (delimited by the Mx[:en_a_*] / Mx[:en_b_*]
+ # markers), splits each note on line-break markers, wraps the pieces and
+ # appends them to the class-level @@endnotes store (:para and :end).
+ # Returns @@endnotes.
+ # NOTE(review): the default argument '' has no #obj method, so calling this
+ # without an argument would raise NoMethodError — confirm no caller relies on it.
+ def extract_endnotes(dob='')
+ notes=dob.obj.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/)
+ @n=[]
+ # break multi-line notes into separate entries, dropping empty fragments
+ notes.flatten.each do |n| #high cost to deal with <br> appropriately within plaintext, consider
+ n=n.dup.to_s
+ if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/
+ fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added
+ fix.each do |x|
+ unless x.empty?; @n << x
+ end
+ end
+ else @n << n
+ end
+ end
+ notes=@n.flatten
+ notes.each do |e|
+ # entries already in "[n]:" form get different wrap arguments (4,1 instead of 1,1);
+ # presumably indent settings of SiSU_text_utils::Wrap — TODO confirm
+ util=(e.to_s =~/^\[[\d*+]+\]:/) \
+ ? (SiSU_text_utils::Wrap.new(e.to_s,@wrap_width,4,1))
+ : (SiSU_text_utils::Wrap.new(e.to_s,@wrap_width,1,1))
+ wrap=util.line_wrap
+ # "<number> <text>" becomes "[<number>]: <text>"; the heredoc replacement
+ # also appends a trailing newline
+ if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m
+ wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, <<GSUB
+\\1[\\2]: \\3
+GSUB
+ )
+ else
+ wrap.gsub!(/^(.+)\Z/m, <<GSUB
+\\1
+GSUB
+ )
+ end
+ # :para entries are prefixed with "-"; :end entries are preceded by an empty string
+ @@endnotes[:para] << "-#{wrap}"
+ @@endnotes[:end] << '' << wrap
+ end
+ @@endnotes
+ end
+ # Fills @plaintext[:metadata] with one wrapped entry per metadata line
+ # returned by Metadata::Summary. Each line is split into a leading
+ # "tag: " part and the remaining text; lines that do not yield both parts
+ # are skipped.
+ def plaintext_metadata
+ array=Metadata::Summary.new(@md).plaintext.metadata
+ array.each do |meta|
+ # first match is the shortest prefix ending in ": ", second match is the rest
+ tag,inf=meta.scan(/^.+?:\s|.+/)
+ if tag and inf
+ util=SiSU_text_utils::Wrap.new(inf,@wrap_width,15,1)
+ txt=util.line_wrap
+ # heredoc entry: blank line, then tab + tag + wrapped text
+ @plaintext[:metadata] <<<<WOK
+
+#{@tab}#{tag}#{txt}
+WOK
+ end
+ end
+ end
+ # Appends the document footer to @plaintext[:tail]: URLs of the plaintext
+ # and manifest versions, optional source-control details, generator and
+ # Ruby version lines, a generation timestamp and the SiSU URL.
+ def plaintext_tail
+ SiSU_Env::Info_skin.new(@md).select
+# env=SiSU_Env::Info_env.new(@md.fns)
+ vz=SiSU_Env::Get_init.instance.skin
+ base_url="#{@env.url.root}/#{@md.fnb}"
+ # NOTE(review): generator stays nil when @md.sisu_version[:version] is unset,
+ # leaving a bare "* " line in the footer — confirm that is intended
+ generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version]
+ lastdone="Last Generated on: #{Time.now}"
+ rubyv="Ruby version: #{@md.ruby_version}"
+ # source-control block is included only when @md.sc_info is set
+ sc=if @md.sc_info
+ "Source file: #{@md.sc_filename}#{@br}Version number: #{@md.sc_number}#{@br}Version date: #{@md.sc_date}#{@br}"
+ else ''
+ end
+ @plaintext[:tail] <<<<WOK
+#{@br}
+plaintext (plain text):
+ #{base_url}/#{@md.fn[:plain]}#{@br}
+Other versions of this document: #{@br}
+manifest:
+ #{base_url}/#{@md.fn[:manifest]}#{@br}
+at:
+ #{@env.url.root}#{@br}
+
+#{sc}
+* #{generator}
+* #{rubyv}
+* #{lastdone}
+* SiSU #{vz.url_sisu}
+WOK
+ end
+ # Renders one heading or paragraph object into @plaintext[:body].
+ # The object text is wrapped to @wrap_width; headings are upcased and
+ # underlined with the Px[:lvN] character for their level, paragraphs are
+ # appended as wrapped text. p_num (the object number display string) is
+ # appended after the text or underline. Footnote text gathered in
+ # @@endnotes[:para] is emitted after the object when footnote mode is on,
+ # and the per-object note list is cleared before returning.
+ #
+ # dob::   document object responding to is, obj, ln, indent, hang, bullet_
+ # p_num:: string appended after the wrapped text / underline
+ def plaintext_structure(dob='',p_num='') #% Used to extract the structure of a document
+ lv=n=n3=nil
+ if dob.is=='heading'
+ lv=dob.ln
+ # NOTE(review): n and n3 are assigned but not read again in this method
+ n=lv - 1
+ n3=lv + 2
+ end
+ util=nil
+ # wrapped stays nil for any object type other than para/heading
+ wrapped=if dob.is =='para' \
+ or dob.is=='heading'
+ if dob.is=='para'
+ # hanging indent: both indent and hang values are doubled for the wrapper
+ if dob.hang \
+ and dob.hang =~/[0-9]/ \
+ and dob.indent != dob.hang
+ util=SiSU_text_utils::Wrap.new(dob.obj,@wrap_width,dob.indent.to_i*2,dob.hang.to_i*2)
+ #util=SiSU_text_utils::Wrap.new(dob.obj,@wrap_width,dob.hang.to_i*2,0)
+ elsif dob.indent =~/[1-9]/
+ # indented paragraph; bulleted text gets a leading "* "
+ util=if dob.bullet_
+ SiSU_text_utils::Wrap.new("* #{dob.obj}",@wrap_width,dob.indent.to_i*2)
+ else SiSU_text_utils::Wrap.new(dob.obj,@wrap_width,dob.indent.to_i*2)
+ end
+ else
+ util=if dob.bullet_
+ SiSU_text_utils::Wrap.new("* #{dob.obj}",@wrap_width,0)
+ else SiSU_text_utils::Wrap.new(dob.obj,@wrap_width,0)
+ end
+ end
+ else util=SiSU_text_utils::Wrap.new(dob.obj,@wrap_width,0)
+ end
+ util.line_wrap
+ end
+ if lv
+ # underline length follows the heading text but is capped at the wrap width
+ times=wrapped.length
+ times=@wrap_width if times > @wrap_width
+ # a level outside 1..6 makes the case yield nil, which is then appended as-is
+ @plaintext[:body] << case lv
+ when 1; wrapped.upcase << @br << Px[:lv1]*times + p_num << @br
+ when 2; wrapped.upcase << @br << Px[:lv2]*times + p_num << @br
+ when 3; wrapped.upcase << @br << Px[:lv3]*times + p_num << @br
+ when 4; wrapped.upcase << @br << Px[:lv4]*times + p_num << @br
+ when 5; wrapped.upcase << @br << Px[:lv5]*times + p_num << @br
+ when 6; wrapped.upcase << @br << Px[:lv6]*times + p_num << @br
+ end
+ else
+ @plaintext[:body] << wrapped + p_num << @br # main text, contents, body KEEP
+ end
+ # @@endnotes[:para] is an Array and therefore always truthy, so exactly one of
+ # the two branches below runs: footnote mode prints the notes after the
+ # object, endnote mode only adds two line breaks
+ if @@endnotes[:para] \
+ and not @@endnotes_
+ @plaintext[:body] << @br
+ @@endnotes[:para].each {|e| @plaintext[:body] << e << @br}
+ elsif @@endnotes[:para] \
+ and @@endnotes_
+ @plaintext[:body] << @br*2
+ end
+ @@endnotes[:para]=[]
+ end
+ # Used for major markup instructions.
+ # Builds the footer and metadata sections, then walks every document
+ # object in +data+: internal markup (Mx markers) in dob.obj is rewritten
+ # in place to its plaintext form (Px decorations, links, endnote markers,
+ # escaped glyphs, line breaks, images) and the object is appended to
+ # @plaintext[:body] according to its type.
+ #
+ # data:: array of document objects (each responds to obj, is, of, ocn)
+ # Returns the @plaintext hash of output sections.
+ def markup(data) # Used for major markup instructions
+ dir=SiSU_Env::Info_env.new(@md.fns)
+ @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]}
+ (0..6).each { |x| @cont[x]=@level[x]=false }
+ (4..6).each { |x| @plaintext_contents_close[x]='' }
+ plaintext_tail #($1,$2)
+ plaintext_metadata
+ table_message='[table omitted, see other document formats]'
+ fix=[]
+ data.each do |dob|
+ # tables are replaced by a fixed notice
+ dob.obj.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#{@br}#{table_message}") #fix
+ dob.obj.gsub!(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'') # remove dummy headings (used by html) #check also [~-]#
+ # font-attribute markers -> plaintext decorations from Px
+ dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,
+ "#{Px[:bold_o]}\\1#{Px[:bold_c]}")
+ dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,
+ "#{Px[:italics_o]}\\1#{Px[:italics_c]}")
+ dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,
+ "#{Px[:underscore_o]}\\1#{Px[:underscore_c]}")
+ dob.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,
+ "#{Px[:subscript_o]}\\1#{Px[:subscript_c]}")
+ dob.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,
+ "#{Px[:superscript_o]}\\1#{Px[:superscript_c]}")
+ dob.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,
+ "#{Px[:insert_o]}\\1#{Px[:insert_c]}")
+ dob.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,
+ "#{Px[:cite_o]}\\1#{Px[:cite_c]}")
+ dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,
+ "#{Px[:strike_o]}\\1#{Px[:strike_c]}")
+ dob.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,
+ "#{Px[:monospace_o]}\\1#{Px[:monospace_c]}")
+ # links, endnotes and escaped glyphs are left untouched inside code objects
+ unless dob.is=='code'
+ dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,'\1')
+ dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1')
+ dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1 [link: <\2>]')
+ dob.obj.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/,'\1 [link: local image]')
+ dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,"#{@brace_url.txt_open}\\1#{@brace_url.txt_close}")
+ # notes must be collected before their bodies are reduced to [^n] markers
+ extract_endnotes(dob)
+ dob.obj.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]') # endnote marker marked up
+ dob.obj.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]') # endnote marker marked up
+ dob.obj.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<')
+ dob.obj.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©')
+ dob.obj.gsub!(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\\')
+ end
+ # line-break markers: single newline inside blocks, blank line elsewhere
+ if dob.of=='block' # watch
+ dob.obj.gsub!(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/,"* ")
+ dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n")
+ else dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n")
+ end
+ if dob.is=='code'
+ dob.obj.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _<
+ dob.obj.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_<
+ end
+ dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1')
+ dob.obj.gsub!(/<a href=".+?">(.+?)<\/a>/m,'\1')
+ dob.obj.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links
+ dob.obj.gsub!(/&nbsp;|#{Mx[:nbsp]}/,' ') # decide on
+ dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")
+ dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,' [ \1 ]')
+ dob.obj.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
+ wordlist=dob.obj.scan(/\S+/)
+ # objects matching the metadata / end-of-file / endnote-break patterns are not emitted
+ if dob.obj !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/
+ #if defined? dob.ocn and dob.ocn.to_s =~/\d+/
+ # paranum=dob.ocn.to_s
+ # @p_num=SiSU_Plaintext_format::Paragraph_number.new(paranum)
+ #end
+ p_num=''
+ if @env.plaintext_ocn?
+ if defined? dob.ocn \
+ and not dob.ocn.nil?
+ p_num=SiSU_Plaintext_format::Paragraph_number.new(dob.ocn).display
+ end
+ end
+ if dob.is=='heading' \
+ or dob.is=='para'
+ plaintext_structure(dob,p_num)
+ elsif dob.is=='group' \
+ or dob.is=='block' \
+ or dob.is=='verse' \
+ or dob.is=='code' \
+ or dob.is=='table'
+ @plaintext[:body] << dob.obj + p_num << @br
+ elsif dob.is=='break'
+ sp=' '
+ ln='-'
+ @plaintext[:body] <<=if dob.obj==Mx[:br_page] \
+ or dob.obj==Mx[:br_page_new]
+ "#{@br}#{ln*40}#{@br*2}"
+ elsif dob.obj ==Mx[:br_obj]
+ "#{@br}#{sp*20}* * *#{@br*2}"
+ end # following empty line (@br) missing, fix
+ end
+ # Drop the local reference for endnote objects so the clean-up below is
+ # skipped. This must be nil, not '': an empty String is truthy in Ruby,
+ # so the `if dob` guards would pass and String#obj would raise
+ # NoMethodError.
+ dob=nil if (dob.obj =~/<a name="n\d+">/ \
+ and dob.obj =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
+ dob.obj.gsub!(/<!.+!>/,' ') if dob ## Clean Prepared Text
+ dob.obj.gsub!(/<:\S+>/,' ') if dob ## Clean Prepared Text
+ end
+ end
+ @plaintext
+ end
+ # Assembles the output sections in their final order, writes them through
+ # Output#plaintext and then replaces the shared endnote store with a
+ # fresh, empty one.
+ #
+ # plaintext:: hash of section arrays (:open, :head, :body, :metadata, :tail)
+ def publish(plaintext)
+   rule="#{@br}#{'='*@wrap_width}#{@br}"
+   content=[plaintext[:open],plaintext[:head],plaintext[:body]]
+   # collected notes follow the body only in endnote mode
+   content << @@endnotes[:end] if @@endnotes_
+   content << rule
+   content << plaintext[:metadata]
+   # second rule only when @md.stmp contains a word character (possibly unused)
+   content << rule.dup if @md.stmp =~/\w+/
+   content << plaintext[:tail]
+   Output.new(content,@md).plaintext
+   @@endnotes={ para: [], end: [] }
+ end
+ end
+ class Output <Source
+ include SiSU_Param
+ include SiSU_Env
+ # content:: ordered output sections (arrays of lines, or single strings)
+ # md::      document parameters object, used to obtain the output file
+ def initialize(content,md)
+   @content=content
+   @md=md
+ end
+ # Writes the assembled sections in @content to the plaintext output file.
+ # Non-empty array sections are written line by line: trailing whitespace
+ # is stripped in place and runs of blank lines are reduced to a single
+ # blank line. Any other section is written as-is with puts.
+ # The output handle is closed in an ensure clause so it is released even
+ # when writing raises. Returns nil.
+ def plaintext #%plaintext output
+   file_plaintext=SiSU_Env::SiSU_file.new(@md).write_file.txt
+   @sisu=[]
+   emptyline=0
+   @content.each do |para| # this is a hack
+     if para.class==Array and para.length > 0
+       para.each do |line|
+         if line
+           line.gsub!(/\s+$/m,'')
+           line.gsub!(/^\A[ ]*\Z/m,'')
+           if line=~/^\A[ ]*\Z/m
+             emptyline+=1
+           else emptyline=0
+           end
+           file_plaintext.puts line if emptyline < 2 #remove extra line spaces (fix upstream)
+         end
+       end
+     else file_plaintext.puts para #unix plaintext # /^([*=-]|\.){5}/
+     end
+   end
+   nil
+ ensure
+   # file_plaintext is nil here if obtaining the handle itself raised
+   file_plaintext.close if file_plaintext
+ end
+ end
+ end
+end
+__END__
+&#033;\|&#035;\|&&#042;\|&#045;\|&#047;\|&#095;\|&#123;\|&#125;\|&#126;\|&#