# coding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009 Ralph Amissah All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: plaintext text generation, stripped plaintext output (unix,
linefeed)
=end
module SiSU_Plaintext
require "#{SiSU_lib}/dal"
require "#{SiSU_lib}/sysenv"
include SiSU_Env
include SiSU_Param
include SiSU_Viz
require "#{SiSU_lib}/plaintext_format"
include SiSU_Plaintext_format
require "#{SiSU_lib}/shared_txt"
require "#{SiSU_lib}/shared_structure"
# Module-level class variables, each initialised once when this file is loaded:
# three start at zero and the table footer text starts out empty.
@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0
@@tablefoot=''
class Source
def initialize(opt)
@opt=opt
@@dostype=if @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/
if @opt.mod.inspect =~ /--footnote/ \
and @opt.mod.inspect =~ /--dos/
'msdos footnotes'
elsif @opt.mod.inspect =~ /--endnote/ \
and @opt.mod.inspect =~ /--dos/
'msdos endnotes'
elsif @opt.mod.inspect =~ /--footnote/
'unix footnotes'
elsif @opt.mod.inspect =~ /--endnote/
'unix endnotes'
else 'unix footnotes'
end
else puts "#{sf} not a processed file type"
end
end
# Runs plaintext generation for the document named in @opt:
# loads the document parameters (@md) and environment (@env), prints the
# screen notices selected by the command flags, obtains the document data
# from SiSU_DAL::Source and passes it to Scroll#songsheet, then selects the
# skin. Any StandardError raised along the way is handed to
# SiSU_Errors::Info_error instead of propagating.
def read
  @md=SiSU_Param::Parameters.new(@opt).get
  @env=SiSU_Env::Info_env.new(@opt.fns)
  out_dir=@env.path.output_tell
  # the editor command line is only built for the M, V and v command flags
  editor_cmd=''
  if @opt.cmd =~/[MVv]/
    editor_cmd="#{@env.program.text_editor} #{out_dir}/#{@md.fnb}/#{@md.fn[:plain]}"
  end
  banner=SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',editor_cmd)
  banner.green_hi_blue unless @opt.cmd =~/q/
  progress=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:plain]}")
  progress.flow if @opt.cmd =~/[MV]/
  # result is not used here; the object is still constructed as before
  SiSU_Env::Create_file.new(@opt.cmd,@opt.fns)
  @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
  SiSU_Plaintext::Source::Scroll.new(@dal_array,@md).songsheet
  SiSU_Env::Info_skin.new(@md).select #watch
rescue
  SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
end
private
class Scroll [],:end=>[] }
@@dp=nil
# Sets up the state used while rendering one document as plaintext.
#
# data:: the document data, later passed to markup by songsheet
# md::   document parameters; md.mod is inspected (as a string) for the
#        --dos, --footnote and --endnote flags
#
# Besides the instance state, this sets the class variable @@dostype and
# chooses the line break @br to match it ("\r\n" for msdos, "\n" for unix).
def initialize(data,md)
  @data,@md=data,md
  @url_brace=SiSU_Viz::Skin.new.url_decoration
  @vz=SiSU_Env::Get_init.instance.skin
  # the digest pattern is looked up once and cached in @@dp
  @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern
  @regx=/^(?:(?:#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}\s*)?#{Mx[:lv_o]}\d:(\S*?)#{Mx[:lv_c]})?\s*(.+?)\s*#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/m # 2004w18 pb pn removal added
  @tab="\t"
  flags=md.mod.inspect
  wants_dos=(flags =~/--dos/)
  @@dostype=if flags =~/--footnote/
    wants_dos ? 'msdos footnotes' : 'unix footnotes'
  elsif flags =~/--endnote/
    wants_dos ? 'msdos endnotes' : 'unix endnotes'
  else 'unix footnotes' # neither placement flag: unix footnotes, even with --dos
  end
  @br=(@@dostype =~/^msdos/) ? "\r\n" : "\n"
  @plaintext={ :body=>[],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[] }
end
# Entry point for rendering: runs markup over @data and passes the
# result to publish, returning whatever publish returns.
def songsheet
  publish(markup(@data))
end
# Used for extraction of endnotes from paragraphs
def extract_endnotes(para='')
notes=para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)\s*#{Mx[:id_o]}#@dp#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/)
@n=[]
notes.flatten.each do |n| #high cost to deal with appropriately within plaintext, consider
n=n.dup.to_s
if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/
fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added
fix.each do |x|
unless x.empty?; @n << x
end
end
else @n << n
end
end
notes=@n.flatten
notes.each do |e|
util=if e.to_s =~/^\[[\d*+]+\]:/; SiSU_text_utils::Wrap.new(e.to_s,78,4,1)
else SiSU_text_utils::Wrap.new(e.to_s,78,1,1)
end
wrap=util.line_wrap
if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m
wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, < 78
@plaintext[:body] << case lv
when 1; wrapped.upcase << @br << '*'*times << @br
when 2..3; wrapped.upcase << @br << '='*times << @br
when 4; wrapped.upcase << @br << '-'*times << @br
when 5..6; wrapped.upcase << @br << '.'*times << @br
end
else
@plaintext[:body] << wrapped << @br # main text, contents, body KEEP
end
if @@endnotes[:para] \
and @@dostype =~/footnote/ #edit out to switch off endnotes following paragraph to which they belong
@plaintext[:body] << @br
@@endnotes[:para].each {|e| @plaintext[:body] << e << @br}
elsif @@endnotes[:para] \
and @@dostype =~/endnote/
@plaintext[:body] << @br*2
end
@@endnotes[:para]=[]
end
# Walks every paragraph in data, rewriting SiSU internal markup into
# plaintext conventions (the paragraphs are modified in place with gsub!),
# and returns the @plaintext hash.
#
# data:: enumerable of paragraph strings (each must respond to gsub!, scan, =~)
#
# Output is appended to @plaintext[:body] directly for table placeholders;
# headings, body text, metadata and the tail go through plaintext_structure,
# plaintext_metadata and plaintext_tail, which are defined outside this
# method (presumably they append to @plaintext -- confirm there).
# The order of the substitutions matters: later patterns rely on earlier
# ones having already rewritten or removed markers.
def markup(data) # Used for major markup instructions
# NOTE(review): dir is not referenced again in this method
dir=SiSU_Env::Info_env.new(@md.fns)
# six instance variables, each starting as its own empty array
@data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]}
(0..6).each { |x| @cont[x]=@level[x]=false }
(4..6).each { |x| @plaintext_contents_close[x]='' }
plaintext_tail #($1,$2)
table_message='[table omitted, see other document formats]'
# NOTE(review): fix is not referenced again in this method
fix=[]
data.each do |para|
para.gsub!(/#{Mx[:id_o]}~0;0:0;x\d+#{Mx[:id_c]}/,'') # if book index? remove
# everything from a table marker to the end of the paragraph becomes the placeholder message
para.gsub!(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#@br#{table_message}")
para.gsub!(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,'') # remove dummy headings (used by html) #check
para.gsub!(/#{Mx[:gl_bullet]}\s*/,'* ') # bullet markup, marked down
# font-attribute markers are replaced by plain ASCII delimiters around the text
para.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*\1*')
para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/\1/')
para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'[\1]')
para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_\1_')
para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^\1^')
para.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+\1+')
para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"\1"')
para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-\1-')
# link, endnote and escaped-character handling is skipped for paragraphs carrying a code marker
unless para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/
para.gsub!(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}((?:https?|file|ftp):\/\/\S+|image)/,'\1 [link:] \2')
# bare urls are wrapped in the skin's url decoration
para.gsub!(/(^|#{Mx[:gl_c]}|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3")
para.gsub!(/_((?:https?|file|ftp):\/\/\S+)/,'\1')
# endnote texts are collected before the inline notes are reduced to [^n] markers
extract_endnotes(para)
para.gsub!(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]') # endnote marker marked up
para.gsub!(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]') # endnote marker marked up
# escaped special characters (named or numeric codes) are turned back into the literal character
para.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<')
para.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>')
para.gsub!(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&')
para.gsub!(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!')
para.gsub!(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#')
para.gsub!(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*')
para.gsub!(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-')
para.gsub!(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/')
para.gsub!(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_')
para.gsub!(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{')
para.gsub!(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}')
para.gsub!(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~')
para.gsub!(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©')
end
# grouped text (group/verse/alt/code): line-break markers become single newlines and the
# group marker is removed; all other paragraphs get a blank line per break marker
if para =~/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/ ##{Mx[:gr_o]}codeline#{Mx[:gr_c]}
if para =~/#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code-block: angle brackets special characters
# the same substitution is applied twice so that adjacent escaped brackets are both handled
para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _<
para.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_<
end
para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n") # watch
para.gsub!(/#{Mx[:gr_o]}(?:group|verse|alt|code)(?:-end)?#{Mx[:gr_c]}(?:\s+#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]})?/,'')
else para.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"\n\n") # watch introduces a bug
end
para.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'') # remove page breaks
para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check
# drop the escape character (_ or \) in front of urls that were not to be decorated
para.gsub!(/(^|#{Mx[:gl_c]}|\s)[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3')
# NOTE(review): as written this only strips a closing </a>; the opening-tag part of the
# pattern appears to be missing -- confirm against the upstream source
para.gsub!(/(.+?)<\/a>/m,'\1')
para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links
para.gsub!(/ |#{Mx[:nbsp]}/,' ') # decide on
# image links are reduced to a bracketed file name or caption
para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")
para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
#para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
# NOTE(review): wordlist is not referenced again in this method
wordlist=para.scan(/\S+/)
if para =~/^#{Rx[:meta]}\s*(.+?)\Z/m # for headers
d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta
if d_meta; plaintext_metadata(d_meta)
end
end
# everything below applies only to paragraphs that are not metadata headers,
# end-of-file markers or endnote-section markers
if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/
if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change
# capture group 3 of @regx is the number that follows the ~ in the trailing id marker
paranum=para[@regx,3]
@p_num=SiSU_Plaintext_format::Paragraph_number.new(paranum)
end
@sto=SiSU_Structure::Split_text_object.new(@md,para).txt
### problem in scroll, it appears tables are getting paragraph numbers
m=/#{Mx[:id_o]}~(\d+);(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
# paragraphs ending in an id marker are emitted through plaintext_structure; for heading
# levels 1-6 para is then replaced by the matching heading_bodyN value from @sto
if para =~m \
and para=~/\S+/
para=case @sto.format
when /^(1):(\S*?)/
plaintext_structure(para,$1,@sto.ocn,$2)
@sto.lev_para_ocn.heading_body1
when /^(2):(\S*?)/
plaintext_structure(para,$1,@sto.ocn,$2)
@sto.lev_para_ocn.heading_body2
when /^(3):(\S*?)/
plaintext_structure(para,$1,@sto.ocn,$2)
@sto.lev_para_ocn.heading_body3
when /^(4):(\S+?)/ # work on see SiSU_text_parts::Split_text_object
plaintext_structure(para,$1,@sto.ocn,$2)
@sto.lev_para_ocn.heading_body4
when /^(5):(\S*?)/
plaintext_structure(para,$1,@sto.ocn,$2)
@sto.lev_para_ocn.heading_body5
when /^(6):(\S*?)/
plaintext_structure(para,$1,@sto.ocn,$2)
@sto.lev_para_ocn.heading_body6
else
plaintext_structure(para,nil,nil,nil) #watch may be problematic
para
end
elsif para =~/#{table_message}/
# table placeholder paragraphs go straight to the body
@plaintext[:body] << para << @br
# the remaining branches have no active statements (only commented-out code), so matching
# paragraphs are deliberately or accidentally left unemitted
elsif para =~/(Note|Endnotes?)/ \
and para !~/#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
elsif para =~/(MetaData)/ \
and para =~/#{Mx[:id_o]}~(\d+);[um]\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/ #debug 2003w46 add rc info ####suspect visit
#formatMono=MonoSiSU.new(' MetaData ')
#para=formatMono.bold_para
elsif para.include? 'Owner Details' \
and para !~/#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/
#formatMono=MonoSiSU.new('Owner Details ')
#@plaintext[:owner_details]=formatMono.bold_para
#para=''
elsif para =~/(#{Mx[:tc_p]}|#{Mx[:gr_o]}Th?)/u #tables ! check
end
# NOTE(review): the first pattern is empty (matches any string) and the second has an empty
# alternative, so as written para is always blanked here; the first pattern looks
# truncated -- confirm against the upstream source
para='' if (para =~// \
and para =~/^(-\{{2}~\d+|)/) # -endnote
case para
when /#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/
if para =~/.*<:#>.*$/m
txt_obj={:txt =>para}
format_text=Format_text_object.new(@md,txt_obj)
para=format_text.scr_indent_one_no_paranum
end
end
if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/
# i don't get the condition for no paranum
end
#if para =~/<:center>/
# one,two=/(.*)<:center>(.*)/.match(para)[1,2]
# format_text=Format_text_object.new(one,two)
# para=format_text.center
#end
para.gsub!(/#{Mx[:id_o]}.+?#{Mx[:id_c]}/,' ') if para ## Clean Prepared Text
# NOTE(review): an empty pattern matches at every position, so this inserts spaces
# throughout para; the pattern looks truncated -- confirm against the upstream source
para.gsub!(//,' ') if para ## Clean Prepared Text
para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text
end
end
@plaintext
end
# Assembles the final document from the sections of plaintext and writes it
# out through Output#plaintext.
#
# plaintext:: hash of section arrays; the keys read are :open, :head, :body,
#             :metadata, :owner_details and :tail (a key that was never set
#             contributes nil)
#
# Collected end-of-document notes are inserted after the body when
# @@dostype selects endnotes. A 78-character rule of '=' separates the body
# from the metadata, and a second rule plus the owner details follow only
# when @md.stmp contains a word character.
# Returns the freshly reset @@endnotes hash.
def publish(plaintext)
  rule_char='='
  # a new string is built for every rule, so the two rules are distinct objects
  rule=lambda { "#@br#{rule_char*78}#@br" }
  content=[plaintext[:open],plaintext[:head],plaintext[:body]]
  content.push(@@endnotes[:end]) if @@dostype =~/endnotes/
  content.push(rule.call,plaintext[:metadata])
  content.push(rule.call) if @md.stmp =~/\w+/ #not used?
  content.push(plaintext[:owner_details]) if @md.stmp =~/\w+/ #not used?
  content.push(plaintext[:tail])
  Output.new(content,@md).plaintext
  # clear the shared note store for the next document
  @@endnotes={ :para=>[],:end=>[] }
end
end
class Output 0
para.each do |line|
line.gsub!(/\s+$/m,'')
file_plaintext.puts line #unix plaintext
end
else file_plaintext.puts para #unix plaintext # /^([*=-]|\.){5}/
end
end
file_plaintext.close
end
end
end
end
__END__
!\|#\|&*\|-\|/\|_\|{\|}\|~\|