aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/0.52/plaintext.rb
diff options
context:
space:
mode:
authorRalph Amissah <ralph.amissah@gmail.com>2007-05-22 02:06:46 +0100
committerRalph Amissah <ralph.amissah@gmail.com>2007-05-22 02:06:46 +0100
commit65477054fd798728bf186aa2938727ddddbe86a5 (patch)
tree612da47b57ad4c4157f495dcf10710d32cdc6ead /lib/sisu/0.52/plaintext.rb
Imported upstream version 0.52.7upstream/0.52.7sisu_0.52.7
Diffstat (limited to 'lib/sisu/0.52/plaintext.rb')
-rw-r--r--lib/sisu/0.52/plaintext.rb473
1 files changed, 473 insertions, 0 deletions
diff --git a/lib/sisu/0.52/plaintext.rb b/lib/sisu/0.52/plaintext.rb
new file mode 100644
index 00000000..eb43dfa8
--- /dev/null
+++ b/lib/sisu/0.52/plaintext.rb
@@ -0,0 +1,473 @@
+=begin
+ * Name: SiSU information Structuring Universe - Structured information, Serialized Units
+ * Author: Ralph Amissah
+ * http://www.jus.uio.no/sisu
+ * http://www.jus.uio.no/sisu/SiSU/download.html
+
+ * Description: plaintext text generation, stripped plaintext output (unix, linefeed)
+
+ * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah
+
+ * License: GPL 2 or later
+
+ Summary of GPL 2
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+ If you have an Internet connection, the latest version of the GPL should be
+ available at these locations:
+ http://www.fsf.org/licenses/gpl.html
+ http://www.gnu.org/copyleft/gpl.html
+ http://www.jus.uio.no/sisu/gpl2.fsf
+
+ SiSU was first released to the public on January 4th 2005
+
+ SiSU uses:
+
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ © Ralph Amissah 1997, current 2007.
+ All Rights Reserved.
+
+ * Notes: tidy -ascii index.xml >> index.tidy
+
+ * Ralph Amissah: ralph@amissah.com
+ ralph.amissah@gmail.com
+=end
+module SiSU_Plaintext
+ require SiSU_lib + '/dal'
+ require SiSU_lib + '/sysenv'
+ include SiSU_Env
+ include SiSU_Param
+ include SiSU_Viz
+ require SiSU_lib + '/plaintext_format'
+ include Format
+ require SiSU_lib + '/shared_txt'
+ pwd=Dir.pwd
+ # Module-level counters and table state; each name is initialised once
+ # (the original parallel assignment listed @@alt_id_count twice).
+ @@alt_id_count,@@tablehead,@@number_of_cols=0,0,0
+ @@tablefoot=''
+ class Source
+ # Stores the command options and, for SiSU source files (*.sst, *._sst,
+ # *.-sst), records the output flavour in @@dostype from the command flags.
+ # Any other file name only produces a message on stdout.
+ #
+ # opt - options object; this method reads opt.fns (file name) and opt.cmd
+ #       (command flag string).
+ def initialize(opt)
+   @opt=opt
+   if @opt.fns =~/(.+?)\.[_-]?sst$/
+     case @opt.cmd # the first matching flag pattern wins, so order matters
+     when /[af]/; @@dostype='unix footnotes'
+     when /e/;    @@dostype='unix endnotes'
+     when /[AF]/; @@dostype='msdos footnotes'
+     when /E/;    @@dostype='msdos endnotes'
+     end
+   else
+     # The message used to interpolate an undefined name (sf), which raised
+     # NameError instead of reporting the offending file.
+     puts "#{@opt.fns} not a processed file type"
+   end
+ end
+ # Builds the plaintext output for the file described by @opt: loads the
+ # document parameters and environment, prints progress banners according to
+ # the command flags, fetches the document array and passes it to
+ # Scroll#songsheet. Any StandardError is handed to SiSU_Errors::Info_error.
+ def read
+   @md=SiSU_Param::Parameters.new(@opt).get
+   @env=SiSU_Env::Info_env.new(@opt.fns)
+   out_path=@env.path.output_tell
+   viewer=''
+   viewer="#{@env.program.text_editor} #{out_path}/#{@md.fnb}/#{@md.fn[:plain]}" if @opt.cmd =~/[MVv]/
+   banner=SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',viewer)
+   banner.green_hi_blue unless @opt.cmd =~/q/
+   progress=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:plain]}")
+   progress.flow if @opt.cmd =~/[MV]/
+   # the instance is not used afterwards; the constructor call is kept as in the original
+   SiSU_Env::Create_file.new(@opt.cmd,@opt.fns)
+   @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
+   SiSU_Plaintext::Source::Scroll.new(@dal_array,@md).songsheet
+   SiSU_Env::Info_skin.new(@md).select #watch
+ rescue
+   SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
+ end
+ private
+ class Split_text_object <Source
+ require SiSU_lib + '/plaintext_format'
+ include SiSU_Viz
+ include Format
+ @@alt_id_count=0
+ @@dp=nil
+ attr_reader :format,:lev,:text,:ocn,:lev_para_ocn
+ # Holds one paragraph for splitting; format and ocn start as the string
+ # 'null'. The digest pattern is looked up once and cached in @@dp.
+ def initialize(para)
+   @para=para
+   @format='null'
+   @ocn='null'
+   @@dp ||= SiSU_Env::Info_env.new.digest.pattern
+   @dp=@@dp
+ end
+ # Splits @para into its markup prefix (@format), heading level (@lev), text
+ # (@text) and object number (@ocn), using the trailing
+ # "<~ocn;...;...><digest:digest>" marker, then builds @lev_para_ocn as a
+ # Format::Format_text_object. Returns self so the readers can be chained.
+ def lev_segname_para_ocn
+   @text=nil
+   if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+     if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
+       # heading with a segment name ($3, not needed here)
+       @format,@lev,@text,@ocn=$1,$2,$4,$5
+     elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
+       @format,@lev,@text,@ocn=$1,$2,$3,$4
+     elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
+       # this pattern has three groups (the original also listed a stray $4)
+       @format,@text,@ocn=$1,$2,$3
+     # NOTE(review): the two branches below can never be reached, because the
+     # first two patterns above (\w where these have [um]) already match every
+     # string these would match. They look like they were meant to run first
+     # and give unnumbered objects an "x<n>" id -- confirm before reordering.
+     elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para)
+       @@alt_id_count+=1
+       @format,@lev,@text,@ocn=$1,$2,$4,"x#{@@alt_id_count}"
+     elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
+       @@alt_id_count+=1
+       @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}"
+     end
+   else
+     if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
+       @text,@ocn=$1,$2
+     end
+     if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06
+       # No object-number marker: keep the whole paragraph. The original
+       # pattern /(.+?)/m was lazy with nothing after it, so it captured only
+       # the first character.
+       @text=@para[/(.+)/m,1]
+     end
+     if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para)
+       @format,@lev,@text=$1,$2,$3
+     end
+   end
+   format=@format.dup
+   @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+     Format::Format_text_object.new(format,@text,@ocn)
+   else
+     # NOTE(review): the third argument is a double-quoted string, not a
+     # regexp, so "\d" and "\w" lose their backslashes; left unchanged because
+     # the expectations of Format_text_object are not visible here.
+     Format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>")
+   end
+   self
+ end
+ end
+ class Scroll <Source
+ require SiSU_lib + '/shared_txt'
+ include SiSU_text_utils
+ @@endnotes_para=[]
+ @@plaintext={ :body=>[],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[],:endnotes=>[] }
+ @@dp=nil
+ # Sets up a Scroll over the document array: stores data and metadata, the
+ # skin, the cached digest pattern, the paragraph-splitting regexp, and
+ # picks the line break (and @@dostype) from the command flags in md.cmd.
+ def initialize(data,md)
+   @data=data
+   @md=md
+   @vz=SiSU_Env::Get_init.instance.skin
+   @@dp ||= SiSU_Env::Info_env.new.digest.pattern
+   @dp=@@dp
+   @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ #m # 2004w18 pb pn removal added
+   @tab="\t"
+   # each flag pattern maps to [dostype, line break]; the first match wins
+   dostype,@br=case md.cmd
+     when /[af]/ then ['unix footnotes',"\n"]
+     when /e/    then ['unix endnotes',"\n"]
+     when /[AF]/ then ['msdos footnotes',"\r\n"]
+     when /E/    then ['msdos endnotes',"\r\n"]
+     else             [nil,"\n"]
+   end
+   # with no matching flag @@dostype is left as it was
+   @@dostype=dostype if dostype
+ end
+ # Runs the two conversion passes in order: markup collects the sections,
+ # publish writes them out.
+ def songsheet
+   markup; publish
+ end
+ # Used for extraction of endnotes from paragraphs.
+ #
+ # Finds every "~{n text <digest>}~" / "~[n text <digest>]~" note in para,
+ # splits notes on <br> / <br />, wraps each piece at 70 columns, rewrites a
+ # leading note number as "[n]: " and appends the result to both
+ # @@plaintext[:endnotes] and @@endnotes_para.
+ #
+ # para - paragraph string (not modified here).
+ def extract_endnotes(para='')
+   # scan with a single capture group returns one-element arrays; flatten
+   # yields the note strings on every Ruby version (Array#to_s only produced
+   # the joined text on Ruby 1.8).
+   notes=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/).flatten
+   @n=[]
+   notes.each do |note| #high cost to deal with <br> appropriately within plaintext, consider
+     if note =~/<br(?: \/)?>/
+       note.split(/<br(?: \/)?>/).each { |part| @n << part unless part.empty? }
+     else @n << note
+     end
+   end
+   @n.each do |e|
+     # text that already starts with "[n]:" is wrapped with the 4/1 settings, everything else with 1/1
+     indent=(e =~/^\[[\d*+]+\]:/) ? 4 : 1
+     wrap=SiSU_text_utils::Wrap.new(e,70,indent,1).line_wrap
+     if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m
+       # blank line, then "[n]: text" (same replacement text as the former heredoc)
+       wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m,"\n\\1[\\2]: \\3\n")
+     else
+       wrap.gsub!(/^(.+)\Z/m,"\\1\n")
+     end
+     @@plaintext[:endnotes] << wrap
+     @@endnotes_para << wrap
+   end
+ end
+ # Appends one metadata entry to @@plaintext[:metadata]: a blank line
+ # followed by "<tab>element: wrapped text" when meta.type is 'meta',
+ # otherwise an empty string.
+ def plaintext_metadata(meta)
+   txt=SiSU_text_utils::Wrap.new(meta.text,70,15,1).line_wrap
+   entry=if meta.type == 'meta' then "\n#{@tab}#{meta.el}: #{txt}\n" else '' end
+   @@plaintext[:metadata] << entry
+ end
+ # Appends the closing block of the document to @@plaintext[:tail]: links to
+ # the other output versions, optional source-control details, and the
+ # generator, Ruby version and generation time lines.
+ def plaintext_tail
+   SiSU_Env::Info_skin.new(@md).select
+   vz=SiSU_Env::Get_init.instance.skin
+   # generator stays nil (rendered as an empty string) when no version is recorded
+   generator=@md.sisu_version[:version] ? "Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" : nil
+   lastdone="Last Generated on: #{Time.now}"
+   rubyv="Ruby version: #{@md.ruby_version}"
+   sc=@md.sc_info ? "Source file: #{@md.sc_filename}#{@br}Version number: #{@md.sc_number}#{@br}Version date: #{@md.sc_date}#{@br}" : ''
+   @@plaintext[:tail] << <<WOK
+#@br
+Other versions of this document: #@br
+manifest:
+ #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:manifest]}#@br
+html:
+ #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:toc]}#@br
+pdf:
+ #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:pdf_p]}
+ #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:pdf_l]}#@br
+plaintext (plain text):
+ #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:plain]}#@br
+at:
+ #{vz.url_site}#@br
+
+#{sc}
+* #{generator}
+* #{rubyv}
+* #{lastdone}
+* SiSU #{vz.url_sisu}
+WOK
+ end
+ # Used to extract the structure of a document.
+ #
+ # Pulls the endnotes out of para, replaces each note with a "[^n]" marker
+ # (para is modified in place), wraps the paragraph text at 70 columns and
+ # appends it to @@plaintext[:body]. Headings (lv 1..6) are upper-cased and
+ # underlined; in footnote mode the paragraph's notes follow it directly.
+ #
+ # para  - paragraph string, mutated by gsub!.
+ # lv    - heading level as string or integer; nil/''/0 means body text.
+ # ocn   - accepted for the callers' sake, not used here.
+ # hname - accepted for the callers' sake, not used here.
+ def plaintext_structure(para='',lv='',ocn='',hname='')
+   lv=lv.to_i
+   lv=nil if lv == 0
+   extract_endnotes(para)
+   para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)[}\]]~/,'[^\1]') # endnote marker marked up
+   # Default to an empty string: when para does not match @regx the original
+   # left this nil, and a heading then failed with NoMethodError on nil.length.
+   wrapped=''
+   if para[@regx]
+     paragraph=para[@regx,2]
+     indent=0
+     if paragraph.include? '<:i1>'
+       paragraph.gsub!(/<:i1>/,'')
+       indent=2
+     end
+     wrapped=SiSU_text_utils::Wrap.new(paragraph,70,indent).line_wrap
+   end
+   if lv
+     rule_len=[wrapped.length,70].min # underline is as long as the text, capped at 70
+     rule_char=case lv
+       when 1    then '*'
+       when 2..3 then '='
+       when 4    then '-'
+       when 5..6 then '.'
+     end
+     # levels outside 1..6 contributed only a nil entry before, i.e. no text
+     @@plaintext[:body] << (wrapped.upcase << @br << rule_char*rule_len << @br) if rule_char
+   else
+     @@plaintext[:body] << wrapped << @br # main text, contents, body KEEP
+   end
+   if @@dostype =~/footnote/ #edit out to switch off endnotes following paragraph to which they belong
+     @@plaintext[:body] << @br
+     @@endnotes_para.each {|e| @@plaintext[:body] << e << @br}
+   elsif @@dostype =~/endnote/
+     @@plaintext[:body] << @br*2
+   end
+   @@endnotes_para=[]
+ end
+ def markup # Used for major markup instructions: cleans each paragraph of @data in place, sends 0~ header lines to plaintext_metadata and numbered text to plaintext_structure
+ data=@data
+ dir=SiSU_Env::Info_env.new(@md.fns) # NOTE(review): dir is not referenced again in this method
+ @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]}
+ (0..6).each { |x| @cont[x]=@level[x]=false }
+ (4..6).each { |x| @plaintext_contents_close[x]='' }
+ plaintext_tail # appends the closing links/generator block to @@plaintext[:tail]
+ table_message='[table omitted, see other document formats]'
+ fix=[] # NOTE(review): only used by the commented-out <br> handling below
+ #data.each do |para| #high cost to deal with <br> appropriately within plaintext, consider
+ # para=para.dup
+ # if para =~/<br(?: \/)?>/
+ # puts para
+ # fix = para.split(/<br(?: \/)?>/) #watch #added
+ # fix.each do |x|
+ # if x =~/\S+/; @data_mod << x
+ # end
+ # end
+ # else @data_mod << para
+ # end
+ #end
+ #data=@data_mod.flatten
+ data.each do |para|
+ para.gsub!(/<!Th?¡.+/m,"#@br#{table_message}") # table markup replaced by the placeholder message
+ para.gsub!(/.+?<-#>/,'') # remove dummy headings (used by html) #check
+ para.gsub!(/_\*\s+/,'* ') # bullet markup, marked down
+ para.gsub!(/&#169;/,'©') # copyright entity decoded
+ para.gsub!(/&amp;/,'&') # ampersand entity decoded
+ para.gsub!(/<sup>(.+?)<\/sup>/,'^\1^') # superscript marked down as ^text^
+ para.gsub!(/<sub>(.+?)<\/sub>/,'[\1]') # subscript marked down as [text]
+ para.gsub!(/<i>(.+?)<\/i>/,'/\1/') # italics marked down as /text/
+ para.gsub!(/<b>(.+?)<\/b>/,'*\1*') # bold marked down as *text*
+ para.gsub!(/<u>(.+?)<\/u>/,'_\1_') # underline marked down as _text_
+ para.gsub!(/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,'') # drop group/verse/alt/code markers and any object number that follows them
+ para.gsub!(/<:p[bn]>/,'') # remove page breaks
+ para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') # remove empty lines - check
+ para.gsub!(/<a href=".+?">(.+?)<\/a>/m,'\1') # keep only the link text
+ para.gsub!(/<:name#\S+?>/,'') # remove name links
+ para.gsub!(/&nbsp;/,' ') # decide on
+ para.gsub!(/\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")
+ para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') # image with a quoted caption becomes [image: "caption"]
+ wordlist=para.scan(/\S+/) # NOTE(review): wordlist is not referenced again in this method
+ if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers
+ d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta
+ if d_meta; plaintext_metadata(d_meta)
+ end
+ end
+ if para !~/(^0~|<ENDNOTES>|<EOF>)/ # everything except header lines and the <ENDNOTES>/<EOF> markers
+ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change
+ paranum=para[@regx,3]
+ @p_num=Format::Paragraph_number.new(paranum)
+ end
+ @sto=Split_text_object.new(para).lev_segname_para_ocn
+ ### problem in scroll, it appears tables are getting paragraph numbers
+ m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ if para =~m and para=~/\S+/
+ para=case @sto.format # rebinds the block-local para only; the element held in @data keeps the string as mutated by the gsub! calls
+ when /^(1)~(?:(\S+))?/
+ plaintext_structure(para,$1,@sto.ocn,$2)
+ @sto.lev_para_ocn.heading_body1
+ when /^(2)~(?:(\S+))?/
+ plaintext_structure(para,$1,@sto.ocn,$2)
+ @sto.lev_para_ocn.heading_body2
+ when /^(3)~(?:(\S+))?/
+ plaintext_structure(para,$1,@sto.ocn,$2)
+ @sto.lev_para_ocn.heading_body3
+ when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object; unlike the other levels this pattern requires a segment name
+ plaintext_structure(para,$1,@sto.ocn,$2)
+ @sto.lev_para_ocn.heading_body4
+ when /^(5)~(?:(\S+))?/
+ plaintext_structure(para,$1,@sto.ocn,$2)
+ @sto.lev_para_ocn.heading_body5
+ when /^(6)~(?:(\S+))?/
+ plaintext_structure(para,$1,@sto.ocn,$2)
+ @sto.lev_para_ocn.heading_body6
+ #when /^(i1)$/
+ # #formatMono.gsubBody
+ # #para=@sto[:lev_para_ocn].scrIndent1
+ #when /^(i2)$/
+ # formatMono.gsubBody
+ # para=@sto[:lev_para_ocn].scrIndent2
+ #when /^(center)$/
+ # para.gsub!(/(.+)/,
+ # %{<center>(\\1)</center>})
+ # para=@sto[:lev_para_ocn].scrPara
+ #when /^(b|bold)$/
+ # para.gsub!(/(.+)/,
+ # %{<b>(\\1)</b>})
+ # para=@sto[:lev_para_ocn].scrPara
+ #when /null/ # see whether u can improve
+ # if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/)
+ # #formatMono.gsubBody
+ # #para=@sto[:lev_para_ocn].scrPara
+ # end
+ else
+ plaintext_structure(para,nil,nil,nil) #watch may be problematic
+ para
+ end
+ elsif para =~/#{table_message}/
+ @@plaintext[:body] << para << @br
+ elsif para =~/(Note|Endnotes?)/ and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ # no action taken for these paragraphs
+ elsif para =~/(MetaData)/ and para =~/<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info ####suspect visit
+ #formatMono=MonoSiSU.new('<br /><a name="metadata">MetaData</a>')
+ #para=formatMono.bold_para
+ elsif para.include? 'Owner Details' and para !~/<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
+ #formatMono=MonoSiSU.new('<br /><a name="owner.details">Owner Details</a>')
+ #@@plaintext[:owner_details]=formatMono.bold_para
+ #para=''
+ elsif para =~/(¡|<!Th?)/ #tables !
+ elsif para =~/(.*)<!#!>(.*)/
+ one,two=$1,$2
+ format_text=Format_text_object.new(one,two)
+ para=format_text.seg_no_paranum
+ end
+ para='' if (para =~/<a name="n\d+">/ and para =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
+ case para
+ when /<:i1>/
+ if para =~/.*<:#>.*$/
+ format_text=Format_text_object.new(para,'')
+ para=format_text.scr_indent_one_no_paranum
+ end
+ when /<:i2>/
+ if para =~/.*<:#>.*$/
+ format_text=Format_text_object.new(para,'')
+ para=format_text.scr_indent_one_no_paranum # NOTE(review): the i2 branch calls the same method as i1 -- confirm this is intended
+ end
+ end
+ if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/
+ # i don't get the condition for no paranum
+ end
+ if para =~/<:center>/
+ one,two=/(.*)<:center>(.*)/.match(para)[1,2]
+ format_text=Format_text_object.new(one,two)
+ para=format_text.center
+ end
+ para.gsub!(/<!.+!>/,' ') if para ## Clean Prepared Text
+ para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text
+ end
+ end
+ end
+ # Assembles the collected sections in output order (open, head, body,
+ # endnotes in endnote mode, metadata, tail), hands the joined text to
+ # Output#plaintext, and then clears the per-document sections.
+ def publish
+   divider="="
+   content=[]
+   content << @@plaintext[:open]
+   content << @@plaintext[:head]
+   content << @@plaintext[:body]
+   content << @@plaintext[:endnotes] if @@dostype =~/endnotes/
+   content << "#@br#{divider*70}#@br"
+   content << @@plaintext[:metadata]
+   content << "#@br#{divider*70}#@br" if @md.stmp =~/\w+/ #not used?
+   # :owner_details is not populated anywhere in this file, so this adds nil (joined as '')
+   content << @@plaintext[:owner_details] if @md.stmp =~/\w+/ #not used?
+   content << @@plaintext[:tail]
+   # join flattens the nested arrays into one string on every Ruby version;
+   # Array#to_s did that only on Ruby 1.8.
+   Output.new(content.join,@md).plaintext
+ ensure
+   # @@plaintext is class-level state shared by every document built in this
+   # process, so it is cleared even if writing failed. :endnotes is cleared
+   # as well: it was left out before, so the notes of one document would
+   # carry over into the next one generated in endnote mode.
+   @@plaintext[:head],@@plaintext[:body],@@plaintext[:tail],@@plaintext[:metadata],@@plaintext[:endnotes]=[],[],[],[],[]
+ end
+ end
+ class Output <Source
+ include SiSU_Param
+ include SiSU_Env
+ # Keeps the text to be written and the document metadata used to name the file.
+ def initialize(content,md)
+   @content=content
+   @md=md
+ end
+ # Writes @content, one line per puts, to the plaintext output file named by
+ # @md.fn[:plain], creating the output directory first.
+ def plaintext #%plaintext output
+   SiSU_Env::SiSU_file.new(@md).mkdir
+   out=SiSU_Env::SiSU_file.new(@md,@md.fn[:plain]).mkfile
+   # String#each exists only on Ruby 1.8; each_line behaves the same there and
+   # on later versions. An Array of lines is still accepted through each.
+   walker=@content.respond_to?(:each_line) ? :each_line : :each
+   # the original tested each line against two patterns but wrote it unchanged in every branch
+   @content.send(walker) { |para| out.puts para }
+ ensure
+   # Close the handle so the text is flushed and the descriptor released.
+   # NOTE(review): assumes mkfile returns a fresh IO-like object owned by this call -- confirm.
+   if out and out.respond_to?(:close)
+     out.close unless out.respond_to?(:closed?) and out.closed?
+   end
+ end
+ end
+ end
+end
+__END__