=begin * Name: SiSU information Structuring Universe - Structured information, Serialized Units * Author: Ralph Amissah * http://www.jus.uio.no/sisu * http://www.jus.uio.no/sisu/SiSU/download.html * Description: plaintext text generation, stripped plaintext output (unix, linefeed) * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah * License: GPL 2 or later Summary of GPL 2 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA If you have Internet connection, the latest version of the GPL should be available at these locations: http://www.fsf.org/licenses/gpl.html http://www.gnu.org/copyleft/gpl.html http://www.jus.uio.no/sisu/gpl2.fsf SiSU was first released to the public on January 4th 2005 SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system © Ralph Amissah 1997, current 2007. All Rights Reserved. * Notes: tidy -ascii index.xml >> index.tidy * Ralph Amissah: ralph@amissah.com ralph.amissah@gmail.com =end module SiSU_Plaintext require "#{SiSU_lib}/dal" require "#{SiSU_lib}/sysenv" include SiSU_Env include SiSU_Param include SiSU_Viz require "#{SiSU_lib}/plaintext_format" include Format require "#{SiSU_lib}/shared_txt" pwd=Dir.pwd @@alt_id_count,@@alt_id_count,@@tablehead,@@number_of_cols=0,0,0,0 @@tablefoot='' class Source def initialize(opt) @opt=opt if @opt.fns =~/(.+?)\.[_-]?sst$/ case @opt.cmd when /[af]/; @@dostype='unix footnotes' when /e/; @@dostype='unix endnotes' when /[AF]/; @@dostype='msdos footnotes' when /E/; @@dostype='msdos endnotes' end else puts "#{sf} not a processed file type" end end def read begin @md=SiSU_Param::Parameters.new(@opt).get @env=SiSU_Env::Info_env.new(@opt.fns) path=@env.path.output_tell tool=if @opt.cmd =~/[MVv]/; "#{@env.program.text_editor} #{path}/#{@md.fnb}/#{@md.fn[:plain]}" else '' end tell=SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool) tell.green_hi_blue unless @opt.cmd =~/q/ tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:plain]}") tell.flow if @opt.cmd =~/[MV]/ my_make=SiSU_Env::Create_file.new(@opt.cmd,@opt.fns) @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here SiSU_Plaintext::Source::Scroll.new(@dal_array,@md).songsheet SiSU_Env::Info_skin.new(@md).select #watch rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error ensure end end private class Split_text_object ).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,$5 elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) @format,@lev,@text,@ocn=$1,$2,$3,$4 elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) @format,@text,@ocn=$1,$2,$3 elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para) @@alt_id_count+=1 @format,@lev,segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}" elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para) @@alt_id_count+=1 @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}" end else if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para) @text,@ocn=$1,$2 end if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06 @text=/(.+?)/m.match(@para)[1] end if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para) @format,@lev,@text=$1,$2,$3 end end format=@format.dup @lev_para_ocn=if @para =~/.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ Format::Format_text_object.new(format,@text,@ocn) else Format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>") end self end end class Scroll [],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[],:endnotes=>[] } @@dp=nil def initialize(data,md) @data,@md=data,md @url_brace=SiSU_Viz::Skin.new.url_decoration @vz=SiSU_Env::Get_init.instance.skin @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ #m # 2004w18 pb pn removal added @tab="\t" @br=case md.cmd when /[af]/ @@dostype='unix footnotes' "\n" when /e/ @@dostype='unix endnotes' "\n" when /[AF]/ @@dostype='msdos footnotes' "\r\n" when /E/ @@dostype='msdos endnotes' "\r\n" else "\n" end end def songsheet @data=markup(@data) publish #@data.each { |x| puts x.inspect if x =~/\[table/ } end # Used for extraction of endnotes from paragraphs def extract_endnotes(para='') notes=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/) @n=[] notes.each do |n| #high cost to deal with
appropriately within plaintext, consider n=n.dup.to_s if n =~// fix = n.split(//) #watch #added fix.each do |x| unless x.empty?; @n << x end end else @n << n end end notes=@n.flatten notes.each do |e| util=if e.to_s =~/^\[[\d*+]+\]:/; SiSU_text_utils::Wrap.new(e.to_s,70,4,1) else SiSU_text_utils::Wrap.new(e.to_s,70,1,1) end wrap=util.line_wrap if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, </ m=$1.to_i paragraph.gsub!(/<:i#{m}>/,'') util=SiSU_text_utils::Wrap.new(paragraph,70,m*2) else util=SiSU_text_utils::Wrap.new(paragraph,70,0) end util.line_wrap end if lv times=wrapped.length times=70 if times > 70 @@plaintext[:body] << case lv when 1; wrapped.upcase << @br << '*'*times << @br when 2..3; wrapped.upcase << @br << '='*times << @br when 4; wrapped.upcase << @br << '-'*times << @br when 5..6; wrapped.upcase << @br << '.'*times << @br end else @@plaintext[:body] << wrapped << @br # main text, contents, body KEEP end if @@endnotes_para and @@dostype =~/footnote/ #edit out to switch off endnotes following paragraph to which they belong @@plaintext[:body] << @br @@endnotes_para.each {|e| @@plaintext[:body] << e << @br} elsif @@endnotes_para and @@dostype =~/endnote/ @@plaintext[:body] << @br*2 end @@endnotes_para=[] end def markup(data) # Used for major markup instructions dir=SiSU_Env::Info_env.new(@md.fns) @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]} (0..6).each { |x| @cont[x]=@level[x]=false } (4..6).each { |x| @plaintext_contents_close[x]='' } plaintext_tail #($1,$2) table_message='[table omitted, see other document formats]' fix=[] data.each do |para| para.gsub!(//,'') # remove dummy headings (used by html) #check para.gsub!(/_\*\s+/,'* ') # bullet markup, marked down #para.gsub!(//,"\n") # introduces a bug para.gsub!(/©/,'©') # bullet markup, marked down para.gsub!(/&/,'&') # bullet markup, marked down para.gsub!(/(.+?)<\/sup>/,'^\1^') para.gsub!(/(.+?)<\/sub>/,'[\1]') para.gsub!(/(.+?)<\/i>/,'/\1/') para.gsub!(/(.+?)<\/b>/,'*\1*') para.gsub!(/(.+?)<\/u>/,'_\1_') if para =~/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/ para.gsub!(//,"\n") # watch para.gsub!(/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,'') end para.gsub!(/<:p[bn]>/,'') # remove page breaks para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') # remove empty lines - check para.gsub!(/(^|\s)(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,"\\1#{@url_brace.txt_open}\\2#{@url_brace.txt_close}\\3") para.gsub!(/(.+?)<\/a>/m,'\1') para.gsub!(/<:name#\S+?>/,'') # remove name links para.gsub!(/ /,' ') # decide on para.gsub!(/\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=para.scan(/\S+/) if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; plaintext_metadata(d_meta) end end if para !~/(^0~||)/ if para =~@regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change paranum=para[@regx,3] @p_num=Format::Paragraph_number.new(paranum) end @sto=Split_text_object.new(para).lev_segname_para_ocn ### problem in scroll, it appears tables are getting paragraph numbers m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ if para =~m and para=~/\S+/ para=case @sto.format when /^(1)~(?:(\S+))?/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body1 when /^(2)~(?:(\S+))?/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body2 when /^(3)~(?:(\S+))?/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body3 when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body4 when /^(5)~(?:(\S+))?/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body5 when /^(6)~(?:(\S+))?/ plaintext_structure(para,$1,@sto.ocn,$2) @sto.lev_para_ocn.heading_body6 #when /^(i1)$/ # #formatMono.gsubBody # #para=@sto[:lev_para_ocn].scrIndent1 #when /^(i2)$/ # formatMono.gsubBody # para=@sto[:lev_para_ocn].scrIndent2 #when /^(center)$/ # para.gsub!(/(.+)/, # %{
(\\1)
}) # para=@sto[:lev_para_ocn].scrPara #when /^(b|bold)$/ # para.gsub!(/(.+)/, # %{(\\1)}) # para=@sto[:lev_para_ocn].scrPara #when /null/ # see whether u can improve # if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/) # #formatMono.gsubBody # #para=@sto[:lev_para_ocn].scrPara # end else plaintext_structure(para,nil,nil,nil) #watch may be problematic para end elsif para =~/#{table_message}/ @@plaintext[:body] << para << @br elsif para =~/(Note|Endnotes?)/ and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ elsif para =~/(MetaData)/ and para =~/<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info ####suspect visit #formatMono=MonoSiSU.new('
MetaData') #para=formatMono.bold_para elsif para.include? 'Owner Details' and para !~/<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ #formatMono=MonoSiSU.new('
Owner Details') #@@plaintext[:owner_details]=formatMono.bold_para #para='' elsif para =~/(¡|(.*)/ one,two=$1,$2 format_text=Format_text_object.new(one,two) para=format_text.seg_no_paranum end para='' if (para =~// and para =~/^(-\{{2}~\d+|)/) # -endnote case para when /<:i[1-9]>/ if para =~/.*<:#>.*$/m format_text=Format_text_object.new(para,'') para=format_text.scr_indent_one_no_paranum end end if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ # i don't get the condition for no paranum end if para =~/<:center>/ one,two=/(.*)<:center>(.*)/.match(para)[1,2] format_text=Format_text_object.new(one,two) para=format_text.center end para.gsub!(//,' ') if para ## Clean Prepared Text para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text end end end def publish divider="=" content=[] content << @@plaintext[:open] content << @@plaintext[:head] content << @@plaintext[:body] content << @@plaintext[:endnotes] if @@dostype =~/endnotes/ content << "#@br#{divider*70}#@br" content << @@plaintext[:metadata] content << "#@br#{divider*70}#@br" if @md.stmp =~/\w+/ #not used? content << @@plaintext[:owner_details] if @md.stmp =~/\w+/ #not used? content << @@plaintext[:tail] Output.new(content,@md).plaintext @@plaintext[:head],@@plaintext[:body],@@plaintext[:tail],@@plaintext[:metadata]=[],[],[],[] end end class Output 0 para.each do |line| line.gsub!(/\s+$/m,'') filename_plaintext.puts line #unix plaintext end else filename_plaintext.puts para #unix plaintext # /^([*=-]|\.){5}/ end end end end end end __END__