diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2007-05-22 02:06:46 +0100 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2007-05-22 02:06:46 +0100 |
commit | 65477054fd798728bf186aa2938727ddddbe86a5 (patch) | |
tree | 612da47b57ad4c4157f495dcf10710d32cdc6ead /lib/sisu/0.52/plaintext.rb |
Imported upstream version 0.52.7 (refs: upstream/0.52.7, sisu_0.52.7)
Diffstat (limited to 'lib/sisu/0.52/plaintext.rb')
-rw-r--r-- | lib/sisu/0.52/plaintext.rb | 473 |
1 files changed, 473 insertions, 0 deletions
# diff --git a/lib/sisu/0.52/plaintext.rb b/lib/sisu/0.52/plaintext.rb
# new file mode 100644
# index 00000000..eb43dfa8
# --- /dev/null
# +++ b/lib/sisu/0.52/plaintext.rb
# @@ -0,0 +1,473 @@
=begin
 * Name: SiSU information Structuring Universe - Structured information, Serialized Units
 * Author: Ralph Amissah
   * http://www.jus.uio.no/sisu
   * http://www.jus.uio.no/sisu/SiSU/download.html

 * Description: plaintext text generation, stripped plaintext output (unix, linefeed)

 * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah

 * License: GPL 2 or later

  Summary of GPL 2

  This program is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation; either version 2 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along
  with this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
    http://www.fsf.org/licenses/gpl.html
    http://www.gnu.org/copyleft/gpl.html
    http://www.jus.uio.no/sisu/gpl2.fsf

  SiSU was first released to the public on January 4th 2005

  SiSU uses:

  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

  © Ralph Amissah 1997, current 2007.
  All Rights Reserved.

 * Notes: tidy -ascii index.xml >> index.tidy

 * Ralph Amissah: ralph@amissah.com
   ralph.amissah@gmail.com
=end
# Plain-text output generation.
#
# Source#read drives the run: it fetches the document parameters and the
# processed ("dal") paragraph array, hands them to Scroll, which marks each
# paragraph down to plain text and accumulates it in class-level buffers, and
# Output finally writes the joined buffers to the plain-text output file.
#
# NOTE(review): this layout was reconstructed from a whitespace-collapsed
# listing; line structure inside heredocs is a best-effort restoration.
module SiSU_Plaintext
  require SiSU_lib + '/dal'
  require SiSU_lib + '/sysenv'
  include SiSU_Env
  include SiSU_Param
  include SiSU_Viz
  require SiSU_lib + '/plaintext_format'
  include Format
  require SiSU_lib + '/shared_txt'
  @@alt_id_count,@@tablehead,@@number_of_cols=0,0,0
  @@tablefoot=''
  # Entry point: validates the file name, selects the line-ending/notes mode
  # and runs the plain-text conversion.
  class Source
    # Output mode, "<unix|msdos> <footnotes|endnotes>". Declared here with a
    # default so that it always exists on Source (and is therefore shared with
    # the subclasses below) even when no command flag selects a mode.
    @@dostype='unix footnotes'
    # opt:: option object; this class reads opt.fns (source file name) and
    #       opt.cmd (command flag string).
    def initialize(opt)
      @opt=opt
      if @opt.fns =~ /(.+?)\.[_-]?sst$/
        case @opt.cmd
        when /[af]/; @@dostype='unix footnotes'
        when /e/;    @@dostype='unix endnotes'
        when /[AF]/; @@dostype='msdos footnotes'
        when /E/;    @@dostype='msdos endnotes'
        end
      else
        # was "#{sf}", an undefined name that raised NameError instead of
        # printing the message
        puts "#{@opt.fns} not a processed file type"
      end
    end
    # Runs the conversion for the file given at construction. Any
    # StandardError is passed to SiSU_Errors::Info_error rather than
    # propagated.
    def read
      @md=SiSU_Param::Parameters.new(@opt).get
      @env=SiSU_Env::Info_env.new(@opt.fns)
      path=@env.path.output_tell
      tool=if @opt.cmd =~ /[MVv]/
        "#{@env.program.text_editor} #{path}/#{@md.fnb}/#{@md.fn[:plain]}"
      else ''
      end
      tell=SiSU_Screen::Ansi.new(@opt.cmd,'Plaintext',tool)
      tell.green_hi_blue unless @opt.cmd =~ /q/
      tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:plain]}")
      tell.flow if @opt.cmd =~ /[MV]/
      # result unused here; call kept in case the constructor has side effects
      SiSU_Env::Create_file.new(@opt.cmd,@opt.fns)
      @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
      SiSU_Plaintext::Source::Scroll.new(@dal_array,@md).songsheet
      SiSU_Env::Info_skin.new(@md).select #watch
    rescue
      SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
    end
    private # note: has no effect on the nested class definitions that follow
    # Splits one processed paragraph into its format tag, heading level, text
    # and object number, and wraps them in a Format::Format_text_object.
    class Split_text_object <Source
      require SiSU_lib + '/plaintext_format'
      include SiSU_Viz
      include Format
      @@alt_id_count=0
      @@dp=nil
      attr_reader :format,:lev,:text,:ocn,:lev_para_ocn
      # para:: one paragraph string from the processed document array.
      def initialize(para)
        @para=para
        @format,@ocn='null','null'
        # digest pattern is looked up once and cached at class level
        @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern
      end
      # Parses @para, setting @format, @lev, @text, @ocn and @lev_para_ocn.
      # Returns self so the readers can be chained onto the call.
      def lev_segname_para_ocn
        @text=nil
        if @para =~ /^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
          # heading ("N~...") or "<:tag>" paragraph carrying an object number
          if /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
            @format,@lev,_segname,@text,@ocn=$1,$2,$3,$4,$5
          elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
            @format,@lev,@text,@ocn=$1,$2,$3,$4
          elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
            @format,@text,@ocn=$1,$2,$3 # pattern has three groups (a stray $4 was listed)
          elsif /^(([1-6])~(\S+))\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;[um]\d+><#@dp:#@dp>$/m.match(@para)
            @@alt_id_count+=1
            @format,@lev,_segname,@text,@ocn=$1,$2,$3,$4,"x#{@@alt_id_count}"
          elsif /^(([1-6])~)\s+(\S.+?)<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
            @@alt_id_count+=1
            @format,@lev,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}"
          end
        else
          if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/m.match(@para)
            @text,@ocn=$1,$2
          end
          if @para !~ /<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$|^$/ #added 2002w06
            # NOTE(review): the lazy, unanchored (.+?) captures only the first
            # character of @para; possibly /(.+)/m was intended - confirm
            @text=/(.+?)/m.match(@para)[1]
          end
          if /^((\d)~(?:~\S+)?)\s+(.+)/m.match(@para)
            @format,@lev,@text=$1,$2,$3
          end
        end
        format=@format.dup
        @lev_para_ocn=if @para =~ /.+<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
          Format::Format_text_object.new(format,@text,@ocn)
        else
          # NOTE(review): in a double-quoted string "\d" and "\w" are plain
          # "d" and "w"; kept as written since the value is only a placeholder
          Format::Format_text_object.new(format,@text,"<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>")
        end
        self
      end
    end
    # Converts the processed paragraph array to plain text, accumulating the
    # pieces in the class-level @@plaintext buffers, then publishes them.
    class Scroll <Source
      require SiSU_lib + '/shared_txt'
      include SiSU_text_utils
      @@endnotes_para=[]
      @@plaintext={ :body=>[],:open=>[],:close=>[],:head=>[],:metadata=>[],:tail=>[],:endnotes=>[] }
      @@dp=nil
      # data:: array of processed paragraph strings (modified in place by #markup)
      # md::   document parameters; md.cmd selects line ending and notes mode
      def initialize(data,md)
        @data,@md=data,md
        @vz=SiSU_Env::Get_init.instance.skin
        @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern
        # captures: 1 segment name (optional), 2 paragraph text, 3 object number
        @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/ #m # 2004w18 pb pn removal added
        @tab="\t"
        @br=case md.cmd
        when /[af]/
          @@dostype='unix footnotes'
          "\n"
        when /e/
          @@dostype='unix endnotes'
          "\n"
        when /[AF]/
          @@dostype='msdos footnotes'
          "\r\n"
        when /E/
          @@dostype='msdos endnotes'
          "\r\n"
        else "\n"
        end
      end
      # Marks the document down and writes it out.
      def songsheet
        markup
        publish
        #@data.each { |x| puts x.inspect if x =~/\[table/ }
      end
      # Used for extraction of endnotes from paragraphs: every "~{N text}~" /
      # "~[N text]~" note in para is wrapped, rewritten as "[N]: text" and
      # appended to both the document endnote buffer and the buffer of notes
      # belonging to the current paragraph.
      def extract_endnotes(para='')
        found=para.scan(/~[{\[]([\d*+]+\s+.+?)\s*<#@dp>[}\]]~/)
        pieces=[]
        found.each do |n| #high cost to deal with <br> appropriately within plaintext, consider
          # scan yields one-element arrays; join gives the captured string
          # (Array#to_s only did so on Ruby 1.8)
          n=n.join
          if n =~ /<br(?: \/)?>/
            n.split(/<br(?: \/)?>/).each { |x| pieces << x unless x.empty? } #watch #added
          else pieces << n
          end
        end
        pieces.flatten.each do |e|
          util=if e.to_s =~ /^\[[\d*+]+\]:/; SiSU_text_utils::Wrap.new(e.to_s,70,4,1)
          else SiSU_text_utils::Wrap.new(e.to_s,70,1,1)
          end
          wrap=util.line_wrap
          if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m
            wrap.gsub!(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m,"\n\\1[\\2]: \\3\n")
          else
            wrap.gsub!(/^(.+)\Z/m,"\\1\n")
          end
          @@plaintext[:endnotes] << wrap
          @@endnotes_para << wrap
        end
      end
      # Appends one metadata line ("<tab>element: text") to the metadata
      # buffer when meta.type is 'meta'; otherwise appends an empty string.
      def plaintext_metadata(meta)
        txt=SiSU_text_utils::Wrap.new(meta.text,70,15,1).line_wrap
        @@plaintext[:metadata] << if meta.type == 'meta'
          "\n#{@tab}#{meta.el}: #{txt}\n"
        else ''
        end
      end
      # Appends the closing section (links to the other output formats, source
      # version details, generator information) to the tail buffer.
      def plaintext_tail
        SiSU_Env::Info_skin.new(@md).select
        vz=SiSU_Env::Get_init.instance.skin
        generator="Generated by: #{@md.sisu_version[:project]} #{@md.sisu_version[:version]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" if @md.sisu_version[:version]
        lastdone="Last Generated on: #{Time.now}"
        rubyv="Ruby version: #{@md.ruby_version}"
        sc=if @md.sc_info
          "Source file: #{@md.sc_filename}#{@br}Version number: #{@md.sc_number}#{@br}Version date: #{@md.sc_date}#{@br}"
        else ''
        end
        # NOTE(review): the width of the indent before each URL line was lost
        # in the listing this was restored from; two spaces assumed - confirm
        @@plaintext[:tail] << <<WOK
#@br
Other versions of this document: #@br
manifest:
  #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:manifest]}#@br
html:
  #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:toc]}#@br
pdf:
  #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:pdf_p]}
  #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:pdf_l]}#@br
plaintext (plain text):
  #{vz.url_root_http}/#{@md.fnb}/#{@md.fn[:plain]}#@br
at:
  #{vz.url_site}#@br

#{sc}
* #{generator}
* #{rubyv}
* #{lastdone}
* SiSU #{vz.url_sisu}
WOK
      end
      # Used to extract the structure of a document: appends para to the body
      # buffer, as an upper-cased, underlined heading when lv is 1..6 and as
      # wrapped body text otherwise, followed by the paragraph's notes when in
      # footnote mode.
      # para::  paragraph string (endnote markup is rewritten in place)
      # lv::    heading level; nil, '' or 0 mean body text
      # ocn::   unused, kept for interface compatibility
      # hname:: unused, kept for interface compatibility
      def plaintext_structure(para='',lv='',ocn='',hname='')
        lv=lv.to_i
        lv=nil if lv == 0
        extract_endnotes(para)
        para.gsub!(/~[{\[]([\d*+]+)\s+(?:.+?)[}\]]~/,'[^\1]') # endnote marker marked up
        wrapped=if para[@regx]
          paragraph=para[@regx,2]
          indent=if paragraph.include? '<:i1>'
            paragraph.gsub!(/<:i1>/,'')
            2
          else 0
          end
          SiSU_text_utils::Wrap.new(paragraph,70,indent).line_wrap
        end
        # no match (e.g. the marker sits alone on its line): use empty text
        # instead of failing on nil below
        wrapped ||= ''
        if lv
          times=[wrapped.length,70].min
          @@plaintext[:body] << case lv
          when 1;    wrapped.upcase << @br << '*'*times << @br
          when 2..3; wrapped.upcase << @br << '='*times << @br
          when 4;    wrapped.upcase << @br << '-'*times << @br
          when 5..6; wrapped.upcase << @br << '.'*times << @br
          end
        else
          @@plaintext[:body] << wrapped << @br # main text, contents, body KEEP
        end
        # @@endnotes_para is always an array (truthy), so only the mode decides
        if @@dostype =~ /footnote/ #edit out to switch off endnotes following paragraph to which they belong
          @@plaintext[:body] << @br
          @@endnotes_para.each {|e| @@plaintext[:body] << e << @br}
        elsif @@dostype =~ /endnote/
          @@plaintext[:body] << @br*2
        end
        @@endnotes_para=[]
      end
      # Rewrites the inline markup of one paragraph, in place, to its
      # plain-text form. The order of the substitutions is significant.
      def mark_down_inline(para,table_message)
        para.gsub!(/<!Th?¡.+/m,"#@br#{table_message}")
        para.gsub!(/.+?<-#>/,'') # remove dummy headings (used by html) #check
        para.gsub!(/_\*\s+/,'* ') # bullet markup, marked down
        # NOTE(review): the listing this was restored from had the next two
        # patterns and the &nbsp; one below already entity-decoded (making them
        # no-ops); the entity forms are a reconstruction - confirm upstream
        para.gsub!(/&#169;/,'©') # copyright entity
        para.gsub!(/&amp;/,'&') # ampersand entity
        para.gsub!(/<sup>(.+?)<\/sup>/,'^\1^')
        para.gsub!(/<sub>(.+?)<\/sub>/,'[\1]')
        para.gsub!(/<i>(.+?)<\/i>/,'/\1/')
        para.gsub!(/<b>(.+?)<\/b>/,'*\1*')
        para.gsub!(/<u>(.+?)<\/u>/,'_\1_')
        para.gsub!(/<:(?:group|verse|alt|code)(?:-end)?>(?:\s+<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>)?/,'')
        para.gsub!(/<:p[bn]>/,'') # remove page breaks
        para.gsub!(/^\s*<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/,'') # remove empty lines - check
        para.gsub!(/<a href=".+?">(.+?)<\/a>/m,'\1')
        para.gsub!(/<:name#\S+?>/,'') # remove name links
        para.gsub!(/&nbsp;/,' ') # decide on
        para.gsub!(/\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]")
        para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
      end
      # Used for major markup instructions: walks @data, marks each paragraph
      # down in place and feeds the metadata, body and endnote buffers.
      def markup
        # result unused here; call kept in case the constructor has side effects
        SiSU_Env::Info_env.new(@md.fns)
        @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]}
        (0..6).each { |x| @cont[x]=@level[x]=false }
        (4..6).each { |x| @plaintext_contents_close[x]='' }
        plaintext_tail #($1,$2)
        table_message='[table omitted, see other document formats]'
        #data.each do |para| #high cost to deal with <br> appropriately within plaintext, consider
        #  para=para.dup
        #  if para =~/<br(?: \/)?>/
        #    puts para
        #    fix = para.split(/<br(?: \/)?>/) #watch #added
        #    fix.each do |x|
        #      if x =~/\S+/; @data_mod << x
        #      end
        #    end
        #  else @data_mod << para
        #  end
        #end
        #data=@data_mod.flatten
        @data.each do |para|
          mark_down_inline(para,table_message)
          if para =~ /^0~(\S+)\s+(.+?)\Z/m # for headers
            d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta
            plaintext_metadata(d_meta) if d_meta
          end
          next if para =~ /(^0~|<ENDNOTES>|<EOF>)/
          if para =~ @regx #/.+?<~\d+;\w\d+;\w\d+>.*/ #watch change
            @p_num=Format::Paragraph_number.new(para[@regx,3])
          end
          @sto=Split_text_object.new(para).lev_segname_para_ocn
          ### problem in scroll, it appears tables are getting paragraph numbers
          m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
          if para =~ m and para =~ /\S+/
            para=case @sto.format
            # levels 1-3,5,6 take an optional segment name, level 4 requires one
            when /^([1-35-6])~(?:(\S+))?/, /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object
              lev,segname=$1,$2
              plaintext_structure(para,lev,@sto.ocn,segname)
              @sto.lev_para_ocn.send("heading_body#{lev}")
            #when /^(i1)$/
            #  #formatMono.gsubBody
            #  #para=@sto[:lev_para_ocn].scrIndent1
            #when /^(i2)$/
            #  formatMono.gsubBody
            #  para=@sto[:lev_para_ocn].scrIndent2
            #when /^(center)$/
            #  para.gsub!(/(.+)/,
            #    %{<center>(\\1)</center>})
            #  para=@sto[:lev_para_ocn].scrPara
            #when /^(b|bold)$/
            #  para.gsub!(/(.+)/,
            #    %{<b>(\\1)</b>})
            #  para=@sto[:lev_para_ocn].scrPara
            #when /null/ # see whether u can improve
            #  if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/)
            #    #formatMono.gsubBody
            #    #para=@sto[:lev_para_ocn].scrPara
            #  end
            else
              plaintext_structure(para,nil,nil,nil) #watch may be problematic
              para
            end
          elsif para.include?(table_message)
            # literal test: interpolated into a regex the message's [...] was a
            # character class and matched almost every paragraph
            @@plaintext[:body] << para << @br
          elsif para =~ /(Note|Endnotes?)/ and para !~ /<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
            # deliberately not output
          elsif para =~ /(MetaData)/ and para =~ /<~(\d+);[um]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info ####suspect visit
            #formatMono=MonoSiSU.new('<br /><a name="metadata">MetaData</a>')
            #para=formatMono.bold_para
          elsif para.include? 'Owner Details' and para !~ /<~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
            #formatMono=MonoSiSU.new('<br /><a name="owner.details">Owner Details</a>')
            #@@plaintext[:owner_details]=formatMono.bold_para
            #para=''
          elsif para =~ /(¡|<!Th?)/ #tables !
          elsif para =~ /(.*)<!#!>(.*)/
            one,two=$1,$2
            para=Format_text_object.new(one,two).seg_no_paranum
          end
          para='' if (para =~ /<a name="n\d+">/ and para =~ /^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
          case para
          when /<:i1>/
            if para =~ /.*<:#>.*$/
              para=Format_text_object.new(para,'').scr_indent_one_no_paranum
            end
          when /<:i2>/
            if para =~ /.*<:#>.*$/
              # NOTE(review): calls the indent-one formatter for <:i2>; a
              # two-level variant may have been intended - confirm against Format
              para=Format_text_object.new(para,'').scr_indent_one_no_paranum
            end
          end
          # (an empty "if para !~ margin_txt_0|1|2" test stood here; it had no body)
          if para =~ /<:center>/
            one,two=/(.*)<:center>(.*)/.match(para)[1,2]
            para=Format_text_object.new(one,two).center
          end
          para.gsub!(/<!.+!>/,' ') if para ## Clean Prepared Text
          para.gsub!(/<:\S+>/,' ') if para ## Clean Prepared Text
        end
      end
      # Joins the accumulated buffers in output order, writes them through
      # Output and clears the per-document buffers.
      def publish
        divider="="
        content=[]
        content << @@plaintext[:open]
        content << @@plaintext[:head]
        content << @@plaintext[:body]
        content << @@plaintext[:endnotes] if @@dostype =~ /endnotes/
        content << "#@br#{divider*70}#@br"
        content << @@plaintext[:metadata]
        content << "#@br#{divider*70}#@br" if @md.stmp =~ /\w+/ #not used?
        content << @@plaintext[:owner_details] if @md.stmp =~ /\w+/ #not used?
        content << @@plaintext[:tail]
        # join flattens nested arrays and renders nil as '' on every Ruby
        # version (Array#to_s only did so on 1.8)
        Output.new(content.join,@md).plaintext
        @@plaintext[:head],@@plaintext[:body],@@plaintext[:tail],@@plaintext[:metadata]=[],[],[],[]
        # also clear collected notes so they cannot leak into the next document
        @@plaintext[:endnotes]=[]
      end
    end
    # Writes the finished plain text to the document's output file.
    class Output <Source
      include SiSU_Param
      include SiSU_Env
      # content:: the complete plain-text document
      # md::      document parameters (output file name in md.fn[:plain])
      def initialize(content,md)
        @content,@md=content,md
      end
      # Creates the output directory and file, then writes the content to it
      # line by line.
      def plaintext #%plaintext output
        SiSU_Env::SiSU_file.new(@md).mkdir
        out=SiSU_Env::SiSU_file.new(@md,@md.fn[:plain]).mkfile
        begin
          write_line=lambda { |line| out.puts line }
          # String#each no longer exists after Ruby 1.8; each_line is the same
          # iteration. Non-string content is still walked with each.
          if @content.respond_to?(:each_line); @content.each_line(&write_line)
          else @content.each(&write_line)
          end
        ensure
          # presumably mkfile returns an open IO - close it when it can be closed
          out.close if out.respond_to?(:close) and not (out.respond_to?(:closed?) and out.closed?)
        end
      end
    end
  end
end
__END__