aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/develop/txt_plain.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/develop/txt_plain.rb')
-rw-r--r--lib/sisu/develop/txt_plain.rb410
1 files changed, 0 insertions, 410 deletions
diff --git a/lib/sisu/develop/txt_plain.rb b/lib/sisu/develop/txt_plain.rb
deleted file mode 100644
index 2c19c7d2..00000000
--- a/lib/sisu/develop/txt_plain.rb
+++ /dev/null
@@ -1,410 +0,0 @@
-# encoding: utf-8
-=begin
-
-* Name: SiSU
-
-** Description: documents, structuring, processing, publishing, search
-*** plaintext text generation, stripped plaintext output (unix, linefeed)
-
-** Author: Ralph Amissah
- <ralph@amissah.com>
- <ralph.amissah@gmail.com>
-
-** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
- All Rights Reserved.
-
-** License: GPL 3 or later:
-
- SiSU, a framework for document structuring, publishing and search
-
- Copyright (C) Ralph Amissah
-
- This program is free software: you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation, either version 3 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program. If not, see <http://www.gnu.org/licenses/>.
-
- If you have an Internet connection, the latest version of the GPL should be
- available at these locations:
- <http://www.fsf.org/licensing/licenses/gpl.html>
- <http://www.gnu.org/licenses/gpl.html>
-
- <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
-
-** SiSU uses:
- * Standard SiSU markup syntax,
- * Standard SiSU meta-markup syntax, and the
- * Standard SiSU object citation numbering and system
-
-** Homepages:
- <http://www.jus.uio.no/sisu>
- <http://www.sisudoc.org>
-
-** Git
- <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
- <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/txt.rb;hb=HEAD>
-
-=end
-module SiSU_Txt_Plain
- require_relative 'ao' # ao.rb
- require_relative 'se' # se.rb
- include SiSU_Env
- require_relative 'shared_metadata' # shared_metadata.rb
- require_relative 'generic_parts' # generic_parts.rb
- require_relative 'txt_read' # txt_read.rb
- require_relative 'txt_shared' # txt_shared.rb
- require_relative 'txt_plain_decorate' # txt_plain_decorate.rb
- require_relative 'txt_output' # txt_output.rb
- include SiSU_Param
- @@alt_id_count,@@alt_id_count=0,0 # NOTE(review): the same class variable is assigned twice; it is not read anywhere in the code visible here
- @@tablefoot='' # not read anywhere in the code visible here
- class Source
- include SiSU_Txt_Read
- def initialize(opt) # Stores the options object and prints a warning when opt.fns does not end in .sst, .-sst or .ssm.sst
- @opt=opt
- unless @opt.fns =~/(.+?)\.(?:-|ssm\.)?sst$/
- puts "#{sf} not a processed file type" # NOTE(review): sf is not assigned in this method or elsewhere in the visible file; unless a mixin supplies it this raises NameError (presumably @opt.fns was intended)
- end
- end
- def read # Loads the document parameters, reads the document through read_generic, then renders and writes plaintext via Scroll#songsheet
- begin
- md=SiSU_Param::Parameters.new(@opt).get
- specific={ # description and output location passed on to read_generic
- description: 'Plaintext (utf-8)',
- output_path: md.file.output_path.txt.dir,
- output_file: md.file.base_filename.txt,
- }
- read_generic(@opt,specific) # presumably supplied by SiSU_Txt_Read and expected to set @ao_array and @wrap_width, which the next line uses - confirm
- SiSU_Txt_Plain::Source::Scroll.new(md,@ao_array,@wrap_width).songsheet
- rescue # any StandardError is handed to SiSU_Errors::Rescued instead of propagating from here
- SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do
- __LINE__.to_s + ':' + __FILE__ # the block yields "line:file" for this location
- end
- ensure # no cleanup is performed
- end
- end
- private
- class Scroll <Source
- include SiSU_Parts_Generic
- include SiSU_TextUtils
- include SiSU_Decorate_Txt_Plain
- @@endnotes={ para: [], end: [] } # class-level store: :para holds the notes of the object being processed (emptied in plaintext_structure), :end accumulates notes until publish resets it
- def initialize(md,data,wrap_width) # Keeps the metadata object, the document objects and the wrap width, and prepares empty output sections; does not call super, so @opt is not set here
- @md,@data,@wrap_width=md,data,wrap_width
- @env=SiSU_Env::InfoEnv.new(@md.fns)
- @tab="\t"
- @@endnotes_=case md.opt.selections.str # class-level switch: false only when --footnote is selected, true for --endnote or when neither is given
- when /--footnote/ then false
- when /--endnote/ then true
- else true
- end
- @plaintext={ body: [], open: [], close: [], head: [], metadata: [], tail: [] } # output sections; only :body, :metadata and :tail are filled in the code visible here
- end
- def songsheet # Entry point: turns @data into plaintext sections with markup, then hands them to publish
- plaintext=markup(@data)
- publish(plaintext)
- end
- def break_line # Returns the line terminator ("\n") used when building the plaintext output
- "\n"
- end
- # Extracts endnote bodies from dob.obj, line-wraps them, appends them to the class-level @@endnotes (:para and :end) and returns @@endnotes
- def extract_endnotes(dob='') # NOTE(review): the '' default is not usable, the next line calls dob.obj
- notes=dob.obj.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) # captures "marker text" between either pair of endnote delimiters
- @n=[]
- notes.flatten.each do |n| #high cost to deal with <br> appropriately within plaintext, consider
- n=n.dup.to_s
- if n =~/#{Mx[:br_line]}|#{Mx[:br_nl]}/
- fix = n.split(/#{Mx[:br_line]}|#{Mx[:br_nl]}/) #watch #added
- fix.each do |x|
- unless x.empty?; @n << x
- end
- end
- else @n << n
- end
- end
- notes=@n.flatten
- notes.each do |e|
- util=(e.to_s =~/^\[[\d*+]+\]:/) \
- ? (SiSU_TextUtils::Wrap.new(e.to_s,@wrap_width,4,1))
- : (SiSU_TextUtils::Wrap.new(e.to_s,@wrap_width,1,1))
- wrap=util.line_wrap
- wrap=if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m # note begins with its marker: rewritten below as "[marker]: text"
- wrap.gsub(/^(\s*)([\d*+]+)\s+(.+?)\s*\Z/m, <<-GSUB
-\\1[\\2]: \\3
- GSUB
- )
- else
- wrap.gsub(/^(.+)\Z/m, <<-GSUB
-\\1
- GSUB
- )
- end
- @@endnotes[:para] << "-#{wrap}"
- @@endnotes[:end] << '' << wrap
- end
- @@endnotes
- end
- def plaintext_metadata # Appends one wrapped, tab-prefixed "tag value" entry per metadata line to @plaintext[:metadata]
- array=SiSU_Metadata::Summary.new(@md).plaintext.metadata
- array.each do |meta|
- tag,inf=meta.scan(/^.+?:\s|.+/) # first match is the shortest leading "label: " prefix, second is the rest of the line
- if tag and inf # lines without a "label: " prefix yield a single match and are skipped
- util=SiSU_TextUtils::Wrap.new(inf,@wrap_width,15,1)
- txt=util.line_wrap
- @plaintext[:metadata] <<<<WOK

-#{@tab}#{tag}#{txt}
-WOK
- end
- end
- end
- def plaintext_tail # Appends the closing block (output and manifest URLs, optional source-control details, generator, Ruby version, timestamp) to @plaintext[:tail]
-# env=SiSU_Env::InfoEnv.new(@md.fns)
- generator="Generated by: #{@md.project_details.project} #{@md.project_details.version} of #{@md.project_details.date_stamp} (#{@md.project_details.date})" if @md.project_details.version
- lastdone="Last Generated on: #{Time.now}" # wall-clock timestamp, so the output is not byte-for-byte reproducible
- rubyv="Ruby version: #{@md.ruby_version}"
- sc=if @md.sc_info # source-control details are included only when @md.sc_info is set
- "Source file: #{@md.sc_filename}#{break_line}Version number: #{@md.sc_number}#{break_line}Version date: #{@md.sc_date}#{break_line}"
- else ''
- end
- @plaintext[:tail] <<<<WOK
-#{break_line}
-plaintext (plain text):
- #{@md.file.output_path.txt.url}/#{@md.file.base_filename.txt}#{break_line}
-Other versions of this document: #{break_line}
-manifest:
- #{@md.file.output_path.manifest.url}/#{@md.file.base_filename.manifest}#{break_line}
-at:
- #{@md.file.output_path.base.url}#{break_line}
-
-#{sc}
-* #{generator}
-* #{rubyv}
-* #{lastdone}
-* SiSU #{the_url.sisu_txt}
-WOK
- end
- def plaintext_structure(dob='',p_num='') # Wraps a heading or paragraph object and appends it plus p_num to @plaintext[:body]; headings are upcased and underlined. dob must respond to is/obj, so the '' default is not usable
- lv=n=n3=nil
- if dob.is==:heading
- lv=dob.ln
- n=lv - 1 # n and n3 are not used again in this method
- n3=lv + 2
- end
- util=nil
- wrapped=if dob.is==:para \
- || dob.is==:heading
- if dob.is==:para
- if dob.hang \
- and dob.hang =~/[0-9]/ \
- and dob.indent != dob.hang
- util=SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,dob.indent.to_i*2,dob.hang.to_i*2) # hanging indent: indent and hang values differ
- #util=SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,dob.hang.to_i*2,0)
- elsif dob.indent =~/[1-9]/
- util=if dob.bullet_ # bulleted paragraphs get a "* " prefix before wrapping
- SiSU_TextUtils::Wrap.new("* #{dob.obj}",@wrap_width,dob.indent.to_i*2)
- else SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,dob.indent.to_i*2)
- end
- else
- util=if dob.bullet_
- SiSU_TextUtils::Wrap.new("* #{dob.obj}",@wrap_width,0)
- else SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,0)
- end
- end
- else util=SiSU_TextUtils::Wrap.new(dob.obj,@wrap_width,0) # headings are wrapped with a 0 third argument
- end
- util.line_wrap
- end # wrapped stays nil for any other object type; the caller in markup only passes headings and paragraphs
- if lv
- times=wrapped.length # underline length follows the wrapped heading text, capped at @wrap_width
- times=@wrap_width if times > @wrap_width
- @plaintext[:body] << case lv # * and + bind tighter than <<, so (underscore*times + p_num) is built first and then appended to the upcased heading
- when 0 then wrapped.upcase << break_line << decorate.heading_underscore.l0*times + p_num << break_line*2
- when 1 then wrapped.upcase << break_line << decorate.heading_underscore.l1*times + p_num << break_line*2
- when 2 then wrapped.upcase << break_line << decorate.heading_underscore.l2*times + p_num << break_line*2
- when 3 then wrapped.upcase << break_line << decorate.heading_underscore.l3*times + p_num << break_line*2
- when 4
- unless dob.use_ == :dummy # NOTE(review): for a dummy level-4 heading this branch yields nil, and nil is what gets appended to @plaintext[:body]
- wrapped.upcase << break_line << decorate.heading_underscore.l4*times + p_num << break_line*2
- end
- when 5 then wrapped.upcase << break_line << decorate.heading_underscore.l5*times + p_num << break_line*2
- when 6 then wrapped.upcase << break_line << decorate.heading_underscore.l6*times + p_num << break_line*2
- when 7
- wrapped.upcase << break_line << decorate.heading_underscore.l7*times + p_num << break_line*2
- #when 7 then wrapped.upcase << break_line << decorate.heading_underscore.l7*times + p_num << break_line*2
- end
- else
- @plaintext[:body] << wrapped + p_num << break_line # main text, contents, body KEEP
- end
- if @@endnotes[:para] \
- and not @@endnotes_ # footnote mode: notes collected for this object are emitted directly after it
- @@endnotes[:para].each {|e| @plaintext[:body] << e << break_line}
- elsif @@endnotes[:para] \
- and @@endnotes_ # endnote mode: this branch has no body, nothing is emitted here
- end
- @@endnotes[:para]=[] # per-object notes are cleared in both modes
- end
- def ocn_display(dob) # Returns "\n" plus dob.ocn wrapped in Dx[:ocn_o]/Dx[:ocn_c] when plaintext OCN output is enabled and dob.ocn passes the type check below; otherwise ''
- make=SiSU_Env::ProcessingSettings.new(@md)
- if make.build.plaintext_ocn?
- if defined? dob.ocn \
- and dob.ocn.is_a?(Fixnum) # NOTE(review): Fixnum was deprecated in Ruby 2.4 and removed in 3.2; Integer is the replacement
- (defined? dob.ocn) \
- ? "\n#{Dx[:ocn_o]}#{dob.ocn}#{Dx[:ocn_c]}" \
- : '' # not reachable: the enclosing if already required defined? dob.ocn
- else ''
- end
- else ''
- end
- end
- def markup(data) # Converts each document object's internal markup to plaintext conventions, fills @plaintext[:body], :metadata and :tail, and returns @plaintext
- SiSU_Env::InfoEnv.new(@md.fns) # result is discarded
- @data_mod,@endnotes,@level,@cont,@copen,@plaintext_contents_close=Array.new(6){[]} # none of these are read again in the code visible here
- (0..7).each { |x| @cont[x]=@level[x]=false }
- (4..7).each { |x| @plaintext_contents_close[x]='' }
- plaintext_tail #($1,$2)
- plaintext_metadata
- table_message='[table omitted, see other document formats]'
- data.each do |dob|
- dob.obj=dob.obj.gsub(/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+/um,"#{break_line}#{table_message}"). #fix
- gsub(/.+?#{Mx[:gl_o]}-##{Mx[:gl_c]}/,''). # remove dummy headings (used by html) #check also [~-]#
- gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,
- "#{decorate.bold.open}\\1#{decorate.bold.close}").
- gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,
- "#{decorate.italics.open}\\1#{decorate.italics.close}").
- gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,
- "#{decorate.underscore.open}\\1#{decorate.underscore.close}").
- gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,
- "#{decorate.subscript.open}\\1#{decorate.subscript.close}").
- gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,
- "#{decorate.superscript.open}\\1#{decorate.superscript.close}").
- gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,
- "#{decorate.insert.open}\\1#{decorate.insert.close}").
- gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,
- "#{decorate.cite.open}\\1#{decorate.cite.close}").
- gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,
- "#{decorate.strike.open}\\1#{decorate.strike.close}").
- gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,
- "#{decorate.monospace.open}\\1#{decorate.monospace.close}")
- unless dob.is==:code # link, endnote and glyph substitutions are skipped for code objects
- dob.obj=dob.obj.gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,'\1').
- gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1').
- gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1 [link: <\2>]').
- gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/,'\1 [link: local image]').
- gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,"#{the_text.url_open}\\1#{the_text.url_close}")
- extract_endnotes(dob) # must run before the next statement, which collapses each note to a [^marker] reference
- dob.obj=dob.obj.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'[^\1]'). # endnote marker marked up
- gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'[^\1]'). # endnote marker marked up
- gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<').
- gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>').
- gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&').
- gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!').
- gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#').
- gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*').
- gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-').
- gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/').
- gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_').
- gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{').
- gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}').
- gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~').
- gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©').
- gsub(/#{Mx[:gl_o]}#092#{Mx[:gl_c]}/,'\\')
- end
- dob.obj=if dob.of==:block # watch
- dob.obj.gsub(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/m,"* ").
- gsub(/\n?#{Mx[:br_line]}\n?|\n?#{Mx[:br_nl]}\n?/m,break_line)
- else dob.obj.gsub(/\n?#{Mx[:br_line]}\n?|\n?#{Mx[:br_nl]}\n?/m,break_line*2) # outside block objects a line-break marker becomes two newlines
- end
- if dob.is==:code
- dob.obj=dob.obj.gsub(/(^|[^}])_([<>])/m,'\1\2'). # _> _<
- gsub(/(^|[^}])_([<>])/m,'\1\2') # _<_<
- end
- dob.obj=dob.obj.gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1').
- gsub(/<a href=".+?">(.+?)<\/a>/m,'\1').
- gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,''). # remove name links
- gsub(/&nbsp;|#{Mx[:nbsp]}/,' '). # decide on
- gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [ \1 ]'). #"[ #{dir.url.images_local}\/\\1 ]")
- gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,' [ \1 ]').
- gsub(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]')
- if dob.obj !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ # objects matching the meta, end-of-file or endnotes-break patterns are not output
- p_num=ocn_display(dob)
- if dob.is==:heading \
- or dob.is==:para
- plaintext_structure(dob,p_num)
- elsif dob.is==:group \
- or dob.is==:block \
- or dob.is==:verse \
- or dob.is==:code \
- or dob.is==:table
- @plaintext[:body] << dob.obj + p_num << break_line # these object types are appended without line wrapping
- elsif dob.is==:break # NOTE(review): a <<= b is a = a << b; when dob.obj matches none of the markers below the if yields nil and nil is appended to @plaintext[:body]
- sp=' '
- ln='-'
- @plaintext[:body] <<=if dob.obj==Mx[:br_page] \
- or dob.obj==Mx[:br_page_new] \
- or dob.obj==Mx[:br_page_line]
- "#{break_line}#{ln*40}#{break_line*2}"
- elsif dob.obj ==Mx[:br_obj]
- "#{break_line}#{sp*20}* * *#{break_line*2}"
- end # following empty line (break_line) missing, fix
- end
- dob='' if (dob.obj =~/<a name="n\d+">/ \
- and dob.obj =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/) # -endnote
- if dob ## Clean Prepared Text; NOTE(review): '' is truthy in Ruby, so when dob was replaced by '' above the next line calls obj on a String
- dob.obj=dob.obj.gsub(/<!.+!>/,' ').
- gsub(/<:\S+>/,' ')
- end
- end
- end
- @plaintext
- end
- def publish(plaintext) # Assembles the sections in output order, writes them through Txt_Output::Output#document, then resets the class-level @@endnotes
- divider='='
- content=[]
- content << plaintext[:open]
- content << plaintext[:head]
- content << plaintext[:body]
- content << @@endnotes[:end] if @@endnotes_ # collected endnotes are added only in endnote mode
- content << "#{break_line}#{divider*@wrap_width}#{break_line}" # divider line as wide as the wrap width
- content << plaintext[:metadata]
- content << "#{break_line}#{divider*@wrap_width}#{break_line}" if @md.stmp =~/\w+/ #not used?
- content << plaintext[:tail]
- outputfile=SiSU_Env::FileOp.new(@md).write_file.txt
- Txt_Output::Output.new.document(content,outputfile)
- @@endnotes={ para: [], end: [] } # return value of publish (and therefore of songsheet) is this fresh hash
- end
- end
- end
-end
-__END__
- bold_o: '*', bold_c: '*',
- #bold_o: '!', bold_c: '!',
- #emphasis_o: '*', emphasis_c: '*',
- italics_o: '/', italics_c: '/',
- underscore_o: '_', underscore_c: '_',
- cite_o: '"', cite_c: '"',
- insert_o: '+', insert_c: '+',
- strike_o: '-', strike_c: '-',
- superscript_o: '^', superscript_c: '^',
- subscript_o: '[', subscript_c: ']',
- hilite_o: '*', hilite_c: '*',
- monospace_o: '', monospace_c: '',
- p_bold_o: '!{', p_bold_c: '}!',
- p_italics_o: '/{', p_italics_c: '}/',
- p_underscore_o: '_{', p_underscore_c: '}_',
- p_cite_o: '"{', p_cite_c: '}"',
- p_insert_o: '+{', p_insert_c: '}+',
- p_strike_o: '-{', p_strike_c: '}-',
- p_superscript_o: '^{', p_superscript_c: '}^',
- p_subscript_o: ',{', p_subscript_c: '},',
- p_hilite_o: '*{', p_hilite_c: '}*',
- p_monospace_o: '#{', p_monospace_c: '}#',