summaryrefslogtreecommitdiffstats
path: root/lib/sisu/txt_shared.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/txt_shared.rb')
-rw-r--r--lib/sisu/txt_shared.rb241
1 files changed, 241 insertions, 0 deletions
diff --git a/lib/sisu/txt_shared.rb b/lib/sisu/txt_shared.rb
new file mode 100644
index 0000000..4b4bbb2
--- /dev/null
+++ b/lib/sisu/txt_shared.rb
@@ -0,0 +1,241 @@
+# encoding: utf-8
+=begin
+
+* Name: SiSU
+
+** Description: documents, structuring, processing, publishing, search
+*** modules shared by flatfile output generators
+
+** Author: Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
+ All Rights Reserved.
+
+** License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/licenses/gpl.html>
+
+ <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
+
+** SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+** Hompages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+** Git
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/txt_shared.rb;hb=HEAD>
+
+=end
+module SiSU_TextUtils
+ require_relative 'generic_parts' # generic_parts.rb
+ class Wrap
+ def initialize(para='',n_char_max=76,n_indent=0,n_hang=nil,post='')
+ @para,@n_char_max,@n_indent,@post,=para,n_char_max,n_indent,post
+ @n_char_max_extend = n_char_max
+ @n_hang=n_hang ? n_hang : @n_indent
+ end
+ def break_line
+ "\n"
+ end
+ def line_wrap
+ space=' '
+ spaces_indent,spaces_hang="#{break_line}#{space*@n_indent}",space*@n_hang
+ line=0
+ out=[]
+ out[line]=''
+ @para=@para.gsub(/<br>/,' \\ ').
+ gsub(/#{Mx[:br_nl]}/,"\n\n")
+ words=@para.scan(/\n\n|\s+\\\s+|<br>|\S+/m)
+ while words != ''
+ word=words.shift
+ if not word
+ out[line] unless out[line].empty? #check
+ break
+ elsif word =~/<br>/
+ word=nil
+ out[line]=out[line].gsub(/<br>/,'')
+ line=line
+ elsif word =~/\n\n/
+ word="\n"
+ @n_char_max_extend = @n_char_max
+ line += 1
+ elsif (out[line].length + word.length) > (@n_char_max_extend - @n_indent) \
+ and out[line] =~/\S+/
+ @n_char_max_extend = @n_char_max
+ out[line].squeeze!(' ')
+ line += 1
+ end
+ if word
+ out[line]=if out[line] \
+ and out[line] !~/\S+$/m
+ "#{out[line]}#{word}"
+ elsif out[line] \
+ and out[line] =~/\S+/
+ "#{out[line]} #{word}"
+ else "#{word.strip}"
+ end
+ end
+ @oldword=word if word =~/\S+/
+ end
+ post=(@post.empty?) \
+ ? ''
+ : "\n" + (' '*@n_indent) +@post
+ spaces_hang + out.join(spaces_indent) + post
+ end
+ def line_wrap_indent1
+ @n_indent,@n_hang=2,2
+ line_wrap
+ end
+ def line_wrap_endnote
+ @n_indent,@n_hang=4,2
+ line_wrap
+ end
+ def array_wrap
+ if @para.is_a?(Array)
+ @arr=[]
+ @para.each do |line|
+ @arr << SiSU_TextUtils::Wrap.new(line,@n_char_max,@n_indent,@n_hang).line_wrap
+ end
+ end
+ @arr
+ end
+ def no_wrap
+ @para
+ end
+ def no_wrap_no_breaks
+ @para.gsub(/\n/m,' ').gsub(/\s\s+/,' ')
+ end
+ end
+ class HeaderScan
+ def initialize(md,para)
+ @md,@p=md,para
+ end
+ def extract(tag,tag_content,type,attrib)
+ if dc_tag \
+ and dc_content
+ [dc_tag,dc_content,{dc_tag=>dc_content}]
+ else nil
+ end
+ end
+ def header(tag,tag_content,type='',attrib='') #this will break stuff and must be tested thoroughly 20060825
+ @tag,@tag_content,@type,@attrib=tag,tag_content,type,attrib
+ def label #element
+ @tag
+ end
+ def type
+ @type
+ end
+ def text
+ @tag_content
+ end
+ def info #element text
+ @tag_content
+ end
+ def attribute
+ @attrib
+ end
+ def element
+ @tag
+ end
+ def attrib
+ @attrib
+ end
+ def el
+ @tag
+ end
+ self
+ end
+ def start_is_match
+ case @p
+ when /^#{Mx[:meta_o]}(title)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,@md.title.full,'meta','dc') #dc 1
+ when /^#{Mx[:meta_o]}(creator|author)#{Mx[:meta_c]}\s*(.+?)$/ then header('creator',$2,'meta','dc') #dc 2
+ when /^#{Mx[:meta_o]}(subject)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 3
+ when /^#{Mx[:meta_o]}(description)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 4
+ when /^#{Mx[:meta_o]}(publisher)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 5
+ when /^#{Mx[:meta_o]}(contributor)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 6
+ when /^#{Mx[:meta_o]}(date)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 7
+ when /^#{Mx[:meta_o]}(date\.created)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(date\.issued)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(date\.available)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(date\.valid)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(date\.modified)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(type)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 8
+ when /^#{Mx[:meta_o]}(format)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 9
+ when /^#{Mx[:meta_o]}(identifier)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 10
+ when /^#{Mx[:meta_o]}(source)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 11
+ when /^#{Mx[:meta_o]}(language)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 12
+ when /^#{Mx[:meta_o]}(relation)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 13
+ when /^#{Mx[:meta_o]}(coverage)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 14
+ when /^#{Mx[:meta_o]}(rights)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','dc') #dc 15
+ when /^#{Mx[:meta_o]}(keywords)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(copyright)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(translator|translated_by)#{Mx[:meta_c]}\s*(.+?)$/ then header('translator',$2)
+ when /^#{Mx[:meta_o]}(illustrator|illustrated_by)#{Mx[:meta_c]}\s*(.+?)$/ then header('illustrator',$2)
+ when /^#{Mx[:meta_o]}(prepared_by)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(digitized_by)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(comments?)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(abstract)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(tags?)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(catalogue)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(class(?:ify)?_loc)#{Mx[:meta_c]}\s*(.+?)$/ then header('classify_loc',$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(class(?:ify)?_dewey)#{Mx[:meta_c]}\s*(.+?)$/ then header('classify_dewey',$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(class(?:ify)?_pg)#{Mx[:meta_c]}\s*(.+?)$/ then header('classify_pg',$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(class(?:ify)?_isbn)#{Mx[:meta_c]}\s*(.+?)$/ then header('classify_isbn',$2,'meta','extra')
+ when /^#{Mx[:meta_o]}(toc|structure)#{Mx[:meta_c]}\s*(.+?)$/ then header('structure',$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(level|page|markup)#{Mx[:meta_c]}\s*(.+?)$/ then header('markup',$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(bold)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(italics|itali[sz]e)#{Mx[:meta_c]}\s*(.+?)$/ then header('italicize',$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(vocabulary|wordlist)#{Mx[:meta_c]}\s*(.+?)$/ then header('vocabulary',$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(css|stylesheet)#{Mx[:meta_c]}\s*(.+?)$/ then header('css',$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(links)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(prefix)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'process','instruct') #add a & b
+ when /^#{Mx[:meta_o]}(suffix)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(information)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(contact)#{Mx[:meta_c]}\s*(.+?)$/ then header($1,$2,'process','instruct')
+ when /^#{Mx[:meta_o]}(rcs|cvs)#{Mx[:meta_c]}\s*(.+?)$/ then header('version',$2,'process','instruct')
+ else nil
+ end
+ end
+ def dublin
+ if @p =~/^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/
+ start_is_match
+ else nil
+ end
+ end
+ def meta
+ if @p =~/^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/
+ start_is_match
+ else nil
+ end
+ end
+ end
+end
+__END__