=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007 Ralph Amissah All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007 Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: modules shared by flatfile output generators
=end
module SiSU_text_utils
class Wrap
  # Greedy word wrapper for a single paragraph.
  #
  # para       - text to wrap; the marker <:br> is treated as a forced break
  # n_char_max - maximum line width (default 76)
  # n_indent   - number of spaces placed after every line break inserted by
  #              the wrapper (and subtracted from the usable width)
  # n_hang     - number of spaces placed in front of the first line;
  #              defaults to n_indent when not given
  def initialize(para='',n_char_max=76,n_indent=0,n_hang=nil)
    @para,@n_char_max,@n_indent=para,n_char_max,n_indent
    @n_char_max_extend=n_char_max
    @br="\n"
    @n_hang=n_hang || @n_indent
  end
  # Returns the wrapped paragraph as a new String.
  #
  # The paragraph passed to the constructor is not modified (so frozen
  # strings are accepted), a nil paragraph is treated as empty, and the
  # width bookkeeping is reset on entry so repeated calls on the same
  # object return the same result.
  def line_wrap
    indent_sep="#{@br}#{' '*@n_indent}"
    hang=' '*@n_hang
    @n_char_max_extend=@n_char_max          # start every run with the plain width
    line=0
    out=['']
    words=@para.to_s.gsub(/<:br>/,"\n\n").scan(/\n\n|\S+/m)
    while (word=words.shift)
      if word =~ /\n\n/
        # forced break: stays inside the current output element as a single
        # newline; the width budget grows by what is already on the element
        # so the text after the break gets a full line of its own
        word="\n"
        @n_char_max_extend=@n_char_max + out[line].length
      elsif (out[line].length + word.length) > (@n_char_max_extend - @n_indent) \
      and out[line] =~ /\S+/
        # word does not fit: close the current element and start the next
        @n_char_max_extend=@n_char_max
        out[line].squeeze!(' ')
        line += 1
      end
      out[line]=if out[line] \
      and out[line] !~ /\S+$/m
        "#{out[line]}#{word}"               # element is empty or ends in whitespace
      elsif out[line] \
      and out[line] =~ /\S+/
        "#{out[line]} #{word}"
      else word.strip                       # first word of a fresh element
      end
    end
    out.join(indent_sep).gsub(/\A\n+/m,'').insert(0,hang)
  end
  # Wraps with a fixed indent of 2 and a hang of 2.
  def line_wrap_indent1
    @n_indent,@n_hang=2,2
    line_wrap
  end
  # Wraps with a fixed indent of 4 and a hang of 2.
  def line_wrap_endnote
    @n_indent,@n_hang=4,2
    line_wrap
  end
end
class Header_scan
  # Recognises a document header line written either as "0~name content" or
  # as "@name: content" and exposes the result through the accessors below.
  #
  # Each rule is [name alternation (regex source), tag, type, attrib].
  # A nil tag means "use the name exactly as it was matched".
  # Rules are tried in order and the first match wins.
  META_HEADERS=[
    ['title',                     nil,             'meta','dc'],    #dc 1 (content comes from md.dc_title)
    ['creator|author',            'creator',       'meta','dc'],    #dc 2
    ['subject',                   nil,             'meta','dc'],    #dc 3
    ['description',               nil,             'meta','dc'],    #dc 4
    ['publisher',                 nil,             'meta','dc'],    #dc 5
    ['contributor',               nil,             'meta','dc'],    #dc 6
    ['date',                      nil,             'meta','dc'],    #dc 7
    ['date\.created',             nil,             'meta','extra'],
    ['date\.issued',              nil,             'meta','extra'],
    ['date\.available',           nil,             'meta','extra'],
    ['date\.valid',               nil,             'meta','extra'],
    ['date\.modified',            nil,             'meta','extra'],
    ['type',                      nil,             'meta','dc'],    #dc 8
    ['format',                    nil,             'meta','dc'],    #dc 9
    ['identifier',                nil,             'meta','dc'],    #dc 10
    ['source',                    nil,             'meta','dc'],    #dc 11
    ['language',                  nil,             'meta','dc'],    #dc 12
    ['relation',                  nil,             'meta','dc'],    #dc 13
    ['coverage',                  nil,             'meta','dc'],    #dc 14
    ['rights',                    nil,             'meta','dc'],    #dc 15
    ['keywords',                  nil,             'meta','extra'],
    ['copyright',                 nil,             'meta','extra'],
    ['translator|translated_by',  'translator',    'meta','extra'],
    ['illustrator|illustrated_by','illustrator',   'meta','extra'],
    ['prepared_by',               nil,             'meta','extra'],
    ['digitized_by',              nil,             'meta','extra'],
    ['comments?',                 nil,             'meta','extra'],
    ['abstract',                  nil,             'meta','extra'],
    ['tags?',                     nil,             'meta','extra'],
    ['catalogue',                 nil,             'meta','extra'],
    ['class(?:ify)?_loc',         'classify_loc',  'meta','extra'],
    ['class(?:ify)?_dewey',       'classify_dewey','meta','extra'],
    ['class(?:ify)?_pg',          'classify_pg',   'meta','extra'],
    ['class(?:ify)?_isbn',        'classify_isbn', 'meta','extra']
  ].freeze
  PROCESS_HEADERS=[
    ['level|page|markup',         'markup',        'process','instruct'],
    ['bold',                      nil,             'process','instruct'],
    ['italics|itali[sz]e',        'italicize',     'process','instruct'],
    ['vocabulary|wordlist',       'vocabulary',    'process','instruct'],
    ['skin',                      nil,             'process','instruct'],
    ['css|stylesheet',            'css',           'process','instruct'],
    ['links',                     nil,             'process','instruct'],
    ['prefix',                    nil,             'process','instruct'],
    ['suffix',                    nil,             'process','instruct'],
    ['information',               nil,             'process','instruct'],
    ['contact',                   nil,             'process','instruct'],
    ['rcs|cvs',                   'version',       'process','instruct']
  ].freeze
  # NOTE(review): the "0~" form has always reported toc/structure as
  # meta/extra while the "@" form reports process/instruct. The previous
  # code carried a second, unreachable "0~" branch with 'proc','instruct';
  # the reachable behaviour is kept here - confirm which one is intended.
  ZERO_RULES=(META_HEADERS + [['toc|structure','structure','meta','extra']] + PROCESS_HEADERS).map { |names,tag,kind,attr|
    [/^0~(#{names})\s+(.+?)$/,tag,kind,attr]
  }.freeze
  AT_RULES=(META_HEADERS + [['toc|structure','structure','process','instruct']] + PROCESS_HEADERS).map { |names,tag,kind,attr|
    [/^@(#{names}):\s+(.+?)$/,tag,kind,attr]
  }.freeze
  # md   - object queried for dc_title when a title header is found
  # para - text to examine; any "<~n;xn;xn><hex:hex>" marker (32 or 64 hex
  #        digits on each side of the colon) is removed from a copy of it
  def initialize(md,para)
    @regxcl=/<~\d+;\w\d+;\w\d+><(?:[0-9a-f]{32}|[0-9a-f]{64}):(?:[0-9a-f]{32}|[0-9a-f]{64})>/
    @md,@p=md,para.gsub(@regxcl,'')
  end
  # Returns [tag, tag_content, {tag=>tag_content}] when both values are set,
  # otherwise nil. type and attrib are accepted but not used.
  def extract(tag,tag_content,type,attrib)
    return nil unless tag and tag_content
    [tag,tag_content,{tag=>tag_content}]
  end
  # Stores the four header fields on this object and returns self, so the
  # accessors below can be read from the return value.
  def header(tag,tag_content,type='',attrib='')
    @tag,@tag_content,@type,@attrib=tag,tag_content,type,attrib
    self
  end
  # Accessors for the most recently stored header. They are ordinary methods
  # (previously re-defined inside every call to #header) and return nil until
  # #header has been called.
  def label #element
    @tag
  end
  def type
    @type
  end
  def text
    @tag_content
  end
  def info #element text
    @tag_content
  end
  def attribute
    @attrib
  end
  def element
    @tag
  end
  def attrib
    @attrib
  end
  def el
    @tag
  end
  # Matches the "0~name content" form; returns self or nil.
  def start_is_zero
    scan_headers(ZERO_RULES)
  end
  # Matches the "@name: content" form; returns self or nil.
  def start_is_at
    scan_headers(AT_RULES)
  end
  # Picks the header form from the leading marker and scans it.
  # Returns self when a known header was recognised, nil otherwise.
  def meta
    if @p =~ /^0~\S+\s/ then start_is_zero
    elsif @p =~ /^@\S+:[+-]?\s/ then start_is_at
    else nil
    end
  end
  # Same behaviour as #meta.
  def dublin
    meta
  end
  private
  # Tries each rule in order against the paragraph; the first match is
  # stored through #header. Returns self on a match, nil otherwise.
  def scan_headers(rules)
    rules.each do |regx,fixed_tag,kind,attr|
      m=regx.match(@p)
      next unless m
      tag=fixed_tag || m[1]
      # the title is taken from the metadata object, not from the line itself
      content=(tag=='title') ? @md.dc_title : m[2]
      return header(tag,content,kind,attr)
    end
    nil
  end
end
end
module SiSU_text_parts_flatfile
class Split_text_object
  # Splits one marked-up paragraph into a format tag, its text and the
  # number carried by its trailing "<~n;..;..><hex:hex>" marker.
  @@dl=nil                                  # digest length, looked up once and shared
  attr_reader :format,:text,:ocn,:lev_para_ocn
  # md   - passed through unchanged to Format::ParaSiSU
  # para - the paragraph to split
  def initialize(md,para)
    @md,@para=md,para
    @format,@ocn='null','null'
    #@format,@ocn=nil,nil
    @@dl ||=SiSU_Env::Info_env.new.digest.length
  end
  # Fills in format, text and ocn from the paragraph, builds lev_para_ocn
  # with Format::ParaSiSU and returns self.
  #
  # Changes from the previous version:
  # * the fallback for a paragraph without a marker now keeps the whole
  #   paragraph as text; the old lazy pattern /(.+?)/ captured only its
  #   first character
  # * two branches matching "<~(0);" were removed: they sat after branches
  #   matching "<~(\d+);" with otherwise identical patterns, so they could
  #   never be reached (and used a class variable that is not initialised
  #   anywhere in this class)
  def lev_segname_para_ocn
    # regex source of the trailing marker; its one capture group is the number
    ocn=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/.source
    if @para =~ /^(\d~|<:.+?>).+?#{ocn}.*/
      if (m=/^([1-6])~(\S+)\s+(\S.+?)#{ocn}/m.match(@para))
        # "N~name text": the name is folded into the format tag
        @format,@text,@ocn="#{m[1]}~#{m[2]}",m[3],m[4]
      elsif (m=/^([1-6]~)\s+(\S.+?)#{ocn}/m.match(@para))
        @format,@text,@ocn=m[1],m[2],m[3]
      elsif (m=/<:(.+?)>\s*(\S.+?)#{ocn}/m.match(@para))
        @format,@text,@ocn=m[1],m[2],m[3]
      end
    else
      if (m=/(.+?)#{ocn}/m.match(@para))
        @text,@ocn=m[1],m[2]
      end
      if @para !~ /#{ocn}|^$/ #added 2002w06
        @text=@para.dup                     # no marker and no empty line: text is the paragraph
      end
      if (m=/^(\d)~\S*\s+(.+)/m.match(@para))
        @format,@text=m[1],m[2]
      end
    end
    # NOTE(review): this only matches a bare "<~n>", not the "<~n;..;..>"
    # form handled above, so '<~0>' is what is normally passed on - confirm
    # that this is intended (original comment: "hmmm, watch")
    ocn_arg=(@para =~ /.+<~\d+>/) ? @ocn : '<~0>'
    @lev_para_ocn=Format::ParaSiSU.new(@md,@format,@text,ocn_arg)
    self
  end
end
end
__END__