=begin
 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007 Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:

 * Download:

 * Ralph Amissah

 ** Description: modules shared by flatfile output generators

=end
module SiSU_text_utils
  # Greedy word-wrapper for plain-text output.
  #
  # The paragraph is split on whitespace; the marker "<:br>" forces a line
  # break inside the paragraph. Continuation lines are prefixed with n_indent
  # spaces and the very first line with n_hang spaces.
  class Wrap
    # para::       text to wrap (not modified)
    # n_char_max:: maximum line width, indent included
    # n_indent::   spaces placed in front of every continuation line
    # n_hang::     spaces placed in front of the first line; defaults to n_indent
    def initialize(para='',n_char_max=76,n_indent=0,n_hang=nil)
      @para,@n_char_max,@n_indent=para,n_char_max,n_indent
      @n_char_max_extend=n_char_max
      @br="\n"
      @n_hang=n_hang || @n_indent
    end
    # Returns the wrapped paragraph as a single String.
    def line_wrap
      space=' '
      spaces_indent,spaces_hang="#@br#{space*@n_indent}",space*@n_hang
      # start every call from the nominal width so repeated calls are deterministic
      @n_char_max_extend=@n_char_max
      line=0
      out=['']
      # work on a copy: the caller's string is left untouched (and may be frozen)
      para=@para.gsub(/<:br>/,"\n\n")
      para.scan(/\n\n|\S+/m).each do |word|
        if word =~/\n\n/
          # forced break: the newline is appended to the current entry, so the
          # width allowance grows by what that entry already holds
          word="\n"
          @n_char_max_extend=@n_char_max + out[line].length
        elsif (out[line].length + word.length) > (@n_char_max_extend - @n_indent) \
        and out[line] =~/\S+/
          # word does not fit: close the current line and start the next one
          @n_char_max_extend=@n_char_max
          out[line].squeeze!(' ')
          line += 1
        end
        out[line]=if out[line] \
        and out[line] !~/\S+$/m
          # empty entry, or entry ending in whitespace/newline: no separator
          "#{out[line]}#{word}"
        elsif out[line] \
        and out[line] =~/\S+/
          "#{out[line]} #{word}"
        else word.strip
        end
      end
      out.join(spaces_indent).gsub(/\A\n+/m,'').insert(0,spaces_hang)
    end
    # Wrap with both the first line and continuation lines indented by 2.
    def line_wrap_indent1
      @n_indent,@n_hang=2,2
      line_wrap
    end
    # Wrap with the first line indented by 2 and continuation lines by 4.
    def line_wrap_endnote
      @n_indent,@n_hang=4,2
      line_wrap
    end
  end
  # Recognises document header lines in either the "0~name value" or the
  # "@name: value" form and exposes the result through reader methods
  # (label/element/el, text/info, type, attribute/attrib).
  class Header_scan
    # Header rules shared by both syntaxes, in match-priority order. Each row:
    # [name pattern, canonical label (nil => use the matched name), type, attrib]
    HEADER_RULES=[
      ['title',                     nil,             'meta',   'dc'],       #dc 1
      ['creator|author',            'creator',       'meta',   'dc'],       #dc 2
      ['subject',                   nil,             'meta',   'dc'],       #dc 3
      ['description',               nil,             'meta',   'dc'],       #dc 4
      ['publisher',                 nil,             'meta',   'dc'],       #dc 5
      ['contributor',               nil,             'meta',   'dc'],       #dc 6
      ['date',                      nil,             'meta',   'dc'],       #dc 7
      ['date\.created',             nil,             'meta',   'extra'],
      ['date\.issued',              nil,             'meta',   'extra'],
      ['date\.available',           nil,             'meta',   'extra'],
      ['date\.valid',               nil,             'meta',   'extra'],
      ['date\.modified',            nil,             'meta',   'extra'],
      ['type',                      nil,             'meta',   'dc'],       #dc 8
      ['format',                    nil,             'meta',   'dc'],       #dc 9
      ['identifier',                nil,             'meta',   'dc'],       #dc 10
      ['source',                    nil,             'meta',   'dc'],       #dc 11
      ['language',                  nil,             'meta',   'dc'],       #dc 12
      ['relation',                  nil,             'meta',   'dc'],       #dc 13
      ['coverage',                  nil,             'meta',   'dc'],       #dc 14
      ['rights',                    nil,             'meta',   'dc'],       #dc 15
      ['keywords',                  nil,             'meta',   'extra'],
      ['copyright',                 nil,             'meta',   'extra'],
      ['translator|translated_by',  'translator',    'meta',   'extra'],
      ['illustrator|illustrated_by','illustrator',   'meta',   'extra'],
      ['prepared_by',               nil,             'meta',   'extra'],
      ['digitized_by',              nil,             'meta',   'extra'],
      ['comments?',                 nil,             'meta',   'extra'],
      ['abstract',                  nil,             'meta',   'extra'],
      ['tags?',                     nil,             'meta',   'extra'],
      ['catalogue',                 nil,             'meta',   'extra'],
      ['class(?:ify)?_loc',         'classify_loc',  'meta',   'extra'],
      ['class(?:ify)?_dewey',       'classify_dewey','meta',   'extra'],
      ['class(?:ify)?_pg',          'classify_pg',   'meta',   'extra'],
      ['class(?:ify)?_isbn',        'classify_isbn', 'meta',   'extra'],
      ['toc|structure',             'structure',     'process','instruct'],
      ['level|page|markup',         'markup',        'process','instruct'],
      ['bold',                      nil,             'process','instruct'],
      ['italics|itali[sz]e',        'italicize',     'process','instruct'],
      ['vocabulary|wordlist',       'vocabulary',    'process','instruct'],
      ['skin',                      nil,             'process','instruct'],
      ['css|stylesheet',            'css',           'process','instruct'],
      ['links',                     nil,             'process','instruct'],
      ['prefix',                    nil,             'process','instruct'], #add a & b
      ['suffix',                    nil,             'process','instruct'],
      ['information',               nil,             'process','instruct'],
      ['contact',                   nil,             'process','instruct'],
      ['rcs|cvs',                   'version',       'process','instruct']
    ].freeze
    # Compiled rules for the "0~name value" form.
    # NOTE(review): for toc|structure this form has always reported
    # 'meta','extra' (an identical, unreachable branch with 'proc','instruct'
    # followed it), whereas the "@name:" form reports 'process','instruct'.
    # The reachable behaviour is preserved here; confirm which one is intended.
    ZERO_RULES=HEADER_RULES.map { |names,label,type,attrib|
      type,attrib='meta','extra' if label=='structure'
      [/^0~(#{names})\s+(.+?)$/,label,type,attrib]
    }.freeze
    # Compiled rules for the "@name: value" form.
    AT_RULES=HEADER_RULES.map { |names,label,type,attrib|
      [/^@(#{names}):\s+(.+?)$/,label,type,attrib]
    }.freeze
    # md::   metadata object; only md.dc_title is read (for title headers)
    # para:: paragraph text; trailing object-citation/digest markers are stripped
    def initialize(md,para)
      @regxcl=/<~\d+;\w\d+;\w\d+><(?:[0-9a-f]{32}|[0-9a-f]{64}):(?:[0-9a-f]{32}|[0-9a-f]{64})>/
      @md,@p=md,para.gsub(@regxcl,'')
    end
    # Returns [tag, tag_content, {tag=>tag_content}] when both are given,
    # otherwise nil. type and attrib are accepted for interface compatibility
    # and are not used.
    def extract(tag,tag_content,type,attrib)
      if tag \
      and tag_content
        [tag,tag_content,{tag=>tag_content}]
      else nil
      end
    end
    # Records one parsed header on this object and returns self, so the
    # readers below can be called on the result.
    def header(tag,tag_content,type='',attrib='')
      @tag,@tag_content,@type,@attrib=tag,tag_content,type,attrib
      self
    end
    def label #element
      @tag
    end
    def type
      @type
    end
    def text
      @tag_content
    end
    def info #element text
      @tag_content
    end
    def attribute
      @attrib
    end
    def element
      @tag
    end
    def attrib
      @attrib
    end
    def el
      @tag
    end
    # Matches the paragraph against the "0~name value" rules; returns self
    # (via #header) for the first rule that matches, nil otherwise.
    def start_is_zero
      scan_rules(ZERO_RULES)
    end
    # Matches the paragraph against the "@name: value" rules; returns self
    # (via #header) for the first rule that matches, nil otherwise.
    def start_is_at
      scan_rules(AT_RULES)
    end
    # Dispatches on the header syntax used by the paragraph. The "@name:" gate
    # also lets "@name:+ " and "@name:- " through, but no rule accepts those,
    # so they yield nil.
    def dublin
      if @p =~/^0~\S+\s/; start_is_zero
      elsif @p =~/^@\S+:[+-]?\s/; start_is_at
      else nil
      end
    end
    # Same contract as #dublin.
    def meta
      dublin
    end
    # Tries each compiled rule in order against the paragraph.
    def scan_rules(rules)
      rules.each do |regx,label,type,attrib|
        m=regx.match(@p)
        next unless m
        tag=label || m[1]
        # the title text comes from the metadata object, not from the header line
        content=(tag=='title') ? @md.dc_title : m[2]
        return header(tag,content,type,attrib)
      end
      nil
    end
    private :scan_rules
  end
end
module SiSU_text_parts_flatfile
  # Splits a processed paragraph into its format marker, text and object
  # citation number, and wraps the result in a Format::ParaSiSU.
  class Split_text_object
    @@dl=nil
    # counter used by the "x<n>" substitute ids below; initialised so that
    # those branches cannot raise on an uninitialised class variable
    @@alt_id_count=0
    attr_reader :format,:text,:ocn,:lev_para_ocn
    def initialize(md,para)
      @md,@para=md,para
      @format,@ocn='null','null' #@format,@ocn=nil,nil
      # digest length is looked up once and cached for all instances
      @@dl ||=SiSU_Env::Info_env.new.digest.length
    end
    # Populates @format, @text, @ocn and @lev_para_ocn from @para; returns self.
    def lev_segname_para_ocn
      if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>.*/
        if /^([1-6])~(\S+)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
          @format,segname,@text,@ocn=$1,$2,$3,$4
          @format="#@format~#{segname}" #
        elsif /^([1-6]~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
          @format,@text,@ocn=$1,$2,$3
        elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
          @format,@text,@ocn=$1,$2,$3
        # NOTE(review): the two "<~(0);" branches below differ from the first
        # two branches only in requiring ocn 0, which (\d+) already accepts, so
        # as ordered they are never reached - confirm whether they were meant
        # to be tested first.
        elsif /^([1-6])~(\S+)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
          @@alt_id_count+=1
          @format,segname,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}"
          @format="#@format~#{segname}" #
        elsif /^([1-6]~)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
          @@alt_id_count+=1
          @format,@text,@ocn=$1,$2,"x#{@@alt_id_count}"
        end
      else
        if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
          @text,@ocn=$1,$2
        end
        if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>|^$/ #added 2002w06
          # NOTE(review): the lazy, unanchored (.+?) captures only the first
          # character of @para - confirm whether the whole text was intended.
          @text=/(.+?)/m.match(@para)[1]
        end
        if /^(\d)~\S*\s+(.+)/m.match(@para)
          @format,@text=$1,$2
        end
      end
      # NOTE(review): this tests for "<~digits>" directly, not for the
      # "<~n;x;y><digest:digest>" form matched above - verify.
      @lev_para_ocn=if @para =~/.+<~\d+>/ #hmmm, watch
        Format::ParaSiSU.new(@md,@format,@text,@ocn)
      else Format::ParaSiSU.new(@md,@format,@text,'<~0>')
      end
      self
    end
  end
end
__END__