aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/0.52/shared_txt.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/0.52/shared_txt.rb')
-rw-r--r--lib/sisu/0.52/shared_txt.rb299
1 files changed, 0 insertions, 299 deletions
diff --git a/lib/sisu/0.52/shared_txt.rb b/lib/sisu/0.52/shared_txt.rb
deleted file mode 100644
index bd0b41cc..00000000
--- a/lib/sisu/0.52/shared_txt.rb
+++ /dev/null
@@ -1,299 +0,0 @@
-=begin
- * Name: SiSU information Structuring Universe - Structured information, Serialized Units
- * Author: Ralph Amissah
- * http://www.jus.uio.no/sisu
- * http://www.jus.uio.no/sisu/SiSU/download.html
-
- * Description: modules shared by flatfile output generators
-
- * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah
-
- * License: GPL 2 or later
-
- Summary of GPL 2
-
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along
- with this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
-
- If you have Internet connection, the latest version of the GPL should be
- available at these locations:
- http://www.fsf.org/licenses/gpl.html
- http://www.gnu.org/copyleft/gpl.html
- http://www.jus.uio.no/sisu/gpl2.fsf
-
- SiSU was first released to the public on January 4th 2005
-
- SiSU uses:
-
- * Standard SiSU markup syntax,
- * Standard SiSU meta-markup syntax, and the
- * Standard SiSU object citation numbering and system
-
- © Ralph Amissah 1997, current 2007.
- All Rights Reserved.
-
- * Ralph Amissah: ralph@amissah.com
- ralph.amissah@gmail.com
-=end
-module SiSU_text_utils
- class Wrap
- def initialize(para='',n_char_max=76,n_indent=0,n_hang=nil)
- @para,@n_char_max,@n_indent=para,n_char_max,n_indent
- @br="\n"
- @n_hang=unless n_hang; @n_hang=@n_indent
- else n_hang
- end
- end
- def line_wrap
- space=' '
- spaces_indent,spaces_hang="#@br#{space*@n_indent}",space*@n_hang
- line=0
- out=[]
- out[line]=''
- #line=0,out,out[line]=0,[],''
- #@para.gsub!(/<br(?: \/)?>/,"\n") #watch #added
- words=@para.scan(/\S+/)
- while words != ''
- word=words.shift
- if not word
- out[line].strip!.squeeze!(' ') unless out[line].empty? #check
- break
- elsif (out[line].length + word.length) > (@n_char_max - @n_indent) and out[line] =~/\S+/
- out[line].strip!.squeeze!(' ')
- line += 1
- end
- out[line]="#{out[line]} #{word}" if word
- end
- out.join(spaces_indent).gsub(/\A\n+/m,'').insert(0,spaces_hang)
- end
- def line_wrap_indent1
- @n_indent,@n_hang=2,2
- line_wrap
- end
- def line_wrap_endnote
- @n_indent,@n_hang=4,2
- line_wrap
- end
- end
-#end
-#module SiSU_scan
- class Header_scan
- def initialize(md,para)
- @regxcl=/<~\d+;\w\d+;\w\d+><(?:[0-9a-f]{32}|[0-9a-f]{64}):(?:[0-9a-f]{32}|[0-9a-f]{64})>/
- para=para.gsub(@regxcl,'').dup
- @md,@p=md,para
- end
- def extract(tag,tag_content,type,attrib)
- dc=if dc_tag and dc_content
- [dc_tag,dc_content,{dc_tag=>dc_content}]
- else nil
- end
- end
- def header(tag,tag_content,type='',attrib='') #this will break stuff and must be tested thoroughly 20060825
- @tag,@tag_content,@type,@attrib=tag,tag_content,type,attrib
- def label #element
- @tag
- end
- def type
- @type
- end
- def text
- @tag_content
- end
- def info #element text
- @tag_content
- end
- def attribute
- @attrib
- end
- def element
- @tag
- end
- def attrib
- @attrib
- end
- def el
- @tag
- end
- self
- end
- def start_is_zero
- meta=case @p
- when /^0~(title)\s+(.+?)$/; header($1,@md.dc_title,'meta','dc') #dc 1
- #when /^0~(subtitle)\s+(.+?)$/; header($1,$2)
- when /^0~(creator|author)\s+(.+?)$/; header('creator',$2,'meta','dc') #dc 2
- when /^0~(subject)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 3
- when /^0~(description)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 4
- when /^0~(publisher)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 5
- when /^0~(contributor)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 6
- when /^0~(date)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 7
- when /^0~(date\.created)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(date\.issued)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(date\.available)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(date\.valid)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(date\.modified)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(type)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 8
- when /^0~(format)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 9
- when /^0~(identifier)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 10
- when /^0~(source)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 11
- when /^0~(language)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 12
- when /^0~(relation)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 13
- when /^0~(coverage)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 14
- when /^0~(rights)\s+(.+?)$/; header($1,$2,'meta','dc') #dc 15
- when /^0~(keywords)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(copyright)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(translator|translated_by)\s+(.+?)$/; header('translator',$2,'meta','extra')
- when /^0~(illustrator|illustrated_by)\s+(.+?)$/; header('illustrator',$2,'meta','extra')
- when /^0~(prepared_by)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(digitized_by)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(comments?)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(abstract)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(tags?)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(catalogue)\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^0~(class(?:ify)?_loc)\s+(.+?)$/; header('classify_loc',$2,'meta','extra')
- when /^0~(class(?:ify)?_dewey)\s+(.+?)$/; header('classify_dewey',$2,'meta','extra')
- when /^0~(class(?:ify)?_pg)\s+(.+?)$/; header('classify_pg',$2,'meta','extra')
- when /^0~(class(?:ify)?_isbn)\s+(.+?)$/; header('classify_isbn',$2,'meta','extra')
- when /^0~(toc|structure)\s+(.+?)$/; header('structure',$2,'meta','extra')
- when /^0~(toc|structure)\s+(.+?)$/; header('structure',$2,'proc','instruct')
- when /^0~(level|page|markup)\s+(.+?)$/; header('markup',$2,'process','instruct')
- when /^0~(bold)\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^0~(italics|itali[sz]e)\s+(.+?)$/; header('italicize',$2,'process','instruct')
- when /^0~(vocabulary|wordlist)\s+(.+?)$/; header('vocabulary',$2,'process','instruct')
- when /^0~(skin)\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^0~(css|stylesheet)\s+(.+?)$/; header('css',$2,'process','instruct')
- when /^0~(links)\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^0~(prefix)\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^0~(suffix)\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^0~(information)\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^0~(contact)\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^0~(rcs|cvs)\s+(.+?)$/; header('version',$2,'process','instruct')
- else nil
- end
- end
- def start_is_at
- meta=case @p
- when /^@(title):\s+(.+?)$/; header($1,@md.dc_title,'meta','dc') #dc 1
- #when /^@(subtitle):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(creator|author):\s+(.+?)$/; header('creator',$2,'meta','dc') #dc 2
- when /^@(subject):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 3
- when /^@(description):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 4
- when /^@(publisher):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 5
- when /^@(contributor):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 6
- when /^@(date):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 7
- when /^@(date\.created):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(date\.issued):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(date\.available):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(date\.valid):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(date\.modified):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(type):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 8
- when /^@(format):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 9
- when /^@(identifier):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 10
- when /^@(source):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 11
- when /^@(language):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 12
- when /^@(relation):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 13
- when /^@(coverage):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 14
- when /^@(rights):\s+(.+?)$/; header($1,$2,'meta','dc') #dc 15
- when /^@(keywords):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(copyright):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(translator|translated_by):\s+(.+?)$/; header('translator',$2)
- when /^@(illustrator|illustrated_by):\s+(.+?)$/; header('illustrator',$2)
- when /^@(prepared_by):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(digitized_by):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(comments?):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(abstract):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(tags?):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(catalogue):\s+(.+?)$/; header($1,$2,'meta','extra')
- when /^@(class(?:ify)?_loc):\s+(.+?)$/; header('classify_loc',$2,'meta','extra')
- when /^@(class(?:ify)?_dewey):\s+(.+?)$/; header('classify_dewey',$2,'meta','extra')
- when /^@(class(?:ify)?_pg):\s+(.+?)$/; header('classify_pg',$2,'meta','extra')
- when /^@(class(?:ify)?_isbn):\s+(.+?)$/; header('classify_isbn',$2,'meta','extra')
- when /^@(toc|structure):\s+(.+?)$/; header('structure',$2,'process','instruct')
- when /^@(level|page|markup):\s+(.+?)$/; header('markup',$2,'process','instruct')
- when /^@(bold):\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^@(italics|itali[sz]e):\s+(.+?)$/; header('italicize',$2,'process','instruct')
- when /^@(vocabulary|wordlist):\s+(.+?)$/; header('vocabulary',$2,'process','instruct')
- when /^@(skin):\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^@(css|stylesheet):\s+(.+?)$/; header('css',$2,'process','instruct')
- when /^@(links):\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^@(prefix):\s+(.+?)$/; header($1,$2,'process','instruct') #add a & b
- when /^@(suffix):\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^@(information):\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^@(contact):\s+(.+?)$/; header($1,$2,'process','instruct')
- when /^@(rcs|cvs):\s+(.+?)$/; header('version',$2,'process','instruct')
- else nil
- end
- end
- def dublin
- out=if @p =~/^0~\S+\s/; start_is_zero
- elsif @p =~/^@\S+:[+-]?\s/; start_is_at
- else nil
- end
- end
- def meta
- out=if @p =~/^0~\S+\s/; start_is_zero
- elsif @p =~/^@\S+:[+-]?\s/; start_is_at
- else nil
- end
- end
- end
-end
-module SiSU_text_parts_flatfile
- class Split_text_object
- @@dl=nil
- attr_reader :format,:text,:ocn,:lev_para_ocn
- def initialize(md,para)
- @md,@para=md,para
- @format,@ocn='null','null'
- #@format,@ocn=nil,nil
- @@dl ||=SiSU_Env::Info_env.new.digest.length
- end
- def lev_segname_para_ocn
- if @para =~/^(\d~|<:.+?>).+?<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>.*/
- if /^([1-6])~(\S+)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
- @format,segname,@text,@ocn=$1,$2,$3,$4
- @format="#@format~#{segname}" #
- elsif /^([1-6]~)\s+(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
- @format,@text,@ocn=$1,$2,$3 #,$4
- elsif /<:(.+?)>\s*(\S.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
- @format,@text,@ocn=$1,$2,$3
- elsif /^([1-6])~(\S+)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
- @@alt_id_count+=1
- @format,segname,@text,@ocn=$1,$2,$3,"x#{@@alt_id_count}"
- @format="#@format~#{segname}" #
- elsif /^([1-6]~)\s+(\S.+?)<~(0);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
- @@alt_id_count+=1
- @format,@text,@ocn=$1,$2,"x#{@@alt_id_count}"
- end
- else
- if /(.+?)<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m.match(@para)
- @text,@ocn=$1,$2
- end
- if @para !~/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>|^$/ #added 2002w06
- @text=/(.+?)/m.match(@para)[1]
- end
- if /^(\d)~\S*\s+(.+)/m.match(@para)
- @format,@text=$1,$2
- end
- end
- @lev_para_ocn=if @para =~/.+<~\d+>/ #hmmm, watch
- Format::ParaSiSU.new(@md,@format,@text,@ocn)
- else Format::ParaSiSU.new(@md,@format,@text,'<~0>')
- end
- self
- end
- end
-end
-__END__
-