=begin * Name: SiSU information Structuring Universe - Structured information, Serialized Units * Author: Ralph Amissah * http://www.jus.uio.no/sisu * http://www.jus.uio.no/sisu/SiSU/download.html * Description: xml output (sax style) processing * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah * License: GPL 2 or later Summary of GPL 2 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA If you have Internet connection, the latest version of the GPL should be available at these locations: http://www.fsf.org/licenses/gpl.html http://www.gnu.org/copyleft/gpl.html http://www.jus.uio.no/sisu/gpl2.fsf SiSU was first released to the public on January 4th 2005 SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system © Ralph Amissah 1997, current 2007. All Rights Reserved. 
* Ralph Amissah: ralph@amissah.com ralph.amissah@gmail.com
* Notes: tidy -xml sax.xml >> index.tidy
=end
# Document-structure pass: rewrites heading markers on paragraphs (Structure)
# and assigns node / object citation numbers to a list of paragraphs (OCN).
module SiSU_document_structure
  require "#{SiSU_lib}/dal_doc_str_tables"
  require "#{SiSU_lib}/dal_doc_str_code"
  # Rewrites the heading marker of one paragraph string in place.
  # Reads @md.markup_version and @md.lv1 .. @md.lv6; the lvN values are
  # interpolated into regexes (presumably per-document heading patterns --
  # TODO confirm against the caller that builds md).
  class Structure
    # md:   object answering markup_version and lv1..lv6
    # para: the paragraph String; it is modified with bang methods
    def initialize(md,para)
      @md,@para=md,para
    end
    # Runs the version normalisation, then the lvN-based heading detection,
    # and returns the resulting @para.
    def structure
      structure_markup_normalize
      structure_markup
      @para
    end
    # Unless the paragraph already contains a digit 0-6 followed by '~',
    # test it against each lvN pattern in order; on the first match, drop an
    # optional leading <:N> / <!N> / <:N!> / <!N!> tag and prefix the text
    # with 'N~ '. The value of the whole expression is stored back in @para.
    # NOTE(review): sub! returns nil when it substitutes nothing, so @para
    # could become nil in a matching branch -- confirm this cannot happen.
    def structure_markup
      @para=unless @para =~/[0-6]~/
        @para=case @para
        when /^\s*#{@md.lv1}/; @para.sub!(/(?:<[:!]1!?>\s*)?(.+)/,'1~ \1') if @para !~/^1~/
        when /^\s*#{@md.lv2}/; @para.sub!(/(?:<[:!]2!?>\s*)?(.+)/,'2~ \1') if @para !~/^2~/
        when /^\s*#{@md.lv3}/; @para.sub!(/(?:<[:!]3!?>\s*)?(.+)/,'3~ \1') if @para !~/^3~/
        when /^\s*#{@md.lv4}/; @para.sub!(/(?:<[:!]4!?>\s*)?(.+)/,'4~ \1') if @para !~/^4~/
        when /^\s*#{@md.lv5}/; @para.sub!(/(?:<[:!]5!?>\s*)?(.+)/,'5~ \1') if @para !~/^5~/
        when /^\s*#{@md.lv6}/; @para.sub!(/(?:<[:!]6!?>\s*)?(.+)/,'6~ \1') if @para !~/^6~/
        else @para
        end
      else @para
      end
    end
    # For markup_version >= 0.38, renumber heading markers in place:
    # leading 4~/5~/6~ become '!_', 3~/2~/1~ become 6~/5~/4~, and
    # (:)C~/(:)B~/(:)A~ become 3~/2~/1~. The order matters: each step frees
    # the number the next step writes.
    # The local 'para' receives the value of the if-expression but is not
    # read afterwards; the effect callers see is the mutation of @para.
    def structure_markup_normalize
      para=if @md.markup_version.to_f >= 0.38 #%convert internal representation, consider making 0.38 structure default ([A-C1-6] instead of [1-9]), requires downstream changes
        #keep and implement, requires downstream changes:
        #@para.gsub!(/^6~/,'9~')
        #@para.gsub!(/^5~/,'8~')
        #@para.gsub!(/^4~/,'7~')
        @para.gsub!(/^[456]~/,'!_')
        @para.gsub!(/^3~/,'6~')
        @para.gsub!(/^2~/,'5~')
        @para.gsub!(/^1~/,'4~')
        @para.gsub!(/^:?C~/,'3~')
        @para.gsub!(/^:?B~/,'2~')
        @para.gsub!(/^:?A~/,'1~')
        # On an '@level: ' / '@markup: ' header line the same mapping is applied
        # to every matching character in the paragraph, not only a leading marker.
        @para=if @para =~/^@(?:level|markup):\s/
          @para.gsub!(/3/,'6')
          @para.gsub!(/2/,'5')
          @para.gsub!(/1/,'4')
          @para.gsub!(/:?C/,'3')
          @para.gsub!(/:?B/,'2')
          @para.gsub!(/:?A/,'1')
          @para
        else @para
        end
      else @para
      end
    end
  end
  # Thin wrapper around the per-paragraph hash built by OCN#ocn.
  # Declared inside this module, so it is SiSU_document_structure::Struct.
  class Struct
    # o: Hash with the keys :txt, :node, :ocn, :lv and :type
    def initialize(o)
      @o=o
    end
    # Defines the readers txt, node, ocn, lv and type (the nested defs are
    # executed each time this method runs) and returns self.
    def structure
      def txt
        @o[:txt]
      end
      def node
        @o[:node]
      end
      def ocn
        @o[:ocn]
      end
      def lv
        @o[:lv]
      end
      def type
        @o[:type]
      end
      self
    end
  end
  # Walks the paragraph list and attaches node and object citation numbers.
  class OCN
    # md:   object answering lv1..lv6 (matched against paragraphs with =~)
    # data: enumerable of paragraph Strings; they are modified in place
    def initialize(md,data)
      @md,@data=md,data
    end
    # Returns an Array with one Struct per paragraph, each carrying
    # :txt, :node, :ocn, :lv and :type.
    #   :lv   is "N:count" for a level-N heading, "o<n>" for other numbered
    #         objects, "u<n>" for paragraphs flagged as un-numbered
    #   :type is "h<n>", "t<n>", "c<n>", "g<n>", "i<n>", "p<n>" or "u<n>"
    def ocn #and auto segment numbering increment
      data=@data
      @o_array=[]
      # All counters start at zero; ocnm, ocnk, number_small, letter_small and
      # letter are initialised here but not referenced again in live code below.
      node=ocn=ocn_dv=ocn_sp=ocnh=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnm=ocnu=ocnk=0 # h heading, o other, t table, g group, i image
      number_small,letter_small=0,0
      letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
      node_count_flag=false
      headings='' #where headings omitted an alternative form of ocn heading numbering is required for html and other linking...
      #headings=if @md.ocn.inspect =~/skip=headings/; '^(?:[A-C]|[1-9])~\S* |'
      #else ''
      #end
      # Paragraphs matching this pattern get neither a node nor an ocn.
      # NOTE(review): the run of empty alternatives, the line break inside the
      # literal and the trailing '|' look like text lost from this copy; an
      # empty alternative matches every paragraph -- restore from upstream.
      regex_exclude_ocn_and_node = /#{headings}^%{1,4}\s|^@\S+?:\s|^0~|^4~endnotes|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^<:p[bn]>|^<:\#|<:- |<[:!]!4|||||<\/tr>|
|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|/i #ocn here #  added with Tune.code #¡
      # Paragraphs matching this pattern get a node but no ocn.
      regex_exclude_ocn = /^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$/ #ocn here #  added with Tune.code #¡
      data.each do |para|
        o={}
        if para =~/\w|\S|<|\(/
          if para !~ regex_exclude_ocn_and_node
            # node numbering is switched on by the first paragraph starting '1~'
            if node_count_flag or para=~/^1~/
              node_count_flag=true
            end
            node+=1 if node_count_flag
            if para !~ regex_exclude_ocn # regex_exclude_large previously excluded
              unless para=~/<:#>|~#|-#/ # |^\s*\*\s*\*\s*\*\s*$ <-consider leaving un-numbered
                ocn+=1
                # headings: an explicit 'N~' marker or a match on one of md.lv1..lv6
                if para=~/^[1-8]~(?:\s+|\S)/ or para =~@md.lv1 or para =~@md.lv2 or para =~@md.lv3 or para =~@md.lv4 or para =~@md.lv5 or para =~@md.lv6
                  ocnh+=1
                  if para=~/^1~(?:\s+|\S)/ or para =~@md.lv1; ocnh1+=1
                    ocn_dv,ocn_sp="1:#{ocnh1}","h#{ocnh}"
                  elsif para=~/^2~(?:\s+|\S)/ or para =~@md.lv2; ocnh2+=1
                    ocn_dv,ocn_sp="2:#{ocnh2}","h#{ocnh}"
                  elsif para=~/^3~(?:\s+|\S)/ or para =~@md.lv3; ocnh3+=1
                    ocn_dv,ocn_sp="3:#{ocnh3}","h#{ocnh}"
                  elsif para=~/^4~(?:\s+|\S)/ or para =~@md.lv4; ocnh4+=1
                    ocn_dv,ocn_sp="4:#{ocnh4}","h#{ocnh}"
                  elsif para=~/^5~(?:\s+|\S)/ or para =~@md.lv5; ocnh5+=1
                    ocn_dv,ocn_sp="5:#{ocnh5}","h#{ocnh}"
                  elsif para=~/^6~(?:\s+|\S)/ or para =~@md.lv6; ocnh6+=1
                    ocn_dv,ocn_sp="6:#{ocnh6}","h#{ocnh}"
                  end
                else ocno+=1
                  # NOTE(review): the table test below is an empty regex in this
                  # copy (its pattern appears to have been stripped); as written
                  # it matches every paragraph, so the later branches are never
                  # reached -- restore from upstream.
                  if para=~//; ocnt+=1 #tables
                    ocn_dv,ocn_sp="o#{ocno}","t#{ocnt}"
                  elsif para=~/^<:code>/; ocnc+=1
                    ocn_dv,ocn_sp="o#{ocno}","c#{ocnc}"
                  elsif para=~/^<:(?:group|alt|verse)>/; ocng+=1
                    ocn_dv,ocn_sp="o#{ocno}","g#{ocng}"
                  elsif para=~/\{\S+?\.(?:png|jpg|gif)\s+/m; ocni+=1
                    ocn_dv,ocn_sp="o#{ocno}","i#{ocni}"
                  else ocnp+=1
                    ocn_dv,ocn_sp="o#{ocno}","p#{ocnp}"
                  end
                end
                o[:txt],o[:node],o[:ocn],o[:lv],o[:type]=para,node,ocn,ocn_dv,ocn_sp
              else ocnu+=1
                # paragraph carries a <:#>, ~# or -# flag: recorded with ocn 0
                #if para=~/-#/ #if implemented would remove need to keep <-#>
                #  ocnk+=1
                #  ocn_dv,ocn_sp="k#{ocnk}","u#{ocnu}"
                #else
                #  ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}"
                #end
                #para.gsub!(/<~#>|<-#>/,'') if para #get rid of need
                para.gsub!(/<~#>/,'') if para
                ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}"
                o[:txt],o[:node],o[:ocn],o[:lv],o[:type]=para,node,0,ocn_dv,ocn_sp
              end
            else o[:txt],o[:node],o[:ocn],o[:lv],o[:type]=para,node,nil,nil,nil
            end
          else
            # NOTE(review): para is rebound to the value of the multiple
            # assignment (an Array) in the first branch, and to '' for
            # '%'-prefixed lines; the =~ / gsub! calls below then run on
            # that value -- confirm this is intended.
            para=if para !~/^%{1,4}\s/
              o[:txt],o[:node],o[:ocn],o[:lv],o[:type]=para,node,nil,nil,nil
            else ''
            end
          end
        end
        para.gsub!(/\n\n/,"\n") if para =~/<:(?:code|verse|alt|group)>/ #newlines taken out
        para.gsub!(/(<:(?:code-end)>)/,"\n\\1") if para =~/<:(?:code-end)>/ #newlines added check
        # NOTE(review): the next statement is truncated in this copy (the
        # condition's pattern and the start of a substitution call are
        # missing); the surviving tail writes "~ocn;lv;type>" after a captured
        # group. It is kept verbatim and must be restored from upstream.
        if para =~//,"\\1~#{o[:ocn]};#{o[:lv]};#{o[:type]}>")
        end
        # o is always a Hash here (possibly empty), so every paragraph is appended
        @o_array << Struct.new(o).structure if o
      end
      @o_array
    end
  end
  # Re-exports the code-block handler defined in dal_doc_str_code.
  class Code < SiSU_document_structure_code::Code
  end
  # Re-exports the table handler defined in dal_doc_str_tables.
  class Tables < SiSU_document_structure_tables::Tables
  end
end
__END__