=begin * Name: SiSU * Description: a framework for document structuring, publishing and search * Author: Ralph Amissah * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah All Rights Reserved. * License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . If you have Internet connection, the latest version of the GPL should be available at these locations: * SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system * Hompages: * Download: * Ralph Amissah ** Description: simple xml representation (node style) =end module SiSU_simple_xml_model_node require "#{SiSU_lib}/defaults" require "#{SiSU_lib}/param" include SiSU_Param include SiSU_Viz require "#{SiSU_lib}/sysenv" include SiSU_Env require "#{SiSU_lib}/dal_syntax" require "#{SiSU_lib}/dal_doc_str" require "#{SiSU_lib}/shared_xml" require "#{SiSU_lib}/xml_format" include SiSU_XML_format include SiSU_XML_munge require "#{SiSU_lib}/rexml" include SiSU_Rexml @@alt_id_count,@@tablehead,@@number_of_cols=0,0,0 @@tablefoot='' class Convert @@fns=nil def initialize(opt) @opt=opt end def read begin @md=SiSU_Param::Parameters.new(@opt).get #bug, relies on info persistence, assumes -m has previously been run 
@env=SiSU_Env::Info_env.new(@opt.fns) path=@env.path.output_tell loc=@env.url.output_tell tool=if @opt.cmd =~/[MV]/; "#{Dir.pwd}/#{@md.fn[:sxn]}\n\t#{@env.program.xml_viewer} #{path}/#{@md.fnb}/#{@md.fn[:sxn]}" elsif @opt.cmd =~/v/; "#{@env.program.web_browser} #{Dir.pwd}/#{@md.fn[:sxn]}" else '' end SiSU_Screen::Ansi.new(@opt.cmd,'invert','XML Node',"#{@md.fns} -> #{@md.fn[:sxn]}").colorize unless @opt.cmd =~/q/ tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{Dir.pwd}/#{@md.fn[:sxn]}") tell.flow if @opt.cmd =~/[MV]/ unless @@fns==@opt.fns @@fns=@opt.fns @@dal_array=[] end @dal_array=if @@dal_array.empty?; read_fnm else @@dal_array.dup #.dup #jokes on you end SiSU_simple_xml_model_node::Convert::Songsheet.new(@dal_array,@md,@env).songsheet rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error ensure #file closed in songsheet end end def read_fnm dal=[] if FileTest.file?("#{Dir.pwd}/#{@opt.fns}") dal=IO.readlines("#{Dir.pwd}/#{@opt.fns}","\n\n") else puts 'Error' end end private class Songsheet def initialize(data,md,dir) @data,@md,@env=data,md,dir end def songsheet begin SiSU_simple_xml_model_node::Convert::Scroll.new(@data,@md).songsheet SiSU_simple_xml_model_node::Convert::Tidy.new(@md,@env).xml if @md.cmd =~/[vVM]/ # test wellformedness, comment out when not in use SiSU_Rexml::Rexml.new(@md,@md.fn[:sxn]).xml if @md.cmd =~/M/ # test rexml parsing, comment out when not in use #debug rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error ensure end end end class Scroll require "#{SiSU_lib}/shared_txt" require "#{SiSU_lib}/css" include SiSU_text_utils @@xml={ :body=>[],:open=>[],:close=>[],:head=>[] } @@parent={ :ocn=>[],:node=>[] } @@offspring={ :ocn=>[],:node=>[] } @@current=nil def initialize(data='',md=nil) @data,@md=data,md @vz=SiSU_Env::Get_init.instance.skin @regx=/^(?:(?:<:p[bn]>\s*)?(?::?[A-C]~|\d~)(?:(\S+))?\s+)?(.+)/ @tab="\t" if @md @env=SiSU_Env::Info_env.new(@md.fns) @trans=SiSU_XML_munge::Trans.new(@md) end 
@sys=SiSU_Env::System_call.new @ocn=[] @node={ :ocn=>[],:no=>[] } end def songsheet pre markup post publish end protected def embedded_endnotes(para='') para.gsub!(/~\{(.+?)\}~/,'\1 ') para.gsub!(/~\[([*+])\s+(.+?)\]~/,'\2 ') end def xml_head(meta) txt=meta.text txt.gsub!(/\/{(.+?)}\//,'\1') txt.gsub!(/[*!]{(.+?)}[*!]/,'\1') txt.gsub!(/_{(.+?)}_/,'\1') txt.gsub!(/-{(.+?)}-/,'\1') txt.gsub!(//,'
') txt.gsub!(/ & /,' and ') @@xml[:head] <<< #{@tab*2}<#{meta.el}> #{@tab*3}#{txt} #{@tab*2} #{@tab} WOK end def xml_sc(md='') sc=if @md.sc_info < #{@md.sc_filename} #{@md.sc_number} #{@md.sc_date} WOK else '' end @@xml[:sc]=sc end def parent def node @@parent[:node] end def ocn @@parent[:ocn] end def prt_node parent.node.each_with_index { |a,n| puts "n.#{n} a.#{a}" unless n == 0 } #x == node end def prt_ocn parent.ocn.each_with_index { |a,n| puts "n.#{n} a.#{a}" unless n == 0 } #x == node end self end def offspring def node @@offspring=if parent.node @offspring=[] parent.node.each_with_index do |n,o| if n @offspring[n] ||=[] @offspring[n] << o end end @offspring else @@offspring end end self end def build_relationships(o='',lv='') if lv.to_s =~/[0-6]/ @node[:ocn][lv]=o.ocn @node[:no][lv]=o.node end if lv.to_s =~/^[1-6]/; @@current=lv end if o.node == 1 \ or lv == 1 @@parent[:node][o.node]=0 elsif @@current == lv \ and @@current !=nil if @node[:no][lv-1] != nil @@parent[:node][o.node]=@node[:no][lv-1] elsif @node[:no][lv-2] != nil @@parent[:node][o.node]=@node[:no][lv-2] elsif @node[:no][lv-3] != nil: @@parent[:node][o.node]=@node[:no][lv-3] else puts 'error' end elsif lv == nil if o.ocn.class == Fixnum \ and @@current @@parent[:node][o.node]=@node[:no][@@current] end else puts 'error' end if @@current == lv \ and @@current !=nil if @node[:ocn][lv-1] != nil @@parent[:ocn][o.ocn]=@node[:ocn][lv-1] elsif @node[:ocn][lv-2] != nil @@parent[:ocn][o.ocn]=@node[:ocn][lv-2] elsif @node[:ocn][lv-3] != nil: @@parent[:ocn][o.ocn]=@node[:ocn][lv-3] else puts 'error' end elsif lv == nil if o.ocn.class == Fixnum \ and @@current @@parent[:ocn][o.ocn]=@node[:ocn][@@current] end else puts 'error' end end def node_structure(o='',para='',lv='',hname='') #extracted endnotes if o.ocn lv=lv.to_i lv=nil if lv == 0 build_relationships(o,lv) end end def xml_structure(o='',para='',lv='',hname='') #extracted endnotes if o.ocn puts para if lv and @md.cmd =~/M/ lv=lv.to_i n=lv - 1 n3=lv + 2 
lv=nil if lv == 0 embedded_endnotes(para) if para[@regx] paragraph="#{para[@regx,2]}" util=SiSU_text_utils::Wrap.new(paragraph,70) wrapped=util.line_wrap end if @md.cmd =~/[VM]/ if offspring.node[o.node] puts "#{o.node}::#{parent.node[o.node]}::[#{offspring.node[o.node].join(',')}]" else puts "#{o.node}::#{parent.node[o.node]}" end end @@xml[:body] << "#{@tab*0}" if para[@regx] #@@xml[:body] << "#{@tab*1}#{lv}" << "\n" if lv @@xml[:body] << "#{@tab*1}" << "\n" if para[@regx] if @md.mod.inspect =~/odf/ #condition not currently present, but consider @@xml[:body] << %{#{@tab*1}#{o.ocn}\n} end @@xml[:body] << if lv; %{#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n} << "\n" elsif wrapped =~/\A%%?\s+/; %{\n} # comments [not included, review] else %{#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n} # main text, contents, body KEEP end #@@xml[:body] << "#{@tab*1}\n#{@tab*2}#{wrapped}\n#{@tab*1}\n" # main text, contents, body KEEP @@xml[:body] << "#{@endnotes}" if @endnotes # main text, endnotes KEEP ##@@xml[:body] << "#{@tab*1}#{para[@regx,2]}\n" if para[@regx,2] # old unwrapped main text, contents, body KEEP @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{o.node}\n} @@xml[:body] << %{#{@tab*2}#{parent.node[o.node]}\n} if offspring.node[o.node] @@xml[:body] << %{#{@tab*2}#{offspring.node[o.node].join(',')}\n} end @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << "#{@tab*0}" << "\n" if para[@regx] @endnotes=[] end end def group_structure(o='',para='') para.gsub!(/<:group(?:-end)?>/,'') para.strip! 
@@xml[:body] << %{#{@tab*0}} if @md.mod.inspect =~/odf/ @@xml[:body] << %{#{@tab*1}#{o.ocn}\n} end @@xml[:body] << %{#{@tab*1}#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{o.node}\n} @@xml[:body] << %{#{@tab*2}#{parent.node[o.node]}\n} if offspring.node[o.node] @@xml[:body] << %{#{@tab*2}#{offspring.node[o.node].join(',')}\n} end @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << "#{@tab*0}" end def poem_structure(o='',para='') para.gsub!(/<:verse(?:-end)?>/,'') para.strip! @@xml[:body] << %{#{@tab*0}} if @md.mod.inspect =~/odf/ @@xml[:body] << %{#{@tab*1}#{o.ocn}\n} end @@xml[:body] << %{#{@tab*1}#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{o.node}\n} @@xml[:body] << %{#{@tab*2}#{parent.node[o.node]}\n} if offspring.node[o.node] @@xml[:body] << %{#{@tab*2}#{offspring.node[o.node].join(',')}\n} end @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << "#{@tab*0}" << "\n" end def code_structure(o='',para='') para.gsub!(/<:code(?:-end)?>/,'') para.strip! 
@@xml[:body] << %{#{@tab*0}} if @md.mod.inspect =~/odf/ @@xml[:body] << %{#{@tab*1}#{o.ocn}\n} end @@xml[:body] << %{#{@tab*1}#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{o.node}\n} @@xml[:body] << %{#{@tab*2}#{parent.node[o.node]}\n} if offspring.node[o.node] @@xml[:body] << %{#{@tab*2}#{offspring.node[o.node].join(',')}\n} end @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << "#{@tab*0}" << "\n" end def table_structure(o='',table='') #tables @@xml[:body] << %{#{@tab*0}} if @md.mod.inspect =~/odf/ @@xml[:body] << %{#{@tab*1}#{o.ocn}\n} end @@xml[:body] << %{#{@tab*1}#{table}\n#{@tab*1}\n} # unless lv # main text, contents, body KEEP @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << %{#{@tab*2}#{o.node}\n} @@xml[:body] << %{#{@tab*2}#{parent.node[o.node]}\n} if offspring.node[o.node] @@xml[:body] << %{#{@tab*2}#{offspring.node[o.node].join(',')}\n} end @@xml[:body] << %{#{@tab*1}\n} @@xml[:body] << "#{@tab*0}" << "\n" #if para[@regx] @endnotes=[] end def markup data=[] @data=@data.join.split("\n\n") @data=SiSU_document_structure::Code.new(@md,@data).code @data.each do |para| data << SiSU_document_structure::Structure.new(@md,para).structure end data=Syntax::Markup.new(@md,data).songsheet data=SiSU_document_structure::Tables.new(@md,data).tables obj=SiSU_document_structure::OCN.new(@md,data).ocn obj.compact! 
data=nil dir=SiSU_Env::Info_env.new(@md.fns) xml_sc(@md) @endnotes,@level,@cont,@copen,@xml_contents_close=[],[],[],[],[] @rcdc=false (0..6).each { |x| @cont[x]=@level[x]=false } (4..6).each { |x| @xml_contents_close[x]='' } obj.each do |o| para=o.txt unless o.txt =~/^%% / #comments are lost, consider if para para=@trans.markup_light(para) @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 if para =~/\A(?:@|0~)(\S+?):?\s+(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta if d_meta; xml_head(d_meta) end end end end obj.each do |o| para=o.txt unless o.txt =~/^%% / #comments are lost, consider if para if @rcdc==false \ and (para =~/~metadata/ or para =~/^1~meta\s+Document Information/) if para !~/(^0~|^@\S+?:|^\s*$||)/ @rcdc=true end @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para unless @rcdc format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ case @sto.format when /^(1)~(\S+)?/ node_structure(o,para,$1,$2) when /^(2)~(\S+)?/ node_structure(o,para,$1,$2) when /^(3)~(\S+)?/ node_structure(o,para,$1,$2) when /^(4)~(\S+)?/ # work on see Split_text_object node_structure(o,para,$1,$2) when /^(5)~(\S+)?/ node_structure(o,para,$1,$2) when /^(6)~(\S+)?/ node_structure(o,para,$1,$2) else if para =~ /<:verse>/ node_structure(o,para) elsif para =~ /<:group>/ node_structure(o,para) elsif para =~ /<:code>/ node_structure(o,para) elsif para =~/|)/ @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para unless @rcdc format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/ case @sto.format when /^(1)~(\S+)?/ xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body1 when /^(2)~(\S+)?/ xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body2 when /^(3)~(\S+)?/ xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body3 when /^(4)~(\S+)?/ # work on see Split_text_object xml_structure(o,para,$1,$2) 
para=@sto.lev_para_ocn.heading_body4 when /^(5)~(\S+)?/ xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body5 when /^(6)~(\S+)?/ xml_structure(o,para,$1,$2) para=@sto.lev_para_ocn.heading_body6 else if para =~ /<:verse>/ poem_structure(o,para) elsif para =~ /<:group>/ group_structure(o,para) elsif para =~ /<:code>/ para.gsub!(//,'>') code_structure(o,para) elsif para =~// \ and para =~/^(-\{{2}~\d+|)/ # -endnote para='' end if para =~/.*<:#>.*$/ para=case para when /<:i1>/ format_text=Format_text_object.new(para,'') format_text.scr_inden_ocn_e_no_paranum when /<:i2>/ format_text=Format_text_object.new(para,'') format_text.scr_inden_ocn_e_no_paranum end end if para !~/#{@vz.margin_txt_0}|#{@vz.margin_txt_1}|#{@vz.margin_txt_2}/ # i don't get the condition for no paranum end if para =~/<:center>/ one,two=/(.*)<:center>(.*)/.match(para)[1,2] format_text=Format_text_object.new(one,two) para=format_text.center end end para.gsub!(/<:\S+?>/,'') para.gsub!(//,'') ## Clean Prepared Text #bugwatch reinstate end end end 6.downto(4) do |x| y=x - 1; v=x - 3 @@xml[:body] << "#{@tab*5}\n#{@tab*y}\n" if @level[x] == true end 3.downto(1) do |x| y=x - 1 @@xml[:body] << "#{@tab*y}\n" if @level[x] == true end #6.downto(1) { |x| y=x - 1; @@xml[:body] << "#{@tab*y}\n" if @level[x] == true } end def pre rdf=SiSU_XML_tags::RDF.new(@md) dir=SiSU_Env::Info_env.new @@xml[:head],@@xml[:body]=[],[] css=SiSU_Env::CSS_select.new(@md).xml_sax encoding=if @sys.locale =~/utf-?8/i; '' else '' end @@xml[:open] =< #{rdf.comment_xml_node} WOK @@xml[:head] << "\n" @@xml[:body] << "\n" end def post @@xml[:head] << @@xml[:sc] @@xml[:head] << "\n" @@xml[:body] << "\n" @@xml[:close] = "\n" end def publish content=[] data=@data content << @@xml[:open] << @@xml[:head] << @@xml[:body] << @@xml[:metadata] content << @@xml[:owner_details] if @md.stmp =~/\w\w/ content << @@xml[:tail] << @@xml[:close] Output.new(content.to_s,@md).xml @@xml={} end end class Output include SiSU_Param include SiSU_Env def 
initialize(data,md) @data,@md=data,md end def xml @sisu=[] @data.each do |para| para.gsub!(/<:\S+?>/,'') para.gsub!(//,'') para="#{para}\n" unless para.empty? @sisu << para end new_file_data=@sisu.to_s @sisu=new_file_data.scan(/.+/) SiSU_Env::SiSU_file.new(@md).mkdir filename_sxm=SiSU_Env::SiSU_file.new(@md,@md.fn[:sxn]).mkfile_pwd if filename_sxm.class == File @sisu.each {|para| filename_sxm.puts para} filename_sxm.close else puts 'file not created, is directory writable?' end end end class Tidy def initialize(md,dir) @md,@env=md,dir @prog=SiSU_Env::Info_program.new end def xml if @prog.tidy !=false #note values can be other than true if @md.cmd =~/[VM]/ tell=SiSU_Screen::Ansi.new(@md.cmd,'invert','Using XML Tidy','check document structure') tell.colorize unless @md.cmd =~/q/ tell.grey_open unless @md.cmd =~/q/ tidyfile='/dev/null' #don't want one or screen output, check for alternative flags tidy=SiSU_Env::System_call.new("#{Dir.pwd}/#{@md.fn[:sxn]}",tidyfile) tidy.well_formed? tell.p_off unless @md.cmd =~/q/ end end end end end end __END__ Notes: ocn ocn are given to substantive text objects nodes nodes != ocn nodes are given to every text object (regardless of whether or not it is "substantive" and/or introduced by the editor just for structuring purposes) nodes are required for a node/tree based representation of content, e.g. a document mapping tool such as kdissert nodes are given to all structural divisions/headings whether or not they are given an ocn why arn't all structural divisions given an ocn? sometimes to structure documents correctly, it is necessary to introduce a dummy level, e.g. 
  using a marker like 4~ [Preamble]-#
  these are heading items that are not in the original text, and do not warrant an ocn;
  they do however require a node value
  [at other times the editor introduces a comment that is not to be recognised as a part of the original text]

  it is unfortunate that the nature of documents is such that ocn != nodes
  [that ocn should be the equivalent of nodes has been rejected]

map
  parent x
    parent of 1 is either 0 or, as in kdissert, -1 (i prefer 0)
  offspring x..y
  siblings - gratuitous, x..y and therefore optional, but might as well