=begin * Name: SiSU information Structuring Universe - Structured information, Serialized Units * Author: Ralph Amissah * http://www.jus.uio.no/sisu * http://www.jus.uio.no/sisu/SiSU/download.html * Description: xml (dom style) output processing * Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Ralph Amissah * License: GPL 2 or later Summary of GPL 2 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA If you have Internet connection, the latest version of the GPL should be available at these locations: http://www.fsf.org/licenses/gpl.html http://www.gnu.org/copyleft/gpl.html http://www.jus.uio.no/sisu/gpl2.fsf SiSU was first released to the public on January 4th 2005 SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system © Ralph Amissah 1997, current 2007. All Rights Reserved. 
* Ralph Amissah: ralph@amissah.com ralph.amissah@gmail.com * Notes: tidy -xml dom.xml >> index.tidy
=end
# XML output processing for the file named by @md.fn[:docbook].
#
# Flow as visible in this file: Source#read builds the document parameters and
# environment, Songsheet drives Scroll (which assembles the output text in the
# @@xml hash) and then the optional Tidy / Rexml checks, and Output writes the
# assembled text to file.
#
# NOTE(review): several statements below look damaged in this copy (heredoc
# openers, tag literals and some regex bodies appear to be missing). They are
# reproduced as found and marked individually; confirm against a pristine copy
# before changing behaviour.
module SiSU_Docbook
  require "#{SiSU_lib}/defaults"
  require "#{SiSU_lib}/param"
  include SiSU_Param
  include SiSU_Viz
  require "#{SiSU_lib}/sysenv"
  include SiSU_Env
  require "#{SiSU_lib}/dal"
  require "#{SiSU_lib}/shared_xml"
  require "#{SiSU_lib}/xml_format"
  include SiSU_XML_format
  include SiSU_XML_munge
  require "#{SiSU_lib}/rexml"
  include SiSU_Rexml
  # Module-level counters/strings; not referenced again in the code visible here.
  @@alt_id_count,@@tablehead,@@number_of_cols=0,0,0
  @@tablefoot=''
  # Entry point: wraps the command options and runs the whole conversion.
  class Source
    # opt: options object; this class reads opt.cmd (flag string) and opt.fns.
    def initialize(opt)
      @opt=opt
    end
    # Builds @md (parameters) and @env (environment info), prints progress
    # messages depending on the flags in @opt.cmd, fetches the document array
    # from SiSU_DAL::Source and passes it to Songsheet.
    # Any StandardError is handed to SiSU_Errors::Info_error rather than re-raised.
    def read
      begin
        @md=SiSU_Param::Parameters.new(@opt).get
        @env=SiSU_Env::Info_env.new(@opt.fns)
        path=@env.path.output_tell
        loc=@env.url.output_tell
        # Hint text naming the viewer command(s): browser + xml viewer for M/V,
        # browser only for v, otherwise empty.
        tool=if @opt.cmd =~/[MV]/; "#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:docbook]}\n\t#{@env.program.xml_viewer} #{path}/#{@md.fnb}/#{@md.fn[:docbook]}"
        elsif @opt.cmd =~/v/; "#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:docbook]}"
        else ''
        end
        tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','XML DOM',tool)
        tell.colorize unless @opt.cmd =~/q/
        tell=SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:docbook]}")
        tell.flow if @opt.cmd =~/[MV]/
        @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
        SiSU_Docbook::Source::Songsheet.new(@dal_array,@md,@env).songsheet
      rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
      ensure #file closed in songsheet
      end
    end
    private
    # Sequences the processing steps for one document.
    class Songsheet
      # data: document array; md: parameters object; dir: environment object
      # (stored as @env).
      def initialize(data,md='',dir='')
        @data,@md,@env=data,md,dir
      end
      # Runs Scroll, then Tidy when @md.cmd matches /[vVM]/i and Rexml when it
      # matches /M/. Errors are reported through SiSU_Errors::Info_error.
      def songsheet
        begin
          SiSU_Docbook::Source::Scroll.new(@data,@md).songsheet
          SiSU_Docbook::Source::Tidy.new(@md,@env).xml if @md.cmd =~/[vVM]/i # test wellformedness, comment out when not in use
          SiSU_Rexml::Rexml.new(@md,@md.fn[:docbook]).xml if @md.cmd =~/M/ # test rexml parsing, comment out when not in use #debug
        rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error
        ensure
        end
      end
    end
    # Walks the document array and accumulates the output text in @@xml.
    class Scroll
      # Element names by level: Heading for levels 1-3, Contents (indexed by
      # level - 3) for levels 4-6.
      Heading,Heading_close,Contents=[],[],[]
      Heading[0]='part'
      Heading[1]='part level="1"'
      Heading[2]='part level="2"'
      Heading[3]='part level="3"'
      Heading_close[1]=Heading[0]
      Heading_close[2]=Heading[0]
      Heading_close[3]=Heading[0]
      #Contents[0]='preface'
      Contents[1]='chapter'
      Contents[2]='sect1'
      Contents[3]='sect2'
      # Shared output buffers, keyed by document section.
      # NOTE(review): `:sc=[]` is reproduced as found; the other keys use `=>` — confirm.
      @@xml={ :body=>[],:open=>[],:close=>[],:head=>[],:sc=[] }
      @@dp=nil
      require "#{SiSU_lib}/shared_txt"
      include SiSU_text_utils
      # data: document array; md: parameters object.
      # @dp caches the digest pattern in @@dp; @regx captures, in order, an
      # optional name after the leading "N~" marker, the paragraph text, and the
      # number from the trailing "<~N;...>" stamp.
      def initialize(data='',md='')
        @data,@md=data,md
        @vz=SiSU_Env::Get_init.instance.skin
        @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern
        @regx=/^(?:(?:<:p[bn]>\s*)?\d~(?:(\S+))?\s+)?(.+?)\s*<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
        @tab="\t"
        @trans=SiSU_XML_munge::Trans.new(@md)
        @sys=SiSU_Env::System_call.new
      end
      # Runs the four stages in order: pre, markup, post, publish.
      def songsheet
        pre
        markup
        post
        publish
      end
      protected
      # Rewrites "~{N text <digest>}~" spans in para, in place, to "N text ".
      # Returns nil when nothing matched (gsub! semantics).
      def xml_markup(para='')
        para.gsub!(/~\{(\d+)\s+(.+?)\s*<#@dp>\}~/, '\1 \2 ')
      end
      # Appends the text of a header object to @@xml[:head] when meta.type is
      # 'meta', otherwise appends ''.
      # NOTE(review): the first gsub! pattern is empty and the heredoc opener /
      # tag lines appear to be missing in this copy — confirm.
      def xml_head(meta)
        txt=meta.text
        txt.gsub!(//,'')
        txt.gsub!(/ & /,' and ')
        @@xml[:head] <<=if meta.type == 'meta' < #{@tab*2}#{txt} #{@tab} WOK
        else ''
        end
      end
      # Stores source-control info (filename, number, date from @md) in
      # @@xml[:sc] when @md.sc_info is set, otherwise ''.
      # NOTE(review): heredoc opener / tag lines appear to be missing — confirm.
      def xml_sc(md='')
        sc=if @md.sc_info < #{@md.sc_filename} #{@md.sc_number} #{@md.sc_date} WOK
        else ''
        end
        @@xml[:sc]=sc
      end
      # Appends the element for a heading at level lv to @@xml[:body] and
      # records which of the levels 4-6 is currently open in @copen.
      # NOTE(review): the append statement looks truncated in this copy — confirm.
      def xml_element(lv='',ocn='',para='',hname='',tag='',xml_element='')
        lv=lv.to_i
        n=lv - 1
        n1=lv
        n2=lv + 1
        n3=lv + 2
        v=lv - 3
        tag=''
        tag="\n#{@tab*n3}#{hname}\n" if hname
        @@xml[:body] <<<#{para[@regx, 2]} WOK
        if lv == 4
          @copen[1]=true
          @copen[2]=@copen[3]=false
        elsif lv == 5
          @copen[2]=true
          @copen[3]=false
        elsif lv == 6
          @copen[3]=true
        end
      end
      # Handles a heading paragraph at level lv (1-6): emits closing text for
      # whatever is open, works out the opening element, then delegates to
      # xml_element and updates @level.
      # NOTE(review): the appended closing strings contain only tabs/newline in
      # this copy; closing-tag text appears to be missing — confirm.
      def xml_structure(lv='',ocn='',para='',hname='' )
        lv=lv.to_i
        n=lv - 1
        n1=lv
        n2=lv + 1
        n3=lv + 2
        v=lv - 3
        tag=''
        tag="\n#{@tab*n3}#{hname}\n" if hname !=nil
        #if para[@regx]
        # paragraph="#{para[@regx, 2]}"
        # util=SiSU_text_utils::Paragraph.new(paragraph, 70)
        # wrapped=util.line_wrap
        #end
        case lv
        when 1..3
          xml_element="<#{Heading[lv]}>"
          3.downto(lv) do |x|
            y=x - 1
            @cont[1]=false if @cont[1]
            @cont[2]=false if @cont[2]
            @cont[3]=false if @cont[3]
            ####### attempt to close contents
            if @copen[3] # 6~
              [3,2,1].each do |v|
                @@xml[:body] << "#{@tab*n}\n"
              end
              @copen[1]=@copen[2]=@copen[3]=false
            elsif @copen[2] # 5~
              [2,1].each do |v|
                @@xml[:body] << "#{@tab*n}\n"
              end
              @copen[1]=@copen[2]=@copen[3]=false
            elsif @copen[1] # 4~
              [1].each do |v|
                @@xml[:body] << "#{@tab*n}\n"
              end
              @copen[1]=@copen[2]=@copen[3]=false
            end
            @@xml[:body] << "#{@tab*y}\n" if @level[x]
            @level[x]=false
          end
        when 4..6
          6.downto(lv) do |x|
            y=x - 1
            if @level[x] == true
              u=x - 3; @xml_contents_close[x]=''
            end
          end
          cv=lv - 3
          # id is derived from the name following the level marker:
          # "ch<N>" for a numeric level-4 name, else "ch_", "sec_" or "subsec_" + name.
          if para =~/^4~\S+/
            m=/^4~(\S+)/.match(para)[1]
            id=if m =~/^\d+$/; 'ch' + m
            else 'ch_' + m
            end
          elsif para =~/^5~\S+/
            m=/^5~(\S+)/.match(para)[1]
            id= 'sec_' + m
          elsif para =~/^6~\S+/
            m=/^6~(\S+)/.match(para)[1]
            id= 'subsec_' + m
          else ''
          end
          xml_element=%{<#{Contents[cv]} id="#{id}">} #hmmm gsub were it possible
          case lv
          when 4
            if @copen[3] == true # 6~
              [3,2,1].each do |v|
                @@xml[:body] << "#{@tab*n}\n"
              end
            elsif @copen[2] == true # 5~
              [2,1].each do |v|
                @@xml[:body] << "#{@tab*n}\n"
              end
            elsif @copen[1] == true # 4~
              [1].each do |v|
                @@xml[:body] << "#{@tab*n}\n"
              end
            end
            @cont[1]=true
          when 5
            if @copen[3] == true #6~
              [3,2].each do |v|
                @@xml[:body] << "#{@tab*n}\n"
              end
            elsif @copen[2] == true #5~
              [2].each do |v|
                @@xml[:body] << "#{@tab*n}\n"
              end
            end
            @cont[2]=true
          when 6
            [3].each do |v|
              @@xml[:body] << "#{@tab*n}\n" if @copen[3] #watch should possibly be outside...
            end
            @cont[3]=true
          end
        end
        xml_element(lv,ocn,para,hname,tag,xml_element)
        @level[lv]=true
        # Everything deeper than the level just opened is marked closed.
        ((lv+1)..6).each { |x| @level[x]=false }
      end
      # Strips the <:group>/<:group-end> markers from para (in place) and appends
      # it to @@xml[:body] between two tab-indented lines.
      # NOTE(review): wrapper tag text appears to be missing from the literals — confirm.
      def group_structure(para='',ocn='')
        para.gsub!(/<:group(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*7}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*8}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*7}\n}
      end
      # Same as group_structure, for <:verse>/<:verse-end> markers.
      def poem_structure(para='',ocn='')
        para.gsub!(/<:verse(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*7}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*8}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*7}\n}
      end
      # Same as group_structure, for <:code>/<:code-end> markers.
      def code_structure(para='',ocn='')
        para.gsub!(/<:code(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*7}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*8}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*7}\n}
      end
      #def table_structure(table='',ocn='') #tables
      # @@xml[:body] << %{#{@tab*1}#{table}\n#{@tab*1}\n} # unless lv # main text, contents, body KEEP #{ocn}
      # @endnotes=[]
      #end
      # Applies an in-place "&" substitution to every word that does not already
      # contain an "&...;" sequence, and returns wordlist.
      # NOTE(review): as written the replacement is "&" for "&" (a no-op);
      # presumably the replacement was an escaped entity — confirm.
      def tidywords(wordlist)
        wordlist.each do |x|
          x.gsub!(/&/,'&') unless x =~/&\S+;/
        end
      end
      # Main pass over @data: cleans each paragraph, routes "0~" header
      # paragraphs to xml_head, heading paragraphs (1~..6~) to xml_structure and
      # verse/group/code blocks to their *_structure methods, then appends
      # closing text for any levels still open.
      def markup
        data=@data
        dir=SiSU_Env::Info_env.new(@md.fns)
        xml_sc(@md)
        @rcdc=false
        @level,@cont,@copen,@xml_contents_close=[],[],[],[]
        (0..6).each { |x| @cont[x]=@level[x]=false }
        (4..6).each { |x| @xml_contents_close[x]='' }
        data.each do |para|
          wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17
          para=tidywords(wordlist).join(' ').strip
          para.gsub!(/<[-~]#>/,'')
          para.gsub!(/<0;\w\d+;[um]\d+><#@dp:#@dp>/,'')
          para.gsub!(/<:pb>\s*/,'')
          # NOTE(review): the replacement literal for image markup is empty in this copy — confirm.
          para.gsub!(/\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|ftp):\/\/\S+|image)/, %{}) #para.gsub!(/\{(\S+?\.png) \d+x\d+ \".+?\" \}(?:http:\/\/\S+|image)/,'\1')
          para.gsub!(/ /,' ')
          @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8
          if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers
            d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta
            if d_meta; xml_head(d_meta)
            end
          end
          # Once the metadata / "Document Information" section is reached, @rcdc
          # stays true and the body handling below is skipped.
          @rcdc=true if @rcdc ==false and (para =~/^\d~metadata/ or para =~/^1~\s+Document Information/)
          # NOTE(review): this pattern has empty alternatives in this copy — confirm.
          if para !~/(^0~||)/
            if para =~/.+?<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
              paranum=para[@regx, 3]
              @p_num=SiSU_XML_format::Paragraph_number.new(@md,paranum)
            end
            @sto=SiSU_text_parts::Split_text_object.new(@md,para).lev_segname_para_ocn ### problem in scroll, it appears tables are getting paragraph numbers
            unless @rcdc
              m=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
              if para =~m
                format_scroll=SiSU_XML_format::Format_scroll.new(@md,@sto.text) if @sto.format =~/i[12]|null/
                case @sto.format
                when /^(1)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body1
                when /^(2)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body2
                when /^(3)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body3
                when /^(4)~(\S+)/ # work on see SiSU_text_parts::Split_text_object
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body4
                when /^(5)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body5
                when /^(6)~(?:(\S+))?/
                  xml_markup(para)
                  xml_structure($1,@sto.ocn,para,$2)
                  para=@sto.lev_para_ocn.heading_body6
                #when /^(i1)$/
                # #format_scroll.gsubBody
                # #para=@sto.lev_para_ocn.scrIndent1
                #when /^(i2)$/
                # format_scroll.gsubBody
                # para=@sto.lev_para_ocn.scrIndent2
                #when /^(center)$/
                # para.gsub!(/(.+)/,
                # NOTE(review): the next two lines are reproduced as found; they look
                # like the remains of a commented-out literal split across lines — confirm.
                # %{
(\\1)
}) # para=@sto.lev_para_ocn.scrPara
                #when /^(b|bold)$/
                # para.gsub!(/(.+)/,
                # %{(\\1)})
                # para=@sto.lev_para_ocn.scrPara
                #when /null/ # see whether u can improve
                # if (para !~/#{@margin.txt_0}|#{@margin.txt_1}|#{@margin.txt_2}/)
                # #format_scroll.gsubBody
                # #para=@sto.lev_para_ocn.scrPara
                # end
                else
                  # Non-heading paragraph: matched[0] is the whole trailing stamp,
                  # matched[1] the number captured from it.
                  matched=/<~(\d+);(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/mi.match(para)
                  stamp,ocn=matched[0],matched[1]
                  if para =~ /<:verse>/
                    para.gsub!(/#{stamp}/,'')
                    poem_structure(para,ocn)
                  elsif para =~ /<:group>/
                    para.gsub!(/#{stamp}/,'')
                    group_structure(para,ocn)
                  elsif para =~ /<:code>/
                    para.gsub!(/#{stamp}/,'')
                    code_structure(para,ocn)
                  # NOTE(review): the following branch looks truncated in this copy
                  # (pattern and append statement appear fused) — confirm.
                  elsif para =~/#{para[@regx, 2]}\n" if para[@regx, 2] # main text, contents, body KEEP ocn = #{para[@regx, 3]} == #{ocn}
                  end
                end
              elsif para =~/(Note|Endnotes?)/ and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
                # NOTE(review): the line break after the quote below is reproduced as found — confirm.
                #format_scroll=MonoSiSU.new('
Note') #para=format_scroll.boldPara
              elsif para =~/(MetaData)/ and para =~/<~\d+;[m]\d+;\w\d+><#@dp:#@dp>$/ #debug 2003w46 add rc info
                format_scroll=Format_scroll.new(@md,'
MetaData')
                para=format_scroll.bold_para
              elsif para =~/(Owner Details)/ and para !~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><#@dp:#@dp>$/
                format_scroll=Format_scroll.new(@md,'
Owner Details')
                @@xml[:owner_details]=format_scroll.bold_para
                para=''
              elsif para =~/(.*)<:#>(.*)/
                one, two=$1,$2
                format_text=Format_text_object.new(one,two)
                para=format_text.seg_no_paranum
              end
              # NOTE(review): empty pattern / empty alternative below look damaged — confirm.
              para='' if para =~// and para =~/^(-\{{2}~\d+|)/ # -endnote
              if para =~/.*<:#>.*$/
                para=case para
                when /<:i1>/
                  format_text=Format_text_object.new(para,'')
                  format_text.scr_inden_ocn_e_no_paranum
                when /<:i2>/
                  format_text=Format_text_object.new(para,'')
                  format_text.scr_inden_ocn_e_no_paranum
                end
              end
              if para =~/<:center>/
                one, two=/(.*)<:center>(.*)/.match(para)[1,2]
                format_text=Format_text_object.new(one, two)
                para=format_text.center
              end
            else
            end
            para.gsub!(/<:\S+?>/,'')
            para.gsub!(//,' ')
          end
        end
        # After the last paragraph: append closing text for levels still open,
        # deepest first (6..4, then 3..1).
        @content_flag=true
        6.downto(4) do |x|
          y=x - 1; v=x - 3
          if @level[x] == true #2004w36 bug fix? watch/test previous logic broke on free.for.all @coontent_flag introduced
            if @content_flag==true
              @@xml[:body] << "\n#{@tab*y}\n"
              @content_flag=false
            else @@xml[:body] << "\n#{@tab*y}\n"
            end
          end
        end
        3.downto(1) do |x|
          y=x - 1
          @@xml[:body] << "#{@tab*y}\n" if @level[x] == true
        end
      end
      # Sets the opening part of the document (@@xml[:open]) and starts the head.
      # NOTE(review): the encoding literal and the heredoc opener appear to be
      # missing in this copy — confirm.
      def pre
        rdf=SiSU_XML_tags::RDF.new(@md)
        dir=SiSU_Env::Info_env.new
        css=SiSU_Env::CSS_select.new(@md).docbook_xml
        encoding=' ' #encoding=''
        @@xml[:open] =< #{rdf.comment_xml} WOK
        @@xml[:head] << "\n"
      end
      # Appends the source-control text to the head and sets the closing part.
      def post
        @@xml[:head] << @@xml[:sc]
        @@xml[:head] << "\n"
        @@xml[:close] = "\n"
      end
      # Concatenates the sections in output order, passes the result to Output
      # and then resets the :head, :body and :tail buffers.
      def publish
        content=[]
        data=@data
        content << @@xml[:open] << @@xml[:head] << @@xml[:body] << @@xml[:metadata]
        content << @@xml[:owner_details] if @md.stmp =~/\w\w/
        content << @@xml[:tail] << @@xml[:close]
        Output.new(content.to_s,@md).xml
        @@xml[:head],@@xml[:body],@@xml[:tail]=[],[],[]
      end
    end
    # Writes the assembled text to the output file.
    class Output
      include SiSU_Param
      # data: assembled output text; md: parameters object.
      def initialize(data,md)
        @data,@md=data,md
      end
      # Removes remaining "<:...>" markers, splits the text into non-empty lines
      # and writes them to the file returned by SiSU_Env::SiSU_file#mkfile.
      # NOTE(review): the second gsub! pattern is empty in this copy — confirm.
      def xml
        @sisu=[]
        @data.each do |para|
          para.gsub!(/<:\S+?>/,'')
          para.gsub!(//,'')
          para="#{para}\n" unless para.empty?
          @sisu << para
        end
        new_file_data=@sisu.to_s
        @sisu=new_file_data.scan(/.+/)
        SiSU_Env::SiSU_file.new(@md).mkdir
        filename_xml=SiSU_Env::SiSU_file.new(@md,@md.fn[:docbook]).mkfile
        @sisu.each {|para| filename_xml.puts para}
        filename_xml.close
      end
    end
    # Optional well-formedness check of the written file.
    class Tidy
      # md: parameters object; dir: environment object (stored as @env).
      def initialize(md,dir)
        @md,@env=md,dir
        @prog=SiSU_Env::Info_program.new
      end
      # When @prog.tidy is not false and @md.cmd matches /[VM]/, runs
      # System_call#well_formed? on the output file with '/dev/null' as the
      # second argument; screen messages are suppressed by the q flag.
      def xml
        if @prog.tidy !=false
          if @md.cmd =~/[VM]/
            tell=SiSU_Screen::Ansi.new(@md.cmd,'invert','Using XML Tidy','check document structure')
            tell.colorize unless @md.cmd =~/q/
            tell.grey_open unless @md.cmd =~/q/
            tidyfile='/dev/null' #don't want one or screen output, check for alternative flags
            tidy=SiSU_Env::System_call.new("#{@env.path.output}/#{@md.fnb}/#{@md.fn[:docbook]}",tidyfile)
            tidy.well_formed?
            tell.p_off unless @md.cmd =~/q/
          end
        end
      end
    end
  end
end
__END__