diff options
author | Ralph Amissah <ralph@amissah.com> | 2014-01-26 02:22:02 -0500 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2014-01-26 02:31:54 -0500 |
commit | 506e32633838b4daf9ab566c9da083329212f219 (patch) | |
tree | ef48a6985ce663aa3d4d62037f232b2286422979 /lib/sisu/v6/sst_to_s_xml_sax.rb | |
parent | v5 v6: version & changelog (& rakefile), make true on next commit (diff) |
v5 v6: made true, branches: v6 development; v5 stable; v4 closed sisu_5.3.0
Diffstat (limited to 'lib/sisu/v6/sst_to_s_xml_sax.rb')
-rw-r--r-- | lib/sisu/v6/sst_to_s_xml_sax.rb | 461 |
1 files changed, 461 insertions, 0 deletions
# encoding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Ralph Amissah,
   All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.sisudoc.org/sisu/en/SiSU/download.html>

 * Git
   <http://sources.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
   <http://sources.sisudoc.org/?p=code/sisu.git;a=blob;f=lib/sisu/v6/sst_to_s_xml_sax.rb;hb=HEAD>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: simple xml representation (sax style)

=end
# Produces the "XML SAX" style output: a flat <document> made of a <head>
# (header metadata) and a <body> of <object> elements, written to the file
# named by md.fn[:sxs].
module SiSU_SimpleXML_ModelSax
  require_relative 'particulars'                        # particulars.rb
    include SiSU_Particulars
  require_relative 'defaults'                           # defaults.rb
    include SiSU_Viz
  require_relative 'param'                              # param.rb
    include SiSU_Param
  require_relative 'sysenv'                             # sysenv.rb
    include SiSU_Env
  require_relative 'ao_doc_str'                         # ao_doc_str.rb
  require_relative 'xml_shared'                         # xml_shared.rb
    include SiSU_XML_Munge
  require_relative 'shared_sem'                         # shared_sem.rb
  require_relative 'xml_format'                         # xml_format.rb
    include SiSU_XML_Format
  require_relative 'rexml'                              # rexml.rb
    include SiSU_Rexml
  @@alt_id_count,@@tablehead,@@number_of_cols=0,0,0
  @@tablefoot=''
  # Entry point: reads the source named by opt.fns and hands its paragraphs
  # to Songsheet for conversion.
  class Convert
    @@fns=nil
    @@fns_array=[]                                       # initialised here so #read never meets an unset class variable
    # opt: processing options object; must respond to #act, #fns and #cmd
    # (those are the only members used in this class).
    def initialize(opt)
      @opt=opt
      @particulars=SiSU_Particulars::CombinedSingleton.instance.get_env_md(opt)
    end
    # Announces the conversion (unless quiet), loads the source paragraphs and
    # runs the Songsheet. Any StandardError is reported through
    # SiSU_Errors::Rescued rather than propagated.
    def read
      @md=@particulars.md #bug, relies on info persistence, assumes -m has previously been run
      @env=@particulars.env
      SiSU_Screen::Ansi.new(@opt.act[:color_state][:set],'invert','XML SAX',"#{@md.fns} -> #{@md.fn[:sxs]}").colorize unless @opt.act[:quiet][:set]==:on
      if (@opt.act[:verbose_plus][:set]==:on \
      || @opt.act[:maintenance][:set]==:on)
        SiSU_Screen::Ansi.new(@opt.act[:color_state][:set],@opt.fns,"#{Dir.pwd}/#{@md.fn[:sxs]}").flow
      end
      unless @@fns==@opt.fns                             # new source file: forget the cached paragraphs
        @@fns=@opt.fns
        @@fns_array=[]
      end
      @fns_array=if @@fns_array.empty?; read_fnm
      else @@fns_array.dup #check
      end
      SiSU_SimpleXML_ModelSax::Convert::Songsheet.new(@fns_array,@particulars).songsheet
    rescue
      SiSU_Errors::Rescued.new($!,$@,@opt.cmd,@opt.fns).location do
        __LINE__.to_s + ':' + __FILE__
      end
    end
    # Reads "#{Dir.pwd}/#{@opt.fns}" split on blank lines ("\n\n" separator).
    # Returns an Array of paragraph strings; when the file does not exist,
    # prints 'Error' to STDERR and returns an empty Array (previously nil,
    # the return value of STDERR.puts).
    def read_fnm
      ao=[]
      path="#{Dir.pwd}/#{@opt.fns}"
      if FileTest.file?(path)
        ao=IO.readlines(path,"\n\n")
      else STDERR.puts 'Error'
      end
      ao
    end
    private                                              # NOTE: affects method definitions only, not the nested classes below
    # Drives one conversion: builds the document with Scroll, then optionally
    # checks the result with Tidy and Rexml depending on the verbosity flags.
    class Songsheet
      # data: Array of source paragraphs; particulars: must respond to #env and #md.
      def initialize(data,particulars)
        @data,@particulars,@env,@md=data,particulars,particulars.env,particulars.md
      end
      # Runs the conversion; errors are reported through SiSU_Errors::Rescued.
      def songsheet
        SiSU_SimpleXML_ModelSax::Convert::Scroll.new(@data,@particulars).songsheet
        if (@md.opt.act[:verbose][:set]==:on \
        || @md.opt.act[:verbose_plus][:set]==:on \
        || @md.opt.act[:maintenance][:set]==:on)
          SiSU_SimpleXML_ModelSax::Convert::Tidy.new(@md,@env).xml # test wellformedness, comment out when not in use
        end
        SiSU_Rexml::Rexml.new(@md,@md.fn[:sxs]).xml if @md.opt.act[:maintenance][:set]==:on # test rexml parsing, comment out when not in use #debug
      rescue
        SiSU_Errors::Rescued.new($!,$@,@md.opt.cmd,@md.fns).location do
          __LINE__.to_s + ':' + __FILE__
        end
      end
    end
    # Builds the XML text. Output fragments are accumulated in the class-level
    # hash @@xml (:open, :head, :body, :close, :sc) and joined in #publish.
    class Scroll
      require_relative 'txt_shared'                      # txt_shared.rb
      require_relative 'css'                             # css.rb
      include SiSU_TextUtils
      @@xml={ body: [], open: [], close: [], head: [] }
      # data: Array of source paragraphs; particulars: must respond to #env and #md.
      def initialize(data='',particulars='')
        @data,@env,@md=data,particulars.env,particulars.md
        @vz=SiSU_Viz::Defaults.new
        # capture 1: segment name of an optional level marker; capture 2: paragraph text
        @regx=/^(?:#{Mx[:mk_o]}:p[bn]#{Mx[:mk_c]}\s*)?(?:#{Mx[:lv_o]}[1-9]:(\S*)#{Mx[:lv_c]})?(.+)/
        @tab="\t"
        if @md
          @trans=SiSU_XML_Munge::Trans.new(@md)
        end
        @sys=SiSU_Env::SystemCall.new
      end
      # Runs the four build phases in order: pre, markup, post, publish.
      def songsheet
        pre
        markup
        post
        publish
      end
      protected
      # Rewrites inline endnote markup (~{...}~ and ~[* ...]~ / ~[+ ...]~)
      # in place into <endnote symbol="..."> elements.
      def embedded_endnotes(para='')
        para.gsub!(/~\{(.+?)\}~/,'<endnote symbol="norm">\1</endnote> ')
        para.gsub!(/~\[([*+])\s+(.+?)\]~/,'<endnote symbol="\1">\2</endnote> ')
      end
      # Appends one <header> element to @@xml[:head].
      # meta must respond to #text, #attrib and #el; meta.text is modified in
      # place (inline emphasis markup -> <i>/<b>/<u>/<del>, ' & ' -> ' and ').
      def xml_head(meta)
        txt=meta.text
        txt.gsub!(/\/{(.+?)}\//,'<i>\1</i>')
        txt.gsub!(/[*!]{(.+?)}[*!]/,'<b>\1</b>')
        txt.gsub!(/_{(.+?)}_/,'<u>\1</u>')
        txt.gsub!(/-{(.+?)}-/,'<del>\1</del>')
        txt.gsub!(/<br(?: \/)?>/,'<br />')
        txt.gsub!(/ & /,' and ')
        @@xml[:head] << <<WOK
#{@tab}<header class="#{meta.attrib}">
#{@tab*2}<#{meta.el}>
#{@tab*3}#{txt}
#{@tab*2}</#{meta.el}>
#{@tab}</header>
WOK
      end
      # Stores the <source_control> fragment in @@xml[:sc]; an empty string when
      # @md.sc_info is falsy. The md parameter is unused (@md is read instead).
      # Indentation is expressed with @tab, in the same way as #xml_head.
      def xml_sc(md='')
        sc=if @md.sc_info
          <<WOK
#{@tab}<source_control>
#{@tab*2}<sc class="sourcefile">
#{@tab*3}#{@md.sc_filename}
#{@tab*2}</sc>
#{@tab*2}<sc class="number">
#{@tab*3}#{@md.sc_number}
#{@tab*2}</sc>
#{@tab*2}<sc class="date">
#{@tab*3}#{@md.sc_date}
#{@tab*2}</sc>
#{@tab}</source_control>
WOK
        else ''
        end
        @@xml[:sc]=sc
      end
      # Appends one <object> holding a <text> element to @@xml[:body].
      # para:  paragraph text (modified in place by #embedded_endnotes)
      # lv:    heading level; nil, '' or anything whose to_i is 0 means "not a heading"
      # hname: accepted for interface compatibility, not used here
      def xml_structure(para='',lv='',hname='') #extracted endnotes
        lv=lv.to_i
        lv=nil if lv==0
        embedded_endnotes(para)
        matched=para[@regx]
        if matched
          paragraph="#{para[@regx,2]}"
          util=SiSU_TextUtils::Wrap.new(paragraph,70)
          wrapped=util.line_wrap
        end
        @@xml[:body] << "#{@tab*0}<object>" if matched
        @@xml[:body] << "#{@tab*1}" << "\n" if matched
        text_el=if lv
          %{#{@tab*1}<text class="h#{lv}">\n#{@tab*2}#{wrapped}\n#{@tab*1}</text>\n} << "\n"
        elsif wrapped =~/\A%%?\s+/                       # comments
          %{<!--\n#{@tab*1}<text class="comment">\n#{@tab*2}#{wrapped}\n#{@tab*1}</text>\n-->\n}
        else                                             # main text, contents, body KEEP
          %{#{@tab*1}<text class="norm">\n#{@tab*2}#{wrapped}\n#{@tab*1}</text>\n}
        end
        @@xml[:body] << text_el
        # join rather than interpolate: "#{[]}" would emit the literal text "[]"
        notes=Array(@endnotes).join
        @@xml[:body] << notes unless notes.empty?        # main text, endnotes KEEP
        @@xml[:body] << "#{@tab*0}</object>" << "\n" if matched
        @endnotes=[]
      end
      # Appends para as <text class="block">; strips <:block>/<:block-end> markers in place.
      def block_structure(para='')
        para.gsub!(/<:block(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*0}<object>}
        @@xml[:body] << %{#{@tab*1}<text class="block">#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*1}</text>\n}
        @@xml[:body] << "#{@tab*0}</object>"
      end
      # Appends para as <text class="group">; strips <:group>/<:group-end> markers in place.
      def group_structure(para='')
        para.gsub!(/<:group(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*0}<object>}
        @@xml[:body] << %{#{@tab*1}<text class="group">#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*1}</text>\n}
        @@xml[:body] << "#{@tab*0}</object>"
      end
      # Appends para as <text class="verse">; strips <:verse>/<:verse-end> markers in place.
      def poem_structure(para='')
        para.gsub!(/<:verse(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*0}<object>}
        @@xml[:body] << %{#{@tab*1}<text class="verse">#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*1}</text>\n}
        @@xml[:body] << "#{@tab*0}</object>" << "\n"
      end
      # Appends para as <text class="code">; strips <:code>/<:code-end> markers in place.
      def code_structure(para='')
        para.gsub!(/<:code(?:-end)?>/,'')
        para.strip!
        @@xml[:body] << %{#{@tab*0}<object>}
        @@xml[:body] << %{#{@tab*1}<text class="code">#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*2}#{para}#{@tab*1}\n}
        @@xml[:body] << %{#{@tab*1}</text>\n}
        @@xml[:body] << "#{@tab*0}</object>" << "\n"
      end
      # Appends an already formatted table inside an <object> and resets @endnotes.
      def table_structure(table='') #tables
        @@xml[:body] << %{#{@tab*0}<object>}
        @@xml[:body] << %{#{@tab*1}#{table}\n#{@tab*1}\n} # unless lv # main text, contents, body KEEP
        @@xml[:body] << "#{@tab*0}</object>" << "\n" #if para[@regx]
        @endnotes=[]
      end
      # Escapes '&' as '&amp;' in place in every word that does not already
      # contain something that looks like an entity (&...;).
      def tidywords(wordlist)
        wordlist.each do |x|
          x.gsub!(/&/,'&amp;') unless x =~/&\S+;/
        end
      end
      # Removes Mx[:gl_o]N:name Mx[:gl_c] marks from para in place; returns para.
      def xml_clean(para)
        para.gsub!(/#{Mx[:gl_o]}[1-9]:\S*?#{Mx[:gl_c]}/,'') #Danger, watch
        para
      end
      # Walks every source paragraph and appends the matching XML to
      # @@xml[:head] (header lines) or @@xml[:body] (everything else).
      # Body output stops being generated once a metadata section has been
      # seen (@rcdc).
      def markup
        data=[]
        xml_sc(@md)
        @endnotes,@level,@cont,@copen,@xml_contents_close=[],[],[],[],[]
        @rcdc=false
        (0..6).each { |x| @cont[x]=@level[x]=false }
        (4..6).each { |x| @xml_contents_close[x]='' }
        @data.each do |para|
          data << SiSU_AO_DocumentStructureExtract::Structure.new(@md,para).structure #takes on Mx marks
        end
        data.each do |para|
          if para !~/^\s*(?:%+ |<:code>)/
            if @md.sem_tag and para =~/[:;]\{|\}[:;]/
              para=@trans.xml_semantic_tags(para)
            end
            if para =~/[:;]\{|\}[:;]/
              para=SiSU_Sem::Tags.new(para,@md).rm.all
            end
          end
          para=@trans.markup_light(para)
          @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8
          para.gsub!(/^@(\S+?):/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}")
          if para =~/\A#{Mx[:lv_o]}@(\S+?)#{Mx[:lv_c]}\s*(.+?)\Z/m # for headers
            d_meta=SiSU_TextUtils::HeaderScan.new(@md,para).meta
            if d_meta; xml_head(d_meta)
            end
          end
          para='' if para=~/#{Mx[:lv_o]}@\S+?#{Mx[:lv_c]}/
          if @rcdc==false \
          and (para =~/~metadata/ or para =~/^1~meta\s+Document Information/)
            @rcdc=true
          end
          if para !~/(^@\S+?:|^\s*$|<ENDNOTES>|<EOF>)/
            @sto=SiSU_text_parts::SplitTextObject.new(@md,para).lev_segname_para
            unless @rcdc
              SiSU_XML_Format::FormatScroll.new(@md,@sto.text) if @sto.format =~/i[1-9]|ordinary/
              case @sto.format
              when /^([1-6]):(\S*)/                      # headings, levels 1-6 (4: work on see SplitTextObject)
                lv,hname=$1,$2                           # capture before further regex work
                xml_clean(para)
                xml_structure(para,lv,hname)
                para=@sto.lev_para_ocn.public_send("heading_body#{lv}")
              else
                # The *_structure helpers return the @@xml[:body] Array, so their
                # result must not be bound to para: later steps expect a String.
                # The paragraph has been emitted in full, hence it is blanked.
                if para =~ /<:verse>/
                  poem_structure(para)
                  para=''
                elsif para =~ /<:group>/
                  group_structure(para)
                  para=''
                elsif para =~ /<:code>/
                  para.gsub!(/<:code(?:-end)?>/,'')      # strip markers first; escaping would hide them
                  para.gsub!(/</,'&lt;')
                  para.gsub!(/>/,'&gt;')
                  code_structure(para)
                  para=''
                elsif para =~/<!Th?.+/ # tables come as single block #work area 2005w13
                  table=SiSU_Tables::TableXML.new(para)
                  table_structure(table.table_split)
                  para=''
                else
                  xml_structure(para,nil,nil)
                end
              end
              if para =~/<a name="n\d+">/ \
              and para =~/^(-\{{2}~\d+|<!e[:_]\d+!>)/ # -endnote
                para=''
              end
              if para =~/.*<:#>.*$/
                para=case para
                when /<:i1>/
                  format_text=FormatTextObject.new(para,'')
                  format_text.scr_inden_ocn_e_no_paranum
                when /<:i2>/
                  format_text=FormatTextObject.new(para,'')
                  format_text.scr_inden_ocn_e_no_paranum
                else para                                # keep the text; a bare case would yield nil
                end
              end
              if para =~/<:center>/
                one,two=/(.*)<:center>(.*)/.match(para)[1,2]
                format_text=FormatTextObject.new(one,two)
                para=format_text.center
              end
            end
            para.gsub!(/<:\S+?>/,'')
            para.gsub!(/<!.+!>/,'') ## Clean Prepared Text #bugwatch reinstate
            para
          end
          para
        end
        6.downto(4) do |x|
          y=x - 1; v=x - 3
          @@xml[:body] << "#{@tab*5}</content>\n#{@tab*y}</contents#{v}>\n" if @level[x]==true
        end
        3.downto(1) do |x|
          y=x - 1
          @@xml[:body] << "#{@tab*y}</heading#{x}>\n" if @level[x]==true
        end
      end
      # Resets @@xml[:head]/[:body] and writes the XML declaration, stylesheet
      # reference, RDF comment and opening <document>/<head>/<body> tags.
      # The declared encoding follows the system locale (UTF-8 or ISO-8859-1).
      def pre
        rdf=SiSU_XML_Tags::RDF.new(@md)
        dir=SiSU_Env::InfoEnv.new
        @@xml[:head],@@xml[:body]=[],[]
        css=SiSU_Env::CSS_Select.new(@md).xml_sax
        encoding=if @sys.locale =~/utf-?8/i; '<?xml version="1.0" encoding="UTF-8" standalone="no"?>'
        else '<?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>'
        end
        @@xml[:open] =<<WOK
#{encoding}
<?xml-stylesheet type="text/css" href="../#{dir.path.style}/#{css}"?>
#{rdf.comment_xml_sax}
<document>
WOK
        @@xml[:head] << "<head>\n"
        @@xml[:body] << "<body>\n"
      end
      # Appends the source-control fragment and the closing tags.
      def post
        @@xml[:head] << @@xml[:sc]
        @@xml[:head] << "</head>\n"
        @@xml[:body] << "</body>\n"
        @@xml[:close] = "</document>\n"
      end
      # Joins all fragments, writes them via Output and clears @@xml.
      # Keys never set in this file (:metadata, :owner_details, :tail) are nil
      # and contribute nothing to the joined string.
      def publish
        content=[]
        content << @@xml[:open] << @@xml[:head] << @@xml[:body] << @@xml[:metadata]
        content << @@xml[:owner_details] if @md.stmp =~/\w\w/
        content << @@xml[:tail] << @@xml[:close]
        Output.new(content.join,@md).xml
        @@xml={}
      end
    end
    # Writes the finished document text to the output file md.fn[:sxs].
    class Output
      # data: the document as one String (an Array of strings is also accepted).
      def initialize(data,md)
        @data,@md=data,md
      end
      # Strips leftover <:...> and <!...!> markers, drops empty lines and writes
      # the remaining lines to the file created by SiSU_Env::FileOp#mkfile_pwd.
      def xml
        @sisu=[]
        # String#each does not exist on Ruby >= 1.9; iterate a String line by line
        paras=@data.respond_to?(:each_line) ? @data.each_line : @data.each
        paras.each do |para|
          para=para.gsub(/<:\S+?>/,'').gsub(/<!.+?!>/,'')
          para="#{para}\n" unless para.empty?
          @sisu << para
        end
        new_file_data=@sisu.join
        @sisu=new_file_data.scan(/.+/)
        SiSU_Env::FileOp.new(@md).mkdir
        filename_sxm=SiSU_Env::FileOp.new(@md,@md.fn[:sxs]).mkfile_pwd
        if filename_sxm.is_a?(File)
          begin
            @sisu.each {|para| filename_sxm.puts para}
          ensure
            filename_sxm.close                           # close even when a write fails
          end
        else puts 'file not created, is directory writable?'
        end
      end
    end
    # Optional well-formedness check of the written file using the tidy program.
    class Tidy
      def initialize(md,dir)
        @md,@env=md,dir
        @prog=SiSU_Env::InfoProgram.new
      end
      # Runs the check only when tidy is available and verbose_plus or
      # maintenance is switched on; screen messages are skipped when quiet.
      def xml
        if @prog.tidy !=false #note values can be other than true
          if (@md.opt.act[:verbose_plus][:set]==:on \
          || @md.opt.act[:maintenance][:set]==:on)
            SiSU_Screen::Ansi.new(@md.opt.act[:color_state][:set],'invert','Using XML Tidy','check document structure').colorize unless @md.opt.act[:quiet][:set]==:on
            # keep the Ansi object: it is used for grey_open/p_off below
            tell=SiSU_Screen::Ansi.new(@md.opt.act[:color_state][:set],'','','check document structure')
            tell.grey_open unless @md.opt.act[:quiet][:set]==:on
            tidyfile='/dev/null' #don't want one or screen output, check for alternative flags
            tidy =SiSU_Env::SystemCall.new("#{Dir.pwd}/#{@md.fn[:sxs]}",tidyfile)
            tidy.well_formed?
            tell.p_off unless @md.opt.act[:quiet][:set]==:on
          end
        end
      end
    end
  end
end
__END__