# coding: utf-8 =begin * Name: SiSU * Description: a framework for document structuring, publishing and search * Author: Ralph Amissah * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. * License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . If you have Internet connection, the latest version of the GPL should be available at these locations: * SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system * Homepages: * Download: * Ralph Amissah ** Description: preprocessing, (dal), data abstraction used in subsequent processing =end module SiSU_Convert_footnotes require "#{SiSU_lib}/defaults" # defaults.rb include SiSU_Viz require "#{SiSU_lib}/sysenv" # sysenv.rb include SiSU_Env require "#{SiSU_lib}/param" # param.rb include SiSU_Param require "#{SiSU_lib}/dal_syntax" # dal_syntax.rb include SiSU_Syntax require "#{SiSU_lib}/i18n" # i18n.rb class Instantiate < SiSU_Param::Parameters::Instructions @@flag={} #Beware!! 
# ---------------------------------------------------------------------------
# Reviewer documentation for module SiSU_Convert_footnotes (comments only;
# no code token has been changed).
#
# NOTE(review): this copy of the file appears to have lost its original line
# breaks, so every `#` comment now swallows the code that follows it on the
# same physical line. Some text also seems to be missing: the superclass and
# the first methods of class Source, and parts of several regexps / string
# literals that now contain bare newlines. Restore from a pristine copy before
# running; the notes below describe only what is still visible here.
#
# Instantiate#initialize
#   Assigns starting values to the class variables @@flag['table_to'],
#   @@counter, @@column, @@columns, @@flag_vocab, @@endnote, @@endnote_array,
#   @@word_mode, @@endnote_call_counter and @@line_mode.
#   `@@endnote_array=@@word_mode=[]` binds both names to the same Array.
#   NOTE(review): Make declares class variables with the same names in its own
#   class body; they look independent of the ones set here - confirm.
#
# Source (only the tail of one method and read_fnm are visible)
#   Visible tail: in the first branch of a conditional whose test is not
#   visible, every element of `dal` is stripped and appended to dal_array with
#   a trailing "\n\n" (blank elements skipped) and dal_array is the result; in
#   the else branch a warning is issued through SiSU_Screen::Ansi and the
#   result is ''.
#   NOTE(review): the warning guard tests @cmd while the neighbouring code
#   tests @md.cmd - verify which is intended.
#   read_fnm: when @fnm names an existing file its content is read back with
#   Marshal.load; otherwise Source.new(@opt).create_dal is called.
#   NOTE(review): Marshal.load should only ever see trusted files.
#
# Output
#   initialize(md,data): stores both arguments and builds a
#   SiSU_Env::Create_file for @md.cmd/@md.fns. `dir` and @hard are assigned
#   but not read anywhere in the visible code.
#   hard_output: takes the handle returned by @my_make.file_note and writes
#   each non-blank element of @data, stripped, followed by "\n\n".
#
# Make
#   Class body: declares @@endnote, @@endnote_array/@@word_mode (one shared
#   Array), @@endnote_call_counter=1, @@comment='%' and @@flag.
#   NOTE(review): @@flag is keyed by the Array ['table_to'], not the String
#   'table_to' used in Instantiate - looks unintended.
#   initialize(md,data): stores arguments, rebinds @@word_mode to a new Array,
#   and builds the env, skin, language and translation helpers from @md.
#   reset: re-assigns the counters, endnote stores and @@line_mode.
#   song: public entry point. Resets state, joins @data and splits it on
#   "\n\n", then runs substitutions_and_insertions?, character_check and
#   endnotes in that order (each on a fresh Make instance), writes the result
#   with Output#hard_output, resets again and returns the processed array.
#   Everything after `protected` is protected:
#   vocabulary: before the first paragraph starting with "1~" (while
#   @@flag_vocab is 0) pushes an array holding '@vocabulary: lex', "\n\n" and
#   that paragraph; other paragraphs are passed through.
#   character_check: calls reset, then for each paragraph strips it, blanks
#   lines made only of `{`, `~` or `}`, blanks lines starting with @@comment,
#   normalises a number of markers and characters, and appends "\n".
#   Paragraphs containing `^~` are copied, rewritten to `~{ ... }~` and pushed
#   onto @@endnote_array; those matching /\^~ .+/ are dropped from the output.
#   NOTE(review): `gsub!` returns nil when nothing is replaced, so the chained
#   `.gsub!(...).strip` can raise NoMethodError for a `^~` not followed by
#   whitespace and text.
#   NOTE(review): `require 'iconv'` - nothing visible in the method uses it,
#   and iconv is not bundled with Ruby 2.0 and later; confirm it is needed.
#   substitutions_and_insertions?: for @md.markup matching 0.16 or 0.37,
#   rewrites old heading markers (1~..3~ to :A~..:C~, 4~..9~ to 1~..6~);
#   paragraphs holding <:insertN> (and not starting with "% ") are replaced by
#   the matching SiSU_Viz::Inserts text.
#   NOTE(review): the replacement '@\1: ' refers to a capture group the
#   pattern /^0~\S+\s+/ does not have.
#   NOTE(review): the insert3/insert4 branches overwrite `para` with
#   ins.insert3 / ins.insert4 (presumably a String, since #split is called on
#   it) and `para.each` is then called on that value - verify.
# ---------------------------------------------------------------------------
def initialize @@flag['table_to']=false @@counter=@@column=@@columns=@@flag_vocab=0 @@endnote={} @@endnote_array=@@word_mode=[] @@endnote_call_counter=1 @@line_mode='' end end class Source #{@md.fns}.fn").txt_red unless @md.cmd =~/q/ dal.each{|s| dal_array << "#{s.strip}\n\n" unless s.strip.empty?} dal_array else SiSU_Screen::Ansi.new(@md.cmd,'no footnote conversion done, problem with source file','to override use --convert=footnote-force (this is not advised)').warn if @cmd !~/q/ '' end end def read_fnm dal=[] dal=if FileTest.file?(@fnm); File.open(@fnm){ |f| dal=Marshal.load(f)} else SiSU_Convert_footnotes::Source.new(@opt).create_dal #watch end end end class Output def initialize(md,data) @md,@data=md,data @my_make=SiSU_Env::Create_file.new(@md.cmd,@md.fns) dir=SiSU_Env::Info_env.new(@md.fns) @hard="#{Dir.pwd}/#{@md.fns}.fn" end def hard_output filename_note=@my_make.file_note @data.each {|s| filename_note.puts s.strip + "\n\n" unless s.strip.empty?} end end class Make @@endnote={} @@endnote_array=@@word_mode=[] @@endnote_call_counter=1 @@comment='%' @@flag={ ['table_to']=>false } def initialize(md,data) @md,@data=md,data @@word_mode=[] @env=SiSU_Env::Info_env.new(@md.fns) @skin=SiSU_Env::Info_skin.new(@md) l=SiSU_Env::Standardise_language.new.file_to_language(@md.fns) @language=l[:l] @translate=SiSU_Translate::Source.new(@md,@language) end def reset @@counter=@@column=@@columns=@@flag_vocab=0 @@endnote={} @@endnote_array=@@word_mode=[] @@endnote_call_counter=1 @@line_mode='' end def song reset data=@data @metafile="#{@env.path.dal}/#{@md.fns}.meta" my_make_source_file=SiSU_Env::Create_file.new(@md.cmd,@md.fns) data=data.join.split("\n\n") data_new=[] data.each do |x| data_new << if x =~ /\n\n/m; x.split(/\n\n+/) else x end end data=data_new.flatten data=SiSU_Convert_footnotes::Make.new(@md,data).substitutions_and_insertions? 
data=SiSU_Convert_footnotes::Make.new(@md,data).character_check data=SiSU_Convert_footnotes::Make.new(@md,data).endnotes SiSU_Convert_footnotes::Output.new(@md,data).hard_output reset data end protected def vocabulary data=@data tuned_file,vocab_insert=[],[] data.each do |para| if para =~/^1~/ \ and @@flag_vocab==0 vocab_insert << '@vocabulary: lex' << "\n\n" << para tuned_file << vocab_insert unless para.nil? @@flag_vocab=1 else tuned_file << para unless para.nil? end end tuned_file end def character_check require 'iconv' reset data=@data @tuned_file=[] endnote_no=1 data.each do |para| para.strip! para.gsub!(/^[{~}]\s*$/,'') para.gsub!(/^#{@@comment}.*/,'') #remove comment and divider #% para.gsub!(/<~#>|~#\s*/,'~#') para.gsub!(/-#\s*/,'-#') para.gsub!(/(~\{ )\s+/,'\1') para.gsub!(/ \/\//,'
') #added 2004w29 para.gsub!(/
/,'
') #needed by xml, xhtml etc. para.gsub!(/`/,"'") para.gsub!(/\342\200\231/,"'") #if para =~/’/ #Avoid #‘ ’ #“ ” para.gsub!(/\t/,' ') para.gsub!(/�/,' ') #watch, replace with char code para.gsub!(/[“”]/,'""') para.gsub!(/[­–—]/,'-') #— – chk para.gsub!(/·/,'*') para.gsub!(/\\copy(?:right)?\b/,'©') para.gsub!(/\\trademark\b|\\tm\b/,'®') para.gsub!(/\44/,'$') #$ watch para=para + "\n" case para when /\^~/ # endnotes #% Note must do this first (earlier loop) and then enter gathered data into ~^\d+ sub_para=para.dup @@endnote_array << sub_para.gsub!(/\n/,'').gsub!(/\^~\s+(.+)\s*/,'~{ \1 }~').strip endnote_no+=1 para=nil if para =~/\^~ .+/ #removes 'binary' endnote now in endnote array for later insertion end @tuned_file << para unless para.nil? end @tuned_file end def substitutions_and_insertions? data=@data tuned_file=[] data.each do |para| if @md.markup =~/0\.16|0\.37/ #parameters not extracted/available para.gsub!(/^0~\S+\s+/,'@\1: ') para.gsub!(/^1~/,':A~') para.gsub!(/^2~/,':B~') para.gsub!(/^3~/,':C~') para.gsub!(/^4~/,'1~') para.gsub!(/^5~/,'2~') para.gsub!(/^6~/,'3~') para.gsub!(/^7~/,'4~') para.gsub!(/^8~/,'5~') para.gsub!(/^9~/,'6~') end if para =~/<:insert\d+!?>/ \ and para !~/^%\s+/ @skin.select ins=SiSU_Viz::Inserts.new case para when /^\s*<:insert1>\s*$/ para=[] ins.insert1.split(/\n\n/).each{|x| para << x } when /^\s*<:insert2>\s*$/ para=[] ins.insert2.split(/\n\n/).each{|x| para << x } when /^\s*<:insert3>\s*$/ para=[] ins.insert3.split(/\n\n/).each{|x| para << x << "\n"} para=ins.insert3 when /^\s*<:insert4>\s*$/ para=[] ins.insert4.split(/\n\n/).each{|x| para << x << "\n"} para=ins.insert4 when /^\s*<:insert5>\s*$/ para=[] ins.insert5.split(/\n\n/).each{|x| para << x << "\n"} when /^\s*<:insert6>\s*$/ para=[] ins.insert6.split(/\n\n/).each{|x| para << x << "\n"} when /^\s*<:insert7>\s*$/ para=[] ins.insert7.split(/\n\n/).each{|x| para << x << "\n"} end para.each{|x| tuned_file << x } else tuned_file << para end tuned_file.compact! 
# ---------------------------------------------------------------------------
# Reviewer documentation for the remaining protected methods of Make
# (comments only; no code token has been changed).
#
# name_endnote_seg: replaces the markers `<:3> <:ee>`, `<:2> <:ee>` and
#   `<:1> <:ee>` with strings assembled from @@endnote['special_align'],
#   @@endnote['seg_name_N'] and @@endnote['special_align_close']; appends
#   "\n1~endnotes Endnotes" when both @md.flag_auto_endnotes and
#   @md.flag_separate_endnotes_make are set, then appends "\n".
# owner_details_seg: appends an owner-details heading to `data`.
#   NOTE(review): `data` is not assigned inside this method - verify.
# number_sub_heading(para,num,title_no): rewrites the heading marker of level
#   `num` in `para` so that it carries `title_no`; when @md.toc_lev_limit is
#   set and smaller than `num`, a level 2-6 heading marker is replaced by
#   '!_ '. Returns `para`.
# set_heading_top / set_heading_seg / set_header_title: each does nothing
#   (and evaluates to nil) when the matching @md flag is already set;
#   otherwise it inserts one manufactured heading / title line in front of
#   the first paragraph that fails the method's exclusion patterns, sets the
#   flag on @md, and returns @tuned_file.
# endnotes: paragraphs containing `~{ ... }~`, `~^` or `<:e>` are split into
#   words, passed through endnote_call_number on a fresh Make instance and
#   re-joined with single spaces; other paragraphs pass through unchanged.
# endnote_call_number: increments @@endnote_call_counter for each word
#   containing `~{` that is not followed by `*`; for words containing `~^` or
#   `<:e>` substitutes @@endnote_array[@@endnote_call_counter-1] and then
#   increments the counter.
# strip_clean_extra_spaces(s) / strip_clean_of_markup(s): work on a copy of
#   `s` and return the cleaned string; strip_clean_of_markup also reads the
#   constant Mx, which is not defined in the visible code.
# ---------------------------------------------------------------------------
end tuned_file end def name_endnote_seg data=@data @tuned_file=[] data.each do |para| para.gsub!(/<:3>\s*<:ee>/, "#{@@endnote['special_align']}


\r " + "#{@@endnote['seg_name_3']}

" + "#{@@endnote['special_align_close']}") para.gsub!(/<:2>\s*<:ee>/, "#{@@endnote['special_align']}


\r " + "#{@@endnote['seg_name_2']}

" + "#{@@endnote['special_align_close']}") para.gsub!(/<:1>\s*<:ee>/, "#{@@endnote['special_align']}


\r " + "#{@@endnote['seg_name_1']}

" + "#{@@endnote['special_align_close']}") @tuned_file << para end if @md.flag_auto_endnotes \ and @md.flag_separate_endnotes_make @tuned_file << "\n1~endnotes Endnotes" #prob numbering, revisit end @tuned_file << "\n" @tuned_file end def owner_details_seg data << '1~owner.details Owner Details' end def number_sub_heading(para,num,title_no) case para when /#{num}~- /; para.gsub!(/#{num}~- /,"#{title_no} ") when /^#{num}~#\s*/; para.gsub!(/^#{num}~#\s*/,"#{title_no} ") when /^#{num}~[a-z_\.]+ / para.gsub!(/^#{num}~([a-z_\.]+)\s+(.+)/i,%{#{num}~\\1 #{title_no} \\2 <:name##{title_no}>}) else para.gsub!(/^#{num}~ /,"#{num}~#{title_no} #{title_no} ") #main end if @md.toc_lev_limit \ and @md.toc_lev_limit < num para.gsub!(/^[2-6]~(?:~\S+)?\s*/,'!_ ') end para end def set_heading_top #% make sure no false positives unless @md.set_heading_top puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/ data=@data @tuned_file=[] data.each do |para| unless @md.set_heading_top if para !~/^(?:@\S+:|0~\S+)\s/m \ and para !~/\A\s*\Z/m @md.set_heading_top=true head=if @md.title.full ; ":A~ #{@md.title.full}" else ':A~ [no title provided]' end @tuned_file << head end end @tuned_file << para end @tuned_file end end def set_heading_seg #% make sure no false positives unless @md.set_heading_seg puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/ data=@data @tuned_file=[] data.each do |para| unless @md.set_heading_seg if para !~/^(?:@\S+:|0~\S+|:[ABC]~)/m \ and para !~/\A\s*\Z/m \ and para !~/<:p[bn]>/ @md.set_heading_seg=true head=if @md.title.full ; "1~seg [#{@md.title.full}]" else '1~seg [segment]' end @tuned_file << head end end @tuned_file << para end @tuned_file end end def set_header_title #% make sure no false positives unless @md.set_header_title puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/ data=@data @tuned_file=[] data.each do |para| unless 
@md.set_header_title if para !~/^%{1,2}\s/m \ and para !~/\A\s*\Z/m @tuned_file << "0~title #{@md.heading_seg_first}" @md.title.full=@md.heading_seg_first @md.set_header_title=true end end @tuned_file << para end @tuned_file end end def endnotes #% endnote work zone data=@data @tuned_file=[] endnote_no,endnote_ref=1,1 data.each do |para| case para # manually numbered endnotes --> when /~\{\s+.+?\}~/ # auto-numbered endnotes --> para.gsub!(/\s*\}~/,' }~') # required 2003w31 @word_mode=para.scan(/\S+/) word_mode=SiSU_Convert_footnotes::Make.new(@md,@word_mode).endnote_call_number para=word_mode.join(' ') endnote_ref+=1 when /~\^(?:\s|$)|<:e>/ #%Note inserts endnotes previously gathered from /^(|[-~]\{{3})/ (in earlier loop) word_mode=para.scan(/\S+/) word_mode=SiSU_Convert_footnotes::Make.new(@md,word_mode).endnote_call_number para=word_mode.join(' ') endnote_ref+=1 end @tuned_file << para end @tuned_file end def endnote_call_number data=@data data.each do |word| case word when /~\{/ unless word =~/~\{\*+/ @@endnote_call_counter+=1 end when /~\^|<:e>/ word.gsub!(/~\^|<:e>/,"#{@@endnote_array[@@endnote_call_counter-1]}") @@endnote_call_counter+=1 end end end def strip_clean_extra_spaces(s) # dal output tuned s=s.dup s=s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') s=s.gsub(/ [ ]+/,' ') s=s.gsub(/^ [ ]+/,'') s=s.gsub(/ [ ]+$/,'') s=s.gsub(/(<\/[bi]>')[ ]+(s )/,'\1\2') end def strip_clean_of_markup(s) # used for digest, define rules, make same as in db clean s=s.dup s=s.gsub(/(?:<\/?[ib]>|^:[A-C]~\S+|^[1-6]~\S+|~\{\d+\s.+?\}~)/,'') # markup and endnotes removed #% same as db clean --> s=s.gsub(/(.+?)<\/del>/,'DELETED(\1)') # deletions s=s.gsub(/(\d+)<\/sup>/,'[\1]') s=s.gsub(/(?: \\;|#{Mx[:nbsp]})+/,' ') #checking source Mx not necessary s=s.gsub(/\{.+?\.(?:png|jpg|gif).+?\}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search s=s.gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image] ') # else image names found in 
search, re-check s=s.gsub(/\s\s+/,' ') s=s.strip end end end __END__