# encoding: utf-8
=begin

* Name: SiSU

* Description: a framework for document structuring, publishing and search

* Author: Ralph Amissah

* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
  2007, 2008, 2009, 2010, 2011, 2012, 2013 Ralph Amissah, All Rights Reserved.

* License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program. If not, see .

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:

* SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

* Homepages:

* Download:

* Git

* Ralph Amissah

** Description: modules shared by the different db types, dbi, postgresql, sqlite

=end
module SiSU_DbImport
  require_relative 'db_columns'                         # db_columns.rb
  require_relative 'db_load_tuple'                      # db_load_tuple.rb
  require_relative 'db_sqltxt'                          # db_sqltxt.rb
  require_relative 'shared_html_lite'                   # shared_html_lite.rb
  require 'sqlite3'

  # Collects the SQL statements needed to import one document (metadata row,
  # document objects, endnotes, output urls) and runs them against @conn in a
  # single transaction (see #marshal_load).
  class Import < SiSU_DbText::Prepare
    include SiSU_Param
    include SiSU_Screen

    # Object categories (data.of) imported as document text.
    TEXT_OBJECT_TYPES = [:para, :heading, :heading_insert, :block, :group].freeze

    # Mx keys of the open/close marks of inline font attributes that are
    # stripped (content kept) before import, in the order they are applied.
    INLINE_MARKUP_DELIMITERS = [
      [:fa_bold_o,        :fa_bold_c],
      [:fa_italics_o,     :fa_italics_c],
      [:fa_underscore_o,  :fa_underscore_c],
      [:fa_superscript_o, :fa_superscript_c],
      [:fa_subscript_o,   :fa_subscript_c],
      [:fa_insert_o,      :fa_insert_c],
      [:fa_cite_o,        :fa_cite_c],
      [:fa_strike_o,      :fa_strike_c],
      [:fa_monospace_o,   :fa_monospace_c],
    ].freeze

    # Endnote kinds: tuple type, Mx key of opening mark, Mx key of closing
    # mark, and the literal marker that follows the opening mark.
    ENDNOTE_KINDS = [
      ['endnotes',          :en_a_o, :en_a_c, ''],
      ['endnotes_asterisk', :en_b_o, :en_b_c, '*'],
      ['endnotes_plus',     :en_b_o, :en_b_c, '+'],
    ].freeze

    # Output files whose urls are recorded when the file exists:
    # key used in the f/u hashes, column name, accessor on
    # @md.file.output_path, and (pdf only) accessor on @pdf_fn that gives the
    # filename; otherwise the filename is @md.file.base_filename.<kind>.
    URL_OUTPUTS = [
      [:txt,         'plaintext',   :txt],
      [:html_toc,    'html_toc',    :html_seg],
      [:html_doc,    'html_doc',    :html_scroll],
      [:xhtml,       'xhtml',       :xhtml],
      [:xml_sax,     'xml_sax',     :xml_sax],
      [:xml_dom,     'xml_dom',     :xml_dom],
      [:epub,        'epub',        :epub],
      [:odf,         'odf',         :odt],
      [:pdf_p,       'pdf_p',       :pdf, :pdf_p_a4],
      [:pdf_l,       'pdf_l',       :pdf, :pdf_l_a4],
      [:concordance, 'concordance', :html_concordance],
      [:digest,      'digest',      :digest],
      [:manifest,    'manifest',    :manifest],            #revisit, was to be text, this is html
      [:markup,      'markup',      :src],
      [:sisupod,     'sisupod',     :sisupod],
    ].freeze

    @@dl=nil
    @@hname=nil
    attr_accessor :tp

    # opt        - command line options object (reads .cmd, .fns, .lng)
    # conn       - open database connection (sqlite3 or dbi handle)
    # file_maint - passed through unchanged to the SiSU_DbTuple loaders
    # sql_type   - 'pg' (default) or 'sqlite'
    #
    # Seeds @col[:lid] and @id_n with the current MAX ids of the doc_objects
    # and endnotes tables so that new rows continue the numbering.
    def initialize(opt,conn,file_maint,sql_type='pg')
      @opt,@conn,@file_maint,@sql_type=opt,conn,file_maint,sql_type
      @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
      @env=SiSU_Env::InfoEnv.new(@opt.fns)
      @dal="#{@env.processing_path.dal}"
      @fnb=if @opt.fns.empty? \
      or @opt.cmd.empty?
        ''
      else
        @md=SiSU_Param::Parameters.new(@opt).get
        @md.fnb
      end
      @fnc="#{@dal}/#{@opt.fns}.content.rbm"
      #create? consider placing field just before clean text as opposed to seg
      #which contains seg(.html) name info seg_full would contain seg info for
      #levels 5 & 6 where available eg seg_full may be 7.3 (level 5) and 7.3.1
      #(level 6) where seg is 7
      @@seg,@@seg_full='',''
      @col=Hash.new('')
      @col[:ocn]=''
      @counter={}
      @db=SiSU_Env::InfoDb.new
      if @sql_type=='sqlite'
        # the sqlite3 driver is recognised by the first 10 characters of the
        # connection's inspect string
        @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1])
      end
      begin
        @col[:lid]=query_max('SELECT MAX(lid) FROM doc_objects')
      rescue
        puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/
      end
      @col[:lid]=0 if @col[:lid].nil? or @col[:lid].to_s.empty?
      begin
        @id_n=query_max('SELECT MAX(nid) FROM endnotes')
        @id_n ||=0
      rescue
        puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/
      end
      # fall back to 0 when the query failed or returned nothing (the original
      # tested @col[:lid] here, leaving @id_n nil after a failed query)
      @id_n=0 if @id_n.nil? or @id_n.to_s.empty?
      @col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=0
      @db=SiSU_Env::InfoDb.new
      @pdf_fn=SiSU_Env::FileOp.new(@md).base_filename
      @@dl ||=SiSU_Env::InfoEnv.new.digest.length
    end

    # Imports the document named by @opt.fns unless a row with the same source
    # filename and language already exists in metadata_and_text; in that case
    # only a notice is printed. All generated statements are executed in one
    # transaction; on failure they are dumped to a .sql file in the processing
    # path.
    def marshal_load
      require_relative 'dal'                                # dal.rb
      @dal_array=SiSU_DAL::Source.new(@opt).get             # dal file drawn here
      # NOTE(review): /vVM/ only matches the literal sequence "vVM"; /[vVM]/
      # may have been intended here and below - behaviour left unchanged
      SiSU_Screen::Ansi.new(@opt.cmd,"#{@db.psql.db}::#{@opt.fns}").puts_blue if @opt.cmd =~/vVM/
      SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc).puts_grey if @opt.cmd =~/v/
      #%
      select_first_match=%{
        SELECT metadata_and_text.tid
        FROM metadata_and_text
        WHERE metadata_and_text.src_filename = '#{@md.fns}'
        AND metadata_and_text.language_document_char = '#{@opt.lng}'
      ;} # note, for .ssm: @md.fns (is set during runtime & is) != @opt.fns @md.opt.fns
      file_exist=@sql_type=~/sqlite/ \
      ? @conn.get_first_value(select_first_match)
      : @conn.select_one(select_first_match)
      if not file_exist
        t_d=[] # transaction_data
        t_d << db_import_metadata
        t_d << db_import_documents(@dal_array)
        t_d << db_import_urls(@dal_array,@fnc) #import OID on/off
        t_d=t_d.flatten
        if @opt.cmd =~/[MV]/
          puts @conn.class if defined? @conn.class
          puts @conn.driver_name if defined? @conn.driver_name
          puts @conn.driver if defined? @conn.driver
        end
        begin
          if @sql_type=~/sqlite/
            @conn.transaction do |conn|
              t_d.each do |stmt|
                conn.execute(stmt)
              end
            end
            #also 'execute' works for sqlite
            #@conn.execute("BEGIN")
            #  t_d.each do |sql|
            #    @conn.execute(sql)
            #  end
            #@conn.execute("COMMIT")
          else
            #'do' works for postgresql
            # NOTE(review): no ROLLBACK is issued when a statement fails
            @conn.do("BEGIN")
            t_d.each do |stmt|
              @conn.do(stmt)
            end
            @conn.do("COMMIT")
          end
        rescue DBI::DatabaseError => e
          STDERR.puts "Error code:    #{e.err}"
          STDERR.puts "Error message: #{e.errstr}"
          STDERR.puts "Error SQLSTATE: #{e.state}"
          report_failed_transaction(t_d)
        rescue
          report_failed_transaction(t_d)
        ensure
        end
      else
        @db=SiSU_Env::InfoDb.new
        puts "\n#{@cX.grey}file #{@cX.off} #{@cX.blue}#{@opt.fns}#{@cX.off} in language code #{@cX.blue}#{@opt.lng}#{@cX.off} #{@cX.grey}already exists in database#{@cX.off} #{@cX.blue}#{@db.psql.db}#{@cX.off} #{@cX.brown}update instead?#{@cX.off}"
      end
    end

    def pf_db_import_transaction_open
    end

    def pf_db_import_transaction_close
    end

    # Builds the metadata_and_text tuple for the document.
    # Sets @tp (source text and searchable text fields, when the source file
    # exists) and advances the shared document id @@id_t.
    # Returns the tuple produced by SiSU_DbTuple::LoadMetadata.
    def db_import_metadata                                  #% import documents - populate database
      print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } if @opt.cmd =~/vVM/
      @tp={}
      @md=SiSU_Param::Parameters.new(@opt).get
      #% sisutxt & fulltxt
      if FileTest.exist?(@md.fns)
        txt_arr=File.readlines(@md.fns,'')                  # '' separator: read paragraph-wise
        src=txt_arr.join("\n")
        src=special_character_escape(src)
        @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', "
        txt=clean_searchable_text(txt_arr)
        #txt=special_character_escape(txt)
        @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', "
      end
      #% title
      # `and` is required here: with && the whole expression would become the
      # operand of defined?
      if defined? @md.title.full \
      and @md.title.full=~/\S+/                             # DublinCore 1 - title
        #@tp[:title]=@md.title.full
        #special_character_escape(@tp[:title])
        #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', "
        begin
          @@id_t ||=0
          id_t=query_max('SELECT MAX(tid) FROM metadata_and_text;')
          @@id_t=id_t if id_t
        rescue
          puts "#{__FILE__} #{__LINE__}" if @opt.cmd =~/M/
        end
        #bug related, needs to be performed once at start of file, but consider
        #moving, as, placed here it means program will fail if document header
        #lacks @title:
        @@id_t+=1
        puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@cX.off}} if @opt.cmd =~/vVM/
      end
      ################ CLEAR ##############
      SiSU_DbDBI::Test.new(self,@opt).verify                #% import title names, filenames (tuple)
      SiSU_DbTuple::LoadMetadata.new(@conn,@@id_t,@md,@file_maint).tuple
    end

    # Builds the tuples for every text object of the document and for the
    # endnotes found in them.
    #
    # dal_array - document objects; each is read through .obj, .of, .is, .ln,
    #             .ocn, .lv, .odv, .osp, .node, .parent, .name, .idx
    #
    # Returns @tuple_array. Any exception is reported through
    # SiSU_Errors::InfoError and the tuples collected so far are returned.
    def db_import_documents(dal_array)                      #% import documents - populate main database table, import into substantive database tables (tuple)
      begin
        @col[:tid]=@@id_t
        @en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
        @col[:en_a],@col[:en_z]=nil,nil
        dal_array.each do |data|
          strip_inline_markup(data)
          @col[:seg]=@@seg
          # regular text what of code-blocks grouped text etc.
          # (the original `data.of ==:para || :heading || ...` was always true;
          # objects of other categories, e.g. :structure, :layout, :comment,
          # are now skipped)
          next unless TEXT_OBJECT_TYPES.include?(data.of)
          notedata=data.obj.dup
          if data.is==:heading \
          && (data.ln.inspect=~/[123]/)                     #% :headings
            import_heading(data)
          elsif data.is==:heading \
          && (level=[4,5,6].find { |l| data.ln==l })
            import_segment_heading(data,level)
          else                                              #% regular text
            import_text_object(data)
          end
          #% import into database endnotes tables
          ENDNOTE_KINDS.each do |type,open_key,close_key,mark|
            import_endnotes(notedata,data,type,Mx[open_key],Mx[close_key],mark)
          end
        end
      rescue
        SiSU_Errors::InfoError.new($!,$@,@opt.cmd,@opt.fns).error do
          __LINE__.to_s + ':' + __FILE__
        end
      ensure
      end
      @tuple_array
    end

    # Stores txt in @txt and returns self, so that the endnote helpers defined
    # below can be chained: endnotes(txt).extract_any
    #
    # The nested defs (re)define public instance methods each time this method
    # runs; they are kept nested so that they stay public and keep the
    # definition timing of the original.
    def endnotes(txt)
      @txt=txt
      # Records endnote numbers found in @txt in @en, @en_ast, @en_pls and
      # returns @txt with endnotes reduced to their number.
      def extract_any
        if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
          endnotes(@txt).range
          @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
          @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
          @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
          @txt=endnotes(@txt).clean_text
        end
        @txt
      end
      # Numbers of the regular endnotes in @txt (scan result), or nil.
      def standard
        (@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \
        ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)
        : nil
      end
      # Numbers of the asterisk endnotes in @txt (scan result), or nil.
      def asterisk
        (@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \
        ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/)
        : nil
      end
      # Numbers of the plus endnotes in @txt (scan result), or nil.
      def plus
        (@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \
        ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/)
        : nil
      end
      # Replaces each endnote in @txt by its number (with its * or + marker).
      # Both branches currently perform the same substitution.
      def clean_text(base_url=nil)
        @txt=if base_url
          @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,%{\\1}).
            gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,%{\\1}).
            gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,%{\\1})
        else
          @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,'\1').
            gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,'\1').
            gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,'\1')
        end
        @txt
      end
      # Resets @col[:en_a] / @col[:en_z] and tries to set them from @txt.
      # NOTE(review): the per-word pattern requires whitespace inside a
      # whitespace-free word, so it looks as if it can never match - confirm
      def range
        @col[:en_a]=@col[:en_z]=nil
        if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}|#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/
          word_array=@txt.scan(/\S+/)
          word_array.each do |w|
            if w[/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/] # not tested since change 2003w31
              @col[:en_a]=$1 unless @col[:en_a]
              @col[:en_z]=@col[:en_a].dup unless @col[:en_a]
              @col[:en_z]=$1 if @col[:en_a]
            end
          end
        end
        @col
      end
      self
    end

    # Builds the urls tuple: for every output listed in URL_OUTPUTS whose file
    # exists, records its column name and quoted url.
    #
    # dbi_unit - not used
    # content  - stored in @fnc
    #
    # Returns the tuple from SiSU_DbTuple::LoadUrls, or nil when an exception
    # was raised (it is reported through SiSU_Errors::InfoError).
    def db_import_urls(dbi_unit,content)                    #% import documents OID - populate database
      begin
        @fnc=content
        @env=SiSU_Env::InfoEnv.new(@opt.fns)
        f,u={},{}
        if @fnb.nil? \
        or @fnb.empty?
          p 'file output path error' #remove
        end
        URL_OUTPUTS.each do |key,column,kind,pdf_name|
          location=@md.file.output_path.public_send(kind)
          filename=pdf_name \
          ? @pdf_fn.public_send(pdf_name)
          : @md.file.base_filename.public_send(kind)
          next unless FileTest.file?("#{location.dir}/#{filename}")
          f[key],u[key]="#{column},","'#{location.url}/#{filename}',"
        end
        # disabled outputs, kept for reference (they sat between concordance
        # and digest); pdf letter size variants were likewise commented out:
        #if FileTest.file?("#{@md.file.output_path.x.dir}/#{@md.file.base_filename.x}")==true
        #  f[:latex_p],u[:latex_p]='latex_p,', "'#{@md.file.output_path.x.url}/#{@md.file.base_filename.x}',"
        #end
        ##if FileTest.file?("#{out}/#{@fnb}/#{@opt.fns}.tex")==true
        ##  f[:latex_p],u[:latex_p]='latex_p,', "'#{base}/#{@fnb}/#{@opt.fns}.tex',"
        ##end
        #if FileTest.file?("#{@md.file.output_path.x.dir}/#{@md.file.base_filename.x}")==true
        #  f[:latex_l],u[:latex_l]='latex_l,', "'#{@md.file.output_path.x.url}/#{@md.file.base_filename.x}',"
        #end
        ##if FileTest.file?("#{out}/#{@fnb}/#{@opt.fns}.landscape.tex")==true
        ##  f[:latex_l],u[:latex_l]='latex_l,', "'#{base}/#{@fnb}/#{@opt}.fns}.landscape.tex',"
        ##end
        t=SiSU_DbTuple::LoadUrls.new(@conn,f,u,@@id_t,@opt,@file_maint)
        tuple=t.tuple
      rescue
        SiSU_Errors::InfoError.new($!,$@,@opt.cmd,@opt.fns).error do
          __LINE__.to_s + ':' + __FILE__
        end
      ensure
      end
      tuple
    end

    private

    # Runs a "SELECT MAX(...)" query and returns its value, using the call
    # style of whichever driver is in use.
    def query_max(sql)
      if @driver_sqlite3
        @conn.execute( sql ).join.to_i
      else
        @conn.execute( sql ) { |x| x.fetch_all.flatten[0] }
      end
    end

    # Reports the current exception and writes the statements of the failed
    # transaction to <processing sql path>/<fnb>.sql. Must be called from
    # within a rescue clause ($! and $@ are read).
    def report_failed_transaction(t_d)
      SiSU_Errors::InfoError.new($!,$@,@opt.cmd,@opt.fns).error do
        __LINE__.to_s + ':' + __FILE__
      end
      sqlfn="#{@env.processing_path.sql}/#{@md.fnb}.sql"
      # block form closes (and flushes) the dump file
      File.open(sqlfn,'w') { |file| t_d.each { |stmt| file.puts stmt } }
      p sqlfn
      if @opt.cmd =~/M/
        p @conn.methods.sort
        puts "#{__FILE__}:#{__LINE__}"
      end
    end

    # Removes inline font-attribute marks (keeping their content) and tags
    # from data.obj, in place.
    def strip_inline_markup(data)
      INLINE_MARKUP_DELIMITERS.each do |open_key,close_key|
        data.obj.gsub!(/#{Mx[open_key]}(.+?)#{Mx[close_key]}/,'\1')
      end
      data.obj.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ')
      data.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
    end

    # Copies the identifying fields of a heading object into @col.
    def set_heading_columns(data)
      @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns]=data.ocn,data.lv,data.odv,data.osp
      @col[:t_of],@col[:t_is],@col[:node],@col[:parent]=data.of,data.is,data.node,data.parent
      @col[:digest_clean],@col[:digest_all]='',''
    end

    # Sets @hname (remembering the last non-empty segment name in @@hname),
    # @env and @base_url for the current segment.
    def set_segment_url
      @hname=if @col[:seg] \
      and not @col[:seg].to_s.empty?
        @@hname=@col[:seg].to_s
      else @@hname
      end
      @env=SiSU_Env::InfoEnv.new(@md.fns)
      @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
    end

    # Copies first/last of the collected endnote number lists into the
    # @en_a/@en_z (and _asterisk, _plus) instance variables.
    def note_endnote_ranges
      @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
      @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
      @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
    end

    # Fills the body, plaintext and book index columns from the formatted
    # body and appends the resulting doc_objects tuple to @tuple_array.
    def push_document_tuple(data,body)
      @col[:body]=special_character_escape(body)
      plaintext=strip_markup(@col[:body].dup)
      @col[:plaintext]=clean_searchable_text(plaintext)
      book_idx=data.idx ? data.idx : ''
      @col[:book_idx]=clean_searchable_text(book_idx)
      @tuple_array << SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint).tuple
    end

    # Imports a heading of level 1, 2 or 3.
    def import_heading(data)
      @col[:lev]=data.ln
      set_heading_columns(data)
      @col[:lid]+=1
      endnotes(data.obj).extract_any
      body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus
      note_endnote_ranges
      push_document_tuple(data,body)
      # to_s: Regexp#=== is false for an Integer level, so without it the
      # lv1..lv3 counters were never advanced
      case @col[:lev].to_s
      when /1/ then @col[:lv1]+=1
      when /2/ then @col[:lv2]+=1
      when /3/ then @col[:lv3]+=1
      end
      @col[:lev]=@col[:plaintext]=@col[:body]=''
    end

    # Imports a heading of level 4 (start of a segment; sets @@seg) or of
    # level 5/6 (sub-heading within a segment; sets @@seg_full).
    def import_segment_heading(data,level)
      set_heading_columns(data)
      if level==4
        @@seg=data.name
      else
        @@seg_full=data.name if data.name #check data.name
        @@seg ||='' #nil # watch
      end
      @col[:seg]=@@seg
      @col["lv#{level}".to_sym]+=1
      @col[:lid]+=1
      @col[:lev]=level
      set_segment_url
      endnotes(data.obj).extract_any
      body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
      note_endnote_ranges
      push_document_tuple(data,body)
      @col[:lev]=@col[:plaintext]=@col[:body]=''
    end

    # Imports a non-heading text object (paragraph, table, code, indented or
    # hanging-indent text) and resets the per-object endnote state.
    def import_text_object(data)
      @col[:lid]+=1
      @col[:ocn],@col[:ocnd],@col[:ocns]=data.ocn,data.odv,data.osp
      @col[:t_of],@col[:t_is],@col[:node],@col[:parent]=data.of,data.is,'',data.parent
      @col[:digest_clean],@col[:digest_all]='',''
      set_segment_url
      txt=endnotes(data.obj).extract_any
      if @sql_type=~/pg/ \
      and txt.size > (SiSU_DbColumns::ColumnSize.new.document_clean - 1) # examine pg build & remove limitation
        puts "\n\nTOO LARGE (TXT - see error log)\n\n"
        # log the oversized text itself (@col[:body] is still empty here)
        File.open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
          error.puts("\n#{@opt.fns}\nTEXT BODY\n#{txt.size} object #{@col[:ocn]} -> #{txt.slice(0..500)}")
        end
      end
      note_endnote_ranges
      formatter=SiSU_FormatShared::CSS_Format.new(@md,data)
      # `and` is required in the two indent tests: with && the rest of the
      # expression would become the operand of defined?
      body=if data.is==:table
        formatter.html_table
      elsif data.is==:code
        formatter.code
      elsif defined? data.indent \
      and defined? data.hang \
      and data.indent =~/[1-9]/ \
      and data.indent == data.hang
        formatter.indent(data.indent)
      elsif defined? data.indent \
      and defined? data.hang \
      and data.hang =~/[0-9]/ \
      and data.indent != data.hang
        formatter.hang_indent(data.hang,data.indent)
      else
        formatter.norm
      end
      push_document_tuple(data,body)
      @en,@en_ast,@en_pls=[],[],[]
      @col[:en_a]=@col[:en_z]=nil
      @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]=''
    end

    # Appends one endnotes tuple to @tuple_array for every numbered endnote of
    # the given kind found in notedata.
    #
    # type             - tuple type ('endnotes', 'endnotes_asterisk', ...)
    # delim_o, delim_c - opening and closing endnote marks (interpolated into
    #                    the patterns as-is, as before)
    # mark             - literal marker following the opening mark ('', '*', '+')
    def import_endnotes(notedata,data,type,delim_o,delim_c,mark)
      prefix=Regexp.escape(mark)
      any_note=/#{delim_o}#{prefix}.+?#{delim_c}/
      numbered_note=/#{delim_o}#{prefix}(\d+)(.+?)#{delim_c}/
      notedata.scan(any_note).each do |inf|
        found=numbered_note.match(inf)
        next unless found
        nr,txt,digest_clean=found[1],found[2].strip,0
        @id_n+=1
        txt=special_character_escape(txt)
        body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt)
        txt=strip_markup(txt)
        if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1)
          puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
          File.open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
            error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
          end
          txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
        end
        en={
          type: type,
          id: @id_n,
          lid: @col[:lid],
          nr: nr,
          txt: txt,
          body: body,
          ocn: @col[:ocn],
          ocnd: @col[:ocnd],
          ocns: @col[:ocns],
          id_t: @@id_t,
          hash: digest_clean
        }
        @tuple_array << SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint).tuple
      end
    end
  end
end
__END__