about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu/develop/db_import.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/develop/db_import.rb')
-rw-r--r-- lib/sisu/develop/db_import.rb 877
1 files changed, 877 insertions, 0 deletions
diff --git a/lib/sisu/develop/db_import.rb b/lib/sisu/develop/db_import.rb
new file mode 100644
index 00000000..c221885c
--- /dev/null
+++ b/lib/sisu/develop/db_import.rb
@@ -0,0 +1,877 @@
+# encoding: utf-8
+=begin
+
+* Name: SiSU
+
+** Description: documents, structuring, processing, publishing, search
+*** modules shared by the different db types, dbi, postgresql, sqlite
+
+** Author: Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
+ All Rights Reserved.
+
+** License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/licenses/gpl.html>
+
+ <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
+
+** SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+** Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+** Git
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/db_import.rb;hb=HEAD>
+
+=end
+module SiSU_DbImport
+ require_relative 'db_columns' # db_columns.rb
+ require_relative 'db_load_tuple' # db_load_tuple.rb
+ require_relative 'db_sqltxt' # db_sqltxt.rb
+ require_relative 'html_lite_shared' # html_lite_shared.rb
+ class Import < SiSU_DbText::Prepare
+ include SiSU_Param
+ include SiSU_Screen
+ include SiSU_DbAction # presumably the source of db_action used in marshal_load - TODO confirm
+ @@dl=nil # digest length, set once in initialize from SiSU_Env::InfoEnv.new.digest.length
+ @@hname=nil # most recent non-empty segment name; reused for @base_url when the current object has no segment name
+ attr_accessor :tp # hash of source-text fields (sisutxt/fulltxt) filled in by db_import_metadata
+ def initialize(opt,conn,file_maint,sql_type=:pg)
+ @opt,@conn,@file_maint,@sql_type=opt,conn,file_maint,sql_type
+ @cX=SiSU_Screen::Ansi.new(@opt.act[:color_state][:set]).cX
+ @env=SiSU_Env::InfoEnv.new(@opt.fns)
+ @dal="#{@env.processing_path.ao}"
+ @fnb=if @opt.fns.empty? \
+ or @opt.selections.str.empty?
+ ''
+ else
+ @md=SiSU_Param::Parameters.new(@opt).get
+ @md.fnb
+ end
+ @fnc="#{@dal}/#{@opt.fns}.content.rbm"
+ @@seg,@@seg_full='','' #create? consider placing field just before clean text as opposed to seg which contains seg(.html) name info seg_full would contain seg info for levels 5 & 6 where available eg seg_full may be 7.3 (level 5) and 7.3.1 (level 6) where seg is 7
+ @col=Hash.new('')
+ @col[:ocn]=''
+ @counter={}
+ @db=SiSU_Env::InfoDb.new
+ if @sql_type==:sqlite
+ @driver_sqlite3=
+ (@conn.inspect.match(/^(.{10})/)[1] \
+ == @db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \
+ ? true
+ : false
+ end
+ sql='SELECT MAX(lid) FROM doc_objects'
+ begin
+ @col[:lid] ||=0
+ @col[:lid]=@driver_sqlite3 \
+ ? @conn.execute( sql ).join.to_i
+ : @conn.exec( sql ).getvalue(0,0).to_i
+ rescue
+ if @opt.act[:maintenance][:set]==:on
+ puts "#{__FILE__}:#{__LINE__}"
+ end
+ end
+ @col[:lid]=0 if @col[:lid].nil? or @col[:lid].to_s.empty?
+ sql='SELECT MAX(nid) FROM endnotes'
+ begin
+ @id_n=@driver_sqlite3 \
+ ? @conn.execute( sql ).join.to_i
+ : @conn.exec( sql ).getvalue(0,0).to_i
+ @id_n ||=0
+ rescue
+ if @opt.act[:maintenance][:set]==:on
+ puts "#{__FILE__}:#{__LINE__}"
+ end
+ end
+ @id_n =0 if @col[:lid].nil? or @col[:lid].to_s.empty?
+ @col[:lv0]=@col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=@col[:lv7]=0
+ @db=SiSU_Env::InfoDb.new
+ @pdf_fn=SiSU_Env::FileOp.new(@md).base_filename
+ @@dl ||=SiSU_Env::InfoEnv.new.digest.length
+ end
+ # Import the current document into the database unless a row for the same
+ # source filename and language already exists in metadata_and_text.
+ #
+ # Steps: load the document's abstract objects, look the document up, and if
+ # it is absent collect the SQL produced by db_import_metadata,
+ # db_import_documents and db_import_urls and run it in one transaction.
+ # When the lookup fails the db tables are created and the lookup is retried,
+ # at most once. When the transaction fails the error is reported and the
+ # collected SQL is written to <processing sql path>/<fnb>.sql.
+ def marshal_load
+   require_relative 'ao' # ao.rb
+   @ao_array=SiSU_AO::Source.new(@opt).get # ao file drawn here
+   if (@opt.act[:verbose][:set]==:on \
+   || @opt.act[:verbose_plus][:set]==:on \
+   || @opt.act[:maintenance][:set]==:on)
+     SiSU_Screen::Ansi.new(
+       @opt.act[:color_state][:set],
+       "#{@db.psql.db}::#{@opt.fns}"
+     ).puts_blue
+   end
+   SiSU_Screen::Ansi.new(
+     @opt.act[:color_state][:set],
+     'Marshal Load',
+     @fnc
+   ).puts_grey if @opt.act[:verbose][:set]==:on
+   # NOTE(review): filename and language are interpolated into the SQL text
+   # unescaped - confirm they can never contain a single quote
+   select_first_match=%{
+ SELECT metadata_and_text.tid
+ FROM metadata_and_text
+ WHERE metadata_and_text.src_filename = '#{@md.fns}'
+ AND metadata_and_text.language_document_char = '#{@opt.lng}'
+ ;} # note, for .ssm: @md.fns (is set during runtime & is) != @opt.fns @md.opt.fns
+   tables_created=false # bounds the retry below: table creation is attempted once only
+   file_exist=if @sql_type==:sqlite
+     begin
+       @conn.get_first_value(select_first_match)
+     rescue SQLite3::Exception => e
+       # not tested
+       puts "Exception occurred"
+       puts e
+       unless tables_created
+         SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark(
+           "\n" \
+           + 'Attempting to initialize db' + "\n" \
+           + 'Creating db tables'
+         )
+         sdb={
+           create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type),
+           index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type),
+         }
+         db_action(sdb).create
+         tables_created=true
+         retry # repeat the lookup (the original used create's return value as the lookup result)
+       end
+     end
+   else
+     begin
+       @conn.exec(select_first_match).field_values("tid")[0]
+     rescue PG::Error => e
+       res=e.result # NOTE(review): may be nil for errors not tied to a query result - confirm against pg docs
+       err=if res
+         [
+           PG::Result::PG_DIAG_SEVERITY,
+           PG::Result::PG_DIAG_SQLSTATE,
+           PG::Result::PG_DIAG_MESSAGE_PRIMARY,
+           PG::Result::PG_DIAG_MESSAGE_DETAIL,
+           PG::Result::PG_DIAG_MESSAGE_HINT,
+           PG::Result::PG_DIAG_STATEMENT_POSITION,
+           PG::Result::PG_DIAG_INTERNAL_POSITION,
+           PG::Result::PG_DIAG_INTERNAL_QUERY,
+           PG::Result::PG_DIAG_CONTEXT,
+           PG::Result::PG_DIAG_SOURCE_FILE,
+           PG::Result::PG_DIAG_SOURCE_LINE,
+           PG::Result::PG_DIAG_SOURCE_FUNCTION,
+         ].map {|field| res.error_field(field) }
+       else []
+       end
+       p err
+       primary=(err[2] || e.message).to_s # err[2] is the primary message field
+       if not tables_created \
+       and (primary =~/relation "\S+?" does not exist/ \
+       or err.inspect =~/relation "\S+?" does not exist/)
+         SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:yellow).mark(
+           "\n" \
+           + primary + "\n" \
+           + 'Attempting to initialize db' + "\n" \
+           + 'Creating db tables'
+         )
+         sdb={
+           create: SiSU_DbDBI::Create.new(@opt,@conn,@file_maint,@sql_type),
+           index: SiSU_DbDBI::Index.new(@opt,@conn,@file_maint,@sql_type),
+         }
+         db_action(sdb).create
+         tables_created=true
+         retry
+       end
+     end
+   end
+   if not file_exist
+     # transaction data: every SQL statement needed for this document, in order
+     t_d=[
+       db_import_metadata,
+       db_import_documents(@ao_array),
+       db_import_urls(@ao_array,@fnc), #import OID on/off
+     ].flatten
+     if (@opt.act[:verbose_plus][:set]==:on \
+     || @opt.act[:maintenance][:set]==:on)
+       puts @conn.class if defined? @conn.class
+       puts @conn.driver_name if defined? @conn.driver_name
+       puts @conn.driver if defined? @conn.driver
+     end
+     begin #% sql
+       if @sql_type==:sqlite
+         @conn.transaction do |conn|
+           t_d.each {|sql| conn.execute(sql) }
+         end
+       else
+         @conn.exec("BEGIN")
+         begin
+           t_d.each {|sql| @conn.exec(sql) }
+           @conn.exec("COMMIT")
+         rescue => failure
+           # do not leave the connection inside an aborted transaction
+           begin
+             @conn.exec("ROLLBACK")
+           rescue
+             # rollback itself failed; the original failure is reported below
+           end
+           raise failure
+         end
+       end
+     rescue
+       SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do
+         __LINE__.to_s + ':' + __FILE__
+       end
+       # keep the statements that could not be applied; the block form closes the file
+       sqlfn="#{@env.processing_path.sql}/#{@md.fnb}.sql"
+       File.open(sqlfn,'w') do |dump|
+         t_d.each {|i| dump.puts i }
+       end
+       p sqlfn
+       if @opt.act[:maintenance][:set]==:on
+         p @conn.methods.sort
+         puts "#{__FILE__}:#{__LINE__}"
+       end
+     end
+   else
+     puts "\nfile #{@opt.fns} in language code #{@opt.lng} already exists in database #{@db.psql.db}, use --update instead?"
+   end
+ end
+ def pf_db_import_transaction_open
+ end
+ def pf_db_import_transaction_close
+ end
+ def book_idx_hash_to_str(book_idx)
+ book_idx=book_idx ? book_idx : ''
+ book_idx_str,book_subidx_part='',''
+ if not book_idx.empty?
+ book_idx_str=''
+ book_idx.each_pair do |k0,v0|
+ book_idx_str << %{#{k0}+#{v0[:plus]}}
+ book_subidx_part=''
+ if v0[:sub].length > 0
+ v0[:sub].each do |subterms|
+ subterms.each_pair do |k1,v1|
+ book_subidx_part << %{\n #{k1}+#{v1[:plus]} | }
+ end
+ end
+ book_idx_str=book_idx_str + ':' + book_subidx_part
+ end
+ end
+ end
+ book_idx_str
+ end
+ def db_import_metadata #% import documents - populate database
+ if (@opt.act[:verbose][:set]==:on \
+ || @opt.act[:verbose_plus][:set]==:on \
+ || @opt.act[:maintenance][:set]==:on)
+ print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} }
+ end
+ @tp={}
+ @md=SiSU_Param::Parameters.new(@opt).get
+#% sisutxt & fulltxt
+ if FileTest.exist?(@md.fns)
+ txt_arr=IO.readlines(@md.fns,'')
+ src=txt_arr.join("\n")
+ src=special_character_escape(src)
+ @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', "
+ txt=clean_searchable_text_from_document_source(txt_arr)
+ #txt=special_character_escape(txt)
+ @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', "
+ end
+#% title
+ if defined? @md.title.full \
+ and @md.title.full=~/\S+/ # DublinCore 1 - title
+ #@tp[:title]=@md.title.full
+ #special_character_escape(@tp[:title])
+ #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', "
+ sql='SELECT MAX(tid) FROM metadata_and_text;'
+ begin
+ @@id_t ||=0
+ id_t=@driver_sqlite3 \
+ ? @conn.execute( sql ).join.to_i # { |x| id_t=x.join.to_i }
+ : @conn.exec( sql ).getvalue(0,0).to_i
+ @@id_t=id_t if id_t
+ rescue
+ if @opt.act[:maintenance][:set]==:on
+ puts "#{__FILE__} #{__LINE__}"
+ end
+ end
+ @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title:
+ if (@opt.act[:verbose][:set]==:on \
+ || @opt.act[:verbose_plus][:set]==:on \
+ || @opt.act[:maintenance][:set]==:on)
+ puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}}
+ end
+ end
+ ################ CLEAR ##############
+ SiSU_DbDBI::Test.new(self,@opt).verify #% import title names, filenames (tuple)
+ t=SiSU_DbTuple::LoadMetadata.new(@conn,@@id_t,@md,@file_maint)
+ tuple=t.tuple
+ tuple
+ end
+ def db_import_documents(ao_array) #% import documents - populate main database table, import into substantive database tables (tuple)
+ begin
+ @col[:tid]=@@id_t
+ @en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
+ @col[:en_a],@col[:en_z]=nil,nil
+ ao_array.each do |data|
+ data.obj=data.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1').
+ gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1').
+ gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1').
+ gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1').
+ gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1').
+ gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1').
+ gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1').
+ gsub(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ').
+ gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
+ @col[:seg]=@@seg
+ if data.of ==:para \
+ || data.of ==:heading \
+ || data.of ==:heading_insert \
+ || data.of ==:block \
+ || data.of ==:group # regular text what of code-blocks grouped text etc.
+ notedata=data.obj.dup
+ #% :headings
+ if data.is==:heading \
+ && (data.ln.inspect=~/[0-3]/)
+ (
+ @col[:lev],
+ txt,@col[:ocn],
+ @col[:lev_an],
+ @col[:ocnd],@col[:ocns],
+ @col[:t_of],@col[:t_is],
+ @col[:node],@col[:parent],
+ @col[:digest_clean],@col[:digest_all]=
+ data.ln,
+ data.obj,data.ocn,
+ data.lv,
+ data.odv,data.osp,
+ data.of,data.is,
+ data.node,data.parent,
+ '',''
+ )
+ @col[:lid]+=1
+ txt=endnotes(txt).extract_any
+ body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus
+ @col[:body]=clean_document_objects_body(body)
+ plaintext=@col[:body].dup
+ plaintext=strip_markup(plaintext)
+ @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
+ book_idx=book_idx_hash_to_str(data.idx)
+ @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
+ if @en[0] then @en_a,@en_z=@en[0].first,@en[0].last
+ end
+ if @en_ast[0] then @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
+ end
+ if @en_pls[0] then @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last
+ end
+ t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
+ @tuple_array << t.tuple
+ case @col[:lev]
+ when /0/ then @col[:lv0]+=1
+ when /1/ then @col[:lv1]+=1
+ when /2/ then @col[:lv2]+=1
+ when /3/ then @col[:lv3]+=1
+ when /4/ then @col[:lv4]+=1
+ end
+ @col[:lev]=@col[:plaintext]=@col[:body]=''
+ elsif data.is==:heading \
+ && data.ln==4
+ (
+ @@seg,txt,
+ @col[:ocn],@col[:lev_an],
+ @col[:ocnd],@col[:ocns],
+ @col[:t_of],@col[:t_is],
+ @col[:node],@col[:parent],
+ @col[:digest_clean],@col[:digest_all]=
+ data.name,data.obj,
+ data.ocn,data.lv,
+ data.odv,data.osp,
+ data.of,data.is,
+ data.node,data.parent,
+ '',''
+ )
+ @col[:seg]=@@seg
+ @col[:lv4]+=1
+ @col[:lid]+=1
+ @col[:lev]=4
+ @hname=if @col[:seg] \
+ and not @col[:seg].to_s.empty?
+ @@hname=@col[:seg].to_s
+ else @@hname
+ end
+ @env=SiSU_Env::InfoEnv.new(@md.fns)
+ @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
+ txt=endnotes(txt).extract_any
+ body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
+ @col[:body]=clean_document_objects_body(body)
+ plaintext=@col[:body].dup
+ plaintext=strip_markup(plaintext)
+ @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
+ book_idx=book_idx_hash_to_str(data.idx)
+ @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
+ @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
+ @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
+ @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
+ t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
+ @tuple_array << t.tuple
+ @col[:lev]=@col[:plaintext]=@col[:body]=''
+ elsif data.is==:heading \
+ && data.ln==5
+ (
+ txt,
+ @col[:ocn],@col[:lev_an],
+ @col[:ocnd],@col[:ocns],
+ @col[:t_of],@col[:t_is],
+ @col[:node],@col[:parent],
+ @col[:digest_clean],@col[:digest_all]=
+ data.obj,
+ data.ocn,data.lv,
+ data.odv,data.osp,
+ data.of,data.is,
+ data.node,data.parent,
+ '',''
+ )
+ @@seg_full=data.name if data.is==:heading \
+ && data.ln==5 \
+ && data.name #check data.name
+ @@seg ||='' #nil # watch
+ @col[:seg]=@@seg
+ @col[:lv5]+=1
+ @col[:lid]+=1
+ @col[:lev]=5
+ @hname=if @col[:seg] \
+ and not @col[:seg].to_s.empty?
+ @@hname=@col[:seg].to_s
+ else @@hname
+ end
+ @env=SiSU_Env::InfoEnv.new(@md.fns)
+ @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
+ txt=endnotes(txt).extract_any
+ body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
+ @col[:body]=clean_document_objects_body(body)
+ plaintext=@col[:body].dup
+ plaintext=strip_markup(plaintext)
+ @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
+ book_idx=book_idx_hash_to_str(data.idx)
+ @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
+ @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
+ @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
+ @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
+ t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
+ @tuple_array << t.tuple
+ @col[:lev]=@col[:plaintext]=@col[:body]=''
+ elsif data.is==:heading \
+ && data.ln==6
+ txt, @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=
+ data.obj,data.ocn, data.lv, data.odv, data.osp, data.of, data.is, data.node, data.parent, '', ''
+ @@seg_full=data.name if data.is==:heading && data.ln==6 && data.name #check data.name
+ @@seg ||='' #nil # watch
+ @col[:seg]=@@seg
+ @col[:lv6]+=1
+ @col[:lid]+=1
+ @col[:lev]=6
+ @hname=if @col[:seg] \
+ and not @col[:seg].to_s.empty?
+ @@hname=@col[:seg].to_s
+ else @@hname
+ end
+ @env=SiSU_Env::InfoEnv.new(@md.fns)
+ @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
+ txt=endnotes(txt).extract_any
+ body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
+ @col[:body]=clean_document_objects_body(body)
+ plaintext=@col[:body].dup
+ plaintext=strip_markup(plaintext)
+ @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
+ book_idx=book_idx_hash_to_str(data.idx)
+ @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
+ @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
+ @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
+ @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
+ t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
+ @tuple_array << t.tuple
+ @col[:lev]=@col[:plaintext]=@col[:body]=''
+ elsif data.is==:heading \
+ && data.ln==7
+ txt, @col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=
+ data.obj,data.ocn, data.lv, data.odv, data.osp, data.of, data.is, data.node, data.parent, '', ''
+ @@seg_full=data.name if data.is==:heading && data.ln==7 && data.name #check data.name
+ @@seg ||='' #nil # watch
+ @col[:seg]=@@seg
+ @col[:lv7]+=1
+ @col[:lid]+=1
+ @col[:lev]=7
+ @hname=if @col[:seg] \
+ and not @col[:seg].to_s.empty?
+ @@hname=@col[:seg].to_s
+ else @@hname
+ end
+ @env=SiSU_Env::InfoEnv.new(@md.fns)
+ @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
+ txt=endnotes(txt).extract_any
+ body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
+ @col[:body]=clean_document_objects_body(body)
+ plaintext=@col[:body].dup
+ plaintext=strip_markup(plaintext)
+ @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
+ book_idx=book_idx_hash_to_str(data.idx)
+ @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
+ @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
+ @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
+ @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
+ t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
+ @tuple_array << t.tuple
+ @col[:lev]=@col[:plaintext]=@col[:body]=''
+ #% :structure :layout :comment
+ elsif data.of==:structure \
+ || data.of==:layout \
+ || data.of==:comment
+ #added watch
+ #% :
+ else #% regular text
+ @col[:lid]+=1
+ (
+ txt=''
+ txt,@col[:ocn],
+ @col[:ocnd],@col[:ocns],
+ @col[:t_of],@col[:t_is],
+ @col[:node],@col[:parent],
+ @col[:digest_clean],@col[:digest_all],
+ @col[:lev]=
+ data.obj,data.ocn,
+ data.odv,data.osp,
+ data.of,data.is,
+ '',data.parent,
+ '','',
+ 9
+ )
+ @hname=if @col[:seg] \
+ and not @col[:seg].to_s.empty?
+ @@hname=@col[:seg].to_s
+ else @@hname
+ end
+ @env=SiSU_Env::InfoEnv.new(@md.fns)
+ @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
+ txt=endnotes(txt).extract_any
+ if @sql_type==:pg \
+ and txt.size > (SiSU_DbColumns::ColumnSize.new.document_clean - 1) # examine pg build & remove limitation
+ puts "\n\nTOO LARGE (TXT - see error log)\n\n"
+ open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
+ error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}")
+ end
+ txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
+ end
+ @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
+ @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
+ @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
+ body=if data.is==:table
+ SiSU_FormatShared::CSS_Format.new(@md,data).html_table
+ elsif data.is==:code
+ SiSU_FormatShared::CSS_Format.new(@md,data).code
+ elsif defined? data.indent \
+ and defined? data.hang \
+ and data.indent =~/[1-9]/ \
+ and data.indent == data.hang
+ SiSU_FormatShared::CSS_Format.new(@md,data).indent(data.indent)
+ elsif defined? data.indent \
+ and defined? data.hang \
+ and data.hang =~/[0-9]/ \
+ and data.indent != data.hang
+ SiSU_FormatShared::CSS_Format.new(@md,data).hang_indent(data.hang,data.indent)
+ else
+ SiSU_FormatShared::CSS_Format.new(@md,data).norm
+ end
+ @col[:body]=clean_document_objects_body(body)
+ plaintext=@col[:body].dup
+ plaintext=strip_markup(plaintext)
+ @col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
+ book_idx=book_idx_hash_to_str(data.idx)
+ @col[:book_idx]=clean_searchable_text_from_document_objects(book_idx)
+ t=SiSU_DbTuple::LoadDocuments.new(@conn,@col,@opt,@file_maint)
+ @tuple_array << t.tuple
+ @en,@en_ast,@en_pls=[],[],[]
+ @col[:en_a]=@col[:en_z]=nil
+ @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]=''
+ end
+ if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables
+ endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/)
+ endnote_array.each do |inf|
+ if inf[/#{Mx[:en_a_o]}\d+.+?#{Mx[:en_a_c]}/]
+ if inf[/#{Mx[:en_a_o]}(\d+)(.+?)#{Mx[:en_a_c]}/]
+ nr,txt,digest_clean=$1,$2.strip,0
+ end
+ @id_n ||=0
+ @id_n+=1
+ txt=special_character_escape(txt)
+ body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt)
+ txt=strip_markup(txt)
+ if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1)
+ puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
+ open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
+ error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
+ end
+ txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
+ end
+ if txt
+ en={
+ type: 'endnotes',
+ id: @id_n,
+ lid: @col[:lid],
+ nr: nr,
+ txt: txt,
+ body: body,
+ ocn: @col[:ocn],
+ ocnd: @col[:ocnd],
+ ocns: @col[:ocns],
+ id_t: @@id_t,
+ hash: digest_clean
+ }
+ t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint)
+ @tuple_array << t.tuple
+ end
+ end
+ end
+ word_mode=notedata.scan(/\S+/)
+ end
+ if notedata =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables
+ endnote_array=notedata.scan(/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/)
+ endnote_array.each do |inf|
+ if inf[/#{Mx[:en_b_o]}\*\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1
+ if inf[/#{Mx[:en_b_o]}[*](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1
+ nr,txt,digest_clean=$1,$2.strip,0
+ end
+ @id_n+=1
+ txt=special_character_escape(txt)
+ body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt)
+ txt=strip_markup(txt)
+ if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1)
+ puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
+ open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
+ error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
+ end
+ txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
+ end
+ if txt
+ en={
+ type: 'endnotes_asterisk',
+ id: @id_n,
+ lid: @col[:lid],
+ nr: nr,
+ txt: txt,
+ body: body,
+ ocn: @col[:ocn],
+ ocnd: @col[:ocnd],
+ ocns: @col[:ocns],
+ id_t: @@id_t,
+ hash: digest_clean
+ }
+ t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint)
+ @tuple_array << t.tuple
+ end
+ end
+ end
+ word_mode=notedata.scan(/\S+/)
+ end
+ if notedata =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables
+ endnote_array=notedata.scan(/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/)
+ endnote_array.each do |inf|
+ if inf[/#{Mx[:en_b_o]}\+\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1
+ if inf[/#{Mx[:en_b_o]}[+](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1
+ nr,txt,digest_clean=$1,$2.strip,0
+ end
+ @id_n+=1
+ txt=special_character_escape(txt)
+ body=SiSU_FormatShared::CSS_Format.new(@md,data).endnote(nr,txt)
+ txt=strip_markup(txt)
+ if txt.size > (SiSU_DbColumns::ColumnSize.new.endnote_clean - 1)
+ puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
+ open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
+ error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
+ end
+ txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
+ end
+ if txt
+ en={
+ type: 'endnotes_plus',
+ id: @id_n,
+ lid: @col[:lid],
+ nr: nr,
+ txt: txt,
+ body: body,
+ ocn: @col[:ocn],
+ ocnd: @col[:ocnd],
+ ocns: @col[:ocns],
+ id_t: @@id_t,
+ hash: digest_clean
+ }
+ t=SiSU_DbTuple::LoadEndnotes.new(@conn,en,@opt,@file_maint)
+ @tuple_array << t.tuple
+ end
+ end
+ end
+ word_mode=notedata.scan(/\S+/)
+ end
+ end
+ end
+ rescue
+ SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do
+ __LINE__.to_s + ':' + __FILE__
+ end
+ ensure
+ end
+ @tuple_array
+ end
+ def endnotes(txt)
+ @txt=txt
+ def extract_any
+ if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
+ endnotes(@txt).range
+ @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
+ @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
+ @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
+ @txt=endnotes(@txt).clean_text
+ end
+ @txt
+ end
+ def standard
+ (@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \
+ ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)
+ : nil
+ end
+ def asterisk
+ (@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \
+ ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/)
+ : nil
+ end
+ def plus
+ (@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \
+ ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/)
+ : nil
+ end
+ def clean_text(base_url=nil)
+ @txt=if base_url
+ @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,%{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>}).
+ gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,%{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>}).
+ gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,%{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>})
+ else
+ @txt.gsub(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,'<sup>\1</sup>').
+ gsub(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,'<sup>\1</sup>').
+ gsub(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,'<sup>\1</sup>')
+ end
+ @txt
+ end
+ def range
+ @col[:en_a]=@col[:en_z]=nil
+ if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}|#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/
+ word_array=@txt.scan(/\S+/)
+ word_array.each do |w|
+ if w[/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/] # not tested since change 2003w31
+ @col[:en_a]=$1 unless @col[:en_a]
+ @col[:en_z]=@col[:en_a].dup unless @col[:en_a]
+ @col[:en_z]=$1 if @col[:en_a]
+ end
+ end
+ end
+ @col
+ end
+ self
+ end
+ # Build the SQL tuple listing the URLs of the document's generated outputs.
+ #
+ # dbi_unit:: not used by this method
+ # content::  stored in @fnc
+ #
+ # For every known output format whose file exists on disk, a column name is
+ # recorded in f and the quoted URL in u, in the order of the table below;
+ # both hashes are passed to SiSU_DbTuple::LoadUrls. Returns that tuple, or
+ # nil when an error was raised (the error is reported through
+ # SiSU_Errors::Rescued).
+ def db_import_urls(dbi_unit,content) #% import documents OID - populate database
+   tuple=nil
+   begin
+     @fnc=content
+     @env=SiSU_Env::InfoEnv.new(@opt.fns)
+     f,u={},{}
+     # fix: test for nil before calling empty?
+     if @fnb.nil? \
+     or @fnb.empty?
+       p 'file output path error' #remove
+     end
+     base=@md.file.base_filename
+     # key, column name, output_path method, filename source, filename method
+     # (latex_p / latex_l columns are not populated yet)
+     outputs=[
+       [:txt,        'plaintext',  :txt,             base,   :txt],
+       [:html_toc,   'html_toc',   :html_seg,        base,   :html_segtoc],
+       [:html_doc,   'html_doc',   :html_scroll,     base,   :html_scroll],
+       [:xhtml,      'xhtml',      :xhtml,           base,   :xhtml],
+       [:xml_sax,    'xml_sax',    :xml_sax,         base,   :xml_sax],
+       [:xml_dom,    'xml_dom',    :xml_dom,         base,   :xml_dom],
+       [:epub,       'epub',       :epub,            base,   :epub],
+       [:odf,        'odf',        :odt,             base,   :odt],
+       [:pdf_p,      'pdf_p',      :pdf,             @pdf_fn,:pdf_p_a4],
+       [:pdf_l,      'pdf_l',      :pdf,             @pdf_fn,:pdf_l_a4],
+       [:concordance,'concordance',:html_concordance,base,   :html_concordance],
+       [:digest,     'digest',     :digest,          base,   :digest],
+       [:manifest,   'manifest',   :manifest,        base,   :manifest], #revisit, was to be text, this is html
+       [:markup,     'markup',     :src,             base,   :src],
+       [:sisupod,    'sisupod',    :sisupod,         base,   :sisupod],
+     ]
+     outputs.each do |key,column,path,names,name|
+       out=@md.file.output_path.public_send(path)
+       filename=names.public_send(name)
+       next unless FileTest.file?("#{out.dir}/#{filename}")
+       f[key],u[key]="#{column},","'#{out.url}/#{filename}',"
+     end
+     tuple=SiSU_DbTuple::LoadUrls.new(@conn,f,u,@@id_t,@opt,@file_maint).tuple
+   rescue
+     SiSU_Errors::Rescued.new($!,$@,@opt.selections.str,@opt.fns).location do
+       __LINE__.to_s + ':' + __FILE__
+     end
+   end
+   tuple
+ end
+ end
+end
+__END__