aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/v2/db_import.rb
diff options
context:
space:
mode:
authorRalph Amissah <ralph@amissah.com>2012-10-03 00:11:08 -0400
committerRalph Amissah <ralph@amissah.com>2012-10-03 00:11:08 -0400
commit804a103722aa7731ca7f2062ee2ebf533607e6aa (patch)
treea480caebb78925848807692c57c017b3ae5e6839 /lib/sisu/v2/db_import.rb
parentv3: 3.3.3 version & changelog, dates touched (diff)
v4: 4.0.0 new branch & version & changelog "opened"
Diffstat (limited to 'lib/sisu/v2/db_import.rb')
-rw-r--r--lib/sisu/v2/db_import.rb649
1 files changed, 0 insertions, 649 deletions
diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb
deleted file mode 100644
index 4237757f..00000000
--- a/lib/sisu/v2/db_import.rb
+++ /dev/null
@@ -1,649 +0,0 @@
-# coding: utf-8
-=begin
-
- * Name: SiSU
-
- * Description: a framework for document structuring, publishing and search
-
- * Author: Ralph Amissah
-
- * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.
-
- * License: GPL 3 or later:
-
- SiSU, a framework for document structuring, publishing and search
-
- Copyright (C) Ralph Amissah
-
- This program is free software: you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation, either version 3 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program. If not, see <http://www.gnu.org/licenses/>.
-
- If you have Internet connection, the latest version of the GPL should be
- available at these locations:
- <http://www.fsf.org/licensing/licenses/gpl.html>
- <http://www.gnu.org/licenses/gpl.html>
-
- <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
- <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>
-
- * SiSU uses:
- * Standard SiSU markup syntax,
- * Standard SiSU meta-markup syntax, and the
- * Standard SiSU object citation numbering and system
-
- * Homepages:
- <http://www.jus.uio.no/sisu>
- <http://www.sisudoc.org>
-
- * Download:
- <http://www.jus.uio.no/sisu/SiSU/download.html>
-
- * Ralph Amissah
- <ralph@amissah.com>
- <ralph.amissah@gmail.com>
-
- ** Description: modules shared by the different db types, dbi, postgresql,
- sqlite
-
-=end
-module SiSU_DB_import
- require "#{SiSU_lib}/db_columns" # db_columns.rb
- require "#{SiSU_lib}/db_load_tuple" # db_load_tuple.rb
- require "#{SiSU_lib}/db_sqltxt" # db_sqltxt.rb
- require "#{SiSU_lib}/shared_html_lite" # shared_html_lite.rb
- require 'sqlite3'
- class Import < SiSU_DB_text::Prepare
- include SiSU_Param
- include SiSU_Screen
- @@dl=nil
- @@hname=nil
- attr_accessor :tp
- def initialize(opt,conn,file,sql_type='pg')
- @opt,@conn,@file,@sql_type=opt,conn,file,sql_type
- @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
- @env=SiSU_Env::Info_env.new(@opt.fns)
- @dal="#{@env.path.dal}"
- if @opt.fns.empty? or @opt.cmd.empty?; @fnb=''
- else
- @md=SiSU_Param::Parameters.new(@opt).get
- @fnb=@md.fnb
- end
- @suffix=@opt.fns[/(?:.+?)(?:\.ssm\.sst|\.-?sst)/,1]
- @fnc="#{@dal}/#{@opt.fns}.content.rbm"
- @@seg,@@seg_full='','' #create? consider placing field just before clean text as opposed to seg which contains seg(.html) name info seg_full would contain seg info for levels 5 & 6 where available eg seg_full may be 7.3 (level 5) and 7.3.1 (level 6) where seg is 7
- @col=Hash.new('')
- @col[:ocn]=''
- @counter={}
- @db=SiSU_Env::Info_db.new
- if @sql_type=='sqlite'
- @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \
- ? true \
- : false
- end
- sql='SELECT MAX(lid) FROM doc_objects'
- begin
- @col[:lid] ||=0
- @col[:lid]=@driver_sqlite3 \
- ? @conn.execute( sql ).join.to_i \
- : @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
- rescue
- puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/
- end
- @col[:lid]=0 if @col[:lid].nil? or @col[:lid].to_s.empty?
- sql='SELECT MAX(nid) FROM endnotes'
- begin
- @id_n ||=0
- @id_n=@driver_sqlite3 \
- ? @conn.execute( sql ).join.to_i \
- : @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
- rescue
- puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/
- end
- @id_n =0 if @col[:lid].nil? or @col[:lid].to_s.empty?
- @col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=0
- @db=SiSU_Env::Info_db.new
- @@dl ||=SiSU_Env::Info_env.new.digest.length
- end
- # Imports the current document (@opt.fns) into the database in one transaction,
- # unless metadata_and_text already has a row with the same src_filename, in
- # which case a notice is printed and nothing is written.
- #
- # The statements come from db_import_metadata, db_import_documents and
- # db_import_urls. If running them raises, the error is reported through
- # SiSU_Errors::Info_error and the statements are written to a .sql file
- # (see marshal_load_sql_dump) instead of being re-raised.
- def marshal_load
-   require "#{SiSU_lib}/dal" # dal.rb
-   @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
-   # character class: any one of the flags v, V or M turns this message on
-   SiSU_Screen::Ansi.new(@opt.cmd,"#{@db.psql.db}::#{@opt.fns}").puts_blue if @opt.cmd =~/[vVM]/
-   SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc).puts_grey if @opt.cmd =~/v/
-   select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; }
-   file_exist=if @sql_type=~/sqlite/
-     @conn.get_first_value(select_first_match)
-   else
-     @conn.select_one(select_first_match)
-   end
-   if file_exist
-     @db=SiSU_Env::Info_db.new
-     puts "\n#{@cX.grey}file #{@cX.off} #{@cX.blue}#{@opt.fns}#{@cX.off} #{@cX.grey}already exists in database#{@cX.off} #{@cX.blue}#{@db.psql.db}#{@cX.off} #{@cX.brown}update instead?#{@cX.off}"
-   else
-     t_d=[] # transaction_data
-     t_d << db_import_metadata
-     t_d << db_import_documents(@dal_array)
-     t_d << db_import_urls(@dal_array,@fnc) #import OID on/off
-     t_d=t_d.flatten
-     if @opt.cmd =~/[MV]/
-       puts @conn.class if defined? @conn.class
-       puts @conn.driver_name if defined? @conn.driver_name
-       puts @conn.driver if defined? @conn.driver
-     end
-     begin
-       if @sql_type=~/sqlite/
-         @conn.transaction do |conn|
-           t_d.each do |statement|
-             conn.execute(statement)
-           end
-         end
-       else
-         #'do' works for postgresql
-         @conn.do("BEGIN")
-         t_d.each do |statement|
-           @conn.do(statement)
-         end
-         @conn.do("COMMIT")
-       end
-     rescue DBI::DatabaseError => e
-       puts "Error code: #{e.err}"
-       puts "Error message: #{e.errstr}"
-       puts "Error SQLSTATE: #{e.state}"
-       SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
-       marshal_load_sql_dump(t_d)
-     rescue
-       SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
-       marshal_load_sql_dump(t_d)
-     end
-   end
- end
- # Helper for marshal_load: writes the statements of a failed transaction, one
- # per line, to "<@env.path.sql>/<@md.fnb>.sql" and prints that path.
- # The block form of File.open closes (and so flushes) the file on exit.
- #
- # t_d - flat array of SQL statement strings
- def marshal_load_sql_dump(t_d)
-   sqlfn="#{@env.path.sql}/#{@md.fnb}.sql"
-   File.open(sqlfn,'w') do |sql|
-     t_d.each {|i| sql.puts i}
-     p sqlfn
-     if @opt.cmd =~/M/
-       puts sql
-       p @conn.methods.sort
-       puts "#{__FILE__}:#{__LINE__}"
-     end
-   end
- end
- # Empty placeholder: does nothing and returns nil.
- def pf_db_import_transaction_open
- end
- # Empty placeholder: does nothing and returns nil.
- def pf_db_import_transaction_close
- end
- def db_import_metadata #% import documents - populate database
- print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } if @opt.cmd =~/vVM/
- @tp={}
- @md=SiSU_Param::Parameters.new(@opt).get
-#% sisutxt & fulltxt
- if FileTest.exist?(@md.fns)
- txt_arr=IO.readlines(@md.fns,'')
- src=txt_arr.join("\n")
- src=special_character_escape(src)
- @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', "
- txt=clean_searchable_text(txt_arr)
- #special_character_escape(txt)
- @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', "
- end
-#% title
- if defined? @md.title.full \
- and @md.title.full=~/\S+/ # DublinCore 1 - title
- #@tp[:title]=@md.title.full
- #special_character_escape(@tp[:title])
- #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', "
- sql='SELECT MAX(tid) FROM metadata_and_text'
- begin
- @@id_t ||=0
- id_t=if @driver_sqlite3
- @conn.execute( sql ).join.to_i # { |x| id_t=x.join.to_i }
- else @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
- end
- @@id_t=id_t if id_t
- rescue
- puts "#{__FILE__} #{__LINE__}" if @opt.cmd =~/M/
- end
- @@id_t =0 if @col[:lid].nil? or @col[:lid].to_s.empty?
- @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title:
- puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}} if @opt.cmd =~/vVM/
- end
- ################ CLEAR ##############
- SiSU_DB_DBI::Test.new(self,@opt).verify #% import title names, filenames (tuple)
- t=SiSU_DB_tuple::Load_metadata.new(@conn,@@id_t,@md,@file)
- tuple=t.tuple
- tuple
- end
- # Builds the SQL tuples for the objects in dal_array (headings, other text
- # objects, and the endnotes embedded in them) and returns them as an array.
- #
- # dal_array - enumerable of document objects; each one is read through obj, of,
- #             is, ln, ocn, lv, odv, osp, node, parent and name (and indent where
- #             defined). Objects whose `of` does not match para|heading|group
- #             are skipped.
- #
- # Returns @tuple_array. An exception raised while iterating is reported through
- # SiSU_Errors::Info_error and not re-raised, so the array may then be partial.
- # Reads @@id_t, which is assigned in db_import_metadata.
- def db_import_documents(dal_array) #% import documents - populate main database table, import into substantive database tables (tuple)
- begin
- @col[:tid]=@@id_t
- @en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
- @col[:en_a],@col[:en_z]=nil,nil
- dal_array.each do |data|
- # remove the inline markers (Mx[:fa_*], glyph and tag delimiters) in place, keeping the enclosed text where a group is captured
- data.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ')
- data.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
- @col[:seg]=@@seg
- if data.of =~/para|heading|group/ # regular text what of code-blocks grouped text etc.
- # copy taken before endnotes(txt).extract_any rewrites the endnote markers in data.obj
- notedata=data.obj.dup
- if data.is=='heading' \
- and data.ln.inspect=~/[123]/
- @col[:lev],txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.ln,data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'',''
- @col[:lid]+=1
- txt=endnotes(txt).extract_any
- @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus
- # NOTE(review): return value unused here and below; effective only if special_character_escape mutates its argument - confirm
- special_character_escape(@col[:body])
- @col[:plaintext]=@col[:body].dup
- @col[:plaintext]=strip_markup(@col[:plaintext])
- @col[:plaintext]=clean_searchable_text(@col[:plaintext])
- if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
- end
- if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
- end
- if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last
- end
- t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)
- @tuple_array << t.tuple
- # NOTE(review): the when clauses are regexps, which match only if @col[:lev] (data.ln) is a String; the ln==4 comparisons below suggest an Integer - confirm these counters ever increment
- case @col[:lev]
- when /1/; @col[:lv1]+=1
- when /2/; @col[:lv2]+=1
- when /3/; @col[:lv3]+=1
- end
- @col[:lev]=@col[:plaintext]=@col[:body]=''
- elsif data.is=='heading' \
- and data.ln==4
- # a level 4 heading's name becomes the current segment name (@@seg)
- @@seg,txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.name,data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'',''
- @col[:seg]=@@seg
- @col[:lv4]+=1
- @col[:lid]+=1
- @col[:lev]=4
- @hname=if @col[:seg] \
- and not @col[:seg].to_s.empty?
- @@hname=@col[:seg].to_s
- else @@hname
- end
- @env=SiSU_Env::Info_env.new(@md.fns)
- @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
- txt=endnotes(txt).extract_any
- @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
- special_character_escape(@col[:body])
- @col[:plaintext]=@col[:body].dup
- @col[:plaintext]=strip_markup(@col[:plaintext])
- @col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
- @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
- @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
- t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)
- @tuple_array << t.tuple
- @col[:lev]=@col[:plaintext]=@col[:body]=''
- elsif data.is=='heading' and data.ln==5
- txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'',''
- @@seg_full=data.name if data.is=='heading' and data.ln==5 and data.name #check data.name
- @@seg ||='' #nil # watch
- @col[:seg]=@@seg
- @col[:lv5]+=1
- @col[:lid]+=1
- @col[:lev]=5
- @hname=if @col[:seg] \
- and not @col[:seg].to_s.empty?
- @@hname=@col[:seg].to_s
- else @@hname
- end
- @env=SiSU_Env::Info_env.new(@md.fns)
- @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
- txt=endnotes(txt).extract_any
- @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
- special_character_escape(@col[:body])
- @col[:plaintext]=@col[:body].dup
- @col[:plaintext]=strip_markup(@col[:plaintext])
- @col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
- @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
- @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
- t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)
- @tuple_array << t.tuple
- @col[:lev]=@col[:plaintext]=@col[:body]=''
- elsif data.is=='heading' and data.ln==6
- txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'',''
- @@seg_full=data.name if data.is=='heading' and data.ln==6 and data.name #check data.name
- @@seg ||='' #nil # watch
- @col[:seg]=@@seg
- @col[:lv6]+=1
- @col[:lid]+=1
- @col[:lev]=6
- @hname=if @col[:seg] \
- and not @col[:seg].to_s.empty?
- @@hname=@col[:seg].to_s
- else @@hname
- end
- @env=SiSU_Env::Info_env.new(@md.fns)
- @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
- txt=endnotes(txt).extract_any
- @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
- special_character_escape(@col[:body])
- @col[:plaintext]=@col[:body].dup
- @col[:plaintext]=strip_markup(@col[:plaintext])
- @col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
- @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
- @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
- t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)
- @tuple_array << t.tuple
- @col[:lev]=@col[:plaintext]=@col[:body]=''
- else #% regular text
- @col[:lid]+=1
- txt=''
- txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.obj,data.ocn,data.odv,data.osp,data.of,data.is,'',data.parent,'',''
- @hname=if @col[:seg] \
- and not @col[:seg].to_s.empty?
- @@hname=@col[:seg].to_s
- else @@hname
- end
- @env=SiSU_Env::Info_env.new(@md.fns)
- @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
- txt=endnotes(txt).extract_any
- if @sql_type=~/pg/ \
- and txt.size > (SiSU_DB_columns::Column_size.new.document_clean - 1) #% examine pg build & remove limitation
- puts "\n\nTOO LARGE (TXT - see error log)\n\n"
- # NOTE(review): @col[:body] is only assigned for this object further down, so what is logged here is the value left over from the previous object - confirm intent
- open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
- error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}")
- end
- txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
- end
- @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
- @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
- @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
- @col[:body]=if data.is=='table'
- SiSU_Format_Shared::CSS_Format.new(@md,data).html_table
- elsif data.is=='code'
- SiSU_Format_Shared::CSS_Format.new(@md,data).code
- elsif defined? data.indent and data.indent =~/[1-9]/
- SiSU_Format_Shared::CSS_Format.new(@md,data).indent(data.indent)
- else
- SiSU_Format_Shared::CSS_Format.new(@md,data).norm
- end
- special_character_escape(@col[:body])
- @col[:plaintext]=@col[:body].dup
- @col[:plaintext]=strip_markup(@col[:plaintext])
- @col[:plaintext]=clean_searchable_text(@col[:plaintext])
- t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)
- @tuple_array << t.tuple
- @en,@en_ast,@en_pls=[],[],[]
- @col[:en_a]=@col[:en_z]=nil
- @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]=''
- end
- # The three sections below are parallel: numbered endnotes (en_a markers), then
- # asterisk and plus endnotes (en_b markers followed by * or +). Each endnote found
- # in notedata gets the next @id_n and one Load_endnotes tuple.
- if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables
- endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/)
- endnote_array.each do |inf|
- if inf[/#{Mx[:en_a_o]}\d+.+?#{Mx[:en_a_c]}/]
- if inf[/#{Mx[:en_a_o]}(\d+)(.+?)#{Mx[:en_a_c]}/]
- nr,txt,digest_clean=$1,$2.strip,0
- end
- @id_n+=1
- special_character_escape(txt)
- body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
- # NOTE(review): return value unused; effective only if strip_markup mutates txt - confirm
- strip_markup(txt)
- if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
- puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
- open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
- error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
- end
- txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
- end
- if txt
- en={ :type => 'endnotes',
- :id => @id_n,
- :lid => @col[:lid],
- :nr => nr,
- :txt => txt,
- :body => body,
- :ocn => @col[:ocn],
- :ocnd => @col[:ocnd],
- :ocns => @col[:ocns],
- :id_t => @@id_t,
- :hash => digest_clean
- }
- t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)
- @tuple_array << t.tuple
- end
- end
- end
- # word_mode is assigned here (and in the two sections below) but not read in this method
- word_mode=notedata.scan(/\S+/)
- end
- if notedata =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables
- endnote_array=notedata.scan(/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/)
- endnote_array.each do |inf|
- if inf[/#{Mx[:en_b_o]}\*\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1
- if inf[/#{Mx[:en_b_o]}[*](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1
- nr,txt,digest_clean=$1,$2.strip,0
- end
- @id_n+=1
- special_character_escape(txt)
- body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
- strip_markup(txt)
- if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
- puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
- open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
- error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
- end
- txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
- end
- if txt
- en={ :type => 'endnotes_asterisk',
- :id => @id_n,
- :lid => @col[:lid],
- :nr => nr,
- :txt => txt,
- :body => body,
- :ocn => @col[:ocn],
- :ocnd => @col[:ocnd],
- :ocns => @col[:ocns],
- :id_t => @@id_t,
- :hash => digest_clean
- }
- t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)
- @tuple_array << t.tuple
- end
- end
- end
- word_mode=notedata.scan(/\S+/)
- end
- if notedata =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables
- endnote_array=notedata.scan(/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/)
- endnote_array.each do |inf|
- if inf[/#{Mx[:en_b_o]}\+\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1
- if inf[/#{Mx[:en_b_o]}[+](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1
- nr,txt,digest_clean=$1,$2.strip,0
- end
- @id_n+=1
- special_character_escape(txt)
- body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
- strip_markup(txt)
- if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
- puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
- open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
- error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
- end
- txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
- end
- if txt
- en={ :type => 'endnotes_plus',
- :id => @id_n,
- :lid => @col[:lid],
- :nr => nr,
- :txt => txt,
- :body => body,
- :ocn => @col[:ocn],
- :ocnd => @col[:ocnd],
- :ocns => @col[:ocns],
- :id_t => @@id_t,
- :hash => digest_clean
- }
- t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)
- @tuple_array << t.tuple
- end
- end
- end
- word_mode=notedata.scan(/\S+/)
- end
- end
- end
- rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
- ensure
- end
- @tuple_array
- end
- # Stores txt in @txt and returns self, so that callers can chain one of the
- # helpers defined below, e.g. endnotes(txt).extract_any.
- # The nested defs are executed (and the helpers thereby (re)defined) each time
- # this method runs; all of them work on the shared @txt.
- def endnotes(txt)
- @txt=txt
- # When @txt contains an endnote marker with a number: appends the note numbers
- # found to @en / @en_ast / @en_pls and replaces the markers in @txt via
- # clean_text. Returns @txt.
- def extract_any
- if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
- endnotes(@txt).range
- @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
- @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
- @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
- @txt=endnotes(@txt).clean_text
- end
- @txt
- end
- # Returns the scan result (array of captured numbers) for en_a endnotes, or nil when there are none.
- def standard
- x=(@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \
- ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) \
- : nil
- end
- # As standard, for en_b endnotes whose number is prefixed with '*'.
- def asterisk
- x=(@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \
- ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) \
- : nil
- end
- # As standard, for en_b endnotes whose number is prefixed with '+'.
- def plus
- x=(@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \
- ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) \
- : nil
- end
- # Replaces every endnote in @txt, in place (gsub!), with a <sup> reference to its
- # number; with base_url the reference is also a link into that page.
- # Returns @txt.
- def clean_text(base_url=nil)
- if base_url
- @txt.gsub!(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,%{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>})
- @txt.gsub!(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,%{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>})
- @txt.gsub!(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,%{<sup><a href="#{base_url}#_\\1" name="-\\1">\\1</a></sup>})
- else
- @txt.gsub!(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/,'<sup>\1</sup>')
- @txt.gsub!(/#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}/,'<sup>\1</sup>')
- @txt.gsub!(/#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/,'<sup>\1</sup>')
- end
- @txt
- end
- # Resets @col[:en_a] / @col[:en_z] and tries to set them from the endnote
- # numbers found in @txt. Returns @col.
- # NOTE(review): the per-word pattern requires \s+ inside a token produced by
- # scan(/\S+/) and two closing markers in a row, so it looks like it can never
- # match; and the `.dup unless @col[:en_a]` line runs only when en_a is nil.
- # The original author marks this as untested - confirm before relying on it.
- def range
- @col[:en_a]=@col[:en_z]=nil
- if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}([*]\d+).+?#{Mx[:en_b_c]}|#{Mx[:en_b_o]}([+]\d+).+?#{Mx[:en_b_c]}/
- word_array=@txt.scan(/\S+/)
- word_array.each do |w|
- if w[/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/] # not tested since change 2003w31
- @col[:en_a]=$1 unless @col[:en_a]
- @col[:en_z]=@col[:en_a].dup unless @col[:en_a]
- @col[:en_z]=$1 if @col[:en_a]
- end
- end
- end
- @col
- end
- self
- end
- # Builds the urls tuple for the current document: for every output file that
- # exists under "<@env.path.output>/<@fnb>/", records the column name (in f)
- # and the quoted public URL under @env.url.root (in u).
- #
- # dbi_unit - not used in this method
- # content  - stored in @fnc
- #
- # Returns whatever SiSU_DB_tuple::Load_urls#tuple returns, or nil if an error
- # was raised (it is reported through SiSU_Errors::Info_error, not re-raised).
- def db_import_urls(dbi_unit,content) #% import documents OID - populate database
-   begin
-     @fnc=content
-     @env=SiSU_Env::Info_env.new(@opt.fns)
-     base=@env.url.root
-     out=@env.path.output
-     f,u={},{}
-     # nil must be tested first: nil does not respond to empty?
-     if @fnb.nil? \
-     or @fnb.empty?
-       p 'file output path error' #remove
-     end
-     # [hash key, column name, output filename], in the order the columns are recorded
-     outputs=[
-       [:txt,         'plaintext',   @md.fn[:plain]],
-       [:html_toc,    'html_toc',    @md.fn[:toc]],
-       [:html_doc,    'html_doc',    @md.fn[:doc]],
-       [:xhtml,       'xhtml',       @md.fn[:xhtml]],
-       [:xml_sax,     'xml_sax',     @md.fn[:sax]],
-       [:xml_dom,     'xml_dom',     @md.fn[:dom]],
-       [:odf,         'odf',         @md.fn[:odf]],
-       [:pdf_p,       'pdf_p',       @md.fn[:pdf_p]],
-       [:pdf_l,       'pdf_l',       @md.fn[:pdf_l]],
-       [:concordance, 'concordance', @md.fn[:concordance]],
-       [:latex_p,     'latex_p',     "#{@opt.fns}.tex"],
-       [:latex_l,     'latex_l',     "#{@opt.fns}.landscape.tex"],
-       [:digest,      'digest',      @md.fn[:digest]],
-       [:manifest,    'manifest',    @md.fn[:manifest]], #revisit, was to be text, this is html
-       [:markup,      'markup',      "#{@opt.fns}.meta"],
-       [:sisupod,     'sisupod',     "#{@opt.fns}.tgz"],
-     ]
-     outputs.each do |key,column,fn|
-       next unless FileTest.file?("#{out}/#{@fnb}/#{fn}")
-       f[key],u[key]="#{column},","'#{base}/#{@fnb}/#{fn}',"
-     end
-     t=SiSU_DB_tuple::Load_urls.new(@conn,f,u,@@id_t,@opt,@file)
-     tuple=t.tuple
-   rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
-   ensure
-   end
-   tuple
- end
- end
-end
-__END__