=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007 Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:

 * Download:

 * Ralph Amissah

 ** Description: modules shared by the different db types, dbi, postgresql, sqlite

=end
module SiSU_DB_import
  require "#{SiSU_lib}/db_columns"
  require "#{SiSU_lib}/db_load_tuple"
  require "#{SiSU_lib}/shared_html_lite"
  # Loads one SiSU document into the SQL tables: a metadata row, one row per
  # document object, the endnotes found in each object, and a row of output
  # urls. The actual INSERTs are delegated to the SiSU_DB_tuple::Load_* classes.
  class Import < SiSU_DB_columns::Column_size
    include SiSU_Param
    include SiSU_Screen
    @@dl=nil    # digest length, looked up once from SiSU_Env (see initialize)
    @@hname=nil # last non-empty segment name, used to build segment urls
    attr_accessor :tp
    # opt::      options object; this class reads opt.fns (filename) and opt.cmd (flags)
    # conn::     database handle; must respond to execute, select_one and commit
    # sql_type:: matched against /sqlite/ and /pg/ to select db specific behaviour
    def initialize(opt,conn='',sql_type='pg')
      @opt,@conn,@sql_type=opt,conn,sql_type
      @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
      @env=SiSU_Env::Info_env.new(@opt.fns)
      @dal="#{@env.path.dal}"
      if @opt.fns.empty? || @opt.cmd.empty?
        @fnb=''
      else
        @md=SiSU_Param::Parameters.new(@opt).get
        @fnb=@md.fnb
      end
      # NOTE(review): the pattern has no capture group 1, so this is always nil;
      # left untouched because nothing visible here reads @suffix.
      @suffix=@opt.fns[/(?:.+?)(?:\.ssm\.sst|\.-?sst)/,1]
      @fnm="#@dal/#{@opt.fns}.meta.rbm"
      #create? consider placing field just before clean text as opposed to seg
      #which contains seg(.html) name info; seg_full would contain seg info for
      #levels 5 & 6 where available eg seg_full may be 7.3 (level 5) and 7.3.1
      #(level 6) where seg is 7
      @@seg,@@seg_full='',''
      @col=Hash.new('')
      @col[:ocn]=''
      @counter={}
      # continue numbering from the highest ids already in the database
      sql='SELECT MAX(lid) FROM documents'
      @col[:lid]=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
      @col[:lid] ||=0
      sql='SELECT MAX(nid) FROM endnotes'
      @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
      @id_n ||=0
      @col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=0
      @db=SiSU_Env::Info_db.new
      @@dl ||=SiSU_Env::Info_env.new.digest.length
    end
    # Reads the document's dal array and imports metadata, objects and urls.
    # For sqlite the import is followed by @conn.commit; otherwise the import
    # runs between BEGIN and COMMIT, and is skipped with a message when the
    # filename is already found in the metadata table.
    def marshal_load
      require "#{SiSU_lib}/dal"
      @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here
      tell=SiSU_Screen::Ansi.new(@opt.cmd,"#{@db.psql.db}::#{@opt.fns}")
      tell.puts_blue unless @opt.cmd =~/q/
      tell=SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnm)
      tell.print_grey if @opt.cmd =~/v/
      case @sql_type
      when /sqlite/ #fix logic for sqlite !
        import_db_metadata(@dal_array)
        import_documents(@dal_array)
        import_db_urls(@dal_array,@fnm) #import OID on/off
        @conn.commit #sqlite watch
      else
        # NOTE(review): the filename is interpolated into the SQL text and is
        # compared with the regex operator (~); consider a bound parameter.
        file_exist=@conn.select_one(%{
          SELECT metadata.tid
          FROM metadata
          WHERE metadata.filename ~ '#{@opt.fns}';
        })
        if file_exist
          @db=SiSU_Env::Info_db.new
          puts "\n#{@cX.grey}file #{@cX.off} #{@cX.blue}#{@opt.fns}#{@cX.off} #{@cX.grey}already exists in database#{@cX.off} #{@cX.blue}#{@db.psql.db}#{@cX.off} #{@cX.brown}update instead?#{@cX.off}"
        else
          @conn.execute('BEGIN')
          import_db_metadata(@dal_array)
          import_documents(@dal_array)
          import_db_urls(@dal_array,@fnm) #import OID on/off
          @conn.execute('COMMIT')
        end
      end
    end
    # Prepares +string+ (modified in place) for use inside a single quoted SQL
    # literal: doubles single quotes, removes SiSU block/name markers and
    # simplifies image and link markup. The return value is that of the last
    # gsub! (nil when it changed nothing); callers rely on the mutation only.
    def special_character_escape(string)
      string.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
      # FIXME(review): the replacement probably carried an html line break tag
      # that has been lost from this copy of the file; confirm upstream.
      string.gsub!(/<:br>/," \n")
      string.gsub!(/<:(?:code|alt|group|verse)(?:-end)?>/,'')
      string.gsub!(/<:name#\S+?>/,'')
      string.gsub!(/\{\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)\}\S+/,'[image: \1] \2')
      string.gsub!(/\{\s*(.+?)\s*\}(?:https?|file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
    end
    # Reduces +string+ (modified in place) to plain text for the search column.
    # Returns the result of strip! (nil when nothing was stripped).
    def strip_markup(string) #define rules, make same as in dal clean
      # opening tag restored: without it only the closing tag was being removed
      string.gsub!(/<sup>(\d+)<\/sup>/,'[\1]')
      string.gsub!(/<:i[12]>/,'')
      # NOTE(review): presumably matched an escaped non-breaking-space entity
      # before the file was mangled; confirm upstream.
      string.gsub!(/(?: \\;)+/,' ')
      # FIXME(review): the next two patterns are empty in this copy of the file
      # (the table markers they matched appear to have been stripped out). An
      # empty pattern matches at every position, so "[TABLE]\n" is inserted
      # between all characters. Restore the table marker patterns from upstream.
      string.gsub!(//,"[TABLE]\n") #tables
      string.gsub!(//,'\1') #tables
      string.gsub!(/¡¡\d+¡/,' ') #tables
      string.gsub!(/¡/,' ') #tables tidy later
      string.gsub!(/<.+?>/,'')
      string.gsub!(/\{.+?\.(?:png|jpg|gif).+?\}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search
      string.gsub!(/\s\s+/,' ')
      string.strip!
    end
    #% import into database tables
    # Collects the document header fields into @tp as pairs of SQL fragments
    # (:x_f => column list fragment, :x_i => quoted value fragment), assigns the
    # next metadata id (@@id_t) and loads the metadata tuple.
    # +dbi_unit+ is accepted for symmetry with the other importers; it is not read.
    def import_db_metadata(dbi_unit) #% import documents - populate database
      print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } unless @opt.cmd =~/q/
      @tp={}
      @md=SiSU_Param::Parameters.new(@opt).get
      if @md.title
        @tp[:title]=@md.title
        special_character_escape(@tp[:title])
        @tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', "
        sql='SELECT MAX(tid) FROM metadata'
        id_t=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }
        @@id_t=id_t if id_t
        @@id_t ||=0
        #bug related, needs to be performed once at start of file, but consider
        #moving, as, placed here it means program will fail if document header
        #lacks 0~title
        @@id_t+=1
        # was @@cX.off: no such class variable is defined in this file
        puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@cX.off}} unless @opt.cmd =~/q/
      end
      @tp[:long_title]=@md.dc_title if @md.dc_title
      # fields whose text is escaped for SQL before quoting; the key doubles as
      # the column name
      [
        [:subtitle,@md.subtitle],
        [:creator,@md.dc_creator],
        [:contributor,@md.dc_contributor],
        [:translator,@md.translator],
        [:illustrator,@md.illustrator],
        [:publisher,@md.dc_publisher],
        [:prepared_by,@md.prepared_by],
        [:digitized_by,@md.digitized_by],
        [:subject,@md.dc_subject],
        [:description,@md.dc_description],
        [:abstract,@md.abstract],
        [:type,@md.dc_type],
        [:rights,@md.dc_rights],
        [:date,@md.dc_date],
        [:date_created,@md.dc_date_created],
        [:date_issued,@md.dc_date_issued],
        [:date_available,@md.dc_date_available],
        [:date_modified,@md.dc_date_modified],
        [:date_valid,@md.dc_date_valid],
        [:language,@md.dc_language[:name]],
        [:language_original,@md.language_original[:name]],
        [:format,@md.dc_format],
        [:identifier,@md.dc_identifier],
        [:source,@md.dc_source],
        [:relation,@md.dc_relation],
        [:coverage,@md.dc_coverage],
        [:keywords,@md.keywords],
        [:comments,@md.comments],
        [:cls_loc,@md.cls_loc],
        [:cls_dewey,@md.cls_dewey],
        [:cls_pg,@md.cls_pg],
        [:cls_isbn,@md.cls_isbn],
        [:prefix_a,@md.prefix_a],
        [:prefix_b,@md.prefix_b]
      ].each { |key,value| metadata_field(key,value) }
      metadata_field(:fns,@md.fns,'filename')
      # owner, copyright, suffix, the endnote mismatch warning and the sisu/ruby
      # version details were already disabled (commented out) and stay unimported
      # generated values, quoted without escaping
      [
        [:wc_words,@md.wc_words],
        [:dgst,@md.dgst],
        [:sc_number,@md.sc_number],
        [:sc_date,@md.sc_date],
        [:generated,@md.generated] # previously interpolated @txt instead of the value
      ].each { |key,value| metadata_field(key,value,key.to_s,false) }
      SiSU_DB_DBI::Test.new(self,@opt).verify #% import title names, filenames (tuple)
      t=SiSU_DB_tuple::Load_metadata.new(@conn,@tp,@@id_t)
      t.tuple
    end
    # Walks the dal array and loads every object that carries an object
    # citation marker (<~ocn;...><digest:digest>) into the documents table,
    # followed by the endnotes contained in that object. Any exception is
    # handed to SiSU_Errors::Info_error and ends the import of this document.
    def import_documents(dbi_unit) #% import documents - populate main database table
      #% import into substantive database tables (tuple)
      @col[:tid]=@@id_t
      @en,@en_ast,@en_pls=[],[],[]
      @col[:en_a]=nil
      @col[:en_z]=nil
      mark=ocn_mark_rgx
      dbi_unit.each do |data|
        # remove bold, italics, underscore (opening tags restored: without them
        # only the closing tag was removed)
        data.gsub!(/<b>(.+?)<\/b>/,'\1')
        data.gsub!(/<i>(.+?)<\/i>/,'\1')
        data.gsub!(/<u>(.+?)<\/u>/,'\1')
        @col[:seg]=@@seg
        next unless data =~/<~\d+;(?:\w|[0-6]:)\d+;\w\d+><[0-9a-f]{#{@@dl}}:[0-9a-f]{#{@@dl}}>/m
        notedata=data.dup
        if data[/^([123])~\s+(.+?)#{mark}/] # headings above segment level
          @col[:lev],txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6,$7
          @col[:lid]+=1
          txt=extract_endnote_refs(txt)
          prepare_body(SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_minus)
          note_endnote_ranges
          load_document_tuple
          case @col[:lev]
          when /1/; @col[:lv1]+=1
          when /2/; @col[:lv2]+=1
          when /3/; @col[:lv3]+=1
          end
          reset_object_columns
        elsif data[/^4~(.+?)\s+(.+?)#{mark}/] # segment heading, names the segment
          @@seg,txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6,$7
          @col[:seg]=@@seg
          @col[:lv4]+=1
          @col[:lid]+=1
          @col[:lev]=4
          import_segment_heading(txt)
        elsif data[/^5~(?:~\S+)?(.+?)#{mark}/] # header lev5 seg level
          txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6
          re=/^5~(.+?)\s+/
          @@seg_full=re.match(data)[1] if data=~re #create?
          @@seg ||='' #nil # watch
          @col[:seg]=@@seg
          @col[:lv5]+=1
          @col[:lid]+=1
          @col[:lev]=5
          import_segment_heading(txt)
        elsif data[/^6~(?:~\S+)?(.+?)#{mark}/] # header lev6 seg level
          txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6
          re=/^6~(.+?)\s+/
          @@seg_full=re.match(data)[1] if data=~re #create?
          @@seg ||='' #nil # watch
          @col[:seg]=@@seg
          @col[:lv6]+=1
          @col[:lid]+=1
          @col[:lev]=6
          import_segment_heading(txt)
        else #% regular text
          import_regular_text(data)
        end
        #% import into database endnotes tables
        import_endnotes(notedata,'endnotes','\{','\}')
        import_endnotes(notedata,'endnotes_asterisk','\[\*','\]')
        import_endnotes(notedata,'endnotes_plus','\[\+','\]')
      end
    rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
    end
    # Remembers +txt+ as the text the endnote helpers below (standard,
    # asterisk, plus, clean_text, range) operate on, and returns self so that
    # they can be chained: endnotes(txt).standard
    def endnotes(txt)
      @txt=txt
      self
    end
    # Numbers of the ~{ }~ endnotes in the current text (as returned by scan),
    # or nil when there are none.
    def standard
      @txt =~ /~\{.+?\}~/ ? @txt.scan(/~\{(\d+).+?\}~/) : nil
    end
    # As standard, for ~[* ]~ endnotes.
    def asterisk
      @txt =~/~\[\*.+?\]~/ ? @txt.scan(/~\[[*](\d+).+?\]~/) : nil
    end
    # As standard, for ~[+ ]~ endnotes.
    def plus
      @txt =~/~\[\+.+?\]~/ ? @txt.scan(/~\[[+](\d+).+?\]~/) : nil
    end
    # Replaces each endnote in the current text (in place) by its reference
    # mark and returns the text.
    # NOTE(review): in this copy of the file both branches substitute exactly
    # the same thing and base_url is not used; the base_url branch presumably
    # built a link to the note before the markup was lost. Confirm upstream.
    def clean_text(base_url=nil)
      if base_url
        @txt.gsub!(/~\{(\d+).+?\}~/,%{\\1})
        @txt.gsub!(/~\[([*]\d+).+?\]~/,%{\\1})
        @txt.gsub!(/~\[([+]\d+).+?\]~/,%{\\1})
      else
        @txt.gsub!(/~\{(\d+).+?\}~/,'\1')
        @txt.gsub!(/~\[([*]\d+).+?\]~/,'\1')
        @txt.gsub!(/~\[([+]\d+).+?\]~/,'\1')
      end
      @txt
    end
    # Resets @col[:en_a]/@col[:en_z] and is meant to record the first and last
    # endnote number of the current text. Returns @col.
    # NOTE(review): the text is split on whitespace and each word is then
    # tested with a pattern that itself requires whitespace (\s+), so no word
    # can match and both values stay nil ("not tested since change 2003w31").
    # Behaviour kept as is; decide whether the columns should be populated.
    def range
      @col[:en_a]=@col[:en_z]=nil
      if @txt =~ /~\{.+?\}~|~\[([*]\d+).+?\]~|~\[([+]\d+).+?\]~/
        @txt.scan(/\S+/).each do |w|
          next unless w[/~[{\[][*+]?(\d+)\s+.+?[}\]]~/] # not tested since change 2003w31
          @col[:en_a]=$1 unless @col[:en_a]
          @col[:en_z]=$1 if @col[:en_a]
        end
      end
      @col
    end
    # Builds the column list / url list for the document's outputs and loads
    # the urls tuple. When the command flags contain 'e' only outputs that
    # exist as files under the output directory are listed; otherwise all are.
    # +meta+ is stored in @fnm; +dbi_unit+ is not read.
    def import_db_urls(dbi_unit,meta) #% import documents OID - populate database
      @fnm=meta
      @env=SiSU_Env::Info_env.new(@opt.fns)
      base=@env.url.root
      out=@env.path.output
      f,u={},{}
      if @fnb.nil? || @fnb.empty? # nil must be tested first, nil has no empty?
        p 'file output path error' #remove
      end
      # [key, column name, file name within the document's output directory]
      outputs=[
        [:txt,'plaintext',@md.fn[:plain]],
        [:html_toc,'html_toc',@md.fn[:toc]],
        [:html_doc,'html_doc',@md.fn[:doc]],
        [:xhtml,'xhtml',@md.fn[:xhtml]],
        [:xml_sax,'xml_sax',@md.fn[:sax]],
        [:xml_dom,'xml_dom',@md.fn[:dom]],
        [:odf,'odf',@md.fn[:odf]],
        [:pdf_p,'pdf_p',@md.fn[:pdf_p]],
        [:pdf_l,'pdf_l',@md.fn[:pdf_l]],
        [:concordance,'concordance',@md.fn[:concordance]],
        [:latex_p,'latex_p',"#{@opt.fns}.tex"],
        [:latex_l,'latex_l',"#{@opt.fns}.landscape.tex"], # url was built with a broken interpolation
        [:digest,'digest',@md.fn[:digest]],
        [:manifest,'manifest',@md.fn[:manifest]], #revisit, was to be text, this is html
        [:markup,'markup',"#{@opt.fns}.meta"],
        [:sisupod,'sisupod',"#{@opt.fns}.tgz"]
      ]
      existing_only=(@opt.cmd =~/e/)
      outputs.each do |key,column,file|
        next if existing_only && !FileTest.file?("#{out}/#{@fnb}/#{file}")
        f[key],u[key]="#{column},", "'#{base}/#{@fnb}/#{file}',"
      end
      t=SiSU_DB_tuple::Load_urls.new(@conn,f,u,@@id_t)
      t.tuple
    rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
    end
    #% helpers
    # Adds the column/value fragments for one header field to @tp; does
    # nothing when +value+ is nil or false. +value+ is escaped in place
    # unless +escape+ is false.
    def metadata_field(key,value,column=key.to_s,escape=true)
      return unless value
      special_character_escape(value) if escape
      @tp["#{key}_f".to_sym]="#{column}, "
      @tp["#{key}_i".to_sym]="'#{value}', "
    end
    # Regex source (with capture groups) for the trailing object marker:
    # <~ocn;ocnd;ocns><digest_clean:digest_all>
    def ocn_mark_rgx
      '<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)>' + "<([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>"
    end
    # Sets @hname (the current segment name, or the last non-empty one),
    # @env and @base_url for the object being imported.
    def segment_base_url
      seg=@col[:seg].to_s
      @hname=if @col[:seg] && !seg.empty?
        @@hname=seg
      else @@hname
      end
      @env=SiSU_Env::Info_env.new(@md.fns)
      @base_url="#{@env.url.root}/#{@md.fnb}/#@hname.html"
    end
    # When +txt+ contains endnotes: records their numbers in @en, @en_ast and
    # @en_pls and returns the text with the notes replaced by their marks;
    # otherwise returns +txt+ unchanged.
    def extract_endnote_refs(txt,base_url=nil)
      return txt unless txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/
      endnotes(txt).range
      @en << endnotes(txt).standard if txt =~/~\{.+?\}~/
      @en_ast << endnotes(txt).asterisk if txt =~/~\[\*.+?\]~/
      @en_pls << endnotes(txt).plus if txt =~/~\[\+.+?\]~/
      endnotes(txt).clean_text(base_url)
    end
    # Keeps the first and last entry of the first recorded set of each
    # endnote type.
    def note_endnote_ranges
      @en_a,@en_z=@en[0].first,@en[0].last if @en[0]
      @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
      @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
    end
    # Stores the formatted +body+ (escaped for SQL) and derives the plain text
    # column from it.
    def prepare_body(body)
      @col[:body]=body
      special_character_escape(@col[:body])
      @col[:plaintext]=@col[:body].dup
      strip_markup(@col[:plaintext])
    end
    # Inserts the current @col values as one documents row.
    def load_document_tuple
      SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt).tuple
    end
    # Clears the per object columns once a row has been loaded.
    def reset_object_columns
      @col[:lev]=@col[:plaintext]=@col[:body]=''
    end
    # Shared tail of the level 4, 5 and 6 heading branches of import_documents.
    def import_segment_heading(txt)
      segment_base_url
      txt=extract_endnote_refs(txt,@base_url)
      prepare_body(SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_plus)
      note_endnote_ranges
      load_document_tuple
      reset_object_columns
    end
    # Imports one non-heading object. Raises NoMethodError (reported by
    # import_documents) when no text precedes the object marker.
    def import_regular_text(data)
      @col[:lid]+=1
      txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=(/(.+?)#{ocn_mark_rgx}/m).match(data).captures
      segment_base_url
      txt=extract_endnote_refs(txt,@base_url)
      if @sql_type=~/pg/ && txt.size > (document_clean - 1) #% examine pg build & remove limitation
        puts "\n\nTOO LARGE (TXT - see error log)\n\n"
        # log the offending text; @col[:body] is still empty at this point
        log_oversize("\n#{@opt.fns}\nTEXT BODY\n#{txt.size} object #{@col[:ocn]} -> #{txt.slice(0..500)}")
        txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
      end
      note_endnote_ranges
      fmt=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col)
      # FIXME(review): the table test pattern is empty in this copy of the file
      # (the marker appears to have been stripped out), so it matches every
      # text and the branches below it are never reached. Restore from upstream.
      body=if txt=~// #watch
        fmt.html_table
      elsif txt=~/<:i1>/
        fmt.indent1
      elsif txt=~/<:i2>/
        fmt.indent2
      else fmt.norm
      end
      prepare_body(body)
      load_document_tuple
      @en,@en_ast,@en_pls=[],[],[]
      @col[:en_a]=@col[:en_z]=nil
      reset_object_columns
    end
    # Loads every endnote of one kind found in +notedata+. +lead+ and +trail+
    # are regex source for the characters between the tildes that open and
    # close the note, e.g. '\{' and '\}'. Notes without a leading number and
    # trailing <digest> are skipped.
    def import_endnotes(notedata,type,lead,trail)
      notedata.scan(/~#{lead}.+?#{trail}~/).each do |inf|
        next unless inf =~/~#{lead}(\d+)(.+?)<([0-9a-f]{#{@@dl}})>#{trail}~/ # dal new endnotes 2003w31/1
        nr,txt,digest_clean=$1,$2,$3
        @id_n+=1
        body=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col,nr).endnote
        special_character_escape(body)
        special_character_escape(txt)
        strip_markup(txt)
        if txt.size > (endnote_clean - 1)
          puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
          log_oversize("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}")
          txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n}
        end
        en={
          :type => type,
          :id => @id_n,
          :lid => @col[:lid],
          :nr => nr,
          :txt => txt,
          :body => body,
          :ocn => @col[:ocn],
          :ocnd => @col[:ocnd],
          :ocns => @col[:ocns],
          :id_t => @@id_t,
          :hash => digest_clean
        }
        SiSU_DB_tuple::Load_endnotes.new(@conn,en).tuple
      end
    end
    # Appends +entry+ to pg_documents_error_log in the current directory.
    def log_oversize(entry)
      File.open("#{Dir.pwd}/pg_documents_error_log",'a') { |error| error.puts(entry) }
    end
    private :metadata_field, :ocn_mark_rgx, :segment_base_url,
      :extract_endnote_refs, :note_endnote_ranges, :prepare_body,
      :load_document_tuple, :reset_object_columns, :import_segment_heading,
      :import_regular_text, :import_endnotes, :log_oversize
  end
end
__END__