From d348ae931a17901eda839ef9501e13c9be51e913 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 20 Apr 2010 19:12:58 -0400 Subject: reverts unique words list column and indexes; fix for drop indexes; changelog & version update * reverts unique word list column on doc_objects and endnotes and associated indexes, may reintroduce at a later time * db_drop, drop TEXT indexes for sqlite only * update: changelog, version date --- lib/sisu/v2/db_create.rb | 4 --- lib/sisu/v2/db_drop.rb | 85 +++++++++++++++++++++++--------------------- lib/sisu/v2/db_import.rb | 19 ---------- lib/sisu/v2/db_indexes.rb | 6 +--- lib/sisu/v2/db_load_tuple.rb | 12 +++---- 5 files changed, 51 insertions(+), 75 deletions(-) (limited to 'lib/sisu') diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb index c1fed045..c7ce9a6d 100644 --- a/lib/sisu/v2/db_create.rb +++ b/lib/sisu/v2/db_create.rb @@ -218,7 +218,6 @@ module SiSU_DB_create ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, seg VARCHAR(256) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, @@ -258,7 +257,6 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -281,7 +279,6 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -304,7 +301,6 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb index 35d971af..edd08345 100644 --- a/lib/sisu/v2/db_drop.rb +++ b/lib/sisu/v2/db_drop.rb @@ -133,50 +133,53 @@ module SiSU_DB_drop ensure end end - def conn_execute_array(sql_arr) - @conn.transaction do |conn| - sql_arr.each do |sql| - conn.execute(sql) + def indexes + def conn_execute_array(sql_arr) + @conn.transaction do |conn| + sql_arr.each do |sql| + conn.execute(sql) + end end end + def base #% drop base indexes + print "\n drop documents common indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_title;}, + %{DROP INDEX idx_author;}, + %{DROP INDEX idx_filename;}, + %{DROP INDEX idx_topics;}, + %{DROP INDEX idx_ocn;}, + %{DROP INDEX idx_digest_clean;}, + %{DROP INDEX idx_digest_all;}, + %{DROP INDEX idx_lev1;}, + %{DROP INDEX idx_lev2;}, + %{DROP INDEX idx_lev3;}, + %{DROP INDEX idx_lev4;}, + %{DROP INDEX idx_lev5;}, + %{DROP INDEX idx_lev6;}, + %{DROP INDEX idx_endnote_nr;}, + %{DROP INDEX idx_digest_en;}, + %{DROP INDEX idx_endnote_nr_asterisk;}, + %{DROP INDEX idx_endnote_asterisk;}, + %{DROP INDEX idx_digest_en_asterisk;}, + %{DROP INDEX idx_endnote_nr_plus;}, + %{DROP INDEX idx_endnote_plus;}, + %{DROP INDEX idx_digest_en_plus}, + ] + conn_execute_array(sql_arr) + end + def text #% drop TEXT indexes, sqlite + print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_clean;}, + %{DROP INDEX idx_endnote}, + ] + conn_execute_array(sql_arr) + end + self end - def indexes #% drop all indexes - print "\n drop documents common indexes\n" unless @opt.cmd =~/q/ - sql_arr=[ - %{DROP INDEX idx_text_words;}, - %{DROP INDEX idx_title;}, - %{DROP INDEX idx_author;}, - %{DROP INDEX idx_filename;}, - %{DROP INDEX idx_topics;}, - %{DROP INDEX idx_ocn;}, - %{DROP INDEX idx_digest_clean;}, - %{DROP INDEX idx_digest_all;}, - %{DROP INDEX idx_lev1;}, - %{DROP INDEX idx_lev2;}, - %{DROP INDEX idx_lev3;}, - %{DROP INDEX idx_lev4;}, - %{DROP INDEX idx_lev5;}, - %{DROP INDEX idx_lev6;}, - %{DROP INDEX idx_endnote_words;}, - %{DROP INDEX idx_endnote_nr;}, - %{DROP INDEX idx_digest_en;}, - %{DROP INDEX idx_endnote_words_asterisk;}, - %{DROP INDEX idx_endnote_nr_asterisk;}, - %{DROP INDEX idx_endnote_asterisk;}, - %{DROP INDEX idx_digest_en_asterisk;}, - %{DROP INDEX idx_endnote_words_plus;}, - %{DROP INDEX idx_endnote_nr_plus;}, - %{DROP INDEX idx_endnote_plus;}, - %{DROP INDEX idx_digest_en_plus}, - ] - conn_execute_array(sql_arr) - print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/ - sql_arr=[ - %{DROP INDEX idx_clean;}, - %{DROP INDEX idx_endnote}, - ] - conn_execute_array(sql_arr) - end + indexes.base + @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : indexes.text) self end end diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index e351f6fc..0e2db8e3 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -267,8 +267,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last end if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last @@ -303,8 +301,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -332,8 +328,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -361,8 +355,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -403,8 +395,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) @tuple_array << t.tuple @en,@en_ast,@en_pls=[],[],[] @@ -422,8 +412,6 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - words=txt.dup - words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -438,7 +426,6 @@ module SiSU_DB_import :nr => nr, :txt => txt, :body => body, - :words => words, :ocn => @col[:ocn], :ocnd => @col[:ocnd], :ocns => @col[:ocns], @@ -463,8 +450,6 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - words=txt.dup - words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -479,7 +464,6 @@ module SiSU_DB_import :nr => nr, :txt => txt, :body => body, - :words => words, :ocn => @col[:ocn], :ocnd => @col[:ocnd], :ocns => @col[:ocns], @@ -504,8 +488,6 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - words=txt.dup - words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -520,7 +502,6 @@ module SiSU_DB_import :nr => nr, :txt => txt, :body => body, - :words => words, :ocn => @col[:ocn], :ocnd => @col[:ocnd], :ocns => @col[:ocns], diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb index fb5ddd76..abd90409 100644 --- a/lib/sisu/v2/db_indexes.rb +++ b/lib/sisu/v2/db_indexes.rb @@ -73,7 +73,6 @@ module SiSU_DB_index def base print "\n create documents common indexes\n" unless @opt.cmd =~/q/ sql_arr=[ - %{CREATE INDEX idx_text_words ON doc_objects(words);}, %{CREATE INDEX idx_ocn ON doc_objects(ocn);}, %{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);}, %{CREATE INDEX idx_digest_all ON doc_objects(digest_all);}, @@ -83,14 +82,11 @@ module SiSU_DB_index %{CREATE INDEX idx_lev4 ON doc_objects(lev4);}, %{CREATE INDEX idx_lev5 ON doc_objects(lev5);}, %{CREATE INDEX idx_lev6 ON doc_objects(lev6);}, - %{CREATE INDEX idx_endnote_words ON endnotes(words);}, %{CREATE INDEX idx_endnote_nr ON endnotes(nr);}, %{CREATE INDEX idx_digest_en ON endnotes(digest_clean);}, - %{CREATE INDEX idx_endnote_words_asterisk ON endnotes_asterisk(words);}, %{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);}, %{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);}, %{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);}, - %{CREATE INDEX idx_endnote_words_plus ON endnotes_plus(words);}, %{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);}, %{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);}, %{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);}, @@ -110,7 +106,7 @@ module SiSU_DB_index conn_execute_array(sql_arr) end base - text #@opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text) + @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text) end end end diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb index 23b6249d..67c8008f 100644 --- a/lib/sisu/v2/db_load_tuple.rb +++ b/lib/sisu/v2/db_load_tuple.rb @@ -79,11 +79,11 @@ module SiSU_DB_tuple end def tuple #% import line sql_entry=if @col[:en_a] - "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + - "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" else - "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + - "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" end if @opt.cmd =~/M/ if @opt.cmd =~/V/ @@ -315,8 +315,8 @@ tid) @conn,@en,@opt,@file=conn,en,opt,file end def tuple - sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, words, ocn, ocnd, ocns, metadata_tid, digest_clean) " + - "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:words]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" + sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " + + "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" if @opt.cmd =~/M/ @file.puts sql_entry else -- cgit v1.2.3