From d348ae931a17901eda839ef9501e13c9be51e913 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 20 Apr 2010 19:12:58 -0400 Subject: reverts unique words list column and indexes; fix for drop indexes; changelog & version update * reverts unique word list column on doc_objects and endnotes and associated indexes, may reintroduce at a later time * db_drop, drop TEXT indexes for sqlite only * update: changelog, version date --- CHANGELOG_v2 | 4 +- conf/sisu/v2/version.yml | 4 +- data/doc/sisu/v2/CHANGELOG | 4 +- .../sisu_manual/sisu_download.ssi | 4 +- lib/sisu/v2/db_create.rb | 4 - lib/sisu/v2/db_drop.rb | 85 +++++++++++----------- lib/sisu/v2/db_import.rb | 19 ----- lib/sisu/v2/db_indexes.rb | 6 +- lib/sisu/v2/db_load_tuple.rb | 12 +-- 9 files changed, 57 insertions(+), 85 deletions(-) diff --git a/CHANGELOG_v2 b/CHANGELOG_v2 index 3d5ebb13..715caef8 100644 --- a/CHANGELOG_v2 +++ b/CHANGELOG_v2 @@ -12,7 +12,7 @@ Reverse Chronological: %% Development branch UNSTABLE -%% 2.2.0.orig.tar.gz (2010-04-19:16/1) +%% 2.2.0.orig.tar.gz (2010-04-20:16/2) http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz sisu_2.2.0.orig.tar.gz sisu_2.2.0-1.dsc @@ -20,8 +20,6 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz * db (sql) table structure, further review and changes (hence breakage & version bump) * new pgsql db name prefix "sisu_v2b_" - * new table column words in doc_objects & endnotes, VARCHAR 3000 to contain - list of unique sorted words in object * increase use of VARCHAR * new indexes diff --git a/conf/sisu/v2/version.yml b/conf/sisu/v2/version.yml index 5cb7a079..2fbef6ac 100644 --- a/conf/sisu/v2/version.yml +++ b/conf/sisu/v2/version.yml @@ -1,5 +1,5 @@ --- :version: 2.2.0 -:date_stamp: 2010w16/1 -:date: "2010-04-19" +:date_stamp: 2010w16/2 +:date: "2010-04-20" :project: SiSU diff --git a/data/doc/sisu/v2/CHANGELOG b/data/doc/sisu/v2/CHANGELOG index e182f467..feda1a86 100644 --- a/data/doc/sisu/v2/CHANGELOG +++ b/data/doc/sisu/v2/CHANGELOG @@ -12,7 +12,7 @@ Reverse Chronological: %% Development branch UNSTABLE -%% 2.2.0.orig.tar.gz (2010-04-19:16/1) +%% 2.2.0.orig.tar.gz (2010-04-20:16/2) http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz sisu_2.2.0.orig.tar.gz sisu_2.2.0-1.dsc @@ -20,8 +20,6 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz * db (sql) table structure, further review and changes (hence breakage & version bump) * new pgsql db name prefix "sisu_v2b_" - * new table column words in doc_objects & endnotes, VARCHAR 3000 to contain - list of unique sorted words in object * increase use of VARCHAR * new indexes diff --git a/data/doc/sisu/v2/sisu_markup_samples/sisu_manual/sisu_download.ssi b/data/doc/sisu/v2/sisu_markup_samples/sisu_manual/sisu_download.ssi index 7b5b2caf..641846b5 100644 --- a/data/doc/sisu/v2/sisu_markup_samples/sisu_manual/sisu_download.ssi +++ b/data/doc/sisu/v2/sisu_markup_samples/sisu_manual/sisu_download.ssi @@ -16,7 +16,7 @@ :issued: 2002-11-12 :available: 2002-11-12 :published: 2009-01-18 - :modified: 2010-04-18 + :modified: 2010-04-20 @make: :num_top: 1 @@ -57,7 +57,7 @@ Download the latest version of SiSU (and SiSU markup samples):~{ Breakage and Fixes Report
http://www.jus.uio.no/sisu/SiSU/breakage_and_fixes.html }~ -_* {~^ sisu_2.2.0.orig.tar.gz (2010-04-19:16/1) }http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz +_* {~^ sisu_2.2.0.orig.tar.gz (2010-04-20:16/2) }http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz _* {~^ sisu-markup-samples_2.0.4.orig.tar.gz (of 2008-10-09:40/4 ) }http://www.jus.uio.no/sisu/pkg/src/sisu-markup-samples_2.0.4.orig.tar.gz *~sisu-markup-samples diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb index c1fed045..c7ce9a6d 100644 --- a/lib/sisu/v2/db_create.rb +++ b/lib/sisu/v2/db_create.rb @@ -218,7 +218,6 @@ module SiSU_DB_create ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, seg VARCHAR(256) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, @@ -258,7 +257,6 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -281,7 +279,6 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), @@ -304,7 +301,6 @@ module SiSU_DB_create nr SMALLINT, clean TEXT NULL, body TEXT NULL, - words VARCHAR(3000) NULL, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb index 35d971af..edd08345 100644 --- a/lib/sisu/v2/db_drop.rb +++ b/lib/sisu/v2/db_drop.rb @@ -133,50 +133,53 @@ module SiSU_DB_drop ensure end end - def conn_execute_array(sql_arr) - @conn.transaction do |conn| - sql_arr.each do |sql| - conn.execute(sql) + def indexes + def conn_execute_array(sql_arr) + @conn.transaction do |conn| + sql_arr.each do |sql| + conn.execute(sql) + end end end + def base #% drop base indexes + print "\n drop documents common indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_title;}, + %{DROP INDEX idx_author;}, + %{DROP INDEX idx_filename;}, + %{DROP INDEX idx_topics;}, + %{DROP INDEX idx_ocn;}, + %{DROP INDEX idx_digest_clean;}, + %{DROP INDEX idx_digest_all;}, + %{DROP INDEX idx_lev1;}, + %{DROP INDEX idx_lev2;}, + %{DROP INDEX idx_lev3;}, + %{DROP INDEX idx_lev4;}, + %{DROP INDEX idx_lev5;}, + %{DROP INDEX idx_lev6;}, + %{DROP INDEX idx_endnote_nr;}, + %{DROP INDEX idx_digest_en;}, + %{DROP INDEX idx_endnote_nr_asterisk;}, + %{DROP INDEX idx_endnote_asterisk;}, + %{DROP INDEX idx_digest_en_asterisk;}, + %{DROP INDEX idx_endnote_nr_plus;}, + %{DROP INDEX idx_endnote_plus;}, + %{DROP INDEX idx_digest_en_plus}, + ] + conn_execute_array(sql_arr) + end + def text #% drop TEXT indexes, sqlite + print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_clean;}, + %{DROP INDEX idx_endnote}, + ] + conn_execute_array(sql_arr) + end + self end - def indexes #% drop all indexes - print "\n drop documents common indexes\n" unless @opt.cmd =~/q/ - sql_arr=[ - %{DROP INDEX idx_text_words;}, - %{DROP INDEX idx_title;}, - %{DROP INDEX idx_author;}, - %{DROP INDEX idx_filename;}, - %{DROP INDEX idx_topics;}, - %{DROP INDEX idx_ocn;}, - %{DROP INDEX idx_digest_clean;}, - %{DROP INDEX idx_digest_all;}, - %{DROP INDEX idx_lev1;}, - %{DROP INDEX idx_lev2;}, - %{DROP INDEX idx_lev3;}, - %{DROP INDEX idx_lev4;}, - %{DROP INDEX idx_lev5;}, - %{DROP INDEX idx_lev6;}, - %{DROP INDEX idx_endnote_words;}, - %{DROP INDEX idx_endnote_nr;}, - %{DROP INDEX idx_digest_en;}, - %{DROP INDEX idx_endnote_words_asterisk;}, - %{DROP INDEX idx_endnote_nr_asterisk;}, - %{DROP INDEX idx_endnote_asterisk;}, - %{DROP INDEX idx_digest_en_asterisk;}, - %{DROP INDEX idx_endnote_words_plus;}, - %{DROP INDEX idx_endnote_nr_plus;}, - %{DROP INDEX idx_endnote_plus;}, - %{DROP INDEX idx_digest_en_plus}, - ] - conn_execute_array(sql_arr) - print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/ - sql_arr=[ - %{DROP INDEX idx_clean;}, - %{DROP INDEX idx_endnote}, - ] - conn_execute_array(sql_arr) - end + indexes.base + @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : indexes.text) self end end diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index e351f6fc..0e2db8e3 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -267,8 +267,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last end if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last @@ -303,8 +301,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -332,8 +328,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -361,8 +355,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) @en_a,@en_z=@en[0].first,@en[0].last if @en[0] @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] @@ -403,8 +395,6 @@ module SiSU_DB_import @col[:plaintext]=@col[:body].dup @col[:plaintext]=strip_markup(@col[:plaintext]) @col[:plaintext]=clean_searchable_text(@col[:plaintext]) - @col[:words]=@col[:plaintext].dup - @col[:words]=unique_words(@col[:words]) t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) @tuple_array << t.tuple @en,@en_ast,@en_pls=[],[],[] @@ -422,8 +412,6 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - words=txt.dup - words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -438,7 +426,6 @@ module SiSU_DB_import :nr => nr, :txt => txt, :body => body, - :words => words, :ocn => @col[:ocn], :ocnd => @col[:ocnd], :ocns => @col[:ocns], @@ -463,8 +450,6 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - words=txt.dup - words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -479,7 +464,6 @@ module SiSU_DB_import :nr => nr, :txt => txt, :body => body, - :words => words, :ocn => @col[:ocn], :ocnd => @col[:ocnd], :ocns => @col[:ocns], @@ -504,8 +488,6 @@ module SiSU_DB_import special_character_escape(txt) body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) strip_markup(txt) - words=txt.dup - words=unique_words(words) if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| @@ -520,7 +502,6 @@ module SiSU_DB_import :nr => nr, :txt => txt, :body => body, - :words => words, :ocn => @col[:ocn], :ocnd => @col[:ocnd], :ocns => @col[:ocns], diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb index fb5ddd76..abd90409 100644 --- a/lib/sisu/v2/db_indexes.rb +++ b/lib/sisu/v2/db_indexes.rb @@ -73,7 +73,6 @@ module SiSU_DB_index def base print "\n create documents common indexes\n" unless @opt.cmd =~/q/ sql_arr=[ - %{CREATE INDEX idx_text_words ON doc_objects(words);}, %{CREATE INDEX idx_ocn ON doc_objects(ocn);}, %{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);}, %{CREATE INDEX idx_digest_all ON doc_objects(digest_all);}, @@ -83,14 +82,11 @@ module SiSU_DB_index %{CREATE INDEX idx_lev4 ON doc_objects(lev4);}, %{CREATE INDEX idx_lev5 ON doc_objects(lev5);}, %{CREATE INDEX idx_lev6 ON doc_objects(lev6);}, - %{CREATE INDEX idx_endnote_words ON endnotes(words);}, %{CREATE INDEX idx_endnote_nr ON endnotes(nr);}, %{CREATE INDEX idx_digest_en ON endnotes(digest_clean);}, - %{CREATE INDEX idx_endnote_words_asterisk ON endnotes_asterisk(words);}, %{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);}, %{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);}, %{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);}, - %{CREATE INDEX idx_endnote_words_plus ON endnotes_plus(words);}, %{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);}, %{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);}, %{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);}, @@ -110,7 +106,7 @@ module SiSU_DB_index conn_execute_array(sql_arr) end base - text #@opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text) + @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text) end end end diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb index 23b6249d..67c8008f 100644 --- a/lib/sisu/v2/db_load_tuple.rb +++ b/lib/sisu/v2/db_load_tuple.rb @@ -79,11 +79,11 @@ module SiSU_DB_tuple end def tuple #% import line sql_entry=if @col[:en_a] - "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + - "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" else - "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + - "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" + "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " + + "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" end if @opt.cmd =~/M/ if @opt.cmd =~/V/ @@ -315,8 +315,8 @@ tid) @conn,@en,@opt,@file=conn,en,opt,file end def tuple - sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, words, ocn, ocnd, ocns, metadata_tid, digest_clean) " + - "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:words]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" + sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " + + "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');" if @opt.cmd =~/M/ @file.puts sql_entry else -- cgit v1.2.3