aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2010-04-20 19:12:58 -0400
committer Ralph Amissah <ralph@amissah.com> 2010-04-20 19:27:13 -0400
commit d348ae931a17901eda839ef9501e13c9be51e913 (patch)
tree 12d2c96f00df19cc70619b017e51db79f0705b13
parent update: changelog, version (2.2.0), review (diff)
reverts unique words list column and indexes; fix for drop indexes; changelog & version update sisu_2.2.0
* reverts unique word list column on doc_objects and endnotes and associated indexes, may reintroduce at a later time
* db_drop, drop TEXT indexes for sqlite only
* update: changelog, version date
-rw-r--r-- CHANGELOG_v2 4
-rw-r--r-- conf/sisu/v2/version.yml 4
-rw-r--r-- data/doc/sisu/v2/CHANGELOG 4
-rw-r--r-- data/doc/sisu/v2/sisu_markup_samples/sisu_manual/sisu_download.ssi 4
-rw-r--r-- lib/sisu/v2/db_create.rb 4
-rw-r--r-- lib/sisu/v2/db_drop.rb 85
-rw-r--r-- lib/sisu/v2/db_import.rb 19
-rw-r--r-- lib/sisu/v2/db_indexes.rb 6
-rw-r--r-- lib/sisu/v2/db_load_tuple.rb 12
9 files changed, 57 insertions, 85 deletions
diff --git a/CHANGELOG_v2 b/CHANGELOG_v2
index 3d5ebb13..715caef8 100644
--- a/CHANGELOG_v2
+++ b/CHANGELOG_v2
@@ -12,7 +12,7 @@ Reverse Chronological:
%% Development branch UNSTABLE
-%% 2.2.0.orig.tar.gz (2010-04-19:16/1)
+%% 2.2.0.orig.tar.gz (2010-04-20:16/2)
http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz
sisu_2.2.0.orig.tar.gz
sisu_2.2.0-1.dsc
@@ -20,8 +20,6 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz
* db (sql) table structure, further review and changes (hence breakage & version bump)
* new pgsql db name prefix "sisu_v2b_"
- * new table column words in doc_objects & endnotes, VARCHAR 3000 to contain
- list of unique sorted words in object
* increase use of VARCHAR
* new indexes
diff --git a/conf/sisu/v2/version.yml b/conf/sisu/v2/version.yml
index 5cb7a079..2fbef6ac 100644
--- a/conf/sisu/v2/version.yml
+++ b/conf/sisu/v2/version.yml
@@ -1,5 +1,5 @@
---
:version: 2.2.0
-:date_stamp: 2010w16/1
-:date: "2010-04-19"
+:date_stamp: 2010w16/2
+:date: "2010-04-20"
:project: SiSU
diff --git a/data/doc/sisu/v2/CHANGELOG b/data/doc/sisu/v2/CHANGELOG
index e182f467..feda1a86 100644
--- a/data/doc/sisu/v2/CHANGELOG
+++ b/data/doc/sisu/v2/CHANGELOG
@@ -12,7 +12,7 @@ Reverse Chronological:
%% Development branch UNSTABLE
-%% 2.2.0.orig.tar.gz (2010-04-19:16/1)
+%% 2.2.0.orig.tar.gz (2010-04-20:16/2)
http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz
sisu_2.2.0.orig.tar.gz
sisu_2.2.0-1.dsc
@@ -20,8 +20,6 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz
* db (sql) table structure, further review and changes (hence breakage & version bump)
* new pgsql db name prefix "sisu_v2b_"
- * new table column words in doc_objects & endnotes, VARCHAR 3000 to contain
- list of unique sorted words in object
* increase use of VARCHAR
* new indexes
diff --git a/data/doc/sisu/v2/sisu_markup_samples/sisu_manual/sisu_download.ssi b/data/doc/sisu/v2/sisu_markup_samples/sisu_manual/sisu_download.ssi
index 7b5b2caf..641846b5 100644
--- a/data/doc/sisu/v2/sisu_markup_samples/sisu_manual/sisu_download.ssi
+++ b/data/doc/sisu/v2/sisu_markup_samples/sisu_manual/sisu_download.ssi
@@ -16,7 +16,7 @@
:issued: 2002-11-12
:available: 2002-11-12
:published: 2009-01-18
- :modified: 2010-04-18
+ :modified: 2010-04-20
@make:
:num_top: 1
@@ -57,7 +57,7 @@
Download the latest version of SiSU (and SiSU markup samples):~{ Breakage and Fixes Report<br> http://www.jus.uio.no/sisu/SiSU/breakage_and_fixes.html }~
-_* {~^ sisu_2.2.0.orig.tar.gz (2010-04-19:16/1) }http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz
+_* {~^ sisu_2.2.0.orig.tar.gz (2010-04-20:16/2) }http://www.jus.uio.no/sisu/pkg/src/sisu_2.2.0.orig.tar.gz
_* {~^ sisu-markup-samples_2.0.4.orig.tar.gz (of 2008-10-09:40/4 ) }http://www.jus.uio.no/sisu/pkg/src/sisu-markup-samples_2.0.4.orig.tar.gz *~sisu-markup-samples
diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb
index c1fed045..c7ce9a6d 100644
--- a/lib/sisu/v2/db_create.rb
+++ b/lib/sisu/v2/db_create.rb
@@ -218,7 +218,6 @@ module SiSU_DB_create
ocns VARCHAR(6),
clean TEXT NULL,
body TEXT NULL,
- words VARCHAR(3000) NULL,
seg VARCHAR(256) NULL,
lev_an VARCHAR(1),
lev SMALLINT NULL,
@@ -258,7 +257,6 @@ module SiSU_DB_create
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
- words VARCHAR(3000) NULL,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
@@ -281,7 +279,6 @@ module SiSU_DB_create
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
- words VARCHAR(3000) NULL,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
@@ -304,7 +301,6 @@ module SiSU_DB_create
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
- words VARCHAR(3000) NULL,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb
index 35d971af..edd08345 100644
--- a/lib/sisu/v2/db_drop.rb
+++ b/lib/sisu/v2/db_drop.rb
@@ -133,50 +133,53 @@ module SiSU_DB_drop
ensure
end
end
- def conn_execute_array(sql_arr)
- @conn.transaction do |conn|
- sql_arr.each do |sql|
- conn.execute(sql)
+ def indexes
+ def conn_execute_array(sql_arr)
+ @conn.transaction do |conn|
+ sql_arr.each do |sql|
+ conn.execute(sql)
+ end
end
end
+ def base #% drop base indexes
+ print "\n drop documents common indexes\n" unless @opt.cmd =~/q/
+ sql_arr=[
+ %{DROP INDEX idx_title;},
+ %{DROP INDEX idx_author;},
+ %{DROP INDEX idx_filename;},
+ %{DROP INDEX idx_topics;},
+ %{DROP INDEX idx_ocn;},
+ %{DROP INDEX idx_digest_clean;},
+ %{DROP INDEX idx_digest_all;},
+ %{DROP INDEX idx_lev1;},
+ %{DROP INDEX idx_lev2;},
+ %{DROP INDEX idx_lev3;},
+ %{DROP INDEX idx_lev4;},
+ %{DROP INDEX idx_lev5;},
+ %{DROP INDEX idx_lev6;},
+ %{DROP INDEX idx_endnote_nr;},
+ %{DROP INDEX idx_digest_en;},
+ %{DROP INDEX idx_endnote_nr_asterisk;},
+ %{DROP INDEX idx_endnote_asterisk;},
+ %{DROP INDEX idx_digest_en_asterisk;},
+ %{DROP INDEX idx_endnote_nr_plus;},
+ %{DROP INDEX idx_endnote_plus;},
+ %{DROP INDEX idx_digest_en_plus},
+ ]
+ conn_execute_array(sql_arr)
+ end
+ def text #% drop TEXT indexes, sqlite
+ print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/
+ sql_arr=[
+ %{DROP INDEX idx_clean;},
+ %{DROP INDEX idx_endnote},
+ ]
+ conn_execute_array(sql_arr)
+ end
+ self
end
- def indexes #% drop all indexes
- print "\n drop documents common indexes\n" unless @opt.cmd =~/q/
- sql_arr=[
- %{DROP INDEX idx_text_words;},
- %{DROP INDEX idx_title;},
- %{DROP INDEX idx_author;},
- %{DROP INDEX idx_filename;},
- %{DROP INDEX idx_topics;},
- %{DROP INDEX idx_ocn;},
- %{DROP INDEX idx_digest_clean;},
- %{DROP INDEX idx_digest_all;},
- %{DROP INDEX idx_lev1;},
- %{DROP INDEX idx_lev2;},
- %{DROP INDEX idx_lev3;},
- %{DROP INDEX idx_lev4;},
- %{DROP INDEX idx_lev5;},
- %{DROP INDEX idx_lev6;},
- %{DROP INDEX idx_endnote_words;},
- %{DROP INDEX idx_endnote_nr;},
- %{DROP INDEX idx_digest_en;},
- %{DROP INDEX idx_endnote_words_asterisk;},
- %{DROP INDEX idx_endnote_nr_asterisk;},
- %{DROP INDEX idx_endnote_asterisk;},
- %{DROP INDEX idx_digest_en_asterisk;},
- %{DROP INDEX idx_endnote_words_plus;},
- %{DROP INDEX idx_endnote_nr_plus;},
- %{DROP INDEX idx_endnote_plus;},
- %{DROP INDEX idx_digest_en_plus},
- ]
- conn_execute_array(sql_arr)
- print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/
- sql_arr=[
- %{DROP INDEX idx_clean;},
- %{DROP INDEX idx_endnote},
- ]
- conn_execute_array(sql_arr)
- end
+ indexes.base
+ @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : indexes.text)
self
end
end
diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb
index e351f6fc..0e2db8e3 100644
--- a/lib/sisu/v2/db_import.rb
+++ b/lib/sisu/v2/db_import.rb
@@ -267,8 +267,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
end
if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
@@ -303,8 +301,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -332,8 +328,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -361,8 +355,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -403,8 +395,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)
@tuple_array << t.tuple
@en,@en_ast,@en_pls=[],[],[]
@@ -422,8 +412,6 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
- words=txt.dup
- words=unique_words(words)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
@@ -438,7 +426,6 @@ module SiSU_DB_import
:nr => nr,
:txt => txt,
:body => body,
- :words => words,
:ocn => @col[:ocn],
:ocnd => @col[:ocnd],
:ocns => @col[:ocns],
@@ -463,8 +450,6 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
- words=txt.dup
- words=unique_words(words)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
@@ -479,7 +464,6 @@ module SiSU_DB_import
:nr => nr,
:txt => txt,
:body => body,
- :words => words,
:ocn => @col[:ocn],
:ocnd => @col[:ocnd],
:ocns => @col[:ocns],
@@ -504,8 +488,6 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
- words=txt.dup
- words=unique_words(words)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
@@ -520,7 +502,6 @@ module SiSU_DB_import
:nr => nr,
:txt => txt,
:body => body,
- :words => words,
:ocn => @col[:ocn],
:ocnd => @col[:ocnd],
:ocns => @col[:ocns],
diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb
index fb5ddd76..abd90409 100644
--- a/lib/sisu/v2/db_indexes.rb
+++ b/lib/sisu/v2/db_indexes.rb
@@ -73,7 +73,6 @@ module SiSU_DB_index
def base
print "\n create documents common indexes\n" unless @opt.cmd =~/q/
sql_arr=[
- %{CREATE INDEX idx_text_words ON doc_objects(words);},
%{CREATE INDEX idx_ocn ON doc_objects(ocn);},
%{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);},
%{CREATE INDEX idx_digest_all ON doc_objects(digest_all);},
@@ -83,14 +82,11 @@ module SiSU_DB_index
%{CREATE INDEX idx_lev4 ON doc_objects(lev4);},
%{CREATE INDEX idx_lev5 ON doc_objects(lev5);},
%{CREATE INDEX idx_lev6 ON doc_objects(lev6);},
- %{CREATE INDEX idx_endnote_words ON endnotes(words);},
%{CREATE INDEX idx_endnote_nr ON endnotes(nr);},
%{CREATE INDEX idx_digest_en ON endnotes(digest_clean);},
- %{CREATE INDEX idx_endnote_words_asterisk ON endnotes_asterisk(words);},
%{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);},
%{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);},
%{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);},
- %{CREATE INDEX idx_endnote_words_plus ON endnotes_plus(words);},
%{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);},
%{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);},
%{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);},
@@ -110,7 +106,7 @@ module SiSU_DB_index
conn_execute_array(sql_arr)
end
base
- text #@opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text)
+ @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text)
end
end
end
diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb
index 23b6249d..67c8008f 100644
--- a/lib/sisu/v2/db_load_tuple.rb
+++ b/lib/sisu/v2/db_load_tuple.rb
@@ -79,11 +79,11 @@ module SiSU_DB_tuple
end
def tuple #% import line
sql_entry=if @col[:en_a]
- "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " +
- "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
+ "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " +
+ "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
else
- "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " +
- "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
+ "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " +
+ "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
end
if @opt.cmd =~/M/
if @opt.cmd =~/V/
@@ -315,8 +315,8 @@ tid)
@conn,@en,@opt,@file=conn,en,opt,file
end
def tuple
- sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, words, ocn, ocnd, ocns, metadata_tid, digest_clean) " +
- "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:words]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');"
+ sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " +
+ "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');"
if @opt.cmd =~/M/
@file.puts sql_entry
else