about summary refs log tree commit diff homepage
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/sisu/v2/db_create.rb 4
-rw-r--r-- lib/sisu/v2/db_drop.rb 85
-rw-r--r-- lib/sisu/v2/db_import.rb 19
-rw-r--r-- lib/sisu/v2/db_indexes.rb 6
-rw-r--r-- lib/sisu/v2/db_load_tuple.rb 12
5 files changed, 51 insertions, 75 deletions
diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb
index c1fed045..c7ce9a6d 100644
--- a/lib/sisu/v2/db_create.rb
+++ b/lib/sisu/v2/db_create.rb
@@ -218,7 +218,6 @@ module SiSU_DB_create
ocns VARCHAR(6),
clean TEXT NULL,
body TEXT NULL,
- words VARCHAR(3000) NULL,
seg VARCHAR(256) NULL,
lev_an VARCHAR(1),
lev SMALLINT NULL,
@@ -258,7 +257,6 @@ module SiSU_DB_create
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
- words VARCHAR(3000) NULL,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
@@ -281,7 +279,6 @@ module SiSU_DB_create
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
- words VARCHAR(3000) NULL,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
@@ -304,7 +301,6 @@ module SiSU_DB_create
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
- words VARCHAR(3000) NULL,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb
index 35d971af..edd08345 100644
--- a/lib/sisu/v2/db_drop.rb
+++ b/lib/sisu/v2/db_drop.rb
@@ -133,50 +133,53 @@ module SiSU_DB_drop
ensure
end
end
- def conn_execute_array(sql_arr)
- @conn.transaction do |conn|
- sql_arr.each do |sql|
- conn.execute(sql)
+ def indexes
+ def conn_execute_array(sql_arr)
+ @conn.transaction do |conn|
+ sql_arr.each do |sql|
+ conn.execute(sql)
+ end
end
end
+ def base #% drop base indexes
+ print "\n drop documents common indexes\n" unless @opt.cmd =~/q/
+ sql_arr=[
+ %{DROP INDEX idx_title;},
+ %{DROP INDEX idx_author;},
+ %{DROP INDEX idx_filename;},
+ %{DROP INDEX idx_topics;},
+ %{DROP INDEX idx_ocn;},
+ %{DROP INDEX idx_digest_clean;},
+ %{DROP INDEX idx_digest_all;},
+ %{DROP INDEX idx_lev1;},
+ %{DROP INDEX idx_lev2;},
+ %{DROP INDEX idx_lev3;},
+ %{DROP INDEX idx_lev4;},
+ %{DROP INDEX idx_lev5;},
+ %{DROP INDEX idx_lev6;},
+ %{DROP INDEX idx_endnote_nr;},
+ %{DROP INDEX idx_digest_en;},
+ %{DROP INDEX idx_endnote_nr_asterisk;},
+ %{DROP INDEX idx_endnote_asterisk;},
+ %{DROP INDEX idx_digest_en_asterisk;},
+ %{DROP INDEX idx_endnote_nr_plus;},
+ %{DROP INDEX idx_endnote_plus;},
+ %{DROP INDEX idx_digest_en_plus},
+ ]
+ conn_execute_array(sql_arr)
+ end
+ def text #% drop TEXT indexes, sqlite
+ print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/
+ sql_arr=[
+ %{DROP INDEX idx_clean;},
+ %{DROP INDEX idx_endnote},
+ ]
+ conn_execute_array(sql_arr)
+ end
+ self
end
- def indexes #% drop all indexes
- print "\n drop documents common indexes\n" unless @opt.cmd =~/q/
- sql_arr=[
- %{DROP INDEX idx_text_words;},
- %{DROP INDEX idx_title;},
- %{DROP INDEX idx_author;},
- %{DROP INDEX idx_filename;},
- %{DROP INDEX idx_topics;},
- %{DROP INDEX idx_ocn;},
- %{DROP INDEX idx_digest_clean;},
- %{DROP INDEX idx_digest_all;},
- %{DROP INDEX idx_lev1;},
- %{DROP INDEX idx_lev2;},
- %{DROP INDEX idx_lev3;},
- %{DROP INDEX idx_lev4;},
- %{DROP INDEX idx_lev5;},
- %{DROP INDEX idx_lev6;},
- %{DROP INDEX idx_endnote_words;},
- %{DROP INDEX idx_endnote_nr;},
- %{DROP INDEX idx_digest_en;},
- %{DROP INDEX idx_endnote_words_asterisk;},
- %{DROP INDEX idx_endnote_nr_asterisk;},
- %{DROP INDEX idx_endnote_asterisk;},
- %{DROP INDEX idx_digest_en_asterisk;},
- %{DROP INDEX idx_endnote_words_plus;},
- %{DROP INDEX idx_endnote_nr_plus;},
- %{DROP INDEX idx_endnote_plus;},
- %{DROP INDEX idx_digest_en_plus},
- ]
- conn_execute_array(sql_arr)
- print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/
- sql_arr=[
- %{DROP INDEX idx_clean;},
- %{DROP INDEX idx_endnote},
- ]
- conn_execute_array(sql_arr)
- end
+ indexes.base
+ @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : indexes.text)
self
end
end
diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb
index e351f6fc..0e2db8e3 100644
--- a/lib/sisu/v2/db_import.rb
+++ b/lib/sisu/v2/db_import.rb
@@ -267,8 +267,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
end
if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
@@ -303,8 +301,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -332,8 +328,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -361,8 +355,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -403,8 +395,6 @@ module SiSU_DB_import
@col[:plaintext]=@col[:body].dup
@col[:plaintext]=strip_markup(@col[:plaintext])
@col[:plaintext]=clean_searchable_text(@col[:plaintext])
- @col[:words]=@col[:plaintext].dup
- @col[:words]=unique_words(@col[:words])
t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)
@tuple_array << t.tuple
@en,@en_ast,@en_pls=[],[],[]
@@ -422,8 +412,6 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
- words=txt.dup
- words=unique_words(words)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
@@ -438,7 +426,6 @@ module SiSU_DB_import
:nr => nr,
:txt => txt,
:body => body,
- :words => words,
:ocn => @col[:ocn],
:ocnd => @col[:ocnd],
:ocns => @col[:ocns],
@@ -463,8 +450,6 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
- words=txt.dup
- words=unique_words(words)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
@@ -479,7 +464,6 @@ module SiSU_DB_import
:nr => nr,
:txt => txt,
:body => body,
- :words => words,
:ocn => @col[:ocn],
:ocnd => @col[:ocnd],
:ocns => @col[:ocns],
@@ -504,8 +488,6 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
- words=txt.dup
- words=unique_words(words)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
@@ -520,7 +502,6 @@ module SiSU_DB_import
:nr => nr,
:txt => txt,
:body => body,
- :words => words,
:ocn => @col[:ocn],
:ocnd => @col[:ocnd],
:ocns => @col[:ocns],
diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb
index fb5ddd76..abd90409 100644
--- a/lib/sisu/v2/db_indexes.rb
+++ b/lib/sisu/v2/db_indexes.rb
@@ -73,7 +73,6 @@ module SiSU_DB_index
def base
print "\n create documents common indexes\n" unless @opt.cmd =~/q/
sql_arr=[
- %{CREATE INDEX idx_text_words ON doc_objects(words);},
%{CREATE INDEX idx_ocn ON doc_objects(ocn);},
%{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);},
%{CREATE INDEX idx_digest_all ON doc_objects(digest_all);},
@@ -83,14 +82,11 @@ module SiSU_DB_index
%{CREATE INDEX idx_lev4 ON doc_objects(lev4);},
%{CREATE INDEX idx_lev5 ON doc_objects(lev5);},
%{CREATE INDEX idx_lev6 ON doc_objects(lev6);},
- %{CREATE INDEX idx_endnote_words ON endnotes(words);},
%{CREATE INDEX idx_endnote_nr ON endnotes(nr);},
%{CREATE INDEX idx_digest_en ON endnotes(digest_clean);},
- %{CREATE INDEX idx_endnote_words_asterisk ON endnotes_asterisk(words);},
%{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);},
%{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);},
%{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);},
- %{CREATE INDEX idx_endnote_words_plus ON endnotes_plus(words);},
%{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);},
%{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);},
%{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);},
@@ -110,7 +106,7 @@ module SiSU_DB_index
conn_execute_array(sql_arr)
end
base
- text #@opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text)
+ @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text)
end
end
end
diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb
index 23b6249d..67c8008f 100644
--- a/lib/sisu/v2/db_load_tuple.rb
+++ b/lib/sisu/v2/db_load_tuple.rb
@@ -79,11 +79,11 @@ module SiSU_DB_tuple
end
def tuple #% import line
sql_entry=if @col[:en_a]
- "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " +
- "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
+ "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " +
+ "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
else
- "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " +
- "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
+ "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " +
+ "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
end
if @opt.cmd =~/M/
if @opt.cmd =~/V/
@@ -315,8 +315,8 @@ tid)
@conn,@en,@opt,@file=conn,en,opt,file
end
def tuple
- sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, words, ocn, ocnd, ocns, metadata_tid, digest_clean) " +
- "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:words]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');"
+ sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " +
+ "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');"
if @opt.cmd =~/M/
@file.puts sql_entry
else