about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Ralph Amissah <ralph@amissah.com> 2010-04-20 19:01:55 -0400
committer: Ralph Amissah <ralph@amissah.com> 2010-04-20 19:01:55 -0400
commit: 63c5a3cead1fb5cbd9b1bff653f269dce8d8052c (patch)
tree: eb3e09a1199ae2dc79b3f7db97ff1024b25cfb0c
parent: dal, minor cosmetic re-arrangement (diff)
db name, tables, columns, indexes changes, review (need another version bump 2.2.0)
* db (sql) table structure, further review and changes (hence breakage & version bump)
* new pgsql db name prefix "sisu_v2b_"
* new table column words in doc_objects & endnotes, VARCHAR 3000 to contain list of unique sorted words in object
* increase use of VARCHAR
* constants takes on related additions
* param, extensive db column size checks for metadata
-rw-r--r-- lib/sisu/v2/constants.rb 7
-rw-r--r-- lib/sisu/v2/db_columns.rb 158
-rw-r--r-- lib/sisu/v2/db_create.rb 27
-rw-r--r-- lib/sisu/v2/db_drop.rb 72
-rw-r--r-- lib/sisu/v2/db_import.rb 120
-rw-r--r-- lib/sisu/v2/db_indexes.rb 52
-rw-r--r-- lib/sisu/v2/db_load_tuple.rb 32
-rw-r--r-- lib/sisu/v2/db_remove.rb 4
-rw-r--r-- lib/sisu/v2/db_sqltxt.rb 34
-rw-r--r-- lib/sisu/v2/param.rb 57
10 files changed, 349 insertions, 214 deletions
diff --git a/lib/sisu/v2/constants.rb b/lib/sisu/v2/constants.rb
index 3fcb1e3a..9a24736c 100644
--- a/lib/sisu/v2/constants.rb
+++ b/lib/sisu/v2/constants.rb
@@ -131,8 +131,8 @@ Px[:lv4]= '-'
Px[:lv5]= '.'
Px[:lv6]= '.'
#Px[:lv5_6]= '.'
-Db[:name_prefix]="SiSU#{SiSU_version_dir}a_"
-Db[:name_prefix_db]="sisu_#{SiSU_version_dir}a_"
+Db[:name_prefix]="SiSU#{SiSU_version_dir}b_"
+Db[:name_prefix_db]="sisu_#{SiSU_version_dir}b_"
Db[:col_title]=800
Db[:col_title_part]=400
Db[:col_title_edition]=10
@@ -148,6 +148,9 @@ Db[:col_classify_identify]=256
Db[:col_classify_library]=30
Db[:col_classify_small]=16
Db[:col_filename]=256
+Db[:col_digest]=64
+Db[:col_filesize]=10
+Db[:col_info_note]=3000
__END__
consider:
〔comment〕
diff --git a/lib/sisu/v2/db_columns.rb b/lib/sisu/v2/db_columns.rb
index ee66c59e..0c2eb367 100644
--- a/lib/sisu/v2/db_columns.rb
+++ b/lib/sisu/v2/db_columns.rb
@@ -208,7 +208,7 @@ module SiSU_DB_columns
'title_note'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1119,7 +1119,7 @@ module SiSU_DB_columns
'rights'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1141,7 +1141,7 @@ module SiSU_DB_columns
'rights_copyright_text'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1163,7 +1163,7 @@ module SiSU_DB_columns
'rights_copyright_translation'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1185,7 +1185,7 @@ module SiSU_DB_columns
'rights_copyright_illustrations'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1207,7 +1207,7 @@ module SiSU_DB_columns
'rights_copyright_photographs'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1229,7 +1229,7 @@ module SiSU_DB_columns
'rights_copyright_preparation'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1251,7 +1251,7 @@ module SiSU_DB_columns
'rights_copyright_digitization'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1273,7 +1273,7 @@ module SiSU_DB_columns
'rights_copyright_audio'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1295,7 +1295,7 @@ module SiSU_DB_columns
'rights_copyright_video'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1317,7 +1317,7 @@ module SiSU_DB_columns
'rights_license'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1355,8 +1355,7 @@ module SiSU_DB_columns
'classify_topic_register'
end
def create_column
- "#{name} VARCHAR(#{Db[:col_classify_txt_long]}) NULL,"
- #"#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_info_note]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
@@ -1804,21 +1803,18 @@ module SiSU_DB_columns
self
end
=begin
-#% misc
-@make:
- :skin:
-@links:
+#% src
=end
- def filename
+ def src_filename
def name
- 'filename'
+ 'src_filename'
end
def create_column
"#{name} VARCHAR(#{Db[:col_filename]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
- IS 'metadata document filename';}
+ IS 'sisu markup source text filename';}
end
def tuple
t=if defined? @md.fns \
@@ -1831,56 +1827,61 @@ module SiSU_DB_columns
end
self
end
- def sisutxt # consider naming sisusrc
+ def src_fingerprint
def name
- 'sisutxt'
+ 'src_fingerprint' #hash/digest, sha256 or md5
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_digest]}) NULL,"
+ #"#{name} TEXT NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
- IS 'sisu markup text (if shared)';}
+ IS 'sisu markup source text fingerprint, hash digest sha256 (or md5)';}
end
def tuple
- t=if @md.mod.inspect=~/import|update/ \
- and FileTest.exist?(@md.fns)
- ["#{name}, ","'#{@sisutxt}', "]
+ t=if defined? @md.dgst \
+ and @md.dgst.class==Array \
+ and @md.dgst[1]=~/\S+/
+ txt=@md.dgst[1]
+ ["#{name}, ","'#{txt}', "]
else ['','']
end
end
self
end
- def fulltext
+ def src_filesize
def name
- 'fulltext'
+ 'src_filesize'
end
def create_column
- "#{name} TEXT NULL,"
+ "#{name} VARCHAR(#{Db[:col_filesize]}) NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
- IS 'document full text clean, searchable';}
+ IS 'sisu markup source text file size';}
end
def tuple
- t=if @md.mod.inspect=~/import|update/ \
- and FileTest.exist?(@md.fns)
- ["#{name}, ","'#{@fulltext}', "]
- else ['','']
- end
+ t=if defined? @md.filesize \
+ and @md.filesize=~/\S+/
+ txt=@md.filesize
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
end
self
end
- def word_count
+ def src_word_count
def name
- 'word_count'
+ 'src_word_count'
end
def create_column
"#{name} TEXT NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
- IS 'document word count';}
+ IS 'sisu markup source text word count';}
end
def tuple
t=if defined? @md.wc_words \
@@ -1893,23 +1894,47 @@ module SiSU_DB_columns
end
self
end
- def digest
+ def src_txt # consider naming sisusrc
def name
- 'dgst'
+ 'src_text'
end
def create_column
"#{name} TEXT NULL,"
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
- IS 'document hash digest sha256 (or md5)';}
+ IS 'sisu markup source text (if shared)';}
end
def tuple
- t=if defined? @md.dgst \
- and @md.dgst=~/\S+/
- txt=@md.dgst
- special_character_escape(txt)
- ["#{name}, ","'#{txt}', "]
+ t=if @md.mod.inspect=~/import|update/ \
+ and FileTest.exist?(@md.fns)
+ ["#{name}, ","'#{@sisutxt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+=begin
+#% misc
+@make:
+ :skin:
+@links:
+=end
+ def fulltext
+ def name
+ 'fulltext'
+ end
+ def create_column
+ "#{name} TEXT NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'document full text clean, searchable';}
+ end
+ def tuple
+ t=if @md.mod.inspect=~/import|update/ \
+ and FileTest.exist?(@md.fns)
+ ["#{name}, ","'#{@fulltext}', "]
else ['','']
end
end
@@ -1924,12 +1949,35 @@ module SiSU_DB_columns
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
- IS 'metadata document skin name';}
+ IS 'source text skin name';}
+ end
+ def tuple
+ t=if defined? @md.skin_name \
+ and @md.skin_name=~/\S+/
+ txt=@md.skin_name
+ special_character_escape(txt)
+ ["#{name}, ","'#{txt}', "]
+ else ['','']
+ end
+ end
+ self
+ end
+ def skin_fingerprint #check
+ def name
+ 'skin_fingerprint'
+ end
+ def create_column
+ "#{name} VARCHAR(#{Db[:col_digest]}) NULL,"
+ end
+ def column_comment
+ %{COMMENT ON COLUMN metadata_and_text.#{name}
+ IS 'source text skin fingerprint';}
end
def tuple
- t=if defined? @md.notes.skin_name \
- and @md.notes.skin_name=~/\S+/
- txt=@md.notes.skin_name
+ t=if defined? @md.dgst_skin \
+ and @md.dgst_skin.class==Array \
+ and @md.dgst_skin[1]=~/\S+/
+ txt=@md.dgst_skin[1]
special_character_escape(txt)
["#{name}, ","'#{txt}', "]
else ['','']
@@ -1946,7 +1994,7 @@ module SiSU_DB_columns
end
def column_comment
%{COMMENT ON COLUMN metadata_and_text.#{name}
- IS 'metadata document skin';}
+ IS 'source text skin';}
end
def tuple
t=if defined? @md.skin \
@@ -1972,9 +2020,9 @@ module SiSU_DB_columns
IS 'metadata document links';}
end
def tuple
- t=if defined? @md.notes.links \
- and @md.notes.links=~/\S+/
- txt=@md.notes.links
+ t=if defined? @md.links \
+ and @md.links=~/\S+/
+ txt=@md.links
special_character_escape(txt)
["#{name}, ","'#{txt}', "]
else ['','']
diff --git a/lib/sisu/v2/db_create.rb b/lib/sisu/v2/db_create.rb
index 1fc00168..c1fed045 100644
--- a/lib/sisu/v2/db_create.rb
+++ b/lib/sisu/v2/db_create.rb
@@ -182,13 +182,16 @@ module SiSU_DB_create
#{column.notes_prefix_a.create_column}
#{column.notes_prefix_b.create_column}
#{column.notes_suffix.create_column}
+ /* src */
+ #{column.src_filename.create_column}
+ #{column.src_fingerprint.create_column}
+ #{column.src_filesize.create_column}
+ #{column.src_word_count.create_column}
+ #{column.src_txt.create_column}
/* misc */
- #{column.filename.create_column}
- #{column.sisutxt.create_column}
#{column.fulltext.create_column}
- #{column.word_count.create_column}
- #{column.digest.create_column}
#{column.skin_name.create_column}
+ #{column.skin_fingerprint.create_column}
#{column.skin.create_column}
#{column.links.create_column.gsub(/,$/,'')}
/* subj VARCHAR(64) NULL, */
@@ -215,7 +218,8 @@ module SiSU_DB_create
ocns VARCHAR(6),
clean TEXT NULL,
body TEXT NULL,
- seg VARCHAR(120) NULL,
+ words VARCHAR(3000) NULL,
+ seg VARCHAR(256) NULL,
lev_an VARCHAR(1),
lev SMALLINT NULL,
lev1 SMALLINT,
@@ -254,6 +258,7 @@ module SiSU_DB_create
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
+ words VARCHAR(3000) NULL,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
@@ -276,6 +281,7 @@ module SiSU_DB_create
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
+ words VARCHAR(3000) NULL,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
@@ -298,6 +304,7 @@ module SiSU_DB_create
nr SMALLINT,
clean TEXT NULL,
body TEXT NULL,
+ words VARCHAR(3000) NULL,
ocn SMALLINT,
ocnd VARCHAR(6),
ocns VARCHAR(6),
@@ -430,12 +437,14 @@ module SiSU_DB_create
%{#{column.notes_prefix_a.column_comment}},
%{#{column.notes_prefix_b.column_comment}},
%{#{column.notes_suffix.column_comment}},
- %{#{column.filename.column_comment}},
- %{#{column.sisutxt.column_comment}},
+ %{#{column.src_filename.column_comment}},
+ %{#{column.src_fingerprint.column_comment}},
+ %{#{column.src_filesize.column_comment}},
+ %{#{column.src_word_count.column_comment}},
+ %{#{column.src_txt.column_comment}},
%{#{column.fulltext.column_comment}},
- %{#{column.word_count.column_comment}},
- %{#{column.digest.column_comment}},
%{#{column.skin_name.column_comment}},
+ %{#{column.skin_fingerprint.column_comment}},
%{#{column.skin.column_comment}},
%{#{column.links.column_comment}},
]
diff --git a/lib/sisu/v2/db_drop.rb b/lib/sisu/v2/db_drop.rb
index 7189da56..35d971af 100644
--- a/lib/sisu/v2/db_drop.rb
+++ b/lib/sisu/v2/db_drop.rb
@@ -133,37 +133,49 @@ module SiSU_DB_drop
ensure
end
end
+ def conn_execute_array(sql_arr)
+ @conn.transaction do |conn|
+ sql_arr.each do |sql|
+ conn.execute(sql)
+ end
+ end
+ end
def indexes #% drop all indexes
- #@conn.do(%{
- # DROP INDEX object_nr ON doc_objects(ocn);
- # DROP INDEX body ON doc_objects(body);
- # DROP INDEX clean ON doc_objects(clean);
- # DROP INDEX lev1 ON doc_objects(lev1);
- # DROP INDEX lev2 ON doc_objects(lev2);
- # DROP INDEX lev3 ON doc_objects(lev3);
- # DROP INDEX lev4 ON doc_objects(lev4);
- # DROP INDEX lev5 ON doc_objects(lev5);
- # DROP INDEX lev6 ON doc_objects(lev6);
- # DROP INDEX endnote_nr ON endnotes(nr);
- # DROP INDEX endnote ON endnotes(body);
- # DROP INDEX title ON metadata_and_text(title);
- # DROP INDEX filename ON metadata_and_text(filename)
- # /*
- # DROP INDEX object_nr ON doc_objects(ocn) CASCADE;
- # DROP INDEX body ON doc_objects(body) CASCADE;
- # DROP INDEX clean ON doc_objects(clean) CASCADE;
- # DROP INDEX lev1 ON doc_objects(lev1) CASCADE;
- # DROP INDEX lev2 ON doc_objects(lev2) CASCADE;
- # DROP INDEX lev3 ON doc_objects(lev3) CASCADE;
- # DROP INDEX lev4 ON doc_objects(lev4) CASCADE;
- # DROP INDEX lev5 ON doc_objects(lev5) CASCADE;
- # DROP INDEX lev6 ON doc_objects(lev6) CASCADE;
- # DROP INDEX endnote_nr ON endnotes(nr) CASCADE;
- # DROP INDEX endnote ON endnotes(body) CASCADE;
- # DROP INDEX title ON metadata_and_text(title) CASCADE;
- # DROP INDEX filename ON metadata_and_text(filename) CASCADE
- # */
- #})
+ print "\n drop documents common indexes\n" unless @opt.cmd =~/q/
+ sql_arr=[
+ %{DROP INDEX idx_text_words;},
+ %{DROP INDEX idx_title;},
+ %{DROP INDEX idx_author;},
+ %{DROP INDEX idx_filename;},
+ %{DROP INDEX idx_topics;},
+ %{DROP INDEX idx_ocn;},
+ %{DROP INDEX idx_digest_clean;},
+ %{DROP INDEX idx_digest_all;},
+ %{DROP INDEX idx_lev1;},
+ %{DROP INDEX idx_lev2;},
+ %{DROP INDEX idx_lev3;},
+ %{DROP INDEX idx_lev4;},
+ %{DROP INDEX idx_lev5;},
+ %{DROP INDEX idx_lev6;},
+ %{DROP INDEX idx_endnote_words;},
+ %{DROP INDEX idx_endnote_nr;},
+ %{DROP INDEX idx_digest_en;},
+ %{DROP INDEX idx_endnote_words_asterisk;},
+ %{DROP INDEX idx_endnote_nr_asterisk;},
+ %{DROP INDEX idx_endnote_asterisk;},
+ %{DROP INDEX idx_digest_en_asterisk;},
+ %{DROP INDEX idx_endnote_words_plus;},
+ %{DROP INDEX idx_endnote_nr_plus;},
+ %{DROP INDEX idx_endnote_plus;},
+ %{DROP INDEX idx_digest_en_plus},
+ ]
+ conn_execute_array(sql_arr)
+ print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/
+ sql_arr=[
+ %{DROP INDEX idx_clean;},
+ %{DROP INDEX idx_endnote},
+ ]
+ conn_execute_array(sql_arr)
end
self
end
diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb
index 45aca11b..e351f6fc 100644
--- a/lib/sisu/v2/db_import.rb
+++ b/lib/sisu/v2/db_import.rb
@@ -122,7 +122,7 @@ module SiSU_DB_import
tell.puts_blue unless @opt.cmd =~/q/
tell=SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc)
tell.print_grey if @opt.cmd =~/v/
- select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; }
+ select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; }
file_exist=@sql_type=~/sqlite/ \
? @conn.get_first_value(select_first_match) \
: @conn.select_one(select_first_match)
@@ -265,7 +265,10 @@ module SiSU_DB_import
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
+ @col[:words]=@col[:plaintext].dup
+ @col[:words]=unique_words(@col[:words])
if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last
end
if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last
@@ -298,7 +301,10 @@ module SiSU_DB_import
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
+ @col[:words]=@col[:plaintext].dup
+ @col[:words]=unique_words(@col[:words])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -324,7 +330,10 @@ module SiSU_DB_import
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
+ @col[:words]=@col[:plaintext].dup
+ @col[:words]=unique_words(@col[:words])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -350,7 +359,10 @@ module SiSU_DB_import
@col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
+ @col[:words]=@col[:plaintext].dup
+ @col[:words]=unique_words(@col[:words])
@en_a,@en_z=@en[0].first,@en[0].last if @en[0]
@en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0]
@en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0]
@@ -389,12 +401,15 @@ module SiSU_DB_import
end
special_character_escape(@col[:body])
@col[:plaintext]=@col[:body].dup
- strip_markup(@col[:plaintext])
+ @col[:plaintext]=strip_markup(@col[:plaintext])
+ @col[:plaintext]=clean_searchable_text(@col[:plaintext])
+ @col[:words]=@col[:plaintext].dup
+ @col[:words]=unique_words(@col[:words])
t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file)
@tuple_array << t.tuple
@en,@en_ast,@en_pls=[],[],[]
@col[:en_a]=@col[:en_z]=nil
- @col[:lev]=@col[:plaintext]=@col[:body]=''
+ @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]=''
end
if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables
endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/)
@@ -406,9 +421,9 @@ module SiSU_DB_import
@id_n+=1
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
- #special_character_escape(body)
- #special_character_escape(txt)
strip_markup(txt)
+ words=txt.dup
+ words=unique_words(words)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
@@ -418,16 +433,17 @@ module SiSU_DB_import
end
if txt
en={ :type => 'endnotes',
- :id => @id_n,
- :lid => @col[:lid],
- :nr => nr,
- :txt => txt,
- :body => body,
- :ocn => @col[:ocn],
- :ocnd => @col[:ocnd],
- :ocns => @col[:ocns],
- :id_t => @@id_t,
- :hash => digest_clean
+ :id => @id_n,
+ :lid => @col[:lid],
+ :nr => nr,
+ :txt => txt,
+ :body => body,
+ :words => words,
+ :ocn => @col[:ocn],
+ :ocnd => @col[:ocnd],
+ :ocns => @col[:ocns],
+ :id_t => @@id_t,
+ :hash => digest_clean
}
t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)
@tuple_array << t.tuple
@@ -447,6 +463,8 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
+ words=txt.dup
+ words=unique_words(words)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
@@ -456,16 +474,17 @@ module SiSU_DB_import
end
if txt
en={ :type => 'endnotes_asterisk',
- :id => @id_n,
- :lid => @col[:lid],
- :nr => nr,
- :txt => txt,
- :body => body,
- :ocn => @col[:ocn],
- :ocnd => @col[:ocnd],
- :ocns => @col[:ocns],
- :id_t => @@id_t,
- :hash => digest_clean
+ :id => @id_n,
+ :lid => @col[:lid],
+ :nr => nr,
+ :txt => txt,
+ :body => body,
+ :words => words,
+ :ocn => @col[:ocn],
+ :ocnd => @col[:ocnd],
+ :ocns => @col[:ocns],
+ :id_t => @@id_t,
+ :hash => digest_clean
}
t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)
@tuple_array << t.tuple
@@ -485,6 +504,8 @@ module SiSU_DB_import
special_character_escape(txt)
body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)
strip_markup(txt)
+ words=txt.dup
+ words=unique_words(words)
if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)
puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"
open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|
@@ -494,16 +515,17 @@ module SiSU_DB_import
end
if txt
en={ :type => 'endnotes_plus',
- :id => @id_n,
- :lid => @col[:lid],
- :nr => nr,
- :txt => txt,
- :body => body,
- :ocn => @col[:ocn],
- :ocnd => @col[:ocnd],
- :ocns => @col[:ocns],
- :id_t => @@id_t,
- :hash => digest_clean
+ :id => @id_n,
+ :lid => @col[:lid],
+ :nr => nr,
+ :txt => txt,
+ :body => body,
+ :words => words,
+ :ocn => @col[:ocn],
+ :ocnd => @col[:ocnd],
+ :ocns => @col[:ocns],
+ :id_t => @@id_t,
+ :hash => digest_clean
}
t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file)
@tuple_array << t.tuple
@@ -526,25 +548,25 @@ module SiSU_DB_import
endnotes(@txt).range
@en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/
@en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/
- @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/
+ @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/
@txt=endnotes(@txt).clean_text
end
@txt
end
def standard
- x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)
- else nil
- end
+ x=(@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \
+ ? @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/) \
+ : nil
end
def asterisk
- x=if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/)
- else nil
- end
+ x=(@txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) \
+ ? @txt.scan(/#{Mx[:en_b_o]}[*](\d+).+?#{Mx[:en_b_c]}/) \
+ : nil
end
def plus
- x=if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/; @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/)
- else nil
- end
+ x=(@txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) \
+ ? @txt.scan(/#{Mx[:en_b_o]}[+](\d+).+?#{Mx[:en_b_c]}/) \
+ : nil
end
def clean_text(base_url=nil)
if base_url
diff --git a/lib/sisu/v2/db_indexes.rb b/lib/sisu/v2/db_indexes.rb
index 3cbcc20c..fb5ddd76 100644
--- a/lib/sisu/v2/db_indexes.rb
+++ b/lib/sisu/v2/db_indexes.rb
@@ -73,38 +73,44 @@ module SiSU_DB_index
def base
print "\n create documents common indexes\n" unless @opt.cmd =~/q/
sql_arr=[
- %{CREATE INDEX object_nr ON doc_objects(ocn);},
- %{CREATE INDEX digest_clean ON doc_objects(digest_clean);},
- %{CREATE INDEX digest_all ON doc_objects(digest_all);},
- %{CREATE INDEX lev1 ON doc_objects(lev1);},
- %{CREATE INDEX lev2 ON doc_objects(lev2);},
- %{CREATE INDEX lev3 ON doc_objects(lev3);},
- %{CREATE INDEX lev4 ON doc_objects(lev4);},
- %{CREATE INDEX lev5 ON doc_objects(lev5);},
- %{CREATE INDEX lev6 ON doc_objects(lev6);},
- %{CREATE INDEX endnote_nr ON endnotes(nr);},
- %{CREATE INDEX digest_en ON endnotes(digest_clean);},
- %{CREATE INDEX endnote_nr_asterisk ON endnotes_asterisk(nr);},
- %{CREATE INDEX endnote_asterisk ON endnotes_asterisk(clean);},
- %{CREATE INDEX digest_en_asterisk ON endnotes_asterisk(digest_clean);},
- %{CREATE INDEX endnote_nr_plus ON endnotes_plus(nr);},
- %{CREATE INDEX endnote_plus ON endnotes_plus(clean);},
- %{CREATE INDEX digest_en_plus ON endnotes_plus(digest_clean);},
- %{CREATE INDEX title ON metadata_and_text(title);},
- %{CREATE INDEX filename ON metadata_and_text(filename)},
+ %{CREATE INDEX idx_text_words ON doc_objects(words);},
+ %{CREATE INDEX idx_ocn ON doc_objects(ocn);},
+ %{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);},
+ %{CREATE INDEX idx_digest_all ON doc_objects(digest_all);},
+ %{CREATE INDEX idx_lev1 ON doc_objects(lev1);},
+ %{CREATE INDEX idx_lev2 ON doc_objects(lev2);},
+ %{CREATE INDEX idx_lev3 ON doc_objects(lev3);},
+ %{CREATE INDEX idx_lev4 ON doc_objects(lev4);},
+ %{CREATE INDEX idx_lev5 ON doc_objects(lev5);},
+ %{CREATE INDEX idx_lev6 ON doc_objects(lev6);},
+ %{CREATE INDEX idx_endnote_words ON endnotes(words);},
+ %{CREATE INDEX idx_endnote_nr ON endnotes(nr);},
+ %{CREATE INDEX idx_digest_en ON endnotes(digest_clean);},
+ %{CREATE INDEX idx_endnote_words_asterisk ON endnotes_asterisk(words);},
+ %{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);},
+ %{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);},
+ %{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);},
+ %{CREATE INDEX idx_endnote_words_plus ON endnotes_plus(words);},
+ %{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);},
+ %{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);},
+ %{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);},
+ %{CREATE INDEX idx_title ON metadata_and_text(title);},
+ %{CREATE INDEX idx_author ON metadata_and_text(creator_author);},
+ %{CREATE INDEX idx_filename ON metadata_and_text(src_filename);},
+ %{CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register)},
]
conn_execute_array(sql_arr)
end
def text
- print "\n create documents text indexes\n" unless @opt.cmd =~/q/
+ print "\n create documents TEXT indexes\n" unless @opt.cmd =~/q/
sql_arr=[
- %{CREATE INDEX clean ON doc_objects(clean);},
- %{CREATE INDEX endnote ON endnotes(clean);}
+ %{CREATE INDEX idx_clean ON doc_objects(clean);},
+ %{CREATE INDEX idx_endnote ON endnotes(clean);}
]
conn_execute_array(sql_arr)
end
base
- @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text)
+ text #@opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? '' : text)
end
end
end
diff --git a/lib/sisu/v2/db_load_tuple.rb b/lib/sisu/v2/db_load_tuple.rb
index cc00b74a..23b6249d 100644
--- a/lib/sisu/v2/db_load_tuple.rb
+++ b/lib/sisu/v2/db_load_tuple.rb
@@ -79,11 +79,11 @@ module SiSU_DB_tuple
end
def tuple #% import line
sql_entry=if @col[:en_a]
- "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " +
- "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
+ "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, t_of, t_is, node, parent, digest_clean, digest_all) " +
+ "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
else
- "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " +
- "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
+ "INSERT INTO doc_objects (lid, metadata_tid, lev, lev_an, clean, body, words, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, t_of, t_is, node, parent, digest_clean, digest_all) " +
+ "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:lev_an]}', '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:words]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:t_of]}', '#{@col[:t_is]}', '#{@col[:node]}', '#{@col[:parent]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');"
end
if @opt.cmd =~/M/
if @opt.cmd =~/V/
@@ -191,12 +191,14 @@ module SiSU_DB_tuple
#{@tp.column.notes_prefix_a.tuple[0]}
#{@tp.column.notes_prefix_b.tuple[0]}
#{@tp.column.notes_suffix.tuple[0]}
-#{@tp.column.filename.tuple[0]}
-#{@tp.column.sisutxt.tuple[0]}
+#{@tp.column.src_filename.tuple[0]}
+#{@tp.column.src_fingerprint.tuple[0]}
+#{@tp.column.src_filesize.tuple[0]}
+#{@tp.column.src_word_count.tuple[0]}
+#{@tp.column.src_txt.tuple[0]}
#{@tp.column.fulltext.tuple[0]}
-#{@tp.column.word_count.tuple[0]}
-#{@tp.column.digest.tuple[0]}
#{@tp.column.skin_name.tuple[0]}
+#{@tp.column.skin_fingerprint.tuple[0]}
#{@tp.column.skin.tuple[0]}
#{@tp.column.links.tuple[0]}
tid)
@@ -272,12 +274,14 @@ tid)
#{@tp.column.notes_prefix_a.tuple[1]}
#{@tp.column.notes_prefix_b.tuple[1]}
#{@tp.column.notes_suffix.tuple[1]}
-#{@tp.column.filename.tuple[1]}
-#{@tp.column.sisutxt.tuple[1]}
+#{@tp.column.src_filename.tuple[1]}
+#{@tp.column.src_fingerprint.tuple[1]}
+#{@tp.column.src_filesize.tuple[1]}
+#{@tp.column.src_word_count.tuple[1]}
+#{@tp.column.src_txt.tuple[1]}
#{@tp.column.fulltext.tuple[1]}
-#{@tp.column.word_count.tuple[1]}
-#{@tp.column.digest.tuple[1]}
#{@tp.column.skin_name.tuple[1]}
+#{@tp.column.skin_fingerprint.tuple[1]}
#{@tp.column.skin.tuple[1]}
#{@tp.column.links.tuple[1]}
#{@id}
@@ -311,8 +315,8 @@ tid)
@conn,@en,@opt,@file=conn,en,opt,file
end
def tuple
- sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " +
- "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');"
+ sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, words, ocn, ocnd, ocns, metadata_tid, digest_clean) " +
+ "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:words]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');"
if @opt.cmd =~/M/
@file.puts sql_entry
else
diff --git a/lib/sisu/v2/db_remove.rb b/lib/sisu/v2/db_remove.rb
index e7942a15..5a7f1244 100644
--- a/lib/sisu/v2/db_remove.rb
+++ b/lib/sisu/v2/db_remove.rb
@@ -72,9 +72,9 @@ module SiSU_DB_remove
: false
end
del_id=if driver_sqlite3
- @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE filename = '#{@opt.fns}'; }).to_i
+ @conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE src_filename = '#{@opt.fns}'; }).to_i
else
- x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; })
+ x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; })
x ? (x.join.to_i) : nil
end
if del_id
diff --git a/lib/sisu/v2/db_sqltxt.rb b/lib/sisu/v2/db_sqltxt.rb
index 68e9ef8a..17a92683 100644
--- a/lib/sisu/v2/db_sqltxt.rb
+++ b/lib/sisu/v2/db_sqltxt.rb
@@ -72,9 +72,10 @@ module SiSU_DB_text
end
def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source
txt_arr,en=[],[]
+ arr=arr.class==String ? arr.split(/\n+/m) : arr
arr.each do |s|
- s.gsub!(/([*\/_-])\{(.+?)\}\1/,'\2')
- s.gsub!(/^(?:group|poem|code)\{/,''); s.gsub!(/^\}(?:group|poem|code)/,'')
+ s.gsub!(/([*\/_-])\{(.+?)\}\1/m,'\2')
+ s.gsub!(/^(?:group|poem|code)\{/m,''); s.gsub!(/^\}(?:group|poem|code)/m,'')
s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'')
if s =~/^:A~/
if defined? @md.creator \
@@ -82,26 +83,26 @@ module SiSU_DB_text
and not @md.creator.author.empty?
s.gsub!(/@author/,@md.creator.author)
else
- tell=SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:')
- tell.warn
+ tell=SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:',@md.fnb)
+ tell.warn unless @md.cmd.inspect =~/q/
end
if defined? @md.title \
and defined? @md.title.full \
and not @md.title.full.empty?
s.gsub!(/@title/,@md.title.full)
else
- tell=SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:')
- tell.warn
+ tell=SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:',@md.fnb)
+ tell.warn unless @md.cmd.inspect =~/q/
end
end
- s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/,'')
- s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/,'')
- s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/,'')
- s.gsub!(/^%{1,3} .+/,'') #removed even if contained in code block
- s.gsub!(/<br>/,' ')
- en << s.scan(/~\{\s*(.+?)\s*\}~/)
- s.gsub!(/~\{.+?\}~/,'')
- s.gsub!(/ \s+/,' ')
+ s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/m,'')
+ s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/m,'')
+ s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/m,'')
+ s.gsub!(/^%{1,3} .+/m,'') #removed even if contained in code block
+ s.gsub!(/<br>/m,' ')
+ en << s.scan(/~\{\s*(.+?)\s*\}~/m)
+ s.gsub!(/~\{.+?\}~/m,'')
+ s.gsub!(/ \s+/m,' ')
#special_character_escape(s)
s
end
@@ -124,6 +125,11 @@ module SiSU_DB_text
str.strip!
str
end
+ def unique_words(str)
+ a=str.scan(/[a-zA-Z0-9\\\/_-]{2,}/) #a=str.scan(/\S+{2,}/)
+ str=a.uniq.sort.join(' ')
+ str
+ end
end
end
__END__
diff --git a/lib/sisu/v2/param.rb b/lib/sisu/v2/param.rb
index ef91f10d..87dd9aab 100644
--- a/lib/sisu/v2/param.rb
+++ b/lib/sisu/v2/param.rb
@@ -162,8 +162,8 @@ module SiSU_Param
puts "#{n} is #{s.class}: programming error, String expected #{__FILE__}:#{__LINE__}"
s
else
- tell=SiSU_Screen::Ansi.new('v',"#{n} length #{s.length} exceeds set db field length #{l}, metadata dropped")
- tell.warn
+ tell=SiSU_Screen::Ansi.new('v',"#{n} length #{s.length} exceeds set db field length #{l}, metadata dropped",@opt.fns)
+ tell.warn unless @opt.cmd =~/q/
nil
end
end
@@ -244,7 +244,9 @@ module SiSU_Param
validate_length(s,l,n)
end
def note
- @h['note'] #TEXT
+ s=@h['note']
+ l,n=Db[:col_info_note],'title.note'
+ validate_length(s,l,n)
end
def short
s=(@h['short'] ? @h['short'] : @h['main'])
@@ -380,9 +382,9 @@ module SiSU_Param
def rights
a=@s.split(/[ ]*\n[ ]*/m)
@h=build_hash(a)
- def copyright # TEXT used db sql
+ def copyright
def text #you may wish to expand to take from all
- r=if @h['copyright']
+ s=if @h['copyright']
@h['copyright']
elsif @h['text']
@h['text']
@@ -393,26 +395,40 @@ module SiSU_Param
tell.warn unless @opt.cmd =~/q/
''
end
+ l,n=Db[:col_info_note],'rights.copyright.text'
+ validate_length(s,l,n)
end
def translation
- r=(@h['translation'] ? @h['translation'] : nil)
+ s=(@h['translation'] ? @h['translation'] : nil)
+ l,n=Db[:col_info_note],'rights.copyright.translation'
+ validate_length(s,l,n)
end
def illustrations
- r=(@h['illustrations'] ? @h['illustrations'] : nil)
+ s=(@h['illustrations'] ? @h['illustrations'] : nil)
+ l,n=Db[:col_info_note],'rights.copyright.illustrations'
+ validate_length(s,l,n)
end
def photographs
- r=(@h['photographs'] ? @h['photographs'] : nil)
+ s=(@h['photographs'] ? @h['photographs'] : nil)
+ l,n=Db[:col_info_note],'rights.copyright.photographs'
+ validate_length(s,l,n)
end
def digitiztion
- r=(@h['digitization'] ? @h['digitization'] : nil)
+ s=(@h['digitization'] ? @h['digitization'] : nil)
+ l,n=Db[:col_info_note],'rights.copyright.digitization'
+ validate_length(s,l,n)
end
def audio
- r=(@h['audio'] ? @h['audio'] : nil)
+ s=(@h['audio'] ? @h['audio'] : nil)
+ l,n=Db[:col_info_note],'rights.copyright.audio'
+ validate_length(s,l,n)
end
self
end
def license
- r=(@h['license'] ? @h['license'] : nil)
+ s=(@h['license'] ? @h['license'] : nil)
+ l,n=Db[:col_info_note],'rights.license'
+ validate_length(s,l,n)
end
def all
s=if @h['all']; @h['all']
@@ -445,6 +461,9 @@ module SiSU_Param
if s.empty?
tell=SiSU_Screen::Ansi.new(@cmd,'WARNING Document Rights information missing; provide @rights: :copyright:')
tell.warn unless @opt.cmd =~/q/
+ else
+ l,n=Db[:col_info_note],'rights.all'
+ validate_length(s,l,n)
end
s
end
@@ -472,7 +491,7 @@ module SiSU_Param
end
def topic_register
s=@h['topic_register']
- l,n=Db[:col_classify_txt_long],'classify.topic_register'
+ l,n=Db[:col_info_note],'classify.topic_register'
validate_length(s,l,n)
end
def type
@@ -743,9 +762,9 @@ module SiSU_Param
@doc={ :lv=>[] }
@doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','',''
@@publisher='SiSU scribe'
- attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:classify,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy
+ attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:classify,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:filesize,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy
def initialize(fns_array,opt)
- @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@classify=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_register=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil
+ @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@classify=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@filesize=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_register=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil
@data,@fns,@cmd,@mod,@opt=fns_array,opt.fns,opt.cmd,opt.mod,opt #@data used as data
@flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo,@book_idx=false,false,false,false,false,false,false
@seg_autoname_safe=true
@@ -1107,16 +1126,22 @@ module SiSU_Param
@papersize=determine_papersize(@mod.inspect)
end
@papersize_array=@papersize.scan(/(?:a4|letter|legal|book|a5|b5)/i)
+ fn=@fns=~/\.ssm\.sst$/ ? @fns.gsub(/.sst/,'') : @fns #decide what to do a filesize on .ssm tells very little about actual document size
+ @filesize=(File.size(fn)).to_s
if @sys.openssl !=false
skin=@doc_skin \
? (SiSU_Env::Info_skin.new(@opt,@doc_skin).select) \
: SiSU_Env::Info_skin.new(@opt).select
@dgst,@dgst_skin=[],[]
if @env.digest.type =~/sha256/
- @dgst=@sys.sha256(@env.source_file_with_path)
+ dgst=@sys.sha256(@env.source_file_with_path)
+ @dgst=dgst[1].length==64 ? dgst : nil
+ puts 'check document (sha256) digest' if not @dgst
@dgst_skin=skin ? (@sys.sha256(skin)) : nil
else
- @dgst=@sys.md5(@env.source_file_with_path)
+ dgst=@sys.md5(@env.source_file_with_path)
+ @dgst=dgst[1].length==32 ? dgst : nil
+ puts 'check document (md5) digest' if not @dgst
@dgst_skin=skin ? (@sys.md5(skin)) : nil
end
end