# coding: utf-8
# Provenance: reconstructed from the patch that created lib/sisu/v3/db_create.rb
# (commit 4b51bc00cda70d3c118401a74f1704df38c947a3, Ralph Amissah,
# Tue, 1 Feb 2011 09:48:30 -0500, subject: v3 introduced as development
# branch, invoked using "sisu --v3 [instructions]").
# NOTE(review): URLs and e-mail addresses in the header below were stripped
# when the patch was rendered to text; only the address that belongs to the
# standard GPL notice has been restored - confirm the rest against upstream.
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:




 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:


 * Download:


 * Ralph Amissah


 ** Description: modules shared by the different db types, dbi, postgresql,
    sqlite

=end
module SiSU_DB_create
  require "#{SiSU_lib}/db_columns"                      # db_columns.rb

  # Creates the SiSU database and its tables through the connection it is
  # given. For 'pg' it also attaches SQL COMMENTs via the Comment class.
  class Create < SiSU_DB_columns::Columns
    require "#{SiSU_lib}/sysenv"                        # sysenv.rb
    # digest length, computed once and used to size the CHAR digest columns
    @@dl = nil

    # opt::      options object; only opt.cmd (flag string) and opt.fns are read here
    # conn::     database connection responding to #execute
    # file::     stored in @file; not otherwise used in this file
    # sql_type:: 'pg' (default) or 'sqlite'
    def initialize(opt, conn, file, sql_type = 'pg')
      @opt, @conn, @file, @sql_type = opt, conn, file, sql_type
      @cX = SiSU_Screen::Ansi.new(@opt.cmd).cX
      # table/column comments are only issued for postgresql
      @comment = (@sql_type == 'pg') ? Comment.new(@conn, @sql_type) : nil
      @@dl ||= SiSU_Env::Info_env.new.digest.length
    end

    # Prints every available DBI driver and each of its data sources.
    def available
      DBI.available_drivers.each do |driver|
        puts "Driver: #{driver}"
        DBI.data_sources(driver).each do |dsn|
          puts "\tDatasource: #{dsn}"
        end
      end
    end

    # Announces where the tables will be created (when a verbosity flag is
    # set) and, for 'pg', asks the system-call helper to create the database.
    def create_db
      @env = SiSU_Env::Info_env.new(@opt.fns)
      tell =
        if @sql_type == 'sqlite'
          SiSU_Screen::Ansi.new(@opt.cmd, 'invert', 'Create Sqlite db tables in:', %{"#{@env.path.output}/sisu_sqlite.db"})
        else
          SiSU_Screen::Ansi.new(@opt.cmd, 'invert', 'Create PG db tables in:', %{"#{Db[:name_prefix]}#{@env.path.stub_pwd}"})
        end
      # character class: any one of the flags v, V or M turns the message on
      # (the previous /vVM/ only matched the literal sequence "vVM")
      tell.colorize if @opt.cmd =~ /[vVM]/
      SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) if @sql_type == 'pg' #watch use of path.stub_pwd instead of stub
    end

    # Calls webserv_stub_ensure on the environment paths when the 'd' flag
    # is present in the command string; does nothing (returns nil) otherwise.
    def output_dir?
      dir = SiSU_Env::Info_env.new('')
      dir.path.webserv_stub_ensure if @opt.cmd =~ /d/
    end

    # Defines one method per table (metadata_and_text, doc_objects, endnotes,
    # endnotes_asterisk, endnotes_plus, urls) and returns self, so a table is
    # created by chaining, e.g. create_table.doc_objects.
    # Each table method prints a notice unless the 'q' flag is set, runs the
    # CREATE TABLE statement, then adds the table comments when @comment is set.
    def create_table
      def metadata_and_text
        print %{
          currently using sisu dbi module
          to be populated from document files
          create tables metadata_and_text
          data import through ruby transfer
        } unless @opt.cmd =~ /q/
        @conn.execute(%{
          CREATE TABLE metadata_and_text (
            tid BIGINT PRIMARY KEY,
            /* title */
            #{column.title.create_column}
            #{column.title_main.create_column}
            #{column.title_sub.create_column}
            #{column.title_short.create_column}
            #{column.title_edition.create_column}
            #{column.title_note.create_column}
            #{column.title_language.create_column}
            #{column.title_language_char.create_column}
            /* creator */
            #{column.creator_author.create_column}
            #{column.creator_author_honorific.create_column}
            #{column.creator_author_nationality.create_column}
            #{column.creator_contributor.create_column}
            #{column.creator_illustrator.create_column}
            #{column.creator_photographer.create_column}
            #{column.creator_translator.create_column}
            #{column.creator_prepared_by.create_column}
            #{column.creator_digitized_by.create_column}
            #{column.creator_audio.create_column}
            #{column.creator_video.create_column}
            /* language */
            #{column.language_document.create_column}
            #{column.language_document_char.create_column}
            #{column.language_original.create_column}
            #{column.language_original_char.create_column}
            /* date */
            #{column.date_added_to_site.create_column}
            #{column.date_available.create_column}
            #{column.date_created.create_column}
            #{column.date_issued.create_column}
            #{column.date_modified.create_column}
            #{column.date_published.create_column}
            #{column.date_valid.create_column}
            #{column.date_translated.create_column}
            #{column.date_original_publication.create_column}
            #{column.date_generated.create_column}
            /* publisher */
            #{column.publisher.create_column}
            /* original */
            #{column.original_publisher.create_column}
            #{column.original_language.create_column}
            #{column.original_language_char.create_column}
            #{column.original_source.create_column}
            #{column.original_institution.create_column}
            #{column.original_nationality.create_column}
            /* rights */
            #{column.rights_all.create_column}
            #{column.rights_copyright_text.create_column}
            #{column.rights_copyright_translation.create_column}
            #{column.rights_copyright_illustrations.create_column}
            #{column.rights_copyright_photographs.create_column}
            #{column.rights_copyright_preparation.create_column}
            #{column.rights_copyright_digitization.create_column}
            #{column.rights_copyright_audio.create_column}
            #{column.rights_copyright_video.create_column}
            #{column.rights_license.create_column}
            /* classify */
            #{column.classify_topic_register.create_column}
            #{column.classify_subject.create_column}
            #{column.classify_type.create_column}
            #{column.classify_loc.create_column}
            #{column.classify_dewey.create_column}
            #{column.classify_oclc.create_column}
            #{column.classify_pg.create_column}
            #{column.classify_isbn.create_column}
            #{column.classify_format.create_column}
            #{column.classify_identifier.create_column}
            #{column.classify_relation.create_column}
            #{column.classify_coverage.create_column}
            #{column.classify_keywords.create_column}
            /* notes */
            #{column.notes_abstract.create_column}
            #{column.notes_comment.create_column}
            #{column.notes_description.create_column}
            #{column.notes_history.create_column}
            #{column.notes_prefix.create_column}
            #{column.notes_prefix_a.create_column}
            #{column.notes_prefix_b.create_column}
            #{column.notes_suffix.create_column}
            /* src */
            #{column.src_filename.create_column}
            #{column.src_fingerprint.create_column}
            #{column.src_filesize.create_column}
            #{column.src_word_count.create_column}
            #{column.src_txt.create_column}
            /* misc */
            #{column.fulltext.create_column}
            #{column.skin_name.create_column}
            #{column.skin_fingerprint.create_column}
            #{column.skin.create_column}
            #{column.links.create_column.gsub(/,$/,'')}
/*          subj VARCHAR(64) NULL, */
/*          contact VARCHAR(100) NULL, */
/*          information VARCHAR(100) NULL, */
/*          types CHAR(1) NULL, */
/*          writing_focus_nationality VARCHAR(100) NULL, */
          );
        })
        @comment.psql.metadata_and_text if @comment
      end

      def doc_objects                                   # create doc_objects base
        print %{
          to be populated from documents files
          create tables doc_objects
          data import through ruby transfer
        } unless @opt.cmd =~ /q/
        @conn.execute(%{
          CREATE TABLE doc_objects (
            lid BIGINT PRIMARY KEY,
            metadata_tid BIGINT REFERENCES metadata_and_text,
            ocn SMALLINT,
            ocnd VARCHAR(6),
            ocns VARCHAR(6),
            clean TEXT NULL,
            body TEXT NULL,
            seg VARCHAR(256) NULL,
            lev_an VARCHAR(1),
            lev SMALLINT NULL,
            lev1 SMALLINT,
            lev2 SMALLINT,
            lev3 SMALLINT,
            lev4 SMALLINT,
            lev5 SMALLINT,
            lev6 SMALLINT,
            en_a SMALLINT NULL,
            en_z SMALLINT NULL,
            en_a_asterisk SMALLINT NULL,
            en_z_asterisk SMALLINT NULL,
            en_a_plus SMALLINT NULL,
            en_z_plus SMALLINT NULL,
            t_of VARCHAR(16),
            t_is VARCHAR(16),
            node VARCHAR(16) NULL,
            parent VARCHAR(16) NULL,
            digest_clean CHAR(#{@@dl}),
            digest_all CHAR(#{@@dl}),
            types CHAR(1) NULL
          );
        })
        @comment.psql.doc_objects if @comment
      end

      def endnotes
        print %{
          to be populated from document files
          create tables endnotes
          data import through ruby transfer
        } unless @opt.cmd =~ /q/
        @conn.execute(%{
          CREATE TABLE endnotes (
            nid BIGINT PRIMARY KEY,
            document_lid BIGINT REFERENCES doc_objects,
            nr SMALLINT,
            clean TEXT NULL,
            body TEXT NULL,
            ocn SMALLINT,
            ocnd VARCHAR(6),
            ocns VARCHAR(6),
            digest_clean CHAR(#{@@dl}),
            metadata_tid BIGINT REFERENCES metadata_and_text
          );
        })
        @comment.psql.endnotes if @comment
      end

      def endnotes_asterisk
        print %{
          to be populated from document files
          create tables endnotes_asterisk
          data import through ruby transfer
        } unless @opt.cmd =~ /q/
        @conn.execute(%{
          CREATE TABLE endnotes_asterisk (
            nid BIGINT PRIMARY KEY,
            document_lid BIGINT REFERENCES doc_objects,
            nr SMALLINT,
            clean TEXT NULL,
            body TEXT NULL,
            ocn SMALLINT,
            ocnd VARCHAR(6),
            ocns VARCHAR(6),
            digest_clean CHAR(#{@@dl}),
            metadata_tid BIGINT REFERENCES metadata_and_text
          );
        })
        @comment.psql.endnotes_asterisk if @comment
      end

      def endnotes_plus
        print %{
          to be populated from document files
          create tables endnotes_plus
          data import through ruby transfer
        } unless @opt.cmd =~ /q/
        @conn.execute(%{
          CREATE TABLE endnotes_plus (
            nid BIGINT PRIMARY KEY,
            document_lid BIGINT REFERENCES doc_objects,
            nr SMALLINT,
            clean TEXT NULL,
            body TEXT NULL,
            ocn SMALLINT,
            ocnd VARCHAR(6),
            ocns VARCHAR(6),
            digest_clean CHAR(#{@@dl}),
            metadata_tid BIGINT REFERENCES metadata_and_text
          );
        })
        @comment.psql.endnotes_plus if @comment
      end

      def urls                                          # create doc_objects file links mapping
        print %{
          currently using sisu dbi module
          to be populated from doc_objects files
          create tables urls
          data import through ruby transfer
        } unless @opt.cmd =~ /q/
        @conn.execute(%{
          CREATE TABLE urls (
            metadata_tid BIGINT REFERENCES metadata_and_text,
            plaintext varchar(512),
            html_toc varchar(512),
            html_doc varchar(512),
            xhtml varchar(512),
            xml_sax varchar(512),
            xml_dom varchar(512),
            odf varchar(512),
            pdf_p varchar(512),
            pdf_l varchar(512),
            concordance varchar(512),
            latex_p varchar(512),
            latex_l varchar(512),
            digest varchar(512),
            manifest varchar(512),
            markup varchar(512),
            sisupod varchar(512)
          );
        })
        @comment.psql.urls if @comment
      end

      self
    end
  end

  # Issues SQL COMMENT ON statements describing the SiSU tables and columns.
  class Comment < SiSU_DB_columns::Columns
    # conn::     database connection responding to #transaction
    # sql_type:: when it matches /pg/, #psql is invoked immediately so the
    #            per-table comment methods are defined up front
    def initialize(conn, sql_type = 'pg')
      @conn = conn
      psql if sql_type =~ /pg/
    end

    # Defines one comment method per table plus the conn_execute_array helper,
    # and returns self so calls chain, e.g. psql.doc_objects.
    def psql
      # Runs every statement of sql_arr inside a single transaction.
      def conn_execute_array(sql_arr)
        @conn.transaction do |conn|
          sql_arr.each do |sql|
            conn.execute(sql)
          end
        end
      end

      def metadata_and_text
        sql_arr = [
          %{COMMENT ON Table metadata_and_text
            IS 'contains SiSU metadata and fulltext for search (including source .sst if shared)';},
          %{COMMENT ON COLUMN metadata_and_text.tid
            IS 'unique';},
          %{#{column.title.column_comment}},
          %{#{column.title_main.column_comment}},
          %{#{column.title_sub.column_comment}},
          %{#{column.title_short.column_comment}},
          %{#{column.title_edition.column_comment}},
          %{#{column.title_note.column_comment}},
          %{#{column.title_language.column_comment}},
          %{#{column.title_language_char.column_comment}},
          %{#{column.creator_author.column_comment}},
          %{#{column.creator_author_honorific.column_comment}},
          %{#{column.creator_author_nationality.column_comment}},
          %{#{column.creator_contributor.column_comment}},
          %{#{column.creator_illustrator.column_comment}},
          %{#{column.creator_photographer.column_comment}},
          %{#{column.creator_translator.column_comment}},
          %{#{column.creator_prepared_by.column_comment}},
          %{#{column.creator_digitized_by.column_comment}},
          %{#{column.creator_audio.column_comment}},
          %{#{column.creator_video.column_comment}},
          %{#{column.language_document.column_comment}},
          %{#{column.language_document_char.column_comment}},
          %{#{column.language_original.column_comment}},
          %{#{column.language_original_char.column_comment}},
          %{#{column.date_added_to_site.column_comment}},
          %{#{column.date_available.column_comment}},
          %{#{column.date_created.column_comment}},
          %{#{column.date_issued.column_comment}},
          %{#{column.date_modified.column_comment}},
          %{#{column.date_published.column_comment}},
          %{#{column.date_valid.column_comment}},
          %{#{column.date_translated.column_comment}},
          %{#{column.date_original_publication.column_comment}},
          %{#{column.date_generated.column_comment}},
          %{#{column.publisher.column_comment}},
          %{#{column.original_publisher.column_comment}},
          %{#{column.original_language.column_comment}},
          %{#{column.original_language_char.column_comment}},
          %{#{column.original_source.column_comment}},
          %{#{column.original_institution.column_comment}},
          %{#{column.original_nationality.column_comment}},
          %{#{column.rights_all.column_comment}},
          %{#{column.rights_copyright_text.column_comment}},
          %{#{column.rights_copyright_translation.column_comment}},
          %{#{column.rights_copyright_illustrations.column_comment}},
          %{#{column.rights_copyright_photographs.column_comment}},
          %{#{column.rights_copyright_preparation.column_comment}},
          %{#{column.rights_copyright_digitization.column_comment}},
          %{#{column.rights_copyright_audio.column_comment}},
          %{#{column.rights_copyright_video.column_comment}},
          %{#{column.rights_license.column_comment}},
          %{#{column.classify_topic_register.column_comment}},
          %{#{column.classify_subject.column_comment}},
          %{#{column.classify_type.column_comment}},
          %{#{column.classify_loc.column_comment}},
          %{#{column.classify_dewey.column_comment}},
          %{#{column.classify_oclc.column_comment}},
          %{#{column.classify_pg.column_comment}},
          %{#{column.classify_isbn.column_comment}},
          %{#{column.classify_format.column_comment}},
          %{#{column.classify_identifier.column_comment}},
          %{#{column.classify_relation.column_comment}},
          %{#{column.classify_coverage.column_comment}},
          %{#{column.classify_keywords.column_comment}},
          %{#{column.notes_abstract.column_comment}},
          %{#{column.notes_comment.column_comment}},
          %{#{column.notes_description.column_comment}},
          %{#{column.notes_history.column_comment}},
          %{#{column.notes_prefix.column_comment}},
          %{#{column.notes_prefix_a.column_comment}},
          %{#{column.notes_prefix_b.column_comment}},
          %{#{column.notes_suffix.column_comment}},
          %{#{column.src_filename.column_comment}},
          %{#{column.src_fingerprint.column_comment}},
          %{#{column.src_filesize.column_comment}},
          %{#{column.src_word_count.column_comment}},
          %{#{column.src_txt.column_comment}},
          %{#{column.fulltext.column_comment}},
          %{#{column.skin_name.column_comment}},
          %{#{column.skin_fingerprint.column_comment}},
          %{#{column.skin.column_comment}},
          %{#{column.links.column_comment}},
        ]
        conn_execute_array(sql_arr)
      end

      def doc_objects
        sql_arr = [
          %{COMMENT ON Table doc_objects
            IS 'contains searchable text of SiSU document objects';},
          %{COMMENT ON COLUMN doc_objects.lid
            IS 'unique';},
          %{COMMENT ON COLUMN doc_objects.metadata_tid
            IS 'tie to title in metadata_and_text';},
          %{COMMENT ON COLUMN doc_objects.lev_an
            IS 'doc level A-C 1-6';},
          %{COMMENT ON COLUMN doc_objects.lev
            IS 'doc level 1-6 \d\~';},
          %{COMMENT ON COLUMN doc_objects.seg
            IS 'segment name from level number 4 (lv 1)';},
          %{COMMENT ON COLUMN doc_objects.ocn
            IS 'object citation number';},
          %{COMMENT ON COLUMN doc_objects.en_a
            IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';},
          %{COMMENT ON COLUMN doc_objects.en_z
            IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';},
          %{COMMENT ON COLUMN doc_objects.en_a_asterisk
            IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';},
          %{COMMENT ON COLUMN doc_objects.en_z_asterisk
            IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';},
          %{COMMENT ON COLUMN doc_objects.en_a_plus
            IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';},
          %{COMMENT ON COLUMN doc_objects.en_z_plus
            IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)';},
          %{COMMENT ON COLUMN doc_objects.types
            IS 'document types seg scroll';},
          %{COMMENT ON COLUMN doc_objects.clean
            IS 'text object - substantive text: clean, stripped of markup';},
          %{COMMENT ON COLUMN doc_objects.body
            IS 'text object - substantive text: light html markup';},
          %{COMMENT ON COLUMN doc_objects.lev1
            IS 'document structure, level number 1';},
          %{COMMENT ON COLUMN doc_objects.lev2
            IS 'document structure, level number 2';},
          %{COMMENT ON COLUMN doc_objects.lev3
            IS 'document structure, level number 3';},
          %{COMMENT ON COLUMN doc_objects.lev4
            IS 'document structure, level number 4';},
          %{COMMENT ON COLUMN doc_objects.lev5
            IS 'document structure, level number 5';},
          %{COMMENT ON COLUMN doc_objects.lev6
            IS 'document structure, level number 6';},
          %{COMMENT ON COLUMN doc_objects.t_of
            IS 'document structure, type of object (object is of)';},
          %{COMMENT ON COLUMN doc_objects.t_is
            IS 'document structure, object is';},
          %{COMMENT ON COLUMN doc_objects.node
            IS 'document structure, object node if heading';},
          %{COMMENT ON COLUMN doc_objects.parent
            IS 'document structure, object parent (is a heading)';}
        ]
        conn_execute_array(sql_arr)
      end

      def endnotes
        sql_arr = [
          %{COMMENT ON Table endnotes
            IS 'contains searchable text of SiSU documents endnotes';},
          %{COMMENT ON COLUMN endnotes.nid
            IS 'unique';},
          %{COMMENT ON COLUMN endnotes.document_lid
            IS 'ties to text block from which referenced';},
          %{COMMENT ON COLUMN endnotes.nr
            IS 'endnote number ';},
          %{COMMENT ON COLUMN endnotes.clean
            IS 'endnote substantive content, stripped of markup';},
          %{COMMENT ON COLUMN endnotes.body
            IS 'endnote substantive content';},
          %{COMMENT ON COLUMN endnotes.ocn
            IS 'object citation no# <\~(\d+)> from which endnote is referenced';},
          # targets this table's own column (was doc_objects.metadata_tid,
          # which overwrote the comment set for doc_objects)
          %{COMMENT ON COLUMN endnotes.metadata_tid
            IS 'tie to title in metadata_and_text - unique for each document';}
        ]
        conn_execute_array(sql_arr)
      end

      def endnotes_asterisk
        sql_arr = [
          %{COMMENT ON Table endnotes_asterisk
            IS 'contains searchable text of SiSU documents endnotes marked with asterisk';},
          %{COMMENT ON COLUMN endnotes_asterisk.nid
            IS 'unique';},
          %{COMMENT ON COLUMN endnotes_asterisk.document_lid
            IS 'ties to text block from which referenced';},
          %{COMMENT ON COLUMN endnotes_asterisk.nr
            IS 'endnote number ';},
          %{COMMENT ON COLUMN endnotes_asterisk.clean
            IS 'endnote substantive content, stripped of markup';},
          %{COMMENT ON COLUMN endnotes_asterisk.body
            IS 'endnote substantive content';},
          %{COMMENT ON COLUMN endnotes_asterisk.ocn
            IS 'object citation no# <\~(\d+)> from which endnote is referenced';},
          # targets this table's own column (was doc_objects.metadata_tid)
          %{COMMENT ON COLUMN endnotes_asterisk.metadata_tid
            IS 'tie to title in metadata_and_text - unique for each document';}
        ]
        conn_execute_array(sql_arr)
      end

      def endnotes_plus
        sql_arr = [
          %{COMMENT ON Table endnotes_plus
            IS 'contains searchable text of SiSU documents endnotes marked with plus';},
          %{COMMENT ON COLUMN endnotes_plus.nid
            IS 'unique';},
          %{COMMENT ON COLUMN endnotes_plus.document_lid
            IS 'ties to text block from which referenced';},
          %{COMMENT ON COLUMN endnotes_plus.nr
            IS 'endnote number ';},
          %{COMMENT ON COLUMN endnotes_plus.clean
            IS 'endnote substantive content, stripped of markup';},
          %{COMMENT ON COLUMN endnotes_plus.body
            IS 'endnote substantive content';},
          %{COMMENT ON COLUMN endnotes_plus.ocn
            IS 'object citation no# <\~(\d+)> from which endnote is referenced';},
          # targets this table's own column (was doc_objects.metadata_tid)
          %{COMMENT ON COLUMN endnotes_plus.metadata_tid
            IS 'tie to title in metadata_and_text - unique for each document';},
        ]
        conn_execute_array(sql_arr)
      end

      def urls
        sql_arr = [
          %{COMMENT ON Table urls
            IS 'contains base url links to different SiSU output';},
          # targets this table's own column (was doc_objects.metadata_tid)
          %{COMMENT ON COLUMN urls.metadata_tid
            IS 'tie to title in metadata_and_text - unique for each document, the mapping of rows is one to one';},
          %{COMMENT ON COLUMN urls.plaintext
            IS 'plaintext utf-8';},
          %{COMMENT ON COLUMN urls.html_toc
            IS 'table of contents for segmented html document';},
          %{COMMENT ON COLUMN urls.html_doc
            IS 'html document (scroll)';},
          %{COMMENT ON COLUMN urls.xhtml
            IS 'xhtml document (scroll)';},
          %{COMMENT ON COLUMN urls.xml_sax
            IS 'xml sax oriented document (scroll)';},
          %{COMMENT ON COLUMN urls.xml_dom
            IS 'xml dom oriented document (scroll)';},
          %{COMMENT ON COLUMN urls.odf
            IS 'opendocument format text';},
          %{COMMENT ON COLUMN urls.pdf_p
            IS 'pdf portrait';},
          %{COMMENT ON COLUMN urls.pdf_l
            IS 'pdf landscape';},
          %{COMMENT ON COLUMN urls.concordance
            IS 'rudimentary document index linked to html';},
          %{COMMENT ON COLUMN urls.latex_p
            IS 'latex portrait';},
          %{COMMENT ON COLUMN urls.latex_l
            IS 'latex_landscape';},
          %{COMMENT ON COLUMN urls.markup
            IS 'markup';},
          %{COMMENT ON COLUMN urls.sisupod
            IS 'SiSU document format .tgz (all SiSU information on document)';},
        ]
        conn_execute_array(sql_arr)
      end

      self
    end
  end
end
__END__