=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
   2007 Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:

 * Download:

 * Ralph Amissah

 ** Description: modules shared by the different db types, dbi, postgresql, sqlite

=end
module SiSU_DB_create
  require "#{SiSU_lib}/db_columns"
  # Issues the SQL that creates the SiSU tables (metadata, documents, the three
  # endnote tables and urls) over the connection handed to the constructor.
  # The column-size helpers interpolated into the SQL (lt_title, document_seg,
  # ...) are not defined in this file; presumably they are inherited from
  # SiSU_DB_columns::Column_size -- confirm against db_columns.
  class Create < SiSU_DB_columns::Column_size
    require "#{SiSU_lib}/sysenv"
    # Digest length used for the CHAR(n) digest columns; filled in once by the
    # first instance created and then shared (class variable).
    @@dl=nil
    # opt::      options object; this class reads opt.cmd and opt.fns
    # conn::     connection object; must respond to #execute
    # sql_type:: database flavour; the COMMENT ON statements are only built
    #            when it matches /pg/
    def initialize(opt,conn='',sql_type='pg')
      @opt,@conn,@sql_type=opt,conn,sql_type
      @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
      @comment=comment
      @@dl ||=SiSU_Env::Info_env.new.digest.length
    end
    # Prints every DBI driver and, beneath each, its data sources.
    def available
      DBI.available_drivers.each do |driver|
        puts "Driver: #{driver}"
        DBI.data_sources(driver).each do |dsn|
          puts "\tDatasource: #{dsn}"
        end
      end
    end
    # Announces the database being created (skipped when opt.cmd matches /q/)
    # and delegates the creation to SiSU_Env::System_call#create_pg_db.
    def create_db
      @env=SiSU_Env::Info_env.new(@opt.fns)
      tell=SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db:',%{"SiSU_#{@env.path.stub_pwd}"})
      tell.colorize unless @opt.cmd =~/q/
      SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) #watch use of path.stub_pwd instead of stub
    end
    # Builds and returns @comment: a Hash (default value '') mapping a table
    # name to the SQL COMMENT ON statements for that table. Entries are only
    # filled in when @sql_type matches /pg/; otherwise every lookup yields ''.
    #
    # The metadata_tid comments of the endnotes*, and urls tables are attached
    # to the column of their own table; they previously all targeted
    # documents.metadata_tid, overwriting that column's comment.
    #
    # NOTE(review): the %{} literals interpolate, so #@removetid inside the
    # SQL /* ... */ block expands to the value of @removetid (not assigned
    # anywhere in this class), and the backslashes in \d and \~ are
    # dropped -- confirm whether either is intended.
    def comment
      @comment=Hash.new('')
      case @sql_type
      when /pg/
        @comment['metadata'] =%{
          COMMENT ON Table metadata IS 'contains SiSU documents metadata with metadata';
          COMMENT ON COLUMN metadata.tid IS 'unique';
          COMMENT ON COLUMN metadata.filename IS 'document filename';
          COMMENT ON COLUMN metadata.title IS 'metadata title (dublin core element 1)';
          COMMENT ON COLUMN metadata.subtitle IS 'document subtitle';
          COMMENT ON COLUMN metadata.creator IS 'metadata creator (dublin core element 2)';
          COMMENT ON COLUMN metadata.illustrator IS 'metadata illustrator';
          COMMENT ON COLUMN metadata.translator IS 'metadata translator';
          COMMENT ON COLUMN metadata.subject IS 'metadata subject (dublin core element 3)';
          COMMENT ON COLUMN metadata.date IS 'metadata date (dublin core element 7)';
          COMMENT ON COLUMN metadata.date_created IS 'metadata date created (dublin core)';
          COMMENT ON COLUMN metadata.date_issued IS 'metadata date of issue (dublin core)';
          COMMENT ON COLUMN metadata.date_available IS 'metadata date available (dublin core)';
          COMMENT ON COLUMN metadata.date_valid IS 'metadata date valid (dublin core)';
          COMMENT ON COLUMN metadata.date_modified IS 'metadata date modified (dublin core)';
          COMMENT ON COLUMN metadata.type IS 'metadata type (dublin core element 8)';
          COMMENT ON COLUMN metadata.description IS 'metadata description (dublin core element 4)';
          COMMENT ON COLUMN metadata.publisher IS 'metadata publisher (dublin core element 5)';
          COMMENT ON COLUMN metadata.contributor IS 'metadata contributor (dublin core element 6)';
          COMMENT ON COLUMN metadata.prepared_by IS 'metadata markup prepared by';
          COMMENT ON COLUMN metadata.digitized_by IS 'metadata digitized by';
          COMMENT ON COLUMN metadata.format IS 'metadata format (dublin core element 9)';
          COMMENT ON COLUMN metadata.identifier IS 'metadata identifier (dublin core element 10)';
          COMMENT ON COLUMN metadata.source IS 'metadata source (dublin core element 11)';
          COMMENT ON COLUMN metadata.language IS 'metadata language (dublin core element 12)';
          COMMENT ON COLUMN metadata.language_original IS 'metadata original language';
          COMMENT ON COLUMN metadata.relation IS 'metadata (dublin core element 13)';
          COMMENT ON COLUMN metadata.coverage IS 'metadata coverage (dublin core element 14)';
          COMMENT ON COLUMN metadata.rights IS 'metadata rights / copyright / license (dublin core element 15)';
          COMMENT ON COLUMN metadata.owner IS 'metadata owner';
          COMMENT ON COLUMN metadata.keywords IS 'metadata keywords';
          COMMENT ON COLUMN metadata.comment IS 'metadata comment';
          COMMENT ON COLUMN metadata.abstract IS 'metadata abstract';
          COMMENT ON COLUMN metadata.loc IS 'metadata library of congress';
          COMMENT ON COLUMN metadata.dewey IS 'metadata dewey';
          COMMENT ON COLUMN metadata.isbn IS 'metadata isbn';
          COMMENT ON COLUMN metadata.pg IS 'metadata project gutenberg number';
          COMMENT ON COLUMN metadata.prefix_a IS 'metadata prefix';
          COMMENT ON COLUMN metadata.prefix_b IS 'metadata prefix';
          COMMENT ON COLUMN metadata.skin IS 'metadata sisu skin';
          COMMENT ON COLUMN metadata.markup IS 'metadata markup source';
          COMMENT ON COLUMN metadata.links IS 'metadata links';
          COMMENT ON COLUMN metadata.information IS 'metadata information';
          COMMENT ON COLUMN metadata.contact IS 'metadata contact';
          COMMENT ON COLUMN metadata.suffix IS 'metadata sisu suffix (output related)';
          COMMENT ON COLUMN metadata.filename IS 'metadata source filename';
          COMMENT ON COLUMN metadata.types IS 'document types scroll 1, seg 2, both 3';
          COMMENT ON COLUMN metadata.subj IS 'subject areas - no way to populate at present as not mapped';
          /*
          CREATE FUNCTION fileremoval() RETURNS opaque AS '
          BEGIN
          DELETE FROM metadata WHERE tid=#@removetid;
          DELETE FROM documents WHERE documents.metadata_tid=#@removetid;
          DELETE FROM endnotes WHERE endnotes.metadata_tid=#@removetid;
          DELETE FROM endnotes_asterisk WHERE endnotes_asterisk.metadata_tid=#@removetid;
          DELETE FROM endnotes_plus WHERE endnotes_plus.metadata_tid=#@removetid;
          DELETE FROM urls WHERE urls.metadata_tid=#@removetid;
          END;
          ' LANGUAGE 'plpgsql';
          CREATE TRIGGER removefile AFTER INSERT PROCEDURE fileremoval();
          */
        }
        @comment['documents'] =%{
          COMMENT ON Table documents IS 'contains searchable text of SiSU documents';
          COMMENT ON COLUMN documents.lid IS 'unique';
          COMMENT ON COLUMN documents.metadata_tid IS 'tie to title in metadata';
          COMMENT ON COLUMN documents.lev IS 'doc level 1-6 \d\~';
          COMMENT ON COLUMN documents.seg IS 'segment name from level 4';
          COMMENT ON COLUMN documents.ocn IS 'object citation number';
          COMMENT ON COLUMN documents.en_a IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';
          COMMENT ON COLUMN documents.en_z IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';
          COMMENT ON COLUMN documents.en_a_asterisk IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';
          COMMENT ON COLUMN documents.en_z_asterisk IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';
          COMMENT ON COLUMN documents.en_a_plus IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';
          COMMENT ON COLUMN documents.en_z_plus IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)';
          COMMENT ON COLUMN documents.types IS 'document types seg scroll';
          COMMENT ON COLUMN documents.clean IS 'text object - substantive text: clean, stripped of markup';
          COMMENT ON COLUMN documents.body IS 'text object - substantive text: light html markup';
          COMMENT ON COLUMN documents.lev1 IS 'document structure, level 1';
          COMMENT ON COLUMN documents.lev2 IS 'document structure, level 2';
          COMMENT ON COLUMN documents.lev3 IS 'document structure, level 3';
          COMMENT ON COLUMN documents.lev4 IS 'document structure, level 4';
          COMMENT ON COLUMN documents.lev5 IS 'document structure, level 5';
          COMMENT ON COLUMN documents.lev6 IS 'document structure, level 6';
        }
        @comment['endnotes'] =%{
          COMMENT ON Table endnotes IS 'contains searchable text of SiSU documents endnotes';
          COMMENT ON COLUMN endnotes.nid IS 'unique';
          COMMENT ON COLUMN endnotes.document_lid IS 'ties to text block from which referenced';
          COMMENT ON COLUMN endnotes.nr IS 'endnote number ';
          COMMENT ON COLUMN endnotes.clean IS 'endnote substantive content, stripped of markup';
          COMMENT ON COLUMN endnotes.body IS 'endnote substantive content';
          COMMENT ON COLUMN endnotes.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';
          COMMENT ON COLUMN endnotes.metadata_tid IS 'tie to title in metadata - unique for each document';
        }
        @comment['endnotes_asterisk'] =%{
          COMMENT ON Table endnotes_asterisk IS 'contains searchable text of SiSU documents endnotes asterisk';
          COMMENT ON COLUMN endnotes_asterisk.nid IS 'unique';
          COMMENT ON COLUMN endnotes_asterisk.document_lid IS 'ties to text block from which referenced';
          COMMENT ON COLUMN endnotes_asterisk.nr IS 'endnote number ';
          COMMENT ON COLUMN endnotes_asterisk.clean IS 'endnote substantive content, stripped of markup';
          COMMENT ON COLUMN endnotes_asterisk.body IS 'endnote substantive content';
          COMMENT ON COLUMN endnotes_asterisk.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';
          COMMENT ON COLUMN endnotes_asterisk.metadata_tid IS 'tie to title in metadata - unique for each document';
        }
        @comment['endnotes_plus'] =%{
          COMMENT ON Table endnotes_plus IS 'contains searchable text of SiSU documents endnotes';
          COMMENT ON COLUMN endnotes_plus.nid IS 'unique';
          COMMENT ON COLUMN endnotes_plus.document_lid IS 'ties to text block from which referenced';
          COMMENT ON COLUMN endnotes_plus.nr IS 'endnote number ';
          COMMENT ON COLUMN endnotes_plus.clean IS 'endnote substantive content, stripped of markup';
          COMMENT ON COLUMN endnotes_plus.body IS 'endnote substantive content';
          COMMENT ON COLUMN endnotes_plus.ocn IS 'object citation no# <\~(\d+)> from which endnote is referenced';
          COMMENT ON COLUMN endnotes_plus.metadata_tid IS 'tie to title in metadata - unique for each document';
        }
        @comment['urls'] =%{
          COMMENT ON Table urls IS 'contains base url links to different SiSU output';
          COMMENT ON COLUMN urls.metadata_tid IS 'tie to title in metadata - unique for each document, the mapping of rows is one to one';
          COMMENT ON COLUMN urls.plaintext IS 'plaintext utf-8';
          COMMENT ON COLUMN urls.html_toc IS 'table of contents for segmented html document';
          COMMENT ON COLUMN urls.html_doc IS 'html document (scroll)';
          COMMENT ON COLUMN urls.xhtml IS 'xhtml document (scroll)';
          COMMENT ON COLUMN urls.xml_sax IS 'xml sax oriented document (scroll)';
          COMMENT ON COLUMN urls.xml_dom IS 'xml dom oriented document (scroll)';
          COMMENT ON COLUMN urls.odf IS 'opendocument format text';
          COMMENT ON COLUMN urls.pdf_p IS 'pdf portrait';
          COMMENT ON COLUMN urls.pdf_l IS 'pdf landscape';
          COMMENT ON COLUMN urls.concordance IS 'rudimentary document index linked to html';
          COMMENT ON COLUMN urls.latex_p IS 'latex portrait';
          COMMENT ON COLUMN urls.latex_l IS 'latex_landscape';
          COMMENT ON COLUMN urls.markup IS 'markup';
          COMMENT ON COLUMN urls.sisupod IS 'SiSU document format .tgz (all SiSU information on document)';
        }
      end
      @comment
    end
    # When opt.cmd matches /d/, calls path.webserv_stub_ensure on a fresh
    # SiSU_Env::Info_env; otherwise does nothing and returns nil.
    def output_dir?
      dir=SiSU_Env::Info_env.new('')
      if @opt.cmd =~/d/; dir.path.webserv_stub_ensure
      end
    end
    # Creates the metadata table and applies its column comments. Prints a
    # notice first unless opt.cmd matches /q/.
    def create_table_metadata
      print %{
      currently using sisu dbi module
      to be populated from documents files
      create tables metadata
      data import through ruby transfer
      } unless @opt.cmd =~/q/
      @conn.execute(%{
        CREATE TABLE metadata (
          tid INT4 PRIMARY KEY,
          title VARCHAR(#{lt_title}) NULL,
          subtitle VARCHAR(#{lt_subtitle}) NULL,
          creator VARCHAR(#{lt_creator}) NULL,
          illustrator VARCHAR(#{lt_illustrator}) NULL,
          translator VARCHAR(#{lt_translator}) NULL,
          subject VARCHAR(#{lt_subject}) NULL,
          date VARCHAR(#{lt_date}) NULL,
          date_created VARCHAR(#{lt_date_created}) NULL,
          date_issued VARCHAR(#{lt_date_issued}) NULL,
          date_available VARCHAR(#{lt_date_available}) NULL,
          date_valid VARCHAR(#{lt_date_valid}) NULL,
          date_modified VARCHAR(#{lt_date_modified}) NULL,
          /* date DATE, */
          /* date_created DATE, */
          /* date_issued DATE, */
          /* date_available DATE, */
          /* date_valid DATE, */
          /* date_modified DATE, */
          type VARCHAR(#{lt_type}) NULL,
          description VARCHAR(#{lt_description}) NULL,
          publisher VARCHAR(#{lt_publisher}) NULL,
          contributor VARCHAR(#{lt_contributor}) NULL,
          prepared_by VARCHAR(#{lt_prepared_by}) NULL,
          digitized_by VARCHAR(#{lt_digitized_by}) NULL,
          format VARCHAR(#{lt_format}) NULL,
          identifier VARCHAR(#{lt_identifier}) NULL,
          source VARCHAR(#{lt_source}) NULL,
          language VARCHAR(#{lt_language}) NULL,
          language_original VARCHAR(#{lt_language_original}) NULL,
          relation VARCHAR(#{lt_relation}) NULL,
          coverage VARCHAR(#{lt_coverage}) NULL,
          rights VARCHAR(#{lt_rights}) NULL,
          copyright VARCHAR(#{lt_copyright}) NULL,
          owner VARCHAR(#{lt_owner}) NULL,
          keywords VARCHAR(#{lt_keywords}) NULL,
          comment VARCHAR(#{lt_comment}) NULL,
          loc VARCHAR(#{lt_loc}) NULL,
          dewey VARCHAR(#{lt_dewey}) NULL,
          isbn VARCHAR(#{lt_isbn}) NULL,
          pg VARCHAR(#{lt_pg}) NULL,
          abstract VARCHAR(#{lt_abstract}) NULL,
          prefix_a TEXT NULL,
          prefix_b TEXT NULL,
          skin VARCHAR(#{lt_skin}) NULL,
          markup VARCHAR(#{lt_markup}) NULL,
          links VARCHAR(#{lt_links}) NULL,
          information VARCHAR(#{lt_information}) NULL,
          contact VARCHAR(#{lt_contact}) NULL,
          suffix VARCHAR(#{lt_suffix}) NULL,
          filename VARCHAR(#{lt_filename}) NULL UNIQUE,
          types CHAR(#{lt_types}) NULL,
          subj VARCHAR(#{lt_subj}) NULL
        );
        #{@comment['metadata']}
      })
    end
    # Creates the documents table (references metadata) and applies its
    # column comments. Prints a notice first unless opt.cmd matches /q/.
    def create_table # create documents base
      print %{
      to be populated from documents files
      create tables documents document_trade document_env
      data import through ruby transfer
      } unless @opt.cmd =~/q/
      @conn.execute(%{
        CREATE TABLE documents (
          lid INT4 PRIMARY KEY,
          metadata_tid INT4 REFERENCES metadata,
          ocn SMALLINT,
          ocnd VARCHAR(6),
          ocns VARCHAR(6),
          clean TEXT NULL,
          body TEXT NULL,
          seg VARCHAR(#{document_seg}) NULL,
          lev SMALLINT NULL,
          lev1 SMALLINT,
          lev2 SMALLINT,
          lev3 SMALLINT,
          lev4 SMALLINT,
          lev5 SMALLINT,
          lev6 SMALLINT,
          en_a SMALLINT NULL,
          en_z SMALLINT NULL,
          en_a_asterisk SMALLINT NULL,
          en_z_asterisk SMALLINT NULL,
          en_a_plus SMALLINT NULL,
          en_z_plus SMALLINT NULL,
          digest_clean CHAR(#{@@dl}),
          digest_all CHAR(#{@@dl}),
          types CHAR(1) NULL
        );
        #{@comment['documents']}
      })
    end
    # Creates the endnotes table (references documents and metadata) and
    # applies its column comments. Prints a notice first unless opt.cmd
    # matches /q/.
    def create_table_endnotes
      print %{
      to be populated from documents files
      create tables endnotes
      data import through ruby transfer
      } unless @opt.cmd =~/q/
      @conn.execute(%{
        CREATE TABLE endnotes (
          nid INT4 PRIMARY KEY,
          document_lid INT4 REFERENCES documents,
          nr SMALLINT,
          clean TEXT NULL,
          body TEXT NULL,
          ocn SMALLINT,
          ocnd VARCHAR(6),
          ocns VARCHAR(6),
          digest_clean CHAR(#{@@dl}),
          metadata_tid INT4 REFERENCES metadata
        );
        #{@comment['endnotes']}
      })
    end
    # Creates the endnotes_asterisk table; same column layout as endnotes.
    # Prints a notice first unless opt.cmd matches /q/.
    def create_table_endnotes_asterisk
      print %{
      to be populated from documents files
      create tables endnotes_asterisk
      data import through ruby transfer
      } unless @opt.cmd =~/q/
      @conn.execute(%{
        CREATE TABLE endnotes_asterisk (
          nid INT4 PRIMARY KEY,
          document_lid INT4 REFERENCES documents,
          nr SMALLINT,
          clean TEXT NULL,
          body TEXT NULL,
          ocn SMALLINT,
          ocnd VARCHAR(6),
          ocns VARCHAR(6),
          digest_clean CHAR(#{@@dl}),
          metadata_tid INT4 REFERENCES metadata
        );
        #{@comment['endnotes_asterisk']}
      })
    end
    # Creates the endnotes_plus table; same column layout as endnotes.
    # Prints a notice first unless opt.cmd matches /q/.
    def create_table_endnotes_plus
      print %{
      to be populated from documents files
      create tables endnotes_plus
      data import through ruby transfer
      } unless @opt.cmd =~/q/
      @conn.execute(%{
        CREATE TABLE endnotes_plus (
          nid INT4 PRIMARY KEY,
          document_lid INT4 REFERENCES documents,
          nr SMALLINT,
          clean TEXT NULL,
          body TEXT NULL,
          ocn SMALLINT,
          ocnd VARCHAR(6),
          ocns VARCHAR(6),
          digest_clean CHAR(#{@@dl}),
          metadata_tid INT4 REFERENCES metadata
        );
        #{@comment['endnotes_plus']}
      })
    end
    # Creates the urls table (references metadata) and applies its column
    # comments. Prints a notice first unless opt.cmd matches /q/.
    def create_table_urls # create documents file links mapping
      print %{
      currently using sisu dbi module
      to be populated from documents files
      create tables urls
      data import through ruby transfer
      } unless @opt.cmd =~/q/
      @conn.execute(%{
        CREATE TABLE urls (
          metadata_tid INT4 REFERENCES metadata,
          plaintext varchar(512),
          html_toc varchar(512),
          html_doc varchar(512),
          xhtml varchar(512),
          xml_sax varchar(512),
          xml_dom varchar(512),
          odf varchar(512),
          pdf_p varchar(512),
          pdf_l varchar(512),
          concordance varchar(512),
          latex_p varchar(512),
          latex_l varchar(512),
          digest varchar(512),
          manifest varchar(512),
          markup varchar(512),
          sisupod varchar(512)
        );
        #{@comment['urls']}
      })
    end
  end
end
__END__