diff options
| -rw-r--r-- | CHANGELOG | 18 | ||||
| -rw-r--r-- | conf/sisu/version.yml | 6 | ||||
| -rw-r--r-- | lib/sisu/v0/cgi_pgsql.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/cgi_sqlite.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/concordance.rb | 2 | ||||
| -rw-r--r-- | lib/sisu/v0/constants.rb | 1 | ||||
| -rw-r--r-- | lib/sisu/v0/dal_metadata.rb | 2 | ||||
| -rw-r--r-- | lib/sisu/v0/db_create.rb | 527 | ||||
| -rw-r--r-- | lib/sisu/v0/db_import.rb | 129 | ||||
| -rw-r--r-- | lib/sisu/v0/db_indexes.rb | 61 | ||||
| -rw-r--r-- | lib/sisu/v0/db_load_tuple.rb | 67 | ||||
| -rw-r--r-- | lib/sisu/v0/db_remove.rb | 7 | ||||
| -rw-r--r-- | lib/sisu/v0/harvest.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/odf.rb | 1 | ||||
| -rw-r--r-- | lib/sisu/v0/shared_html_lite.rb | 7 | ||||
| -rw-r--r-- | lib/sisu/v0/xml_dom.rb | 2 | 
16 files changed, 440 insertions, 402 deletions
| @@ -9,11 +9,23 @@ Reverse Chronological:  %% STABLE MANIFEST +%% sisu_0.70.6.orig.tar.gz (2009-07-01:26/3) +http://www.jus.uio.no/sisu/pkg/src/sisu_0.70.6.orig.tar.gz +  sisu_0.70.6.orig.tar.gz +  sisu_0.70.6-1.dsc +  sisu_0.70.6-1.diff.gz + +  * db dbi, interfaces to sqlite and pgsql, fixes + +  * cgi sample search form, fix + +  * minor touches +  %% sisu_0.70.5.orig.tar.gz (2009-01-18:02/7)  http://www.jus.uio.no/sisu/pkg/src/sisu_0.70.5.orig.tar.gz -  sisu_0.70.5.orig.tar.gz -  sisu_0.70.5-1.dsc -  sisu_0.70.5-1.diff.gz +  5ab95108c9f1734fbb6dcca795d6c6e34445625adb296e797d97f0af1e73f985 1546846 sisu_0.70.5.orig.tar.gz +  dc95471b171637fd3cbc9e627283254d1f71b655d1a7c2a24eb904cd4539d790 1151 sisu_0.70.5-1.dsc +  9ef6f7891eafc10ec13f6e6bcfa23d993b7126e299b8b4db700621f9c35be95a 146964 sisu_0.70.5-1.diff.gz    * git repo moved to:      git clone git://git.sisudoc.org/git/code/sisu.git diff --git a/conf/sisu/version.yml b/conf/sisu/version.yml index 437019d6..64ebaa03 100644 --- a/conf/sisu/version.yml +++ b/conf/sisu/version.yml @@ -1,5 +1,5 @@  ---  -:version: 0.70.5 -:date_stamp: 2009w02/7 -:date: "2009-01-18" +:version: 0.70.6 +:date_stamp: 2009w26/3 +:date: "2009-07-01"  :project: SiSU diff --git a/lib/sisu/v0/cgi_pgsql.rb b/lib/sisu/v0/cgi_pgsql.rb index 38da777d..4f201dfa 100644 --- a/lib/sisu/v0/cgi_pgsql.rb +++ b/lib/sisu/v0/cgi_pgsql.rb @@ -22,7 +22,7 @@     This program is distributed in the hope that it will be useful, but WITHOUT     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for     more details.     You should have received a copy of the GNU General Public License along with @@ -136,7 +136,7 @@ module  SiSU_CGI_pgsql        @hosturl_db="#{@env.url.webserv_base_cgi}"        @hosturl_files="#{@env.url.webserv_files_from_db}"        @port="#{@db.psql.port}" -      user=''  # '#{@env.user}' +      user='#{@env.user}'  # check user name for access to pg database: '#{@env.user}'        WOK_SQL      end      def search_statement diff --git a/lib/sisu/v0/cgi_sqlite.rb b/lib/sisu/v0/cgi_sqlite.rb index 86d6987b..34cb67dc 100644 --- a/lib/sisu/v0/cgi_sqlite.rb +++ b/lib/sisu/v0/cgi_sqlite.rb @@ -22,7 +22,7 @@     This program is distributed in the hope that it will be useful, but WITHOUT     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for     more details.     You should have received a copy of the GNU General Public License along with @@ -93,7 +93,7 @@ module  SiSU_CGI_sqlite        serve.each do |x|          f3 << %{          when /SiSU_#{x}/;                      "#{@env.path.webserv}/#{x}/sisu_sqlite.db"\n}        end -      f3 << "else  #{@env.path.webserv}/#{serve[0]}/sisu_sqlite.db\n          end\n" +      f3 << %{           else  '#{@env.path.webserv}/#{serve[0]}/sisu_sqlite.db'\n          end\n}        if FileTest.writable?('.')          output=File.open('sisu_sqlite.cgi','w')          output << header0 << header1 << header_desc << header2 << f1 << buttons1 << buttons2 << search_request << search_statement << search_statement_common << search_query1 << @common.pages << search_query2 << @common.tail << @common.main1 << f2 << f3 << dbi_connect << @common.main2 diff --git a/lib/sisu/v0/concordance.rb b/lib/sisu/v0/concordance.rb index 4b6530ad..3c2b8326 100644 --- a/lib/sisu/v0/concordance.rb +++ b/lib/sisu/v0/concordance.rb @@ -125,7 +125,7 @@ WOK  <html>  <head>    <title> -    SiSU created WordIndex for: #{@md.full_title} +    SiSU created Concordance (WordIndex) for: #{@md.full_title}    </title>    <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />    <meta name="Description" content=" SiSU created"> diff --git a/lib/sisu/v0/constants.rb b/lib/sisu/v0/constants.rb index 8929a0bb..5c93d543 100644 --- a/lib/sisu/v0/constants.rb +++ b/lib/sisu/v0/constants.rb @@ -57,6 +57,7 @@   ** Description: system environment, resource control and configuration details  =end +Sfx={:txt=>'.txt',:html=>'.html',:xhtml=>'.xhtml',:xml=>'.xml',:epub=>'.epub',:epub_xhtml=>'.xhtml',:odt=>'.odt',:pdf=>'.pdf'}  Mx,Rx={},{}  Mx[:meta_o],Mx[:meta_c]='〔@','〕'  Mx[:lv_o_1],Mx[:lv_o_2],Mx[:lv_o_3],Mx[:lv_o_4],Mx[:lv_o_5],Mx[:lv_o_6],Mx[:lv_o_7],Mx[:lv_o_8],Mx[:lv_o_9]= diff --git a/lib/sisu/v0/dal_metadata.rb b/lib/sisu/v0/dal_metadata.rb index 2555829d..bdf0dace 100644 --- a/lib/sisu/v0/dal_metadata.rb +++ b/lib/sisu/v0/dal_metadata.rb @@ -142,7 +142,7 @@ module SiSU_metadata              "\n#{@tr.comments}: #{Mx[:fa_underscore_o]}#{@md.comments}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"            when /^#{Mx[:meta_o]}cls_loc#{Mx[:meta_c]}/              "\n#{@cls_dewey}: #{Mx[:fa_underscore_o]}#{@md.cls_dewey}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" -          when /^#{Mx[:meta_o]}cls_dewey#{Mx[:meta_c]}/ +          when /^#{Mx[:meta_o]}cls_loc#{Mx[:meta_c]}/              "\n#{@tr.cls_dewey}: #{Mx[:fa_underscore_o]}#{@md.cls_dewey}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}"            when /^#{Mx[:meta_o]}(?:cls_gutenberg|cls_pg)#{Mx[:meta_c]}/              "\n#{@tr.cls_gutenberg}: #{Mx[:fa_underscore_o]}#{@md.cls_gutenberg}#{Mx[:fa_underscore_c]} #{Mx[:id_o]}~0;m#{ocnm};d#{ocnd}#{Mx[:id_c]}" diff --git a/lib/sisu/v0/db_create.rb b/lib/sisu/v0/db_create.rb index 9f6ae360..2cdb2c06 100644 --- a/lib/sisu/v0/db_create.rb +++ b/lib/sisu/v0/db_create.rb @@ -22,7 +22,7 @@     This program is distributed in the hope that it will be useful, but WITHOUT     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for     more details.     You should have received a copy of the GNU General Public License along with @@ -65,7 +65,7 @@ module SiSU_DB_create      def initialize(opt,conn,file,sql_type='pg')        @opt,@conn,@file,@sql_type=opt,conn,file,sql_type        @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX -      @comment=Comment.new(@sql_type) +      @comment=Comment.new(@conn,@sql_type)        @@dl ||=SiSU_Env::Info_env.new.digest.length      end      def available @@ -167,8 +167,8 @@ module SiSU_DB_create              writing_focus_nationality VARCHAR(#{lt_writing_focus_nationality}) NULL,              topic_register       VARCHAR(#{lt_topic_register}) NULL            ); -          #{@comment.metadata} -        }) +       }) +       @comment.metadata        end        def documents                                                 # create documents base          print %{ @@ -203,8 +203,8 @@ module SiSU_DB_create              digest_all      CHAR(#{@@dl}),              types           CHAR(1) NULL            ); -          #{@comment.documents} -        }) +       }) +       @comment.documents        end        def endnotes          print %{ @@ -225,8 +225,8 @@ module SiSU_DB_create              digest_clean    CHAR(#{@@dl}),              metadata_tid    BIGINT REFERENCES metadata            ); -          #{@comment.endnotes} -        }) +       }) +       @comment.endnotes        end        def endnotes_asterisk          print %{ @@ -247,8 +247,8 @@ module SiSU_DB_create              digest_clean    CHAR(#{@@dl}),              metadata_tid    BIGINT REFERENCES metadata            ); -          #{@comment.endnotes_asterisk} -        }) +       }) +       @comment.endnotes_asterisk        end        def endnotes_plus          print %{ @@ -269,8 +269,8 @@ module SiSU_DB_create              digest_clean    CHAR(#{@@dl}),              metadata_tid    BIGINT REFERENCES metadata            ); -          #{@comment.endnotes_plus} -        }) +       }) +       @comment.endnotes_plus        end        def urls                                                       # create documents file links mapping          print %{ @@ -299,276 +299,281 @@ module SiSU_DB_create              markup          varchar(512),              sisupod         varchar(512)            ); -          #{@comment.urls} -        }) +       }) +       @comment.urls        end        self      end    end    class Comment -    def initialize(sql_type='pg') +    def initialize(conn,sql_type='pg') +      @conn=conn        if sql_type =~ /pg/; psql        else none        end      end      def psql +      def conn_execute_array(sql_arr) +        @conn.transaction do |conn| +          sql_arr.each do |sql| +            conn.execute(sql) +          end +        end +      end +     #def conn_execute_array(sql_arr) +     #  sql_arr.each do |sql| +     #    @conn.execute(sql) +     #  end +     #end        def metadata -        %{ -        COMMENT ON Table metadata -          IS 'contains SiSU documents metadata with metadata'; -        COMMENT ON COLUMN metadata.tid -          IS 'unique'; -        COMMENT ON COLUMN metadata.filename -          IS 'document filename'; -        COMMENT ON COLUMN metadata.title -          IS 'metadata title (dublin core element 1)'; -        COMMENT ON COLUMN metadata.subtitle -          IS 'document subtitle'; -        COMMENT ON COLUMN metadata.creator -          IS 'metadata creator (dublin core element 2)'; -        COMMENT ON COLUMN metadata.author -          IS 'metadata author (dublin core element 2)'; -        COMMENT ON COLUMN metadata.illustrator -          IS 'metadata illustrator'; -        COMMENT ON COLUMN metadata.translator -          IS 'metadata translator'; -        COMMENT ON COLUMN metadata.subject -          IS 'metadata subject (dublin core element 3)'; -        COMMENT ON COLUMN metadata.date -          IS 'metadata date (dublin core element 7)'; -        COMMENT ON COLUMN metadata.date_created -          IS 'metadata date created (dublin core)'; -        COMMENT ON COLUMN metadata.date_issued -          IS 'metadata date of issue (dublin core)'; -        COMMENT ON COLUMN metadata.date_available -          IS 'metadata date available (dublin core)'; -        COMMENT ON COLUMN metadata.date_valid -          IS 'metadata date valid (dublin core)'; -        COMMENT ON COLUMN metadata.date_modified -          IS 'metadata date modified (dublin core)'; -        COMMENT ON COLUMN metadata.type -          IS 'metadata type (dublin core element 8)'; -        COMMENT ON COLUMN metadata.description -          IS 'metadata description (dublin core element 4)'; -        COMMENT ON COLUMN metadata.publisher -          IS 'metadata publisher (dublin core element 5)'; -        COMMENT ON COLUMN metadata.contributor -          IS 'metadata contributor (dublin core element 6)'; -        COMMENT ON COLUMN metadata.prepared_by -          IS 'metadata markup prepared by'; -        COMMENT ON COLUMN metadata.digitized_by -          IS 'metadata digitized by'; -        COMMENT ON COLUMN metadata.format -          IS 'metadata format (dublin core element 9)'; -        COMMENT ON COLUMN metadata.identifier -          IS 'metadata identifier (dublin core element 10)'; -        COMMENT ON COLUMN metadata.source -          IS 'metadata source (dublin core element 11)'; -        COMMENT ON COLUMN metadata.language -          IS 'metadata language (dublin core element 12)'; -        COMMENT ON COLUMN metadata.language_original -          IS 'metadata original language'; -        COMMENT ON COLUMN metadata.relation -          IS 'metadata  (dublin core element 13)'; -        COMMENT ON COLUMN metadata.coverage -          IS 'metadata coverage (dublin core element 14)'; -        COMMENT ON COLUMN metadata.rights -          IS 'metadata rights / copyright / license (dublin core element 15)'; -        COMMENT ON COLUMN metadata.owner -          IS 'metadata owner'; -        COMMENT ON COLUMN metadata.keywords -          IS 'metadata keywords'; -        COMMENT ON COLUMN metadata.comment -          IS 'metadata comment'; -        COMMENT ON COLUMN metadata.abstract -          IS 'metadata abstract'; -        COMMENT ON COLUMN metadata.loc -          IS 'metadata library of congress'; -        COMMENT ON COLUMN metadata.dewey -          IS 'metadata dewey'; -        COMMENT ON COLUMN metadata.isbn -          IS 'metadata isbn'; -        COMMENT ON COLUMN metadata.pg -          IS 'metadata project gutenberg number'; -        COMMENT ON COLUMN metadata.prefix_a -          IS 'metadata prefix'; -        COMMENT ON COLUMN metadata.prefix_b -          IS 'metadata prefix'; -        COMMENT ON COLUMN metadata.skin -          IS 'metadata sisu skin'; -        COMMENT ON COLUMN metadata.markup -          IS 'metadata markup source'; -        COMMENT ON COLUMN metadata.links -          IS 'metadata links'; -        COMMENT ON COLUMN metadata.information -          IS 'metadata information'; -        COMMENT ON COLUMN metadata.contact -          IS 'metadata contact'; -        COMMENT ON COLUMN metadata.suffix -          IS 'metadata sisu suffix (output related)'; -        COMMENT ON COLUMN metadata.filename -          IS 'metadata source filename'; -        COMMENT ON COLUMN metadata.types -          IS 'document types scroll 1, seg 2, both 3'; -        COMMENT ON COLUMN metadata.subj -          IS 'subject areas - no way to populate at present as not mapped'; -/* -        CREATE FUNCTION fileremoval() RETURNS opaque AS ' -          BEGIN -            DELETE FROM metadata WHERE tid=#@removetid; -            DELETE FROM documents WHERE documents.metadata_tid=#@removetid; -            DELETE FROM endnotes WHERE endnotes.metadata_tid=#@removetid; -            DELETE FROM endnotes_asterisk WHERE endnotes_asterisk.metadata_tid=#@removetid; -            DELETE FROM endnotes_plus WHERE endnotes_plus.metadata_tid=#@removetid; -            DELETE FROM urls WHERE urls.metadata_tid=#@removetid; -          END; -        ' LANGUAGE 'plpgsql'; -        CREATE TRIGGER removefile AFTER INSERT -        PROCEDURE fileremoval(); -*/ -        } +        sql_arr=[ +          %{COMMENT ON Table metadata +            IS 'contains SiSU documents metadata with metadata';}, +          %{COMMENT ON COLUMN metadata.tid +            IS 'unique';}, +          %{COMMENT ON COLUMN metadata.filename +            IS 'document filename';}, +          %{COMMENT ON COLUMN metadata.title +            IS 'metadata title (dublin core element 1)';}, +          %{COMMENT ON COLUMN metadata.subtitle +            IS 'document subtitle';}, +          %{COMMENT ON COLUMN metadata.creator +            IS 'metadata creator (dublin core element 2)';}, +          %{COMMENT ON COLUMN metadata.author +            IS 'metadata author (dublin core element 2)';}, +          %{COMMENT ON COLUMN metadata.illustrator +            IS 'metadata illustrator';}, +          %{COMMENT ON COLUMN metadata.translator +            IS 'metadata translator';}, +          %{COMMENT ON COLUMN metadata.subject +            IS 'metadata subject (dublin core element 3)';}, +          %{COMMENT ON COLUMN metadata.date +            IS 'metadata date (dublin core element 7)';}, +          %{COMMENT ON COLUMN metadata.date_created +            IS 'metadata date created (dublin core)';}, +          %{COMMENT ON COLUMN metadata.date_issued +            IS 'metadata date of issue (dublin core)';}, +          %{COMMENT ON COLUMN metadata.date_available +            IS 'metadata date available (dublin core)';}, +          %{COMMENT ON COLUMN metadata.date_valid +            IS 'metadata date valid (dublin core)';}, +          %{COMMENT ON COLUMN metadata.date_modified +            IS 'metadata date modified (dublin core)';}, +          %{COMMENT ON COLUMN metadata.type +            IS 'metadata type (dublin core element 8)';}, +          %{COMMENT ON COLUMN metadata.description +            IS 'metadata description (dublin core element 4)';}, +          %{COMMENT ON COLUMN metadata.publisher +            IS 'metadata publisher (dublin core element 5)';}, +          %{COMMENT ON COLUMN metadata.contributor +            IS 'metadata contributor (dublin core element 6)';}, +          %{COMMENT ON COLUMN metadata.prepared_by +            IS 'metadata markup prepared by';}, +          %{COMMENT ON COLUMN metadata.digitized_by +            IS 'metadata digitized by';}, +          %{COMMENT ON COLUMN metadata.format +            IS 'metadata format (dublin core element 9)';}, +          %{COMMENT ON COLUMN metadata.identifier +            IS 'metadata identifier (dublin core element 10)';}, +          %{COMMENT ON COLUMN metadata.source +            IS 'metadata source (dublin core element 11)';}, +          %{COMMENT ON COLUMN metadata.language +            IS 'metadata language (dublin core element 12)';}, +          %{COMMENT ON COLUMN metadata.language_original +            IS 'metadata original language';}, +          %{COMMENT ON COLUMN metadata.relation +            IS 'metadata  (dublin core element 13)';}, +          %{COMMENT ON COLUMN metadata.coverage +            IS 'metadata coverage (dublin core element 14)';}, +          %{COMMENT ON COLUMN metadata.rights +            IS 'metadata rights / copyright / license (dublin core element 15)';}, +          %{COMMENT ON COLUMN metadata.owner +            IS 'metadata owner';}, +          %{COMMENT ON COLUMN metadata.keywords +            IS 'metadata keywords';}, +          %{COMMENT ON COLUMN metadata.comment +            IS 'metadata comment';}, +          %{COMMENT ON COLUMN metadata.abstract +            IS 'metadata abstract';}, +          %{COMMENT ON COLUMN metadata.loc +            IS 'metadata library of congress';}, +          %{COMMENT ON COLUMN metadata.dewey +            IS 'metadata dewey';}, +          %{COMMENT ON COLUMN metadata.isbn +            IS 'metadata isbn';}, +          %{COMMENT ON COLUMN metadata.pg +            IS 'metadata project gutenberg number';}, +          %{COMMENT ON COLUMN metadata.prefix_a +            IS 'metadata prefix';}, +          %{COMMENT ON COLUMN metadata.prefix_b +            IS 'metadata prefix';}, +          %{COMMENT ON COLUMN metadata.skin +            IS 'metadata sisu skin';}, +          %{COMMENT ON COLUMN metadata.markup +            IS 'metadata markup source';}, +          %{COMMENT ON COLUMN metadata.links +            IS 'metadata links';}, +          %{COMMENT ON COLUMN metadata.information +            IS 'metadata information';}, +          %{COMMENT ON COLUMN metadata.contact +            IS 'metadata contact';}, +          %{COMMENT ON COLUMN metadata.suffix +            IS 'metadata sisu suffix (output related)';}, +          %{COMMENT ON COLUMN metadata.filename +            IS 'metadata source filename';}, +          %{COMMENT ON COLUMN metadata.types +            IS 'document types scroll 1, seg 2, both 3';}, +          %{COMMENT ON COLUMN metadata.subj +            IS 'subject areas - no way to populate at present as not mapped';}, +        ] +        conn_execute_array(sql_arr)        end        def documents -        %{ -        COMMENT ON Table documents -          IS 'contains searchable text of SiSU documents'; -        COMMENT ON COLUMN documents.lid -          IS 'unique'; -        COMMENT ON COLUMN documents.metadata_tid -          IS 'tie to title in metadata'; -        COMMENT ON COLUMN documents.lev -          IS 'doc level 1-6 \d\~'; -        COMMENT ON COLUMN documents.seg -          IS 'segment name from level 4'; -        COMMENT ON COLUMN documents.ocn -          IS 'object citation number'; -        COMMENT ON COLUMN documents.en_a -          IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)'; -        COMMENT ON COLUMN documents.en_z -          IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)'; -        COMMENT ON COLUMN documents.en_a_asterisk -          IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)'; -        COMMENT ON COLUMN documents.en_z_asterisk -          IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)'; -        COMMENT ON COLUMN documents.en_a_plus -          IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)'; -        COMMENT ON COLUMN documents.en_z_plus -          IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)'; -        COMMENT ON COLUMN documents.types -          IS 'document types seg scroll'; -        COMMENT ON COLUMN documents.clean -          IS 'text object - substantive text: clean, stripped of markup'; -        COMMENT ON COLUMN documents.body -          IS 'text object - substantive text: light html markup'; -        COMMENT ON COLUMN documents.lev1 -          IS 'document structure, level 1'; -        COMMENT ON COLUMN documents.lev2 -          IS 'document structure, level 2'; -        COMMENT ON COLUMN documents.lev3 -          IS 'document structure, level 3'; -        COMMENT ON COLUMN documents.lev4 -          IS 'document structure, level 4'; -        COMMENT ON COLUMN documents.lev5 -          IS 'document structure, level 5'; -        COMMENT ON COLUMN documents.lev6 -          IS 'document structure, level 6'; -        } +        sql_arr=[ +          %{COMMENT ON Table documents +            IS 'contains searchable text of SiSU documents';}, +          %{COMMENT ON COLUMN documents.lid +            IS 'unique';}, +          %{COMMENT ON COLUMN documents.metadata_tid +            IS 'tie to title in metadata';}, +          %{COMMENT ON COLUMN documents.lev +            IS 'doc level 1-6 \d\~';}, +          %{COMMENT ON COLUMN documents.seg +            IS 'segment name from level 4';}, +          %{COMMENT ON COLUMN documents.ocn +            IS 'object citation number';}, +          %{COMMENT ON COLUMN documents.en_a +            IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';}, +          %{COMMENT ON COLUMN documents.en_z +            IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';}, +          %{COMMENT ON COLUMN documents.en_a_asterisk +            IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';}, +          %{COMMENT ON COLUMN documents.en_z_asterisk +            IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';}, +          %{COMMENT ON COLUMN documents.en_a_plus +            IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';}, +          %{COMMENT ON COLUMN documents.en_z_plus +            IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_plus to create range)';}, +          %{COMMENT ON COLUMN documents.types +            IS 'document types seg scroll';}, +          %{COMMENT ON COLUMN documents.clean +            IS 'text object - substantive text: clean, stripped of markup';}, +          %{COMMENT ON COLUMN documents.body +            IS 'text object - substantive text: light html markup';}, +          %{COMMENT ON COLUMN documents.lev1 +            IS 'document structure, level 1';}, +          %{COMMENT ON COLUMN documents.lev2 +            IS 'document structure, level 2';}, +          %{COMMENT ON COLUMN documents.lev3 +            IS 'document structure, level 3';}, +          %{COMMENT ON COLUMN documents.lev4 +            IS 'document structure, level 4';}, +          %{COMMENT ON COLUMN documents.lev5 +            IS 'document structure, level 5';}, +          %{COMMENT ON COLUMN documents.lev6 +            IS 'document structure, level 6';} +        ] +        conn_execute_array(sql_arr)        end        def endnotes -        %{ -        COMMENT ON Table endnotes -          IS 'contains searchable text of SiSU documents endnotes'; -        COMMENT ON COLUMN endnotes.nid -          IS 'unique'; -        COMMENT ON COLUMN endnotes.document_lid -          IS 'ties to text block from which referenced'; -        COMMENT ON COLUMN endnotes.nr -          IS 'endnote number <!e_(\d+)!>'; -        COMMENT ON COLUMN endnotes.clean -          IS 'endnote substantive content, stripped of markup'; -        COMMENT ON COLUMN endnotes.body -          IS 'endnote substantive content'; -        COMMENT ON COLUMN endnotes.ocn -          IS 'object citation no# <\~(\d+)> from which endnote is referenced'; -        COMMENT ON COLUMN documents.metadata_tid -          IS 'tie to title in metadata - unique for each document'; -        } +        sql_arr=[ +          %{COMMENT ON Table endnotes +            IS 'contains searchable text of SiSU documents endnotes';}, +          %{COMMENT ON COLUMN endnotes.nid +            IS 'unique';}, +          %{COMMENT ON COLUMN endnotes.document_lid +            IS 'ties to text block from which referenced';}, +          %{COMMENT ON COLUMN endnotes.nr +            IS 'endnote number <!e_(\d+)!>';}, +          %{COMMENT ON COLUMN endnotes.clean +            IS 'endnote substantive content, stripped of markup';}, +          %{COMMENT ON COLUMN endnotes.body +            IS 'endnote substantive content';}, +          %{COMMENT ON COLUMN endnotes.ocn +            IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, +          %{COMMENT ON COLUMN documents.metadata_tid +            IS 'tie to title in metadata - unique for each document';} +        ] +        conn_execute_array(sql_arr)        end        def endnotes_asterisk -        %{ -        COMMENT ON Table endnotes_asterisk -          IS 'contains searchable text of SiSU documents endnotes asterisk'; -        COMMENT ON COLUMN endnotes_asterisk.nid -          IS 'unique'; -        COMMENT ON COLUMN endnotes_asterisk.document_lid -          IS 'ties to text block from which referenced'; -        COMMENT ON COLUMN endnotes_asterisk.nr -          IS 'endnote number <!e_(\d+)!>'; -        COMMENT ON COLUMN endnotes_asterisk.clean -          IS 'endnote substantive content, stripped of markup'; -        COMMENT ON COLUMN endnotes_asterisk.body -          IS 'endnote substantive content'; -        COMMENT ON COLUMN endnotes_asterisk.ocn -          IS 'object citation no# <\~(\d+)> from which endnote is referenced'; -        COMMENT ON COLUMN documents.metadata_tid -          IS 'tie to title in metadata - unique for each document'; -        } +        sql_arr=[ +          %{COMMENT ON Table endnotes_asterisk +            IS 'contains searchable text of SiSU documents endnotes asterisk';}, +          %{COMMENT ON COLUMN endnotes_asterisk.nid +            IS 'unique';}, +          %{COMMENT ON COLUMN endnotes_asterisk.document_lid +            IS 'ties to text block from which referenced';}, +          %{COMMENT ON COLUMN endnotes_asterisk.nr +            IS 'endnote number <!e_(\d+)!>';}, +          %{COMMENT ON COLUMN endnotes_asterisk.clean +            IS 'endnote substantive content, stripped of markup';}, +          %{COMMENT ON COLUMN endnotes_asterisk.body +            IS 'endnote substantive content';}, +          %{COMMENT ON COLUMN endnotes_asterisk.ocn +            IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, +          %{COMMENT ON COLUMN documents.metadata_tid +            IS 'tie to title in metadata - unique for each document';} +        ] +        conn_execute_array(sql_arr)        end        def endnotes_plus -        %{ -        COMMENT ON Table endnotes_plus -          IS 'contains searchable text of SiSU documents endnotes'; -        COMMENT ON COLUMN endnotes_plus.nid -          IS 'unique'; -        COMMENT ON COLUMN endnotes_plus.document_lid -          IS 'ties to text block from which referenced'; -        COMMENT ON COLUMN endnotes_plus.nr -          IS 'endnote number <!e_(\d+)!>'; -        COMMENT ON COLUMN endnotes_plus.clean -          IS 'endnote substantive content, stripped of markup'; -        COMMENT ON COLUMN endnotes_plus.body -          IS 'endnote substantive content'; -        COMMENT ON COLUMN endnotes_plus.ocn -          IS 'object citation no# <\~(\d+)> from which endnote is referenced'; -        COMMENT ON COLUMN documents.metadata_tid -          IS 'tie to title in metadata - unique for each document'; -        } +        sql_arr=[ +          %{COMMENT ON Table endnotes_plus +            IS 'contains searchable text of SiSU documents endnotes';}, +          %{COMMENT ON COLUMN endnotes_plus.nid +            IS 'unique';}, +          %{COMMENT ON COLUMN endnotes_plus.document_lid +            IS 'ties to text block from which referenced';}, +          %{COMMENT ON COLUMN endnotes_plus.nr +            IS 'endnote number <!e_(\d+)!>';}, +          %{COMMENT ON COLUMN endnotes_plus.clean +            IS 'endnote substantive content, stripped of markup';}, +          %{COMMENT ON COLUMN endnotes_plus.body +            IS 'endnote substantive content';}, +          %{COMMENT ON COLUMN endnotes_plus.ocn +            IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, +          %{COMMENT ON COLUMN documents.metadata_tid +            IS 'tie to title in metadata - unique for each document';}, +        ] +        conn_execute_array(sql_arr)        end        def urls -        %{ -        COMMENT ON Table urls -          IS 'contains base url links to different SiSU output'; -        COMMENT ON COLUMN documents.metadata_tid -          IS 'tie to title in metadata - unique for each document, the mapping of rows is one to one'; -        COMMENT ON COLUMN urls.plaintext -          IS 'plaintext utf-8'; -        COMMENT ON COLUMN urls.html_toc -          IS 'table of contents for segmented html document'; -        COMMENT ON COLUMN urls.html_doc -          IS 'html document (scroll)'; -        COMMENT ON COLUMN urls.xhtml -          IS 'xhtml document (scroll)'; -        COMMENT ON COLUMN urls.xml_sax -          IS 'xml sax oriented document (scroll)'; -        COMMENT ON COLUMN urls.xml_dom -          IS 'xml dom oriented document (scroll)'; -        COMMENT ON COLUMN urls.odf -          IS 'opendocument format text'; -        COMMENT ON COLUMN urls.pdf_p -          IS 'pdf portrait'; -        COMMENT ON COLUMN urls.pdf_l -          IS 'pdf landscape'; -        COMMENT ON COLUMN urls.concordance -          IS 'rudimentary document index linked to html'; -        COMMENT ON COLUMN urls.latex_p -          IS 'latex portrait'; -        COMMENT ON COLUMN urls.latex_l -          IS 'latex_landscape'; -        COMMENT ON COLUMN urls.markup -          IS 'markup'; -        COMMENT ON COLUMN urls.sisupod -          IS 'SiSU document format .tgz (all SiSU information on document)'; -        } +        sql_arr=[ +          %{COMMENT ON Table urls +            IS 'contains base url links to different SiSU output';}, +          %{COMMENT ON COLUMN documents.metadata_tid +            IS 'tie to title in metadata - unique for each document, the mapping of rows is one to one';}, +          %{COMMENT ON COLUMN urls.plaintext +            IS 'plaintext utf-8';}, +          %{COMMENT ON COLUMN urls.html_toc +            IS 'table of contents for segmented html document';}, +          %{COMMENT ON COLUMN urls.html_doc +            IS 'html document (scroll)';}, +          %{COMMENT ON COLUMN urls.xhtml +            IS 'xhtml document (scroll)';}, +          %{COMMENT ON COLUMN urls.xml_sax +            IS 'xml sax oriented document (scroll)';}, +          %{COMMENT ON COLUMN urls.xml_dom +            IS 'xml dom oriented document (scroll)';}, +          %{COMMENT ON COLUMN urls.odf +            IS 'opendocument format text';}, +          %{COMMENT ON COLUMN urls.pdf_p +            IS 'pdf portrait';}, +          %{COMMENT ON COLUMN urls.pdf_l +            IS 'pdf landscape';}, +          %{COMMENT ON COLUMN urls.concordance +            IS 'rudimentary document index linked to html';}, +          %{COMMENT ON COLUMN urls.latex_p +            IS 'latex portrait';}, +          %{COMMENT ON COLUMN urls.latex_l +            IS 'latex_landscape';}, +          %{COMMENT ON COLUMN urls.markup +            IS 'markup';}, +          %{COMMENT ON COLUMN urls.sisupod +            IS 'SiSU document format .tgz (all SiSU information on document)';}, +        ] +        conn_execute_array(sql_arr)        end        self      end diff --git a/lib/sisu/v0/db_import.rb b/lib/sisu/v0/db_import.rb index 5810f13f..da564639 100644 --- a/lib/sisu/v0/db_import.rb +++ b/lib/sisu/v0/db_import.rb @@ -1,4 +1,4 @@ -# coding: utf-8 +#j coding: utf-8  =begin   * Name: SiSU @@ -22,7 +22,7 @@     This program is distributed in the hope that it will be useful, but WITHOUT     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for     more details.     You should have received a copy of the GNU General Public License along with @@ -95,9 +95,9 @@ module SiSU_DB_import          else @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }          end        rescue -        puts "#{__FILE__} #{__LINE__}" if @opt.cmd =~/M/ +        puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/        end -      @col[:lid] =0 if @col[:lid].nil? or @col[:lid].to_s.empty? +      @col[:lid]=0 if @col[:lid].nil? or @col[:lid].to_s.empty?        sql='SELECT MAX(nid) FROM endnotes'        begin          @id_n ||=0 @@ -107,7 +107,7 @@ module SiSU_DB_import            @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i }          end        rescue -        puts "#{__FILE__} #{__LINE__}" if @opt.cmd =~/M/ +        puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/        end        @id_n =0 if @col[:lid].nil? or @col[:lid].to_s.empty?        @col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=0 @@ -121,59 +121,66 @@ module SiSU_DB_import        tell.puts_blue unless @opt.cmd =~/q/        tell=SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnm)        tell.print_grey if @opt.cmd =~/v/ -      case @sql_type -      when /sqlite/                                                    #fix logic for sqlite ! -        #sqlite watch -        #pf_db_import_transaction_open if @opt =~/M/ -        db_import_metadata -        db_import_documents(@dal_array) -        db_import_urls(@dal_array,@fnm)                                #import OID on/off -        #pf_db_import_transaction_close if @opt =~/M/ -        #@conn.commit -        #@conn.close -        #@conn.disconnect -        if @opt.cmd =~/M/ +      file_exist=if @sql_type=~/sqlite/; nil +      else +        @conn.select_one(%{ SELECT metadata.tid FROM metadata WHERE metadata.filename ~ '#{@opt.fns}'; }) +      end +      if (@sql_type!~/sqlite/ and not file_exist) \ +      or @sql_type=~/sqlite/ +        t_d=[] # transaction_data +        t_d << db_import_metadata +        t_d << db_import_documents(@dal_array) +        t_d << db_import_urls(@dal_array,@fnc)                              #import OID on/off +        t_d=t_d.flatten +        if @opt.cmd =~/[MV]/ +          puts @conn.class if defined? @conn.class +          puts @conn.driver_name if defined? @conn.driver_name +          puts @conn.driver if defined? @conn.driver +        end +        begin +          @conn.transaction do |conn| +            t_d.each do |sql| +              conn.execute(sql) +            end +          end +        rescue +          SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error +          puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/ +        ensure +          #@conn.execute("COMMIT") +        end +        if @sql_type=~/sqlite/ \ +        and @opt.cmd =~/[MV]/            puts "\n" + @conn.inspect -          puts "\nat #{__FILE__} #{__LINE__}" +          puts "\nat #{__FILE__}:#{__LINE__}"          end        else -        file_exist=@conn.select_one(%{ SELECT metadata.tid FROM metadata WHERE metadata.filename ~ '#{@opt.fns}'; }) -        unless file_exist -          @conn.execute('BEGIN') -          db_import_metadata -          db_import_documents(@dal_array) -          db_import_urls(@dal_array,@fnm)                              #import OID on/off -          @conn.execute('COMMIT') -          if @opt.cmd =~/[MV]/ -            puts "\n" + @conn.inspect -            puts "\nat #{__FILE__} #{__LINE__}" -          end -        else +        if file_exist            @db=SiSU_Env::Info_db.new            puts "\n#{@cX.grey}file #{@cX.off} #{@cX.blue}#{@opt.fns}#{@cX.off} #{@cX.grey}already exists in database#{@cX.off} #{@cX.blue}#{@db.psql.db}#{@cX.off} #{@cX.brown}update instead?#{@cX.off}"          end        end      end -    def special_character_escape(string) -      string.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") -      string.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n") -      string.gsub!(/#{Mx[:gr_o]}(?:code|alt|group|verse)(?:-end)?#{Mx[:gr_c]}/,'') -      string.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') -      string.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') -      string.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:https?|file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') +    def special_character_escape(str) +      str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") +      str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n") +      str.gsub!(/#{Mx[:gr_o]}(?:code|alt|group|verse)(?:-end)?#{Mx[:gr_c]}/,'') +      str.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') +      str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') +      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:https?|file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')      end -    def strip_markup(string) #define rules, make same as in dal clean -      string.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') -      string.gsub!(/#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/,'') -      string.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') -      string.gsub!(/#{Mx[:gr_o]}T[h]?#{Mx[:tc_p]}.+?#{Mx[:gr_c]}/u,"[TABLE]\n")             #tables #CHECK should take whole table -      string.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1')         #tables -      string.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ')                          #tables -      string.gsub!(/#{Mx[:tc_p]}/u,' ')                                                     #tables tidy later -      string.gsub!(/<.+?>/,'') -      string.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search -      string.gsub!(/\s\s+/,' ') -      string.strip! +    def strip_markup(str) #define rules, make same as in dal clean +      str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') +      str.gsub!(/#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/,'') +      str.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') +      str.gsub!(/#{Mx[:gr_o]}T[h]?#{Mx[:tc_p]}.+?#{Mx[:gr_c]}/u,"[TABLE]\n")             #tables #CHECK should take whole table +      str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1')         #tables +      str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ')                          #tables +      str.gsub!(/#{Mx[:tc_p]}/u,' ')                                                     #tables tidy later +      str.gsub!(/<.+?>/,'') +      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search +      str.gsub!(/\s\s+/,' ') +      str.strip!      end                                                                                   #% import into database tables      def pf_db_import_transaction_open @@ -386,7 +393,8 @@ module SiSU_DB_import        #if @md.ruby_version;                      special_character_escape(@md.ruby_version)        SiSU_DB_DBI::Test.new(self,@opt).verify                          #% import title names, filenames (tuple)        t=SiSU_DB_tuple::Load_metadata.new(@conn,@tp,@@id_t,@opt,@file) -      t.tuple +      tuple=t.tuple +      tuple      end      def db_import_documents(dal_array)                                     #% import documents - populate main database table                                                                         #% import into substantive database tables (tuple) @@ -395,6 +403,7 @@ module SiSU_DB_import          @en,@en_ast,@en_pls=[],[],[]          @col[:en_a]=nil          @col[:en_z]=nil +        @tuple_array=[]          dal_array.each do |data|            #data.gsub!(/<[biu]>(.+?)<\/[biu]>/,'\1')                    # remove bold, italics, underscore            data.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') @@ -436,7 +445,7 @@ module SiSU_DB_import                if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last                end                t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) -              t.tuple +              @tuple_array << t.tuple                case @col[:lev]                when /1/; @col[:lv1]+=1                when /2/; @col[:lv2]+=1 @@ -478,7 +487,7 @@ module SiSU_DB_import                if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last                end                t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) -              t.tuple +              @tuple_array << t.tuple                @col[:lev]=@col[:plaintext]=@col[:body]=''              elsif data[/^#{Mx[:lv_o]}5:\S*?#{Mx[:lv_c]}\s*(.+?)#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}#{Mx[:id_o]}([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})#{Mx[:id_c]}/]                             # header lev5 seg level                txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6 @@ -518,7 +527,7 @@ module SiSU_DB_import                if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last                end                t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) -              t.tuple +              @tuple_array << t.tuple                @col[:lev]=@col[:plaintext]=@col[:body]=''              elsif data[/^#{Mx[:lv_o]}6:\S*?#{Mx[:lv_c]}\s*(.+?)#{Mx[:id_o]}~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)#{Mx[:id_c]}#{Mx[:id_o]}([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})#{Mx[:id_c]}/] # header lev6 seg level                txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6 @@ -558,7 +567,7 @@ module SiSU_DB_import                if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last                end                t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) -              t.tuple +              @tuple_array << t.tuple                @col[:lev]=@col[:plaintext]=@col[:body]=''              else                                                               #% regular text                @col[:lid]+=1 @@ -609,7 +618,7 @@ module SiSU_DB_import                @col[:plaintext]=@col[:body].dup                strip_markup(@col[:plaintext])                t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) -              t.tuple +              @tuple_array << t.tuple                @en,@en_ast,@en_pls=[],[],[]                @col[:en_a]=@col[:en_z]=nil                @col[:lev]=@col[:plaintext]=@col[:body]='' @@ -649,7 +658,7 @@ module SiSU_DB_import                           :hash => digest_clean                      }                      t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) -                    t.tuple +                    @tuple_array << t.tuple                    end                  end                end @@ -691,7 +700,7 @@ module SiSU_DB_import                           :hash => digest_clean                      }                      t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) -                    t.tuple +                    @tuple_array << t.tuple                    end                  end                end @@ -732,7 +741,7 @@ module SiSU_DB_import                           :hash => digest_clean                      }                      t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) -                    t.tuple +                    @tuple_array << t.tuple                    end                  end                end @@ -743,6 +752,7 @@ module SiSU_DB_import        rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error        ensure        end +      @tuple_array      end      def endnotes(txt)        @txt=txt @@ -865,10 +875,11 @@ module SiSU_DB_import            f[:sisupod],u[:sisupod]='sisupod,', "'#{base}/#@fnb/#{@opt.fns}.tgz',"          end        t=SiSU_DB_tuple::Load_urls.new(@conn,f,u,@@id_t,@opt,@file) -      t.tuple +      tuple=t.tuple        rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error        ensure        end +      tuple      end    end  end diff --git a/lib/sisu/v0/db_indexes.rb b/lib/sisu/v0/db_indexes.rb index c22d5700..315ae199 100644 --- a/lib/sisu/v0/db_indexes.rb +++ b/lib/sisu/v0/db_indexes.rb @@ -22,7 +22,7 @@     This program is distributed in the hope that it will be useful, but WITHOUT     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for     more details.     You should have received a copy of the GNU General Public License along with @@ -63,36 +63,45 @@ module SiSU_DB_index        @opt,@conn,@file,@sql_type=opt,conn,file,sql_type      end      def create_indexes                                                           # check added from pg not tested +      def conn_execute_array(sql_arr) +        @conn.transaction do |conn| +          sql_arr.each do |sql| +            conn.execute(sql) +          end +        end +      end        def base          print "\n          create documents common indexes\n" unless @opt.cmd =~/q/ -        @conn.execute(' -          CREATE INDEX object_nr ON documents(ocn); -          CREATE INDEX digest_clean ON documents(digest_clean); -          CREATE INDEX digest_all ON documents(digest_all); -          CREATE INDEX lev1 ON documents(lev1); -          CREATE INDEX lev2 ON documents(lev2); -          CREATE INDEX lev3 ON documents(lev3); -          CREATE INDEX lev4 ON documents(lev4); -          CREATE INDEX lev5 ON documents(lev5); -          CREATE INDEX lev6 ON documents(lev6); -          CREATE INDEX endnote_nr ON endnotes(nr); -          CREATE INDEX digest_en ON endnotes(digest_clean); -          CREATE INDEX endnote_nr_asterisk ON endnotes_asterisk(nr); -          CREATE INDEX endnote_asterisk ON endnotes_asterisk(clean); -          CREATE INDEX digest_en_asterisk ON endnotes_asterisk(digest_clean); -          CREATE INDEX endnote_nr_plus ON endnotes_plus(nr); -          CREATE INDEX endnote_plus ON endnotes_plus(clean); -          CREATE INDEX digest_en_plus ON endnotes_plus(digest_clean); -          CREATE INDEX title ON metadata(title); -          CREATE INDEX filename ON metadata(filename) -        ') unless @opt.cmd =~/q/ +        sql_arr=[ +          %{CREATE INDEX object_nr ON documents(ocn);}, +          %{CREATE INDEX digest_clean ON documents(digest_clean);}, +          %{CREATE INDEX digest_all ON documents(digest_all);}, +          %{CREATE INDEX lev1 ON documents(lev1);}, +          %{CREATE INDEX lev2 ON documents(lev2);}, +          %{CREATE INDEX lev3 ON documents(lev3);}, +          %{CREATE INDEX lev4 ON documents(lev4);}, +          %{CREATE INDEX lev5 ON documents(lev5);}, +          %{CREATE INDEX lev6 ON documents(lev6);}, +          %{CREATE INDEX endnote_nr ON endnotes(nr);}, +          %{CREATE INDEX digest_en ON endnotes(digest_clean);}, +          %{CREATE INDEX endnote_nr_asterisk ON endnotes_asterisk(nr);}, +          %{CREATE INDEX endnote_asterisk ON endnotes_asterisk(clean);}, +          %{CREATE INDEX digest_en_asterisk ON endnotes_asterisk(digest_clean);}, +          %{CREATE INDEX endnote_nr_plus ON endnotes_plus(nr);}, +          %{CREATE INDEX endnote_plus ON endnotes_plus(clean);}, +          %{CREATE INDEX digest_en_plus ON endnotes_plus(digest_clean);}, +          %{CREATE INDEX title ON metadata(title);}, +          %{CREATE INDEX filename ON metadata(filename)}, +        ] +        conn_execute_array(sql_arr)        end        def text          print "\n          create documents text indexes\n" unless @opt.cmd =~/q/ -        @conn.execute(' -          CREATE INDEX clean ON documents(clean); -          CREATE INDEX endnote ON endnotes(clean); -        ') unless @opt.cmd =~/q/ +        sql_arr=[ +          %{CREATE INDEX clean ON documents(clean);}, +          %{CREATE INDEX endnote ON endnotes(clean);} +        ] +        conn_execute_array(sql_arr)        end        base        @opt.cmd=~/D/ || @opt.mod=~/psql/ ? '' : text diff --git a/lib/sisu/v0/db_load_tuple.rb b/lib/sisu/v0/db_load_tuple.rb index 37bfb1e6..447e9354 100644 --- a/lib/sisu/v0/db_load_tuple.rb +++ b/lib/sisu/v0/db_load_tuple.rb @@ -22,7 +22,7 @@     This program is distributed in the hope that it will be useful, but WITHOUT     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for     more details.     You should have received a copy of the GNU General Public License along with @@ -77,38 +77,35 @@ module SiSU_DB_tuple        @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX      end      def tuple                                                                    #% import line -      begin -        sql_entry=if @col[:en_a] -          "INSERT INTO documents (lid, metadata_tid, lev, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, digest_clean, digest_all) VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" -        else -          "INSERT INTO documents (lid, metadata_tid, lev, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, digest_clean, digest_all) VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" -        end -        if @opt.cmd =~/M/ -          if @opt.cmd =~/V/ -            puts @file.inspect -            puts sql_entry -          end -          @file.puts sql_entry -        else @conn.execute(sql_entry) +      sql_entry=if @col[:en_a] +        "INSERT INTO documents (lid, metadata_tid, lev, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, en_a, en_z, digest_clean, digest_all) VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:en_a]}', '#{@col[:en_z]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" +      else +        "INSERT INTO documents (lid, metadata_tid, lev, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, digest_clean, digest_all) VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, '#{@col[:plaintext]}', '#{@col[:body]}', '#{@col[:ocn]}', '#{@col[:ocnd]}', '#{@col[:ocns]}', '#{@col[:seg]}', '#{@col[:lv1]}', '#{@col[:lv2]}', '#{@col[:lv3]}', '#{@col[:lv4]}', '#{@col[:lv5]}', '#{@col[:lv6]}', '#{@col[:digest_clean]}', '#{@col[:digest_all]}');" +      end +      if @opt.cmd =~/M/ +        if @opt.cmd =~/V/ +          puts @file.inspect +          puts sql_entry          end -        if @opt.cmd =~/v/ -          if @col[:lev].inspect =~/[1235678]/ -            lev=case @col[:lev].inspect -            when /1/; ':A' -            when /2/; ':B' -            when /3/; ':C' -            when /5/; ' 2' -            when /6/; ' 3' -            end -            puts %{#{lev}>\t#{@col[:lv1]}\t#{@col[:lv2]}\t#{@col[:lv3]}\t#{@col[:lv4]}\t#{@col[:lv5]}\t#{@col[:lv6]}\t#{@col[:ocn]}\t#{@col[:ocnd]}\t#{@col[:ocns]}} -          elsif @col[:lev].inspect =~/[4]/ -            puts %{ #{@cX.green}1>#{@cX.off}\t#{@col[:lv1]}\t#{@col[:lv2]}\t#{@col[:lv3]}\t#{@col[:lv4]}\t#{@col[:lv5]}\t#{@col[:lv6]}\t#{@col[:ocn]}\t#{@col[:ocnd]}\t#{@col[:ocns]}\t#{@col[:seg]}} +        @file.puts sql_entry +      else +        @file.puts sql_entry if @opt.cmd =~/V/ +      end +      if @opt.cmd =~/v/ +        if @col[:lev].inspect =~/[1235678]/ +          lev=case @col[:lev].inspect +          when /1/; ':A' +          when /2/; ':B' +          when /3/; ':C' +          when /5/; ' 2' +          when /6/; ' 3'            end +          puts %{#{lev}>\t#{@col[:lv1]}\t#{@col[:lv2]}\t#{@col[:lv3]}\t#{@col[:lv4]}\t#{@col[:lv5]}\t#{@col[:lv6]}\t#{@col[:ocn]}\t#{@col[:ocnd]}\t#{@col[:ocns]}} +        elsif @col[:lev].inspect =~/[4]/ +          puts %{ #{@cX.green}1>#{@cX.off}\t#{@col[:lv1]}\t#{@col[:lv2]}\t#{@col[:lv3]}\t#{@col[:lv4]}\t#{@col[:lv5]}\t#{@col[:lv6]}\t#{@col[:ocn]}\t#{@col[:ocnd]}\t#{@col[:ocns]}\t#{@col[:seg]}}          end -      rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error -      ensure -        #@conn.execute("COMMIT")        end +      sql_entry      end    end    class Load_metadata @@ -120,8 +117,10 @@ module SiSU_DB_tuple        if @opt.cmd =~/M/          puts "maintenance mode on: creating sql transaction file (for last transaction set (document) only):\n\t#{@file.inspect}"          @file.puts sql_entry -      else @conn.execute(sql_entry) +      else +        @file.puts sql_entry if @opt.cmd =~/V/        end +      sql_entry      end    end    class Load_urls @@ -132,8 +131,10 @@ module SiSU_DB_tuple        sql_entry="INSERT INTO urls (#{@f[:txt]} #{@f[:html_toc]} #{@f[:html_doc]} #{@f[:xhtml]} #{@f[:xml_sax]} #{@f[:xml_dom]} #{@f[:odf]} #{@f[:pdf_p]} #{@f[:pdf_l]} #{@f[:concordance]} #{@f[:latex_p]} #{@f[:latex_l]} #{@f[:manifest]} #{@f[:digest]} #{@f[:markup]} #{@f[:sisupod]} metadata_tid) VALUES (#{@u[:txt]} #{@u[:html_toc]} #{@u[:html_doc]} #{@u[:xhtml]} #{@u[:xml_sax]} #{@u[:xml_dom]} #{@u[:odf]} #{@u[:pdf_p]} #{@u[:pdf_l]} #{@u[:concordance]} #{@u[:latex_p]} #{@u[:latex_l]} #{@u[:manifest]} #{@u[:digest]} #{@u[:markup]} #{@u[:sisupod]} #{@id});"        if @opt.cmd =~/M/          @file.puts sql_entry -      else @conn.execute(sql_entry) +      else +        @file.puts sql_entry if @opt.cmd =~/V/        end +      sql_entry      end    end    class Load_endnotes @@ -144,8 +145,10 @@ module SiSU_DB_tuple        sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');"        if @opt.cmd =~/M/          @file.puts sql_entry -      else @conn.execute(sql_entry) +      else +        @file.puts sql_entry if @opt.cmd =~/V/        end +      sql_entry      end    end  end diff --git a/lib/sisu/v0/db_remove.rb b/lib/sisu/v0/db_remove.rb index b210a3ca..bde5c891 100644 --- a/lib/sisu/v0/db_remove.rb +++ b/lib/sisu/v0/db_remove.rb @@ -22,7 +22,7 @@     This program is distributed in the hope that it will be useful, but WITHOUT     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for     more details.     You should have received a copy of the GNU General Public License along with @@ -90,8 +90,9 @@ module SiSU_DB_remove            end            @conn.commit if driver_sqlite3          else -          s=sql_entry.join(' ') -          @conn.execute(s) +          sql_entry.each do |s| +            @conn.execute(s) +          end          end          if @opt.cmd =~/M/            @file.puts sql_entry if @opt.cmd =~/M/ diff --git a/lib/sisu/v0/harvest.rb b/lib/sisu/v0/harvest.rb index 8f262ac8..1e390c50 100644 --- a/lib/sisu/v0/harvest.rb +++ b/lib/sisu/v0/harvest.rb @@ -24,7 +24,7 @@     This program is distributed in the hope that it will be useful, but WITHOUT     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for     more details.     You should have received a copy of the GNU General Public License along with @@ -87,8 +87,6 @@ def cases(opt)      help    end  end -branch='v0' -SiSU_lib="sisu/#{branch}"  require "#{SiSU_lib}/options"  require "#{SiSU_lib}/harvest_topics"  require "#{SiSU_lib}/harvest_authors" diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index 62dd1f48..e4873a10 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -363,7 +363,6 @@ module SiSU_ODF        end        def odf_structure(md,t_o)          @md,@t_o=md,t_o -        @md,@t_o=md,t_o          if t_o.class == Hash            para =t_o[:txt]            || nil            lv =t_o[:lv]               || nil diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index d1e312e1..7fe77e0b 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -22,7 +22,7 @@     This program is distributed in the hope that it will be useful, but WITHOUT     ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for     more details.     You should have received a copy of the GNU General Public License along with @@ -65,7 +65,7 @@ module SiSU_Format_Shared      @@fns=nil      def initialize(md,t_o)        @md,@t_o=md,t_o -      if t_o.class == Hash +      if t_o.class==Hash          @txt =t_o[:txt]            #|| nil #s/@content/@txt/          @col =t_o[:col]            #|| nil          @notenumber =t_o[:endnote_nr]      || nil @@ -79,9 +79,8 @@ module SiSU_Format_Shared          p t_o.class          p caller        end -        @txt.gsub!(/#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/,'') -      @id=@ocn=@col[:id] +      @id=@ocn=@col[:ocn]        @ocnd,@ocns=@col[:ocnd],@col[:ocns]        @lv=@col[:lev].to_s        @@hname=if @md.fns != @@fns diff --git a/lib/sisu/v0/xml_dom.rb b/lib/sisu/v0/xml_dom.rb index 7e1945b8..5145be98 100644 --- a/lib/sisu/v0/xml_dom.rb +++ b/lib/sisu/v0/xml_dom.rb @@ -356,7 +356,7 @@ WOK            end            if @rcdc==false \            and (para =~/~metadata/ \ -          or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_x]}\s*Document Information/) +          or para =~/#{Mx[:lv_o]}1:meta#{Mx[:lv_c]}\s*Document Information/)              @rcdc=true            end            if para !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ | 
