aboutsummaryrefslogtreecommitdiffhomepage
path: root/org/output_sqlite_discrete.org
diff options
context:
space:
mode:
Diffstat (limited to 'org/output_sqlite_discrete.org')
-rw-r--r--org/output_sqlite_discrete.org1093
1 files changed, 522 insertions, 571 deletions
diff --git a/org/output_sqlite_discrete.org b/org/output_sqlite_discrete.org
index fced21d..c2f273c 100644
--- a/org/output_sqlite_discrete.org
+++ b/org/output_sqlite_discrete.org
@@ -20,18 +20,66 @@
#+BEGIN_SRC d :tangle ../src/sdp/output/sqlite_discrete.d
module sdp.output.sqlite_discrete;
<<output_imports>>
+template SQLiteDiscreteDbRun() {
+ void SQLiteDiscreteDbRun(Db,St,O)(
+ auto ref Db db,
+ auto ref St db_statement,
+ auto ref O opt_action,
+ string note,
+ ) {
+ debug(sql_statement) {
+ writeln(db_statement);
+ }
+ try {
+ db.run(
+ "\nBEGIN;\n" ~
+ db_statement ~
+ "\nCOMMIT;\n"
+ );
+ } catch (ErrnoException ex) {
+ writeln("ERROR SQLite : ", ex);
+ } catch (Exception ex) {
+ writeln("ERROR SQLite : ", ex);
+ }
+ if (opt_action.debug_do) {
+ writeln(note);
+ if (opt_action.verbose) {
+ writeln(db_statement);
+ }
+ }
+ }
+}
template SQLiteDiscreteBuildTablesAndPopulate() {
void SQLiteDiscreteBuildTablesAndPopulate(D,I)(
auto ref const D doc_abstraction,
- auto ref I doc_matters,
+ auto ref I doc_matters,
) {
import d2sqlite3;
import std.typecons : Nullable;
mixin SiSUoutputRgxInit;
static auto rgx = Rgx();
+ auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language);
+ pth_sqlite.base.mkdirRecurse;
+ auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename));
+ template SQLiteDiscreteDbStatementComposite() {
+ void SQLiteDiscreteDbStatementComposite(Db,D,I)(
+ auto ref Db db,
+ auto ref const D doc_abstraction,
+ auto ref I doc_matters,
+ ) {
+ <<sqlite_db_statement_composite>>
+ }
+ }
+ template SQLinsertDelimiter() {
+ auto SQLinsertDelimiter(string _txt) {
+ _txt = _txt
+ .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0");
+ return _txt;
+ }
+ }
template SQLiteFormatAndLoadObject() {
auto SQLiteFormatAndLoadObject(I)(
- auto ref I doc_matters,
+ auto ref I doc_matters,
) {
mixin SiSUoutputRgxInit;
struct sqlite_format_and_load_objects {
@@ -44,45 +92,61 @@ template SQLiteDiscreteBuildTablesAndPopulate() {
return sqlite_format_and_load_objects();
}
}
- template SQLiteInstruct() {
- void SQLiteInstruct(Db,I)(
- Db db,
- auto ref I doc_matters,
+ template SQLiteDiscreteTablesReCreate() {
+ string SQLiteDiscreteTablesReCreate()() {
+ string _sql_instruct;
+ _sql_instruct = format(q"¶
+ <<sqlite_statement_drop_existing_index>>
+ <<sqlite_statement_drop_existing_tables>>
+ <<sqlite_statement_create_table_metadata_and_src_txt>>
+ <<sqlite_statement_create_table_objects>>
+ <<sqlite_statement_create_index>>
+ ¶",);
+ return _sql_instruct;
+ }
+ }
+ template SQLiteDiscreteInsertMetadata() {
+ string SQLiteDiscreteInsertMetadata(I)(
+ auto ref I doc_matters,
) {
- <<sqlite_db_initialize>>
- <<sqlite_table_initialize>>
- <<sqlite_create_table_metadata_and_src_txt>>
- /* */<<sqlite_create_table_urls>>
- <<sqlite_create_table_objects>>
- <<sqlite_close_initialize>>
- <<sqlite_insert_metadata_and_src_text>>
- //<<sqlite_insert_urls>>
+ <<sqlite_formatted_insertions_doc_matters_metadata>>
+ return _insert_metadata;
}
}
- template SQLiteObjectsLoop() {
- void SQLiteObjectsLoop(P)(
- auto ref P doc_parts,
+ template SQLiteDiscreteInsertDocObjectsLoop() {
+ string SQLiteDiscreteInsertDocObjectsLoop(P,I)(
+ auto ref P doc_parts,
+ auto ref I tid,
) {
- <<sqlite_insert_doc_objects>>
+ string insertDocObjectsRow(O)(O obj) {
+ <<sqlite_formatted_insertions_doc_objects>>
+ return _insert_doc_objects_row;
+ }
<<sqlite_objects_loop>>
}
}
- SQLiteObjectsLoop!()(doc_matters.xml.keys_seq.sql);
+ SQLiteDiscreteDbStatementComposite!()(db, doc_abstraction, doc_matters);
}
}
-template SQLiteDiscreteTablesCreate() {
- void SQLiteDiscreteTablesCreate()() {
- writeln("table create");
- }
-}
-template SQLiteDiscreteTablesDrop() {
- void SQLiteDiscreteTablesDrop()() {
- writeln("table drop");
+#+END_SRC
+
+** 1. sqlite_db_statement [#A]
+
+#+name: sqlite_db_statement_composite
+#+BEGIN_SRC d
+{
+ string _db_statement;
+ {
+ _db_statement ~= SQLiteDiscreteTablesReCreate!()();
+ _db_statement ~= SQLiteDiscreteInsertMetadata!()(doc_matters);
+ _db_statement ~= SQLiteDiscreteInsertDocObjectsLoop!()(doc_matters.xml.keys_seq.sql, 1); // FIX
+ SQLiteDiscreteDbRun!()(db, _db_statement, doc_matters.opt.action, "table CREATE Tables, INSERT DocObjects");
}
+ db.close;
}
#+END_SRC
-** 1. imports
+** 2. imports
#+name: output_imports
#+BEGIN_SRC d
@@ -92,7 +156,7 @@ import
std.conv : to;
#+END_SRC
-** 2. format and load template
+** 3. format and load template
*** 1. prepare objects (munge, sanitize, markup)
**** 1. _text_ generic munge (sanitize text for search)
@@ -469,532 +533,15 @@ auto html_table(O)(
#+END_SRC
*** 2. _sqlite_ (loop preparation & pre-loop action)
-***** initialize: prepare statements drop & create db & tables anew
-****** d sqlite statements (initialize database) {
-
-#+name: sqlite_db_initialize
-#+BEGIN_SRC d
-db.run("
-#+END_SRC
-
-******* 0. drop table if exists
-
-#+name: sqlite_table_initialize
-#+BEGIN_SRC sql
-DROP TABLE IF EXISTS metadata_and_text;
-DROP TABLE IF EXISTS doc_objects;
-DROP TABLE IF EXISTS urls;
-BEGIN;
-#+END_SRC
-
-******* 1. create tables
-******** metadata & src text
-
-#+name: sqlite_create_table_metadata_and_src_txt
-#+BEGIN_SRC sql
-CREATE TABLE metadata_and_text (
- tid BIGINT PRIMARY KEY,
- title VARCHAR(800) NOT NULL,
- title_main VARCHAR(400) NULL, /*NOT*/
- title_sub VARCHAR(400) NULL,
- title_short VARCHAR(400) NULL,
- title_edition VARCHAR(10) NULL,
- title_note VARCHAR(2500) NULL,
- title_language VARCHAR(100) NULL,
- title_language_char VARCHAR(6) NULL,
- creator_author VARCHAR(600) NULL,
- creator_author_email VARCHAR(100) NULL,
- creator_author_hon VARCHAR(100) NULL,
- creator_author_nationality VARCHAR(100) NULL,
- creator_editor VARCHAR(600) NULL,
- creator_contributor VARCHAR(600) NULL,
- creator_illustrator VARCHAR(600) NULL,
- creator_photographer VARCHAR(600) NULL,
- creator_translator VARCHAR(600) NULL,
- creator_prepared_by VARCHAR(600) NULL,
- creator_digitized_by VARCHAR(600) NULL,
- creator_audio VARCHAR(600) NULL,
- creator_video VARCHAR(600) NULL,
- language_document VARCHAR(100) NULL,
- language_document_char VARCHAR(6) NULL, /*NOT*/
- language_original VARCHAR(100) NULL,
- language_original_char VARCHAR(6) NULL,
- date_added_to_site VARCHAR(10) NULL,
- date_available VARCHAR(10) NULL,
- date_created VARCHAR(10) NULL,
- date_issued VARCHAR(10) NULL,
- date_modified VARCHAR(10) NULL,
- date_published VARCHAR(10) NULL,
- date_valid VARCHAR(10) NULL,
- date_translated VARCHAR(10) NULL,
- date_original_publication VARCHAR(10) NULL,
- date_generated VARCHAR(10) NULL,
- publisher VARCHAR(600) NULL,
- original_publisher VARCHAR(600) NULL,
- original_language VARCHAR(100) NULL,
- original_language_char VARCHAR(6) NULL,
- original_source VARCHAR(600) NULL,
- original_institution VARCHAR(600) NULL,
- original_nationality VARCHAR(100) NULL,
- original_title VARCHAR(800) NULL,
- rights_copyright VARCHAR(2500) NULL,
- rights_copyright_audio VARCHAR(2500) NULL,
- rights_copyright_cover VARCHAR(2500) NULL,
- rights_copyright_illustrations VARCHAR(2500) NULL,
- rights_copyright_photographs VARCHAR(2500) NULL,
- rights_copyright_text VARCHAR(2500) NULL,
- rights_copyright_translation VARCHAR(2500) NULL,
- rights_copyright_video VARCHAR(2500) NULL,
- rights_license VARCHAR(2500) NULL,
- identifier_oclc VARCHAR(30) NULL,
- identifier_isbn VARCHAR(16) NULL,
- classify_topic_register VARCHAR(2500) NULL,
- classify_subject VARCHAR(600) NULL,
- classify_loc VARCHAR(30) NULL,
- classify_dewey VARCHAR(30) NULL,
- classify_keywords VARCHAR(600) NULL,
- notes_abstract TEXT NULL,
- notes_description TEXT NULL,
- notes_comment TEXT NULL,
- notes_coverage VARCHAR(200) NULL,
- notes_relation VARCHAR(200) NULL,
- notes_history VARCHAR(600) NULL,
- notes_type VARCHAR(600) NULL,
- notes_format VARCHAR(600) NULL,
- notes_prefix TEXT NULL,
- notes_prefix_a TEXT NULL,
- notes_prefix_b TEXT NULL,
- notes_suffix TEXT NULL,
- src_filename VARCHAR(256) NULL,
- src_fingerprint VARCHAR(256) NULL,
- src_filesize VARCHAR(10) NULL,
- src_wordcount VARCHAR(10) NULL,
- src_text TEXT NULL,
- fulltext TEXT NULL,
- links TEXT NULL
-);
-#+END_SRC
-
-******** doc_objects
-
-#+name: sqlite_create_table_objects
-#+BEGIN_SRC sql
-CREATE TABLE doc_objects (
- lid BIGINT PRIMARY KEY,
- metadata_tid BIGINT REFERENCES metadata_and_text,
- ocn SMALLINT,
- ocnd VARCHAR(6),
- ocns VARCHAR(6),
- clean TEXT NULL,
- body TEXT NULL,
- book_idx TEXT NULL,
- seg VARCHAR(256) NULL,
- lev_an VARCHAR(1),
- lev SMALLINT NULL,
- lev0 SMALLINT,
- lev1 SMALLINT,
- lev2 SMALLINT,
- lev3 SMALLINT,
- lev4 SMALLINT,
- lev5 SMALLINT,
- lev6 SMALLINT,
- lev7 SMALLINT,
- en_a SMALLINT NULL,
- en_z SMALLINT NULL,
- en_a_asterisk SMALLINT NULL,
- en_z_asterisk SMALLINT NULL,
- en_a_plus SMALLINT NULL,
- en_z_plus SMALLINT NULL,
- t_of VARCHAR(16),
- t_is VARCHAR(16),
- node VARCHAR(16) NULL,
- parent VARCHAR(16) NULL,
- digest_clean CHAR(256),
- digest_all CHAR(256),
- types CHAR(1) NULL
-);
-#+END_SRC
-
-****** d }
-
-#+name: sqlite_close_initialize
-#+BEGIN_SRC d
- COMMIT
-");
-#+END_SRC
-
-***** prepare insert statements and do pre-loop inserts
-****** 1. _metadata & src_ text
-******* sql statement
-******** d { sql statement
-
-#+name: sqlite_insert_metadata_and_src_text
-#+BEGIN_SRC d
-Statement insert_metadata = db.prepare("
-#+END_SRC
-
-tid document number unique
-either:
-- increment by adding 1 for each document,
-- make hash of document filename or url and use?
-
-********* sql insert into
-
-#+name: sqlite_insert_metadata_and_src_text
-#+BEGIN_SRC sql
- INSERT INTO metadata_and_text (
- tid,
- title,
- title_main,
- title_sub,
- title_short,
- title_edition,
- title_note,
- title_language,
- title_language_char,
- creator_author,
- creator_author_email,
- creator_author_hon,
- creator_author_nationality,
- creator_editor,
- creator_contributor,
- creator_illustrator,
- creator_photographer,
- creator_translator,
- creator_prepared_by,
- creator_digitized_by,
- creator_audio,
- creator_video,
- language_document,
- language_document_char,
- language_original,
- language_original_char,
- date_added_to_site,
- date_available,
- date_created,
- date_issued,
- date_modified,
- date_published,
- date_valid,
- date_translated,
- date_original_publication,
- date_generated,
- publisher,
- original_publisher,
- original_language,
- original_language_char,
- original_source,
- original_institution,
- original_nationality,
- original_title,
- rights_copyright,
- rights_copyright_audio,
- rights_copyright_cover,
- rights_copyright_illustrations,
- rights_copyright_photographs,
- rights_copyright_text,
- rights_copyright_translation,
- rights_copyright_video,
- rights_license,
- identifier_oclc,
- identifier_isbn,
- classify_topic_register,
- classify_subject,
- classify_loc,
- classify_dewey,
- classify_keywords,
- notes_abstract,
- notes_description,
- notes_comment,
- notes_coverage,
- notes_relation,
- notes_history,
- notes_type,
- notes_format,
- notes_prefix,
- notes_prefix_a,
- notes_prefix_b,
- notes_suffix,
- src_filename,
- src_fingerprint,
- src_filesize,
- src_wordcount,
- src_text,
- fulltext,
- links
- )
-#+END_SRC
-
-********* sql values
-
-#+name: sqlite_insert_metadata_and_src_text
-#+BEGIN_SRC sql
- VALUES (
- :tid,
- :title,
- :title_main,
- :title_sub,
- :title_short,
- :title_edition,
- :title_note,
- :title_language,
- :title_language_char,
- :creator_author,
- :creator_author_email,
- :creator_author_hon,
- :creator_author_nationality,
- :creator_editor,
- :creator_contributor,
- :creator_illustrator,
- :creator_photographer,
- :creator_translator,
- :creator_prepared_by,
- :creator_digitized_by,
- :creator_audio,
- :creator_video,
- :language_document,
- :language_document_char,
- :language_original,
- :language_original_char,
- :date_added_to_site,
- :date_available,
- :date_created,
- :date_issued,
- :date_modified,
- :date_published,
- :date_valid,
- :date_translated,
- :date_original_publication,
- :date_generated,
- :publisher,
- :original_publisher,
- :original_language,
- :original_language_char,
- :original_source,
- :original_institution,
- :original_nationality,
- :original_title,
- :rights_copyright,
- :rights_copyright_audio,
- :rights_copyright_cover,
- :rights_copyright_illustrations,
- :rights_copyright_photographs,
- :rights_copyright_text,
- :rights_copyright_translation,
- :rights_copyright_video,
- :rights_license,
- :identifier_oclc,
- :identifier_isbn,
- :classify_topic_register,
- :classify_subject,
- :classify_loc,
- :classify_dewey,
- :classify_keywords,
- :notes_abstract,
- :notes_description,
- :notes_comment,
- :notes_coverage,
- :notes_relation,
- :notes_history,
- :notes_type,
- :notes_format,
- :notes_prefix,
- :notes_prefix_a,
- :notes_prefix_b,
- :notes_suffix,
- :src_filename,
- :src_fingerprint,
- :src_filesize,
- :src_wordcount,
- :src_text,
- :fulltext,
- :links
- )
-#+END_SRC
-
-******** d }
-
-#+name: sqlite_insert_metadata_and_src_text
-#+BEGIN_SRC d
-");
-#+END_SRC
-
-******* TODO insert document_metadata
-
-#+name: sqlite_insert_metadata_and_src_text
-#+BEGIN_SRC d
-// insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment??
-insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full);
-insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main);
-insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle);
-insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); //
-insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition);
-insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language);
-// insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char);
-insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey);
-insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords);
-insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc);
-insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject);
-insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register);
-insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author);
-insert_metadata.bind(":creator_author_email", doc_matters.conf_make_meta.meta.creator_author_email);
-// insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor);
-// insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor);
-insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator);
-// insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer);
-insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator);
-// insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio);
-// insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video);
-insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site);
-insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available);
-insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created);
-insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued);
-insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified);
-insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published);
-insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid);
-// insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated);
-// insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication);
-// insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated);
-insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn);
-insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc);
-insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document);
-insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char);
-// insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original);
-// insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char);
-insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract);
-insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description);
-insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher);
-insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language);
-insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char);
-insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source);
-insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title);
-insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher);
-// insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights);
-insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright);
-insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio);
-insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover);
-insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations);
-insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs);
-// insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation);
-insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text);
-insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation);
-insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video);
-// insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization);
-// insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio);
-// insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video);
-insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license);
-// insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename);
-// insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint);
-// insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize);
-// insert_metadata.bind(":src_wordcount", doc_matters.conf_make_meta.meta.src_wordcount);
-// insert_metadata.bind(":src_text", doc_matters.conf_make_meta.meta.src_text);
-// insert_metadata.bind(":fulltext", doc_matters.conf_make_meta.meta.fulltext);
-// insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links);
-insert_metadata.execute(); insert_metadata.reset();
-/+ watch +/
-if ((doc_matters.opt.action.verbose)) {
- writeln("sql statement executed");
-}
-assert(db.totalChanges == 1);
-#+END_SRC
-
-###+name: sqlite_insert_metadata_and_src_text
-#+BEGIN_SRC d
-insert_metadata.inject(
- 1,
- doc_matters.dochead_meta["title"]["full"],
- "",
- "",
- "",
- "",
- "",
- "",
- "",
- doc_matters.dochead_meta["creator"]["author"]
-);
-#+END_SRC
-
-****** 2. _doc objects_ (used with doc_objects in document loop)
-******* prepare sql statement, sql utf8
-******** d { sql statement
-
-#+name: sqlite_insert_doc_objects
-#+BEGIN_SRC d
-string insertDocObjectsRow(O)(O obj) {
- auto sql_insert_delimiter(string _txt) {
- _txt = _txt
- .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0");
- return _txt;
- }
- string _insert_doc_objects_row;
- _insert_doc_objects_row = format(q"¶
-#+END_SRC
-
-********* sql statement: formatted
-
-lid unique, increment by 1 per object, not ocn
-
-metadata tid document number unique
-either:
-- increment by adding 1 for each document,
-- make hash of document filename or url and use?
-
-********** INSERT INTO
-
-#+name: sqlite_insert_doc_objects
-#+BEGIN_SRC sql
- INSERT INTO doc_objects (
- ocn,
- clean,
- body,
- lev,
- t_of,
- t_is
- )
-#+END_SRC
-
-********** VALUES
-
-#+name: sqlite_insert_doc_objects
-#+BEGIN_SRC sql
- VALUES (
- %s,
- '%s',
- '%s',
- %s,
- '%s',
- '%s'
- );
-#+END_SRC
-
-********* sql statement: values for formatting
-
-#+name: sqlite_insert_doc_objects
-#+BEGIN_SRC sql
- ¶",
- obj.ocn,
- sql_insert_delimiter(obj_txt["text"]),
- sql_insert_delimiter(obj_txt["html"]),
- obj.heading_lev_markup,
- obj.is_of,
- obj.is_a,
- );
-#+END_SRC
-
-******** d }
-
-#+name: sqlite_insert_doc_objects
-#+BEGIN_SRC d
- return _insert_doc_objects_row;
-}
-#+END_SRC
-
-******* TODO (within loop not here - insert doc objects
+**** prepare insert statements and do pre-loop inserts
+***** 1. _metadata & src_ text
+***** 2. _doc objects_ (used with doc_objects in document loop)
+****** TODO (within loop not here - insert doc objects
work out
*** 3. hub (sqlite_format_and_load_objects)
-***** sql related
+**** sql related
#+name: sqlite_load_object
#+BEGIN_SRC d
@@ -1036,7 +583,7 @@ string sqlite_statement(O)(
}
#+END_SRC
-***** heading
+**** heading
#+name: hub_format_and_sqlite_load_objects
#+BEGIN_SRC d
@@ -1061,7 +608,7 @@ auto heading(O)(
}
#+END_SRC
-***** para
+**** para
#+name: hub_format_and_sqlite_load_objects
#+BEGIN_SRC d
@@ -1086,7 +633,7 @@ auto para(O)(
}
#+END_SRC
-***** quote
+**** quote
#+name: hub_format_and_sqlite_load_objects
#+BEGIN_SRC d
@@ -1110,7 +657,7 @@ auto quote(O)(
return obj_txt;
}
#+END_SRC
-***** group
+**** group
#+name: hub_format_and_sqlite_load_objects
#+BEGIN_SRC d
@@ -1135,7 +682,7 @@ auto group(O)(
}
#+END_SRC
-***** block
+**** block
#+name: hub_format_and_sqlite_load_objects
#+BEGIN_SRC d
@@ -1160,7 +707,7 @@ auto block(O)(
}
#+END_SRC
-***** verse
+**** verse
#+name: hub_format_and_sqlite_load_objects
#+BEGIN_SRC d
@@ -1185,7 +732,7 @@ auto verse(O)(
}
#+END_SRC
-***** code
+**** code
#+name: hub_format_and_sqlite_load_objects
#+BEGIN_SRC d
@@ -1210,7 +757,7 @@ auto code(O)(
}
#+END_SRC
-***** table
+**** table
#+name: hub_format_and_sqlite_load_objects
#+BEGIN_SRC d
@@ -1235,19 +782,14 @@ auto table(O)(
}
#+END_SRC
-** 3. loop, identify, load - loop template
+** 4. loop, identify, load - loop template
#+name: sqlite_objects_loop
#+BEGIN_SRC d
-auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language);
-pth_sqlite.base.mkdirRecurse;
-auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename));
-SQLiteInstruct!()(db, doc_matters); // consider best location, need to feed individual objects for sqlite table: doc_objects, possibly a separate template?
auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters);
string[string] obj_txt;
string doc_text;
string[] _insert_doc_objects;
-_insert_doc_objects ~= "BEGIN;\n";
foreach (part; doc_parts) {
foreach (obj; doc_abstraction[part]) {
switch (obj.of_part) {
@@ -1397,12 +939,421 @@ foreach (part; doc_parts) {
}
} // loop closes
}
-_insert_doc_objects ~= "COMMIT";
-debug(sql_statement) {
- writeln("#+BEGIN_SRC sql\n", _insert_doc_objects.join, "\n#+END_SRC");
-}
-std.utf.validate(_insert_doc_objects.join); // TODO
-db.run(_insert_doc_objects.join.to!(char[]).toUTF8);
+return _insert_doc_objects.join.to!(char[]).toUTF8;
+#+END_SRC
+
+** 5. SQL statements
+*** create tables
+
+**** DROP INDEX IF EXISTS
+
+#+name: sqlite_statement_drop_existing_index
+#+BEGIN_SRC sql
+DROP INDEX IF EXISTS ocn;
+DROP INDEX IF EXISTS digest_clean;
+DROP INDEX IF EXISTS digest_all;
+DROP INDEX IF EXISTS clean;
+DROP INDEX IF EXISTS lev0;
+DROP INDEX IF EXISTS lev1;
+DROP INDEX IF EXISTS lev2;
+DROP INDEX IF EXISTS lev3;
+DROP INDEX IF EXISTS lev4;
+DROP INDEX IF EXISTS lev5;
+DROP INDEX IF EXISTS lev6;
+DROP INDEX IF EXISTS title;
+DROP INDEX IF EXISTS creator_author;
+DROP INDEX IF EXISTS src_filename;
+DROP INDEX IF EXISTS language_document_char;
+DROP INDEX IF EXISTS classify_topic_register;
+#+END_SRC
+
+**** DROP TABLE IF EXISTS
+
+#+name: sqlite_statement_drop_existing_tables
+#+BEGIN_SRC sql
+DROP TABLE IF EXISTS metadata_and_text;
+DROP TABLE IF EXISTS doc_objects;
+DROP TABLE IF EXISTS urls;
+#+END_SRC
+
+**** CREATE TABLE metadata_and_text
+
+#+name: sqlite_statement_create_table_metadata_and_src_txt
+#+BEGIN_SRC sql
+CREATE TABLE metadata_and_text (
+ tid INTEGER PRIMARY KEY,
+/* src_filename_composite VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */
+ title VARCHAR(800) NOT NULL,
+ title_main VARCHAR(400) NOT NULL,
+ title_sub VARCHAR(400) NULL,
+ title_short VARCHAR(400) NULL,
+ title_edition VARCHAR(10) NULL,
+ title_note VARCHAR(2500) NULL,
+ title_language VARCHAR(100) NULL,
+ title_language_char VARCHAR(6) NULL,
+ creator_author VARCHAR(600) NOT NULL,
+ creator_author_email VARCHAR(100) NULL,
+ creator_author_hon VARCHAR(100) NULL,
+ creator_author_nationality VARCHAR(100) NULL,
+ creator_editor VARCHAR(600) NULL,
+ creator_contributor VARCHAR(600) NULL,
+ creator_illustrator VARCHAR(600) NULL,
+ creator_photographer VARCHAR(600) NULL,
+ creator_translator VARCHAR(600) NULL,
+ creator_prepared_by VARCHAR(600) NULL,
+ creator_digitized_by VARCHAR(600) NULL,
+ creator_audio VARCHAR(600) NULL,
+ creator_video VARCHAR(600) NULL,
+ language_document VARCHAR(100) NULL,
+ language_document_char VARCHAR(6) NOT NULL,
+ language_original VARCHAR(100) NULL,
+ language_original_char VARCHAR(6) NULL,
+ date_added_to_site VARCHAR(10) NULL,
+ date_available VARCHAR(10) NULL,
+ date_created VARCHAR(10) NULL,
+ date_issued VARCHAR(10) NULL,
+ date_modified VARCHAR(10) NULL,
+ date_published VARCHAR(10) NULL,
+ date_valid VARCHAR(10) NULL,
+ date_translated VARCHAR(10) NULL,
+ date_original_publication VARCHAR(10) NULL,
+ date_generated VARCHAR(10) NULL,
+ publisher VARCHAR(600) NULL,
+ original_publisher VARCHAR(600) NULL,
+ original_language VARCHAR(100) NULL,
+ original_language_char VARCHAR(6) NULL,
+ original_source VARCHAR(600) NULL,
+ original_institution VARCHAR(600) NULL,
+ original_nationality VARCHAR(100) NULL,
+ original_title VARCHAR(800) NULL,
+ rights_copyright VARCHAR(2500) NULL,
+ rights_copyright_audio VARCHAR(2500) NULL,
+ rights_copyright_cover VARCHAR(2500) NULL,
+ rights_copyright_illustrations VARCHAR(2500) NULL,
+ rights_copyright_photographs VARCHAR(2500) NULL,
+ rights_copyright_text VARCHAR(2500) NULL,
+ rights_copyright_translation VARCHAR(2500) NULL,
+ rights_copyright_video VARCHAR(2500) NULL,
+ rights_license VARCHAR(2500) NULL,
+ identifier_oclc VARCHAR(30) NULL,
+ identifier_isbn VARCHAR(16) NULL,
+ classify_topic_register VARCHAR(2500) NULL,
+ classify_subject VARCHAR(600) NULL,
+ classify_loc VARCHAR(30) NULL,
+ classify_dewey VARCHAR(30) NULL,
+ classify_keywords VARCHAR(600) NULL,
+ notes_abstract TEXT NULL,
+ notes_description TEXT NULL,
+ notes_comment TEXT NULL,
+ notes_coverage VARCHAR(200) NULL,
+ notes_relation VARCHAR(200) NULL,
+ notes_history VARCHAR(600) NULL,
+ notes_type VARCHAR(600) NULL,
+ notes_format VARCHAR(600) NULL,
+ notes_prefix TEXT NULL,
+ notes_prefix_a TEXT NULL,
+ notes_prefix_b TEXT NULL,
+ notes_suffix TEXT NULL,
+ src_filename VARCHAR(256) NOT NULL,
+ src_fingerprint VARCHAR(256) NULL,
+ src_filesize VARCHAR(10) NULL,
+ src_wordcount VARCHAR(10) NULL,
+ pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */
+ pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */
+ pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */
+ src_text TEXT NULL,
+ fulltext TEXT NULL,
+ links TEXT NULL
+);
+#+END_SRC
+
+**** CREATE TABLE doc_objects
+
+#+name: sqlite_statement_create_table_objects
+#+BEGIN_SRC sql
+CREATE TABLE doc_objects (
+ lid INTEGER PRIMARY KEY,
+ metadata_tid INTEGER REFERENCES metadata_and_text,
+ ocn SMALLINT,
+ ocnd VARCHAR(6),
+ ocns VARCHAR(6),
+ clean TEXT NULL,
+ body TEXT NULL,
+ book_idx TEXT NULL,
+ seg VARCHAR(256) NULL,
+ lev_an VARCHAR(1),
+ lev SMALLINT NULL,
+ lev0 SMALLINT,
+ lev1 SMALLINT,
+ lev2 SMALLINT,
+ lev3 SMALLINT,
+ lev4 SMALLINT,
+ lev5 SMALLINT,
+ lev6 SMALLINT,
+ lev7 SMALLINT,
+ en_a SMALLINT NULL,
+ en_z SMALLINT NULL,
+ en_a_asterisk SMALLINT NULL,
+ en_z_asterisk SMALLINT NULL,
+ en_a_plus SMALLINT NULL,
+ en_z_plus SMALLINT NULL,
+ t_of VARCHAR(16),
+ t_is VARCHAR(16),
+ node VARCHAR(16) NULL,
+ parent VARCHAR(16) NULL,
+ digest_clean CHAR(256),
+ digest_all CHAR(256),
+ types CHAR(1) NULL
+);
+#+END_SRC
+
+**** CREATE INDEX
+
+#+name: sqlite_statement_create_index
+#+BEGIN_SRC sql
+CREATE INDEX idx_ocn ON doc_objects(ocn);
+CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);
+CREATE INDEX idx_digest_all ON doc_objects(digest_all);
+CREATE INDEX idx_clean ON doc_objects(clean);
+CREATE INDEX idx_lev0 ON doc_objects(lev0);
+CREATE INDEX idx_lev1 ON doc_objects(lev1);
+CREATE INDEX idx_lev2 ON doc_objects(lev2);
+CREATE INDEX idx_lev3 ON doc_objects(lev3);
+CREATE INDEX idx_lev4 ON doc_objects(lev4);
+CREATE INDEX idx_lev5 ON doc_objects(lev5);
+CREATE INDEX idx_lev6 ON doc_objects(lev6);
+CREATE INDEX idx_title ON metadata_and_text(title);
+CREATE INDEX idx_author ON metadata_and_text(creator_author);
+CREATE INDEX idx_filename ON metadata_and_text(src_filename);
+CREATE INDEX idx_language ON metadata_and_text(language_document_char);
+CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register);
+#+END_SRC
+
+**** TODO local site link & info
+
+*** sql insert statement formatted doc objects
+
+lid unique, increment by 1 per object, not ocn
+
+metadata tid document number unique
+either:
+- increment by adding 1 for each document,
+- make hash of document filename or url and use?
+
+**** sql statement: dlang format
+#+name: sqlite_formatted_insertions_doc_objects
+#+BEGIN_SRC d
+string _insert_doc_objects_row;
+_insert_doc_objects_row = format(q"¶
+#+END_SRC
+
+**** INSERT INTO
+
+#+name: sqlite_formatted_insertions_doc_objects
+#+BEGIN_SRC sql
+ INSERT INTO doc_objects (
+ metadata_tid,
+ ocn,
+ clean,
+ body,
+ lev,
+ t_of,
+ t_is
+ )
+#+END_SRC
+
+**** VALUES
+
+#+name: sqlite_formatted_insertions_doc_objects
+#+BEGIN_SRC sql
+ VALUES (
+ %d,
+ %s,
+ '%s',
+ '%s',
+ %s,
+ '%s',
+ '%s'
+ );
+#+END_SRC
+
+**** sql statement: dlang values for formatting
+
+#+name: sqlite_formatted_insertions_doc_objects
+#+BEGIN_SRC d
+¶",
+ 1,
+ obj.ocn,
+ SQLinsertDelimiter!()(obj_txt["text"]),
+ SQLinsertDelimiter!()(obj_txt["html"]),
+ obj.heading_lev_markup,
+ obj.is_of,
+ obj.is_a,
+);
+#+END_SRC
+
+*** sqlite insert statement formatted doc matters metadata
+**** sql statement: dlang format
+#+name: sqlite_formatted_insertions_doc_matters_metadata
+#+BEGIN_SRC d
+string _insert_metadata;
+_insert_metadata = format(q"¶
+#+END_SRC
+
+**** INSERT INTO
+
+#+name: sqlite_formatted_insertions_doc_matters_metadata
+#+BEGIN_SRC sql
+ INSERT INTO metadata_and_text (
+ src_filename,
+ title,
+ title_main,
+ title_sub,
+ title_short,
+ title_edition,
+ title_language,
+ classify_dewey,
+ classify_keywords,
+ classify_loc,
+ classify_subject,
+ classify_topic_register,
+ creator_author,
+ creator_author_email,
+ creator_illustrator,
+ creator_translator,
+ date_added_to_site,
+ date_available,
+ date_created,
+ date_issued,
+ date_modified,
+ date_published,
+ date_valid,
+ identifier_isbn,
+ identifier_oclc,
+ language_document,
+ language_document_char,
+ notes_abstract,
+ notes_description,
+ original_publisher,
+ original_language,
+ original_language_char,
+ original_source,
+ original_title,
+ publisher,
+ rights_copyright,
+ rights_copyright_audio,
+ rights_copyright_cover,
+ rights_copyright_illustrations,
+ rights_copyright_photographs,
+ rights_copyright_text,
+ rights_copyright_translation,
+ rights_copyright_video,
+ rights_license
+ )
+#+END_SRC
+
+**** VALUES
+
+#+name: sqlite_formatted_insertions_doc_matters_metadata
+#+BEGIN_SRC sql
+ VALUES (
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s',
+ '%s'
+ );
+#+END_SRC
+
+**** sql statement: values for formatting
+
+#+name: sqlite_formatted_insertions_doc_matters_metadata
+#+BEGIN_SRC d
+¶",
+ SQLinsertDelimiter!()(doc_matters.src.filename),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_full),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_main),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_subtitle),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_issued),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_illustrations),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_photographs),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_text),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video),
+ SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license)
+);
#+END_SRC
* __END__