diff options
Diffstat (limited to 'org/output_sqlite_discrete.org')
-rw-r--r-- | org/output_sqlite_discrete.org | 1093 |
1 files changed, 522 insertions, 571 deletions
diff --git a/org/output_sqlite_discrete.org b/org/output_sqlite_discrete.org index fced21d..c2f273c 100644 --- a/org/output_sqlite_discrete.org +++ b/org/output_sqlite_discrete.org @@ -20,18 +20,66 @@ #+BEGIN_SRC d :tangle ../src/sdp/output/sqlite_discrete.d module sdp.output.sqlite_discrete; <<output_imports>> +template SQLiteDiscreteDbRun() { + void SQLiteDiscreteDbRun(Db,St,O)( + auto ref Db db, + auto ref St db_statement, + auto ref O opt_action, + string note, + ) { + debug(sql_statement) { + writeln(db_statement); + } + try { + db.run( + "\nBEGIN;\n" ~ + db_statement ~ + "\nCOMMIT;\n" + ); + } catch (ErrnoException ex) { + writeln("ERROR SQLite : ", ex); + } catch (Exception ex) { + writeln("ERROR SQLite : ", ex); + } + if (opt_action.debug_do) { + writeln(note); + if (opt_action.verbose) { + writeln(db_statement); + } + } + } +} template SQLiteDiscreteBuildTablesAndPopulate() { void SQLiteDiscreteBuildTablesAndPopulate(D,I)( auto ref const D doc_abstraction, - auto ref I doc_matters, + auto ref I doc_matters, ) { import d2sqlite3; import std.typecons : Nullable; mixin SiSUoutputRgxInit; static auto rgx = Rgx(); + auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); + pth_sqlite.base.mkdirRecurse; + auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); + template SQLiteDiscreteDbStatementComposite() { + void SQLiteDiscreteDbStatementComposite(Db,D,I)( + auto ref Db db, + auto ref const D doc_abstraction, + auto ref I doc_matters, + ) { + <<sqlite_db_statement_composite>> + } + } + template SQLinsertDelimiter() { + auto SQLinsertDelimiter(string _txt) { + _txt = _txt + .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); + return _txt; + } + } template SQLiteFormatAndLoadObject() { auto SQLiteFormatAndLoadObject(I)( - auto ref I doc_matters, + auto ref I doc_matters, ) { mixin SiSUoutputRgxInit; struct sqlite_format_and_load_objects { @@ -44,45 +92,61 @@ template SQLiteDiscreteBuildTablesAndPopulate() { return sqlite_format_and_load_objects(); } } - template SQLiteInstruct() { - void SQLiteInstruct(Db,I)( - Db db, - auto ref I doc_matters, + template SQLiteDiscreteTablesReCreate() { + string SQLiteDiscreteTablesReCreate()() { + string _sql_instruct; + _sql_instruct = format(q"¶ + <<sqlite_statement_drop_existing_index>> + <<sqlite_statement_drop_existing_tables>> + <<sqlite_statement_create_table_metadata_and_src_txt>> + <<sqlite_statement_create_table_objects>> + <<sqlite_statement_create_index>> + ¶",); + return _sql_instruct; + } + } + template SQLiteDiscreteInsertMetadata() { + string SQLiteDiscreteInsertMetadata(I)( + auto ref I doc_matters, ) { - <<sqlite_db_initialize>> - <<sqlite_table_initialize>> - <<sqlite_create_table_metadata_and_src_txt>> - /* */<<sqlite_create_table_urls>> - <<sqlite_create_table_objects>> - <<sqlite_close_initialize>> - <<sqlite_insert_metadata_and_src_text>> - //<<sqlite_insert_urls>> + <<sqlite_formatted_insertions_doc_matters_metadata>> + return _insert_metadata; } } - template SQLiteObjectsLoop() { - void SQLiteObjectsLoop(P)( - auto ref P doc_parts, + template SQLiteDiscreteInsertDocObjectsLoop() { + string SQLiteDiscreteInsertDocObjectsLoop(P,I)( + auto ref P doc_parts, + auto ref I tid, ) { - <<sqlite_insert_doc_objects>> + string insertDocObjectsRow(O)(O obj) { + <<sqlite_formatted_insertions_doc_objects>> + return _insert_doc_objects_row; + } <<sqlite_objects_loop>> } } - SQLiteObjectsLoop!()(doc_matters.xml.keys_seq.sql); + SQLiteDiscreteDbStatementComposite!()(db, doc_abstraction, doc_matters); } } -template SQLiteDiscreteTablesCreate() { - void SQLiteDiscreteTablesCreate()() { - writeln("table create"); - } -} -template SQLiteDiscreteTablesDrop() { - void SQLiteDiscreteTablesDrop()() { - writeln("table drop"); +#+END_SRC + +** 1. sqlite_db_statement [#A] + +#+name: sqlite_db_statement_composite +#+BEGIN_SRC d +{ + string _db_statement; + { + _db_statement ~= SQLiteDiscreteTablesReCreate!()(); + _db_statement ~= SQLiteDiscreteInsertMetadata!()(doc_matters); + _db_statement ~= SQLiteDiscreteInsertDocObjectsLoop!()(doc_matters.xml.keys_seq.sql, 1); // FIX + SQLiteDiscreteDbRun!()(db, _db_statement, doc_matters.opt.action, "table CREATE Tables, INSERT DocObjects"); } + db.close; } #+END_SRC -** 1. imports +** 2. imports #+name: output_imports #+BEGIN_SRC d @@ -92,7 +156,7 @@ import std.conv : to; #+END_SRC -** 2. format and load template +** 3. format and load template *** 1. prepare objects (munge, sanitize, markup) **** 1. _text_ generic munge (sanitize text for search) @@ -469,532 +533,15 @@ auto html_table(O)( #+END_SRC *** 2. _sqlite_ (loop preparation & pre-loop action) -***** initialize: prepare statements drop & create db & tables anew -****** d sqlite statements (initialize database) { - -#+name: sqlite_db_initialize -#+BEGIN_SRC d -db.run(" -#+END_SRC - -******* 0. drop table if exists - -#+name: sqlite_table_initialize -#+BEGIN_SRC sql -DROP TABLE IF EXISTS metadata_and_text; -DROP TABLE IF EXISTS doc_objects; -DROP TABLE IF EXISTS urls; -BEGIN; -#+END_SRC - -******* 1. create tables -******** metadata & src text - -#+name: sqlite_create_table_metadata_and_src_txt -#+BEGIN_SRC sql -CREATE TABLE metadata_and_text ( - tid BIGINT PRIMARY KEY, - title VARCHAR(800) NOT NULL, - title_main VARCHAR(400) NULL, /*NOT*/ - title_sub VARCHAR(400) NULL, - title_short VARCHAR(400) NULL, - title_edition VARCHAR(10) NULL, - title_note VARCHAR(2500) NULL, - title_language VARCHAR(100) NULL, - title_language_char VARCHAR(6) NULL, - creator_author VARCHAR(600) NULL, - creator_author_email VARCHAR(100) NULL, - creator_author_hon VARCHAR(100) NULL, - creator_author_nationality VARCHAR(100) NULL, - creator_editor VARCHAR(600) NULL, - creator_contributor VARCHAR(600) NULL, - creator_illustrator VARCHAR(600) NULL, - creator_photographer VARCHAR(600) NULL, - creator_translator VARCHAR(600) NULL, - creator_prepared_by VARCHAR(600) NULL, - creator_digitized_by VARCHAR(600) NULL, - creator_audio VARCHAR(600) NULL, - creator_video VARCHAR(600) NULL, - language_document VARCHAR(100) NULL, - language_document_char VARCHAR(6) NULL, /*NOT*/ - language_original VARCHAR(100) NULL, - language_original_char VARCHAR(6) NULL, - date_added_to_site VARCHAR(10) NULL, - date_available VARCHAR(10) NULL, - date_created VARCHAR(10) NULL, - date_issued VARCHAR(10) NULL, - date_modified VARCHAR(10) NULL, - date_published VARCHAR(10) NULL, - date_valid VARCHAR(10) NULL, - date_translated VARCHAR(10) NULL, - date_original_publication VARCHAR(10) NULL, - date_generated VARCHAR(10) NULL, - publisher VARCHAR(600) NULL, - original_publisher VARCHAR(600) NULL, - original_language VARCHAR(100) NULL, - original_language_char VARCHAR(6) NULL, - original_source VARCHAR(600) NULL, - original_institution VARCHAR(600) NULL, - original_nationality VARCHAR(100) NULL, - original_title VARCHAR(800) NULL, - rights_copyright VARCHAR(2500) NULL, - rights_copyright_audio VARCHAR(2500) NULL, - rights_copyright_cover VARCHAR(2500) NULL, - rights_copyright_illustrations VARCHAR(2500) NULL, - rights_copyright_photographs VARCHAR(2500) NULL, - rights_copyright_text VARCHAR(2500) NULL, - rights_copyright_translation VARCHAR(2500) NULL, - rights_copyright_video VARCHAR(2500) NULL, - rights_license VARCHAR(2500) NULL, - identifier_oclc VARCHAR(30) NULL, - identifier_isbn VARCHAR(16) NULL, - classify_topic_register VARCHAR(2500) NULL, - classify_subject VARCHAR(600) NULL, - classify_loc VARCHAR(30) NULL, - classify_dewey VARCHAR(30) NULL, - classify_keywords VARCHAR(600) NULL, - notes_abstract TEXT NULL, - notes_description TEXT NULL, - notes_comment TEXT NULL, - notes_coverage VARCHAR(200) NULL, - notes_relation VARCHAR(200) NULL, - notes_history VARCHAR(600) NULL, - notes_type VARCHAR(600) NULL, - notes_format VARCHAR(600) NULL, - notes_prefix TEXT NULL, - notes_prefix_a TEXT NULL, - notes_prefix_b TEXT NULL, - notes_suffix TEXT NULL, - src_filename VARCHAR(256) NULL, - src_fingerprint VARCHAR(256) NULL, - src_filesize VARCHAR(10) NULL, - src_wordcount VARCHAR(10) NULL, - src_text TEXT NULL, - fulltext TEXT NULL, - links TEXT NULL -); -#+END_SRC - -******** doc_objects - -#+name: sqlite_create_table_objects -#+BEGIN_SRC sql -CREATE TABLE doc_objects ( - lid BIGINT PRIMARY KEY, - metadata_tid BIGINT REFERENCES metadata_and_text, - ocn SMALLINT, - ocnd VARCHAR(6), - ocns VARCHAR(6), - clean TEXT NULL, - body TEXT NULL, - book_idx TEXT NULL, - seg VARCHAR(256) NULL, - lev_an VARCHAR(1), - lev SMALLINT NULL, - lev0 SMALLINT, - lev1 SMALLINT, - lev2 SMALLINT, - lev3 SMALLINT, - lev4 SMALLINT, - lev5 SMALLINT, - lev6 SMALLINT, - lev7 SMALLINT, - en_a SMALLINT NULL, - en_z SMALLINT NULL, - en_a_asterisk SMALLINT NULL, - en_z_asterisk SMALLINT NULL, - en_a_plus SMALLINT NULL, - en_z_plus SMALLINT NULL, - t_of VARCHAR(16), - t_is VARCHAR(16), - node VARCHAR(16) NULL, - parent VARCHAR(16) NULL, - digest_clean CHAR(256), - digest_all CHAR(256), - types CHAR(1) NULL -); -#+END_SRC - -****** d } - -#+name: sqlite_close_initialize -#+BEGIN_SRC d - COMMIT -"); -#+END_SRC - -***** prepare insert statements and do pre-loop inserts -****** 1. _metadata & src_ text -******* sql statement -******** d { sql statement - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC d -Statement insert_metadata = db.prepare(" -#+END_SRC - -tid document number unique -either: -- increment by adding 1 for each document, -- make hash of document filename or url and use? - -********* sql insert into - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC sql - INSERT INTO metadata_and_text ( - tid, - title, - title_main, - title_sub, - title_short, - title_edition, - title_note, - title_language, - title_language_char, - creator_author, - creator_author_email, - creator_author_hon, - creator_author_nationality, - creator_editor, - creator_contributor, - creator_illustrator, - creator_photographer, - creator_translator, - creator_prepared_by, - creator_digitized_by, - creator_audio, - creator_video, - language_document, - language_document_char, - language_original, - language_original_char, - date_added_to_site, - date_available, - date_created, - date_issued, - date_modified, - date_published, - date_valid, - date_translated, - date_original_publication, - date_generated, - publisher, - original_publisher, - original_language, - original_language_char, - original_source, - original_institution, - original_nationality, - original_title, - rights_copyright, - rights_copyright_audio, - rights_copyright_cover, - rights_copyright_illustrations, - rights_copyright_photographs, - rights_copyright_text, - rights_copyright_translation, - rights_copyright_video, - rights_license, - identifier_oclc, - identifier_isbn, - classify_topic_register, - classify_subject, - classify_loc, - classify_dewey, - classify_keywords, - notes_abstract, - notes_description, - notes_comment, - notes_coverage, - notes_relation, - notes_history, - notes_type, - notes_format, - notes_prefix, - notes_prefix_a, - notes_prefix_b, - notes_suffix, - src_filename, - src_fingerprint, - src_filesize, - src_wordcount, - src_text, - fulltext, - links - ) -#+END_SRC - -********* sql values - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC sql - VALUES ( - :tid, - :title, - :title_main, - :title_sub, - :title_short, - :title_edition, - :title_note, - :title_language, - :title_language_char, - :creator_author, - :creator_author_email, - :creator_author_hon, - :creator_author_nationality, - :creator_editor, - :creator_contributor, - :creator_illustrator, - :creator_photographer, - :creator_translator, - :creator_prepared_by, - :creator_digitized_by, - :creator_audio, - :creator_video, - :language_document, - :language_document_char, - :language_original, - :language_original_char, - :date_added_to_site, - :date_available, - :date_created, - :date_issued, - :date_modified, - :date_published, - :date_valid, - :date_translated, - :date_original_publication, - :date_generated, - :publisher, - :original_publisher, - :original_language, - :original_language_char, - :original_source, - :original_institution, - :original_nationality, - :original_title, - :rights_copyright, - :rights_copyright_audio, - :rights_copyright_cover, - :rights_copyright_illustrations, - :rights_copyright_photographs, - :rights_copyright_text, - :rights_copyright_translation, - :rights_copyright_video, - :rights_license, - :identifier_oclc, - :identifier_isbn, - :classify_topic_register, - :classify_subject, - :classify_loc, - :classify_dewey, - :classify_keywords, - :notes_abstract, - :notes_description, - :notes_comment, - :notes_coverage, - :notes_relation, - :notes_history, - :notes_type, - :notes_format, - :notes_prefix, - :notes_prefix_a, - :notes_prefix_b, - :notes_suffix, - :src_filename, - :src_fingerprint, - :src_filesize, - :src_wordcount, - :src_text, - :fulltext, - :links - ) -#+END_SRC - -******** d } - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC d -"); -#+END_SRC - -******* TODO insert document_metadata - -#+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC d -// insert_metadata.bind(":tid", ); // unique identifier you could try use sha of title author & language? or auto increment?? -insert_metadata.bind(":title", doc_matters.conf_make_meta.meta.title_full); -insert_metadata.bind(":title_main", doc_matters.conf_make_meta.meta.title_main); -insert_metadata.bind(":title_sub", doc_matters.conf_make_meta.meta.title_subtitle); -insert_metadata.bind(":title_short", doc_matters.conf_make_meta.meta.title_short); // -insert_metadata.bind(":title_edition", doc_matters.conf_make_meta.meta.title_edition); -insert_metadata.bind(":title_language", doc_matters.conf_make_meta.meta.title_language); -// insert_metadata.bind(":title_language_char", doc_matters.conf_make_meta.meta.title_language_char); -insert_metadata.bind(":classify_dewey", doc_matters.conf_make_meta.meta.classify_dewey); -insert_metadata.bind(":classify_keywords", doc_matters.conf_make_meta.meta.classify_keywords); -insert_metadata.bind(":classify_loc", doc_matters.conf_make_meta.meta.classify_loc); -insert_metadata.bind(":classify_subject", doc_matters.conf_make_meta.meta.classify_subject); -insert_metadata.bind(":classify_topic_register", doc_matters.conf_make_meta.meta.classify_topic_register); -insert_metadata.bind(":creator_author", doc_matters.conf_make_meta.meta.creator_author); -insert_metadata.bind(":creator_author_email", doc_matters.conf_make_meta.meta.creator_author_email); -// insert_metadata.bind(":creator_editor", doc_matters.conf_make_meta.meta.creator_editor); -// insert_metadata.bind(":creator_contributor", doc_matters.conf_make_meta.meta.creator_contributor); -insert_metadata.bind(":creator_illustrator", doc_matters.conf_make_meta.meta.creator_illustrator); -// insert_metadata.bind(":creator_photographer", doc_matters.conf_make_meta.meta.creator_photographer); -insert_metadata.bind(":creator_translator", doc_matters.conf_make_meta.meta.creator_translator); -// insert_metadata.bind(":creator_audio", doc_matters.conf_make_meta.meta.creator_audio); -// insert_metadata.bind(":creator_video", doc_matters.conf_make_meta.meta.creator_video); -insert_metadata.bind(":date_added_to_site", doc_matters.conf_make_meta.meta.date_added_to_site); -insert_metadata.bind(":date_available", doc_matters.conf_make_meta.meta.date_available); -insert_metadata.bind(":date_created", doc_matters.conf_make_meta.meta.date_created); -insert_metadata.bind(":date_issued", doc_matters.conf_make_meta.meta.date_issued); -insert_metadata.bind(":date_modified", doc_matters.conf_make_meta.meta.date_modified); -insert_metadata.bind(":date_published", doc_matters.conf_make_meta.meta.date_published); -insert_metadata.bind(":date_valid", doc_matters.conf_make_meta.meta.date_valid); -// insert_metadata.bind(":date_translated", doc_matters.conf_make_meta.meta.date_translated); -// insert_metadata.bind(":date_original_publication", doc_matters.conf_make_meta.meta.date_original_publication); -// insert_metadata.bind(":date_generated", doc_matters.conf_make_meta.meta.date_generated); -insert_metadata.bind(":identifier_isbn", doc_matters.conf_make_meta.meta.identifier_isbn); -insert_metadata.bind(":identifier_oclc", doc_matters.conf_make_meta.meta.identifier_oclc); -insert_metadata.bind(":language_document", doc_matters.conf_make_meta.meta.language_document); -insert_metadata.bind(":language_document_char", doc_matters.conf_make_meta.meta.language_document_char); -// insert_metadata.bind(":language_original", doc_matters.conf_make_meta.meta.language_original); -// insert_metadata.bind(":language_original_char", doc_matters.conf_make_meta.meta.language_original_char); -insert_metadata.bind(":notes_abstract", doc_matters.conf_make_meta.meta.notes_abstract); -insert_metadata.bind(":notes_description", doc_matters.conf_make_meta.meta.notes_description); -insert_metadata.bind(":original_publisher", doc_matters.conf_make_meta.meta.original_publisher); -insert_metadata.bind(":original_language", doc_matters.conf_make_meta.meta.original_language); -insert_metadata.bind(":original_language_char", doc_matters.conf_make_meta.meta.original_language_char); -insert_metadata.bind(":original_source", doc_matters.conf_make_meta.meta.original_source); -insert_metadata.bind(":original_title", doc_matters.conf_make_meta.meta.original_title); -insert_metadata.bind(":publisher", doc_matters.conf_make_meta.meta.publisher); -// insert_metadata.bind(":rights", doc_matters.conf_make_meta.meta.rights); -insert_metadata.bind(":rights_copyright", doc_matters.conf_make_meta.meta.rights_copyright); -insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_copyright_audio); -insert_metadata.bind(":rights_copyright_cover", doc_matters.conf_make_meta.meta.rights_copyright_cover); -insert_metadata.bind(":rights_copyright_illustrations", doc_matters.conf_make_meta.meta.rights_copyright_illustrations); -insert_metadata.bind(":rights_copyright_photographs", doc_matters.conf_make_meta.meta.rights_copyright_photographs); -// insert_metadata.bind(":rights_copyright_preparation", doc_matters.conf_make_meta.meta.rights_preparation); -insert_metadata.bind(":rights_copyright_text", doc_matters.conf_make_meta.meta.rights_copyright_text); -insert_metadata.bind(":rights_copyright_translation", doc_matters.conf_make_meta.meta.rights_copyright_translation); -insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_copyright_video); -// insert_metadata.bind(":rights_copyright_digitization", doc_matters.conf_make_meta.meta.rights_digitization); -// insert_metadata.bind(":rights_copyright_audio", doc_matters.conf_make_meta.meta.rights_audio); -// insert_metadata.bind(":rights_copyright_video", doc_matters.conf_make_meta.meta.rights_video); -insert_metadata.bind(":rights_license", doc_matters.conf_make_meta.meta.rights_license); -// insert_metadata.bind(":src_filename", doc_matters.conf_make_meta.meta.src_filename); -// insert_metadata.bind(":src_fingerprint", doc_matters.conf_make_meta.meta.src_fingerprint); -// insert_metadata.bind(":src_filesize", doc_matters.conf_make_meta.meta.src_filesize); -// insert_metadata.bind(":src_wordcount", doc_matters.conf_make_meta.meta.src_wordcount); -// insert_metadata.bind(":src_text", doc_matters.conf_make_meta.meta.src_text); -// insert_metadata.bind(":fulltext", doc_matters.conf_make_meta.meta.fulltext); -// insert_metadata.bind(":links", doc_matters.conf_make_meta.meta.links); -insert_metadata.execute(); insert_metadata.reset(); -/+ watch +/ -if ((doc_matters.opt.action.verbose)) { - writeln("sql statement executed"); -} -assert(db.totalChanges == 1); -#+END_SRC - -###+name: sqlite_insert_metadata_and_src_text -#+BEGIN_SRC d -insert_metadata.inject( - 1, - doc_matters.dochead_meta["title"]["full"], - "", - "", - "", - "", - "", - "", - "", - doc_matters.dochead_meta["creator"]["author"] -); -#+END_SRC - -****** 2. _doc objects_ (used with doc_objects in document loop) -******* prepare sql statement, sql utf8 -******** d { sql statement - -#+name: sqlite_insert_doc_objects -#+BEGIN_SRC d -string insertDocObjectsRow(O)(O obj) { - auto sql_insert_delimiter(string _txt) { - _txt = _txt - .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); - return _txt; - } - string _insert_doc_objects_row; - _insert_doc_objects_row = format(q"¶ -#+END_SRC - -********* sql statement: formatted - -lid unique, increment by 1 per object, not ocn - -metadata tid document number unique -either: -- increment by adding 1 for each document, -- make hash of document filename or url and use? - -********** INSERT INTO - -#+name: sqlite_insert_doc_objects -#+BEGIN_SRC sql - INSERT INTO doc_objects ( - ocn, - clean, - body, - lev, - t_of, - t_is - ) -#+END_SRC - -********** VALUES - -#+name: sqlite_insert_doc_objects -#+BEGIN_SRC sql - VALUES ( - %s, - '%s', - '%s', - %s, - '%s', - '%s' - ); -#+END_SRC - -********* sql statement: values for formatting - -#+name: sqlite_insert_doc_objects -#+BEGIN_SRC sql - ¶", - obj.ocn, - sql_insert_delimiter(obj_txt["text"]), - sql_insert_delimiter(obj_txt["html"]), - obj.heading_lev_markup, - obj.is_of, - obj.is_a, - ); -#+END_SRC - -******** d } - -#+name: sqlite_insert_doc_objects -#+BEGIN_SRC d - return _insert_doc_objects_row; -} -#+END_SRC - -******* TODO (within loop not here - insert doc objects +**** prepare insert statements and do pre-loop inserts +***** 1. _metadata & src_ text +***** 2. _doc objects_ (used with doc_objects in document loop) +****** TODO (within loop not here - insert doc objects work out *** 3. hub (sqlite_format_and_load_objects) -***** sql related +**** sql related #+name: sqlite_load_object #+BEGIN_SRC d @@ -1036,7 +583,7 @@ string sqlite_statement(O)( } #+END_SRC -***** heading +**** heading #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1061,7 +608,7 @@ auto heading(O)( } #+END_SRC -***** para +**** para #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1086,7 +633,7 @@ auto para(O)( } #+END_SRC -***** quote +**** quote #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1110,7 +657,7 @@ auto quote(O)( return obj_txt; } #+END_SRC -***** group +**** group #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1135,7 +682,7 @@ auto group(O)( } #+END_SRC -***** block +**** block #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1160,7 +707,7 @@ auto block(O)( } #+END_SRC -***** verse +**** verse #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1185,7 +732,7 @@ auto verse(O)( } #+END_SRC -***** code +**** code #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1210,7 +757,7 @@ auto code(O)( } #+END_SRC -***** table +**** table #+name: hub_format_and_sqlite_load_objects #+BEGIN_SRC d @@ -1235,19 +782,14 @@ auto table(O)( } #+END_SRC -** 3. loop, identify, load - loop template +** 4. loop, identify, load - loop template #+name: sqlite_objects_loop #+BEGIN_SRC d -auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); -pth_sqlite.base.mkdirRecurse; -auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); -SQLiteInstruct!()(db, doc_matters); // consider best location, need to feed individual objects for sqlite table: doc_objects, possibly a separate template? auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); string[string] obj_txt; string doc_text; string[] _insert_doc_objects; -_insert_doc_objects ~= "BEGIN;\n"; foreach (part; doc_parts) { foreach (obj; doc_abstraction[part]) { switch (obj.of_part) { @@ -1397,12 +939,421 @@ foreach (part; doc_parts) { } } // loop closes } -_insert_doc_objects ~= "COMMIT"; -debug(sql_statement) { - writeln("#+BEGIN_SRC sql\n", _insert_doc_objects.join, "\n#+END_SRC"); -} -std.utf.validate(_insert_doc_objects.join); // TODO -db.run(_insert_doc_objects.join.to!(char[]).toUTF8); +return _insert_doc_objects.join.to!(char[]).toUTF8; +#+END_SRC + +** 5. SQL statements +*** create tables + +**** DROP INDEX IF EXISTS + +#+name: sqlite_statement_drop_existing_index +#+BEGIN_SRC sql +DROP INDEX IF EXISTS ocn; +DROP INDEX IF EXISTS digest_clean; +DROP INDEX IF EXISTS digest_all; +DROP INDEX IF EXISTS clean; +DROP INDEX IF EXISTS lev0; +DROP INDEX IF EXISTS lev1; +DROP INDEX IF EXISTS lev2; +DROP INDEX IF EXISTS lev3; +DROP INDEX IF EXISTS lev4; +DROP INDEX IF EXISTS lev5; +DROP INDEX IF EXISTS lev6; +DROP INDEX IF EXISTS title; +DROP INDEX IF EXISTS creator_author; +DROP INDEX IF EXISTS src_filename; +DROP INDEX IF EXISTS language_document_char; +DROP INDEX IF EXISTS classify_topic_register; +#+END_SRC + +**** DROP TABLE IF EXISTS + +#+name: sqlite_statement_drop_existing_tables +#+BEGIN_SRC sql +DROP TABLE IF EXISTS metadata_and_text; +DROP TABLE IF EXISTS doc_objects; +DROP TABLE IF EXISTS urls; +#+END_SRC + +**** CREATE TABLE metadata_and_text + +#+name: sqlite_statement_create_table_metadata_and_src_txt +#+BEGIN_SRC sql +CREATE TABLE metadata_and_text ( + tid INTEGER PRIMARY KEY, +/* src_filename_composite VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */ + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NOT NULL, + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_note VARCHAR(2500) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NOT NULL, + creator_author_email VARCHAR(100) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NOT NULL, + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + publisher VARCHAR(600) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) NULL, + original_title VARCHAR(800) NULL, + rights_copyright VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_cover VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + src_filename VARCHAR(256) NOT NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */ + pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */ + src_text TEXT NULL, + fulltext TEXT NULL, + links TEXT NULL +); +#+END_SRC + +**** CREATE TABLE doc_objects + +#+name: sqlite_statement_create_table_objects +#+BEGIN_SRC sql +CREATE TABLE doc_objects ( + lid INTEGER PRIMARY KEY, + metadata_tid INTEGER REFERENCES metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + book_idx TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev0 SMALLINT, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + lev7 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(256), + digest_all CHAR(256), + types CHAR(1) NULL +); +#+END_SRC + +**** CREATE INDEX + +#+name: sqlite_statement_create_index +#+BEGIN_SRC sql +CREATE INDEX idx_ocn ON doc_objects(ocn); +CREATE INDEX idx_digest_clean ON doc_objects(digest_clean); +CREATE INDEX idx_digest_all ON doc_objects(digest_all); +CREATE INDEX idx_clean ON doc_objects(clean); +CREATE INDEX idx_lev0 ON doc_objects(lev0); +CREATE INDEX idx_lev1 ON doc_objects(lev1); +CREATE INDEX idx_lev2 ON doc_objects(lev2); +CREATE INDEX idx_lev3 ON doc_objects(lev3); +CREATE INDEX idx_lev4 ON doc_objects(lev4); +CREATE INDEX idx_lev5 ON doc_objects(lev5); +CREATE INDEX idx_lev6 ON doc_objects(lev6); +CREATE INDEX idx_title ON metadata_and_text(title); +CREATE INDEX idx_author ON metadata_and_text(creator_author); +CREATE INDEX idx_filename ON metadata_and_text(src_filename); +CREATE INDEX idx_language ON metadata_and_text(language_document_char); +CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register); +#+END_SRC + +**** TODO local site link & info + +*** sql insert statement formatted doc objects + +lid unique, increment by 1 per object, not ocn + +metadata tid document number unique +either: +- increment by adding 1 for each document, +- make hash of document filename or url and use? + +**** sql statement: dlang format +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC d +string _insert_doc_objects_row; +_insert_doc_objects_row = format(q"¶ +#+END_SRC + +**** INSERT INTO + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC sql + INSERT INTO doc_objects ( + metadata_tid, + ocn, + clean, + body, + lev, + t_of, + t_is + ) +#+END_SRC + +**** VALUES + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC sql + VALUES ( + %d, + %s, + '%s', + '%s', + %s, + '%s', + '%s' + ); +#+END_SRC + +**** sql statement: dlang values for formatting + +#+name: sqlite_formatted_insertions_doc_objects +#+BEGIN_SRC d +¶", + 1, + obj.ocn, + SQLinsertDelimiter!()(obj_txt["text"]), + SQLinsertDelimiter!()(obj_txt["html"]), + obj.heading_lev_markup, + obj.is_of, + obj.is_a, +); +#+END_SRC + +*** sqlite insert statement formatted doc matters metadata +**** sql statement: dlang format +#+name: sqlite_formatted_insertions_doc_matters_metadata +#+BEGIN_SRC d +string _insert_metadata; +_insert_metadata = format(q"¶ +#+END_SRC + +**** INSERT INTO + +#+name: sqlite_formatted_insertions_doc_matters_metadata +#+BEGIN_SRC sql + INSERT INTO metadata_and_text ( + src_filename, + title, + title_main, + title_sub, + title_short, + title_edition, + title_language, + classify_dewey, + classify_keywords, + classify_loc, + classify_subject, + classify_topic_register, + creator_author, + creator_author_email, + creator_illustrator, + creator_translator, + date_added_to_site, + date_available, + date_created, + date_issued, + date_modified, + date_published, + date_valid, + identifier_isbn, + identifier_oclc, + language_document, + language_document_char, + notes_abstract, + notes_description, + original_publisher, + original_language, + original_language_char, + original_source, + original_title, + publisher, + rights_copyright, + rights_copyright_audio, + rights_copyright_cover, + rights_copyright_illustrations, + rights_copyright_photographs, + rights_copyright_text, + rights_copyright_translation, + rights_copyright_video, + rights_license + ) +#+END_SRC + +**** VALUES + +#+name: sqlite_formatted_insertions_doc_matters_metadata +#+BEGIN_SRC sql + VALUES ( + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s', + '%s' + ); +#+END_SRC + +**** sql statement: values for formatting + +#+name: sqlite_formatted_insertions_doc_matters_metadata +#+BEGIN_SRC d +¶", + SQLinsertDelimiter!()(doc_matters.src.filename), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_full), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_main), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_subtitle), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_issued), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_illustrations), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_photographs), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_text), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license) +); #+END_SRC * __END__ |