module sdp.output.sqlite; import sdp.output; import std.file, std.conv : to; import d2sqlite3; import std.typecons : Nullable; mixin SiSUoutputRgxInit; static auto rgx = Rgx(); long _metadata_tid_lastrowid; template SQLiteHubBuildTablesAndPopulate() { void SQLiteHubBuildTablesAndPopulate(D,I)( auto ref const D doc_abstraction, auto ref I doc_matters, ) { auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.output_path); pth_sqlite.base.mkdirRecurse; auto db = Database(pth_sqlite.sqlite_file(doc_matters.env.pwd.baseName)); template SQLiteDbStatementComposite() { void SQLiteDbStatementComposite(Db,D,I)( auto ref Db db, auto ref const D doc_abstraction, auto ref I doc_matters, ) { { string _db_statement; { if ((doc_matters.opt.action.sqlite_create)) { auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.output_path); pth_sqlite.base.mkdirRecurse; _db_statement ~= SQLiteTablesReCreate!()(); SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "TABLE RE-CREATE"); } if ((doc_matters.opt.action.sqlite_update)) { // TODO _db_statement ~= SQLiteInsertMetadata!()(doc_matters); SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table INSERT MetaData"); /+ get tid (lastrowid or max) for use in doc_objects table +/ _metadata_tid_lastrowid = db.lastInsertRowid(); _db_statement ~= SQLiteInsertDocObjectsLoop!()(doc_abstraction, doc_matters, _metadata_tid_lastrowid); // FIX SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table INSERT DocObjects"); } } db.close; } } } SQLiteDbStatementComposite!()(db, doc_abstraction, doc_matters); } } template SQLiteHubDiscreteBuildTablesAndPopulate() { void SQLiteHubDiscreteBuildTablesAndPopulate(D,I)( auto ref const D doc_abstraction, auto ref I doc_matters, ) { auto pth_sqlite = SiSUpathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); pth_sqlite.base.mkdirRecurse; auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); template SQLiteDiscreteDbStatementComposite() { void SQLiteDiscreteDbStatementComposite(Db,D,I)( auto ref Db db, auto ref const D doc_abstraction, auto ref I doc_matters, ) { { string _db_statement; { _db_statement ~= SQLiteTablesReCreate!()(); _db_statement ~= SQLiteInsertMetadata!()(doc_matters); _db_statement ~= SQLiteInsertDocObjectsLoop!()(doc_abstraction, doc_matters, 1); // FIX SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table CREATE Tables, INSERT DocObjects"); } db.close; } } } SQLiteDiscreteDbStatementComposite!()(db, doc_abstraction, doc_matters); } } template SQLiteDbRun() { void SQLiteDbRun(Db,St,O)( auto ref Db db, auto ref St db_statement, auto ref O opt_action, string note, ) { debug(sql_statement) { writeln(db_statement); } try { db.run( "\nBEGIN;\n" ~ db_statement ~ "\nCOMMIT;\n" ); if (!(opt_action.sqlite_discrete) && !(opt_action.sqlite_create) ) { _metadata_tid_lastrowid = db.lastInsertRowid(); writeln("last row id: ", _metadata_tid_lastrowid); } } catch (ErrnoException ex) { writeln("ERROR SQLite : ", ex); } catch (Exception ex) { writeln("ERROR SQLite : ", ex); } if (opt_action.debug_do) { writeln(note); if (opt_action.verbose) { writeln(db_statement); } } } } template SQLinsertDelimiter() { auto SQLinsertDelimiter(string _txt) { _txt = _txt .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); return _txt; } } template SQLiteFormatAndLoadObject() { auto SQLiteFormatAndLoadObject(I)( auto ref I doc_matters, ) { mixin SiSUoutputRgxInit; struct sqlite_format_and_load_objects { auto generic_munge_sanitize_text_for_search( string _txt, ) { string _notes; string _urls; if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; } _txt = _txt.replaceAll(rgx.inline_notes_al_gen, ""); } if (_txt.matchFirst(rgx.inline_link)) { foreach (m; _txt.matchAll(rgx.inline_link)) { if (m["link"].match(rgx.url)) { _urls ~= "\n" ~ m["link"]; } } _txt = _txt.replaceAll(rgx.inline_link_clean, ""); } if (_notes.length > 0) { _txt ~= _notes; } if (_urls.length > 0) { _txt ~= _urls; } if (doc_matters.opt.action.debug_do) { writeln(_txt, "\n"); } debug(sql_text_clean) { writeln(_txt); } return _txt; } auto munge_html(O)( auto return ref const O obj, ) { string _html_special_characters(string _txt){ _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") .replaceAll(rgx.xhtml_greater_than, ">") .replaceAll(rgx.nbsp_char, " ") .replaceAll(rgx.xhtml_line_break, "<br />"); return _txt; } string _html_font_face(string _txt){ _txt = _txt .replaceAll(rgx.inline_emphasis, "<em>$1</em>") .replaceAll(rgx.inline_bold, "<b>$1</b>") .replaceAll(rgx.inline_underscore, "<u>$1</u>") .replaceAll(rgx.inline_italics, "<i>$1</i>") .replaceAll(rgx.inline_superscript, "<sup>$1</sup>") .replaceAll(rgx.inline_subscript, "<sub>$1</sub>") .replaceAll(rgx.inline_strike, "<del>$1</del>") .replaceAll(rgx.inline_insert, "<ins>$1</ins>") .replaceAll(rgx.inline_mono, "<tt>$1</tt>") .replaceAll(rgx.inline_cite, "<cite>$1</cite>"); return _txt; } string _notes; string _urls; string _txt = _html_font_face(_html_special_characters(obj.text)); if (_txt.matchFirst(rgx.inline_notes_al_gen)) { foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { _notes ~= "\n" ~ m["text"]; } _txt = _txt.replaceAll(rgx.inline_notes_al_gen_ref, "<sup>$1</sup> "); } if (_txt.matchFirst(rgx.inline_link)) { foreach (m; _txt.matchAll(rgx.inline_link)) { } _txt = _txt.replaceAll(rgx.inline_link_clean, ""); } if (_notes.length > 0) { _txt ~= _notes; } if (doc_matters.opt.action.debug_do) { writeln(_txt, "\n"); } return _txt; } string html_special_characters(string _txt){ _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") .replaceAll(rgx.xhtml_greater_than, ">") .replaceAll(rgx.nbsp_char, " ") .replaceAll(rgx.xhtml_line_break, "<br />"); return _txt; } string html_special_characters_code(string _txt){ _txt = _txt .replaceAll(rgx.xhtml_ampersand, "&") .replaceAll(rgx.xhtml_quotation, """) .replaceAll(rgx.xhtml_less_than, "<") .replaceAll(rgx.xhtml_greater_than, ">") .replaceAll(rgx.nbsp_char, " "); return _txt; } string html_font_face(string _txt){ _txt = _txt .replaceAll(rgx.inline_emphasis, "<em>$1</em>") .replaceAll(rgx.inline_bold, "<b>$1</b>") .replaceAll(rgx.inline_underscore, "<u>$1</u>") .replaceAll(rgx.inline_italics, "<i>$1</i>") .replaceAll(rgx.inline_superscript, "<sup>$1</sup>") .replaceAll(rgx.inline_subscript, "<sub>$1</sub>") .replaceAll(rgx.inline_strike, "<del>$1</del>") .replaceAll(rgx.inline_insert, "<ins>$1</ins>") .replaceAll(rgx.inline_mono, "<tt>$1</tt>") .replaceAll(rgx.inline_cite, "<cite>$1</cite>"); return _txt; } auto html_heading(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶<p class="%s"><b> %s </b></p>¶", obj.typeinfo.is_a, _txt, ); return o; } auto html_para(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); _txt = (obj.attrib.bullet) ? ("●  " ~ _txt) : _txt; string o = format(q"¶<p class="%s" indent="h%si%s"> %s </p>¶", obj.typeinfo.is_a, obj.attrib.indent_hang, obj.attrib.indent_base, _txt ); return o; } auto html_quote(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶<p class="%s"> %s </p>¶", obj.typeinfo.is_a, _txt ); return o; } auto html_group(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶<p class="%s"> %s </p>¶", obj.typeinfo.is_a, _txt ); return o; } auto html_block(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶ <p class="%s">%s</p>¶", obj.typeinfo.is_a, _txt.stripRight ); return o; } auto html_verse(O)( auto return ref const O obj, ) { string _txt = munge_html(obj); string o = format(q"¶<p class="%s">%s</p>¶", obj.typeinfo.is_a, _txt ); return o; } auto html_code(O)( auto return ref const O obj, ) { string _txt = html_special_characters_code(obj.text); string o = format(q"¶<p class="%s">%s</p>¶", obj.typeinfo.is_a, _txt ); return o; } auto html_table(O)( auto return ref const O obj, ) { auto _tablarize(O)( auto return ref const O obj, string _txt, ) { string[] _table_rows = _txt.split(rgx.table_delimiter_row); string[] _table_cols; string _table; string _tablenote; foreach(row_idx, row; _table_rows) { _table_cols = row.split(rgx.table_delimiter_col); _table ~= "<tr>"; foreach(col_idx, cell; _table_cols) { if ((_table_cols.length == 1) && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) _tablenote ~= cell; } else { string _col_is = (row_idx == 0 && obj.table.heading) ? "th" : "td"; string _align = ("style=\"text-align:" ~ ((obj.table.column_aligns[col_idx] == "l") ? "left\"" : "right\"")); _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table.column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">"; _table ~= cell; _table ~= "</" ~ _col_is ~ ">"; } } _table ~= "</tr>"; } auto t = tuple( _table, _tablenote, ); return t; } string _txt = munge_html(obj); auto t = _tablarize(obj, _txt); _txt = t[0]; string _note = t[1]; string o = format(q"¶<p class="%s"> <table summary="normal text css" width="95%%" border="0" bgcolor="white" cellpadding="2" align="center"> %s </table> %s </p>¶", obj.typeinfo.is_a, _txt, _note ); return o; } auto sqlite_load_string(O,Dm)( auto return ref const O obj, auto return ref Dm doc_matters, ) { string o; return o; } auto postgresql_load_string(O,Dm)( auto return ref const O obj, auto return ref Dm doc_matters, ) { string o; return o; } string sqlite_statement(O)( auto return ref const O obj, string _txt, string _html, ) { void _sql_exe(O)( string _sql, ) { writeln(_html); writeln(_sql); } string _sql; return _sql; } auto heading(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_heading(obj) ]; if (doc_matters.opt.action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } auto para(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_para(obj) ]; if (doc_matters.opt.action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } auto quote(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_quote(obj) ]; if (doc_matters.opt.action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } auto group(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_group(obj) ]; if (doc_matters.opt.action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } auto block(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_block(obj) ]; if (doc_matters.opt.action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } auto verse(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_verse(obj) ]; if (doc_matters.opt.action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } auto code(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_code(obj) ]; if (doc_matters.opt.action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } auto table(O)( auto return ref const O obj, ) { string[string] obj_txt = [ "text": generic_munge_sanitize_text_for_search(obj.text), "html": html_table(obj) ]; if (doc_matters.opt.action.debug_do) { debug(sql_txt) { writeln(obj_txt["text"]); } debug(sql_html) { writeln(obj_txt["html"]); } } else { // load sql } return obj_txt; } } return sqlite_format_and_load_objects(); } } template SQLiteTablesReCreate() { string SQLiteTablesReCreate()() { string _sql_instruct; _sql_instruct = format(q"¶ DROP INDEX IF EXISTS ocn; DROP INDEX IF EXISTS digest_clean; DROP INDEX IF EXISTS digest_all; DROP INDEX IF EXISTS clean; DROP INDEX IF EXISTS lev0; DROP INDEX IF EXISTS lev1; DROP INDEX IF EXISTS lev2; DROP INDEX IF EXISTS lev3; DROP INDEX IF EXISTS lev4; DROP INDEX IF EXISTS lev5; DROP INDEX IF EXISTS lev6; DROP INDEX IF EXISTS title; DROP INDEX IF EXISTS creator_author; DROP INDEX IF EXISTS src_filename; DROP INDEX IF EXISTS language_document_char; DROP INDEX IF EXISTS classify_topic_register; DROP TABLE IF EXISTS metadata_and_text; DROP TABLE IF EXISTS doc_objects; DROP TABLE IF EXISTS urls; CREATE TABLE metadata_and_text ( tid INTEGER PRIMARY KEY, src_composite_id_per_txt VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */ src_composite_id_per_pod VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */ title VARCHAR(800) NOT NULL, title_main VARCHAR(400) NOT NULL, title_sub VARCHAR(400) NULL, title_short VARCHAR(400) NULL, title_edition VARCHAR(10) NULL, title_note VARCHAR(2500) NULL, title_language VARCHAR(100) NULL, title_language_char VARCHAR(6) NULL, creator_author VARCHAR(600) NOT NULL, creator_author_email VARCHAR(100) NULL, creator_author_hon VARCHAR(100) NULL, creator_author_nationality VARCHAR(100) NULL, creator_editor VARCHAR(600) NULL, creator_contributor VARCHAR(600) NULL, creator_illustrator VARCHAR(600) NULL, creator_photographer VARCHAR(600) NULL, creator_translator VARCHAR(600) NULL, creator_prepared_by VARCHAR(600) NULL, creator_digitized_by VARCHAR(600) NULL, creator_audio VARCHAR(600) NULL, creator_video VARCHAR(600) NULL, language_document VARCHAR(100) NULL, language_document_char VARCHAR(6) NOT NULL, language_original VARCHAR(100) NULL, language_original_char VARCHAR(6) NULL, date_added_to_site VARCHAR(10) NULL, date_available VARCHAR(10) NULL, date_created VARCHAR(10) NULL, date_issued VARCHAR(10) NULL, date_modified VARCHAR(10) NULL, date_published VARCHAR(10) NULL, date_valid VARCHAR(10) NULL, date_translated VARCHAR(10) NULL, date_original_publication VARCHAR(10) NULL, date_generated VARCHAR(10) NULL, publisher VARCHAR(600) NULL, original_publisher VARCHAR(600) NULL, original_language VARCHAR(100) NULL, original_language_char VARCHAR(6) NULL, original_source VARCHAR(600) NULL, original_institution VARCHAR(600) NULL, original_nationality VARCHAR(100) NULL, original_title VARCHAR(800) NULL, rights_copyright VARCHAR(2500) NULL, rights_copyright_audio VARCHAR(2500) NULL, rights_copyright_cover VARCHAR(2500) NULL, rights_copyright_illustrations VARCHAR(2500) NULL, rights_copyright_photographs VARCHAR(2500) NULL, rights_copyright_text VARCHAR(2500) NULL, rights_copyright_translation VARCHAR(2500) NULL, rights_copyright_video VARCHAR(2500) NULL, rights_license VARCHAR(2500) NULL, identifier_oclc VARCHAR(30) NULL, identifier_isbn VARCHAR(16) NULL, classify_topic_register VARCHAR(2500) NULL, classify_subject VARCHAR(600) NULL, classify_loc VARCHAR(30) NULL, classify_dewey VARCHAR(30) NULL, classify_keywords VARCHAR(600) NULL, notes_abstract TEXT NULL, notes_description TEXT NULL, notes_comment TEXT NULL, notes_coverage VARCHAR(200) NULL, notes_relation VARCHAR(200) NULL, notes_history VARCHAR(600) NULL, notes_type VARCHAR(600) NULL, notes_format VARCHAR(600) NULL, notes_prefix TEXT NULL, notes_prefix_a TEXT NULL, notes_prefix_b TEXT NULL, notes_suffix TEXT NULL, src_filename VARCHAR(256) NOT NULL, src_fingerprint VARCHAR(256) NULL, src_filesize VARCHAR(10) NULL, src_wordcount VARCHAR(10) NULL, pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */ pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */ pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */ src_text TEXT NULL, fulltext TEXT NULL, links TEXT NULL ); CREATE TABLE doc_objects ( lid INTEGER PRIMARY KEY, metadata_tid INTEGER REFERENCES metadata_and_text, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, seg VARCHAR(256) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, t_of VARCHAR(16), t_is VARCHAR(16), node VARCHAR(16) NULL, parent VARCHAR(16) NULL, digest_clean CHAR(256), digest_all CHAR(256), types CHAR(1) NULL ); CREATE INDEX idx_ocn ON doc_objects(ocn); CREATE INDEX idx_digest_clean ON doc_objects(digest_clean); CREATE INDEX idx_digest_all ON doc_objects(digest_all); CREATE INDEX idx_clean ON doc_objects(clean); CREATE INDEX idx_title ON metadata_and_text(title); CREATE INDEX idx_author ON metadata_and_text(creator_author); CREATE INDEX idx_filename ON metadata_and_text(src_filename); CREATE INDEX idx_language ON metadata_and_text(language_document_char); CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register); ¶",); return _sql_instruct; } } template SQLiteInsertMetadata() { string SQLiteInsertMetadata(I)( auto ref I doc_matters, ) { string _insert_metadata; _insert_metadata = format(q"¶ INSERT INTO metadata_and_text ( src_filename, src_composite_id_per_txt, src_composite_id_per_pod, title, title_main, title_sub, title_short, title_edition, title_language, classify_dewey, classify_keywords, classify_loc, classify_subject, classify_topic_register, creator_author, creator_author_email, creator_illustrator, creator_translator, date_added_to_site, date_available, date_created, date_issued, date_modified, date_published, date_valid, identifier_isbn, identifier_oclc, language_document, language_document_char, notes_abstract, notes_description, original_publisher, original_language, original_language_char, original_source, original_title, publisher, rights_copyright, rights_copyright_audio, rights_copyright_cover, rights_copyright_illustrations, rights_copyright_photographs, rights_copyright_text, rights_copyright_translation, rights_copyright_video, rights_license ) VALUES ( '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s' ); ¶", SQLinsertDelimiter!()(doc_matters.src.filename), SQLinsertDelimiter!()(doc_matters.src.docname_composite_unique_per_src_doc), SQLinsertDelimiter!()(doc_matters.src.docname_composite_unique_per_src_pod), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_full), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_main), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_subtitle), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_issued), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_illustrations), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_photographs), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_text), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video), SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license) ); return _insert_metadata; } } template SQLiteInsertDocObjectsLoop() { string SQLiteInsertDocObjectsLoop(D,I,X)( auto ref const D doc_abstraction, auto ref I doc_matters, auto ref X _metadata_tid, ) { string insertDocObjectsRow(O)(O obj) { string _insert_doc_objects_row; _insert_doc_objects_row = format(q"¶ INSERT INTO doc_objects ( metadata_tid, ocn, clean, body, lev, t_of, t_is ) VALUES ( %d, %s, '%s', '%s', %s, '%s', '%s' ); ¶", _metadata_tid, obj.node.ocn, SQLinsertDelimiter!()(obj_txt["text"]), SQLinsertDelimiter!()(obj_txt["html"]), obj.node.heading_lev_markup, obj.typeinfo.is_of_type, obj.typeinfo.is_a, ); return _insert_doc_objects_row; } auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); string[string] obj_txt; string doc_text; string[] _insert_doc_objects; foreach (part; doc_matters.xml.keys_seq.sql) { foreach (obj; doc_abstraction[part]) { switch (obj.typeinfo.is_of_part) { case "frontmatter": assert(part == "head", part); switch (obj.typeinfo.is_of_type) { case "para": switch (obj.typeinfo.is_a) { case "heading": obj_txt = format_and_sqlite_load.heading(obj); break; default: if ((doc_matters.opt.action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.typeinfo.is_a); } break; } break; default: if ((doc_matters.opt.action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.typeinfo.is_of_type); } break; } break; case "body": // assert(part == "body", part); // TODO broken switch (obj.typeinfo.is_of_type) { case "para": switch (obj.typeinfo.is_a) { case "heading": debug (asserts) { // TODO consider and fix or remove if (part != "body") { writeln(__LINE__, ": ", obj.text); } } obj_txt = format_and_sqlite_load.heading(obj); break; case "para": obj_txt = format_and_sqlite_load.para(obj); break; default: if ((doc_matters.opt.action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.typeinfo.is_a); } break; } break; case "block": switch (obj.typeinfo.is_a) { case "quote": obj_txt = format_and_sqlite_load.quote(obj); break; case "group": obj_txt = format_and_sqlite_load.group(obj); break; case "block": obj_txt = format_and_sqlite_load.block(obj); break; case "poem": // double check on keeping both poem & verse break; case "verse": obj_txt = format_and_sqlite_load.verse(obj); break; case "code": obj_txt = format_and_sqlite_load.code(obj); break; case "table": obj_txt = format_and_sqlite_load.table(obj); break; default: if ((doc_matters.opt.action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.typeinfo.is_a); } break; } break; default: if ((doc_matters.opt.action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.typeinfo.is_of_type); } break; } break; case "backmatter": assert(part == "glossary" || "bibliography" || "bookindex_seg" || "blurb" || "tail", part); switch (obj.typeinfo.is_of_type) { case "para": switch (obj.typeinfo.is_a) { case "heading": obj_txt = format_and_sqlite_load.heading(obj); break; case "glossary": assert(part == "glossary", part); obj_txt = format_and_sqlite_load.para(obj); break; case "bibliography": assert(part == "bibliography", part); obj_txt = format_and_sqlite_load.para(obj); break; case "bookindex": assert(part == "bookindex_seg", part); obj_txt = format_and_sqlite_load.para(obj); break; case "blurb": assert(part == "blurb", part); obj_txt = format_and_sqlite_load.para(obj); break; default: if ((doc_matters.opt.action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.typeinfo.is_a); } break; } break; default: if ((doc_matters.opt.action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.typeinfo.is_of_type); } break; } break; case "comment": break; default: if ((doc_matters.opt.action.debug_do)) { writeln(__FILE__, ":", __LINE__, ": ", obj.typeinfo.is_of_part); // check where empty value could come from writeln(__FILE__, ":", __LINE__, ": ", obj.typeinfo.is_a); writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from } break; } if (obj.typeinfo.is_a == "heading") { if ((doc_matters.opt.action.verbose)) { writeln( "markup: ", obj.node.heading_lev_markup, "> ", obj.node.dom_markedup, "; collapsed: ", obj.node.heading_lev_collapsed, "> ", obj.node.dom_collapsed, "; ocn: ", obj.node.ocn, " node: ", obj.node.node, "; parent: ", obj.node.parent_lev_markup, "; ocn: ", obj.node.parent_ocn, "; ", ); } } if (!(obj.typeinfo.is_a == "comment")) { _insert_doc_objects ~= insertDocObjectsRow(obj); } } // loop closes } return _insert_doc_objects.join.to!(char[]).toUTF8; } } template SQLiteTablesCreate() { void SQLiteTablesCreate(E,O)(E env, O opt_action) { import d2sqlite3; template SQLiteTablesReCreate() { string SQLiteTablesReCreate()() { string _sql_instruct; _sql_instruct = format(q"¶ DROP INDEX IF EXISTS ocn; DROP INDEX IF EXISTS digest_clean; DROP INDEX IF EXISTS digest_all; DROP INDEX IF EXISTS clean; DROP INDEX IF EXISTS lev0; DROP INDEX IF EXISTS lev1; DROP INDEX IF EXISTS lev2; DROP INDEX IF EXISTS lev3; DROP INDEX IF EXISTS lev4; DROP INDEX IF EXISTS lev5; DROP INDEX IF EXISTS lev6; DROP INDEX IF EXISTS title; DROP INDEX IF EXISTS creator_author; DROP INDEX IF EXISTS src_filename; DROP INDEX IF EXISTS language_document_char; DROP INDEX IF EXISTS classify_topic_register; DROP TABLE IF EXISTS metadata_and_text; DROP TABLE IF EXISTS doc_objects; DROP TABLE IF EXISTS urls; CREATE TABLE metadata_and_text ( tid INTEGER PRIMARY KEY, src_composite_id_per_txt VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */ src_composite_id_per_pod VARCHAR(256) NOT NULL UNIQUE, /* z pod name if any + src filename + language code */ title VARCHAR(800) NOT NULL, title_main VARCHAR(400) NOT NULL, title_sub VARCHAR(400) NULL, title_short VARCHAR(400) NULL, title_edition VARCHAR(10) NULL, title_note VARCHAR(2500) NULL, title_language VARCHAR(100) NULL, title_language_char VARCHAR(6) NULL, creator_author VARCHAR(600) NOT NULL, creator_author_email VARCHAR(100) NULL, creator_author_hon VARCHAR(100) NULL, creator_author_nationality VARCHAR(100) NULL, creator_editor VARCHAR(600) NULL, creator_contributor VARCHAR(600) NULL, creator_illustrator VARCHAR(600) NULL, creator_photographer VARCHAR(600) NULL, creator_translator VARCHAR(600) NULL, creator_prepared_by VARCHAR(600) NULL, creator_digitized_by VARCHAR(600) NULL, creator_audio VARCHAR(600) NULL, creator_video VARCHAR(600) NULL, language_document VARCHAR(100) NULL, language_document_char VARCHAR(6) NOT NULL, language_original VARCHAR(100) NULL, language_original_char VARCHAR(6) NULL, date_added_to_site VARCHAR(10) NULL, date_available VARCHAR(10) NULL, date_created VARCHAR(10) NULL, date_issued VARCHAR(10) NULL, date_modified VARCHAR(10) NULL, date_published VARCHAR(10) NULL, date_valid VARCHAR(10) NULL, date_translated VARCHAR(10) NULL, date_original_publication VARCHAR(10) NULL, date_generated VARCHAR(10) NULL, publisher VARCHAR(600) NULL, original_publisher VARCHAR(600) NULL, original_language VARCHAR(100) NULL, original_language_char VARCHAR(6) NULL, original_source VARCHAR(600) NULL, original_institution VARCHAR(600) NULL, original_nationality VARCHAR(100) NULL, original_title VARCHAR(800) NULL, rights_copyright VARCHAR(2500) NULL, rights_copyright_audio VARCHAR(2500) NULL, rights_copyright_cover VARCHAR(2500) NULL, rights_copyright_illustrations VARCHAR(2500) NULL, rights_copyright_photographs VARCHAR(2500) NULL, rights_copyright_text VARCHAR(2500) NULL, rights_copyright_translation VARCHAR(2500) NULL, rights_copyright_video VARCHAR(2500) NULL, rights_license VARCHAR(2500) NULL, identifier_oclc VARCHAR(30) NULL, identifier_isbn VARCHAR(16) NULL, classify_topic_register VARCHAR(2500) NULL, classify_subject VARCHAR(600) NULL, classify_loc VARCHAR(30) NULL, classify_dewey VARCHAR(30) NULL, classify_keywords VARCHAR(600) NULL, notes_abstract TEXT NULL, notes_description TEXT NULL, notes_comment TEXT NULL, notes_coverage VARCHAR(200) NULL, notes_relation VARCHAR(200) NULL, notes_history VARCHAR(600) NULL, notes_type VARCHAR(600) NULL, notes_format VARCHAR(600) NULL, notes_prefix TEXT NULL, notes_prefix_a TEXT NULL, notes_prefix_b TEXT NULL, notes_suffix TEXT NULL, src_filename VARCHAR(256) NOT NULL, src_fingerprint VARCHAR(256) NULL, src_filesize VARCHAR(10) NULL, src_wordcount VARCHAR(10) NULL, pod_name VARCHAR(256) NULL, /* zipped pod, work to be done here */ pod_fingerprint VARCHAR(256) NULL, /* zipped pod, work to be done here */ pod_size VARCHAR(10) NULL, /* zipped pod, work to be done here */ src_text TEXT NULL, fulltext TEXT NULL, links TEXT NULL ); CREATE TABLE doc_objects ( lid INTEGER PRIMARY KEY, metadata_tid INTEGER REFERENCES metadata_and_text, ocn SMALLINT, ocnd VARCHAR(6), ocns VARCHAR(6), clean TEXT NULL, body TEXT NULL, seg VARCHAR(256) NULL, lev_an VARCHAR(1), lev SMALLINT NULL, t_of VARCHAR(16), t_is VARCHAR(16), node VARCHAR(16) NULL, parent VARCHAR(16) NULL, digest_clean CHAR(256), digest_all CHAR(256), types CHAR(1) NULL ); CREATE INDEX idx_ocn ON doc_objects(ocn); CREATE INDEX idx_digest_clean ON doc_objects(digest_clean); CREATE INDEX idx_digest_all ON doc_objects(digest_all); CREATE INDEX idx_clean ON doc_objects(clean); CREATE INDEX idx_title ON metadata_and_text(title); CREATE INDEX idx_author ON metadata_and_text(creator_author); CREATE INDEX idx_filename ON metadata_and_text(src_filename); CREATE INDEX idx_language ON metadata_and_text(language_document_char); CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register); ¶",); return _sql_instruct; } } if (opt_action.sqlite_create) { string _db_statement; auto pth_sqlite = SiSUpathsSQLite!()(opt_action.output_dir_set); // ISSUE pth_sqlite.base.mkdirRecurse; auto db = Database(pth_sqlite.sqlite_file); // ISSUE { _db_statement ~= SQLiteTablesReCreate!()(); } SQLiteDbRun!()(db, _db_statement, opt_action, "TABLE RE-CREATE"); } } } template SQLiteTablesDrop() { void SQLiteTablesDrop()() { writeln("table drop"); } }