diff options
Diffstat (limited to 'src/sisudoc/io_out/sqlite.d')
-rw-r--r-- | src/sisudoc/io_out/sqlite.d | 1761 |
1 files changed, 1761 insertions, 0 deletions
diff --git a/src/sisudoc/io_out/sqlite.d b/src/sisudoc/io_out/sqlite.d new file mode 100644 index 0000000..bee9cad --- /dev/null +++ b/src/sisudoc/io_out/sqlite.d @@ -0,0 +1,1761 @@ +/+ +- Name: SisuDoc Spine, Doc Reform [a part of] + - Description: documents, structuring, processing, publishing, search + - static content generator + + - Author: Ralph Amissah + [ralph.amissah@gmail.com] + + - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved. + + - License: AGPL 3 or later: + + Spine (SiSU), a framework for document structuring, publishing and + search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU AFERO General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see [https://www.gnu.org/licenses/]. + + If you have Internet connection, the latest version of the AGPL should be + available at these locations: + [https://www.fsf.org/licensing/licenses/agpl.html] + [https://www.gnu.org/licenses/agpl.html] + + - Spine (by Doc Reform, related to SiSU) uses standard: + - docReform markup syntax + - standard SiSU markup syntax with modified headers and minor modifications + - docReform object numbering + - standard SiSU object citation numbering & system + + - Homepages: + [https://www.sisudoc.org] + [https://www.doc-reform.org] + + - Git + [https://git.sisudoc.org/] + ++/ +module sisudoc.io_out.sqlite; +import + sisudoc.io_out, + sisudoc.io_out.rgx, + sisudoc.io_out.rgx_xhtml; +import + std.file, + std.uri; +import std.conv : to; +import std.typecons : Nullable; +import d2sqlite3; +mixin spineRgxOut; +mixin spineRgxXHTML; +mixin InternalMarkup; +static auto rgx = RgxO(); +static auto rgx_xhtml = RgxXHTML(); +static auto mkup = InlineMarkup(); +long _metadata_tid_lastrowid; +template SQLiteHubBuildTablesAndPopulate() { + void SQLiteHubBuildTablesAndPopulate(D,M)( + const D doc_abstraction, + M doc_matters, + ) { + auto pth_sqlite = spinePathsSQLite!()(doc_matters.sqlite.filename, doc_matters.sqlite.path); + if ((isValidPath(pth_sqlite.base) && exists(pth_sqlite.base) != 0 && pth_sqlite.base.isDir)) { + } else { + try { + pth_sqlite.base.mkdirRecurse; + } catch (FileException ex) { } + } + template SQLiteDbStatementComposite() { + void SQLiteDbStatementComposite(Db,D,M)( + Db db, + const D doc_abstraction, + M doc_matters, + ) { + string _db_statement; + if ((doc_matters.opt.action.sqlite_db_create)) { + auto pth_sqlite = spinePathsSQLite!()(doc_matters.sqlite.filename, doc_matters.sqlite.path); + if ((isValidPath(pth_sqlite.base) && exists(pth_sqlite.base) != 0 && pth_sqlite.base.isDir)) { + } else { + try { + pth_sqlite.base.mkdirRecurse; + } catch (FileException ex) { } + } + _db_statement ~= SQLiteTablesReCreate!()(); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "TABLE RE-CREATE"); + _db_statement = []; + } + if (doc_matters.opt.action.sqlite_delete) { + _db_statement ~= SQLiteDeleteDocument!()(doc_matters); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "DELETE Document"); + _db_statement = []; + } + if (doc_matters.opt.action.sqlite_update) { + _db_statement ~= SQLiteDeleteDocument!()(doc_matters); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "DELETE Document"); + _db_statement = []; + _db_statement ~= SQLiteInsertMetadata!()(doc_matters); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "INSERT MetaData"); + _db_statement = []; + /+ get tid (lastrowid or max) for use in doc_objects table +/ + _db_statement ~= doc_abstraction.SQLiteInsertDocObjectsLoop!()(doc_matters); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "INSERT DocObjects"); + _db_statement = []; + _db_statement ~= SQLiteInsertMetadataTopics!()(doc_matters); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "INSERT MetaDataTopics"); + _db_statement = []; + } + db.close; + if (doc_matters.opt.action.vox_gt0) { + writeln(" ", pth_sqlite.sqlite_file); + } + } + } + try { + auto db = Database(pth_sqlite.sqlite_file); + SQLiteDbStatementComposite!()(db, doc_abstraction, doc_matters); + } + catch (FileException e) { + writeln("Failed (FileException): ", e.msg, " ", pth_sqlite.sqlite_file); + writeln(e.file, " line: ", e.line); + import core.runtime; + core.runtime.Runtime.terminate(); + } + catch (ErrnoException e) { + writeln("Failed (ErrnoException): ", e.msg, " ", pth_sqlite.sqlite_file); + writeln(e.file, " line: ", e.line); + import core.runtime; + core.runtime.Runtime.terminate(); + } + catch (Exception e) { + writeln("Failed (Exception): ", e.msg, " ", pth_sqlite.sqlite_file); + writeln(e.file, " line: ", e.line); + import core.runtime; + core.runtime.Runtime.terminate(); + } + catch (Throwable) { + writeln("Failed (Trowable): ", pth_sqlite.sqlite_file); + import core.runtime; + core.runtime.Runtime.terminate(); + } + } +} +template SQLiteHubDiscreteBuildTablesAndPopulate() { + void SQLiteHubDiscreteBuildTablesAndPopulate(D,M)( + const D doc_abstraction, + M doc_matters, + ) { + auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html, doc_matters.src.language); + auto pth_sqlite = spinePathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language); // doc_matters.db_path + if ((isValidPath(pth_sqlite.base) && exists(pth_sqlite.base) != 0 && pth_sqlite.base.isDir)) { + } else { + try { + pth_sqlite.base.mkdirRecurse; + } catch (FileException ex) { } + } + auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename)); + template SQLiteDiscreteDbStatementComposite() { + void SQLiteDiscreteDbStatementComposite(Db,D,M)( + Db db, + const D doc_abstraction, + M doc_matters, + ) { + try { + { + string _db_statement; + _db_statement ~= SQLiteTablesReCreate!()(); + _db_statement ~= SQLiteInsertMetadata!()(doc_matters); + _db_statement ~= SQLiteInsertMetadataTopics!()(doc_matters); + _db_statement ~= doc_abstraction.SQLiteInsertDocObjectsLoop!()(doc_matters); + SQLiteDbRun!()(db, _db_statement, doc_matters.opt.action, "table CREATE Tables, INSERT DocObjects"); + } + db.close; + } + catch (FileException e) { + writeln("Failed (FileException): ", e.msg); + writeln(e.file, " line: ", e.line); + import core.runtime; + core.runtime.Runtime.terminate(); + } + catch (ErrnoException e) { + writeln("Failed (ErrnoException): ", e.msg); + writeln(e.file, " line: ", e.line); + import core.runtime; + core.runtime.Runtime.terminate(); + } + catch (Exception e) { + writeln("Failed (Exception): ", e.msg); + writeln(e.file, " line: ", e.line); + import core.runtime; + core.runtime.Runtime.terminate(); + } + catch (Throwable) { + import core.runtime; + core.runtime.Runtime.terminate(); + } + if (doc_matters.opt.action.vox_gt0) { + writeln(" ", pth_sqlite.sqlite_file(doc_matters.src.filename)); + } + } + } + SQLiteDiscreteDbStatementComposite!()(db, doc_abstraction, doc_matters); + } +} +template SQLiteDbRun() { + void SQLiteDbRun(Db,St,O)( + Db db, + St db_statement, + O opt_action, + string note, + ) { + debug(sql_statement) { + writeln(db_statement); + } + try { + db.run( + "\nBEGIN TRANSACTION;\n" ~ + db_statement ~ + "\nCOMMIT TRANSACTION;\n" + ); + } catch (ErrnoException ex) { + writeln("ERROR SQLite : ", ex); + } catch (Exception ex) { + writeln("ERROR SQLite : ", ex); + } + { /+ debug +/ + if (opt_action.debug_do_sqlite) { + writeln(note); + if (opt_action.vox_gt2) { + writeln(db_statement); + } + } + } + } +} +template SQLinsertDelimiter() { + string SQLinsertDelimiter(string _txt) { + _txt = _txt + .replaceAll(rgx.quotation_mark_sql_insert_delimiter, "$0$0"); + return _txt; + } +} +template SQLiteFormatAndLoadObject() { + auto SQLiteFormatAndLoadObject(M)( + M doc_matters, + ) { + mixin spineRgxOut; + mixin spineRgxXHTML; + struct sqlite_format_and_load_objects { + string generic_munge_sanitize_text_for_search( + string _txt, + ) { + string _notes; + string _urls; + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { + _notes ~= "\n" ~ m["text"]; + } + _txt = _txt.replaceAll(rgx.inline_notes_al_gen, ""); + } + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + if (m["link"].match(rgx.url)) { + _urls ~= "\n" ~ m["link"]; + } + } + _txt = _txt.replaceAll(rgx.inline_link_clean, ""); + } + if (_notes.length > 0) { + _txt ~= _notes; + } + if (_urls.length > 0) { + _txt ~= _urls; + } + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(_txt, "\n"); + } + } + debug(sql_text_clean) { + writeln(_txt); + } + return _txt; + } + string munge_html(M,O)( + M doc_matters, + const O obj, + ) { + string _html_special_characters(string _txt){ + _txt = _txt + .replaceAll(rgx_xhtml.ampersand, "&") + .replaceAll(rgx_xhtml.quotation, """) + .replaceAll(rgx_xhtml.less_than, "<") + .replaceAll(rgx_xhtml.greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.br_line_inline, "<br />") + .replaceAll(rgx.br_line, "<br />") + .replaceAll(rgx.br_line_spaced, "<br /><br />") + .replaceAll(rgx_xhtml.line_break, "<br />"); + return _txt; + } + string _html_font_face(string _txt){ + _txt = _txt + .replaceAll(rgx.inline_emphasis, "<em>$1</em>") + .replaceAll(rgx.inline_bold, "<b>$1</b>") + .replaceAll(rgx.inline_underscore, "<u>$1</u>") + .replaceAll(rgx.inline_italics, "<i>$1</i>") + .replaceAll(rgx.inline_superscript, "<sup>$1</sup>") + .replaceAll(rgx.inline_subscript, "<sub>$1</sub>") + .replaceAll(rgx.inline_strike, "<del>$1</del>") + .replaceAll(rgx.inline_insert, "<ins>$1</ins>") + .replaceAll(rgx.inline_mono, "<tt>$1</tt>") + .replaceAll(rgx.inline_cite, "<cite>$1</cite>"); + return _txt; + } + string _notes; + string _urls; + string _txt = _html_font_face(_html_special_characters(obj.text)); + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(_txt, "\n"); + } + } + return _txt; + } + string html_special_characters(string _txt){ + _txt = _txt + .replaceAll(rgx_xhtml.ampersand, "&") + .replaceAll(rgx_xhtml.quotation, """) + .replaceAll(rgx_xhtml.less_than, "<") + .replaceAll(rgx_xhtml.greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.br_line_inline, "<br />") + .replaceAll(rgx.br_line, "<br />") + .replaceAll(rgx.br_line_spaced, "<br /><br />") + .replaceAll(rgx_xhtml.line_break, "<br />"); + return _txt; + } + string html_special_characters_code(string _txt){ + _txt = _txt + .replaceAll(rgx_xhtml.ampersand, "&") + .replaceAll(rgx_xhtml.quotation, """) + .replaceAll(rgx_xhtml.less_than, "<") + .replaceAll(rgx_xhtml.greater_than, ">") + .replaceAll(rgx.nbsp_char, " "); + return _txt; + } + string html_font_face(string _txt){ + _txt = _txt + .replaceAll(rgx.inline_emphasis, "<em>$1</em>") + .replaceAll(rgx.inline_bold, "<b>$1</b>") + .replaceAll(rgx.inline_underscore, "<u>$1</u>") + .replaceAll(rgx.inline_italics, "<i>$1</i>") + .replaceAll(rgx.inline_superscript, "<sup>$1</sup>") + .replaceAll(rgx.inline_subscript, "<sub>$1</sub>") + .replaceAll(rgx.inline_strike, "<del>$1</del>") + .replaceAll(rgx.inline_insert, "<ins>$1</ins>") + .replaceAll(rgx.inline_mono, "<tt>$1</tt>") + .replaceAll(rgx.inline_cite, "<cite>$1</cite>"); + return _txt; + } + string inline_grouped_text_bullets_indents(M,O)( + M doc_matters, + const O obj, + string _txt, + string _suffix = ".html", + string _xml_type = "seg", + ) { + static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); + if (obj.metainfo.is_a == "group") { + _txt = (_txt) + .replaceAll(rgx.grouped_para_indent_1, + " ") + .replaceAll(rgx.grouped_para_indent_2, + " ") + .replaceAll(rgx.grouped_para_indent_3, + " ") + .replaceAll(rgx.grouped_para_indent_4, + " ") + .replaceAll(rgx.grouped_para_indent_5, + " ") + .replaceAll(rgx.grouped_para_indent_6, + " ") + .replaceAll(rgx.grouped_para_indent_7, + " ") + .replaceAll(rgx.grouped_para_indent_8, + " ") + .replaceAll(rgx.grouped_para_indent_9, + " ") + .replaceAll(rgx.grouped_para_indent_hang, " ") + .replaceAll(rgx.grouped_para_bullet, "● ") + .replaceAll(rgx.grouped_para_bullet_indent_1, + " ● ") + .replaceAll(rgx.grouped_para_bullet_indent_2, + " ● ") + .replaceAll(rgx.grouped_para_bullet_indent_3, + " ● ") + .replaceAll(rgx.grouped_para_bullet_indent_4, + " ● ") + .replaceAll(rgx.grouped_para_bullet_indent_5, + " ● ") + .replaceAll(rgx.grouped_para_bullet_indent_6, + " ● ") + .replaceAll(rgx.grouped_para_bullet_indent_7, + " ● ") + .replaceAll(rgx.grouped_para_bullet_indent_8, + " ● ") + .replaceAll(rgx.grouped_para_bullet_indent_9, + " ● "); + } + return _txt; + } + string inline_images(M,O)( + M doc_matters, + const O obj, + string _txt, + string _suffix = ".html", + string _xml_type = "seg", + ) { + string _img_pth; + if (_xml_type == "epub") { + _img_pth = "image/"; + } else if (_xml_type == "scroll") { + _img_pth = "../../image/"; + } else if (_xml_type == "seg") { + _img_pth = "../../../image/"; + } + if (_txt.match(rgx.inline_image)) { + _txt = _txt.replaceAll( // TODO bug where image dimensions (w or h) not given & consequently set to 0; should not be used (calculate earlier, abstraction) + rgx.inline_image, + ("$1<img src=\"" + ~ _img_pth + ~ "$3\" width=\"$4\" height=\"$5\" naturalsizeflag=\"0\" align=\"bottom\" border=\"0\" /> $6")); + } + return _txt; + } + string inline_links(M,O)( + M doc_matters, + const O obj, + string _txt, + string _xml_type = "seg", + ) { + if (obj.has.inline_links) { + if (obj.metainfo.is_a != "code") { + _txt = replaceAll!(m => + m["linked_text"] ~ "┤" ~ to!string((obj.stow.link[m["num"].to!ulong])).encode ~ "├" + )(_txt, rgx.inline_link_number_only); + } + if ((_txt.match(rgx.mark_internal_site_lnk)) + && (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault + _txt = _txt.replaceAll( + rgx.inline_seg_link, + "$1"); + } + auto pth_html = spinePathsHTML!()(doc_matters.output_path, doc_matters.src.language); + if (_xml_type == "seg") { + foreach (m; _txt.matchAll(rgx.inline_link_seg_and_hash)) { + if (m.captures["hash"] in doc_matters.has.tag_associations) { + if (m.captures["hash"] == doc_matters.has.tag_associations[(m.captures["hash"])]["seg_lv4"]) { + _txt = _txt.replaceFirst( + rgx.inline_link_seg_and_hash, + "┥$1┝┤" + ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html + ~ "/" + ~ pth_html.tail_fn_seg(doc_matters.src.filename, "$2.html") + ~ "├" + ); + } else { + _txt = _txt.replaceFirst( + rgx.inline_link_seg_and_hash, + "┥$1┝┤" + ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html + ~ "/" + ~ doc_matters.src.filename_base + ~ "/" + ~ doc_matters.has.tag_associations[(m.captures["hash"])]["seg_lv4"] + ~ ".html" + ~ "#" ~ m.captures["hash"] + ~ "├" + ); + } + } else { + if (doc_matters.opt.action.vox_gt0) { + writeln( + "WARNING on internal document links, anchor to link <<" + ~ m.captures["hash"] + ~ ">> not found in document, " + ~ "anchor: " ~ m.captures["hash"] + ~ " document: " ~ doc_matters.src.filename + ); + } + } + } + } else { + if (auto m = _txt.match(rgx.inline_link_seg_and_hash)) { + _txt = _txt.replaceFirst( + rgx.inline_link_seg_and_hash, + "┥$1┝┤" + ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html + ~ "/" + ~ pth_html.tail_fn_scroll(doc_matters.src.filename) + ~ "#" ~ m.captures["hash"] + ~ "├" + ); + } + } + _txt = _txt + .replaceAll( + rgx.inline_link_fn_suffix, + ("$1.html")) + .replaceAll( + rgx.inline_link, + ("<a href=\"$2\">$1</a>")) + .replaceAll( + rgx.mark_internal_site_lnk, + ""); + } + debug(markup_links) { + if (_txt.match(rgx.inline_link)) { + writeln(__LINE__, + " (missed) markup link identified (", + obj.has.inline_links, + "): ", obj.metainfo.is_a, ": ", + obj.text + ); + } + // if (obj.metainfo.is_a == "bookindex") { // DEBUG LINE + // if (_txt.match(regex(r"<a href"))) { + // writeln(__LINE__, " ", + // doc_matters.conf_make_meta.conf.w_srv_data_root_url_html, + // "/", + // doc_matters.src.filename_base, + // "\n", + // _txt + // ); + // } + // } + } + debug(markup) { + if (_txt.match(rgx.inline_link)) { + writeln(__LINE__, + " (missed) markup link identified (", + obj.has.inline_links, + "): ", obj.metainfo.is_a, ": ", + obj.text + ); + } + } + return _txt; + } + string inline_notes_scroll(M,O)( + M doc_matters, + const O obj, + string _txt, + ) { + if (obj.has.inline_notes_reg) { + string[] _endnotes; + foreach(m; _txt.matchAll(rgx.inline_notes_al_regular_number_note)) { + _endnotes ~= "<p class=\"endnote\">" + ~ "<sup>" ~ m.captures["num"] ~ ".</sup>" + ~ m.captures["note"] + ~ "</p>"; + } + _txt = replaceAll!(m => + (" " ~ "<sup>" ~ m["num"] ~ "</sup>")) + (_txt, rgx.inline_notes_al_regular_number_note) + ~ _endnotes.join("\n"); + } + debug(markup_endnotes) { + if (_txt.match(rgx.inline_notes_al_regular_number_note)) { + writeln(__LINE__, " (missed) markup endnote: ", obj.metainfo.is_a, ": ", obj.text); + } + } + debug(markup) { + if (_txt.match(rgx.inline_notes_al_regular_number_note)) { + writeln(__LINE__, " (missed) markup endnote: ", obj.metainfo.is_a, ": ", obj.text); + } + } + return _txt; + } + string xml_type="seg"; /+ set html document type to be linked to here (seg|scroll) +/ + string inline_markup(M,O)( + M doc_matters, + const O obj, + string _txt, + ) { + if (obj.metainfo.is_a == "group") { + _txt = inline_grouped_text_bullets_indents(doc_matters, obj, _txt, xml_type); + } + _txt = inline_images(doc_matters, obj, _txt, xml_type); + _txt = inline_links(doc_matters, obj, _txt, xml_type); + _txt = inline_notes_scroll(doc_matters, obj, _txt); + return _txt; + } + string html_heading(M,O)( + M doc_matters, + const O obj, + ) { + assert(obj.metainfo.is_of_part == "body" || "frontmatter" || "backmatter"); + assert(obj.metainfo.is_of_section == "body" || "toc" || "endnotes" || "glossary" || "bibliography" || "bookindex" || "blurb"); + assert(obj.metainfo.is_of_type == "para"); + assert(obj.metainfo.is_a == "heading"); + string _txt = munge_html(doc_matters, obj); + _txt = inline_markup(doc_matters, obj, _txt); + string o = format(q"┃<p class="%s"><b> + %s + </b></p>┃", + obj.metainfo.is_a, + _txt, + ); + return o; + } + string html_para(M,O)( + M doc_matters, + const O obj, + ) { + assert(obj.metainfo.is_of_part == "body" || "frontmatter" || "backmatter"); + assert(obj.metainfo.is_of_section == "body" || "toc" || "endnotes" || "glossary" || "bibliography" || "bookindex" || "blurb"); + assert(obj.metainfo.is_of_type == "para"); + assert(obj.metainfo.is_a == "para" || "toc" || "endnote" || "glossary" || "bibliography" || "bookindex" || "blurb"); + string _txt = munge_html(doc_matters, obj); + _txt = (obj.attrib.bullet) ? ("●  " ~ _txt) : _txt; + _txt = inline_markup(doc_matters, obj, _txt); + string o = format(q"┃<p class="%s" indent="h%si%s"> + %s + </p>┃", + obj.metainfo.is_a, + obj.attrib.indent_hang, + obj.attrib.indent_base, + _txt + ); + return o; + } + string html_quote(M,O)( + M doc_matters, + const O obj, + ) { + assert(obj.metainfo.is_of_part == "body"); + assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb"); + assert(obj.metainfo.is_of_type == "block"); + assert(obj.metainfo.is_a == "quote"); + string _txt = munge_html(doc_matters, obj); + string o = format(q"┃<p class="%s"> + %s + </p>┃", + obj.metainfo.is_a, + _txt + ); + return o; + } + string html_group(M,O)( + M doc_matters, + const O obj, + ) { + assert(obj.metainfo.is_of_part == "body"); + assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb"); + assert(obj.metainfo.is_of_type == "block"); + assert(obj.metainfo.is_a == "group"); + string _txt = munge_html(doc_matters, obj); + _txt = inline_markup(doc_matters, obj, _txt); + string o = format(q"┃<p class="%s"> + %s + </p>┃", + obj.metainfo.is_a, + _txt + ); + return o; + } + string html_block(M,O)( + M doc_matters, + const O obj, + ) { + assert(obj.metainfo.is_of_part == "body"); + assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb"); + assert(obj.metainfo.is_of_type == "block"); + assert(obj.metainfo.is_a == "block"); + string _txt = munge_html(doc_matters, obj); + _txt = inline_markup(doc_matters, obj, _txt); + string o = format(q"┃ + <p class="%s">%s</p>┃", + obj.metainfo.is_a, + _txt.stripRight + ); + return o; + } + string html_verse(M,O)( + M doc_matters, + const O obj, + ) { + assert(obj.metainfo.is_of_part == "body"); + assert(obj.metainfo.is_of_section == "body" || "glossary" || "bibliography" || "bookindex" || "blurb"); + assert(obj.metainfo.is_of_type == "block"); + assert(obj.metainfo.is_a == "verse"); + string _txt = munge_html(doc_matters, obj); + string o = format(q"┃<p class="%s">%s</p>┃", + obj.metainfo.is_a, + _txt + ); + return o; + } + string html_code(O)( + const O obj, + ) { + assert(obj.metainfo.is_of_part == "body"); + assert(obj.metainfo.is_of_section == "body"); + assert(obj.metainfo.is_of_type == "block"); + assert(obj.metainfo.is_a == "code"); + string _txt = html_special_characters_code(obj.text); + string o = format(q"┃<p class="%s">%s</p>┃", + obj.metainfo.is_a, + _txt + ); + return o; + } + string html_table(M,O)( + M doc_matters, + const O obj, + ) { + assert(obj.metainfo.is_of_part == "body"); + assert(obj.metainfo.is_of_section == "body"); + assert(obj.metainfo.is_of_type == "block"); + assert(obj.metainfo.is_a == "table"); + Tuple!(string, string) _tablarize(O)( + const O obj, + string _txt, + ) { + string[] _table_rows = _txt.split(rgx.table_delimiter_row); + string[] _table_cols; + string _table; + string _tablenote; + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + _table ~= "<tr>"; + foreach(col_idx, cell; _table_cols) { + if ((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) + _tablenote ~= cell; + } else { + string _col_is = (row_idx == 0 && obj.table.heading) ? "th" : "td"; + string _align = ("style=\"text-align:" + ~ ((obj.table.column_aligns[col_idx] == "l") + ? "left\"" : "right\"")); + _table ~= "<" + ~ _col_is + ~ " width=\"" + ~ obj.table.column_widths[col_idx].to!string + ~ "%\" " + ~ _align + ~ ">"; + _table ~= cell; + _table ~= "</" + ~ _col_is + ~ ">"; + } + } + _table ~= "</tr>"; + } + Tuple!(string, string) t = tuple( + _table, + _tablenote, + ); + return t; + } + string _txt = munge_html(doc_matters, obj); + Tuple!(string, string) t = _tablarize(obj, _txt); + _txt = t[0]; + string _note = t[1]; + string o = format(q"┃<p class="%s"> + <table summary="normal text css" width="95%%" border="0" cellpadding="2" align="center"> + %s + </table> + %s + </p>┃", + obj.metainfo.is_a, + _txt, + _note + ); + return o; + } + string sqlite_load_string(M,O)( + M doc_matters, + const O obj, + ) { + string o; + return o; + } + string sqlite_statement(O)( + const O obj, + string _txt, + string _html, + ) { + void _sql_exe(O)( + string _sql, + ) { + writeln(_html); + writeln(_sql); + } + string _sql; + return _sql; + } + string[string] heading(M,O)( + M doc_matters, + const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_heading(doc_matters, obj) + ]; + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + } + return obj_txt; + } + string[string] para(M,O)( + M doc_matters, + const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_para(doc_matters, obj) + ]; + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + } + return obj_txt; + } + string[string] quote(M,O)( + M doc_matters, + const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_quote(doc_matters, obj) + ]; + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + } + return obj_txt; + } + string[string] group(M,O)( + M doc_matters, + const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_group(doc_matters, obj) + ]; + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + } + return obj_txt; + } + string[string] block(M,O)( + M doc_matters, + const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_block(doc_matters, obj) + ]; + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + } + return obj_txt; + } + string[string] verse(M,O)( + M doc_matters, + const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_verse(doc_matters, obj) + ]; + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + } + return obj_txt; + } + string[string] code(M,O)( + M doc_matters, + const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_code(obj) + ]; + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + } + return obj_txt; + } + string[string] table(M,O)( + M doc_matters, + const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_table(doc_matters, obj) + ]; + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + } + return obj_txt; + } + } + return sqlite_format_and_load_objects(); + } +} +template SQLiteTablesReCreate() { + string SQLiteTablesReCreate()() { + string _sql_instruct; + _sql_instruct = format(q"┃ + DROP INDEX IF EXISTS idx_ocn; + DROP INDEX IF EXISTS idx_uid; + DROP INDEX IF EXISTS idx_digest_clean; + DROP INDEX IF EXISTS idx_digest_all; + DROP INDEX IF EXISTS idx_clean; + DROP INDEX IF EXISTS idx_title; + DROP INDEX IF EXISTS idx_author; + DROP INDEX IF EXISTS src_filename_base; + DROP INDEX IF EXISTS idx_language_document_char; + DROP INDEX IF EXISTS idx_classify_topic_register; + DROP INDEX IF EXISTS idx_topic_list; + DROP TABLE IF EXISTS metadata_and_text; + DROP TABLE IF EXISTS topic_register; + DROP TABLE IF EXISTS doc_objects; + DROP TABLE IF EXISTS urls; + CREATE TABLE IF NOT EXISTS metadata_and_text ( + uid VARCHAR(256) UNIQUE, -- filename, language char, pod/txt (decide on delimiter [,;:/]) + src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, z pod name if any + src filename + language code + src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NOT NULL, + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NOT NULL, + creator_author_last_first VARCHAR(600) NOT NULL, + creator_author_email VARCHAR(100) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NOT NULL, + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + original_title VARCHAR(800) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) NULL, + rights_copyright VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_cover VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + publisher VARCHAR(600) NULL, + src_filename_base VARCHAR(256) NOT NULL, + src_filename_suffix VARCHAR(6) NOT NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + pod_name VARCHAR(256) NULL, -- zipped pod, work to be done here + pod_fingerprint VARCHAR(256) NULL, -- zipped pod, work to be done here + pod_size VARCHAR(10) NULL, -- zipped pod, work to be done here + site_url_doc_root VARCHAR(256) NULL, -- url path to doc root + site_url_html_toc VARCHAR(256) NULL, + site_url_html_scroll VARCHAR(256) NULL, + site_url_epub VARCHAR(256) NULL, + links TEXT NULL + ); + CREATE TABLE IF NOT EXISTS topic_register ( + -- tid BIGINT PRIMARY KEY, + uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE, + -- src_composite_id_per_txt VARCHAR(256) NOT NULL, - UNIQUE, - z pod name if any + src filename + language code + -- src_composite_id_per_pod VARCHAR(256) NOT NULL, - z pod name if any + src filename + topic_register VARCHAR(250) NOT NULL, + site_url_doc_root VARCHAR(256) NULL, -- url path to doc root + site_url_html_toc VARCHAR(256) NULL, + site_url_html_scroll VARCHAR(256) NULL + ); + CREATE TABLE IF NOT EXISTS doc_objects ( + lid BIGINT PRIMARY KEY, + uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE, + ocn SMALLINT, + obj_id VARCHAR(6) NULL, + clean TEXT NULL, + body TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + is_of_type VARCHAR(16), + is_a VARCHAR(16), + lev SMALLINT NULL, + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + last_descendant VARCHAR(16) NULL, -- headings only + digest_clean CHAR(256), + digest_all CHAR(256), + seg_name CHAR(256), + types CHAR(1) NULL + ); + CREATE INDEX IF NOT EXISTS idx_ocn ON doc_objects(ocn); + CREATE INDEX IF NOT EXISTS idx_digest_clean ON doc_objects(digest_clean); + CREATE INDEX IF NOT EXISTS idx_digest_all ON doc_objects(digest_all); + CREATE INDEX IF NOT EXISTS idx_clean ON doc_objects(clean); + CREATE INDEX IF NOT EXISTS idx_title ON metadata_and_text(title); + CREATE INDEX IF NOT EXISTS idx_author ON metadata_and_text(creator_author_last_first); + CREATE INDEX IF NOT EXISTS idx_uid ON metadata_and_text(uid); + CREATE INDEX IF NOT EXISTS idx_filename ON metadata_and_text(src_filename_base); + CREATE INDEX IF NOT EXISTS idx_language ON metadata_and_text(language_document_char); + CREATE INDEX IF NOT EXISTS idx_topics ON metadata_and_text(classify_topic_register); + CREATE INDEX IF NOT EXISTS idx_topic_list ON topic_register(topic_register); + ┃",); + return _sql_instruct; + } +} +template SQLiteDeleteDocument() { + string SQLiteDeleteDocument(M)( + M doc_matters, + ) { + string _uid = doc_matters.src.doc_uid; + string _delete_uid = format(q"┃ + DELETE FROM metadata_and_text + WHERE uid = '%s'; + DELETE FROM doc_objects + WHERE uid_metadata_and_text = '%s'; + ┃", + _uid, + _uid, + ); + return _delete_uid; + } +} +template SQLiteInsertMetadata() { + string SQLiteInsertMetadata(M)( + M doc_matters, + ) { + string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid); + string _insert_metadata = format(q"┃ + INSERT INTO metadata_and_text ( + uid, + src_filename_base, + src_filename_suffix, + src_composite_id_per_txt, + src_composite_id_per_pod, + title, + title_main, + title_sub, + title_short, + title_edition, + title_language, + creator_author, + creator_author_last_first, + creator_author_email, + creator_illustrator, + creator_translator, + language_document, + language_document_char, + date_added_to_site, + date_available, + date_created, + date_issued, + date_modified, + date_published, + date_valid, + rights_copyright, + rights_copyright_audio, + rights_copyright_cover, + rights_copyright_illustrations, + rights_copyright_photographs, + rights_copyright_text, + rights_copyright_translation, + rights_copyright_video, + rights_license, + identifier_oclc, + identifier_isbn, + classify_dewey, + classify_keywords, + classify_loc, + classify_subject, + classify_topic_register, + original_title, + original_publisher, + original_language, + original_language_char, + original_source, + notes_abstract, + notes_description, + publisher, + site_url_doc_root + ) + VALUES ( + '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s' + ); + ┃", + _uid, + SQLinsertDelimiter!()(doc_matters.src.filename_base), + SQLinsertDelimiter!()(doc_matters.src.filename_extension), + SQLinsertDelimiter!()(doc_matters.src.docname_composite_unique_per_src_doc), + SQLinsertDelimiter!()(doc_matters.src.docname_composite_unique_per_src_pod), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_full), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_main), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_subtitle), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_short), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_edition), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.title_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_surname_fn), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_author_email), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_illustrator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.creator_translator), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.language_document_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_added_to_site), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_available), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_created), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_issued), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_modified), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_published), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.date_valid), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_audio), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_cover), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_illustrations), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_photographs), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_text), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_translation), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_copyright_video), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.rights_license), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_oclc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.identifier_isbn), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_dewey), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_keywords), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_loc), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_subject), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.classify_topic_register), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_abstract), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.notes_description), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_title), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher), + SQLinsertDelimiter!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html) + ); + return _insert_metadata; + } +} +template SQLiteInsertMetadataTopics() { + string SQLiteInsertMetadataTopics(M)( + M doc_matters, + ) { + string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid); + string[] _insert_topics; + foreach (topic_line; doc_matters.conf_make_meta.meta.classify_topic_register_expanded_arr) { + // writeln(topic_line); + _insert_topics ~= format(q"┃ + INSERT INTO topic_register ( + uid_metadata_and_text, + topic_register + ) + VALUES ( + '%s', + '%s' + ); + ┃", + _uid, + SQLinsertDelimiter!()(topic_line) + ); + } + return _insert_topics.join.to!(char[]).toUTF8; + } +} +template SQLiteInsertDocObjectsLoop() { + string SQLiteInsertDocObjectsLoop(D,M)( + const D doc_abstraction, + M doc_matters, + ) { + string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid); + auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html, doc_matters.src.language); + string insertDocObjectsRow(O)(O obj) { + string _insert_doc_objects_row = format(q"┃ + INSERT INTO doc_objects ( + uid_metadata_and_text, + ocn, + obj_id, + clean, + body, + lev, + is_of_type, + is_a, + seg_name + ) + VALUES ( + '%s', %s, '%s', '%s', '%s', %s, '%s', '%s', '%s' + ); + ┃", + _uid, + obj.metainfo.ocn, + obj.metainfo.identifier, + SQLinsertDelimiter!()(obj_txt["text"]), + SQLinsertDelimiter!()(obj_txt["html"]), + obj.metainfo.heading_lev_markup, + obj.metainfo.is_of_type, + obj.metainfo.is_a, + obj.tags.html_segment_anchor_tag_is + ); + return _insert_doc_objects_row; + } + auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); + string[string] obj_txt; + string doc_text; + string[] _insert_doc_objects; + foreach (part; doc_matters.has.keys_seq.sql) { + foreach (obj; doc_abstraction[part]) { + switch (obj.metainfo.is_of_part) { + case "frontmatter": assert(part == "head", part); + switch (obj.metainfo.is_of_type) { + case "para": + switch (obj.metainfo.is_a) { + case "heading": + obj_txt = format_and_sqlite_load.heading(doc_matters, obj); + break; + default: + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); + } + } + break; + } + break; + default: + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_type); + } + } + break; + } + break; + case "body": // assert(part == "body", part); + switch (obj.metainfo.is_of_type) { + case "para": + switch (obj.metainfo.is_a) { + case "heading": + debug (asserts) { + if (part != "body") { + writeln(__LINE__, ": ", obj.text); + } + } + obj_txt = format_and_sqlite_load.heading(doc_matters, obj); + break; + case "para": + obj_txt = format_and_sqlite_load.para(doc_matters, obj); + break; + default: + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); + } + } + break; + } + break; + case "block": + switch (obj.metainfo.is_a) { + case "quote": + obj_txt = format_and_sqlite_load.quote(doc_matters, obj); + break; + case "group": + obj_txt = format_and_sqlite_load.group(doc_matters, obj); + break; + case "block": + obj_txt = format_and_sqlite_load.block(doc_matters, obj); + break; + case "poem": // double check on keeping both poem & verse + break; + case "verse": + obj_txt = format_and_sqlite_load.verse(doc_matters, obj); + break; + case "code": + obj_txt = format_and_sqlite_load.code(doc_matters, obj); + break; + case "table": + obj_txt = format_and_sqlite_load.table(doc_matters, obj); + break; + default: + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); + } + } + break; + } + break; + default: + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_type); + } + } + break; + } + break; + case "backmatter": + assert(part == "glossary" || "bibliography" || "bookindex" || "blurb" || "tail", part); + switch (obj.metainfo.is_of_type) { + case "para": + switch (obj.metainfo.is_a) { + case "heading": + obj_txt = format_and_sqlite_load.heading(doc_matters, obj); + break; + case "glossary": assert(part == "glossary", part); + obj_txt = format_and_sqlite_load.para(doc_matters, obj); + break; + case "bibliography": assert(part == "bibliography", part); + obj_txt = format_and_sqlite_load.para(doc_matters, obj); + break; + case "bookindex": assert(part == "bookindex", part); + obj_txt = format_and_sqlite_load.para(doc_matters, obj); + break; + case "blurb": assert(part == "blurb", part); + obj_txt = format_and_sqlite_load.para(doc_matters, obj); + break; + default: + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); + } + } + break; + } + break; + default: + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_type); + } + } + break; + } + break; + case "comment": + break; + default: + { /+ debug +/ + if (doc_matters.opt.action.debug_do_sqlite) { + writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_of_part); // check where empty value could come from + writeln(__FILE__, ":", __LINE__, ": ", obj.metainfo.is_a); + writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from + } + } + break; + } + if (obj.metainfo.is_a == "heading") { + if (doc_matters.opt.action.show_sqlite) { + if (obj.metainfo.heading_lev_markup == 0) { + writeln(doc_matters.src.filename); + } + writeln( + "markup: ", obj.metainfo.heading_lev_markup, + "> ", obj.metainfo.dom_structure_markedup_tags_status, + "; collapsed: ", obj.metainfo.heading_lev_collapsed, + "> ", obj.metainfo.dom_structure_collapsed_tags_status, + "; ocn: ", obj.metainfo.ocn, + " node: ", obj.metainfo.node, + "; parent: ", obj.metainfo.parent_lev_markup, + "; ocn: ", obj.metainfo.parent_ocn, + "; ", + ); + } + } + if (!(obj.metainfo.is_a == "comment")) { + _insert_doc_objects ~= insertDocObjectsRow(obj); + } + } // loop closes + } + return _insert_doc_objects.join.to!(char[]).toUTF8; + } +} +template SQLiteTablesCreate() { + void SQLiteTablesCreate(E,O,C)(E env, O opt_action, C config) { + import d2sqlite3; + template SQLiteTablesReCreate() { + string SQLiteTablesReCreate()() { + string _sql_instruct; + _sql_instruct = format(q"┃ + DROP INDEX IF EXISTS idx_ocn; + DROP INDEX IF EXISTS idx_uid; + DROP INDEX IF EXISTS idx_digest_clean; + DROP INDEX IF EXISTS idx_digest_all; + DROP INDEX IF EXISTS idx_clean; + DROP INDEX IF EXISTS idx_title; + DROP INDEX IF EXISTS idx_author; + DROP INDEX IF EXISTS src_filename_base; + DROP INDEX IF EXISTS idx_language_document_char; + DROP INDEX IF EXISTS idx_classify_topic_register; + DROP INDEX IF EXISTS idx_topic_list; + DROP TABLE IF EXISTS metadata_and_text; + DROP TABLE IF EXISTS topic_register; + DROP TABLE IF EXISTS doc_objects; + DROP TABLE IF EXISTS urls; + CREATE TABLE IF NOT EXISTS metadata_and_text ( + uid VARCHAR(256) UNIQUE, -- filename, language char, pod/txt (decide on delimiter [,;:/]) + src_composite_id_per_txt VARCHAR(256) NOT NULL, -- UNIQUE, z pod name if any + src filename + language code + src_composite_id_per_pod VARCHAR(256) NOT NULL, -- z pod name if any + src filename + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NOT NULL, + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NOT NULL, + creator_author_last_first VARCHAR(600) NOT NULL, + creator_author_email VARCHAR(100) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NOT NULL, + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + original_title VARCHAR(800) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) NULL, + rights_copyright VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_cover VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + publisher VARCHAR(600) NULL, + src_filename_base VARCHAR(256) NOT NULL, + src_filename_suffix VARCHAR(6) NOT NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + pod_name VARCHAR(256) NULL, -- zipped pod, work to be done here + pod_fingerprint VARCHAR(256) NULL, -- zipped pod, work to be done here + pod_size VARCHAR(10) NULL, -- zipped pod, work to be done here + site_url_doc_root VARCHAR(256) NULL, -- url path to doc root + site_url_html_toc VARCHAR(256) NULL, + site_url_html_scroll VARCHAR(256) NULL, + site_url_epub VARCHAR(256) NULL, + links TEXT NULL + ); + CREATE TABLE IF NOT EXISTS topic_register ( + -- tid BIGINT PRIMARY KEY, + uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE, + -- src_composite_id_per_txt VARCHAR(256) NOT NULL, - UNIQUE, - z pod name if any + src filename + language code + -- src_composite_id_per_pod VARCHAR(256) NOT NULL, - z pod name if any + src filename + topic_register VARCHAR(250) NOT NULL, + site_url_doc_root VARCHAR(256) NULL, -- url path to doc root + site_url_html_toc VARCHAR(256) NULL, + site_url_html_scroll VARCHAR(256) NULL + ); + CREATE TABLE IF NOT EXISTS doc_objects ( + lid BIGINT PRIMARY KEY, + uid_metadata_and_text VARCHAR(256) REFERENCES metadata_and_text(uid) ON DELETE CASCADE, + ocn SMALLINT, + obj_id VARCHAR(6) NULL, + clean TEXT NULL, + body TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + is_of_type VARCHAR(16), + is_a VARCHAR(16), + lev SMALLINT NULL, + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + last_descendant VARCHAR(16) NULL, -- headings only + digest_clean CHAR(256), + digest_all CHAR(256), + seg_name CHAR(256), + types CHAR(1) NULL + ); + CREATE INDEX IF NOT EXISTS idx_ocn ON doc_objects(ocn); + CREATE INDEX IF NOT EXISTS idx_digest_clean ON doc_objects(digest_clean); + CREATE INDEX IF NOT EXISTS idx_digest_all ON doc_objects(digest_all); + CREATE INDEX IF NOT EXISTS idx_clean ON doc_objects(clean); + CREATE INDEX IF NOT EXISTS idx_title ON metadata_and_text(title); + CREATE INDEX IF NOT EXISTS idx_author ON metadata_and_text(creator_author_last_first); + CREATE INDEX IF NOT EXISTS idx_uid ON metadata_and_text(uid); + CREATE INDEX IF NOT EXISTS idx_filename ON metadata_and_text(src_filename_base); + CREATE INDEX IF NOT EXISTS idx_language ON metadata_and_text(language_document_char); + CREATE INDEX IF NOT EXISTS idx_topics ON metadata_and_text(classify_topic_register); + CREATE INDEX IF NOT EXISTS idx_topic_list ON topic_register(topic_register); + ┃",); + return _sql_instruct; + } + } + try { + if (opt_action.sqlite_db_create) { + string _db_statement; + string db_filename = (opt_action.sqliteDB_filename.length > 0) + ? opt_action.sqliteDB_filename + : (config.conf.w_srv_db_sqlite_filename.length > 0) + ? config.conf.w_srv_db_sqlite_filename + : ""; + string db_path = (opt_action.sqliteDB_path.length > 0) + ? opt_action.sqliteDB_path + : (config.conf.w_srv_db_sqlite_path.length > 0) + ? config.conf.w_srv_db_sqlite_path + : ""; + if (db_filename.length > 0 && db_path.length > 0) { + if (opt_action.vox_gt2) { + writeln("db name: ", db_filename); + writeln("db path: ", db_path); + writeln("db name & path: ", db_path, "/", db_filename); + } + if (opt_action.vox_gt1) { + writeln("attempting to create db: ", db_path, "/", db_filename); + } + auto pth_sqlite = spinePathsSQLite!()(db_filename, db_path); + if ((isValidPath(pth_sqlite.base) && exists(pth_sqlite.base) != 0 && pth_sqlite.base.isDir)) { + } else { + try { + pth_sqlite.base.mkdirRecurse; + } catch (FileException ex) { } + } + auto db = Database(pth_sqlite.sqlite_file); + { + _db_statement ~= SQLiteTablesReCreate!()(); + } + SQLiteDbRun!()(db, _db_statement, opt_action, "TABLE RE-CREATE"); + } else { + writeln("must provide db name & output root path either on the command line or in configuration file"); + writeln("db name: ", db_filename); + writeln("db path: ", db_path); + } + } + } + catch (FileException e) { + writeln("Failed (FileException): ", e.msg); + writeln(e.file, " line: ", e.line); + import core.runtime; + core.runtime.Runtime.terminate(); + } + catch (ErrnoException e) { + writeln("Failed (ErrnoException): ", e.msg); + writeln(e.file, " line: ", e.line); + import core.runtime; + core.runtime.Runtime.terminate(); + } + catch (Exception e) { + writeln("Failed (Exception): ", e.msg); + writeln(e.file, " line: ", e.line); + import core.runtime; + core.runtime.Runtime.terminate(); + } + catch (Throwable) { + import core.runtime; + core.runtime.Runtime.terminate(); + } + } +} +template SQLiteDbDrop() { + void SQLiteDbDrop(O,C)(O opt_action, C config) { + writeln("db drop"); + if ((opt_action.sqlite_db_drop)) { + string db_filename = (opt_action.sqliteDB_filename.length > 0) + ? opt_action.sqliteDB_filename + : (config.conf.w_srv_db_sqlite_filename.length > 0) + ? config.conf.w_srv_db_sqlite_filename + : ""; + string db_path = (opt_action.sqliteDB_path.length > 0) // + ? opt_action.sqliteDB_path + : (config.conf.w_srv_db_sqlite_path.length > 0) + ? config.conf.w_srv_db_sqlite_path + : ""; + if (db_filename.length > 0 && db_path.length > 0) { + auto pth_sqlite = spinePathsSQLite!()(db_filename, db_path); + writeln("remove(", pth_sqlite.sqlite_file, ")"); + try { + remove(pth_sqlite.sqlite_file); + } catch (FileException ex) { + // handle error + } + } else { + writeln("must provide db name & output root path either on the command line or in configuration file"); + writeln("db name: ", db_filename); + writeln("db path: ", db_path); + } + } + } +} |