diff options
Diffstat (limited to 'src/sdp/output/sqlite.d')
-rw-r--r-- | src/sdp/output/sqlite.d | 1093 |
1 files changed, 1093 insertions, 0 deletions
diff --git a/src/sdp/output/sqlite.d b/src/sdp/output/sqlite.d new file mode 100644 index 0000000..55d2573 --- /dev/null +++ b/src/sdp/output/sqlite.d @@ -0,0 +1,1093 @@ +module sdp.output.sqlite; +import sdp.output; +import + std.file, + std.conv : to; +import sdp.output.sqlite; +template SQLiteBuildTablesAndPopulate() { + void SQLiteBuildTablesAndPopulate(D,I)( + auto ref const D doc_abstraction, + auto ref I doc_matters, + ) { + import d2sqlite3; + import std.typecons : Nullable; + import sdp.output.sqlite; + mixin SiSUoutputRgxInit; + static auto rgx = Rgx(); + template SQLiteFormatAndLoadObject() { + auto SQLiteFormatAndLoadObject(I)( + auto ref I doc_matters, + ) { + mixin SiSUoutputRgxInit; + struct sqlite_format_and_load_objects { + auto generic_munge_sanitize_text_for_search( + string _txt, + ) { + string _notes; + string _urls; + _txt = _txt.replaceAll(rgx.inline_fontface_clean, ""); + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { + _notes ~= "\n" ~ m["text"]; + } + _txt = _txt.replaceAll(rgx.inline_notes_al_gen, ""); + } + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + if (m["link"].match(rgx.url)) { + _urls ~= "\n" ~ m["link"]; + } + } + _txt = _txt.replaceAll(rgx.inline_link_clean, ""); + } + if (_notes.length > 0) { + _txt ~= _notes; + } + if (_urls.length > 0) { + _txt ~= _urls; + } + if (doc_matters.opt_action["debug"]) { + writeln(_txt, "\n"); + } + debug(sql_text_clean) { + writeln(_txt); + } + return _txt; + } + auto munge_html(O)( + auto return ref const O obj, + ) { + string _html_special_characters(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.xhtml_line_break, "<br />"); + return _txt; + } + string _html_font_face(string _txt){ + _txt = (_txt) + .replaceAll(rgx.inline_emphasis, ("<em>$1</em>")) + .replaceAll(rgx.inline_bold, ("<b>$1</b>")) + .replaceAll(rgx.inline_underscore, ("<u>$1</u>")) + .replaceAll(rgx.inline_italics, ("<i>$1</i>")) + .replaceAll(rgx.inline_superscript, ("<sup>$1</sup>")) + .replaceAll(rgx.inline_subscript, ("<sub>$1</sub>")) + .replaceAll(rgx.inline_strike, ("<del>$1</del>")) + .replaceAll(rgx.inline_insert, ("<ins>$1</ins>")) + .replaceAll(rgx.inline_mono, ("<tt>$1</tt>")) + .replaceAll(rgx.inline_cite, ("<cite>$1</cite>")); + return _txt; + } + string _notes; + string _urls; + string _txt = _html_font_face(_html_special_characters(obj.text)); + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { + _notes ~= "\n" ~ m["text"]; + } + _txt = _txt.replaceAll(rgx.inline_notes_al_gen_ref, "<sup>$1</sup> "); + } + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + } + _txt = _txt.replaceAll(rgx.inline_link_clean, ""); + } + if (_notes.length > 0) { + _txt ~= _notes; + } + if (doc_matters.opt_action["debug"]) { + writeln(_txt, "\n"); + } + return _txt; + } + string html_special_characters(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.xhtml_line_break, "<br />"); + return _txt; + } + string html_special_characters_code(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " "); + return _txt; + } + string html_font_face(string _txt){ + _txt = (_txt) + .replaceAll(rgx.inline_emphasis, ("<em>$1</em>")) + .replaceAll(rgx.inline_bold, ("<b>$1</b>")) + .replaceAll(rgx.inline_underscore, ("<u>$1</u>")) + .replaceAll(rgx.inline_italics, ("<i>$1</i>")) + .replaceAll(rgx.inline_superscript, ("<sup>$1</sup>")) + .replaceAll(rgx.inline_subscript, ("<sub>$1</sub>")) + .replaceAll(rgx.inline_strike, ("<del>$1</del>")) + .replaceAll(rgx.inline_insert, ("<ins>$1</ins>")) + .replaceAll(rgx.inline_mono, ("<tt>$1</tt>")) + .replaceAll(rgx.inline_cite, ("<cite>$1</cite>")); + return _txt; + } + auto html_heading(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶<p class="%s"><b> + %s + </b></p>¶", + obj.is_a, + _txt, + ); + return o; + } + auto html_para(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + _txt = (obj.bullet) ? ("●  " ~ _txt) : _txt; + string o = format(q"¶<p class="%s" indent="h%si%s"> + %s + </p>¶", + obj.is_a, + obj.indent_hang, + obj.indent_base, + _txt + ); + return o; + } + auto html_quote(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶<p class="%s"> + %s + </p>¶", + obj.is_a, + _txt + ); + return o; + } + auto html_group(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶<p class="%s"> + %s + </p>¶", + obj.is_a, + _txt + ); + return o; + } + auto html_block(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶ + <p class="%s">%s</p>¶", + obj.is_a, + _txt.stripRight + ); + return o; + } + auto html_verse(O)( + auto return ref const O obj, + ) { + string _txt = munge_html(obj); + string o = format(q"¶<p class="%s">%s</p>¶", + obj.is_a, + _txt + ); + return o; + } + auto html_code(O)( + auto return ref const O obj, + ) { + string _txt = html_special_characters_code(obj.text); + string o = format(q"¶<p class="%s">%s</p>¶", + obj.is_a, + _txt + ); + return o; + } + auto html_table(O)( + auto return ref const O obj, + ) { + auto _tablarize(O)( + auto return ref const O obj, + string _txt, + ) { + string[] _table_rows = (_txt).split(rgx.table_delimiter_row); + string[] _table_cols; + string _table; + string _tablenote; + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + _table ~= "<tr>"; + foreach(col_idx, cell; _table_cols) { + if ((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) + _tablenote ~= cell; + } else { + string _col_is = (row_idx == 0 && obj.table_heading) ? "th" : "td"; + string _align = ("style=\"text-align:" + ~ ((obj.table_column_aligns[col_idx] == "l") + ? "left\"" : "right\"")); + _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table_column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">"; + _table ~= cell; + _table ~= "</" ~ _col_is ~ ">"; + } + } + _table ~= "</tr>"; + } + auto t = tuple( + _table, + _tablenote, + ); + return t; + } + string _txt = munge_html(obj); + auto t = _tablarize(obj, _txt); + _txt = t[0]; + string _note = t[1]; + string o = format(q"¶<p class="%s"> + <table summary="normal text css" width="95%%" border="0" bgcolor="white" cellpadding="2" align="center"> + %s + </table> + %s + </p>¶", + obj.is_a, + _txt, + _note + ); + return o; + } + auto sqlite_load_string(O,Dm)( + auto return ref const O obj, + auto return ref Dm doc_matters, + ) { + string o; + return o; + } + auto postgresql_load_string(O,Dm)( + auto return ref const O obj, + auto return ref Dm doc_matters, + ) { + string o; + return o; + } + string sqlite_statement(O)( + auto return ref const O obj, + string _txt, + string _html, + ) { + void _sql_exe(O)( + string _sql, + ) { + writeln(_html); + writeln(_sql); + } + string _sql; + return _sql; + } + auto heading(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_heading(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto para(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_para(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto quote(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_quote(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto group(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_group(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto block(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_block(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto verse(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_verse(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto code(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_code(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + auto table(O)( + auto return ref const O obj, + ) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_table(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; + } + } + return sqlite_format_and_load_objects(); + } + } + template SQLiteInstruct() { + Statement SQLiteInstruct(I)( + auto ref I doc_matters, + ) { + auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.src_path_info, doc_matters.language); + auto db = Database(pth_sqlite.sqlite_file(doc_matters.source_filename)); + // auto db = Database(":memory:"); // open database in memory + db.run(" + DROP TABLE IF EXISTS metadata_and_text; + DROP TABLE IF EXISTS doc_objects; + DROP TABLE IF EXISTS urls; + CREATE TABLE metadata_and_text ( + tid BIGINT PRIMARY KEY, + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NULL, /*NOT*/ + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_note VARCHAR(2500) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NULL, /*NOT*/ + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + publisher VARCHAR(600) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) NULL, + rights VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_preparation VARCHAR(2500) NULL, + rights_copyright_digitization VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + src_filename VARCHAR(256) NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + src_text TEXT NULL, + fulltext TEXT NULL, + links TEXT NULL + ); + + CREATE TABLE doc_objects ( + lid BIGINT PRIMARY KEY, + metadata_tid BIGINT REFERENCES metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + book_idx TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev0 SMALLINT, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + lev7 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(256), + digest_all CHAR(256), + types CHAR(1) NULL + ) + "); + Statement insert_metadata = db.prepare(" + INSERT INTO metadata_and_text ( + tid, + title, + title_main, + title_sub, + title_short, + title_edition, + title_note, + title_language, + title_language_char, + creator_author, + creator_author_hon, + creator_author_nationality, + creator_editor, + creator_contributor, + creator_illustrator, + creator_photographer, + creator_translator, + creator_prepared_by, + creator_digitized_by, + creator_audio, + creator_video, + language_document, + language_document_char, + language_original, + language_original_char, + date_added_to_site, + date_available, + date_created, + date_issued, + date_modified, + date_published, + date_valid, + date_translated, + date_original_publication, + date_generated, + publisher, + original_publisher, + original_language, + original_language_char, + original_source, + original_institution, + original_nationality, + rights, + rights_copyright_text, + rights_copyright_translation, + rights_copyright_illustrations, + rights_copyright_photographs, + rights_copyright_preparation, + rights_copyright_digitization, + rights_copyright_audio, + rights_copyright_video, + rights_license, + identifier_oclc, + identifier_isbn, + classify_topic_register, + classify_subject, + classify_loc, + classify_dewey, + classify_keywords, + notes_abstract, + notes_description, + notes_comment, + notes_coverage, + notes_relation, + notes_history, + notes_type, + notes_format, + notes_prefix, + notes_prefix_a, + notes_prefix_b, + notes_suffix, + src_filename, + src_fingerprint, + src_filesize, + src_wordcount, + src_text, + fulltext, + links + ) + VALUES ( + :tid, + :title, + :title_main, + :title_sub, + :title_short, + :title_edition, + :title_note, + :title_language, + :title_language_char, + :creator_author, + :creator_author_hon, + :creator_author_nationality, + :creator_editor, + :creator_contributor, + :creator_illustrator, + :creator_photographer, + :creator_translator, + :creator_prepared_by, + :creator_digitized_by, + :creator_audio, + :creator_video, + :language_document, + :language_document_char, + :language_original, + :language_original_char, + :date_added_to_site, + :date_available, + :date_created, + :date_issued, + :date_modified, + :date_published, + :date_valid, + :date_translated, + :date_original_publication, + :date_generated, + :publisher, + :original_publisher, + :original_language, + :original_language_char, + :original_source, + :original_institution, + :original_nationality, + :rights, + :rights_copyright_text, + :rights_copyright_translation, + :rights_copyright_illustrations, + :rights_copyright_photographs, + :rights_copyright_preparation, + :rights_copyright_digitization, + :rights_copyright_audio, + :rights_copyright_video, + :rights_license, + :identifier_oclc, + :identifier_isbn, + :classify_topic_register, + :classify_subject, + :classify_loc, + :classify_dewey, + :classify_keywords, + :notes_abstract, + :notes_description, + :notes_comment, + :notes_coverage, + :notes_relation, + :notes_history, + :notes_type, + :notes_format, + :notes_prefix, + :notes_prefix_a, + :notes_prefix_b, + :notes_suffix, + :src_filename, + :src_fingerprint, + :src_filesize, + :src_wordcount, + :src_text, + :fulltext, + :links + ) + "); + insert_metadata.bind(":title", doc_matters.dochead_meta["title"]["full"]); + // insert_metadata.bind(":title_main", doc_matters.dochead_meta["title"]["full"]); + // insert_metadata.bind(":title_sub", doc_matters.dochead_meta["title"][""]); + // insert_metadata.bind(":title_short", doc_matters.dochead_meta["title"][""]); + // insert_metadata.bind(":title_edition", doc_matters.dochead_meta["title"][""]); + // insert_metadata.bind(":title_note", doc_matters.dochead_meta["title"][""]); + // insert_metadata.bind(":title_language", doc_matters.dochead_meta["title"][""]); + // insert_metadata.bind(":title_language_char", doc_matters.dochead_meta["title"][""]); + insert_metadata.bind(":creator_author", doc_matters.dochead_meta["creator"]["author"]); + // insert_metadata.bind(":creator_author_hon", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_author_nationality", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_editor", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_contributor", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_illustrator", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_photographer", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_translator", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_prepared_by", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_digitized_by", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_audio", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":creator_video", doc_matters.dochead_meta["creator"][""]); + // insert_metadata.bind(":language_document", doc_matters.dochead_meta["language"][""]); + // insert_metadata.bind(":language_document_char", doc_matters.dochead_meta["language"][""]); + // insert_metadata.bind(":language_original", doc_matters.dochead_meta["language"][""]); + // insert_metadata.bind(":language_original_char", doc_matters.dochead_meta["language"][""]); + // insert_metadata.bind(":date_added_to_site", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":date_available", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":date_created", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":date_issued", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":date_modified", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":date_published", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":date_valid", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":date_translated", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":date_original_publication", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":date_generated", doc_matters.dochead_meta["date"][""]); + // insert_metadata.bind(":publisher", doc_matters.dochead_meta["publisher"][""]); + // insert_metadata.bind(":original_publisher", doc_matters.dochead_meta["original"][""]); + // insert_metadata.bind(":original_language", doc_matters.dochead_meta["original"][""]); + // insert_metadata.bind(":original_language_char", doc_matters.dochead_meta["original"][""]); + // insert_metadata.bind(":original_source", doc_matters.dochead_meta["original"][""]); + // insert_metadata.bind(":original_institution", doc_matters.dochead_meta["original"][""]); + // insert_metadata.bind(":original_nationality", doc_matters.dochead_meta["original"][""]); + // insert_metadata.bind(":rights", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":rights_copyright_text", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":rights_copyright_translation", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":rights_copyright_illustrations", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":rights_copyright_photographs", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":rights_copyright_preparation", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":rights_copyright_digitization", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":rights_copyright_audio", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":rights_copyright_video", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":rights_license", doc_matters.dochead_meta["rights"][""]); + // insert_metadata.bind(":identifier_oclc", doc_matters.dochead_meta["identifier"][""]); + // insert_metadata.bind(":identifier_isbn", doc_matters.dochead_meta["identifier"][""]); + // insert_metadata.bind(":classify_topic_register", doc_matters.dochead_meta["classify"][""]); + // insert_metadata.bind(":classify_subject", doc_matters.dochead_meta["classify"][""]); + // insert_metadata.bind(":classify_loc", doc_matters.dochead_meta["classify"][""]); + // insert_metadata.bind(":classify_loc", doc_matters.dochead_meta["classify"][""]); + // insert_metadata.bind(":notes_abstract", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_description", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_comment", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_coverage", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_relation", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_history", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_type", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_format", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_prefix", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_prefix_a", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_prefix_b", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":notes_suffix", doc_matters.dochead_meta["notes"][""]); + // insert_metadata.bind(":src_filename", doc_matters.dochead_meta["src"][""]); + // insert_metadata.bind(":src_fingerprint", doc_matters.dochead_meta["src"][""]); + // insert_metadata.bind(":src_filesize", doc_matters.dochead_meta["src"][""]); + // insert_metadata.bind(":src_wordcount", doc_matters.dochead_meta["src"][""]); + // insert_metadata.bind(":src_text", doc_matters.dochead_meta["src"][""]); + // insert_metadata.bind(":fulltext", doc_matters.dochead_meta["fulltext"][""]); + // insert_metadata.bind(":links", doc_matters.dochead_meta["links"][""]); + insert_metadata.execute(); insert_metadata.reset(); + /+ watch +/ + writeln("sql statement executed"); + assert(db.totalChanges == 1); + + Statement insert_doc_objects = db.prepare(" + INSERT INTO doc_objects ( + lid, + metadata_tid, + ocn, + ocnd, + ocns, + clean, + body, + book_idx, + seg, + lev_an, + lev, + lev0, + lev1, + lev2, + lev3, + lev4, + lev5, + lev6, + lev7, + en_a, + en_z, + en_a_asterisk, + en_z_asterisk, + en_a_plus, + en_z_plus, + t_of, + t_is, + node, + parent, + digest_clean, + digest_all, + types + ) + VALUES ( + :lid, + :metadata_tid, + :ocn, + :ocnd, + :ocns, + :clean, + :body, + :book_idx, + :seg, + :lev_an, + :lev, + :lev0, + :lev1, + :lev2, + :lev3, + :lev4, + :lev5, + :lev6, + :lev7, + :en_a, + :en_z, + :en_a_asterisk, + :en_z_asterisk, + :en_a_plus, + :en_z_plus, + :t_of, + :t_is, + :node, + :parent, + :digest_clean, + :digest_all, + :types + ) + "); + return insert_doc_objects; + } + } + template SQLiteObjectsLoop() { + void SQLiteObjectsLoop(P)( + auto ref P doc_parts, + ) { + Statement insert_doc_objects = SQLiteInstruct!()(doc_matters); + auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); + string[string] obj_txt; + string doc_text; + foreach (part; doc_parts) { + foreach (obj; doc_abstraction[part]) { + switch (obj.of_part) { + case "frontmatter": assert(part == "head"); + switch (obj.is_of) { + case "para": + switch (obj.is_a) { + case "heading": + obj_txt = format_and_sqlite_load.heading(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); + } + break; + } + break; + case "body": // assert(part == "body"); // TODO broken + switch (obj.is_of) { + case "para": + switch (obj.is_a) { + case "heading": + debug (asserts) { // TODO consider and fix or remove + if (part != "body") { + writeln(__LINE__, ": ", obj.text); + } + } + obj_txt = format_and_sqlite_load.heading(obj); + break; + case "para": + obj_txt = format_and_sqlite_load.para(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + case "block": + switch (obj.is_a) { + case "quote": + obj_txt = format_and_sqlite_load.quote(obj); + break; + case "group": + obj_txt = format_and_sqlite_load.group(obj); + break; + case "block": + obj_txt = format_and_sqlite_load.block(obj); + break; + case "poem": // double check on keeping both poem & verse + break; + case "verse": + obj_txt = format_and_sqlite_load.verse(obj); + break; + case "code": + obj_txt = format_and_sqlite_load.code(obj); + break; + case "table": + obj_txt = format_and_sqlite_load.table(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); + } + break; + } + break; + case "backmatter": + assert(part == "endnotes" || "glossary" || "bibliography" || "bookindex_scroll" || "blurb" || "tail"); + switch (obj.is_of) { + case "para": + switch (obj.is_a) { + case "heading": + obj_txt = format_and_sqlite_load.heading(obj); + break; + case "endnote": assert(part == "endnotes"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "glossary": assert(part == "glossary"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "bibliography": assert(part == "bibliography"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "bookindex": assert(part == "bookindex_scroll"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "blurb": assert(part == "blurb"); + obj_txt = format_and_sqlite_load.para(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); + } + break; + } + break; + case "comment": + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.of_part); // check where empty value could come from + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from + } + break; + } + if (obj.is_a == "heading") { + writeln( + "markup: ", obj.heading_lev_markup, + "> ", obj.dom_markedup, + "; collapsed: ", obj.heading_lev_collapsed, + "> ", obj.dom_collapsed, + "; ocn: ", obj.ocn, + " node: ", obj.node, + "; parent: ", obj.parent_lev_markup, + "; ocn: ", obj.parent_ocn, + "; ", + ); + } + insert_doc_objects.bind(":t_of", obj.is_of); + insert_doc_objects.bind(":t_is", obj.is_a); + insert_doc_objects.bind(":ocn", obj.ocn); + insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search + insert_doc_objects.bind(":body", obj_txt["html"]); + // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search + insert_doc_objects.bind(":lev", obj.heading_lev_markup); + // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete + // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info + // insert_doc_objects.bind(":lev0", ""); + // insert_doc_objects.bind(":lev1", ""); + // insert_doc_objects.bind(":lev2", ""); + // insert_doc_objects.bind(":lev3", ""); + // insert_doc_objects.bind(":lev4", ""); + // insert_doc_objects.bind(":lev5", ""); + // insert_doc_objects.bind(":lev6", ""); + // insert_doc_objects.bind(":lev7", ""); + // insert_doc_objects.bind(":node", ""); + // insert_doc_objects.bind(":type", ""); + // insert_doc_objects.bind(":parent_ocn", ""); + // insert_doc_objects.bind(":ancestors", ""); + // insert_doc_objects.bind(":heading_lev_markup", ""); + // insert_doc_objects.bind(":heading_lev_collapsed", ""); + // insert_doc_objects.bind(":parent_lev_markup", ""); + // insert_doc_objects.bind(":heading_ancestors", ""); + // insert_doc_objects.bind(":node", ""); + insert_doc_objects.execute(); insert_doc_objects.reset(); + } + } + } + } + SQLiteObjectsLoop!()(doc_matters.keys_seq.sql); + } +} +template SQLiteTablesCreate() { + void SQLiteTablesCreate()() { + writeln("table create"); + } +} +template SQLiteTablesDrop() { + void SQLiteTablesDrop()() { + writeln("table drop"); + } +} |