From f3806668388a3d955261d9782c2cb5659b0f15e3 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 9 Aug 2017 06:09:18 -0400 Subject: sqlite, introduced, far to go --- org/default_paths.org | 33 ++ org/output_hub.org | 7 +- org/output_sqlite.org | 1445 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1482 insertions(+), 3 deletions(-) create mode 100644 org/output_sqlite.org (limited to 'org') diff --git a/org/default_paths.org b/org/default_paths.org index ddf9bfe..7dfef88 100644 --- a/org/default_paths.org +++ b/org/default_paths.org @@ -100,6 +100,7 @@ import sdp.meta.rgx; <> <> <> +<> #+END_SRC ** _sisupod_ (output bundled source) :sisupod: @@ -442,3 +443,35 @@ template SiSUpathsEPUB() { } } #+END_SRC + +** _sqlite_ :sqlite: + +#+name: template_paths_sqlite +#+BEGIN_SRC d +template SiSUpathsSQLite() { + mixin SiSUrgxInit; + static auto rgx = Rgx(); + auto SiSUpathsSQLite(Ps,Lng)( + Ps src_pth_info, + Lng lng, + ) { + auto out_pth = SiSUoutPaths!()(src_pth_info, lng); + string base_dir = "sqlite"; + struct _PathsStruct { + string base_filename(string fn_src) { + return fn_src.baseName.stripExtension; + } + string base() { + return (out_pth.output_base).chainPath(base_dir).array; + } + string seg(string fn_src) { + return base.chainPath(base_filename(fn_src)).array; + } + string sqlite_file(string fn_src) { + return base.chainPath(base_filename(fn_src) ~ ".sqlite").array; + } + } + return _PathsStruct(); + } +} +#+END_SRC diff --git a/org/output_hub.org b/org/output_hub.org index 8f06194..037a169 100644 --- a/org/output_hub.org +++ b/org/output_hub.org @@ -49,6 +49,7 @@ template outputHubOp() { import sdp.output, sdp.output.epub3, sdp.output.html, + sdp.output.sqlite, sdp.output.xmls, sdp.output.source_sisupod, sdp.output.create_zip_file, @@ -166,7 +167,7 @@ if (doc_matters.opt_action["odt"]) { #+BEGIN_SRC d if (doc_matters.opt_action["sqlite"]) { if ((doc_matters.opt_action["verbose"])) { writeln("sqlite processing... "); } - // SQLtableLoad!()(doc_abstraction, doc_matters); + SQLiteBuildTablesAndPopulate!()(doc_abstraction, doc_matters); } #+END_SRC @@ -176,11 +177,11 @@ if (doc_matters.opt_action["sqlite"]) { #+BEGIN_SRC d if ((config["sqlite-create"])) { if ((config["verbose"])) { writeln("sqlite create table... "); } - // SQLtableCreate!()(); + SQLiteTablesCreate!()(); } if ((config["sqlite-drop"])) { if ((config["verbose"])) { writeln("sqlite drop table... "); } - // SQLtableDrop!()(); + SQLiteTablesDrop!()(); } #+END_SRC diff --git a/org/output_sqlite.org b/org/output_sqlite.org new file mode 100644 index 0000000..d3f6745 --- /dev/null +++ b/org/output_sqlite.org @@ -0,0 +1,1445 @@ +#+TITLE: sdp output html +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+DESCRIPTION for documents - structuring, publishing in multiple formats and search +#+KEYWORDS +#+LANGUAGE: en +#+STARTUP: indent content +#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t +#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc +#+OPTIONS: author:nil email:nil creator:nil timestamp:nil +#+PROPERTY: header-args :padline no :exports code :noweb yes +#+EXPORT_SELECT_TAGS: export +#+EXPORT_EXCLUDE_TAGS: noexport +#+FILETAGS: :sdp:niu:output: +#+TAGS: assert(a) class(c) debug(d) mixin(m) sdp(s) tangle(T) template(t) WEB(W) noexport(n) + +* sql +** 0. module, templates + +#+BEGIN_SRC d :tangle ../src/sdp/output/sqlite.d +module sdp.output.sqlite; +<> +template SQLiteBuildTablesAndPopulate() { + void SQLiteBuildTablesAndPopulate(D,I)( + auto ref const D doc_abstraction, + auto ref I doc_matters, + ) { + import d2sqlite3; + import std.typecons : Nullable; + import sdp.output.sqlite; + mixin SiSUoutputRgxInit; + static auto rgx = Rgx(); + template SQLiteFormatAndLoadObject() { + auto SQLiteFormatAndLoadObject(I)( + auto ref I doc_matters, + ) { + mixin SiSUoutputRgxInit; + struct sqlite_format_and_load_objects { + <> + <> + <> + <> + <> + } + return sqlite_format_and_load_objects(); + } + } + template SQLiteInstruct() { + Statement SQLiteInstruct(I)( + auto ref I doc_matters, + ) { + <> + <> + <> + <> + <> + <> + <> + <> + <> + } + } + template SQLiteObjectsLoop() { + void SQLiteObjectsLoop(P)( + auto ref P doc_parts, + ) { + <> + } + } + SQLiteObjectsLoop!()(doc_matters.keys_seq.sql); + } +} +template SQLiteTablesCreate() { + void SQLiteTablesCreate()() { + writeln("table create"); + } +} +template SQLiteTablesDrop() { + void SQLiteTablesDrop()() { + writeln("table drop"); + } +} +#+END_SRC + +** 1. imports + +#+name: output_imports +#+BEGIN_SRC d +import sdp.output; +import + std.file, + std.conv : to; +import sdp.output.sqlite; +#+END_SRC + +** 2. format and load template +*** 1. prepare objects (munge, sanitize, markup) +**** 1. _text_ generic munge (sanitize text for search) + +- [3/4] (search text, applies to all but code blocks) + - [ ] remove whitespace, paragraph on single line (formatting kept so far) + - [X] remove font face attributes + - [X] move embedded endnotes + - [X] place after text object + - [X] remove embedded endnote numbers (rely on html output to represent) + - [X] urls + - [X] clean url markers, leave plain link text + - [X] place urls after text object and its endnotes + +#+name: sanitize_text_for_search +#+BEGIN_SRC d +auto generic_munge_sanitize_text_for_search( + string _txt, +) { + string _notes; + string _urls; + _txt = _txt.replaceAll(rgx.inline_fontface_clean, ""); + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { + _notes ~= "\n" ~ m["text"]; + } + _txt = _txt.replaceAll(rgx.inline_notes_al_gen, ""); + } + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + if (m["link"].match(rgx.url)) { + _urls ~= "\n" ~ m["link"]; + } + } + _txt = _txt.replaceAll(rgx.inline_link_clean, ""); + } + if (_notes.length > 0) { + _txt ~= _notes; + } + if (_urls.length > 0) { + _txt ~= _urls; + } + if (doc_matters.opt_action["debug"]) { + writeln(_txt, "\n"); + } + debug(sql_text_clean) { + writeln(_txt); + } + return _txt; +} +#+END_SRC + +**** 2. _html_ + +light html objects +- place endnotes after text object +- light inline html markup + +***** munge +****** general munge (special characters, inline markup, move notes) + +#+name: sanitize_and_mmunge_inline_html +#+BEGIN_SRC d +auto munge_html(O)( + auto return ref const O obj, +) { + string _html_special_characters(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.xhtml_line_break, "
"); + return _txt; + } + string _html_font_face(string _txt){ + _txt = (_txt) + .replaceAll(rgx.inline_emphasis, ("$1")) + .replaceAll(rgx.inline_bold, ("$1")) + .replaceAll(rgx.inline_underscore, ("$1")) + .replaceAll(rgx.inline_italics, ("$1")) + .replaceAll(rgx.inline_superscript, ("$1")) + .replaceAll(rgx.inline_subscript, ("$1")) + .replaceAll(rgx.inline_strike, ("$1")) + .replaceAll(rgx.inline_insert, ("$1")) + .replaceAll(rgx.inline_mono, ("$1")) + .replaceAll(rgx.inline_cite, ("$1")); + return _txt; + } + string _notes; + string _urls; + string _txt = _html_font_face(_html_special_characters(obj.text)); + if (_txt.matchFirst(rgx.inline_notes_al_gen)) { + foreach (m; _txt.matchAll(rgx.inline_notes_al_gen_text)) { + _notes ~= "\n" ~ m["text"]; + } + _txt = _txt.replaceAll(rgx.inline_notes_al_gen_ref, "$1 "); + } + if (_txt.matchFirst(rgx.inline_link)) { + foreach (m; _txt.matchAll(rgx.inline_link)) { + } + _txt = _txt.replaceAll(rgx.inline_link_clean, ""); + } + if (_notes.length > 0) { + _txt ~= _notes; + } + if (doc_matters.opt_action["debug"]) { + writeln(_txt, "\n"); + } + return _txt; +} +#+END_SRC + +****** special characters + +#+name: sanitize_and_mmunge_inline_html +#+BEGIN_SRC d +string html_special_characters(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " ") + .replaceAll(rgx.xhtml_line_break, "
"); + return _txt; +} +#+END_SRC + +****** special characters for code + +#+name: sanitize_and_mmunge_inline_html +#+BEGIN_SRC d +string html_special_characters_code(string _txt){ + _txt = (_txt) + .replaceAll(rgx.xhtml_ampersand, "&") + .replaceAll(rgx.xhtml_quotation, """) + .replaceAll(rgx.xhtml_less_than, "<") + .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx.nbsp_char, " "); + return _txt; +} +#+END_SRC + +****** font_face + +#+name: sanitize_and_mmunge_inline_html +#+BEGIN_SRC d +string html_font_face(string _txt){ + _txt = (_txt) + .replaceAll(rgx.inline_emphasis, ("$1")) + .replaceAll(rgx.inline_bold, ("$1")) + .replaceAll(rgx.inline_underscore, ("$1")) + .replaceAll(rgx.inline_italics, ("$1")) + .replaceAll(rgx.inline_superscript, ("$1")) + .replaceAll(rgx.inline_subscript, ("$1")) + .replaceAll(rgx.inline_strike, ("$1")) + .replaceAll(rgx.inline_insert, ("$1")) + .replaceAll(rgx.inline_mono, ("$1")) + .replaceAll(rgx.inline_cite, ("$1")); + return _txt; +} +#+END_SRC + +***** objects +****** heading + +#+name: html_objects +#+BEGIN_SRC d +auto html_heading(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶

+ %s +

¶", + obj.is_a, + _txt, + ); + return o; +} +#+END_SRC + +******* +fancy+ + +##+name: prepare_objects_html +#+BEGIN_SRC d +auto html_heading(O)( + auto return ref const O obj, +) { + string o; + string _txt = munge_html(obj); + o = format(q"¶ + %s + ¶", + obj.heading_lev_markup, + obj.is_a, + _txt, + obj.heading_lev_markup, + ); + return o; +} +#+END_SRC + +****** para + +#+name: html_objects +#+BEGIN_SRC d +auto html_para(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + _txt = (obj.bullet) ? ("●  " ~ _txt) : _txt; + string o = format(q"¶

+ %s +

¶", + obj.is_a, + obj.indent_hang, + obj.indent_base, + _txt + ); + return o; +} +#+END_SRC + +****** quote + +#+name: html_objects +#+BEGIN_SRC d +auto html_quote(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶

+ %s +

¶", + obj.is_a, + _txt + ); + return o; +} +#+END_SRC + +****** group + +#+name: html_objects +#+BEGIN_SRC d +auto html_group(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶

+ %s +

¶", + obj.is_a, + _txt + ); + return o; +} +#+END_SRC + +****** block + +#+name: html_objects +#+BEGIN_SRC d +auto html_block(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶ +

%s

¶", + obj.is_a, + _txt.stripRight + ); + return o; +} +#+END_SRC + +****** verse + +#+name: html_objects +#+BEGIN_SRC d +auto html_verse(O)( + auto return ref const O obj, +) { + string _txt = munge_html(obj); + string o = format(q"¶

%s

¶", + obj.is_a, + _txt + ); + return o; +} +#+END_SRC + +****** code + +#+name: html_objects +#+BEGIN_SRC d +auto html_code(O)( + auto return ref const O obj, +) { + string _txt = html_special_characters_code(obj.text); + string o = format(q"¶

%s

¶", + obj.is_a, + _txt + ); + return o; +} +#+END_SRC + +****** table + +#+name: html_objects +#+BEGIN_SRC d +auto html_table(O)( + auto return ref const O obj, +) { + auto _tablarize(O)( + auto return ref const O obj, + string _txt, + ) { + string[] _table_rows = (_txt).split(rgx.table_delimiter_row); + string[] _table_cols; + string _table; + string _tablenote; + foreach(row_idx, row; _table_rows) { + _table_cols = row.split(rgx.table_delimiter_col); + _table ~= ""; + foreach(col_idx, cell; _table_cols) { + if ((_table_cols.length == 1) + && (_table_rows.length <= row_idx+2)) { // check row_idx+2 (rather than == ++row_idx) + _tablenote ~= cell; + } else { + string _col_is = (row_idx == 0 && obj.table_heading) ? "th" : "td"; + string _align = ("style=\"text-align:" + ~ ((obj.table_column_aligns[col_idx] == "l") + ? "left\"" : "right\"")); + _table ~= "<" ~ _col_is ~ " width=\"" ~ obj.table_column_widths[col_idx].to!string ~ "%\" " ~ _align ~ ">"; + _table ~= cell; + _table ~= ""; + } + } + _table ~= ""; + } + auto t = tuple( + _table, + _tablenote, + ); + return t; + } + string _txt = munge_html(obj); + auto t = _tablarize(obj, _txt); + _txt = t[0]; + string _note = t[1]; + string o = format(q"¶

+ + %s +
+ %s +

¶", + obj.is_a, + _txt, + _note + ); + return o; +} +#+END_SRC + +*** 2. _sqlite_ (loop preparation & pre-loop action) +***** initialize: prepare statements drop & create db & tables anew +****** d sqlite statements (initialize database) { + +#+name: sqlite_db_initialize +#+BEGIN_SRC d +auto pth_sqlite = SiSUpathsSQLite!()(doc_matters.src_path_info, doc_matters.language); +auto db = Database(pth_sqlite.sqlite_file(doc_matters.source_filename)); +// auto db = Database(":memory:"); // open database in memory +db.run(" +#+END_SRC + +******* 0. drop table if exists + +#+name: sqlite_table_initialize +#+BEGIN_SRC sql +DROP TABLE IF EXISTS metadata_and_text; +DROP TABLE IF EXISTS doc_objects; +DROP TABLE IF EXISTS urls; +#+END_SRC + +******* 1. create tables +******** metadata & src text + +#+name: sqlite_create_table_metadata_and_src_txt +#+BEGIN_SRC sql +CREATE TABLE metadata_and_text ( + tid BIGINT PRIMARY KEY, + title VARCHAR(800) NOT NULL, + title_main VARCHAR(400) NULL, /*NOT*/ + title_sub VARCHAR(400) NULL, + title_short VARCHAR(400) NULL, + title_edition VARCHAR(10) NULL, + title_note VARCHAR(2500) NULL, + title_language VARCHAR(100) NULL, + title_language_char VARCHAR(6) NULL, + creator_author VARCHAR(600) NULL, + creator_author_hon VARCHAR(100) NULL, + creator_author_nationality VARCHAR(100) NULL, + creator_editor VARCHAR(600) NULL, + creator_contributor VARCHAR(600) NULL, + creator_illustrator VARCHAR(600) NULL, + creator_photographer VARCHAR(600) NULL, + creator_translator VARCHAR(600) NULL, + creator_prepared_by VARCHAR(600) NULL, + creator_digitized_by VARCHAR(600) NULL, + creator_audio VARCHAR(600) NULL, + creator_video VARCHAR(600) NULL, + language_document VARCHAR(100) NULL, + language_document_char VARCHAR(6) NULL, /*NOT*/ + language_original VARCHAR(100) NULL, + language_original_char VARCHAR(6) NULL, + date_added_to_site VARCHAR(10) NULL, + date_available VARCHAR(10) NULL, + date_created VARCHAR(10) NULL, + date_issued VARCHAR(10) NULL, + date_modified VARCHAR(10) NULL, + date_published VARCHAR(10) NULL, + date_valid VARCHAR(10) NULL, + date_translated VARCHAR(10) NULL, + date_original_publication VARCHAR(10) NULL, + date_generated VARCHAR(10) NULL, + publisher VARCHAR(600) NULL, + original_publisher VARCHAR(600) NULL, + original_language VARCHAR(100) NULL, + original_language_char VARCHAR(6) NULL, + original_source VARCHAR(600) NULL, + original_institution VARCHAR(600) NULL, + original_nationality VARCHAR(100) NULL, + rights VARCHAR(2500) NULL, + rights_copyright_text VARCHAR(2500) NULL, + rights_copyright_translation VARCHAR(2500) NULL, + rights_copyright_illustrations VARCHAR(2500) NULL, + rights_copyright_photographs VARCHAR(2500) NULL, + rights_copyright_preparation VARCHAR(2500) NULL, + rights_copyright_digitization VARCHAR(2500) NULL, + rights_copyright_audio VARCHAR(2500) NULL, + rights_copyright_video VARCHAR(2500) NULL, + rights_license VARCHAR(2500) NULL, + identifier_oclc VARCHAR(30) NULL, + identifier_isbn VARCHAR(16) NULL, + classify_topic_register VARCHAR(2500) NULL, + classify_subject VARCHAR(600) NULL, + classify_loc VARCHAR(30) NULL, + classify_dewey VARCHAR(30) NULL, + classify_keywords VARCHAR(600) NULL, + notes_abstract TEXT NULL, + notes_description TEXT NULL, + notes_comment TEXT NULL, + notes_coverage VARCHAR(200) NULL, + notes_relation VARCHAR(200) NULL, + notes_history VARCHAR(600) NULL, + notes_type VARCHAR(600) NULL, + notes_format VARCHAR(600) NULL, + notes_prefix TEXT NULL, + notes_prefix_a TEXT NULL, + notes_prefix_b TEXT NULL, + notes_suffix TEXT NULL, + src_filename VARCHAR(256) NULL, + src_fingerprint VARCHAR(256) NULL, + src_filesize VARCHAR(10) NULL, + src_wordcount VARCHAR(10) NULL, + src_text TEXT NULL, + fulltext TEXT NULL, + links TEXT NULL +); +#+END_SRC + +******** doc_objects + +#+name: sqlite_create_table_objects +#+BEGIN_SRC sql +CREATE TABLE doc_objects ( + lid BIGINT PRIMARY KEY, + metadata_tid BIGINT REFERENCES metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + book_idx TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev0 SMALLINT, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + lev7 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(256), + digest_all CHAR(256), + types CHAR(1) NULL +) +#+END_SRC + +****** d } + +#+name: sqlite_close_initialize +#+BEGIN_SRC d +"); +#+END_SRC + +***** prepare insert statements and do pre-loop inserts +****** 1. _metadata & src_ text +******* sql statement +******** d { sql statement + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC d +Statement insert_metadata = db.prepare(" +#+END_SRC + +tid document number unique +either: +- increment by adding 1 for each document, +- make hash of document filename or url and use? + +********* sql insert into + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC sql + INSERT INTO metadata_and_text ( + tid, + title, + title_main, + title_sub, + title_short, + title_edition, + title_note, + title_language, + title_language_char, + creator_author, + creator_author_hon, + creator_author_nationality, + creator_editor, + creator_contributor, + creator_illustrator, + creator_photographer, + creator_translator, + creator_prepared_by, + creator_digitized_by, + creator_audio, + creator_video, + language_document, + language_document_char, + language_original, + language_original_char, + date_added_to_site, + date_available, + date_created, + date_issued, + date_modified, + date_published, + date_valid, + date_translated, + date_original_publication, + date_generated, + publisher, + original_publisher, + original_language, + original_language_char, + original_source, + original_institution, + original_nationality, + rights, + rights_copyright_text, + rights_copyright_translation, + rights_copyright_illustrations, + rights_copyright_photographs, + rights_copyright_preparation, + rights_copyright_digitization, + rights_copyright_audio, + rights_copyright_video, + rights_license, + identifier_oclc, + identifier_isbn, + classify_topic_register, + classify_subject, + classify_loc, + classify_dewey, + classify_keywords, + notes_abstract, + notes_description, + notes_comment, + notes_coverage, + notes_relation, + notes_history, + notes_type, + notes_format, + notes_prefix, + notes_prefix_a, + notes_prefix_b, + notes_suffix, + src_filename, + src_fingerprint, + src_filesize, + src_wordcount, + src_text, + fulltext, + links + ) +#+END_SRC + +********* sql values + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC sql + VALUES ( + :tid, + :title, + :title_main, + :title_sub, + :title_short, + :title_edition, + :title_note, + :title_language, + :title_language_char, + :creator_author, + :creator_author_hon, + :creator_author_nationality, + :creator_editor, + :creator_contributor, + :creator_illustrator, + :creator_photographer, + :creator_translator, + :creator_prepared_by, + :creator_digitized_by, + :creator_audio, + :creator_video, + :language_document, + :language_document_char, + :language_original, + :language_original_char, + :date_added_to_site, + :date_available, + :date_created, + :date_issued, + :date_modified, + :date_published, + :date_valid, + :date_translated, + :date_original_publication, + :date_generated, + :publisher, + :original_publisher, + :original_language, + :original_language_char, + :original_source, + :original_institution, + :original_nationality, + :rights, + :rights_copyright_text, + :rights_copyright_translation, + :rights_copyright_illustrations, + :rights_copyright_photographs, + :rights_copyright_preparation, + :rights_copyright_digitization, + :rights_copyright_audio, + :rights_copyright_video, + :rights_license, + :identifier_oclc, + :identifier_isbn, + :classify_topic_register, + :classify_subject, + :classify_loc, + :classify_dewey, + :classify_keywords, + :notes_abstract, + :notes_description, + :notes_comment, + :notes_coverage, + :notes_relation, + :notes_history, + :notes_type, + :notes_format, + :notes_prefix, + :notes_prefix_a, + :notes_prefix_b, + :notes_suffix, + :src_filename, + :src_fingerprint, + :src_filesize, + :src_wordcount, + :src_text, + :fulltext, + :links + ) +#+END_SRC + +******** d } + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC d +"); +#+END_SRC + +******* TODO insert document_metadata + +#+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC d +insert_metadata.bind(":title", doc_matters.dochead_meta["title"]["full"]); +// insert_metadata.bind(":title_main", doc_matters.dochead_meta["title"]["full"]); +// insert_metadata.bind(":title_sub", doc_matters.dochead_meta["title"][""]); +// insert_metadata.bind(":title_short", doc_matters.dochead_meta["title"][""]); +// insert_metadata.bind(":title_edition", doc_matters.dochead_meta["title"][""]); +// insert_metadata.bind(":title_note", doc_matters.dochead_meta["title"][""]); +// insert_metadata.bind(":title_language", doc_matters.dochead_meta["title"][""]); +// insert_metadata.bind(":title_language_char", doc_matters.dochead_meta["title"][""]); +insert_metadata.bind(":creator_author", doc_matters.dochead_meta["creator"]["author"]); +// insert_metadata.bind(":creator_author_hon", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_author_nationality", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_editor", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_contributor", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_illustrator", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_photographer", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_translator", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_prepared_by", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_digitized_by", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_audio", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":creator_video", doc_matters.dochead_meta["creator"][""]); +// insert_metadata.bind(":language_document", doc_matters.dochead_meta["language"][""]); +// insert_metadata.bind(":language_document_char", doc_matters.dochead_meta["language"][""]); +// insert_metadata.bind(":language_original", doc_matters.dochead_meta["language"][""]); +// insert_metadata.bind(":language_original_char", doc_matters.dochead_meta["language"][""]); +// insert_metadata.bind(":date_added_to_site", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":date_available", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":date_created", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":date_issued", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":date_modified", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":date_published", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":date_valid", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":date_translated", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":date_original_publication", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":date_generated", doc_matters.dochead_meta["date"][""]); +// insert_metadata.bind(":publisher", doc_matters.dochead_meta["publisher"][""]); +// insert_metadata.bind(":original_publisher", doc_matters.dochead_meta["original"][""]); +// insert_metadata.bind(":original_language", doc_matters.dochead_meta["original"][""]); +// insert_metadata.bind(":original_language_char", doc_matters.dochead_meta["original"][""]); +// insert_metadata.bind(":original_source", doc_matters.dochead_meta["original"][""]); +// insert_metadata.bind(":original_institution", doc_matters.dochead_meta["original"][""]); +// insert_metadata.bind(":original_nationality", doc_matters.dochead_meta["original"][""]); +// insert_metadata.bind(":rights", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":rights_copyright_text", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":rights_copyright_translation", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":rights_copyright_illustrations", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":rights_copyright_photographs", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":rights_copyright_preparation", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":rights_copyright_digitization", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":rights_copyright_audio", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":rights_copyright_video", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":rights_license", doc_matters.dochead_meta["rights"][""]); +// insert_metadata.bind(":identifier_oclc", doc_matters.dochead_meta["identifier"][""]); +// insert_metadata.bind(":identifier_isbn", doc_matters.dochead_meta["identifier"][""]); +// insert_metadata.bind(":classify_topic_register", doc_matters.dochead_meta["classify"][""]); +// insert_metadata.bind(":classify_subject", doc_matters.dochead_meta["classify"][""]); +// insert_metadata.bind(":classify_loc", doc_matters.dochead_meta["classify"][""]); +// insert_metadata.bind(":classify_loc", doc_matters.dochead_meta["classify"][""]); +// insert_metadata.bind(":notes_abstract", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_description", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_comment", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_coverage", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_relation", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_history", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_type", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_format", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_prefix", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_prefix_a", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_prefix_b", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":notes_suffix", doc_matters.dochead_meta["notes"][""]); +// insert_metadata.bind(":src_filename", doc_matters.dochead_meta["src"][""]); +// insert_metadata.bind(":src_fingerprint", doc_matters.dochead_meta["src"][""]); +// insert_metadata.bind(":src_filesize", doc_matters.dochead_meta["src"][""]); +// insert_metadata.bind(":src_wordcount", doc_matters.dochead_meta["src"][""]); +// insert_metadata.bind(":src_text", doc_matters.dochead_meta["src"][""]); +// insert_metadata.bind(":fulltext", doc_matters.dochead_meta["fulltext"][""]); +// insert_metadata.bind(":links", doc_matters.dochead_meta["links"][""]); +insert_metadata.execute(); insert_metadata.reset(); +/+ watch +/ +writeln("sql statement executed"); +assert(db.totalChanges == 1); +#+END_SRC + +###+name: sqlite_insert_metadata_and_src_text +#+BEGIN_SRC d +insert_metadata.inject( + 1, + doc_matters.dochead_meta["title"]["full"], + "", + "", + "", + "", + "", + "", + "", + doc_matters.dochead_meta["creator"]["author"] +); +#+END_SRC + +****** 2. _doc objects_ (used with doc_objects in document loop) +******* prepare sql statement +******** d { sql statement + +#+name: sqlite_insert_doc_objects +#+BEGIN_SRC d +Statement insert_doc_objects = db.prepare(" +#+END_SRC + +********* sql insert into + +lid unique, increment by 1 per object, not ocn + +metadata tid document number unique +either: +- increment by adding 1 for each document, +- make hash of document filename or url and use? + +#+name: sqlite_insert_doc_objects +#+BEGIN_SRC sql + INSERT INTO doc_objects ( + lid, + metadata_tid, + ocn, + ocnd, + ocns, + clean, + body, + book_idx, + seg, + lev_an, + lev, + lev0, + lev1, + lev2, + lev3, + lev4, + lev5, + lev6, + lev7, + en_a, + en_z, + en_a_asterisk, + en_z_asterisk, + en_a_plus, + en_z_plus, + t_of, + t_is, + node, + parent, + digest_clean, + digest_all, + types + ) +#+END_SRC + +********* sql values + +#+name: sqlite_insert_doc_objects +#+BEGIN_SRC sql + VALUES ( + :lid, + :metadata_tid, + :ocn, + :ocnd, + :ocns, + :clean, + :body, + :book_idx, + :seg, + :lev_an, + :lev, + :lev0, + :lev1, + :lev2, + :lev3, + :lev4, + :lev5, + :lev6, + :lev7, + :en_a, + :en_z, + :en_a_asterisk, + :en_z_asterisk, + :en_a_plus, + :en_z_plus, + :t_of, + :t_is, + :node, + :parent, + :digest_clean, + :digest_all, + :types + ) +#+END_SRC + +******** d } + +#+name: sqlite_insert_doc_objects +#+BEGIN_SRC d +"); +return insert_doc_objects; +#+END_SRC + +******* TODO (within loop not here - insert doc objects + +work out + +*** 3. hub (sqlite_format_and_load_objects) +***** sql related + +#+name: sqlite_load_object +#+BEGIN_SRC d +auto sqlite_load_string(O,Dm)( + auto return ref const O obj, + auto return ref Dm doc_matters, +) { + string o; + return o; +} +#+END_SRC + +#+name: sqlite_load_object +#+BEGIN_SRC d +auto postgresql_load_string(O,Dm)( + auto return ref const O obj, + auto return ref Dm doc_matters, +) { + string o; + return o; +} +#+END_SRC + +#+name: sqlite_load_object +#+BEGIN_SRC d +string sqlite_statement(O)( + auto return ref const O obj, + string _txt, + string _html, +) { + void _sql_exe(O)( + string _sql, + ) { + writeln(_html); + writeln(_sql); + } + string _sql; + return _sql; +} +#+END_SRC + +***** heading + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto heading(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_heading(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** para + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto para(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_para(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** quote + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto quote(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_quote(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC +***** group + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto group(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_group(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** block + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto block(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_block(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** verse + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto verse(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_verse(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** code + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto code(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_code(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +***** table + +#+name: hub_format_and_sqlite_load_objects +#+BEGIN_SRC d +auto table(O)( + auto return ref const O obj, +) { + string[string] obj_txt = [ + "text": generic_munge_sanitize_text_for_search(obj.text), + "html": html_table(obj) + ]; + if (doc_matters.opt_action["debug"]) { + debug(sql_txt) { + writeln(obj_txt["text"]); + } + debug(sql_html) { + writeln(obj_txt["html"]); + } + } else { + // load sql + } + return obj_txt; +} +#+END_SRC + +** 3. loop, identify, load - loop template + +#+name: sqlite_objects_loop +#+BEGIN_SRC d +Statement insert_doc_objects = SQLiteInstruct!()(doc_matters); +auto format_and_sqlite_load = SQLiteFormatAndLoadObject!()(doc_matters); +string[string] obj_txt; +string doc_text; +foreach (part; doc_parts) { + foreach (obj; doc_abstraction[part]) { + switch (obj.of_part) { + case "frontmatter": assert(part == "head"); + switch (obj.is_of) { + case "para": + switch (obj.is_a) { + case "heading": + obj_txt = format_and_sqlite_load.heading(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); + } + break; + } + break; + case "body": // assert(part == "body"); // TODO broken + switch (obj.is_of) { + case "para": + switch (obj.is_a) { + case "heading": + debug (asserts) { // TODO consider and fix or remove + if (part != "body") { + writeln(__LINE__, ": ", obj.text); + } + } + obj_txt = format_and_sqlite_load.heading(obj); + break; + case "para": + obj_txt = format_and_sqlite_load.para(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + case "block": + switch (obj.is_a) { + case "quote": + obj_txt = format_and_sqlite_load.quote(obj); + break; + case "group": + obj_txt = format_and_sqlite_load.group(obj); + break; + case "block": + obj_txt = format_and_sqlite_load.block(obj); + break; + case "poem": // double check on keeping both poem & verse + break; + case "verse": + obj_txt = format_and_sqlite_load.verse(obj); + break; + case "code": + obj_txt = format_and_sqlite_load.code(obj); + break; + case "table": + obj_txt = format_and_sqlite_load.table(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); + } + break; + } + break; + case "backmatter": + assert(part == "endnotes" || "glossary" || "bibliography" || "bookindex_scroll" || "blurb" || "tail"); + switch (obj.is_of) { + case "para": + switch (obj.is_a) { + case "heading": + obj_txt = format_and_sqlite_load.heading(obj); + break; + case "endnote": assert(part == "endnotes"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "glossary": assert(part == "glossary"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "bibliography": assert(part == "bibliography"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "bookindex": assert(part == "bookindex_scroll"); + obj_txt = format_and_sqlite_load.para(obj); + break; + case "blurb": assert(part == "blurb"); + obj_txt = format_and_sqlite_load.para(obj); + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + } + break; + } + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.is_of); + } + break; + } + break; + case "comment": + break; + default: + if ((doc_matters.opt_action["debug"])) { + writeln(__FILE__, ":", __LINE__, ": ", obj.of_part); // check where empty value could come from + writeln(__FILE__, ":", __LINE__, ": ", obj.is_a); + writeln(__FILE__, ":", __LINE__, ": ", obj.text); // check where empty value could come from + } + break; + } + if (obj.is_a == "heading") { + writeln( + "markup: ", obj.heading_lev_markup, + "> ", obj.dom_markedup, + "; collapsed: ", obj.heading_lev_collapsed, + "> ", obj.dom_collapsed, + "; ocn: ", obj.ocn, + " node: ", obj.node, + "; parent: ", obj.parent_lev_markup, + "; ocn: ", obj.parent_ocn, + "; ", + ); + } + insert_doc_objects.bind(":t_of", obj.is_of); + insert_doc_objects.bind(":t_is", obj.is_a); + insert_doc_objects.bind(":ocn", obj.ocn); + insert_doc_objects.bind(":clean", obj_txt["text"]); // consider whether book index info should be made available within clear text for search + insert_doc_objects.bind(":body", obj_txt["html"]); + // insert_doc_objects.bind(":book_idx", ""); // not needed, but, consider whether should be made available within object for clear text search + insert_doc_objects.bind(":lev", obj.heading_lev_markup); + // // insert_doc_objects.bind(":dom_markedup", ""); // should make lev sequence below obsolete + // // insert_doc_objects.bind(":dom_collapsed", ""); // should add info + // insert_doc_objects.bind(":lev0", ""); + // insert_doc_objects.bind(":lev1", ""); + // insert_doc_objects.bind(":lev2", ""); + // insert_doc_objects.bind(":lev3", ""); + // insert_doc_objects.bind(":lev4", ""); + // insert_doc_objects.bind(":lev5", ""); + // insert_doc_objects.bind(":lev6", ""); + // insert_doc_objects.bind(":lev7", ""); + // insert_doc_objects.bind(":node", ""); + // insert_doc_objects.bind(":type", ""); + // insert_doc_objects.bind(":parent_ocn", ""); + // insert_doc_objects.bind(":ancestors", ""); + // insert_doc_objects.bind(":heading_lev_markup", ""); + // insert_doc_objects.bind(":heading_lev_collapsed", ""); + // insert_doc_objects.bind(":parent_lev_markup", ""); + // insert_doc_objects.bind(":heading_ancestors", ""); + // insert_doc_objects.bind(":node", ""); + insert_doc_objects.execute(); insert_doc_objects.reset(); + } +} +#+END_SRC -- cgit v1.2.3