aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/sisudoc/meta/metadoc_from_src.d
diff options
context:
space:
mode:
Diffstat (limited to 'src/sisudoc/meta/metadoc_from_src.d')
-rw-r--r--src/sisudoc/meta/metadoc_from_src.d1509
1 files changed, 1509 insertions, 0 deletions
diff --git a/src/sisudoc/meta/metadoc_from_src.d b/src/sisudoc/meta/metadoc_from_src.d
new file mode 100644
index 0000000..32954f1
--- /dev/null
+++ b/src/sisudoc/meta/metadoc_from_src.d
@@ -0,0 +1,1509 @@
+/+
+- Name: SisuDoc Spine, Doc Reform [a part of]
+ - Description: documents, structuring, processing, publishing, search
+ - static content generator
+
+ - Author: Ralph Amissah
+ [ralph.amissah@gmail.com]
+
+ - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved.
+
+ - License: AGPL 3 or later:
+
+ Spine (SiSU), a framework for document structuring, publishing and
+ search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU Affero General Public License as published by the
+ Free Software Foundation, either version 3 of the License, or (at your
+ option) any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
+ more details.
+
+ You should have received a copy of the GNU Affero General Public License along with
+ this program. If not, see [https://www.gnu.org/licenses/].
+
+ If you have an Internet connection, the latest version of the AGPL should be
+ available at these locations:
+ [https://www.fsf.org/licensing/licenses/agpl.html]
+ [https://www.gnu.org/licenses/agpl.html]
+
+ - Spine (by Doc Reform, related to SiSU) uses standard:
+ - docReform markup syntax
+ - standard SiSU markup syntax with modified headers and minor modifications
+ - docReform object numbering
+ - standard SiSU object citation numbering & system
+
+ - Homepages:
+ [https://www.sisudoc.org]
+ [https://www.doc-reform.org]
+
+ - Git
+ [https://git.sisudoc.org/]
+
++/
+// document abstraction:
+// abstraction of sisu markup for downstream processing
+// metadoc_from_src.d
+module sisudoc.meta.metadoc_from_src;
+@safe:
+template docAbstraction() {
+ // ↓ abstraction imports
+ import
+ std.algorithm,
+ std.container,
+ std.file,
+ std.json,
+ std.path;
+ import
+ sisudoc.meta,
+ sisudoc.meta.defaults,
+ sisudoc.meta.rgx,
+ sisudoc.meta.metadoc_object_setter,
+ sisudoc.meta.rgx;
+ public import sisudoc.meta.metadoc_from_src_functions;
+ mixin docAbstractionFunctions;
+ @system auto docAbstraction(CMM,Opt,Mf) (
+ char[][] markup_sourcefile_content,
+ CMM conf_make_meta,
+ Opt opt_action,
+ Mf manifested,
+ bool _new_doc
+ ) {
+ static auto rgx = RgxI();
+ // ↓ abstraction init
+ scope(success) {
+ }
+ scope(failure) {
+ }
+ scope(exit) {
+ destroy(the_document_toc_section);
+ destroy(the_document_head_section);
+ destroy(the_document_body_section);
+ destroy(the_document_bibliography_section);
+ destroy(the_document_glossary_section);
+ destroy(the_document_blurb_section);
+ destroy(the_document_xml_dom_tail_section);
+ destroy(an_object);
+ destroy(processing);
+ destroy(biblio_arr_json);
+ previous_length = 0;
+ reset_note_numbers = true;
+ lev_anchor_tag = "";
+ anchor_tag = "";
+ }
+ mixin spineNode;
+ auto node_para_int_ = node_metadata_para_int;
+ auto node_para_str_ = node_metadata_para_str;
+ ObjGenericComposite comp_obj_;
+ line_occur = [
+ "heading" : 0,
+ "para" : 0,
+ "glossary" : 0,
+ "blurb" : 0,
+ ];
+ uint[string] dochas = [
+ "inline_links" : 0,
+ "inline_notes" : 0,
+ "inline_notes_star" : 0,
+ "codeblock" : 0,
+ "table" : 0,
+ "block" : 0,
+ "group" : 0,
+ "poem" : 0,
+ "quote" : 0,
+ "images" : 0,
+ ];
+ uint[string] pith = [
+ "ocn" : 1,
+ "section" : 0,
+ "txt_is" : 0,
+ "block_is" : 0,
+ "block_state" : 0,
+ "block_delim" : 0,
+ "make_headings" : 0,
+ "dummy_heading_status" : 0,
+ "dummy_heading_multiple_objects" : 0,
+ "no_ocn_multiple_objects" : 0,
+ "verse_new" : 0,
+ ];
+ string[string] object_number_poem = [
+ "start" : "",
+ "end" : ""
+ ];
+ string[] lv_ancestors_txt = [ "", "", "", "", "", "", "", "", ];
+ int[string] lv = [
+ "lv" : eN.bi.off,
+ "h0" : eN.bi.off,
+ "h1" : eN.bi.off,
+ "h2" : eN.bi.off,
+ "h3" : eN.bi.off,
+ "h4" : eN.bi.off,
+ "h5" : eN.bi.off,
+ "h6" : eN.bi.off,
+ "h7" : eN.bi.off,
+ "lev_int_collapsed" : 0,
+ ];
+ int[string] collapsed_lev = [
+ "h0" : eN.bi.off,
+ "h1" : eN.bi.off,
+ "h2" : eN.bi.off,
+ "h3" : eN.bi.off,
+ "h4" : eN.bi.off,
+ "h5" : eN.bi.off,
+ "h6" : eN.bi.off,
+ "h7" : eN.bi.off
+ ];
+ string[string] heading_match_str = [
+ "h_A": "^(none)",
+ "h_B": "^(none)",
+ "h_C": "^(none)",
+ "h_D": "^(none)",
+ "h_1": "^(none)",
+ "h_2": "^(none)",
+ "h_3": "^(none)",
+ "h_4": "^(none)"
+ ];
+ Regex!char[string] heading_match_rgx = [
+ "h_A": regex(r"^(none)"),
+ "h_B": regex(r"^(none)"),
+ "h_C": regex(r"^(none)"),
+ "h_D": regex(r"^(none)"),
+ "h_1": regex(r"^(none)"),
+ "h_2": regex(r"^(none)"),
+ "h_3": regex(r"^(none)"),
+ "h_4": regex(r"^(none)")
+ ];
+ string _anchor_tag;
+ string toc_txt_;
+ an_object["glossary_nugget"] = "";
+ an_object["blurb_nugget"] = "";
+ comp_obj_ = set_object_heading("lev4", "frontmatter", "toc", "Table of Contents");
+ comp_obj_.metainfo.identifier = "";
+ comp_obj_.metainfo.dummy_heading = false;
+ comp_obj_.metainfo.object_number_off = true;
+ comp_obj_.metainfo.object_number_type = 0;
+ comp_obj_.tags.segment_anchor_tag_epub = "toc";
+ comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub;
+ comp_obj_.tags.in_segment_html = comp_obj_.tags.anchor_tag_html;
+ comp_obj_.ptr.html_segnames = html_segnames_ptr;
+ comp_obj_.tags.anchor_tags = ["toc"];
+ tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html;
+ tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub;
+ auto toc_head = comp_obj_;
+ html_segnames_ptr_cntr++;
+ the_document_toc_section = [toc_head];
+ static auto mkup = InlineMarkup();
+ static auto munge = ObjInlineMarkupMunge();
+ auto note_section = NotesSection();
+ auto bookindex_extract_hash = BookIndexNuggetHash();
+ string[][string] lev4_subtoc;
+ string[][string] segnames = ["html": ["toc"], "epub": ["toc"]];
+ int cnt1 = 1; int cnt2 = 1; int cnt3 = 1;
+ // abstraction init ↑
+ debug (substitutions) {
+ writeln(__LINE__, ":", __FILE__, ": DEBUG substitutions:");
+ if (!(conf_make_meta.make.headings.empty)) {
+ writeln(conf_make_meta.make.headings);
+ }
+ if (conf_make_meta.make.substitute) {
+ foreach(substitution_pair; conf_make_meta.make.substitute) {
+ writeln("regex to match: ", substitution_pair[Substitute.match]);
+ writeln("substitution to make: ", substitution_pair[Substitute.markup]);
+ }
+ }
+ if (conf_make_meta.make.bold) {
+ writeln("regex to match: ", conf_make_meta.make.bold[Substitute.match]);
+ writeln("substitution to make: ", conf_make_meta.make.bold[Substitute.markup]);
+ }
+ if (conf_make_meta.make.emphasis) {
+ writeln("regex to match: ", conf_make_meta.make.emphasis[Substitute.match]);
+ writeln("substitution to make: ", conf_make_meta.make.emphasis[Substitute.markup]);
+ }
+ if (conf_make_meta.make.italics) {
+ writeln("regex to match: ", conf_make_meta.make.italics[Substitute.match]);
+ writeln("substitution to make: ", conf_make_meta.make.italics[Substitute.markup]);
+ }
+ }
+ auto loopMarkupSrcByLine( // nested helper: walks the markup source one line at a time and returns the toc, body, glossary and blurb sections (plus object_notes and segnames) in ret
+ char[][] markup_sourcefile_content, // the source text, iterated element by element (one element per line) by the foreach below
+ string[string] an_object, // accumulates the text/fields of the object currently being built (e.g. an_object[an_object_key] ~= line)
+ uint[string] pith, // state flags keyed by name ("section", "block_is", "block_state", "txt_is", "ocn", "make_headings", ...)
+ ) {
+ _loopMarkupSrcByLineStruct ret; // filled in after the loop and returned
+ srcDocLoopLineByLine_:
+ foreach (line; markup_sourcefile_content) {
+ // ↓ markup document/text line by line
+ // "line" variable can be empty but should never be null
+ // scope
+ scope(exit) { } // empty scope guard (no action)
+ scope(failure) { // runs when an exception escapes this iteration: reports source file name and the offending line on stderr
+ stderr.writefln(
+ "\n%s\n%s\n\n%s:%s\nFAILED while processing the file: ❮❮ %s ❯❯ on line with text:\n%s\n",
+ __MODULE__, __FUNCTION__,
+ __FILE__, __LINE__,
+ manifested.src.filename, line,
+ );
+ }
+ debug(source) { writeln(line); }
+ debug(srclines) { if (!line.empty) { writefln("* %s", line); } }
+ if (!line.empty) { pith = line._check_ocn_status_(pith); } // non-empty lines update pith via _check_ocn_status_
+ if ( pith["block_is"] == eN.blk_is.code
+ && pith["block_state"] == eN.blk_state.on
+ ) {
+ // block object: code (tested first, before the skip_from_regular_parse test and before inline_markup_faces is applied)
+ {
+ ST_txt_by_line_block_generic _get = line.txt_by_line_block_code(an_object, pith);
+ {
+ an_object = _get.this_object;
+ pith = _get.pith;
+ }
+ }
+ continue;
+ } else if (!matchFirst(line, rgx.skip_from_regular_parse)) {
+ // object other than "code block" object
+ // (includes regular text paragraph, headings & blocks other than code)
+ // heading, glossary, blurb, poem, group, block, quote, table
+ line = line.inline_markup_faces; // by text line (rather than by text object), linebreaks in para problematic
+ if (line.matchFirst(rgx.heading_biblio) // bibliography: its heading is seen, or we are already in that section and the line is not a glossary/blurb/other heading or a comment
+ || (pith["section"] == eN.sect.bibliography
+ && ((!(line.matchFirst(rgx.heading_glossary)))
+ && (!(line.matchFirst(rgx.heading_blurb)))
+ && (!(line.matchFirst(rgx.heading)))
+ && (!(line.matchFirst(rgx.comment)))))
+ ) {
+ pith["section"] = eN.sect.bibliography;
+ if (opt_action.backmatter && opt_action.section_biblio) { // collected only when both backmatter and the biblio section are enabled in opt_action
+ {
+ ST_txt_by_line_block_biblio _get = line.txt_by_line_block_biblio(pith, bib_entry, biblio_entry_str_json, biblio_arr_json);
+ {
+ pith = _get.pith;
+ bib_entry = _get.bib_entry;
+ biblio_entry_str_json = _get.biblio_entry_str_json;
+ biblio_arr_json = _get.biblio_arr_json;
+ }
+ }
+ debug(bibliobuild) {
+ writeln("- ", biblio_entry_str_json);
+ writeln("-> ", biblio_arr_json.length);
+ }
+ }
+ continue; // the line is consumed whether or not it was collected
+ } else if (line.matchFirst(rgx.heading_glossary)
+ || (pith["section"] == eN.sect.glossary
+ && ((!(line.matchFirst(rgx.heading_biblio)))
+ && (!(line.matchFirst(rgx.heading_blurb)))
+ && (!(line.matchFirst(rgx.heading)))
+ && (!(line.matchFirst(rgx.comment)))))
+ ) {
+ // within section (block object): glossary (same form of entry test as for the bibliography above)
+ debug(glossary) { writeln(__LINE__); writeln(line); }
+ pith["section"] = eN.sect.glossary;
+ if (opt_action.backmatter && opt_action.section_glossary) {
+ ST_the_section add_to_glossary_sect = line.build_the_glossary_section(pith, tag_assoc); // double check, should not be necessary to pass pith
+ the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[0]; // element 0 is appended unconditionally
+ if (add_to_glossary_sect.comp_section_obj.length > 1) { // heading
+ the_document_glossary_section ~= add_to_glossary_sect.comp_section_obj[1];
+ }
+ pith = add_to_glossary_sect.pith;
+ tag_assoc = add_to_glossary_sect.tag_assoc;
+ }
+ continue;
+ } else if (line.matchFirst(rgx.heading_blurb) // blurb: same form of entry test as for the bibliography and glossary above
+ || (pith["section"] == eN.sect.blurb
+ && ((!(line.matchFirst(rgx.heading_glossary)))
+ && (!(line.matchFirst(rgx.heading_biblio)))
+ && (!(line.matchFirst(rgx.heading)))
+ && (!(line.matchFirst(rgx.comment)))))
+ ) {
+ pith["section"] = eN.sect.blurb;
+ debug(blurb) { writeln(__LINE__); writeln(line); }
+ if ((opt_action.backmatter && opt_action.section_blurb) && !(line.empty)) { // unlike the glossary branch, empty lines are not passed on
+ ST_the_section add_to_blurb_sect = line.build_the_blurb_section(pith, tag_assoc, opt_action); // double check, should not be necessary to pass pith
+ the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[0];
+ if (add_to_blurb_sect.comp_section_obj.length > 1) { // heading
+ the_document_blurb_section ~= add_to_blurb_sect.comp_section_obj[1];
+ }
+ pith = add_to_blurb_sect.pith;
+ tag_assoc = add_to_blurb_sect.tag_assoc;
+ }
+ continue;
+ } else if (pith["block_state"] == eN.blk_state.on) { // inside an open (non-code) block: dispatch on pith["block_is"]; each type handled here consumes the line with continue
+ if (pith["block_is"] == eN.blk_is.quote) {
+ line = line
+ ._doc_header_and_make_substitutions_(conf_make_meta)
+ ._doc_header_and_make_substitutions_fontface_(conf_make_meta);
+ {
+ auto _get = line.txt_by_line_block_quote(an_object, pith);
+ {
+ an_object = _get.this_object;
+ pith = _get.pith;
+ }
+ }
+ continue;
+ } else if (pith["block_is"] == eN.blk_is.group) {
+ line = line
+ ._doc_header_and_make_substitutions_(conf_make_meta)
+ ._doc_header_and_make_substitutions_fontface_(conf_make_meta)
+ .replaceAll(rgx.para_delimiter, mkup.br_line_spaced ~ "$1"); // group only: para_delimiter matches are replaced by br_line_spaced followed by capture 1
+ {
+ auto _get = line.txt_by_line_block_group(an_object, pith);
+ {
+ an_object = _get.this_object;
+ pith = _get.pith;
+ }
+ }
+ continue;
+ } else if (pith["block_is"] == eN.blk_is.block) {
+ line = line
+ ._doc_header_and_make_substitutions_(conf_make_meta)
+ ._doc_header_and_make_substitutions_fontface_(conf_make_meta);
+ if (auto m = line.match(rgx.spaces_keep)) { // NOTE(review): the replacement text is built from the first match's captures[1] (spaces translated to mkup.nbsp) but replaceAll applies it to every match - confirm intended
+ line = line
+ .replaceAll(rgx.spaces_keep, (m.captures[1]).translate([ ' ' : mkup.nbsp ]));
+ }
+ {
+ auto _get = line.txt_by_line_block_block(an_object, pith);
+ {
+ an_object = _get.this_object;
+ pith = _get.pith;
+ }
+ }
+ continue;
+ } else if (pith["block_is"] == eN.blk_is.poem) { // poem lines are passed on without the substitution calls used for quote/group/block above
+ {
+ auto _get = line.txt_by_line_block_poem(an_object, pith, cntr, object_number_poem, conf_make_meta, tag_in_seg);
+ {
+ an_object = _get.this_object;
+ pith = _get.pith;
+ cntr = _get.cntr;
+ }
+ }
+ continue;
+ } else if (pith["block_is"] == eN.blk_is.table) {
+ {
+ auto _get = line.txt_by_line_block_table(an_object, pith, conf_make_meta);
+ {
+ an_object = _get.this_object;
+ pith = _get.pith;
+ conf_make_meta = _get.conf_make_meta; // the table handler may return an updated conf_make_meta
+ }
+ }
+ continue;
+ }
+ } else {
+ // not within a block group
+ assert(
+ (pith["block_state"] == eN.blk_state.off)
+ || (pith["block_state"] == eN.blk_state.closing),
+ "block status: none or closed"
+ );
+ if (line.matchFirst(rgx.block_open)) {
+ if (line.matchFirst(rgx.block_poem_open)) {
+ // poem to verse exceptions!
+ object_reset(an_object);
+ processing.remove("verse");
+ object_number_poem["start"] = obj_cite_digits.object_number.to!string; // record the current object number as the poem's "start"
+ }
+ {
+ auto _get = line.txt_by_line_block_start(pith, dochas, object_number_poem);
+ {
+ pith = _get.pith;
+ dochas = _get.dochas;
+ object_number_poem = _get.object_number_poem;
+ }
+ }
+ continue;
+ } else if (!line.empty) {
+ // line not empty - non blocks (headings, paragraphs) & closed blocks
+ assert(!line.empty, "line tested, line not empty surely:\n \"" ~ line ~ "\"");
+ assert(
+ (pith["block_state"] == eN.blk_state.off)
+ || (pith["block_state"] == eN.blk_state.closing),
+ "code block status: none or closed"
+ );
+ if (pith["block_state"] == eN.blk_state.closing) { // while a block is closing, the only non-empty lines accepted are book index lines (asserted below)
+ debug(check) { writeln(__LINE__); writeln(line); }
+ assert(
+ line.matchFirst(rgx.book_index_item)
+ || line.matchFirst(rgx.book_index_item_open)
+ || pith["section"] == eN.sect.book_index,
+ "\nblocks closed, unless followed by book index, non-matching line:\n \""
+ ~ line ~ "\""
+ );
+ }
+ if (line.matchFirst(rgx.book_index_item)
+ || line.matchFirst(rgx.book_index_item_open)
+ || pith["section"] == eN.sect.book_index) {
+ { // book_index
+ auto _get = line.flow_book_index_(an_object, book_idx_tmp, pith, opt_action);
+ {
+ an_object = _get.this_object;
+ pith = _get.pith;
+ book_idx_tmp = _get.book_idx_tmp;
+ }
+ }
+ } else {
+ // not book_index
+ an_object_key = "body_nugget";
+ if (auto m = line.matchFirst(rgx.comment)) {
+ // matched comment: stored straight away as its own object in the body section
+ debug(comment) { writeln(line); }
+ an_object[an_object_key] ~= line ~= "\n";
+ comp_obj_comment = comp_obj_comment.init; // start from a default-initialised comment object
+ comp_obj_comment.metainfo.is_of_part = "comment"; // breaks flow
+ comp_obj_comment.metainfo.is_of_section = "comment"; // breaks flow
+ comp_obj_comment.metainfo.is_of_type = "comment";
+ comp_obj_comment.metainfo.is_a = "comment";
+ comp_obj_comment.text = an_object[an_object_key].strip;
+ the_document_body_section ~= comp_obj_comment;
+ {
+ auto _get = txt_by_line_common_reset_(line_occur, an_object, pith);
+ {
+ line_occur = _get.line_occur;
+ an_object = _get.this_object;
+ pith = _get.pith;
+ }
+ }
+ processing.remove("verse");
+ ++cntr;
+ } else if ((line_occur["para"] == eN.bi.off
+ && line_occur["heading"] == eN.bi.off)
+ && pith["txt_is"] == eN.txt_is.off
+ ) { // heading or para but neither flag nor line exists
+ if ((conf_make_meta.make.headings.length > 2)
+ && (pith["make_headings"] == eN.bi.off)) {
+ // heading found
+ {
+ auto _get = line.flow_heading_found_(heading_match_str, conf_make_meta.make.headings, heading_match_rgx, pith);
+ {
+ heading_match_str = _get.heading_match_str;
+ heading_match_rgx = _get.heading_match_rgx;
+ pith = _get.pith;
+ }
+ }
+ }
+ if (pith["make_headings"] == eN.bi.on
+ && (line_occur["para"] == eN.bi.off
+ && line_occur["heading"] == eN.bi.off)
+ && pith["txt_is"] == eN.txt_is.off
+ ) {
+ // heading make set
+ {
+ auto _get = line.flow_heading_make_set_(line_occur, heading_match_rgx, pith);
+ {
+ line = _get.line;
+ an_object = _get.this_object;
+ pith = _get.pith;
+ }
+ }
+ }
+ // TODO node info: all headings identified at this point,
+ // - extract node info here??
+ // - how long can it wait?
+ // - should be incorporated in composite objects
+ // - should happen before endnote links set (they need to be moved down?)
+ if (line.matchFirst(rgx.headings)) {
+ // heading match
+ line = line._doc_header_and_make_substitutions_(conf_make_meta);
+ {
+ auto _get = line.flow_heading_matched_(
+ an_object,
+ line_occur,
+ an_object_key,
+ lv,
+ collapsed_lev,
+ pith,
+ conf_make_meta,
+ );
+ {
+ an_object = _get.this_object;
+ pith = _get.pith;
+ }
+ }
+ } else if (line_occur["para"] == eN.bi.off) {
+ // para match
+ an_object_key = "body_nugget";
+ line = line
+ ._doc_header_and_make_substitutions_(conf_make_meta)
+ ._doc_header_and_make_substitutions_fontface_(conf_make_meta);
+ {
+ auto _get = line.flow_para_match_(an_object, an_object_key, indent, bullet, pith, line_occur);
+ {
+ an_object = _get.this_object;
+ an_object_key = _get.this_object_key;
+ pith = _get.pith;
+ indent = _get.indent;
+ bullet = _get.bullet;
+ line_occur = _get.line_occur;
+ }
+ }
+ }
+ } else if (line_occur["heading"] > eN.bi.off) {
+ // heading already in progress: append this line (plus a newline) to the object being built
+ debug(heading) { writeln(line); }
+ an_object[an_object_key] ~= line ~= "\n";
+ ++line_occur["heading"];
+ } else if (line_occur["para"] > eN.bi.off) {
+ // paragraph already in progress: apply substitutions, then join this line to the existing text with a single space
+ debug(para) { writeln(an_object_key, "-> ", line); }
+ line = line
+ ._doc_header_and_make_substitutions_(conf_make_meta)
+ ._doc_header_and_make_substitutions_fontface_(conf_make_meta);
+ an_object[an_object_key] ~= " " ~ line;
+ ++line_occur["para"];
+ }
+ }
+ } else if (pith["block_state"] == eN.blk_state.closing) {
+ // line empty, with blocks flag: the state is handed to flow_block_flag_line_empty_ and the values it returns are copied back
+ {
+ auto _get = line.flow_block_flag_line_empty_(
+ an_object,
+ bookindex_extract_hash,
+ the_document_body_section,
+ bookindex_unordered_hashes,
+ obj_cite_digits,
+ comp_obj_,
+ cntr,
+ pith,
+ object_number_poem,
+ conf_make_meta,
+ tag_in_seg,
+ );
+ {
+ an_object = _get.this_object;
+ the_document_body_section = _get.the_document_body_section;
+ bookindex_unordered_hashes = _get.bookindex_unordered_hashes;
+ obj_cite_digits = _get.obj_cite_digits;
+ comp_obj_ = _get.comp_obj_;
+ cntr = _get.cntr;
+ pith = _get.pith;
+ }
+ }
+ } else {
+ // line.empty, post contents, empty variables:
+ assert(
+ line.empty,
+ "\nline should be empty:\n \""
+ ~ line ~ "\""
+ );
+ assert(
+ (pith["block_state"] == eN.blk_state.off),
+ "code block status: none"
+ );
+ if (_new_doc) { // reset the tag bookkeeping to its initial state; repeats on each such empty line until _new_doc is cleared in the heading branch below
+ tag_assoc = tag_assoc.init;
+ lv0to3_tags = lv0to3_tags.init;
+ tag_in_seg = tag_in_seg.init;
+ }
+ if (pith["txt_is"] == eN.txt_is.heading
+ && line_occur["heading"] > eN.bi.off
+ ) {
+ // heading object (current line empty)
+ obj_cite_digits = (an_object["lev_markup_number"].to!int == 0) // a level 0 heading calls ocn_emit with eN.ocn.reset, any other level with pith["ocn"]
+ ? ocn_emit(eN.ocn.reset)
+ : ocn_emit(pith["ocn"]);
+ an_object["is"] = "heading";
+ an_object_key = "body_nugget";
+ ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_object_and_anchor_tags_struct
+ = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, ((_new_doc) ? Yes._new_doc : No._new_doc));
+ an_object["substantive"] = substantive_object_and_anchor_tags_struct.obj_txt;
+ anchor_tag = substantive_object_and_anchor_tags_struct.anchor_tag;
+ if (_new_doc) { // first heading object of a new document: restart the part counters and clear the flag
+ cnt1 = 1;
+ cnt2 = 1;
+ cnt3 = 1;
+ _new_doc = false;
+ }
+ if ( // level 4 heading with an anchor tag (or with pending level 0-3 tags): its anchor tag becomes the current segment name
+ an_object["lev_markup_number"].to!int == 4
+ && (!(anchor_tag.empty)
+ || (lv0to3_tags.length > 0))
+ ) {
+ tag_in_seg["seg_lv4"] = anchor_tag;
+ tag_in_seg["seg_lv1to4"] = anchor_tag;
+ lev_anchor_tag = anchor_tag;
+ tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"];
+ tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"];
+ if (lv0to3_tags.length > 0) {
+ // names used for html markup segments 1 to 4 (rather than epub which has separate segments for A to D)
+ foreach (lv0_to_lv3_html_tag; lv0to3_tags) {
+ tag_assoc[lv0_to_lv3_html_tag]["seg_lv4"] = anchor_tag;
+ }
+ }
+ anchor_tag_ = anchor_tag; // remembered for headings of level above 4 (next branch)
+ lv0to3_tags = lv0to3_tags.init;
+ } else if (an_object["lev_markup_number"].to!int > 4) { // level above 4: segment names come from the remembered anchor_tag_
+ tag_in_seg["seg_lv4"] = anchor_tag_;
+ tag_in_seg["seg_lv1to4"] = anchor_tag_;
+ lev_anchor_tag = anchor_tag;
+ tag_assoc[anchor_tag]["seg_lv4"] = tag_in_seg["seg_lv4"];
+ tag_assoc[anchor_tag]["seg_lv1to4"] = tag_in_seg["seg_lv1to4"];
+ } else if (an_object["lev_markup_number"].to!int < 4) {
+ string segn;
+ switch (an_object["lev_markup_number"].to!int) {
+ // names used for epub markup segments A to D
+ case 0:
+ segn = "_the_title";
+ goto default;
+ case 1:
+ segn = "_part_" ~ cnt1.to!string;
+ ++cnt1;
+ goto default;
+ case 2:
+ segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string;
+ ++cnt2;
+ goto default;
+ case 3:
+ segn = "_part_" ~ cnt1.to!string ~ "_" ~ cnt2.to!string ~ "_" ~ cnt3.to!string;
+ ++cnt3;
+ goto default;
+ default: // shared tail for every case: queue the object number and segn in lv0to3_tags (consumed by the level 4 branch above)
+ lv0to3_tags ~= obj_cite_digits.object_number.to!string;
+ lv0to3_tags ~= segn;
+ tag_in_seg["seg_lv4"] = segn; // for html segname need following lv4 not yet known
+ tag_in_seg["seg_lv1to4"] = segn;
+ break;
+ }
+ }
+ an_object["bookindex_nugget"] // defaults to "" when the key is absent
+ = ("bookindex_nugget" in an_object) ? an_object["bookindex_nugget"] : "";
+ bookindex_unordered_hashes
+ = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg);
+ _anchor_tag = obj_cite_digits.identifier;
+ // (incrementally build toc) table of contents here!
+ {
+ auto _get = obj_im.flow_table_of_contents_gather_headings(
+ an_object,
+ conf_make_meta,
+ tag_in_seg,
+ _anchor_tag,
+ lev4_subtoc,
+ the_document_toc_section,
+ );
+ {
+ the_document_toc_section = _get.the_document_toc_section;
+ lev4_subtoc = _get.lev4_subtoc;
+ }
+ }
+ if (an_object["lev_markup_number"] == "4") { // string comparison here; the other level tests in this branch convert with to!int
+ segnames["html"] ~= tag_in_seg["seg_lv4"];
+ html_segnames_ptr = html_segnames_ptr_cntr;
+ html_segnames_ptr_cntr++;
+ }
+ if (an_object["lev_markup_number"].to!int <= 4) {
+ segnames["epub"] ~= tag_in_seg["seg_lv1to4"];
+ }
+ auto comp_obj_ = node_construct.node_emitter_heading( // NOTE(review): "auto" declares a new comp_obj_ local to this branch, distinct from the comp_obj_ used in the block-closing branch above
+ an_object,
+ tag_in_seg,
+ lev_anchor_tag,
+ tag_assoc,
+ obj_cite_digits, // OCNset
+ cntr, // int
+ heading_ptr, // int
+ lv_ancestors_txt, // string[]
+ html_segnames_ptr, // int
+ substantive_object_and_anchor_tags_struct,
+ );
+ ++heading_ptr;
+ debug(segments) {
+ writeln(an_object["lev_markup_number"]);
+ writeln(tag_in_seg["seg_lv4"]);
+ writeln(tag_in_seg["seg_lv1to4"]);
+ }
+ the_document_body_section ~= comp_obj_;
+ debug(objectrelated1) { writeln(line); } // check
+ {
+ auto _get = txt_by_line_common_reset_(line_occur, an_object, pith);
+ {
+ line_occur = _get.line_occur;
+ an_object = _get.this_object;
+ pith = _get.pith;
+ }
+ }
+ an_object.remove("lev");
+ an_object.remove("lev_markup_number");
+ processing.remove("verse");
+ ++cntr;
+ } else if (pith["txt_is"] == eN.txt_is.para
+ && line_occur["para"] > eN.bi.off
+ ) { // paragraph object (current line empty) - repeated character paragraph separator
+ if ((an_object[an_object_key].to!string).matchFirst(rgx.repeated_character_line_separator)) { // text matching the separator pattern switches pith["ocn"] to eN.ocn.off before ocn_emit is called
+ pith["ocn"] = eN.ocn.off;
+ }
+ obj_cite_digits = ocn_emit(pith["ocn"]);
+ an_object["bookindex_nugget"] = ("bookindex_nugget" in an_object) ? an_object["bookindex_nugget"] : "";
+ bookindex_unordered_hashes = bookindex_extract_hash.bookindex_nugget_hash(an_object["bookindex_nugget"], obj_cite_digits, tag_in_seg);
+ an_object["is"] = "para";
+ auto comp_obj_ = node_construct.node_location_emitter( // NOTE(review): this value is overwritten by set_object_generic below before it is read in this branch - presumably the call is kept for its side effects; confirm
+ content_non_header,
+ tag_in_seg,
+ lev_anchor_tag,
+ tag_assoc,
+ obj_cite_digits,
+ cntr,
+ heading_ptr-1,
+ an_object["is"],
+ );
+ ST_txtAndAnchorTagPlusHasFootnotesUrlsImages substantive_obj_misc_struct
+ = obj_im.obj_inline_markup_and_anchor_tags_and_misc(an_object, an_object_key, conf_make_meta, No._new_doc);
+ an_object["substantive"] = substantive_obj_misc_struct.obj_txt;
+ anchor_tag = substantive_obj_misc_struct.anchor_tag;
+ comp_obj_ = set_object_generic("body", "body", "para", "para", an_object["substantive"].to!string.strip, obj_cite_digits.object_number);
+ comp_obj_.tags.html_segment_anchor_tag_is = tag_in_seg["seg_lv4"];
+ comp_obj_.tags.epub_segment_anchor_tag_is = tag_in_seg["seg_lv1to4"];
+ comp_obj_.metainfo.identifier = obj_cite_digits.identifier;
+ comp_obj_.metainfo.object_number_off = (obj_cite_digits.off == 0) ? true : false; // TODO
+ comp_obj_.metainfo.o_n_book_index = obj_cite_digits.bkidx;
+ comp_obj_.metainfo.object_number_type = obj_cite_digits.type;
+ comp_obj_.attrib.indent_hang = indent["hang_position"];
+ comp_obj_.attrib.indent_base = indent["base_position"];
+ comp_obj_.attrib.bullet = bullet;
+ comp_obj_.tags.anchor_tags = [anchor_tag]; anchor_tag="";
+ comp_obj_.has.inline_notes_reg = substantive_obj_misc_struct.has_notes_reg;
+ comp_obj_.has.inline_notes_star = substantive_obj_misc_struct.has_notes_star;
+ comp_obj_.has.inline_links = substantive_obj_misc_struct.has_links;
+ comp_obj_.has.image_without_dimensions = substantive_obj_misc_struct.has_images_without_dimensions;
+ the_document_body_section ~= comp_obj_;
+ tag_assoc = an_object.inline_para_link_anchor(tag_in_seg, tag_assoc);
+ {
+ auto _get = txt_by_line_common_reset_(line_occur, an_object, pith);
+ {
+ line_occur = _get.line_occur;
+ an_object = _get.this_object;
+ pith = _get.pith;
+ }
+ }
+ indent = [ // indent and bullet are reset for the next paragraph
+ "hang_position" : 0,
+ "base_position" : 0,
+ ];
+ bullet = false;
+ processing.remove("verse");
+ ++cntr;
+ // } else { // could be useful to test line variable should be empty and never null
+ }
+ } // close else for line empty
+ } // close else for not the above
+ } // close after non code, other blocks or regular text
+ // unless (the_document_body_section.length == 0) ?
+ if (the_document_body_section.length > 0) { // reached only by lines that did not "continue" above: if the body grew since the last check, scan the newest object(s) for endnotes
+ if (((the_document_body_section[$-1].metainfo.is_a == "para")
+ || (the_document_body_section[$-1].metainfo.is_a == "heading")
+ || (the_document_body_section[$-1].metainfo.is_a == "quote")
+ || (the_document_body_section[$-1].metainfo.is_a == "group")
+ || (the_document_body_section[$-1].metainfo.is_a == "block")
+ || (the_document_body_section[$-1].metainfo.is_a == "verse"))
+ && (the_document_body_section.length > previous_length)) {
+ if ((the_document_body_section[$-1].metainfo.is_a == "heading")
+ && (the_document_body_section[$-1].metainfo.heading_lev_markup < 5)) {
+ pith["section"] = eN.sect.unset; // a heading with markup level below 5 resets pith["section"]
+ }
+ if (the_document_body_section[$-1].metainfo.is_a == "verse") {
+ // scan for endnotes for whole poem (each verse in poem)
+ foreach (i; previous_length .. the_document_body_section.length) {
+ if (the_document_body_section[i].metainfo.is_a == "verse") {
+ if ((the_document_body_section[i].text).match(
+ rgx.inline_notes_al_all_note
+ )) {
+ object_notes = note_section.gather_notes_for_endnote_section(
+ the_document_body_section,
+ tag_in_seg,
+ (i).to!int,
+ );
+ }
+ }
+ }
+ } else {
+ // scan object for endnotes
+ previous_length = the_document_body_section.length.to!int; // the same assignment is repeated after this if/else
+ if ((the_document_body_section[$-1].text).match(
+ rgx.inline_notes_al_all_note
+ )) {
+ previous_count = (the_document_body_section.length -1).to!int;
+ object_notes = note_section.gather_notes_for_endnote_section(
+ the_document_body_section,
+ tag_in_seg,
+ (the_document_body_section.length-1).to!int,
+ );
+ }
+ }
+ previous_length = the_document_body_section.length.to!int;
+ }
+ }
+ }
+ ret.toc = the_document_toc_section; // after the loop: copy the accumulated sections into the return struct
+ ret.body = the_document_body_section;
+ ret.glossary = the_document_glossary_section;
+ ret.blurb = the_document_blurb_section;
+ ret.object_notes = object_notes;
+ ret.segnames = segnames;
+ return ret;
+ }
+ { // loopMarkupSrcByLine
+ auto _doc_by_line = loopMarkupSrcByLine(markup_sourcefile_content, an_object, pith);
+ the_document_toc_section = _doc_by_line.toc;
+ the_document_body_section = _doc_by_line.body;
+ the_document_glossary_section = _doc_by_line.glossary;
+ the_document_blurb_section = _doc_by_line.blurb;
+ segnames = _doc_by_line.segnames;
+ object_notes = _doc_by_line.object_notes; // endnotes, compare, not sure is used
+ destroy(_doc_by_line);
+ }
+ { // EOF backMatter
+ comp_obj_ = set_object_heading("lev1", "backmatter", "tail", "");
+ comp_obj_.metainfo.identifier = "";
+ comp_obj_.metainfo.dummy_heading = false;
+ comp_obj_.metainfo.object_number_off = false;
+ comp_obj_.metainfo.object_number_type = 0;
+ comp_obj_.tags.segment_anchor_tag_epub = "_part_eof";
+ comp_obj_.tags.anchor_tag_html = comp_obj_.tags.segment_anchor_tag_epub;
+ comp_obj_.tags.in_segment_html = "tail";
+ comp_obj_.tags.anchor_tags = ["section_eof"];
+ comp_obj_.metainfo.dom_structure_markedup_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0];
+ comp_obj_.metainfo.dom_structure_collapsed_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0];
+ the_document_xml_dom_tail_section ~= comp_obj_;
+ tag_assoc[comp_obj_.tags.anchor_tag_html]["seg_lv4"] = comp_obj_.tags.in_segment_html;
+ tag_assoc[comp_obj_.tags.segment_anchor_tag_epub]["seg_lv1to4"] = comp_obj_.tags.segment_anchor_tag_epub;
+ }
+ // endNotes
+ ST_endnotes en_st = note_section.backmatter_endnote_objects(obj_cite_digits, opt_action);
+ { // endnotes
+ the_document_endnotes_section = en_st.endnotes;
+ obj_cite_digits = en_st.ocn;
+ debug(endnotes) {
+ writefln("%s %s", __LINE__, the_document_endnotes_section.length);
+ foreach (o; the_document_endnotes_section) { writeln(o); }
+ }
+ }
+ { // glossary
+ if (an_object["glossary_nugget"].length == 0) {
+ comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Glossary section");
+ comp_obj_.metainfo.identifier = "";
+ comp_obj_.metainfo.dummy_heading = true;
+ comp_obj_.metainfo.object_number_off = true;
+ comp_obj_.metainfo.object_number_type = 0;
+ the_document_glossary_section ~= comp_obj_;
+ }
+ debug(glossary) { foreach (gloss; the_document_glossary_section) { writeln(gloss.text); } }
+ }
+ { // bibliography
+ string[] biblio_unsorted_incomplete = biblio_arr_json.dup;
+ ST_biblio_section biblio_section = backmatter_make_the_bibliography_section(biblio_unsorted_incomplete, bib_arr_json);
+ the_document_bibliography_section = biblio_section.bibliography_section;
+ tag_assoc = biblio_section.tag_assoc;
+ }
+ { // bookindex
+ BookIndexReportSection bi = BookIndexReportSection();
+ ST_bookindex bi_st
+ = bi.backmatter_bookindex_build_abstraction_section(bookindex_unordered_hashes, obj_cite_digits, opt_action);
+ destroy(bookindex_unordered_hashes);
+ the_document_bookindex_section = bi_st.bookindex;
+ obj_cite_digits = bi_st.ocn;
+ debug(bookindex) { foreach (bi_entry; the_document_bookindex_section) { writeln(bi_entry); } }
+ }
+ { // blurb
+ if (an_object["blurb_nugget"].length == 0) {
+ comp_obj_ = set_object_heading("lev1", "empty", "empty", "(skip) there is no Blurb section");
+ comp_obj_.metainfo.identifier = "";
+ comp_obj_.metainfo.object_number_off = true;
+ comp_obj_.metainfo.object_number_type = 0;
+ comp_obj_.tags.segment_anchor_tag_epub = "";
+ comp_obj_.tags.anchor_tag_html = "";
+ comp_obj_.tags.in_segment_html = "";
+ the_document_blurb_section ~= comp_obj_;
+ }
+ debug(blurb) { foreach (blurb; the_document_blurb_section) { writeln(blurb.text); } }
+ }
+ { // toc gather backmatter
+ the_document_toc_section ~= backmatter_gather_table_of_contents(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section); //
+ }
+ { // document head and body
+ the_document_head_section ~= the_document_body_section[0];
+ the_document_body_section = the_document_body_section[1..$];
+ }
+ { // document ancestors
+ ST_ancestors get_ancestors;
+ get_ancestors = the_document_body_section.after_doc_determine_ancestors(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section);
+ the_document_body_section = get_ancestors.the_document_body_section;
+ the_document_endnotes_section = get_ancestors.the_document_endnotes_section;
+ the_document_glossary_section = get_ancestors.the_document_glossary_section;
+ the_document_bibliography_section = get_ancestors.the_document_bibliography_section;
+ the_document_bookindex_section = get_ancestors.the_document_bookindex_section;
+ the_document_blurb_section = get_ancestors.the_document_blurb_section;
+ }
+ { // document segnames
+ ST_segnames get_segnames;
+ get_segnames = the_document_body_section.after_doc_determine_segnames(the_document_endnotes_section, the_document_glossary_section, the_document_bibliography_section, the_document_bookindex_section, the_document_blurb_section, segnames, html_segnames_ptr_cntr, html_segnames_ptr); //
+ segnames = get_segnames.segnames;
+ html_segnames_ptr_cntr = get_segnames.html_segnames_ptr_cntr;
+ html_segnames_ptr = get_segnames.html_segnames_ptr;
+ }
+ // document head
+ string[] segnames_0_to_4;
+ foreach (ref obj; the_document_head_section) {
+ if (obj.metainfo.is_a == "heading") {
+ debug(dom) { writeln(obj.text); }
+ if (obj.metainfo.heading_lev_markup <= 4) {
+ segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
+ }
+ if (obj.metainfo.heading_lev_markup == 0) {
+ // TODO second hit (of two) with same assertion failure, check, fix and reinstate
+ // assert( obj.metainfo.ocn == 1,
+ // "Title OCN should be 1 not: " ~ obj.metainfo.ocn.to!string); // bug introduced 0.18.1
+ obj.metainfo.ocn = 1;
+ obj.metainfo.identifier = "1";
+ obj.metainfo.object_number_type = OCNtype.ocn;
+ }
+ // dom structure (marked up & collapsed)
+ if (opt_action.meta_processing_xml_dom) {
+ obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
+ obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
+ }
+ obj = obj.obj_heading_ancestors(lv_ancestors_txt);
+ }
+ obj = _links(obj);
+ }
+ if (the_document_toc_section.length > 1) {
+ // scroll
+ dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup;
+ dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup;
+ foreach (ref obj; the_document_toc_section) {
+ if (obj.metainfo.is_a == "heading") {
+ if (obj.metainfo.heading_lev_markup <= 4) {
+ segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
+ if (obj.metainfo.heading_lev_markup == 4) {
+ obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
+ assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
+ obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
+ }
+ }
+ // dom structure (marked up & collapsed)
+ if (opt_action.meta_processing_xml_dom) {
+ obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
+ obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
+ }
+ obj = obj.obj_heading_ancestors(lv_ancestors_txt);
+ }
+ obj = _links(obj);
+ }
+ }
+ // images
+ string[] _images;
+ // multiple 1~ levels, loop through document body
+ if (the_document_body_section.length > 1) {
+ foreach (ref obj; the_document_body_section) {
+ if (!(obj.metainfo.identifier.empty)) {
+ if (!(((obj.metainfo.identifier) in tag_assoc)
+ && ("seg_lv4" in tag_assoc[(obj.metainfo.identifier)]))
+ ) {
+ tag_assoc[(obj.metainfo.identifier)]["seg_lv4"]
+ = obj.tags.html_segment_anchor_tag_is;
+ }
+ tag_assoc[(obj.metainfo.identifier)]["seg_lv1to4"]
+ = obj.tags.epub_segment_anchor_tag_is;
+ }
+ if (obj.metainfo.is_a == "heading") {
+ debug(dom) { writeln(obj.text); }
+ if (obj.metainfo.heading_lev_markup <= 4) {
+ segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
+ if (obj.metainfo.heading_lev_markup == 4) {
+ obj.tags.lev4_subtoc = lev4_subtoc[obj.tags.anchor_tag_html];
+ obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
+ if (segnames["html"].length > obj.ptr.html_segnames + 1) {
+ obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
+ }
+ assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
+ obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
+ }
+ }
+ // dom structure (marked up & collapsed)
+ if (opt_action.meta_processing_xml_dom) {
+ obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
+ obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
+ }
+ obj = obj.obj_heading_ancestors(lv_ancestors_txt);
+ } else if (obj.metainfo.is_a == "para") {
+ _images ~= extract_images(obj.text);
+ obj = _image_dimensions(obj, manifested);
+ }
+ obj = _links(obj);
+ }
+ }
+ auto image_list = (_images.sort()).uniq;
+ // endnotes optional only one 1~ level
+ if (the_document_endnotes_section.length > 1) {
+ dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup;
+ dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup;
+ dom_structure_markedup_tags_status = dom_structure_markedup_tags_status_buffer.dup;
+ dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status_buffer.dup;
+ foreach (ref obj; the_document_endnotes_section) {
+ if (obj.metainfo.is_a == "heading") {
+ debug(dom) { writeln(obj.text); }
+ if (obj.metainfo.heading_lev_markup == 1) {
+ obj_cite_digits = ocn_emit(eN.ocn.on);
+ obj.metainfo.ocn = obj_cite_digits.object_number;
+ obj.metainfo.identifier = obj_cite_digits.identifier;
+ }
+ if (obj.metainfo.heading_lev_markup <= 4) {
+ segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
+ if (obj.metainfo.heading_lev_markup == 4) {
+ obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
+ if (segnames["html"].length > obj.ptr.html_segnames + 1) {
+ obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
+ }
+ assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
+ obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
+ }
+ }
+ // dom structure (marked up & collapsed)
+ if (opt_action.meta_processing_xml_dom) {
+ obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
+ obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
+ }
+ obj = obj.obj_heading_ancestors(lv_ancestors_txt);
+ }
+ obj = _links(obj);
+ }
+ }
+ // glossary optional only one 1~ level
+ if (the_document_glossary_section.length > 1) {
+ foreach (ref obj; the_document_glossary_section) {
+ if (obj.metainfo.is_a == "heading") {
+ debug(dom) { writeln(obj.text); }
+ if (obj.metainfo.heading_lev_markup == 1) {
+ obj_cite_digits = ocn_emit(eN.ocn.on);
+ obj.metainfo.ocn = obj_cite_digits.object_number;
+ obj.metainfo.identifier = obj_cite_digits.identifier;
+ }
+ if (obj.metainfo.heading_lev_markup <= 4) {
+ segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
+ if (obj.metainfo.heading_lev_markup == 4) {
+ obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
+ if (segnames["html"].length > obj.ptr.html_segnames + 1) {
+ obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
+ }
+ assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
+ obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
+ }
+ }
+ // dom structure (marked up & collapsed)
+ if (opt_action.meta_processing_xml_dom) {
+ obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
+ obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
+ }
+ obj = obj.obj_heading_ancestors(lv_ancestors_txt);
+ } else if (obj.metainfo.is_a == "glossary" && !(obj.text.empty)) {
+ obj_cite_digits = ocn_emit(eN.ocn.on);
+ obj.metainfo.ocn = obj_cite_digits.object_number;
+ obj.metainfo.identifier = obj_cite_digits.identifier;
+ }
+ obj = _links(obj);
+ }
+ }
+ // bibliography optional only one 1~ level
+ if (the_document_bibliography_section.length > 1) {
+ foreach (ref obj; the_document_bibliography_section) {
+ if (obj.metainfo.is_a == "heading") {
+ debug(dom) { writeln(obj.text); }
+ if (obj.metainfo.heading_lev_markup == 1) {
+ obj_cite_digits = ocn_emit(eN.ocn.on);
+ obj.metainfo.ocn = obj_cite_digits.object_number;
+ obj.metainfo.identifier = obj_cite_digits.identifier;
+ }
+ if (obj.metainfo.heading_lev_markup <= 4) {
+ segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
+ if (obj.metainfo.heading_lev_markup == 4) {
+ obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
+ if (segnames["html"].length > obj.ptr.html_segnames + 1) {
+ obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
+ }
+ assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
+ obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
+ }
+ }
+ // dom structure (marked up & collapsed)
+ if (opt_action.meta_processing_xml_dom) {
+ obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
+ obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
+ }
+ obj = obj.obj_heading_ancestors(lv_ancestors_txt);
+ } else if (obj.metainfo.is_a == "bibliography") {
+ obj_cite_digits = ocn_emit(eN.ocn.on);
+ obj.metainfo.ocn = obj_cite_digits.object_number;
+ obj.metainfo.identifier = obj_cite_digits.identifier;
+ }
+ obj = _links(obj);
+ }
+ }
+ // book index, optional only one 1~ level
+ int ocn_ = obj_cite_digits.object_number;
+ int ocn_bkidx_ = 0;
+ int ocn_bidx_;
+ if (the_document_bookindex_section.length > 1) { // scroll
+ dom_structure_markedup_tags_status_buffer = dom_structure_markedup_tags_status.dup;
+ dom_structure_collapsed_tags_status_buffer = dom_structure_collapsed_tags_status.dup;
+ foreach (ref obj; the_document_bookindex_section) {
+ if (obj.metainfo.is_a == "heading") {
+ // debug(dom) { }
+ if (obj.metainfo.heading_lev_markup <= 4) {
+ segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
+ }
+ if (obj.metainfo.heading_lev_markup == 1) {
+ obj_cite_digits = ocn_emit(eN.ocn.on);
+ obj.metainfo.ocn = obj_cite_digits.object_number;
+ obj.metainfo.identifier = obj_cite_digits.identifier;
+ }
+ if (obj.metainfo.heading_lev_markup <= 4) {
+ if (obj.metainfo.heading_lev_markup == 4) {
+ obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
+ if (segnames["html"].length > obj.ptr.html_segnames + 1) {
+ obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
+ }
+ assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
+ obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
+ }
+ }
+ // dom structure (marked up & collapsed)
+ if (opt_action.meta_processing_xml_dom) {
+ obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
+ obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
+ }
+ obj = obj.obj_heading_ancestors(lv_ancestors_txt);
+ } else if (obj.metainfo.is_a == "bookindex") {
+ obj_cite_digits = ocn_emit(eN.ocn.bkidx);
+ obj.metainfo.ocn = obj_cite_digits.object_number;
+ obj.metainfo.identifier = obj_cite_digits.identifier;
+ obj.metainfo.o_n_book_index = obj_cite_digits.bkidx;
+ obj.metainfo.object_number_type = OCNtype.bkidx;
+ }
+ obj = _links(obj);
+ }
+ // TODO assert failure, reinstate
+ // assert(obj_cite_digit_bkidx == ocn_bidx_ obj_cite_digit_bkidx ~ " == ocn_" ~ ocn_ ~ "?");
+ }
+ // blurb optional only one 1~ level
+ if (the_document_blurb_section.length > 1) {
+ foreach (ref obj; the_document_blurb_section) {
+ if (obj.metainfo.is_a == "heading") {
+ debug(dom) { writeln(obj.text); }
+ if (obj.metainfo.heading_lev_markup == 1) {
+ obj_cite_digits = ocn_emit(eN.ocn.on);
+ obj.metainfo.ocn = obj_cite_digits.object_number;
+ obj.metainfo.identifier = obj_cite_digits.identifier;
+ }
+ if (obj.metainfo.heading_lev_markup <= 4) {
+ segnames_0_to_4 ~= obj.tags.segment_anchor_tag_epub;
+ if (obj.metainfo.heading_lev_markup == 4) {
+ obj.tags.segname_prev = segnames["html"][obj.ptr.html_segnames - 1];
+ if (segnames["html"].length > obj.ptr.html_segnames + 1) {
+ obj.tags.segname_next = segnames["html"][obj.ptr.html_segnames + 1];
+ }
+ assert(obj.tags.anchor_tag_html == segnames["html"][obj.ptr.html_segnames],
+ obj.tags.anchor_tag_html ~ "!=" ~ segnames["html"][obj.ptr.html_segnames]);
+ }
+ }
+ // dom structure (marked up & collapsed)
+ if (opt_action.meta_processing_xml_dom) {
+ obj = obj.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, obj.metainfo.heading_lev_markup);
+ obj = obj.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, obj.metainfo.heading_lev_collapsed);
+ }
+ obj = obj.obj_heading_ancestors(lv_ancestors_txt);
+ } else if (obj.metainfo.is_a == "blurb") {
+ obj_cite_digits = ocn_emit(eN.ocn.off);
+ obj.metainfo.object_number_off = obj_cite_digits.off;
+ obj.metainfo.object_number_type = OCNtype.non;
+ }
+ obj = _links(obj);
+ }
+ }
+ // get descendants
+ if (the_document_body_section.length > 1) {
+ auto pairs = after_doc_get_descendants(
+ the_document_head_section ~
+ the_document_body_section ~
+ the_document_endnotes_section ~
+ the_document_glossary_section ~
+ the_document_bibliography_section ~
+ the_document_bookindex_section ~
+ the_document_blurb_section ~
+ the_document_xml_dom_tail_section
+ );
+ debug(descendants_tuple) {
+ pairs = pairs.sort();
+ foreach (pair; pairs) { // (pair; pairs.sort())
+ writeln(pair[0], "..", pair[1]);
+ }
+ }
+ foreach (ref obj; the_document_head_section) {
+ if (obj.metainfo.is_a == "heading") {
+ foreach (pair; pairs) {
+ if (obj.metainfo.ocn == pair[0]) {
+ obj.metainfo.last_descendant_ocn = pair[1];
+ }
+ }
+ }
+ }
+ if (the_document_body_section.length > 1) {
+ foreach (ref obj; the_document_body_section) {
+ if (obj.metainfo.is_a == "heading") {
+ foreach (pair; pairs) {
+ if (obj.metainfo.ocn == pair[0]) {
+ obj.metainfo.last_descendant_ocn = pair[1];
+ }
+ }
+ }
+ }
+ }
+ if (the_document_endnotes_section.length > 1) {
+ foreach (ref obj; the_document_endnotes_section) {
+ if (obj.metainfo.is_a == "heading") {
+ foreach (pair; pairs) {
+ if (obj.metainfo.ocn == pair[0]) {
+ obj.metainfo.last_descendant_ocn = pair[1];
+ }
+ }
+ }
+ }
+ }
+ if (the_document_glossary_section.length > 1) {
+ foreach (ref obj; the_document_glossary_section) {
+ if (obj.metainfo.is_a == "heading") {
+ foreach (pair; pairs) {
+ if (obj.metainfo.ocn == pair[0]) {
+ obj.metainfo.last_descendant_ocn = pair[1];
+ }
+ }
+ }
+ }
+ }
+ if (the_document_bibliography_section.length > 1) {
+ foreach (ref obj; the_document_bibliography_section) {
+ if (obj.metainfo.is_a == "heading") {
+ foreach (pair; pairs) {
+ if (obj.metainfo.ocn == pair[0]) {
+ obj.metainfo.last_descendant_ocn = pair[1];
+ }
+ }
+ }
+ }
+ }
+ if (the_document_bookindex_section.length > 1) {
+ foreach (ref obj; the_document_bookindex_section) {
+ if (obj.metainfo.is_a == "heading") {
+ foreach (pair; pairs) {
+ if (obj.metainfo.ocn == pair[0]) {
+ obj.metainfo.last_descendant_ocn = pair[1];
+ }
+ }
+ }
+ }
+ }
+ if (the_document_blurb_section.length > 1) {
+ foreach (ref obj; the_document_blurb_section) {
+ if (obj.metainfo.is_a == "heading") {
+ foreach (pair; pairs) {
+ if (obj.metainfo.ocn == pair[0]) {
+ obj.metainfo.last_descendant_ocn = pair[1];
+ }
+ }
+ }
+ }
+ }
+ if (the_document_xml_dom_tail_section.length > 1) {
+ foreach (ref obj; the_document_xml_dom_tail_section) {
+ if (obj.metainfo.is_a == "heading") {
+ foreach (pair; pairs) {
+ if (obj.metainfo.ocn == pair[0]) {
+ obj.metainfo.last_descendant_ocn = pair[1];
+ }
+ }
+ }
+ }
+ }
+ }
+ // TODO
+ // - note create/insert heading object sole purpose eof close all open tags
+ // sort out:
+ // - obj.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status;
+ // - obj.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status;
+ comp_obj_ = set_object_heading("lev1", "empty", "empty", "");
+ comp_obj_.metainfo.identifier = "";
+ comp_obj_.metainfo.dummy_heading = true;
+ comp_obj_.metainfo.object_number_off = true;
+ comp_obj_.metainfo.object_number_type = 0;
+ comp_obj_.tags.segment_anchor_tag_epub = "";
+ comp_obj_.tags.anchor_tag_html = "";
+ comp_obj_.tags.in_segment_html = "";
+ comp_obj_.tags.html_segment_anchor_tag_is = "";
+ comp_obj_.tags.epub_segment_anchor_tag_is = "";
+ comp_obj_.metainfo.heading_lev_markup = 9;
+ comp_obj_.metainfo.heading_lev_collapsed = 9;
+ comp_obj_.metainfo.parent_ocn = 0;
+ comp_obj_.metainfo.parent_lev_markup = 0;
+ comp_obj_.metainfo.dom_structure_markedup_tags_status = dom_structure_markedup_tags_status.dup;
+ comp_obj_.metainfo.dom_structure_collapsed_tags_status = dom_structure_collapsed_tags_status.dup;
+ comp_obj_ = comp_obj_.obj_dom_structure_set_markup_tags(dom_structure_markedup_tags_status, 0);
+ comp_obj_ = comp_obj_.obj_dom_set_collapsed_tags(dom_structure_collapsed_tags_status, 0);
+ comp_obj_ = comp_obj_.obj_heading_ancestors(lv_ancestors_txt);
+ // the_dom_tail_section ~= comp_obj_; // remove tail for now, decide on later
+ // the doc
+ ObjGenericComposite[][string] document_the = [
+ "head": the_document_head_section,
+ "toc": the_document_toc_section,
+ // substantive/body:
+ "body": the_document_body_section,
+ // backmatter:
+ "endnotes": the_document_endnotes_section,
+ "glossary": the_document_glossary_section,
+ "bibliography": the_document_bibliography_section,
+ "bookindex": the_document_bookindex_section,
+ "blurb": the_document_blurb_section,
+ // dom tail only
+ "tail": the_document_xml_dom_tail_section,
+ ];
+ // document parts keys as needed
+ string[][string] document_section_keys_sequenced = [
+ "scroll": ["head", "toc", "body",],
+ "seg": ["head", "toc", "body",],
+ "sql": ["head", "body",],
+ "latex": ["head", "toc", "body",]
+ ];
+ if (document_the["endnotes"].length > 1) {
+ document_section_keys_sequenced["scroll"] ~= "endnotes";
+ document_section_keys_sequenced["seg"] ~= "endnotes";
+ document_section_keys_sequenced["latex"] ~= "endnotes";
+ }
+ if (document_the["glossary"].length > 1) {
+ document_section_keys_sequenced["scroll"] ~= "glossary";
+ document_section_keys_sequenced["seg"] ~= "glossary";
+ document_section_keys_sequenced["sql"] ~= "glossary";
+ document_section_keys_sequenced["latex"] ~= "glossary";
+ }
+ if (document_the["bibliography"].length > 1) {
+ document_section_keys_sequenced["scroll"] ~= "bibliography";
+ document_section_keys_sequenced["seg"] ~= "bibliography";
+ document_section_keys_sequenced["sql"] ~= "bibliography";
+ document_section_keys_sequenced["latex"] ~= "bibliography";
+ }
+ if (document_the["bookindex"].length > 1) {
+ document_section_keys_sequenced["scroll"] ~= "bookindex";
+ document_section_keys_sequenced["seg"] ~= "bookindex";
+ document_section_keys_sequenced["sql"] ~= "bookindex";
+ document_section_keys_sequenced["latex"] ~= "bookindex";
+ }
+ if (document_the["blurb"].length > 1) {
+ document_section_keys_sequenced["scroll"] ~= "blurb";
+ document_section_keys_sequenced["seg"] ~= "blurb";
+ document_section_keys_sequenced["sql"] ~= "blurb";
+ document_section_keys_sequenced["latex"] ~= "blurb";
+ }
+ if ((opt_action.html)
+ || (opt_action.html_scroll)
+ || (opt_action.html_seg)
+ || (opt_action.epub)) {
+ document_section_keys_sequenced["scroll"] ~= "tail";
+ document_section_keys_sequenced["seg"] ~= "tail";
+ }
+ // segnames
+ string[] segnames_4 = segnames["html"].dup;
+ string[] segnames_lv1to4 = segnames["epub"].dup;
+ debug(segnames) {
+ writeln("segnames_lv4: ", segnames_4);
+ writeln("segnames_lv1to4: ", segnames_lv1to4);
+ }
+ // restart
+ destroy(the_document_head_section);
+ destroy(the_document_toc_section);
+ destroy(the_document_body_section);
+ destroy(the_document_endnotes_section);
+ destroy(the_document_glossary_section);
+ destroy(the_document_bibliography_section);
+ destroy(the_document_bookindex_section);
+ destroy(the_document_blurb_section);
+ destroy(the_document_xml_dom_tail_section);
+ destroy(segnames);
+ destroy(bookindex_unordered_hashes);
+ destroy(an_object);
+ obj_cite_digits = ocn_emit(eN.ocn.reset);
+ biblio_arr_json = [];
+ obj_cite_digit_ = 0;
+ html_segnames_ptr = 0;
+ html_segnames_ptr_cntr = 0;
+ content_non_header = "8";
+ dom_structure_markedup_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0,];
+ dom_structure_markedup_tags_status_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0,];
+ dom_structure_collapsed_tags_status = [ 0, 0, 0, 0, 0, 0, 0, 0,];
+ dom_structure_collapsed_tags_status_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0,];
+ lev_anchor_tag = "";
+ anchor_tag = "";
+ // identify parts
+ // DocHas_: accessor bundle handed back to callers alongside the document.
+ // It holds no fields of its own; every method returns a value read from a
+ // name declared outside this struct (dochas, image_list,
+ // document_section_keys_sequenced, segnames_4, segnames_0_to_4, tag_assoc).
+ struct DocHas_ {
+ // value stored under dochas["inline_links"]
+ uint inline_links() {
+ return dochas["inline_links"];
+ }
+ // value stored under dochas["inline_notes"] (note: key differs from the method name)
+ uint inline_notes_reg() {
+ return dochas["inline_notes"];
+ }
+ // value stored under dochas["inline_notes_star"]
+ uint inline_notes_star() {
+ return dochas["inline_notes_star"];
+ }
+ // value stored under dochas["codeblock"]
+ uint codeblocks() {
+ return dochas["codeblock"];
+ }
+ // value stored under dochas["table"]
+ uint tables() {
+ return dochas["table"];
+ }
+ // value stored under dochas["block"]
+ uint blocks() {
+ return dochas["block"];
+ }
+ // value stored under dochas["group"]
+ uint groups() {
+ return dochas["group"];
+ }
+ // value stored under dochas["poem"]
+ uint poems() {
+ return dochas["poem"];
+ }
+ // value stored under dochas["quote"]
+ uint quotes() {
+ return dochas["quote"];
+ }
+ // number of entries in image_list
+ ulong images() {
+ // Count the range elements directly. Rendering the range to text and
+ // splitting on "," (the earlier approach) over-counts whenever an image
+ // file name itself contains a comma, and allocates a throwaway string.
+ // NOTE(review): assumes each image_list element is a single image name - confirm against extract_images
+ import std.range.primitives : walkLength;
+ // walkLength takes its argument by value, so image_list is not consumed here
+ return walkLength(image_list);
+ }
+ // the sorted, de-duplicated image range itself
+ auto imagelist() {
+ return image_list;
+ }
+ // document_section_keys_sequenced wrapped by docSectKeysSeq
+ auto keys_seq() {
+ return docSectKeysSeq!()(document_section_keys_sequenced);
+ }
+ // copy of segnames["html"] taken before segnames was destroyed
+ string[] segnames_lv4() {
+ return segnames_4;
+ }
+ // epub segment anchor tags collected from headings with markup level <= 4
+ string[] segnames_lv_0_to_4() {
+ return segnames_0_to_4;
+ }
+ // the tag_assoc associative array (identifier -> tag kind -> tag)
+ string[string][string] tag_associations() {
+ return tag_assoc;
+ }
+ }
+ // build and hand back a DocHas_ accessor bundle
+ auto doc_has() {
+ auto parts = DocHas_();
+ return parts;
+ }
+ // the doc to be returned
+ // ST_docAbstraction: the pair of values this function returns,
+ // the sectioned document plus its DocHas_ accessor bundle.
+ struct ST_docAbstraction {
+ // document objects keyed by section name ("head", "toc", "body", "endnotes", ...)
+ ObjGenericComposite[][string] document_the;
+ // accessor bundle; filled from doc_has before the struct is returned
+ DocHas_ doc_has;
+ }
+ ST_docAbstraction ret;
+ {
+ ret.document_the = document_the;
+ ret.doc_has = doc_has;
+ }
+ return ret;
+ } // ← closed: abstract doc source
+}