cleaning doc-reform_v0.4.1

author: Ralph Amissah <ralph.amissah@gmail.com> 2019-01-13 13:50:10 -0500
committer: Ralph Amissah <ralph.amissah@gmail.com> 2019-05-17 16:59:38 -0400
commit: 86204d301ae0dc2e5990f78081646e6d1189fcce (patch)
tree: 49967fa34a4222070b50d29468068b186e8bce9d /org
parent: doc generator info related (diff)
5 files changed, 153 insertions, 141 deletions
diff --git a/org/default_regex.org b/org/default_regex.org
index 6138eb5..c237239 100644
--- a/org/default_regex.org
+++ b/org/default_regex.org
@@ -489,7 +489,7 @@ static inline_al_delimiter_open_symbol_plus           = ctRegex!(`【[+]\s`, "m"
 static inline_al_delimiter_close_regular              = ctRegex!(`】`, "m");
 static inline_al_delimiter_open_and_close_regular     = ctRegex!(`【|】`, "m");
 static inline_notes_delimiter_al_regular              = ctRegex!(`【(.+?)】`, "mg");
-static inline_notes_delimiter_al_regular_number_note  = ctRegex!(`【(\d+)\s+(.+?)】`, "mg");
+static inline_notes_delimiter_al_regular_number_note  = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)】`, "mg");
 static inline_al_delimiter_open_asterisk              = ctRegex!(`【\*`, "m");
 static inline_al_delimiter_open_plus                  = ctRegex!(`【\+`, "m");
 static inline_text_and_note_al                        = ctRegex!(`(?P<text>.+?)【(?:[*+ ]*)(?P<note>.+?)】`, "mg");
@@ -536,10 +536,10 @@ static inline_mono                                    = ctRegex!(`#\{(?P<text>.+
 static inline_mono_box                                = ctRegex!(`■\{(?P<text>.+?)\}■`, "mg");
 static inline_cite                                    = ctRegex!(`"\{(?P<text>.+?)\}"`, "mg");
 static inline_faces_line                              = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
-static inline_emphasis_line                           = ctRegex!(`^\*_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
-static inline_bold_line                               = ctRegex!(`^!_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
-static inline_italics_line                            = ctRegex!(`^/_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
-static inline_underscore_line                         = ctRegex!(`^__ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
+static inline_emphasis_line                           = ctRegex!(`^\*_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
+static inline_bold_line                               = ctRegex!(`^!_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
+static inline_italics_line                            = ctRegex!(`^/_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
+static inline_underscore_line                         = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
 static inline_fontface_clean                          = ctRegex!(`[*!_/^,+#■"-]\{|\}[*!_/^,+#■"-]`, "mg");
 static no_header_rgx                                  = ctRegex!(`^=NULL$`);
 #+END_SRC
diff --git a/org/doc_reform.org b/org/doc_reform.org
index 422d25c..6b2f87c 100644
--- a/org/doc_reform.org
+++ b/org/doc_reform.org
@@ -8,7 +8,7 @@
 #+OPTIONS:     H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
 #+OPTIONS:     TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
 #+OPTIONS:     author:nil email:nil creator:nil timestamp:nil
-#+PROPERTY:    header-args :padline no :exports code :cache no :noweb yes
+#+PROPERTY:    header-args :results silent :padline no :exports code :cache no :noweb yes
 #+EXPORT_SELECT_TAGS:  export
 #+EXPORT_EXCLUDE_TAGS: noexport
 #+TAGS: assert(a) class(c) debug(d) mixin(m) doc_reform(s) tangle(T) template(t) WEB(W) noexport(n)
@@ -58,7 +58,7 @@ version (Posix) {
   - process file
   - output
 
-** 0. sdp src/sdp                                                 :template:
+** 0. doc_reform src/doc_reform                                   :template:
 
 - process files (act according to requirements of each type)
   - by sourcefilename
@@ -219,15 +219,13 @@ static auto rgx = Rgx();
 #+NAME: doc_reform_args
 #+BEGIN_SRC d
 scope(success) {
-  debug(checkdoc) {
-    writefln(
-      "~ run complete, ok ~ (%s-%s.%s.%s, %s D:%s, %s %s)",
-      program_name,
-      _ver.major, _ver.minor, _ver.patch,
-      __VENDOR__, __VERSION__,
-      bits, os,
-    );
-  }
+  writefln(
+    "~ run complete, ok ~ (%s-%s.%s.%s, %s D:%s, %s %s)",
+    program_name,
+    _ver.major, _ver.minor, _ver.patch,
+    __VENDOR__, __VERSION__,
+    bits, os,
+  );
 }
 scope(failure) {
   debug(checkdoc) {
@@ -1326,38 +1324,8 @@ writefln(
 * __END__
 dev notes
 
-** doc_reform glossary / terms
-
-|------------+-------------------------------------|
-| doc_reform | sisu document parser                |
-|------------+-------------------------------------|
-| dmso       | document markup, structure, objects |
-|------------+-------------------------------------|
-| meta       | meta document, document abstraction |
-| mda        | meta, meta document abstraction     |
-| adr        | abstract document representation    |
-| dar        | document abstract representation    |
-| (da)       | (document abstraction)              |
-|            | (code representation of document)   |
-|------------+-------------------------------------|
-| ao         | abstract objects                    |
-|            | (code representation of objects)    |
-|------------+-------------------------------------|
-
-consider
-|-------+----------------------------------------------|
-| dao   | document abstraction, objects                |
-|-------+----------------------------------------------|
-| daso  | document abstraction, structure, objects     |
-|-------+----------------------------------------------|
-| drso  | document representation, structure, objects  |
-|-------+----------------------------------------------|
-| daows | document abstraction, objects with structure |
-|-------+----------------------------------------------|
-
 ** the document notes
-*** document sections
-**** summary
+*** document sections (table)
 
 |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---|
 | section      | part         | opt. |   | objects                                          | ocn                            |   |   |
@@ -1398,36 +1366,71 @@ consider
 |              |              |      |   | - paras                                          |                                |   |   |
 |--------------+--------------+------+---+--------------------------------------------------+--------------------------------+---+---|
 
-**** on abstraction
-
-- abstract for downstream processing
-  - identify document structure and objects
-    - identify document structure (headings/levels/sections)
-    - identify objects (headings, paragraphs, tables, code blocks, verse ...)
-  - set document, generate common abstraction for downstream parsing
-    - set different _document sections_:
-      - _head_, toc, _body_, endnotes, glossary, bibliography, book index, blurb
-    - _object numbers_, heading/ chapter numbering etc, endnote numbers
-      - _regular ocn_
-        - body objects
-        - glossary objects
-        - bibliography objects
-      - _special ocn_
-        - non substantive text (provide special numbers)
-          - blurb objects
-        - book index
-      - special (_exceptions_)
-        - endnotes
-  - unify object representations
-    - multiple markups for same object type given single representation
-  - extract object attributes
-  - unify inline markup on objects
-    - inline markup made easier to identify
-
-- simplify downstream parsing
-
-*** objects
-**** summary
+*** document objects (table)
+- check, keep up to date
+
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| doc object   | doc object       | attributes              | inline         | appended            | structure            | delimiters                     |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| is_of        | is_a             |                         |                |                     |                      |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| para/heading | heading          | - level                 | - font face    | - object number off | - level              | - delimiter == two newlines    |
+|              |                  | - object number         | - endnotes     | - book index meta   | (document structure) |                                |
+|              |                  | - object number off     |                |                     |                      |                                |
+|              |                  | - dummy (toc & seg)     |                |                     |                      |                                |
+|              |                  | - tags (internal links) |                |                     |                      |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|              | toc              | - level                 | - font face    |                     |                      | - auto generated from headings |
+|              |                  |                         | - links (auto) |                     |                      |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| para         | para             | - bullet                | - font face    | - object number off |                      | - delimiter == two newlines    |
+|              |                  | - indent                | - links/urls * | - book index meta   |                      |                                |
+|              |                  | - object number         | - images*      |                     |                      |                                |
+|              |                  | - object number off     | - endnotes     |                     |                      |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|              | toc              | - indent                | - font face    |                     |                      | - delimiter == two newlines    |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|              | endnote          |                         | - font face    |                     |                      | (generated from                |
+|              |                  |                         |                |                     |                      | inline markup tags)            |
+|              |                  |                         |                |                     |                      | - delimiter == two newlines    |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|              | bookindex        | - auto indent           | - font face    |                     |                      | - delimiter == two newlines    |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|              | blurb            | - bullet                | - font face    | - object number off |                      | - delimiter == two newlines    |
+|              |                  | - indent                | - links/urls * | - book index meta   |                      |                                |
+|              |                  | - object number         | - images*      |                     |                      |                                |
+|              |                  | - object number off     | - endnotes     |                     |                      |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| group        | group            | - object number         | - font face    | - book index meta   | - para break         | - delimiter tags (group)       |
+|              |                  | - object number off     | - links/urls * |                     |                      |                                |
+|              |                  |                         | - images*      |                     |                      |                                |
+|              |                  |                         | - endnotes     |                     |                      |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|              | block            | - object number         | - font face    | - book index meta   | - new line           | - delimiter tags (block)       |
+|              |                  | - object number off     | - links/urls * |                     |                      |                                |
+|              |                  |                         | - images*      |                     |                      |                                |
+|              |                  |                         | - endnotes     |                     |                      |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|              | quote            | - object number         | - font face    | - book index meta   |                      | - delimiter tags (quote)       |
+|              |                  |                         | - endnotes     |                     |                      |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|              | poem (see verse) |                         |                | - book index meta   |                      | - delimiter tags (poem)        |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+|              | verse (of poem)  | - object number         | - font face    |                     | - new line           | - (see poem delimiter)         |
+|              |                  |                         | - endnotes     |                     | - preceding spaces   |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| group/code   | code             | - syntax                |                |                     | - new line           | - delimiter tags (code)        |
+|              |                  | - numbered              |                |                     | - preceding spaces   |                                |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+| group/table  | table            | - object number         |                |                     |                      | - delimiter tags (table)       |
+|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------|
+
+- consider special treatment for links/urls (& for images?): take them out of
+  document munge (for various outputs) by storing them in their own array
+  (within each object struct) and recording where in that array to extract them
+  from; still debating whether this is necessary or even worthwhile, as it is extra work
+
+**** check
 
 |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------|
 |       |              | identified by              | object notes             | attributes     | inline          | embedded       | special    |
@@ -1468,7 +1471,35 @@ consider
 |       |              |                            |                          | - heading row  |                 |                |            |
 |-------+--------------+----------------------------+--------------------------+----------------+-----------------+----------------+------------|
 
-**** ocn
+*** on abstraction
+
+- abstract for downstream processing
+  - identify document structure and objects
+    - identify document structure (headings/levels/sections)
+    - identify objects (headings, paragraphs, tables, code blocks, verse ...)
+  - set document, generate common abstraction for downstream parsing
+    - set different _document sections_:
+      - _head_, toc, _body_, endnotes, glossary, bibliography, book index, blurb
+    - _object numbers_, heading/ chapter numbering etc, endnote numbers
+      - _regular ocn_
+        - body objects
+        - glossary objects
+        - bibliography objects
+      - _special ocn_
+        - non substantive text (provide special numbers)
+          - blurb objects
+        - book index
+      - special (_exceptions_)
+        - endnotes
+  - unify object representations
+    - multiple markups for same object type given single representation
+  - extract object attributes
+  - unify inline markup on objects
+    - inline markup made easier to identify
+
+- simplify downstream parsing
+
+*** ocn
 
 |-------------+-----------------------+-----------------------+----------------+------|
 | objects     | section / part        | ocn described         | how used       | type |
@@ -1502,6 +1533,35 @@ consider
 |             |                       | + footnote seq. digit | anchor visible |      |
 |-------------+-----------------------+-----------------------+----------------+------|
 
+** doc_reform glossary / terms
+
+|------------+-------------------------------------|
+| doc_reform | sisu document parser                |
+|------------+-------------------------------------|
+| dmso       | document markup, structure, objects |
+|------------+-------------------------------------|
+| meta       | meta document, document abstraction |
+| mda        | meta, meta document abstraction     |
+| adr        | abstract document representation    |
+| dar        | document abstract representation    |
+| (da)       | (document abstraction)              |
+|            | (code representation of document)   |
+|------------+-------------------------------------|
+| ao         | abstract objects                    |
+|            | (code representation of objects)    |
+|------------+-------------------------------------|
+
+consider
+|-------+----------------------------------------------|
+| dao   | document abstraction, objects                |
+|-------+----------------------------------------------|
+| daso  | document abstraction, structure, objects     |
+|-------+----------------------------------------------|
+| drso  | document representation, structure, objects  |
+|-------+----------------------------------------------|
+| daows | document abstraction, objects with structure |
+|-------+----------------------------------------------|
+
 ** make config - _composite make_
 
 work on composite make: a unification of make instructions for each document run
diff --git a/org/meta_abstraction.org b/org/meta_abstraction.org
index 1a9c53c..4c98f66 100644
--- a/org/meta_abstraction.org
+++ b/org/meta_abstraction.org
@@ -639,7 +639,7 @@ if (!line.empty) {
 }
 #+END_SRC
 
-*** [#A] separate _code blocks_ from _other markup text_ [+5]
+*** separate _code blocks_ from _other markup text_ [+5] [#A]
 **** _code blocks_                                            :block:code:
 
 #+name: abs_in_loop_body_00_code_block
@@ -1076,7 +1076,7 @@ if (line.matchFirst(rgx.book_index)
 #+name: abs_in_loop_body_not_block_obj
 #+BEGIN_SRC d
   an_object_key="body_nugget";
-  if (auto m = matchFirst(line, rgx.comment)) {
+  if (auto m = line.matchFirst(rgx.comment)) {
     /+ matched comment +/
     debug(comment) {
       writeln(line);
@@ -1382,7 +1382,7 @@ if ((obj_type_status["heading"] == State.on)
   comp_obj_para.has.inline_notes_reg                        = substantive_obj_misc_tuple[sObj.notes_reg];
   comp_obj_para.has.inline_notes_star                       = substantive_obj_misc_tuple[sObj.notes_star];
   comp_obj_para.has.inline_links                            = substantive_obj_misc_tuple[sObj.links];
-  comp_obj_para.has.contains_image_without_dimensions       = substantive_obj_misc_tuple[sObj.image_no_dimensions];
+  comp_obj_para.has.image_without_dimensions                = substantive_obj_misc_tuple[sObj.image_no_dimensions];
   the_document_body_section                                 ~= comp_obj_para;
   tag_assoc = inline_para_link_anchor(an_object, tag_in_seg, tag_assoc);
   _common_reset_(line_occur, an_object, obj_type_status);
@@ -2259,7 +2259,7 @@ string[] segnames_lv0_to_4;
 #+name: abs_post
 #+BEGIN_SRC d
 auto _image_dimensions(M,O)(M manifest_matter, O obj) {
-  if (obj.has.contains_image_without_dimensions) {
+  if (obj.has.image_without_dimensions) {
     import std.math;
     import imageformats;
     int w, h, chans;
@@ -2900,58 +2900,6 @@ dom_structure_collapsed_tags_status          = [ 0, 0, 0, 0, 0, 0, 0, 0, 0,];
 dom_structure_collapsed_tags_status_buffer   = [ 0, 0, 0, 0, 0, 0, 0, 0, 0,];
 #+END_SRC
 
-*** [#A] ↻ _reloop_                                                  :reloop:
-
-- this would work, but no need for extra loop, remove
-
-#+name: abs_post_doc_reloop_processing
-#+BEGIN_SRC d
-debug(abstraction) {
-  foreach (part; sequenced_document_keys.scroll) {
-    foreach (ref obj; document_the[part]) {
-      debug(node_misc) {
-        if (obj.metainfo.is_a == "heading") {
-          writeln("heading ancestors text?: ", obj.tags.heading_ancestors_text);
-        }
-      }
-      debug(node_headings) {
-        if (obj.metainfo.is_a == "heading") {
-          writeln("---");
-          writeln(obj.text);
-          writeln("  node:                    ", obj.metainfo.node);
-          writeln("  ocn:                     ", obj.metainfo.ocn);
-          writeln("  markedup heading lev:    ", obj.metainfo.heading_lev_markup);
-          writeln("  collapsed heading lev    ", obj.metainfo.heading_lev_collapsed);
-          writeln("  parent ocn:              ", obj.metainfo.parent_ocn);
-          writeln("  parent lev:              ", obj.metainfo.parent_lev_markup);
-          writeln("  markedup ancestors o_n:  ", obj.metainfo.markedup_ancestors);
-          writeln("  dom markedup:            ", obj.metainfo.dom_structure_markedup_tags_status);
-          writeln("  collapsed ancestors o_n: ", obj.metainfo.collapsed_ancestors);
-          writeln("  dom collapsed:           ", obj.metainfo.dom_structure_collapsed_tags_status);
-        }
-      }
-      debug(node_all) {
-        writeln("---");
-        if (obj.metainfo.is_a == "heading") {
-          writeln(obj.text);
-        }
-        writeln("node:                    ", obj.metainfo.node);
-        writeln("ocn:                     ", obj.metainfo.ocn);
-        writeln("markedup heading lev:    ", obj.metainfo.heading_lev_markup);
-        writeln("collapsed heading lev    ", obj.metainfo.heading_lev_collapsed);
-        writeln("markedup ancestors o_n:  ", obj.metainfo.markedup_ancestors);
-        writeln("collapsed ancestors o_n: ", obj.metainfo.collapsed_ancestors);
-        writeln("dom markedup:            ", obj.metainfo.dom_structure_markedup_tags_status);
-        writeln("dom collapsed:           ", obj.metainfo.dom_structure_collapsed_tags_status);
-        writeln("parent ocn:              ", obj.metainfo.parent_ocn);
-        writeln("parent lev:              ", obj.metainfo.parent_lev_markup);
-        writeln("Node:                    ", obj.metainfo.ocn);
-      }
-    }
-  }
-}
-#+END_SRC
-
 *** [#A] _return document tuple_                               :return:tuple:
 
 #+name: abs_return_tuple
@@ -6117,7 +6065,7 @@ private:
           if (auto n = munge_.match(rgx.heading_anchor_tag_plus_colon)) {
             auto tag_remunge_ = n.captures[2]
               .replaceAll(rgx.heading_marker_tag_has_colon, "..");
-            munge_=munge_.replaceFirst(rgx.heading_anchor_tag_plus_colon, n.captures[1] ~ tag_remunge_ ~ " ");
+            munge_ = munge_.replaceFirst(rgx.heading_anchor_tag_plus_colon, n.captures[1] ~ tag_remunge_ ~ " ");
           }
         } else if (auto m = munge_.match(rgx.heading_extract_unnamed_anchor_tag)) {
           munge_ = munge_.replaceFirst(
@@ -7791,7 +7739,7 @@ struct HeadingAttrib {
 }
 #+END_SRC
 
-*** [#A] _composite object_
+*** _composite object_ [#A]
 
 #+name: meta_structs_init
 #+BEGIN_SRC d
@@ -7799,7 +7747,7 @@ struct DocObj_MetaInfo_ {
   string                 is_of_part                         = ""; // frontmatter, body, backmatter
   string                 is_of_section                      = ""; // toc, body, glossary, bibliography, book index, blurb
   string                 is_of_type                         = ""; // para, block ?
-  string                 is_a                               = ""; // heading, para, table, code block, group, ...
+  string                 is_a                               = ""; // heading, para, table, code block, group, verse/poem ...
   alias                  of_part                            = is_of_part;
   alias                  of_section                         = is_of_section;
   alias                  is_of                              = is_of_type;
@@ -7887,7 +7835,7 @@ struct DocObj_Has_ {                                         // has
   bool                   inline_links                        = false;
   bool                   inline_notes_reg                    = false;
   bool                   inline_notes_star                   = false;
-  bool                   contains_image_without_dimensions   = false;
+  bool                   image_without_dimensions            = false;
 }
 #+END_SRC
 
diff --git a/org/output_hub.org b/org/output_hub.org
index 51736ae..9ffa011 100644
--- a/org/output_hub.org
+++ b/org/output_hub.org
@@ -37,6 +37,9 @@ template outputHub() {
       auto msg = Msg!()(doc_matters);
       <<output_scheduled_task>>
     }
+    if (!(doc_matters.opt.action.quiet)) {
+      writeln(" ", doc_matters.src.filename_base);
+    }
     if (!(doc_matters.opt.action.parallelise_subprocesses)) {
       foreach(schedule; doc_matters.opt.action.output_task_scheduler) {
         Scheduled!()(schedule, doc_abstraction, doc_matters);
@@ -77,10 +80,10 @@ import doc_reform.output,
 #+name: output_scheduled_task
 #+BEGIN_SRC d
 if (sched == outTask.pod) {
-  msg.v("sisu source processing... ");
+  msg.v("doc reform source processing... ");
   import doc_reform.output.source_pod;
   DocReformPod!()(doc_matters);
-  msg.vv("sisu source done");
+  msg.vv("doc reform source done");
 }
 #+END_SRC
 
diff --git a/org/output_xmls.org b/org/output_xmls.org
index e44a9c9..29b999b 100644
--- a/org/output_xmls.org
+++ b/org/output_xmls.org
@@ -86,7 +86,8 @@ string div_delimit(
   return delimit;
 }
 #+END_SRC
-**** special characters
+
+**** special characters text
 
 #+name: xhtml_format_objects
 #+BEGIN_SRC d
author	Ralph Amissah <ralph.amissah@gmail.com>	2019-01-13 13:50:10 -0500
committer	Ralph Amissah <ralph.amissah@gmail.com>	2019-05-17 16:59:38 -0400
commit	86204d301ae0dc2e5990f78081646e6d1189fcce (patch)
tree	49967fa34a4222070b50d29468068b186e8bce9d /org
parent	doc generator info related (diff)