diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2019-01-13 14:19:04 -0500 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2019-05-17 16:59:38 -0400 |
commit | 87d62f48d6c8a2ccf9807f56c23a6ca71d1102e6 (patch) | |
tree | e367aff1a05f89104c7c81ac8a6c618a4f38b021 /org | |
parent | 0.4.2 xmls output, internal (diff) |
0.4.3 stow (most) uri/links in array, separate from object text
- munge independently
- no need to consider special munging of uri with text
- uri can easily be munged independently (encoded as need be)
Diffstat (limited to 'org')
-rw-r--r-- | org/default_regex.org | 4 | ||||
-rw-r--r-- | org/doc_reform.org | 112 | ||||
-rw-r--r-- | org/meta_abstraction.org | 49 | ||||
-rw-r--r-- | org/output_sqlite.org | 6 | ||||
-rw-r--r-- | org/output_xmls.org | 8 |
5 files changed, 121 insertions, 58 deletions
diff --git a/org/default_regex.org b/org/default_regex.org index c237239..5705fb2 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -505,9 +505,11 @@ static inline_image = ctRegex!(`(?P<pre>β₯)β static inline_image_without_dimensions = ctRegex!(`(?P<pre>β₯)βΌ(?P<imginf>(?P<img>\S+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?ββ€.+?β)`, "mg"); static inline_link_anchor = ctRegex!(`β(?P<anchor>\S+?)β`, "mg"); static inline_link = ctRegex!(`β₯(?P<text>.+?)ββ€(?P<link>\S+?)β`, "mg"); +static inline_link_number_only = ctRegex!(`(β₯.+?β)β€(?P<num>[0-9]+)β`, "mg"); +static inline_link_stow_uri = ctRegex!(`β₯(?P<text>.+?)ββ€(?P<link>[^ 0-9#β₯ββ€β][^ 0-9β₯ββ€β]+)β`, "mg"); // will not stow (stowed links) or object number internal links static inline_link_hash = ctRegex!(`β₯(?P<text>.+?)ββ€(?P<link>#(?P<segname>\S+?))β`, "mg"); static inline_link_clean = ctRegex!(`β€(?:.+?)β|[β₯β]`, "mg"); -static inline_a_url = ctRegex!(`(β€)(\S+?)(β)`, "mg"); +static inline_a_url = ctRegex!(`(β€)([^\sβ₯ββ€β]+)(β)`, "mg"); static url = ctRegex!(`https?://`, "mg"); static inline_link_subtoc = ctRegex!(`^(?P<level>[5-7])~ β₯(?P<text>.+?)ββ€(?P<link>.+?)β`, "mg"); static fn_suffix = ctRegex!(`\.fnSuffix`, "mg"); diff --git a/org/doc_reform.org b/org/doc_reform.org index 55bd41d..fe66011 100644 --- a/org/doc_reform.org +++ b/org/doc_reform.org @@ -26,7 +26,7 @@ struct Version { int minor; int patch; } -enum _ver = Version(0, 4, 2); +enum _ver = Version(0, 4, 3); #+END_SRC ** compilation restrictions (supported compilers) @@ -1369,61 +1369,61 @@ dev notes *** document objects (table) - check, keep up to date -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| doc object | doc object | attributes | inline | appended | structure | delimiters | 
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| is_of | is_a | | | | | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| para/heading | heading | - level | - font face | - object number off | - level | - delimiter == two newlines | -| | | - object number | - endnotes | - book index meta | (document structure) | | -| | | - object number off | | | | | -| | | - dummy (toc & seg) | | | | | -| | | - tags (internal links) | | | | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| | toc | - level | - font face | | | - auto generated from headings | -| | | | - links (auto) | | | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| para | para | - bullet | - font face | - object number off | | - delimiter == two newlines | -| | | - indent | - links/urls * | - book index meta | | | -| | | - object number | - images* | | | | -| | | - object number off | - endnotes | | | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| | toc | - indent | - font face | | | - delimiter == two newlines | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| | endnote | | - font face | | | (generated from | -| | | | | | | inline markup tags) | -| | | | | | | - delimiter == two newlines | 
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| | bookindex | - auto indent | - font face | | | - delimiter == two newlines | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| | blurb | - bullet | - font face | - object number off | | - delimiter == two newlines | -| | | - indent | - links/urls * | - book index meta | | | -| | | - object number | - images* | | | | -| | | - object number off | - endnotes | | | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| group | group | - object number | - font face | - book index meta | - para break | - delimiter tags (group) | -| | | - object number off | - links/urls * | | | | -| | | | - images* | | | | -| | | | - endnotes | | | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| | block | - object number | - font face | - book index meta | - new line | - delimiter tags (block) | -| | | - object number off | - links/urls * | | | | -| | | | - images* | | | | -| | | | - endnotes | | | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| | quote | - object number | - font face | - book index meta | | - delimiter tags (quote) | -| | | | - endnotes | | | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| | poem (see verse) | | | - book index meta | | - delimiter tags (poem) | 
-|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| | verse (of poem) | - object number | - font face | | - new line | - (see poem delimiter) | -| | | | - endnotes | | - preceeding spaces | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| group/code | code | - syntax | | | - new line | - delimiter tags (code) | -| | | - numbered | | | - preceeding spaces | | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| -| group/table | table | - object number | | | | - delimiter tags (table) | -|--------------+------------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| doc object | doc object | attributes | inline | appended | structure | delimiters | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| is_a | is_of_type | | | | | | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| heading | para | - level | - font face | - object number off | - level | - two newlines | +| | | - object number | - endnotes | - book index meta | (document structure) | | +| | | - object number off | | | | | +| | | - dummy (toc & seg) | | | | | +| | | - tags (internal links) | | | | | 
+|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| toc | para | - level | - font face | | | - auto generated from headings | +| | | (auto-indent) | - links (auto) | | | | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| para | para | - bullet | - font face | - object number off | | - two newlines | +| | | - indent | - links/urls * | - book index meta | | | +| | | - object number | - images* | | | | +| | | - object number off | - endnotes | | | | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| bookindex | para | - auto indent | - font face | | | - two newlines | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| blurb | para | - bullet | - font face | - object number off | | - two newlines | +| | | - indent | - links/urls * | - book index meta | | | +| | | - object number | - images* | | | | +| | | - object number off | - endnotes | | | | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| group | block | - object number | - font face | - book index meta | - para break | - block tags | +| | | - object number off | - links/urls * | | | (group) | +| | | | - images* | | | | +| | | | - endnotes | | | | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| block | block | - object number | - font face | - book index meta | - new line | - block tags | +| | | - object number off | - links/urls * | | | (block) | +| | | | - images* | | | 
| +| | | | - endnotes | | | | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| quote | block | - object number | - font face | - book index meta | | - block tags | +| | | | - endnotes | | | (quote) | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| poem (see verse) | block | | | - book index meta | | - block tags | +| | | | | | | (poem) | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| verse (of poem) | | - object number | - font face | | - new line | - (see poem delimiter) | +| | | | - endnotes | | - preceeding spaces | | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| code | block | - syntax | | | - new line | - block tags | +| | | - numbered | | | - preceeding spaces | (code) | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| table | block | - object number | | | | - block tags (table) | +| | | | | | | (table) | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| +| endnote | | | - font face | | | (generated from | +| | | | | | | inline markup tags) | +| | | | | | | - two newlines | +|------------------+------------+-------------------------+----------------+---------------------+----------------------+--------------------------------| - consider special treatment for links/urls (& for images?) 
take them out of document munge (for various outputs), by storing in own array (within each diff --git a/org/meta_abstraction.org b/org/meta_abstraction.org index 4c98f66..5249df1 100644 --- a/org/meta_abstraction.org +++ b/org/meta_abstraction.org @@ -2294,6 +2294,37 @@ auto _image_dimensions(M,O)(M manifest_matter, O obj) { } #+END_SRC +***** links: think about!!! +- move actual links to an array in object struct so they cannot be regex munged within text block + - you may wish to exclude certain types of internal document link + - object number links + - toc + - book index + - footnotes and footnote numbers + +#+name: abs_post +#+BEGIN_SRC d +auto _links(O)(O obj) { + if (auto m = obj.text.match(rgx.inline_link_stow_uri)) { + debug(links) { + writeln("number of link matches to stow: ", (obj.text.match(rgx.inline_link_stow_uri)).count); + writeln("links to stow: ", (obj.text.match(rgx.inline_link_stow_uri))); + } + int _n_matches = (obj.text.match(rgx.inline_link_stow_uri)).count.to!int; + for(int i=0; i < _n_matches; ++i) { + if (obj.text.match(rgx.inline_link_stow_uri)) { + obj.stow.link ~= obj.text.matchFirst(rgx.inline_link_stow_uri)[2]; + obj.text = obj.text.replaceFirst( + rgx.inline_link_stow_uri, + format(q"¶┥%s┝┤%s├¶", "$1", i) + ); + } + } + } + return obj; +} +#+END_SRC + ***** ↻ Loop section: head #+name: abs_post @@ -2327,6 +2358,7 @@ foreach (ref obj; the_document_head_section) { } obj = obj_heading_ancestors(obj, lv_ancestors_txt); } + obj = _links(obj); } #+END_SRC @@ -2360,6 +2392,7 @@ if (the_table_of_contents_section.length > 1) { } obj = obj_heading_ancestors(obj, lv_ancestors_txt); } + obj = _links(obj); } } #+END_SRC @@ -2412,6 +2445,7 @@ if (the_document_body_section.length > 1) { _images ~= extract_images(obj.text); obj = _image_dimensions(manifest_matter, obj); } + obj = _links(obj); } } auto images=uniq(_images.sort()); @@ -2460,6 +2494,7 @@ if (the_endnotes_section.length > 1) { } obj = obj_heading_ancestors(obj, lv_ancestors_txt); 
} + obj = _links(obj); } } #+END_SRC @@ -2507,6 +2542,7 @@ if (the_glossary_section.length > 1) { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj = _links(obj); } } #+END_SRC @@ -2554,6 +2590,7 @@ if (the_bibliography_section.length > 1) { obj.metainfo.ocn = obj_cite_digits.object_number; obj.metainfo.identifier = obj_cite_digits.identifier; } + obj = _links(obj); } } #+END_SRC @@ -2610,6 +2647,7 @@ if (the_bookindex_section.length > 1) { obj.metainfo.o_n_book_index = obj_cite_digits.bkidx; obj.metainfo.object_number_type = OCNtype.bkidx; } + obj = _links(obj); } /+ TODO assert failure, reinstate assert(obj_cite_digit_bkidx == ocn_bidx_ @@ -2659,6 +2697,7 @@ if (the_blurb_section.length > 1) { obj.metainfo.object_number_off = obj_cite_digits.off; obj.metainfo.object_number_type = OCNtype.non; } + obj = _links(obj); } } #+END_SRC @@ -7861,6 +7900,15 @@ struct DocObj_CodeBlock_ { } #+END_SRC +**** stow (things to be protected from regular text transformations, so far links) + +#+name: meta_structs_init +#+BEGIN_SRC d +struct DocObj_Stow_ { + string[] link = []; +} +#+END_SRC + **** pointers #+name: meta_structs_init @@ -7903,6 +7951,7 @@ struct ObjGenericComposite { DocObj_Has_ has; DocObj_Table_ table; DocObj_CodeBlock_ code_block; + DocObj_Stow_ stow; DocObj_Pointer_ ptr; } #+END_SRC diff --git a/org/output_sqlite.org b/org/output_sqlite.org index b1c9cf4..bdb2ca7 100644 --- a/org/output_sqlite.org +++ b/org/output_sqlite.org @@ -322,6 +322,7 @@ template SQLiteDbDrop() { import doc_reform.output; import std.file, + std.uri, std.conv : to; #+END_SRC @@ -534,6 +535,11 @@ auto inline_links(M,O)( string _xml_type = "seg", ) { if (obj.has.inline_links) { + if (obj.metainfo.is_a != "code") { + _txt = replaceAll!(hit => + hit[1] ~ "┤" ~ to!string((obj.stow.link[hit[2].to!ulong])).encode ~ "├" + )(_txt, rgx.inline_link_number_only); + } if ((_txt.match(rgx.mark_internal_site_lnk)) && (_xml_type == "scroll")) 
{ // conditions reversed to avoid: gdc compiled program run segfault _txt = _txt.replaceAll( diff --git a/org/output_xmls.org b/org/output_xmls.org index b9302bd..45a1c3c 100644 --- a/org/output_xmls.org +++ b/org/output_xmls.org @@ -42,6 +42,7 @@ import std.digest.sha, std.file, std.outbuffer, + std.uri, std.zip, std.conv : to; import @@ -193,7 +194,7 @@ auto header_metadata(M)( doc_matters.conf_make_meta.meta.date_modified, doc_matters.src.language, doc_matters.conf_make_meta.meta.rights_copyright, - doc_matters.generator_program.name_and_version, + doc_matters.opt.action.debug_do ? "" : doc_matters.generator_program.name_and_version, doc_matters.generator_program.url_home, ); return o; @@ -466,6 +467,11 @@ auto inline_links(M,O)( ) { string seg_lvs; if (obj.has.inline_links) { + if (obj.metainfo.is_a != "code") { + _txt = replaceAll!(hit => + hit[1] ~ "┤" ~ to!string((obj.stow.link[hit[2].to!ulong])).encode ~ "├" + )(_txt, rgx.inline_link_number_only); + } if ((_txt.match(rgx.mark_internal_site_lnk)) && (_xml_type == "scroll")) { // conditions reversed to avoid: gdc compiled program run segfault _txt = _txt.replaceAll( |