From 6b3555e2ae0a7a93f7ab7be1b908a4c15350d460 Mon Sep 17 00:00:00 2001
From: Ralph Amissah <ralph.amissah@gmail.com>
Date: Fri, 3 Apr 2020 15:13:31 -0400
Subject: sql, sqlite internal site links

---
 src/doc_reform/io_out/rgx.d                  |  3 ++-
 src/doc_reform/io_out/sqlite.d               | 38 +++++++++++++++-------------
 src/doc_reform/meta/conf_make_meta_structs.d |  1 +
 src/doc_reform/meta/conf_make_meta_yaml.d    | 10 +++++++-
 src/doc_reform/meta/rgx.d                    |  3 ++-
 5 files changed, 34 insertions(+), 21 deletions(-)

(limited to 'src')

diff --git a/src/doc_reform/io_out/rgx.d b/src/doc_reform/io_out/rgx.d
index 32d457d..5024319 100644
--- a/src/doc_reform/io_out/rgx.d
+++ b/src/doc_reform/io_out/rgx.d
@@ -49,9 +49,10 @@ static template spineRgxOut() {
     static inline_link                                    = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg");
     static inline_link_empty                              = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
     static inline_link_number                             = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
-    static inline_link_number_only                        = ctRegex!(`(┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
+    static inline_link_number_only                        = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
     static inline_link_stow_uri                           = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
     static inline_link_hash                               = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<segname>\S+?))├`, "mg");
+    static inline_link_seg_and_hash                       = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^/]+?#(?P<segname>.+?))├`, "mg");
     static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
     static inline_link_toc_to_backmatter                  = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
     static url                                            = ctRegex!(`https?://`, "mg");
diff --git a/src/doc_reform/io_out/sqlite.d b/src/doc_reform/io_out/sqlite.d
index ce8f688..392d8b0 100644
--- a/src/doc_reform/io_out/sqlite.d
+++ b/src/doc_reform/io_out/sqlite.d
@@ -63,7 +63,7 @@ template SQLiteHubDiscreteBuildTablesAndPopulate() {
     const D    doc_abstraction,
           M    doc_matters,
   ) {
-    auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url, doc_matters.src.language);
+    auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html, doc_matters.src.language);
     auto pth_sqlite = spinePathsSQLiteDiscrete!()(doc_matters.output_path, doc_matters.src.language);
     pth_sqlite.base.mkdirRecurse;
     auto db = Database(pth_sqlite.sqlite_file(doc_matters.src.filename));
@@ -290,7 +290,7 @@ template SQLiteFormatAndLoadObject() {
         if (obj.has.inline_links) {
           if  (obj.metainfo.is_a != "code") {
             _txt = replaceAll!(m =>
-                m[1] ~ "┤" ~ to!string((obj.stow.link[m[2].to!ulong])).encode ~ "├"
+                m["linked_text"] ~ "┤" ~ to!string((obj.stow.link[m["num"].to!ulong])).encode ~ "├"
               )(_txt, rgx.inline_link_number_only);
           }
           if ((_txt.match(rgx.mark_internal_site_lnk))
@@ -301,24 +301,26 @@ template SQLiteFormatAndLoadObject() {
           }
           auto pth_html = spinePathsHTML!()(doc_matters.output_path, doc_matters.src.language);
           if (_xml_type == "seg") {
-            foreach (m; _txt.match(rgx.inline_link_hash)) {
-              if (m.captures[3] in doc_matters.has.tag_associations) {
-                if (m.captures[3] == doc_matters.has.tag_associations[(m.captures[3])]["seg_lv4"]) {
+            foreach (m; _txt.match(rgx.inline_link_seg_and_hash)) {
+              if (m.captures["segname"] in doc_matters.has.tag_associations) {
+                if (m.captures["segname"] == doc_matters.has.tag_associations[(m.captures["segname"])]["seg_lv4"]) {
                   _txt = _txt.replaceFirst(
-                    rgx.inline_link_hash,
+                    rgx.inline_link_seg_and_hash,
                     "┥$1┝┤"
-                      ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url
+                      ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html
                       ~ "/"
-                      ~ pth_html.tail_fn_seg(doc_matters.src.filename, "$3.html")
+                      ~ pth_html.tail_fn_seg(doc_matters.src.filename, "$2.html")
                     ~ "├"
                   );
                 } else {
                   _txt = _txt.replaceFirst(
-                    rgx.inline_link_hash,
+                    rgx.inline_link_seg_and_hash,
                     "┥$1┝┤"
-                      ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url
+                      ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html
                       ~ "/"
-                      ~ doc_matters.has.tag_associations[(m.captures[3])]["seg_lv4"]
+                      ~ doc_matters.src.filename_base
+                      ~ "/"
+                      ~ doc_matters.has.tag_associations[(m.captures["segname"])]["seg_lv4"]
                       ~ ".html"
                       ~ "#" ~ "$3"
                     ~ "├"
@@ -328,20 +330,20 @@ template SQLiteFormatAndLoadObject() {
                 if (!(doc_matters.opt.action.quiet)) {
                   writeln(
                     "WARNING on internal document links, anchor to link <<"
-                     ~ m.captures[3]
+                     ~ m.captures["segname"]
                      ~ ">> not found in document, "
-                     ~ "anchor: " ~ m.captures[3]
+                     ~ "anchor: " ~ m.captures["segname"]
                      ~ " document: " ~ doc_matters.src.filename
                   );
                 }
               }
             }
           } else {
-            if (auto m = _txt.match(rgx.inline_link_hash)) {
+            if (auto m = _txt.match(rgx.inline_link_seg_and_hash)) {
               _txt = _txt.replaceFirst(
-                rgx.inline_link_hash,
+                rgx.inline_link_seg_and_hash,
                 "┥$1┝┤"
-                  ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url
+                  ~ doc_matters.conf_make_meta.conf.w_srv_data_root_url_html
                   ~ "/"
                   ~ pth_html.tail_fn_scroll(doc_matters.src.filename)
                   ~ "#" ~ "$3"
@@ -1138,7 +1140,7 @@ template SQLiteInsertMetadata() {
       SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_language_char),
       SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.original_source),
       SQLinsertDelimiter!()(doc_matters.conf_make_meta.meta.publisher),
-      SQLinsertDelimiter!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url)
+      SQLinsertDelimiter!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html)
     );
     if (doc_matters.conf_make_meta.meta.classify_topic_register_arr.length > 0) {
     
@@ -1176,7 +1178,7 @@ template SQLiteInsertDocObjectsLoop() {
           M    doc_matters,
   ) {
     string _uid = SQLinsertDelimiter!()(doc_matters.src.doc_uid);
-    auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url, doc_matters.src.language);
+    auto url_html = spineUrlsHTML!()(doc_matters.conf_make_meta.conf.w_srv_data_root_url_html, doc_matters.src.language);
     string insertDocObjectsRow(O)(O obj) {
       string _insert_doc_objects_row = format(q"┃
         INSERT INTO doc_objects (
diff --git a/src/doc_reform/meta/conf_make_meta_structs.d b/src/doc_reform/meta/conf_make_meta_structs.d
index 9707dab..3bab7d0 100644
--- a/src/doc_reform/meta/conf_make_meta_structs.d
+++ b/src/doc_reform/meta/conf_make_meta_structs.d
@@ -147,6 +147,7 @@ struct ConfCompositeSiteLocal {
   string w_srv_data_domain;          // if not set same as webserv_domain
   string w_srv_data_root_part;
   string w_srv_data_root_url;
+  string w_srv_data_root_url_html;
   string w_srv_data_root_path;
   string w_srv_images_root_part;
   // string w_srv_url_doc_path;
diff --git a/src/doc_reform/meta/conf_make_meta_yaml.d b/src/doc_reform/meta/conf_make_meta_yaml.d
index 1b5ec70..c33b04a 100644
--- a/src/doc_reform/meta/conf_make_meta_yaml.d
+++ b/src/doc_reform/meta/conf_make_meta_yaml.d
@@ -243,9 +243,17 @@ template contentYAMLtoSpineStruct() {
           && _yaml["webserv"]["data_root_url"].type.string
           && _yaml["webserv"]["data_root_url"].tag.match(rgx.yaml_tag_is_str)
         ) {
-          _struct_composite.conf.w_srv_data_root_url = _yaml["webserv"]["data_root_url"].get!string;
+          _struct_composite.conf.w_srv_data_root_url      = _yaml["webserv"]["data_root_url"].get!string;
+          _struct_composite.conf.w_srv_data_root_url_html =
+            _yaml["webserv"]["data_root_url"].get!string ~ "/"
+            ~ _manifested.src.language ~ "/"
+            ~ "html";
         } else {
           _struct_composite.conf.w_srv_data_root_url =  _struct_composite.conf.w_srv_data_root_part;
+          _struct_composite.conf.w_srv_data_root_url_html =
+            _struct_composite.conf.w_srv_data_root_part ~ "/"
+            ~ _manifested.src.language ~ "/"
+            ~ "html";
         }
         if ("cgi_domain" in _yaml["webserv"]
           && _yaml["webserv"]["cgi_domain"].type.string
diff --git a/src/doc_reform/meta/rgx.d b/src/doc_reform/meta/rgx.d
index 94ef091..d4dd201 100644
--- a/src/doc_reform/meta/rgx.d
+++ b/src/doc_reform/meta/rgx.d
@@ -215,9 +215,10 @@ static template spineRgxIn() {
     static inline_link                                    = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg");
     static inline_link_empty                              = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
     static inline_link_number                             = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
-    static inline_link_number_only                        = ctRegex!(`(┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
+    static inline_link_number_only                        = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
     static inline_link_stow_uri                           = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
     static inline_link_hash                               = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<segname>\S+?))├`, "mg");
+    static inline_link_seg_and_hash                       = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^/]+?#(?P<segname>.+?))├`, "mg");
     static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
     static inline_link_toc_to_backmatter                  = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
     static url                                            = ctRegex!(`https?://`, "mg");
-- 
cgit v1.2.3