diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2022-11-25 22:06:40 -0500 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2022-12-23 18:17:41 -0500 |
commit | f6d28b62f0e02b8a88a1832589e203c7a613f45b (patch) | |
tree | b5d6462e45bae998190194784e02b143a83f79a3 /src/doc_reform/io_out | |
parent | gitignore & things nix (diff) |
regex review, match speed & compile time, ctregex
- improve match time
- add interim fontface identifier marker
- improve compile time
- remove unused regexes
- separate out some specialized output matches
Diffstat (limited to 'src/doc_reform/io_out')
-rw-r--r-- | src/doc_reform/io_out/epub3.d | 15 | ||||
-rw-r--r-- | src/doc_reform/io_out/html.d | 13 | ||||
-rw-r--r-- | src/doc_reform/io_out/hub.d | 4 | ||||
-rw-r--r-- | src/doc_reform/io_out/latex.d | 29 | ||||
-rw-r--r-- | src/doc_reform/io_out/odt.d | 23 | ||||
-rw-r--r-- | src/doc_reform/io_out/package.d | 3 | ||||
-rw-r--r-- | src/doc_reform/io_out/paths_output.d | 21 | ||||
-rw-r--r-- | src/doc_reform/io_out/rgx.d | 38 | ||||
-rw-r--r-- | src/doc_reform/io_out/rgx_latex.d | 68 | ||||
-rw-r--r-- | src/doc_reform/io_out/rgx_xhtml.d | 63 | ||||
-rw-r--r-- | src/doc_reform/io_out/source_pod.d | 10 | ||||
-rw-r--r-- | src/doc_reform/io_out/sqlite.d | 39 | ||||
-rw-r--r-- | src/doc_reform/io_out/xmls.d | 15 |
13 files changed, 238 insertions, 103 deletions
diff --git a/src/doc_reform/io_out/epub3.d b/src/doc_reform/io_out/epub3.d index d19545c..41d6d9d 100644 --- a/src/doc_reform/io_out/epub3.d +++ b/src/doc_reform/io_out/epub3.d @@ -58,18 +58,21 @@ template outputEPub3() { std.conv : to; import doc_reform.io_out, + doc_reform.io_out.rgx, + doc_reform.io_out.rgx_xhtml, doc_reform.io_out.create_zip_file, doc_reform.io_out.xmls, doc_reform.io_out.xmls_css; mixin InternalMarkup; mixin outputXHTMLs; static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); @safe string special_characters_text(string _txt) { _txt = _txt - .replaceAll(rgx.xhtml_ampersand, "&") // "&" - .replaceAll(rgx.xhtml_quotation, """) // """ - .replaceAll(rgx.xhtml_less_than, "<") // "<" - .replaceAll(rgx.xhtml_greater_than, ">") // ">" + .replaceAll(rgx_xhtml.ampersand, "&") // "&" + .replaceAll(rgx_xhtml.quotation, """) // """ + .replaceAll(rgx_xhtml.less_than, "<") // "<" + .replaceAll(rgx_xhtml.greater_than, ">") // ">" .replaceAll(rgx.br_line, "<br />") .replaceAll(rgx.br_line_inline, "<br />") .replaceAll(rgx.br_line_spaced, "<br />\n<br />") @@ -184,6 +187,7 @@ template outputEPub3() { enum DomTags { none, open, close, close_and_open, open_still, } auto markup = InlineMarkup(); static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); string toc; bool _new_title_set = false; string toc_head = format(q"┃<html xmlns="https://www.w3.org/1999/xhtml" @@ -297,8 +301,10 @@ template outputEPub3() { I doc_matters, ) { mixin spineRgxOut; + mixin spineRgxXHTML; auto xhtml_format = outputXHTMLs(); static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); string[] doc; string segment_filename; string[] top_level_headings = ["","","",""]; @@ -607,6 +613,7 @@ template outputEPub3() { static assert(is(typeof(epub_write.oebps_content_opf) == string)); } static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); auto pth_epub3 = spinePathsEPUB!()(doc_matters.output_path, doc_matters.src.language); auto xhtml_format = outputXHTMLs(); /+ zip 
file +/ diff --git a/src/doc_reform/io_out/html.d b/src/doc_reform/io_out/html.d index 26636ba..dd50252 100644 --- a/src/doc_reform/io_out/html.d +++ b/src/doc_reform/io_out/html.d @@ -57,6 +57,9 @@ template outputHTML() { std.conv : to; import doc_reform.io_out, + doc_reform.io_out.rgx, + doc_reform.meta.rgx_files, + doc_reform.io_out.rgx_xhtml, doc_reform.io_out.create_zip_file, doc_reform.io_out.xmls, doc_reform.io_out.xmls_css; @@ -66,8 +69,10 @@ template outputHTML() { M doc_matters, ) { mixin spineRgxOut; + mixin spineRgxXHTML; auto xhtml_format = outputXHTMLs(); static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); string[] doc_html; string[] doc; string suffix = ".html"; @@ -257,7 +262,9 @@ template outputHTML() { M doc_matters, ) { mixin spineRgxOut; + mixin spineRgxXHTML; static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); auto xhtml_format = outputXHTMLs(); string[][string] doc_html; string[][string] doc_html_endnotes; @@ -508,11 +515,11 @@ template outputHTML() { debug(asserts) { static assert(is(typeof(doc_html) == string[][string])); } - mixin spineRgxOut; - static auto rgx = RgxO(); + mixin spineRgxFiles; + static auto rgx_files = RgxFiles(); auto pth_html = spinePathsHTML!()(doc_matters.output_path, doc_matters.src.language); auto xhtml_format = outputXHTMLs(); - auto m = doc_matters.src.filename.matchFirst(rgx.src_fn); + auto m = doc_matters.src.filename.matchFirst(rgx_files.src_fn); try { if (!exists(pth_html.seg(doc_matters.src.filename))) { pth_html.seg(doc_matters.src.filename).mkdirRecurse; diff --git a/src/doc_reform/io_out/hub.d b/src/doc_reform/io_out/hub.d index 4c5254b..c53055d 100644 --- a/src/doc_reform/io_out/hub.d +++ b/src/doc_reform/io_out/hub.d @@ -63,10 +63,8 @@ template outputHub() { const D doc_abstraction, I doc_matters ) { - mixin spineRgxOut; mixin Msg; auto msg = Msg!()(doc_matters); - static auto rgx = RgxO(); enum outTask { source_or_pod, sqlite, sqlite_multi, latex, odt, epub, html_scroll, html_seg, 
html_stuff } void Scheduled(D,I)(int sched, D doc_abstraction, I doc_matters) { auto msg = Msg!()(doc_matters); @@ -201,8 +199,6 @@ template outputHubOp() { doc_reform.io_out.create_zip_file, doc_reform.io_out.paths_output; @system void outputHubOp(E,O,C)(E env, O opt_action, C config) { - mixin spineRgxOut; - static auto rgx = RgxO(); if ((opt_action.sqlite_db_drop)) { if ((opt_action.vox_gt1)) { writeln("sqlite drop db..."); diff --git a/src/doc_reform/io_out/latex.d b/src/doc_reform/io_out/latex.d index 992887d..f7dee74 100644 --- a/src/doc_reform/io_out/latex.d +++ b/src/doc_reform/io_out/latex.d @@ -325,10 +325,14 @@ template outputLaTeX() { std.outbuffer, std.uri, std.conv : to; - import doc_reform.io_out; - mixin InternalMarkup; // watch + import + doc_reform.io_out, + doc_reform.io_out.rgx, + doc_reform.io_out.rgx_latex; mixin spineRgxOut; static auto rgx = RgxO(); + mixin spineRgxLSC; + static auto rgx_sc = RgxLSC(); mixin spineLanguageCodes; auto lang = Lang(); auto paper = paperLaTeX; @@ -337,23 +341,23 @@ template outputLaTeX() { ) { string _unescape_sp_char_esc()(string _txt) { _txt = _txt - .replaceAll(rgx.latex_special_char_escaped, + .replaceAll(rgx_sc.latex_special_char_escaped, format(q"┃%s┃", "$1")) - .replaceAll(rgx.latex_special_char_escaped_braced, + .replaceAll(rgx_sc.latex_special_char_escaped_braced, format(q"┃%s┃", "$1")); return _txt; } string _unescape_fontface_esc()(string _txt) { - _txt = _txt.replaceAll(rgx.latex_identify_inline_fontface, + _txt = _txt.replaceAll(rgx_sc.latex_identify_inline_fontface, format(q"┃%s%s┃", "$1", "$2")); return _txt; } - _txt = replaceAll!(m => "\\" ~ m[1])(_txt, rgx.latex_special_char_for_escape); - _txt = replaceAll!(m => "{\\" ~ m[1] ~ "}")(_txt, rgx.latex_special_char_for_escape_and_braces); + _txt = replaceAll!(m => "\\" ~ m[1])(_txt, rgx_sc.latex_special_char_for_escape); + _txt = replaceAll!(m => "{\\" ~ m[1] ~ "}")(_txt, rgx_sc.latex_special_char_for_escape_and_braces); _txt = replaceAll!(m => 
"''")(_txt, rgx.quotes_open_and_close); _txt = replaceAll!(m => "$\\cdot$")(_txt, rgx.middle_dot); - _txt = replaceAll!(m => _unescape_sp_char_esc(m[0]))(_txt, rgx.latex_identify_inline_link); - _txt = replaceAll!(m => _unescape_fontface_esc(m[0]))(_txt, rgx.latex_identify_inline_fontface); + _txt = replaceAll!(m => _unescape_sp_char_esc(m[0]))(_txt, rgx_sc.latex_identify_inline_link); + _txt = replaceAll!(m => _unescape_fontface_esc(m[0]))(_txt, rgx_sc.latex_identify_inline_fontface); return _txt; } @safe string sp_char_esc(O)( @@ -461,8 +465,8 @@ template outputLaTeX() { } string _check_link(string _link) { _link = _link - .replaceFirst(rgx.latex_clean_internal_link, "") - .replaceAll(rgx.latex_special_char_for_escape_url, "\\$1"); + .replaceFirst(rgx_sc.latex_clean_internal_link, "") + .replaceAll(rgx_sc.latex_special_char_for_escape_url, "\\$1"); return _link; } if (obj.metainfo.is_a != "code") { @@ -529,7 +533,7 @@ template outputLaTeX() { string _tex_para; _tex_para = q"┃%s┃"; _txt = format(_tex_para, - _txt.replaceAll(rgx.latex_clean_bookindex_linebreak, "\n") ~ "\n\\brln\n" + _txt.replaceAll(rgx_sc.latex_clean_bookindex_linebreak, "\n") ~ "\n\\brln\n" ); } return _txt; @@ -1381,7 +1385,6 @@ template outputLaTeX() { } template outputLaTeXstyInit() { import doc_reform.io_out; - mixin spineRgxOut; auto paper = paperLaTeX; void writeOutputLaTeXstyStatic( string latex_sty, diff --git a/src/doc_reform/io_out/odt.d b/src/doc_reform/io_out/odt.d index c5fb469..1cde2d4 100644 --- a/src/doc_reform/io_out/odt.d +++ b/src/doc_reform/io_out/odt.d @@ -50,7 +50,10 @@ +/ module doc_reform.io_out.odt; template formatODT() { - import doc_reform.io_out; + import + doc_reform.io_out, + doc_reform.io_out.rgx, + doc_reform.io_out.rgx_xhtml; import std.digest.sha, std.file, @@ -63,8 +66,10 @@ template formatODT() { doc_reform.io_out.xmls, doc_reform.io_out.xmls_css; mixin spineRgxOut; + mixin spineRgxXHTML; struct formatODT { static auto rgx = RgxO(); + static auto rgx_xhtml = 
RgxXHTML(); @safe string _tags(O)(const O obj) { string _tags = ""; if (obj.tags.anchor_tags.length > 0) { @@ -107,6 +112,7 @@ template formatODT() { } @safe string _footnotes()(string _txt) { static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); _txt = _txt.replaceAll( rgx.inline_notes_al_regular_number_note, format(q"┃<text:note text:id="ftn%s" text:note-class="footnote"> @@ -274,10 +280,10 @@ template formatODT() { } @safe string _special_characters(O)(string _txt, const O obj) { _txt = _txt - .replaceAll(rgx.xhtml_ampersand, "&") - .replaceAll(rgx.xhtml_quotation, """) - .replaceAll(rgx.xhtml_less_than, "<") - .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx_xhtml.ampersand, "&") + .replaceAll(rgx_xhtml.quotation, """) + .replaceAll(rgx_xhtml.less_than, "<") + .replaceAll(rgx_xhtml.greater_than, ">") .replaceAll(rgx.nbsp_char, " "); return _txt; } @@ -640,7 +646,10 @@ template formatODT() { } } template outputODT() { - import doc_reform.io_out; + import + doc_reform.io_out, + doc_reform.io_out.rgx, + doc_reform.io_out.rgx_xhtml; import std.digest.sha, std.file, @@ -654,7 +663,9 @@ template outputODT() { doc_reform.io_out.xmls_css; mixin InternalMarkup; mixin spineRgxOut; + mixin spineRgxXHTML; static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); // mixin outputXmlODT; @safe string odt_head(I)(I doc_matters) { string _has_tables = format(q"┃ diff --git a/src/doc_reform/io_out/package.d b/src/doc_reform/io_out/package.d index b6932ea..0a75f98 100644 --- a/src/doc_reform/io_out/package.d +++ b/src/doc_reform/io_out/package.d @@ -67,5 +67,4 @@ public import doc_reform.share.defaults, doc_reform.io_in.paths_source, doc_reform.io_out.defaults, - doc_reform.io_out.paths_output, - doc_reform.io_out.rgx; + doc_reform.io_out.paths_output; diff --git a/src/doc_reform/io_out/paths_output.d b/src/doc_reform/io_out/paths_output.d index fc31711..471c966 100644 --- a/src/doc_reform/io_out/paths_output.d +++ b/src/doc_reform/io_out/paths_output.d 
@@ -57,7 +57,8 @@ import std.path, std.regex, std.stdio; -import doc_reform.meta.rgx; +import + doc_reform.meta.rgx_files; template spineOutPaths() { auto spineOutPaths()( string output_pth_root, @@ -149,8 +150,6 @@ template spineOutPathsFnPd() { } template spineDocRootTreeHTML() { - mixin spineRgxIn; - static auto rgx = RgxI(); auto spineDocRootTreeHTML()(string lng) { auto lng_pth = spineOutPaths!()("", lng); string base_dir = "html"; @@ -209,8 +208,6 @@ template spineDocRootTreeHTML() { } } template spinePathsHTML() { - mixin spineRgxIn; - static auto rgx = RgxI(); auto spinePathsHTML()( string output_path_root, string lng, @@ -270,8 +267,6 @@ template spinePathsHTML() { } template spineUrlsHTML() { import std.format; - mixin spineRgxIn; - static auto rgx = RgxI(); auto spineUrlsHTML()( string url_doc_root, string lng, @@ -371,8 +366,6 @@ template spineUrlsHTML() { } } template spinePathsEPUB() { - mixin spineRgxIn; - static auto rgx = RgxI(); auto spinePathsEPUB()( string output_pth_root, string lng, @@ -477,8 +470,6 @@ template spinePathsEPUB() { } template spinePathsODT() { import std.conv; - mixin spineRgxIn; - static auto rgx = RgxI(); auto spinePathsODT(M)( M doc_matters, ) { @@ -541,8 +532,6 @@ template spinePathsODT() { } } template spinePathsPDF() { - mixin spineRgxIn; - static auto rgx = RgxI(); auto spinePathsPDF(M)( M doc_matters, ) { @@ -565,8 +554,6 @@ template spinePathsPDF() { } } template spinePathsLaTeX() { - mixin spineRgxIn; - static auto rgx = RgxI(); auto spinePathsLaTeX(M)( M doc_matters, ) { @@ -636,8 +623,6 @@ template spinePathsLaTeXsty() { } } template spinePathsSQLiteDiscrete() { - mixin spineRgxIn; - static auto rgx = RgxI(); auto spinePathsSQLiteDiscrete()( string output_pth_root, string lng, @@ -662,8 +647,6 @@ template spinePathsSQLiteDiscrete() { } } template spinePathsSQLite() { - mixin spineRgxIn; - static auto rgx = RgxI(); auto spinePathsSQLite()( string db_name, string output_pth_root, diff --git 
a/src/doc_reform/io_out/rgx.d b/src/doc_reform/io_out/rgx.d index 8369735..943643c 100644 --- a/src/doc_reform/io_out/rgx.d +++ b/src/doc_reform/io_out/rgx.d @@ -68,8 +68,7 @@ static template spineRgxOut() { static src_pth_contents = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`); static src_pth_zip = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`); static src_pth_types = ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`); - static src_fn = - ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`); + static src_fn = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`); static src_fn_master = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`); static src_fn_find_inserts = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`); static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`); @@ -119,34 +118,19 @@ static template spineRgxOut() { static mark_internal_site_lnk = ctRegex!(`¤`, "mg"); static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg"); /+ inline markup font face mod +/ - static inline_emphasis = ctRegex!(`[*]┨(?P<text>.+?)┣[*]`, "mg"); - static inline_bold = ctRegex!(`[!]┨(?P<text>.+?)┣[!]`, "mg"); - static inline_underscore = ctRegex!(`[_]┨(?P<text>.+?)┣[_]`, "mg"); - static inline_italics = ctRegex!(`[/]┨(?P<text>.+?)┣[/]`, "mg"); - static inline_superscript = ctRegex!(`\^┨(?P<text>.+?)┣\^`, "mg"); - static inline_subscript = ctRegex!(`[,]┨(?P<text>.+?)┣[,]`, "mg"); - static inline_strike = ctRegex!(`[-]┨(?P<text>.+?)┣[-]`, "mg"); - static inline_insert = ctRegex!(`[+]┨(?P<text>.+?)┣[+]`, "mg"); 
- static inline_mono = ctRegex!(`[■]┨(?P<text>.+?)┣[■]`, "mg"); - static inline_cite = ctRegex!(`[‖]┨(?P<text>.+?)┣[‖]`, "mg"); + static inline_emphasis = ctRegex!(`⑆[*]┨(?P<text>.+?)┣[*]`, "mg"); + static inline_bold = ctRegex!(`⑆[!]┨(?P<text>.+?)┣[!]`, "mg"); + static inline_underscore = ctRegex!(`⑆[_]┨(?P<text>.+?)┣[_]`, "mg"); + static inline_italics = ctRegex!(`⑆[/]┨(?P<text>.+?)┣[/]`, "mg"); + static inline_superscript = ctRegex!(`⑆\^┨(?P<text>.+?)┣\^`, "mg"); + static inline_subscript = ctRegex!(`⑆[,]┨(?P<text>.+?)┣[,]`, "mg"); + static inline_strike = ctRegex!(`⑆[-]┨(?P<text>.+?)┣[-]`, "mg"); + static inline_insert = ctRegex!(`⑆[+]┨(?P<text>.+?)┣[+]`, "mg"); + static inline_mono = ctRegex!(`⑆[■]┨(?P<text>.+?)┣[■]`, "mg"); + static inline_cite = ctRegex!(`⑆[‖]┨(?P<text>.+?)┣[‖]`, "mg"); /+ table delimiters +/ static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg"); static table_delimiter_row = ctRegex!("[ ]*\n", "mg"); - static xhtml_ampersand = ctRegex!(`[&]`, "m"); // & - static xhtml_quotation = ctRegex!(`["]`, "m"); // " - static xhtml_less_than = ctRegex!(`[<]`, "m"); // < - static xhtml_greater_than = ctRegex!(`[>]`, "m"); // > - static xhtml_line_break = ctRegex!(` [\\]{2}`, "m"); // <br /> - static latex_special_char = ctRegex!(`([%${}_#&\\])`); - static latex_special_char_for_escape = ctRegex!(`([%${}_#\\])`); - static latex_special_char_for_escape_and_braces = ctRegex!(`([&])`); - static latex_special_char_for_escape_url = ctRegex!(`([%])`); - static latex_special_char_escaped = ctRegex!(`\\([%${}_#\\])`); - static latex_special_char_escaped_braced = ctRegex!(`[{]\\([&])[}]`); - static latex_identify_inline_link = ctRegex!(`┥.+?┝┤\S+?├`, "mg"); - static latex_identify_inline_fontface = ctRegex!(`\\([_#$]┨.+?┣)\\([_#$])`, "mg"); - static latex_clean_internal_link = ctRegex!(`^(?:#|¤\S+?#)`, "m"); - static latex_clean_bookindex_linebreak = ctRegex!(`\s*\\\\\\\\\s*`, "m"); /+ paragraph operators +/ static grouped_para_indent_1 = ctRegex!(`^_1[ ]`, 
"m"); static grouped_para_indent_2 = ctRegex!(`^_2[ ]`, "m"); diff --git a/src/doc_reform/io_out/rgx_latex.d b/src/doc_reform/io_out/rgx_latex.d new file mode 100644 index 0000000..25b9c60 --- /dev/null +++ b/src/doc_reform/io_out/rgx_latex.d @@ -0,0 +1,68 @@ +/+ +- Name: Spine, Doc Reform [a part of] + - Description: documents, structuring, processing, publishing, search + - static content generator + + - Author: Ralph Amissah + [ralph.amissah@gmail.com] + + - Copyright: (C) 2015 - 2022 Ralph Amissah, All Rights + Reserved. + + - License: AGPL 3 or later: + + Spine (SiSU), a framework for document structuring, publishing and + search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU AFFERO General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see [https://www.gnu.org/licenses/]. 
+ + If you have Internet connection, the latest version of the AGPL should be + available at these locations: + [https://www.fsf.org/licensing/licenses/agpl.html] + [https://www.gnu.org/licenses/agpl.html] + + - Spine (by Doc Reform, related to SiSU) uses standard: + - docReform markup syntax + - standard SiSU markup syntax with modified headers and minor modifications + - docReform object numbering + - standard SiSU object citation numbering & system + + - Homepages: + [https://www.doc_reform.org] + [https://www.sisudoc.org] + + - Git + [https://git.sisudoc.org/projects/?p=software/spine.git;a=summary] + ++/ +/++ + regex: regular expressions used in sisu document parser ++/ +module doc_reform.io_out.rgx_latex; +static template spineRgxLSC() { + static struct RgxLSC { + static latex_special_char = ctRegex!(`([%${}_#&\\])`); + static latex_special_char_for_escape = ctRegex!(`([%${}_#\\])`); + static latex_special_char_for_escape_and_braces = ctRegex!(`([&])`); + static latex_special_char_for_escape_url = ctRegex!(`([%])`); + static latex_special_char_escaped = ctRegex!(`\\([%${}_#\\])`); + static latex_special_char_escaped_braced = ctRegex!(`[{]\\([&])[}]`); + static latex_identify_inline_link = ctRegex!(`┥.+?┝┤\S+?├`, "mg"); + static latex_identify_inline_fontface = ctRegex!(`\\([_#$]┨.+?┣)\\([_#$])`, "mg"); + static latex_clean_internal_link = ctRegex!(`^(?:#|¤\S+?#)`, "m"); + static latex_clean_bookindex_linebreak = ctRegex!(`\s*\\\\\\\\\s*`, "m"); + } +} diff --git a/src/doc_reform/io_out/rgx_xhtml.d b/src/doc_reform/io_out/rgx_xhtml.d new file mode 100644 index 0000000..2d9aab1 --- /dev/null +++ b/src/doc_reform/io_out/rgx_xhtml.d @@ -0,0 +1,63 @@ +/+ +- Name: Spine, Doc Reform [a part of] + - Description: documents, structuring, processing, publishing, search + - static content generator + + - Author: Ralph Amissah + [ralph.amissah@gmail.com] + + - Copyright: (C) 2015 - 2022 Ralph Amissah, All Rights + Reserved. 
+ + - License: AGPL 3 or later: + + Spine (SiSU), a framework for document structuring, publishing and + search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU AFFERO General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see [https://www.gnu.org/licenses/]. + + If you have Internet connection, the latest version of the AGPL should be + available at these locations: + [https://www.fsf.org/licensing/licenses/agpl.html] + [https://www.gnu.org/licenses/agpl.html] + + - Spine (by Doc Reform, related to SiSU) uses standard: + - docReform markup syntax + - standard SiSU markup syntax with modified headers and minor modifications + - docReform object numbering + - standard SiSU object citation numbering & system + + - Homepages: + [https://www.doc_reform.org] + [https://www.sisudoc.org] + + - Git + [https://git.sisudoc.org/projects/?p=software/spine.git;a=summary] + ++/ +/++ + regex: regular expressions used in sisu document parser ++/ +module doc_reform.io_out.rgx_xhtml; +static template spineRgxXHTML() { + static struct RgxXHTML { + static ampersand = ctRegex!(`[&]`, "m"); // & + static quotation = ctRegex!(`["]`, "m"); // " + static less_than = ctRegex!(`[<]`, "m"); // < + static greater_than = ctRegex!(`[>]`, "m"); // > + static line_break = ctRegex!(` [\\]{2}`, "m"); // <br /> + } +} diff --git a/src/doc_reform/io_out/source_pod.d b/src/doc_reform/io_out/source_pod.d index 14d63d7..0f9aef7 100644 --- a/src/doc_reform/io_out/source_pod.d +++ 
b/src/doc_reform/io_out/source_pod.d @@ -50,7 +50,9 @@ +/ module doc_reform.io_out.source_pod; template spinePod() { - import doc_reform.io_out; + import + doc_reform.meta.rgx_files, + doc_reform.io_out; import std.digest.sha, std.file, @@ -64,15 +66,15 @@ template spinePod() { debug(asserts) { // static assert(is(typeof(doc_matters) == tuple)); } - mixin spineRgxOut; + mixin spineRgxFiles; string pwd = doc_matters.env.pwd; auto src_path_info = doc_matters.src_path_info; auto pth_dr_doc_src = doc_matters.src_path_info; auto pths_pod = spinePathsPods!()(doc_matters); mixin spineLanguageCodes; auto lang = Lang(); - static auto rgx = RgxO(); - assert (doc_matters.src.filename.match(rgx.src_fn)); + static auto rgx_files = RgxFiles(); + assert (doc_matters.src.filename.match(rgx_files.src_fn)); @system auto pod_archive(Z)( string _source_type, string _data_in, diff --git a/src/doc_reform/io_out/sqlite.d b/src/doc_reform/io_out/sqlite.d index a546998..33c25fe 100644 --- a/src/doc_reform/io_out/sqlite.d +++ b/src/doc_reform/io_out/sqlite.d @@ -49,7 +49,10 @@ +/ module doc_reform.io_out.sqlite; -import doc_reform.io_out; +import + doc_reform.io_out, + doc_reform.io_out.rgx, + doc_reform.io_out.rgx_xhtml; import std.file, std.uri; @@ -57,8 +60,10 @@ import std.conv : to; import std.typecons : Nullable; import d2sqlite3; mixin spineRgxOut; +mixin spineRgxXHTML; mixin InternalMarkup; static auto rgx = RgxO(); +static auto rgx_xhtml = RgxXHTML(); static auto mkup = InlineMarkup(); long _metadata_tid_lastrowid; template SQLiteHubBuildTablesAndPopulate() { @@ -193,6 +198,7 @@ template SQLiteFormatAndLoadObject() { M doc_matters, ) { mixin spineRgxOut; + mixin spineRgxXHTML; struct sqlite_format_and_load_objects { string generic_munge_sanitize_text_for_search( string _txt, @@ -235,15 +241,15 @@ template SQLiteFormatAndLoadObject() { ) { string _html_special_characters(string _txt){ _txt = _txt - .replaceAll(rgx.xhtml_ampersand, "&") - .replaceAll(rgx.xhtml_quotation, """) - 
.replaceAll(rgx.xhtml_less_than, "<") - .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx_xhtml.ampersand, "&") + .replaceAll(rgx_xhtml.quotation, """) + .replaceAll(rgx_xhtml.less_than, "<") + .replaceAll(rgx_xhtml.greater_than, ">") .replaceAll(rgx.nbsp_char, " ") .replaceAll(rgx.br_line_inline, "<br />") .replaceAll(rgx.br_line, "<br />") .replaceAll(rgx.br_line_spaced, "<br /><br />") - .replaceAll(rgx.xhtml_line_break, "<br />"); + .replaceAll(rgx_xhtml.line_break, "<br />"); return _txt; } string _html_font_face(string _txt){ @@ -272,23 +278,23 @@ template SQLiteFormatAndLoadObject() { } string html_special_characters(string _txt){ _txt = _txt - .replaceAll(rgx.xhtml_ampersand, "&") - .replaceAll(rgx.xhtml_quotation, """) - .replaceAll(rgx.xhtml_less_than, "<") - .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx_xhtml.ampersand, "&") + .replaceAll(rgx_xhtml.quotation, """) + .replaceAll(rgx_xhtml.less_than, "<") + .replaceAll(rgx_xhtml.greater_than, ">") .replaceAll(rgx.nbsp_char, " ") .replaceAll(rgx.br_line_inline, "<br />") .replaceAll(rgx.br_line, "<br />") .replaceAll(rgx.br_line_spaced, "<br /><br />") - .replaceAll(rgx.xhtml_line_break, "<br />"); + .replaceAll(rgx_xhtml.line_break, "<br />"); return _txt; } string html_special_characters_code(string _txt){ _txt = _txt - .replaceAll(rgx.xhtml_ampersand, "&") - .replaceAll(rgx.xhtml_quotation, """) - .replaceAll(rgx.xhtml_less_than, "<") - .replaceAll(rgx.xhtml_greater_than, ">") + .replaceAll(rgx_xhtml.ampersand, "&") + .replaceAll(rgx_xhtml.quotation, """) + .replaceAll(rgx_xhtml.less_than, "<") + .replaceAll(rgx_xhtml.greater_than, ">") .replaceAll(rgx.nbsp_char, " "); return _txt; } @@ -314,6 +320,7 @@ template SQLiteFormatAndLoadObject() { string _xml_type = "seg", ) { static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); if (obj.metainfo.is_a == "group") { _txt = (_txt) .replaceAll(rgx.grouped_para_indent_1, @@ -1614,7 +1621,7 @@ template SQLiteTablesCreate() { : ""; if 
(db_filename.length > 0 && db_path.length > 0) { if ((opt_action.vox_gt1)) { - writeln("db name & path: ", db_path, db_filename); + writeln("db name & path: ", db_path, "/", db_filename); } auto pth_sqlite = spinePathsSQLite!()(db_filename, db_path); pth_sqlite.base.mkdirRecurse; diff --git a/src/doc_reform/io_out/xmls.d b/src/doc_reform/io_out/xmls.d index 0b0dce7..131dbb6 100644 --- a/src/doc_reform/io_out/xmls.d +++ b/src/doc_reform/io_out/xmls.d @@ -57,12 +57,17 @@ template outputXHTMLs() { std.conv : to; import doc_reform.io_out, + doc_reform.io_out.rgx, + doc_reform.meta.rgx_files, + doc_reform.io_out.rgx_xhtml, doc_reform.io_out.create_zip_file, doc_reform.io_out.xmls, doc_reform.io_out.xmls_css; mixin spineRgxOut; + mixin spineRgxXHTML; struct outputXHTMLs { static auto rgx = RgxO(); + static auto rgx_xhtml = RgxXHTML(); @safe string div_delimit( string part, return ref string previous_part @@ -95,10 +100,10 @@ template outputXHTMLs() { } @safe string special_characters_text(string _txt) { _txt = _txt - .replaceAll(rgx.xhtml_ampersand, "&") // "&" - .replaceAll(rgx.xhtml_quotation, """) // """ - .replaceAll(rgx.xhtml_less_than, "<") // "<" - .replaceAll(rgx.xhtml_greater_than, ">") // ">" + .replaceAll(rgx_xhtml.ampersand, "&") // "&" + .replaceAll(rgx_xhtml.quotation, """) // """ + .replaceAll(rgx_xhtml.less_than, "<") // "<" + .replaceAll(rgx_xhtml.greater_than, ">") // ">" .replaceAll(rgx.br_line, "<br />") .replaceAll(rgx.br_line_inline, "<br />") .replaceAll(rgx.br_line_spaced, "<br />\n<br />") @@ -161,7 +166,7 @@ template outputXHTMLs() { } if (!(obj.metainfo.is_a == "code")) { _txt = (_txt) - .replaceAll(rgx.xhtml_line_break, "<br />"); + .replaceAll(rgx_xhtml.line_break, "<br />"); } return _txt; } |