From 90873fabd7451e1dd8c4b39303906e19bdc481f7 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 10 Apr 2024 22:24:34 -0400 Subject: 0.16.0 sisudoc (src/sisudoc sisudoc spine) - src/sisudoc (replaces src/doc_reform) - sisudoc spine (used more) --- src/sisudoc/io_out/rgx.d | 157 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) create mode 100644 src/sisudoc/io_out/rgx.d (limited to 'src/sisudoc/io_out/rgx.d') diff --git a/src/sisudoc/io_out/rgx.d b/src/sisudoc/io_out/rgx.d new file mode 100644 index 0000000..474a120 --- /dev/null +++ b/src/sisudoc/io_out/rgx.d @@ -0,0 +1,157 @@ +/+ +- Name: SisuDoc Spine, Doc Reform [a part of] + - Description: documents, structuring, processing, publishing, search + - static content generator + + - Author: Ralph Amissah + [ralph.amissah@gmail.com] + + - Copyright: (C) 2015 - 2024 Ralph Amissah, All Rights Reserved. + + - License: AGPL 3 or later: + + Spine (SiSU), a framework for document structuring, publishing and + search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Affero General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see [https://www.gnu.org/licenses/]. 
+ + If you have Internet connection, the latest version of the AGPL should be + available at these locations: + [https://www.fsf.org/licensing/licenses/agpl.html] + [https://www.gnu.org/licenses/agpl.html] + + - Spine (by Doc Reform, related to SiSU) uses standard: + - docReform markup syntax + - standard SiSU markup syntax with modified headers and minor modifications + - docReform object numbering + - standard SiSU object citation numbering & system + + - Homepages: + [https://www.sisudoc.org] + [https://www.doc-reform.org] + + - Git + [https://git.sisudoc.org/] + ++/ +/++ + regex: regular expressions used in sisu document parser ++/ +module sisudoc.io_out.rgx; +@safe: +static template spineRgxOut() { + static struct RgxO { + static make_breakpage = ctRegex!(`new=(?P.+?)(?:;|$)`); + static make_breakcolumn = ctRegex!(`break=(?P.+?)(?:;|$)`,); + static newline = ctRegex!("\n", "mg"); + static space = ctRegex!(`[ ]`, "mg"); + static spaces_keep = ctRegex!(`(?P^[ ]+|[ ]{2,})`, "mg"); // code, verse, block + static spaces_line_start = ctRegex!(`^(?P[ ]+)`, "mg"); + static nbsp_char = ctRegex!(`░`, "mg"); + static nbsp_chars = ctRegex!(`[░]+`, "mg"); + static middle_dot = ctRegex!(`·`, "mg"); + static src_pth_sst_or_ssm = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.](?Pss[tm]))$`); + static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`); + static src_pth_contents = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`); + static src_pth_zip = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]zip)$`); + static src_pth_types = ctRegex!(`^(?P[/]?[a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+[.]ss[tm])|(?P[a-zA-Z0-9._-]+/pod[.]manifest)|(?P[a-zA-Z0-9._-]+[.]zip))$`); + static src_fn = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); + static src_fn_master = 
ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ssm)$`); + static src_fn_find_inserts = ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ss[im])$`); + static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); + static src_base_parent_dir_name = ctRegex!(`[/](?P(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure + static src_formalised_file_path_parts = ctRegex!(`(?P(?:[/a-zA-Z0-9._-]+?)(?P[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure + /+ line breaks +/ + static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg"); + static br_linebreaks_newlines = ctRegex!(`[\n┘┙]`, "mg"); + static br_linebreaks = ctRegex!(`[┘┙]`, "mg"); + static br_line = ctRegex!(`┘`, "mg"); + static br_line_inline = ctRegex!(`┙`, "mg"); + static br_line_spaced = ctRegex!(`┚`, "mg"); + /+ quotation marks +/ + static quotes_open_and_close = ctRegex!(`[“”]`, "mg"); + /+ inline markup footnotes endnotes +/ + static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); + static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented + static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); + static inline_notes_al_gen_text = ctRegex!(`【(?P.+?)】`, "m"); + static inline_notes_al_all_note = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*】`, "mg"); + static inline_notes_al_regular_number_note = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*】`, "mg"); + static inline_notes_al_special_char_note = ctRegex!(`【(?P(?:[*]|[+])+)\s+(?P.+?)】`, "mg"); + static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); + static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m"); + static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m"); + static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg"); + /+ inline markup links +/ + static inline_image = ctRegex!(`(?P
┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+))\s*(?P.*?┝┤.*?├)`, "mg"); // ┥☼<file>.(jpg|gif|png),w<digits>h<digits>, optional whitespace, then text through ┝┤…├
+    static inline_image_without_dimensions          = ctRegex!(`(?P
┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P0)h(?P0))\s*(?P.*?┝┤.*?├)`, "mg"); // same shape as inline_image, but width and height are both the literal 0
+    static inline_image_info                        = ctRegex!(`☼?(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+)`, "mg"); // optional ☼, <file>.(jpg|gif|png), then ",w<digits>h<digits>"; NOTE(review): "(?P" has no <name> here or elsewhere in this struct — group names look lost in extraction, confirm against upstream
+    static inline_link_anchor                       = ctRegex!(`┃(?P\S+?)┃`, "mg"); // TODO *~text_link_anchor; non-space text between two ┃
+    static inline_link                              = ctRegex!(`┥(?P.+?)┝┤(?P#?(\S+?))├`, "mg"); // ┥text┝┤target├ where target is non-space text with an optional leading #
+    static inline_link_empty                        = ctRegex!(`┥(?P.+?)┝┤├`, "mg"); // ┥text┝┤├ with nothing between ┤ and ├
+    static inline_link_number                       = ctRegex!(`┥(?P.+?)┝┤(?P[0-9]+)├`, "mg"); // not used; target is digits only
+    static inline_link_number_only                  = ctRegex!(`(?P┥.+?┝)┤(?P[0-9]+)├`, "mg"); // digits-only target; the ┥…┝ part is grouped together with its delimiters
+    static inline_link_stow_uri                     = ctRegex!(`┥(?P.+?)┝┤(?P[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
+    static inline_link_hash                         = ctRegex!(`┥(?P.+?)┝┤(?P#(?P\S+?))├`, "mg"); // target must start with #
+    static inline_link_seg_and_hash                 = ctRegex!(`┥(?P.+?)┝┤(?P(?P[^/#├]*)#(?P.+?))├`, "mg"); // target is <chars other than / # ├>#<rest>
+    static inline_link_clean                        = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg"); // either ┤…├ including its content, or a single ┥ or ┝
+    static inline_link_toc_to_backmatter            = ctRegex!(`┤#(?Pendnotes|bibliography|bookindex|glossary|blurb)├`, "mg"); // ┤#name├ where name is one of the listed words
+    static url                                      = ctRegex!(`https?://`, "mg"); // "http://" or "https://"
+    static uri                                      = ctRegex!(`(?:https?|git)://`, "mg"); // "http://", "https://" or "git://"
+    static uri_identify_components                  = ctRegex!(`(?P(?:https?|git)://)(?P\S+?/)(?P[^/]+)$`, "mg"); // scheme, non-space text ending in "/", then a slash-free tail anchored with $
+    static inline_link_subtoc                       = ctRegex!(`^(?P[5-7])~ ┥(?P.+?)┝┤(?P.+?)├`, "mg"); // "^" then 5, 6 or 7, then "~ " and a ┥text┝┤target├ link
+    static inline_link_fn_suffix                    = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg"); // ¤, captured text, then captured literal ".fnSuffix"
+    static inline_seg_link                          = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg"); // same text shape, but only the ¤ is captured
+    static mark_internal_site_lnk                   = ctRegex!(`¤`, "mg"); // the ¤ character on its own
+    static quotation_mark_sql_insert_delimiter      = ctRegex!("[']", "mg"); // a single-quote character
+    /+ inline markup font face mod +/
+    static inline_emphasis                          = ctRegex!(`⑆[*]┨(?P.+?)┣[*]`, "mg"); // text between ⑆*┨ and ┣*; the declarations below use the same frame with a different marker character
+    static inline_bold                              = ctRegex!(`⑆[!]┨(?P.+?)┣[!]`, "mg"); // marker: !
+    static inline_underscore                        = ctRegex!(`⑆[_]┨(?P.+?)┣[_]`, "mg"); // marker: _
+    static inline_italics                           = ctRegex!(`⑆[/]┨(?P.+?)┣[/]`, "mg"); // marker: /
+    static inline_superscript                       = ctRegex!(`⑆\^┨(?P.+?)┣\^`, "mg"); // marker: ^ (escaped rather than bracketed)
+    static inline_subscript                         = ctRegex!(`⑆[,]┨(?P.+?)┣[,]`, "mg"); // marker: ,
+    static inline_strike                            = ctRegex!(`⑆[-]┨(?P.+?)┣[-]`, "mg"); // marker: -
+    static inline_insert                            = ctRegex!(`⑆[+]┨(?P.+?)┣[+]`, "mg"); // marker: +
+    static inline_mono                              = ctRegex!(`⑆[■]┨(?P.+?)┣[■]`, "mg"); // marker: ■
+    static inline_cite                              = ctRegex!(`⑆[‖]┨(?P.+?)┣[‖]`, "mg"); // marker: ‖
+    /+ table delimiters +/
+    static table_delimiter_col                      = ctRegex!("[ ]*[┊][ ]*", "mg"); // ┊ with any spaces on either side
+    static table_delimiter_row                      = ctRegex!("[ ]*\n", "mg"); // newline with any spaces before it
+    /+ paragraph operators +/
+    static grouped_para_indent_1                    = ctRegex!(`^_1[ ]`, "m"); // "^_1" followed by one space; _2 … _9 below differ only in the digit
+    static grouped_para_indent_2                    = ctRegex!(`^_2[ ]`, "m");
+    static grouped_para_indent_3                    = ctRegex!(`^_3[ ]`, "m");
+    static grouped_para_indent_4                    = ctRegex!(`^_4[ ]`, "m");
+    static grouped_para_indent_5                    = ctRegex!(`^_5[ ]`, "m");
+    static grouped_para_indent_6                    = ctRegex!(`^_6[ ]`, "m");
+    static grouped_para_indent_7                    = ctRegex!(`^_7[ ]`, "m");
+    static grouped_para_indent_8                    = ctRegex!(`^_8[ ]`, "m");
+    static grouped_para_indent_9                    = ctRegex!(`^_9[ ]`, "m");
+    static grouped_para_bullet                      = ctRegex!(`^_[*] `, "m"); // "^_*" followed by one space (no digit)
+    static grouped_para_bullet_indent_1             = ctRegex!(`^_1[*] `, "m"); // "^_1*" followed by one space; _2* … _9* below differ only in the digit
+    static grouped_para_bullet_indent_2             = ctRegex!(`^_2[*] `, "m");
+    static grouped_para_bullet_indent_3             = ctRegex!(`^_3[*] `, "m");
+    static grouped_para_bullet_indent_4             = ctRegex!(`^_4[*] `, "m");
+    static grouped_para_bullet_indent_5             = ctRegex!(`^_5[*] `, "m");
+    static grouped_para_bullet_indent_6             = ctRegex!(`^_6[*] `, "m");
+    static grouped_para_bullet_indent_7             = ctRegex!(`^_7[*] `, "m");
+    static grouped_para_bullet_indent_8             = ctRegex!(`^_8[*] `, "m");
+    static grouped_para_bullet_indent_9             = ctRegex!(`^_9[*] `, "m");
+    static grouped_para_bullet_indent               = ctRegex!(`^_(?P[1-9])[*] `, "m"); // general form of the nine above: the digit 1-9 is grouped instead of hard-coded
+    static grouped_para_indent_hang                 = ctRegex!(`^_(?P[0-9])_(?P[0-9])[ ]`, "m"); // "^_<digit>_<digit>" followed by one space; each digit is grouped separately
+  }
+}
-- 
cgit v1.2.3