/+
- Name: SisuDoc Spine, Doc Reform [a part of]
  - Description: documents, structuring, processing, publishing, search
    - static content generator

  - Author: Ralph Amissah
    [ralph.amissah@gmail.com]

  - Copyright: (C) 2015 - 2025 Ralph Amissah, All Rights Reserved.

  - License: AGPL 3 or later:

    Spine (SiSU), a framework for document structuring, publishing and
    search

    Copyright (C) Ralph Amissah

    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU Affero General Public License as published by the
    Free Software Foundation, either version 3 of the License, or (at your
    option) any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.

    You should have received a copy of the GNU General Public License along with
    this program. If not, see [https://www.gnu.org/licenses/].

    If you have Internet connection, the latest version of the AGPL should be
    available at these locations:
    [https://www.fsf.org/licensing/licenses/agpl.html]
    [https://www.gnu.org/licenses/agpl.html]

  - Spine (by Doc Reform, related to SiSU) uses standard:
    - docReform markup syntax
      - standard SiSU markup syntax with modified headers and minor modifications
    - docReform object numbering
      - standard SiSU object citation numbering & system

  - Homepages:
    [https://www.sisudoc.org]
    [https://www.doc-reform.org]

  - Git
    [https://git.sisudoc.org/]

+/
/++
  regex: regular expressions used in sisu document parser
+/
module sisudoc.io_out.rgx;
@safe:
/++
  spineRgxOut: template wrapping RgxO, a struct whose static members are
  regular expressions built with ctRegex.

  The optional second ctRegex argument is a flag string; per the std.regex
  flag conventions "g" requests global (all-occurrence) matching and "m"
  makes ^ and $ match at line boundaries.

  NOTE(review): ctRegex is not imported anywhere in the visible source;
  presumably it is supplied by the scope in which this template is mixed in
  or by an import outside this view - confirm before instantiating elsewhere.

  Many patterns match single Unicode glyphs (box-drawing characters, fullwidth
  brackets, etc.) that serve as delimiters; the patterns are byte-sensitive,
  so edit them with care.
+/
static template spineRgxOut() {
  static struct RgxO {
    /+ "make" instruction values +/
    // each captures (lazily) the text after `new=` / `break=` up to the next `;` or end of input
    static make_breakpage                           = ctRegex!(`new=(?P<breakpage>.+?)(?:;|$)`);
    static make_breakcolumn                         = ctRegex!(`break=(?P<breakcolumn>.+?)(?:;|$)`,);
    /+ whitespace and placeholder characters +/
    static newline                                  = ctRegex!("\n", "mg");
    static space                                    = ctRegex!(`[ ]`, "mg");
    // spaces_keep: a run of spaces at the start of a line, or any run of two or more spaces
    static spaces_keep                              = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block
    static spaces_line_start                        = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg");
    // nbsp_char / nbsp_chars: one, respectively one or more, `░` glyphs
    // (going by the names, `░` presumably stands in for a non-breaking space - confirm)
    static nbsp_char                                = ctRegex!(`░`, "mg");
    static nbsp_chars                               = ctRegex!(`[░]+`, "mg");
    static middle_dot                               = ctRegex!(`·`, "mg");
    /+ source file paths and names +/
    // All of these are anchored with ^ ... $ unless noted; path components are
    // limited to ASCII letters, digits, `.`, `_` and `-`.
    // src_pth_sst_or_ssm: optional directory path + filename ending in .sst or .ssm
    static src_pth_sst_or_ssm                       = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`);
    // src_pth_pod_sst_or_ssm: as above, but the file must sit under media/text/<two lowercase letters>/
    static src_pth_pod_sst_or_ssm                   = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`);
    // src_pth_contents: path ending in <name>/pod.manifest; <name> is captured as filename
    static src_pth_contents                         = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*)/pod[.]manifest$`);
    static src_pth_zip                              = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`);
    // src_pth_types: accepts any of the three forms above, distinguished by which of the
    // named groups filename / filelist / filezip participates in the match.
    // Note the path group is itself repeated here, so it holds only the last repetition.
    static src_pth_types                            = ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`);
    // src_fn: splits a .sst/.ssm filename into base name (fn_base) and suffix (fn_src_suffix);
    // the leading path is captured in an unnamed group
    static src_fn                                   = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`);
    // src_fn_master: .ssm files only; src_fn_find_inserts: .ssi or .ssm files
    static src_fn_master                            = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`);
    static src_fn_find_inserts                      = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
    // insert_src_fn_ssi_or_sst: a line of the form `<< [path/]file.sst` or `.ssi`
    static insert_src_fn_ssi_or_sst                 = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`);
    // the next two are anchored at the end only: they pick out the directory (dir) that
    // immediately precedes a trailing /media/text/<two lowercase letters>
    static src_base_parent_dir_name                 = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    static src_formalised_file_path_parts           = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    /+ line breaks +/
    // br_empty_line: two newlines separated only by optional spaces
    static br_empty_line                            = ctRegex!(`\n[ ]*\n`, "mg");
    // the remaining patterns match the break marker glyphs `┘`, `┙`, `┚`
    // (br_linebreaks_newlines additionally matches a literal newline)
    static br_linebreaks_newlines                   = ctRegex!(`[\n┘┙]`, "mg");
    static br_linebreaks                            = ctRegex!(`[┘┙]`, "mg");
    static br_line                                  = ctRegex!(`┘`, "mg");
    static br_line_inline                           = ctRegex!(`┙`, "mg");
    static br_line_spaced                           = ctRegex!(`┚`, "mg");
    /+ quotation marks +/
    static quotes_open_and_close                    = ctRegex!(`[“”]`, "mg");
    /+ inline markup footnotes endnotes +/
    // notes are delimited by `【` ... `】`; the text after the opening bracket is either
    // a `*` / `+` marker followed by whitespace, a number followed by whitespace,
    // or plain whitespace, depending on the pattern
    static inline_notes_al                          = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
    static inline_notes_al_special                  = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
    static inline_notes_al_gen                      = ctRegex!(`【.+?】`, "m");
    static inline_notes_al_gen_text                 = ctRegex!(`【(?P<text>.+?)】`, "m");
    // num: digits, or a run of `*` / `+`; note: the note body without trailing whitespace
    static inline_notes_al_all_note                 = ctRegex!(`【(?P<num>\d+|(?:[*]|[+])+)\s+(?P<note>.+?)\s*】`, "mg");
    static inline_notes_al_regular_number_note      = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg");
    static inline_notes_al_special_char_note        = ctRegex!(`【(?P<char>(?:[*]|[+])+)\s+(?P<note>.+?)】`, "mg");
    // opening delimiter followed by whitespace, optionally preceded by a `*` or `+` marker
    static inline_al_delimiter_open_regular         = ctRegex!(`【\s`, "m");
    static inline_al_delimiter_open_symbol_star     = ctRegex!(`【[*]\s`, "m");
    static inline_al_delimiter_open_symbol_plus     = ctRegex!(`【[+]\s`, "m");
    // a stretch of text up to and including the next note, or else the remaining text
    static inline_text_and_note_al_                 = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|.+))`, "mg");
    /+ inline markup links +/
    // links have the shape `┥text┝┤target├`
    // inline_image: `┥☼name.(jpg|gif|png),w<digits>h<digits>` followed by the rest of the
    // link (post); inline_image_without_dimensions is the same with width and height both 0
    static inline_image                             = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg");
    static inline_image_without_dimensions          = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg");
    // inline_image_info: just the image name and dimensions, with the `☼` marker optional
    static inline_image_info                        = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg");
    static inline_link_anchor                       = ctRegex!(`┃(?P<anchor>\S+?)┃`, "mg"); // TODO *~text_link_anchor
    // inline_link: target is non-whitespace, optionally starting with `#`
    static inline_link                              = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#?(\S+?))├`, "mg");
    // inline_link_empty: link text with an empty target
    static inline_link_empty                        = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
    // target consisting solely of digits
    static inline_link_number                       = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
    static inline_link_number_only                  = ctRegex!(`(?P<linked_text>┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
    // target excludes spaces, digits and the link delimiter glyphs; its first character
    // additionally may not be `#`
    static inline_link_stow_uri                     = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
    // target of the form `#hash`
    static inline_link_hash                         = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<hash>\S+?))├`, "mg");
    // target of the form `seg#hash`, where seg (possibly empty) contains no `/`, `#` or `├`
    static inline_link_seg_and_hash                 = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>(?P<seg>[^/#├]*)#(?P<hash>.+?))├`, "mg");
    // inline_link_clean: the whole `┤target├` part, or a lone `┥` / `┝` delimiter
    static inline_link_clean                        = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
    // target is `#` followed by one of the listed section names
    static inline_link_toc_to_backmatter            = ctRegex!(`┤#(?P<link>endnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
    // url: http/https scheme prefix; uri: http/https/git scheme prefix
    static url                                      = ctRegex!(`https?://`, "mg");
    static uri                                      = ctRegex!(`(?:https?|git)://`, "mg");
    // splits a uri into scheme (type), path up to the last `/`, and final component (file)
    static uri_identify_components                  = ctRegex!(`(?P<type>(?:https?|git)://)(?P<path>\S+?/)(?P<file>[^/]+)$`, "mg");
    // line starting with a level digit 5-7, `~`, a space, then a link
    static inline_link_subtoc                       = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
    // `¤` followed by text and the literal `.fnSuffix`
    static inline_link_fn_suffix                    = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
    static inline_seg_link                          = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
    static mark_internal_site_lnk                   = ctRegex!(`¤`, "mg");
    // a single quote character
    static quotation_mark_sql_insert_delimiter      = ctRegex!("[']", "mg");
    /+ inline markup font face mod +/
    // common shape: `⑆X┨text┣X`, where X is the marker character that selects the
    // face; text is captured lazily
    static inline_emphasis                          = ctRegex!(`⑆[*]┨(?P<text>.+?)┣[*]`, "mg");
    static inline_bold                              = ctRegex!(`⑆[!]┨(?P<text>.+?)┣[!]`, "mg");
    static inline_underscore                        = ctRegex!(`⑆[_]┨(?P<text>.+?)┣[_]`, "mg");
    static inline_italics                           = ctRegex!(`⑆[/]┨(?P<text>.+?)┣[/]`, "mg");
    static inline_superscript                       = ctRegex!(`⑆\^┨(?P<text>.+?)┣\^`, "mg");
    static inline_subscript                         = ctRegex!(`⑆[,]┨(?P<text>.+?)┣[,]`, "mg");
    static inline_strike                            = ctRegex!(`⑆[-]┨(?P<text>.+?)┣[-]`, "mg");
    static inline_insert                            = ctRegex!(`⑆[+]┨(?P<text>.+?)┣[+]`, "mg");
    static inline_mono                              = ctRegex!(`⑆[■]┨(?P<text>.+?)┣[■]`, "mg");
    static inline_cite                              = ctRegex!(`⑆[‖]┨(?P<text>.+?)┣[‖]`, "mg");
    /+ table delimiters +/
    // column: `┊` with any surrounding spaces; row: a newline with any preceding spaces
    static table_delimiter_col                      = ctRegex!("[ ]*[┊][ ]*", "mg");
    static table_delimiter_row                      = ctRegex!("[ ]*\n", "mg");
    /+ paragraph operators +/
    // line-start markers: `_N ` for N in 1-9 (indent), `_* ` (bullet) and
    // `_N* ` (bullet with indent)
    static grouped_para_indent_1                    = ctRegex!(`^_1[ ]`, "m");
    static grouped_para_indent_2                    = ctRegex!(`^_2[ ]`, "m");
    static grouped_para_indent_3                    = ctRegex!(`^_3[ ]`, "m");
    static grouped_para_indent_4                    = ctRegex!(`^_4[ ]`, "m");
    static grouped_para_indent_5                    = ctRegex!(`^_5[ ]`, "m");
    static grouped_para_indent_6                    = ctRegex!(`^_6[ ]`, "m");
    static grouped_para_indent_7                    = ctRegex!(`^_7[ ]`, "m");
    static grouped_para_indent_8                    = ctRegex!(`^_8[ ]`, "m");
    static grouped_para_indent_9                    = ctRegex!(`^_9[ ]`, "m");
    static grouped_para_bullet                      = ctRegex!(`^_[*] `, "m");
    static grouped_para_bullet_indent_1             = ctRegex!(`^_1[*] `, "m");
    static grouped_para_bullet_indent_2             = ctRegex!(`^_2[*] `, "m");
    static grouped_para_bullet_indent_3             = ctRegex!(`^_3[*] `, "m");
    static grouped_para_bullet_indent_4             = ctRegex!(`^_4[*] `, "m");
    static grouped_para_bullet_indent_5             = ctRegex!(`^_5[*] `, "m");
    static grouped_para_bullet_indent_6             = ctRegex!(`^_6[*] `, "m");
    static grouped_para_bullet_indent_7             = ctRegex!(`^_7[*] `, "m");
    static grouped_para_bullet_indent_8             = ctRegex!(`^_8[*] `, "m");
    static grouped_para_bullet_indent_9             = ctRegex!(`^_9[*] `, "m");
    // generic form of the bullet-indent markers above; the digit is captured as indent
    static grouped_para_bullet_indent               = ctRegex!(`^_(?P<indent>[1-9])[*] `, "m");
    // `_H_I ` at line start, capturing two single digits as hang and indent
    static grouped_para_indent_hang                 = ctRegex!(`^_(?P<hang>[0-9])_(?P<indent>[0-9])[ ]`, "m");
  }
}