#+TITLE:       sdp regex defaults
#+AUTHOR:      Ralph Amissah
#+EMAIL:       [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+DESCRIPTION: documents - structuring, publishing in multiple formats & search
#+KEYWORDS:
#+LANGUAGE:    en
#+STARTUP:     indent content
#+OPTIONS:     H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
#+OPTIONS:     TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
#+OPTIONS:     author:nil email:nil creator:nil timestamp:nil
#+PROPERTY:    header-args :padline no :exports code :noweb yes
#+EXPORT_SELECT_TAGS:  export
#+EXPORT_EXCLUDE_TAGS: noexport
#+FILETAGS:            :sdp:rel:meta:
#+TAGS: assert(a) class(c) debug(d) mixin(m) sdp(s) tangle(T) template(t) WEB(W) noexport(n)

[[./sdp.org][sdp]]  [[./][org/]]
* 0. meta ctRegex                                           :module:sdp:meta_rgx:
[[./sdp.org][sdp]]  [[./][org/]]
http://dlang.org/phobos/std_regex.html
In std.regex a pattern can be supplied in one of three forms:
- Plain string, in which case it's compiled to bytecode before matching.
- Regex!char (wchar/dchar) that contains a pattern in the form of compiled bytecode.
- StaticRegex!char (wchar/dchar) that contains a pattern in the form of compiled native machine code.

** 0. module template

#+name: tangle_meta_rgx
#+BEGIN_SRC d :tangle ../src/sdp/meta/rgx.d
/++
  regex: regular expressions used in sisu document parser

  The body of Rgx is assembled at tangle time from the noweb fragments
  referenced inside the struct.
+/
module sdp.meta.rgx;
static template SiSUrgxInit() {
  // presumably brings std.regex (ctRegex) into scope — TODO confirm
  import sdp.meta.defaults;
  static struct Rgx {
    // patterns for parsing document markup
    <<meta_rgx>>
    // generic patterns, also included by sdp.output.rgx
    <<prgmkup_rgx>>
  }
}
#+END_SRC

** misc                                                               :misc:

#+name: meta_rgx
#+BEGIN_SRC d
/+ misc: flags, delimiters, book index targets, newlines, table splits, levels, numerics +/
// a literal dollar sign
static true_dollar                                    = ctRegex!(`\$`, "gm");
// a whole string that is one long option: `--` then a lowercase letter then [a-z0-9-]+
static flag_action                                    = ctRegex!(`^(--[a-z][a-z0-9-]+)$`);
// the same option shape, preceded by a space, anywhere in a string
static flag_action_str                                = ctRegex!(` (--[a-z][a-z0-9-]+)`);
// shortest non-empty text between double quotes (captured)
static within_quotes                                  = ctRegex!(`"(.+?)"`);
static make_heading_delimiter                         = ctRegex!(`[;][ ]*`);
static arr_delimiter                                  = ctRegex!(`[ ]*[;][ ]*`);
// "first, rest": group 1 is text before the comma, group 2 the text after it
static name_delimiter                                 = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`);
// book index targets: a number (ocn) with optional `-number` range (link), optionally `:segment`
// NOTE(review): book_index_go, book_index_go_scroll and book_index_go_seg_anchorless use the same pattern
static book_index_go                                  = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)");
static book_index_go_scroll                           = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)");
static book_index_go_seg                              = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?):(?P<seg>[a-z0-9_-]+)");
static book_index_go_seg_                             = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)(:(?P<seg>[a-z0-9_-]+))?");
static book_index_go_seg_anchorless                   = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)");
static trailing_comma                                 = ctRegex!(",[ ]*$");
// comma, 1-2 spaces, two literal backslashes, a newline, then four spaces at end of line
static trailing_linebreak                             = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m");
// newline handling
static newline_eol_delimiter                          = ctRegex!("\n");
static newline_eol_strip_preceding                    = ctRegex!("[ ]*\n");
static newline_eol_delimiter_only                     = ctRegex!("^\n");
static line_delimiter_ws_strip                        = ctRegex!("[ ]*\n[ ]*");
// a newline, optional spaces, then one or more newlines
static para_delimiter                                 = ctRegex!("\n[ ]*\n+");
// table splitting
static table_col_delimiter                            = ctRegex!("[ ]*\n+", "mg");
static table_row_delimiter                            = ctRegex!("\n[ ]*\n+", "mg");
static table_row_delimiter_special                    = ctRegex!("[ ]*\n", "mg");
static table_col_delimiter_special                    = ctRegex!("[ ]*[|][ ]*", "mg");
// single-character level identifiers
static levels_markup                                  = ctRegex!(`^[A-D1-4]$`);
static levels_numbered                                = ctRegex!(`^[0-9]$`);
static levels_numbered_headings                       = ctRegex!(`^[0-7]$`);
// runs of digits, spaces and , . - (numeric_col also allows currency signs and is anchored to the whole string)
static numeric                                        = ctRegex!(`[ 0-9,.-]+`);
// NOTE(review): the euro sign appears to be listed twice in the class below; harmless if so
static numeric_col                                    = ctRegex!(`^[ 0-9,.$£₤Є€€¥-]+$`);
#+END_SRC

** comments                                                        :comment:

#+name: meta_rgx
#+BEGIN_SRC d
/+ comment lines: one or more `%` at the start +/
// `%`s followed by a space
static comment =
  ctRegex!(`^%+ `);
// `%`s followed by a space, or `%`s filling the whole string
static comments =
  ctRegex!(`^%+ |^%+$`);
#+END_SRC

** config

#+name: meta_rgx
#+BEGIN_SRC d
/+ config: simple substitution pairs (match, replace) +/
// match delimited by slashes: /match/, 'replace' (either quote style accepted for the replacement)
static make_simple_substitutions_rb                   = ctRegex!(`(?P<substitution>/(?P<match>.+?)/,[ ]*['"](?P<replace>.+?)['"])`);
// match delimited by backticks; the backtick is concatenated in as a char literal because the pattern itself is a backtick (raw) string
static make_simple_substitutions_d                    = ctRegex!(`(?P<substitution>` ~ '`' ~ `(?P<match>.+?)` ~ '`' ~ `,[ ]*['"](?P<replace>.+?)['"])`);
#+END_SRC

** native headers
*** native header                                           :native:header:

#+name: meta_rgx
#+BEGIN_SRC d
/+ header: native `@name:` document headers +/
// a bare main header name (no leading `@`) occupying a whole line
static main_headers                                   =
  ctRegex!(`^(?:creator|title|rights|date|original|classify|identifier|notes|publisher|make|links)$`, "m");
// `@name:` at the start, followed by whitespace or end; the name is captured
static native_header                                  = ctRegex!(`^@([a-z_]+):(?:\s|$)`);
static native_header_make                             = ctRegex!(`^@(make):(?:\s|$)`);
// the same names as main_headers, except `make`
static native_header_meta                             =
  ctRegex!(`^@(?:creator|title|rights|date|original|classify|identifier|notes|publisher|links):(?:\s|$)`);
// `@header: content` — content is the remainder of the line and may be empty
static native_header_main                             = ctRegex!(`^@(?P<header>[a-z_]+):\s*(?P<content>.*)`, "m");
// ` :subheader: content` — optional leading spaces; content must be non-empty
static native_header_sub                              = ctRegex!(`^[ ]*:(?P<subheader>[a-z_]+):\s+(?P<content>.+)`, "m");
static native_header_meta_title                       = ctRegex!(`^@title:\s`, "m");
// literal `@title` / `@author` / `@creator` tokens anywhere in a string
static variable_doc_title                             = ctRegex!(`@title`);
static variable_doc_author                            = ctRegex!(`@author|@creator`);
// "first, rest": group 1 up to the first comma that is followed by whitespace, group 2 the remainder
static raw_author_munge                               = ctRegex!(`(\S.+?),\s+(.+)`,"i");
// a TOML-style title: `title = "` or a `[title]` table header
static toml_header_meta_title                         = ctRegex!(`^\s*(title\s*=\s*"|\[title\])`, "m");
#+END_SRC

*** subheader                                            :native:subheader:

#+name: meta_rgx
#+BEGIN_SRC d
/+ head: accepted subheader names, one pattern per main header; each matches a whole line +/
static native_subhead_creator                         = ctRegex!(`^(?:author|translator|illustrator)$`, "m");
// `sub` and `subtitle` are both accepted
static native_subhead_title                           = ctRegex!(`^(?:main|sub(?:title)?|full|language|edition|note)$`, "m");
static native_subhead_rights                          = ctRegex!(`^(?:copyright|illustrations|license|cover)$`, "m");
static native_subhead_date                            = ctRegex!(`^(?:published|created|issued|available|valid|modified|added_to_site)$`, "m");
static native_subhead_original                        = ctRegex!(`^(?:title|language|source)$`, "m");
static native_subhead_classify                        = ctRegex!(`^(?:topic_register|subject|keywords|loc|dewey)$`, "m");
static native_subhead_identifier                      = ctRegex!(`^(?:oclc|pg|isbn)$`, "m");
static native_subhead_notes                           = ctRegex!(`^(?:abstract|description)$`, "m");
static native_subhead_publisher                       = ctRegex!(`^(?:name)$`, "m");
static native_subhead_make                            = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|auto_num_depth|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m");
#+END_SRC

** heading & paragraph operators                        :paragraph:operator:

#+name: meta_rgx
#+BEGIN_SRC d
/+ heading & paragraph operators +/
// Heading markers have the form `[:]L~[tag] ` where L is one of A-D or 1-4.
static heading_a                                      = ctRegex!(`^:?[A][~] `, "m");
// group 1: level, group 2: optional tag (may end in `?`); "i" makes the match case-insensitive
static heading                                        = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i");
// as `heading`, restricted to levels A-D and 1
static heading_seg_and_above                          = ctRegex!(`^:?([A-D1])[~]([a-z0-9_.-]*[?]?)\s+`,"i");
static heading_marker                                 = ctRegex!(`^:?([A-D1-4])[~]`);
// heading with a non-empty tag, which is captured
static heading_anchor_tag                             = ctRegex!(`^:?[A-D1-4][~]([a-z0-9_.-]+) `,"i");
// untagged heading whose text starts with "chapter|article|section|clause <number>" or with a bare number
static heading_identify_anchor_tag                    = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i");
// Extract a dotted number such as 1.2.3 from the start of an untagged heading.
// The separator is a literal dot (`[.]`): an unescaped `.` would accept any
// character between digit runs, e.g. "12 3" or "2x3".
static heading_extract_named_anchor_tag               = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+[.])*[0-9]+)(?:[.:;, ]|$)`,"i");
static heading_extract_unnamed_anchor_tag             = ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+[.])*[0-9]+)(?:[.:;, ]|$)`);
// marker followed directly by a space, i.e. no tag given
static heading_marker_missing_tag                     = ctRegex!(`^:?([A-D1-4])[~] `);
// heading_title captures the heading text; heading_all captures level, tag and text
static heading_title                                  = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`);
static heading_all                                    = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`);
// backmatter: level 1 headings marked `1~!name`
static heading_backmatter                             = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i");
static heading_biblio                                 = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`);
static heading_glossary                               = ctRegex!(`^:?(1)[~][!](glossary)`);
static heading_blurb                                  = ctRegex!(`^:?(1)[~][!](blurb)`);
// each of the next three matches two of the backmatter kinds, or any heading of level A-D or 1
static heading_biblio_glossary                        = ctRegex!(`^:?(?:(1)[~][!](?:(?:biblio(?:graphy)?|references?)|glossary)|[A-D1][~])`);
static heading_biblio_blurb                           = ctRegex!(`^:?(?:(1)[~][!](?:(?:biblio(?:graphy)?|references?)|blurb)|[A-D1][~])`);
static heading_blurb_glossary                         = ctRegex!(`^:?(?:(1)[~][!](?:blurb|glossary)|[A-D1][~])`);
// paragraph attribute prefixes: `_* ` bullet, `_N* ` indented bullet, `_N ` indent, `_N_M ` hanging indent
static para_bullet                                    = ctRegex!(`^_[*] `);
static para_bullet_indent                             = ctRegex!(`^_([1-9])[*] `);
static para_indent                                    = ctRegex!(`^_([1-9]) `);
static para_indent_hang                               = ctRegex!(`^_([0-9])_([0-9]) `);
// any of the prefixes above
static para_attribs                                   = ctRegex!(`^_(?:(?:[0-9])(?:_([0-9]))?|(?:[1-9])?[*]) `);
#+END_SRC

** blocked markup
*** blocked markup                                              :block:tic:

#+name: meta_rgx
#+BEGIN_SRC d
/+ blocked markup +/
// three ways to open a block:
//   name{ ...           (curly; `code` may carry a `.lang` suffix)
//   ``` name            (three backticks, a space, then the block name)
//   {table[~h] N; N;}   (table shorthand; column widths captured as `columns`)
static block_open                                     = ctRegex!("^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)|^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)|^[{]table(~h)?(?P<columns>(?:[ ]+[0-9]+;)+)[}]");
// poem opened in either the curly or the backtick form
static block_poem_open                                = ctRegex!("^((poem[{].*?$)|`{3} poem)");
#+END_SRC

*** blocked markup tics                                          :block:tic:

#+name: meta_rgx
#+BEGIN_SRC d
/+ blocked markup tics: blocks opened with three backticks, a space and a block name +/
static block_tic_open                                 = ctRegex!("^`{3} (code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)"); // what of numbered code?
// group 1: optional `.lang` suffix (without the dot); group 2: optional `#` after spaces
static block_tic_code_open                            = ctRegex!("^`{3} (?:code)(?:[.]([a-z][0-9a-z_]+))?(?:[ ]+([#]))?"); // extract additional info
static block_tic_poem_open                            = ctRegex!("^`{3} (poem)");
static block_tic_group_open                           = ctRegex!("^`{3} (group)");
static block_tic_block_open                           = ctRegex!("^`{3} (block)");
static block_tic_quote_open                           = ctRegex!("^`{3} (quote)");
// everything after `table` on the opening line is captured
static block_tic_table_open                           = ctRegex!("^`{3} table(.*)");
// a line consisting of exactly three backticks closes any tic block
static block_tic_close                                = ctRegex!("^(`{3})$","m");
#+END_SRC

*** blocked markup curly                                      :block:curly:

#+name: meta_rgx
#+BEGIN_SRC d
/+ blocked markup curly: `name{` opens a block, `}name` closes it +/
static block_curly_open                               = ctRegex!(`^((code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)[{].*?$)`);
// group 1: optional `.lang` suffix (without the dot); group 2: `#` if present, else empty
static block_curly_code_open                          = ctRegex!(`^(?:code(?:[.]([a-z][0-9a-z_]+))?[{]([#]?)\s*$)`); // extract additional info
static block_curly_code_close                         = ctRegex!(`^([}]code)`);
static block_curly_poem_open                          = ctRegex!(`^(poem[{].*?$)`);
static block_curly_poem_close                         = ctRegex!(`^([}]poem)`);
static block_curly_group_open                         = ctRegex!(`^(group[{].*?$)`);
static block_curly_group_close                        = ctRegex!(`^([}]group)`);
static block_curly_block_open                         = ctRegex!(`^(block[{].*?$)`);
static block_curly_block_close                        = ctRegex!(`^([}]block)`);
static block_curly_quote_open                         = ctRegex!(`^(quote[{].*?$)`);
static block_curly_quote_close                        = ctRegex!(`^([}]quote)`);
// everything after `table{` on the opening line is captured
static block_curly_table_open                         = ctRegex!(`^table[{](.*)`);
static block_curly_table_close                        = ctRegex!(`^([}]table)`);
// `{table[~h] N; N;}` shorthand: group 1 is the whole instruction, `columns` the width list
static block_curly_table_special_markup               = ctRegex!(`^[{]table((~h)?(?P<columns>(?:[ ]+[0-9]+;)+))[}]`, "mg");
#+END_SRC

*** block sub-matches                                         :block:curly:

#+name: meta_rgx
#+BEGIN_SRC d
/+ block sub-matches: table instructions and column separators +/
// optional `h` (c_heading), optional ` cN;` (c_num), then one or more ` N;` widths, each optionally suffixed l or r (c_widths)
static table_head_instructions                        = ctRegex!(`(?P<c_heading>h)?(?:[ ]+c(?P<c_num>[0-9]);)?(?P<c_widths>(?:[ ]+[0-9]+[lr]?;)+)`);
static table_col_widths_and_alignment                 = ctRegex!(`(?P<width>[0-9]+)(?P<align>[lr]?)`);
static table_col_widths                               = ctRegex!(`(?P<widths>[0-9]+)`);
// table_col_align can match the empty string (`[lr]?`); table_col_align_match requires an l or r
static table_col_align                                = ctRegex!(`(?P<align>[lr]?)`);
static table_col_align_match                          = ctRegex!(`(?P<align>[lr])`);
static table_col_separator                            = ctRegex!(`┊`);
// a separator at the end of a line
static table_col_separator_nl                         = ctRegex!(`[┊]$`, "mg");
#+END_SRC

** inline markup footnotes endnotes                        :inline:footnote:

#+name: meta_rgx
#+BEGIN_SRC d
/+ inline markup footnotes endnotes +/
// curly notes are written ~{ note }~ ; a leading `*` or `+` run marks the "sp" variants
static inline_notes_curly_gen                         = ctRegex!(`~\{.+?\}~`, "m");
static inline_notes_curly                             = ctRegex!(`~\{\s*(.+?)\}~`, "mg");
// either delimiter, together with the whitespace inside it
static inline_curly_delimiter_open_and_close_regular  = ctRegex!(`~\{\s*|\s*\}~`, "m");
static inline_notes_delimiter_curly_regular           = ctRegex!(`~\{[ ]*(.+?)\}~`, "m");
static inline_notes_curly_sp                          = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m");
static inline_notes_curly_sp_asterisk                 = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m");
static inline_notes_curly_sp_plus                     = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m");
// groups: opening delimiter (with optional single `*`/`+`), note text, closing delimiter
static inline_note_curly_delimiters                   = ctRegex!(`(~\{[*+]?\s*)(.+?)(\}~)`, "mg");
// square notes are written ~[ note ]~
static inline_notes_square                            = ctRegex!(`~\[\s*(.+?)\]~`, "mg");
static inline_text_and_note_square_sp                 = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg");
static inline_text_and_note_square                    = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg");
static inline_note_square_delimiters                  = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg");
// individual curly delimiters
static inline_curly_delimiter_open_regular            = ctRegex!(`~\{\s*`, "m");
static inline_curly_delimiter_open_symbol_star        = ctRegex!(`~\{[*]\s`, "m");
static inline_curly_delimiter_open_symbol_plus        = ctRegex!(`~\{[+]\s`, "m");
static inline_curly_delimiter_open_star_or_plus       = ctRegex!(`~\{[+*]`, "m");
static inline_curly_delimiter_close_regular           = ctRegex!(`\s*\}~`, "m");
// `text`: what precedes a note; `note`: the note body without its leading `*`, `+` or spaces
static inline_text_and_note_curly                     = ctRegex!(`(?P<text>.+?)(?:(?:[~])[{][*+ ]*)(?P<note>.+?)(?:[}][~])`, "mg");
// captures the number following `noteref_`
static note_ref                                       = ctRegex!(`^\S+?noteref_([0-9]+)`, "mg");     // {^{73.}^}#noteref_73
#+END_SRC

*** links/ urls                                           :inline:footnote:

#+name: meta_rgx
#+BEGIN_SRC d
/+ links / urls +/
// A link target starts with one of: http://, https://, git://, ../, ./ (the last two optionally prefixed by ¤), a bare ¤, or #.
// NOTE(review): ¤ looks like an internal-link marker (cf. mark_internal_site_lnk) — confirm
// start of a url at line start or after one of } ( [ or a space
static inline_url_generic                              = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg");
// a whole url, up to the next whitespace
static inline_url                                      = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg");
// a url with no markup: `before` is line start or a space, `after` is optional punctuation plus a space or line end
static inline_link_naked_url                           = ctRegex!(`(?P<before>^|[ ])(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<after>[.,;:?!'"]?(?:[ ]|$))`, "mg");
// `{ link text }url`
static inline_link_markup_regular                      = ctRegex!(`(?P<before>^|[ ])\{\s*(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<after>(?:[,;:? ]|[!.]?(?:[ ]|$)))`, "mg");
// `{~^ link text }url`, with and without separation of trailing punctuation
static inline_link_endnote_url_helper_punctuated       = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?P<after>[.,;:?!]?(?:[ ]|$))`, "mg");
static inline_link_endnote_url_helper                  = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg");
#+END_SRC

*** images                                                         :images:

#+name: meta_rgx
#+BEGIN_SRC d
/+ images +/
// a file name made of [a-zA-Z0-9._-] ending in .png, .gif or .jpg (captured)
static image =
  ctRegex!(`([a-zA-Z0-9._-]+?\.(?:png|gif|jpg))`, "mg");
#+END_SRC

*** inline markup book index                             :inline:bookindex:

#+name: meta_rgx
#+BEGIN_SRC d
/+ inline markup book index +/
// a complete entry on one line: `={ ... }` with the content captured
static book_index                                     = ctRegex!(`^=\{\s*(.+?)\}$`, "m");
// an entry that opens with `={` and contains no `}` (so it is not closed in the matched text)
static book_index_open                                = ctRegex!(`^=\{\s*([^}]+?)$`);
// a line ending in `}`; the text before the brace is captured
static book_index_close                               = ctRegex!(`^(.*?)\}$`, "m");
#+END_SRC

** no obj_cite_number object                                :ocn:off:object:

#+name: meta_rgx
#+BEGIN_SRC d
/+ no obj_cite_number object: markers at the end of a line +/
// `~#`
static obj_cite_number_off =
  ctRegex!(`~#$`, "m");
// `-#`
static obj_cite_number_off_dh =
  ctRegex!(`-#$`, "m");
// either of the two
static obj_cite_number_off_all =
  ctRegex!(`[~-]#$`, "m");
#+END_SRC

** no obj_cite_number block                                  :ocn:off:block:

#+name: meta_rgx
#+BEGIN_SRC d
/+ no obj_cite_number block: markers that make up the whole string +/
// `--~#`
static obj_cite_number_off_block =
  ctRegex!(`^--~#$`);
// `---#`
static obj_cite_number_off_block_dh =
  ctRegex!(`^---#$`);
// `--+#`
static obj_cite_number_off_block_close =
  ctRegex!(`^--\+#$`);
// any of the three markers
static obj_cite_number_block_marks =
  ctRegex!(`^--[+~-]#$`);
#+END_SRC

** ignore outside code blocks                                   :block:code:

#+name: meta_rgx
#+BEGIN_SRC d
/+ ignore outside code blocks +/
// whole-string markers: `--+#`, `--~#`, `---#`; `-`, two backslashes, `-`;
// or `=`, two characters each a dot or a backslash, `=`
static skip_from_regular_parse                        = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`);
#+END_SRC

** line & page breaks                                                :break:

#+name: meta_rgx
#+BEGIN_SRC d
/+ line & page breaks +/
// two backslashes followed by a space or the end
static break_line_within_object                       = ctRegex!(`[\\]{2}( |$)`);
// whole-string markers: `-`, two backslashes, `-`  /  `=`, two backslashes, `=`  /  `=..=`
static break_page                                     = ctRegex!(`^-[\\]{2}-$`);
static break_page_new                                 = ctRegex!(`^=[\\]{2}=$`);
static break_page_line_across                         = ctRegex!(`^=[.]{2}=$`);
static break_string                                   = ctRegex!(`』`);
// `D:N` pairs: a digit 0-7, a colon, then a number
// NOTE(review): parent and header_regex_content use the same pattern
static parent                                         = ctRegex!(`([0-7]):([0-9]+)`);
static header_regex_content                           = ctRegex!(`([0-7]):([0-9]+)`);
#+END_SRC

** json                                                               :json:

#+name: meta_rgx
#+BEGIN_SRC d
/+ json +/
// a comma that is the last character on a line
static tailing_comma =
  ctRegex!(`,$`, "m");
#+END_SRC

** biblio tags                                                 :biblio:tags:

#+name: meta_rgx
#+BEGIN_SRC d
/+ biblio tags +/
// `tag: value` — group 1 is the tag (long or abbreviated form), group 2 the value
static biblio_tags                                    = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`);
// a string that is exactly one abbreviated tag
// NOTE(review): `pgs` and `sn` are listed here but are not accepted by biblio_tags — confirm which list is right
static biblio_abbreviations                           = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`);
#+END_SRC

** bookindex split                                         :bookindex:split:

#+name: meta_rgx
#+BEGIN_SRC d
/+ bookindex split: separators, each with optional surrounding whitespace +/
// `;`
static bi_main_terms_split =
  ctRegex!(`\s*;\s*`);
// `:`
static bi_main_term_plus_rest_split =
  ctRegex!(`\s*:\s*`);
// `|`
static bi_sub_terms_plus_obj_cite_number_offset_split =
  ctRegex!(`\s*\|\s*`);
// `term+N`: group 1 is the term, group 2 the digits after the `+`
static bi_term_and_obj_cite_numbers_match =
  ctRegex!(`^(.+?)\+(\d+)`);
#+END_SRC

** language codes                                           :language:codes:

#+name: meta_rgx
#+BEGIN_SRC d
/+ language codes +/
// Alternation is tried left to right, so the longer `pt_BR` must come before
// `pt`; with `pt` first, an unanchored search on "pt_BR" stops at "pt".
// NOTE(review): these two are declared `auto` (instance fields of the struct)
// while every other pattern is `static` — confirm whether that is intended.
// a supported language code anywhere in the input (captured)
auto language_codes                                   =
  ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt_BR|pt|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)");
// a path ending `<lang>/<filename>.sst` or `.ssm`; the language directory is captured
auto language_code_and_filename                       =
  ctRegex!("(?:^|[/])(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt_BR|pt|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)/[A-Za-z0-9._-].+?[.](?:sst|ssm)$");
#+END_SRC

* 1. output ctRegex                                   :module:sdp:output_rgx:
[[./sdp.org][sdp]]  [[./][org/]]
http://dlang.org/phobos/std_regex.html
In std.regex a pattern can be supplied in one of three forms:
- Plain string, in which case it's compiled to bytecode before matching.
- Regex!char (wchar/dchar) that contains a pattern in the form of compiled bytecode.
- StaticRegex!char (wchar/dchar) that contains a pattern in the form of compiled native machine code.

** 0. module template

#+name: tangle_output_rgx
#+BEGIN_SRC d :tangle ../src/sdp/output/rgx.d
/++
  regex: regular expressions used in sisu document parser

  The body of Rgx is assembled at tangle time from the noweb fragments
  referenced inside the struct.
+/
module sdp.output.rgx;
static template SiSUoutputRgxInit() {
  // presumably brings std.regex (ctRegex) into scope — TODO confirm
  import sdp.output.defaults;
  static struct Rgx {
    // generic patterns, also included by sdp.meta.rgx
    <<prgmkup_rgx>>
    // xhtml special characters
    <<sp_ch_xhtml_rgx>>
  }
}
#+END_SRC

** special characters
*** xhtml special characters

#+name: sp_ch_xhtml_rgx
#+BEGIN_SRC d
/+ xhtml special characters; the trailing comment on each line names the intended replacement +/
static xhtml_ampersand                            = ctRegex!(`[&]`);      // &amp;
// matches the double quote itself (was a copy of the ampersand pattern)
static xhtml_quotation                            = ctRegex!(`["]`);      // &quot;
static xhtml_less_than                            = ctRegex!(`[<]`);      // &lt;
static xhtml_greater_than                         = ctRegex!(`[>]`);      // &gt;
// a space followed by two backslashes
static xhtml_line_break                           = ctRegex!(` [\\]{2}`); // <br />
#+END_SRC

* 2. ctRegex defaults shared by meta & output (generic)
** misc generic

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ misc generic: newlines, spaces and the non-breaking-space placeholder +/
static newline                                        = ctRegex!("\n", "mg");
// `<br>` plus newline at the start, or `<br>` plus any newlines at the end
static strip_br                                       = ctRegex!("^<br>\n|<br>\n*$");
static space                                          = ctRegex!(`[ ]`, "mg");
static spaces_line_start                              = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg");
// runs of two or more spaces
static spaces_multiple                                = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg");
static two_spaces                                     = ctRegex!(`[ ]{2}`, "mg");
// NOTE(review): `░` appears to stand in for a non-breaking space, going by the names — confirm
static nbsp_char                                      = ctRegex!(`░`, "mg");
static nbsp_chars_line_start                          = ctRegex!(`^░+`, "mg");
static nbsp_and_space                                 = ctRegex!(`&nbsp;[ ]`, "mg");
static nbsp_char_and_space                            = ctRegex!(`░[ ]`, "mg");
#+END_SRC

** filename (& path) (including insert file)     :insert:file:path:filename:

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ filename (& path): path components are made of [a-zA-Z0-9._-] and separated by `/` +/
// optional directory path, then a file name ending .sst or .ssm
static src_pth_sst_or_ssm                             = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])$`);
// as above, but the file must sit in `media/text/<two-letter code>/`; `podpath` is what precedes that
static src_pth_pod_sst_or_ssm                         = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])$`);
// `<path><name>/sisupod.manifest`
static src_pth_contents                               = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+)/sisupod[.]manifest$`);
// a path whose last component is `sisupod`
static src_pth_pod_root                               = ctRegex!(`^(?P<podroot>(?:[/]?(?:[a-zA-Z0-9._-]+/)*)(sisupod))$`);
static src_pth_zip                                    = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`);
// NOTE(review): accepts .ssi/.ssm here, whereas the other src_pth_* patterns accept .sst/.ssm — confirm this is intended
static src_pth_unzip_pod                              = ctRegex!(`^(?P<path>media/text/[a-z]{2}/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
// classifies a path: `filename` (.sst/.ssm), `filelist` (<dir>/sisupod.manifest) or `filezip` (.zip); `gotfile` holds whichever matched
static src_pth_types                                  =
  ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/sisupod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`);
// a file name followed by whitespace-separated two-letter codes, each followed by a comma or the line end
static pod_content_location                           =
  ctRegex!(`^(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])(?P<languages>(?:\s+[a-z]{2}(?:,|$))+)`, "mg");
// splits a source file name into `fn_base` and `fn_src_suffix` (sst or ssm); `fn_src` is the two joined by the dot
static src_fn                                         =
  ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`);
// relative paths by suffix: .ssm, .sst, .ssi, and .ssi-or-.ssm
static src_fn_master                                  = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`);
static src_fn_text                                    = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`);
static src_fn_insert                                  = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`);
static src_fn_find_inserts                            = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
// an insert directive: `<<` then a relative path to a .sst or .ssi file
static insert_src_fn_ssi_or_sst                       = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`);
// the next three require the path to end in `/media/text/<two-letter code>`
static src_base_parent_dir_name                       = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
static src_base_parent_path                           = ctRegex!(`(?P<dir>(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
static src_formalised_file_path_parts                 = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
#+END_SRC

** inline markup

*** inline breaks

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ line breaks: single box-drawing characters, one per kind of break (kind given by each name) +/
static br_line                                        = ctRegex!(`┘`, "mg");
static br_nl                                          = ctRegex!(`┙`, "mg");
static br_paragraph                                   = ctRegex!(`┚`, "mg");
static br_page_line                                   = ctRegex!(`┼`, "mg");
static br_page                                        = ctRegex!(`┿`, "mg");
static br_page_new                                    = ctRegex!(`╂`, "mg");
#+END_SRC
*** inline (internal program) markup footnotes endnotes   :inline:footnote:

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ inline markup footnotes endnotes: notes delimited by 【 and 】 +/
// note body captured; a leading `*` or `+` with whitespace, or plain whitespace, is skipped
static inline_notes_al                                = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
// only notes that start with `*` or `+`
static inline_notes_al_special                        = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
static inline_notes_al_gen                            = ctRegex!(`【.+?】`, "m");
static inline_notes_al_gen_text                       = ctRegex!(`【(?P<text>.+?)】`, "m");
// `ref` is the `*`/`+` marker with its trailing whitespace, `text` the note body
static inline_notes_al_gen_ref                        = ctRegex!(`【(?P<ref>[*+]\s+)\s*(?P<text>.+?)】`, "mg");
// individual delimiters
static inline_al_delimiter_open_regular               = ctRegex!(`【\s`, "m");
static inline_al_delimiter_open_symbol_star           = ctRegex!(`【[*]\s`, "m");
static inline_al_delimiter_open_symbol_plus           = ctRegex!(`【[+]\s`, "m");
static inline_al_delimiter_close_regular              = ctRegex!(`】`, "m");
static inline_al_delimiter_open_and_close_regular     = ctRegex!(`【|】`, "m");
static inline_notes_delimiter_al_regular              = ctRegex!(`【(.+?)】`, "mg");
// a note that starts with a number: group 1 the number, group 2 the text
static inline_notes_delimiter_al_regular_number_note  = ctRegex!(`【(\d+)\s+(.+?)】`, "mg");
static inline_al_delimiter_open_asterisk              = ctRegex!(`【\*`, "m");
static inline_al_delimiter_open_plus                  = ctRegex!(`【\+`, "m");
// `text`: what precedes a note; `note`: the note body without its leading `*`, `+` or spaces
static inline_text_and_note_al                        = ctRegex!(`(?P<text>.+?)【(?:[*+ ]*)(?P<note>.+?)】`, "mg");
// a chunk of text ending either with a note or at the end of the line
static inline_text_and_note_al_                       = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg");
#+END_SRC

*** inline links

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ inline links: link text is wrapped as ┥text┝ and the target as ┤link├ +/
static inline_link                                    = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
// matches the whole ┤link├ part, or a lone ┥ or ┝
static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
// a wrapped target with no whitespace in it; delimiters and target captured separately
static inline_a_url                                   = ctRegex!(`(┤)(\S+?)(├)`, "mg");
static url                                            = ctRegex!(`https?://`, "mg");
// a link on a line that starts with `5~ `, `6~ ` or `7~ `; the digit is captured as `level`
static inline_link_subtoc                             = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
// the literal placeholder `.fnSuffix`
static fn_suffix                                      = ctRegex!(`\.fnSuffix`, "mg");
// ¤ followed by text and the `.fnSuffix` placeholder
static inline_link_fn_suffix                          = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
static inline_seg_link                                = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
static mark_internal_site_lnk                         = ctRegex!(`¤`, "mg");
#+END_SRC

*** inline markup font face mod                          :inline:font:face:

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ inline markup font face mod: `X{text}X`, where the mark X selects the face +/
// any face in one pattern: `mod` is the opening mark, `text` the content
// NOTE(review): `/` (italics) is not among the marks here, and the closing mark is not required to equal the opening one — confirm both are intended
static inline_faces                                   = ctRegex!(`(?P<markup>(?P<mod>[*!_^,+#-])\{(?P<text>.+?)\}[*!_^,+#-])`, "mg");
// one pattern per mark
static inline_emphasis                                = ctRegex!(`\*\{(?P<text>.+?)\}\*`, "mg");
static inline_bold                                    = ctRegex!(`!\{(?P<text>.+?)\}!`, "mg");
static inline_underscore                              = ctRegex!(`_\{(?P<text>.+?)\}_`, "mg");
static inline_italics                                 = ctRegex!(`/\{(?P<text>.+?)\}/`, "mg");
static inline_superscript                             = ctRegex!(`\^\{(?P<text>.+?)\}\^`, "mg");
static inline_subscript                               = ctRegex!(`,\{(?P<text>.+?)\},`, "mg");
static inline_strike                                  = ctRegex!(`-\{(?P<text>.+?)\}-`, "mg");
static inline_insert                                  = ctRegex!(`\+\{(?P<text>.+?)\}\+`, "mg");
static inline_mono                                    = ctRegex!(`#\{(?P<text>.+?)\}#`, "mg");
static inline_mono_box                                = ctRegex!(`■\{(?P<text>.+?)\}■`, "mg");
static inline_cite                                    = ctRegex!(`"\{(?P<text>.+?)\}"`, "mg");
// whole-line faces: the line starts with `X_ `; group 2 holds up to two trailing markers (a space plus two backslashes, or `~#`)
static inline_faces_line                              = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
static inline_emphasis_line                           = ctRegex!(`^\*_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
static inline_bold_line                               = ctRegex!(`^!_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
static inline_italics_line                            = ctRegex!(`^/_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
static inline_underscore_line                         = ctRegex!(`^__ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
// the opening `X{` or closing `}X` of any face
static inline_fontface_clean                          = ctRegex!(`[*!_/^,+#■"-]\{|\}[*!_/^,+#■"-]`, "mg");
// a string that is exactly `=NULL`
static no_header_rgx                                  = ctRegex!(`^=NULL$`);
#+END_SRC

*** table related

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ table delimiters +/
// column separator ┊ together with any spaces around it
static table_delimiter_col =
  ctRegex!("[ ]*[┊][ ]*", "mg");
// a newline together with any spaces before it
static table_delimiter_row =
  ctRegex!("[ ]*\n", "mg");
#+END_SRC

* __END__