-*- mode: org -*-
#+TITLE: doc_reform regex defaults
#+DESCRIPTION: documents - structuring, publishing in multiple formats & search
#+FILETAGS: :doc_reform:regex:
#+AUTHOR: Ralph Amissah
#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+COPYRIGHT: Copyright (C) 2015 - 2019 Ralph Amissah
#+LANGUAGE: en
#+STARTUP: indent content hideblocks hidestars
#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
#+OPTIONS: author:nil email:nil creator:nil timestamp:nil
#+PROPERTY: header-args :padline no :exports code :cache no :noweb yes
#+EXPORT_SELECT_TAGS: export
#+EXPORT_EXCLUDE_TAGS: noexport
#+TAGS: assert(a) class(c) debug(d) mixin(m) doc_reform(s) tangle(T) template(t) WEB(W) noexport(n)

[[./doc_reform.org][doc_reform]] [[./][org/]]
* 0. meta ctRegex
[[./doc_reform.org][doc_reform]] [[./][org/]]
http://dlang.org/phobos/std_regex.html
- Plain string, in which case it's compiled to bytecode before matching.
- Regex!char (wchar/dchar) that contains a pattern in the form of compiled bytecode.
- StaticRegex!char (wchar/dchar) that contains a pattern in the form of compiled native machine code.

22 special characters used:
#+BEGIN_SRC text
【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■☼
#+END_SRC

** 0. 
module template :module: #+name: tangle_meta_rgx #+BEGIN_SRC d :tangle "../src/doc_reform/meta/rgx.d" /++ regex: regular expressions used in sisu document parser +/ module doc_reform.meta.rgx; static template DocReformRgxInit() { import doc_reform.meta.defaults; static struct Rgx { <> <> } } #+END_SRC ** misc :misc: #+name: meta_rgx #+BEGIN_SRC d /+ misc +/ static true_dollar = ctRegex!(`\$`, "gm"); static sep = ctRegex!(`␣`, "gm"); static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`); static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`); static within_quotes = ctRegex!(`"(.+?)"`, "m"); static yaml_tag_is_str = ctRegex!(`:str$`); static yaml_tag_is_int = ctRegex!(`:int$`); static yaml_tag_is_map = ctRegex!(`:map$`); static yaml_tag_is_seq = ctRegex!(`:seq$`); static make_heading_delimiter = ctRegex!(`[;][ ]*`); static arr_delimiter = ctRegex!(`[ ]*[;][ ]*`); static name_delimiter = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`); static book_index_go = ctRegex!("(?P(?P[0-9]+)(?:-[0-9]+)?)"); static book_index_go_scroll = ctRegex!("(?P(?P[0-9]+)(?:-[0-9]+)?)"); static book_index_go_seg = ctRegex!("(?P(?P[0-9]+)(?:-[0-9]+)?):(?P[a-z0-9_-]+)"); static book_index_go_seg_ = ctRegex!("(?P(?P[0-9]+)(?:-[0-9]+)?)(:(?P[a-z0-9_-]+))?"); static book_index_go_seg_anchorless = ctRegex!("(?P(?P[0-9]+)(?:-[0-9]+)?)"); static trailing_comma = ctRegex!(",[ ]*$"); static trailing_linebreak = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m"); static newline_eol_delimiter = ctRegex!("\n"); static newline_eol_strip_preceding = ctRegex!("[ ]*\n"); static newline_eol_delimiter_only = ctRegex!("^\n"); static line_delimiter_ws_strip = ctRegex!("[ ]*\n[ ]*"); static para_delimiter = ctRegex!("\n[ ]*\n+"); static table_col_delimiter = ctRegex!("[ ]*\n+", "mg"); static table_row_delimiter = ctRegex!("\n[ ]*\n+", "mg"); static table_row_delimiter_special = ctRegex!("[ ]*\n", "mg"); static table_col_delimiter_special = ctRegex!("[ ]*[|][ ]*", "mg"); static levels_markup = ctRegex!(`^[A-D1-4]$`); static 
levels_numbered = ctRegex!(`^[0-9]$`); static levels_numbered_headings = ctRegex!(`^[0-7]$`); static numeric = ctRegex!(`[ 0-9,.-]+`); static numeric_col = ctRegex!(`^[ 0-9,.%$£₤Є€€¥()-]+$`); #+END_SRC ** comments :comment: #+name: meta_rgx #+BEGIN_SRC d /+ comments +/ static comment = ctRegex!(`^%+ `); static comments = ctRegex!(`^%+ |^%+$`); #+END_SRC ** config #+name: meta_rgx #+BEGIN_SRC d /+ header +/ static make_simple_substitutions_rb = ctRegex!(`(?P/(?P.+?)/,[ ]*['"](?P.+?)['"])`); static make_simple_substitutions_d = ctRegex!(`(?P` ~ '`' ~ `(?P.+?)` ~ '`' ~ `,[ ]*['"](?P.+?)['"])`); #+END_SRC ** native headers *** native header :native:header: #+name: meta_rgx #+BEGIN_SRC d /+ header +/ static variable_doc_title = ctRegex!(`@title`); static variable_doc_author = ctRegex!(`@author|@creator`); static raw_author_munge = ctRegex!(`(?P\S.+?),\s+(?P.+)`,"i"); static toml_header_meta_title = ctRegex!(`^\s*(?:title\s*=\s*"|\[title\])`, "m"); static yaml_header_meta_title = ctRegex!(`^\s*(?:title\s*:\s*(?:"?\w|$))`, "m"); static toml_config = ctRegex!(`^\s*(?:[a-z]+\s*=\s*"|\[\w+?\])`, "m"); static yaml_config = ctRegex!(`^[a-z]+\s*:\s*(?:"?\w|$)`, "m"); #+END_SRC ** heading & paragraph operators :paragraph:operator: #+name: meta_rgx #+BEGIN_SRC d /+ heading & paragraph operators +/ static heading_a = ctRegex!(`^:?[A][~] `, "m"); static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); static heading_seg_and_above = ctRegex!(`^:?([A-D1])[~]([a-z0-9_.-]*[?]?)\s+`,"i"); static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`); static heading_anchor_tag = ctRegex!(`^:?[A-D1-4][~](?P[a-z0-9_.-]+) `,"i"); static heading_identify_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i"); static heading_extract_named_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+[.:])*[0-9]+)(?=[.:;, ]|$)`,"i"); static heading_extract_unnamed_anchor_tag = 
ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+.)*[0-9]+)(?=[.:;, ]|$)`); static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `); static heading_anchor_tag_plus_colon = ctRegex!(`^:?([A-D1-4][~])([a-z0-9_.:-]+) `,"i"); static heading_marker_tag_has_colon = ctRegex!(`([:])`); static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`); static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); static heading_backmatter = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i"); static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`); static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`); static heading_blurb = ctRegex!(`^:?(1)[~][!](blurb)`); static heading_biblio_glossary = ctRegex!(`^:?(?:(1)[~][!](?:(?:biblio(?:graphy)?|references?)|glossary)|[A-D1][~])`); static heading_biblio_blurb = ctRegex!(`^:?(?:(1)[~][!](?:(?:biblio(?:graphy)?|references?)|blurb)|[A-D1][~])`); static heading_blurb_glossary = ctRegex!(`^:?(?:(1)[~][!](?:blurb|glossary)|[A-D1][~])`); static para_bullet = ctRegex!(`^_[*] `); static para_bullet_indent = ctRegex!(`^_(?P[1-9])[*] `); static para_indent = ctRegex!(`^_(?P[1-9])[ ]`); static para_indent_hang = ctRegex!(`^_(?P[0-9])_(?P[0-9])[ ]`); static para_attribs = ctRegex!(`^_(?:(?:[0-9])(?:_([0-9]))?|(?:[1-9])?[*]) `); static para_inline_link_anchor = ctRegex!(`\*[~](?P[a-z0-9_.-]+)(?= |$)`,"i"); #+END_SRC ** blocked markup *** blocked markup curly & tic :block: #+name: meta_rgx #+BEGIN_SRC d /+ blocked markup +/ static block_open = ctRegex!("^((code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)|^`{3} (code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?|^[{]table[(](?:h;)?(?P(?:[ ,]+[0-9]+)+)[)][}]"); static block_poem_open = ctRegex!("^((poem(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)|`{3} poem(?:[(][ a-zA-Z0-9;:,]*[)])?)"); #+END_SRC *** 
blocked markup tic :block:tic: #+name: meta_rgx #+BEGIN_SRC d /+ blocked markup tics +/ static block_tic_open = ctRegex!("^`{3} (code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)"); static block_tic_code_open = ctRegex!("^`{3} code(?:[.](?P[a-z][0-9a-z#+_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?"); static block_tic_poem_open = ctRegex!("^`{3} poem(?:[.](?P[a-z][0-9a-z_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?"); static block_tic_group_open = ctRegex!("^`{3} group(?:[.](?P[a-z][0-9a-z_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?"); static block_tic_block_open = ctRegex!("^`{3} block(?:[.](?P[a-z][0-9a-z_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?"); static block_tic_quote_open = ctRegex!("^`{3} quote(?:[.](?P[a-z][0-9a-z_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?"); static block_tic_table_open = ctRegex!("^`{3} table(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?"); // ctRegex!("^`{3} table(?:\(.*?\))?"); static block_tic_close = ctRegex!("^(`{3})$","m"); #+END_SRC *** blocked markup curly :block:curly: #+name: meta_rgx #+BEGIN_SRC d /+ blocked markup curly +/ static block_curly_open = ctRegex!(`^((?:code([.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)`); static block_curly_code_open = ctRegex!(`^(?:code(?:[.](?P[a-z][0-9a-z_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); static block_curly_code_close = ctRegex!(`^([}]code)`); static block_curly_poem_open = ctRegex!(`^(poem(?:[.](?P[a-z][0-9a-z_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); static block_curly_poem_close = ctRegex!(`^([}]poem)`); static block_curly_group_open = ctRegex!(`^(group(?:[.](?P[a-z][0-9a-z_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); static block_curly_group_close = ctRegex!(`^([}]group)`); static block_curly_block_open = ctRegex!(`^(block(?:[.](?P[a-z][0-9a-z_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); static block_curly_block_close = ctRegex!(`^([}]block)`); static block_curly_quote_open = 
ctRegex!(`^(quote(?:[.](?P[a-z][0-9a-z_]+))?(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); static block_curly_quote_close = ctRegex!(`^([}]quote)`); static block_curly_table_open = ctRegex!(`^table(?:[(](?P[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$`); static block_curly_table_close = ctRegex!(`^([}]table)`); static block_curly_table_special_markup = ctRegex!(`^[{]table[(](?P(?:(h);)?(?P(?:[, ]+[0-9]+)+))[)][}]`, "mg"); #+END_SRC *** block sub-matches :block: **** code #+name: meta_rgx #+BEGIN_SRC d static code_numbering = ctRegex!(`(?P\blinenumber\b|\bnumber\b|\blnr\b)`); #+END_SRC **** table #+name: meta_rgx #+BEGIN_SRC d static table_head_instructions = ctRegex!(`(?:(?Ph);)?(?:[ ]+c(?P[0-9]):)?(?P(?:[, ]+[0-9]+[lr]?)+)`); static table_col_widths_and_alignment = ctRegex!(`(?P[0-9]+)(?P[lr]?)`); static table_col_widths = ctRegex!(`(?P[0-9]+)`); static table_col_align = ctRegex!(`(?P[lr]?)`); static table_col_align_match = ctRegex!(`(?P[lr])`); static table_col_separator = ctRegex!(`┊`); static table_col_separator_nl = ctRegex!(`[┊]$`, "mg"); #+END_SRC ** inline markup :inline:footnote: *** footnotes & endnotes #+name: meta_rgx #+BEGIN_SRC d /+ inline markup footnotes endnotes +/ static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m"); static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg"); static inline_curly_delimiter_open_and_close_regular = ctRegex!(`~\{\s*|\s*\}~`, "m"); static inline_notes_delimiter_curly_regular = ctRegex!(`~\{[ ]*(.+?)\}~`, "m"); static inline_notes_curly_sp = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m"); static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m"); static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m"); static inline_note_curly_delimiters = ctRegex!(`(~\{[*+]?\s*)(.+?)(\}~)`, "mg"); static inline_notes_square = ctRegex!(`~\[\s*(.+?)\]~`, "mg"); static inline_text_and_note_square_sp = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg"); static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg"); 
static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg"); static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m"); static inline_curly_delimiter_open_symbol_star = ctRegex!(`~\{[*]\s`, "m"); static inline_curly_delimiter_open_symbol_plus = ctRegex!(`~\{[+]\s`, "m"); static inline_curly_delimiter_open_star_or_plus = ctRegex!(`~\{[+*]`, "m"); static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m"); static inline_text_and_note_curly = ctRegex!(`(?P.+?)(?:(?:[~])[{][*+ ]*)(?P.+?)(?:[}][~])`, "mg"); static note_ref = ctRegex!(`^\S+?noteref_(?P[0-9]+)`, "mg"); // {^{73.}^}#noteref_73 #+END_SRC *** links/ urls :inline:footnote: #+name: meta_rgx #+BEGIN_SRC d static webserv_url_doc_root = ctRegex!(`(?P(?Phttps?:\/\/[^ /]+)\/(?P\S*))`, "mg"); static smid_inline_url_generic = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg"); static smid_inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg"); static smid_inline_link_naked_url = ctRegex!(`(?P
^|[ (\[])(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?=[.,;:?!'"]?([ )\]]|$))`, "mg");
// NOTE(review): in the patterns below `(?P` is not followed by `<name>`; std.regex
// spells named groups `(?P<name>...)`, so the group names appear to be missing
// and need restoring before this block is tangled.
// `{ text }target` link markup preceded by line start, a space, `(` or `[`;
// target is an http(s)/git URL, a `../` or `./` path (optionally `¤`-prefixed),
// a `¤` marker or a `#` fragment, taken lazily up to optional punctuation
// followed by a space, `)`, `]` or end of line.
static smid_inline_link_markup_regular                = ctRegex!(`(?P
^|[ (\[])\{\s*(?P.+?)\s*\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
// `{~^ text}target` where the target is followed by optional punctuation and then a space or end of line
static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P.+?)\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[.,;:?!]?([ ]|$))`, "mg");
// `{~^ text}target` with the target taken greedily up to the next whitespace (no punctuation lookahead)
static smid_inline_link_endnote_url_helper            = ctRegex!(`\{~\^\s+(?P.+?)\}(?P(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg");
#+END_SRC

*** images                                                         :images:

#+name: meta_rgx
#+BEGIN_SRC d
// NOTE(review): in the patterns below `(?P` is not followed by `<name>`; std.regex
// spells named groups `(?P<name>...)`, so the group names appear to be missing
// and need restoring before this block is tangled.
// bare image file name: [a-zA-Z0-9._-] characters ending in .png, .gif or .jpg
static image                                           = ctRegex!(`([a-zA-Z0-9._-]+?\.(?:png|gif|jpg))`, "mg");
// image markup opened by `{` or `┥` and closed by `}` or `┝`, followed by `image`,
// a `┤...├` target, or a URL / relative path / `#` fragment
static smid_image                                      = ctRegex!(`(?P
(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*))(?P[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
// same markup shape, detection only (file name is any non-space run ending in an image extension)
static smid_image_generic                              = ctRegex!(`(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
// as smid_image, with the file name followed by `<digits>x<digits>`
static smid_image_with_dimensions                      = ctRegex!(`(?P
(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*))(?P[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P\d+)x(?P\d+)\s*(?P(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
// `☼`-prefixed file name carrying the literal `,w0h0` suffix
static smid_mod_image_without_dimensions               = ctRegex!(`[{┥](?:~\^\s+|\s*)☼\S+\.(?:png|gif|jpg),w0h0.*[}┝](?:image|┤.*?├|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
// smid_a_*: the same four patterns restricted to the curly form `{ ... }`
static smid_a_image                                    = ctRegex!(`(?P
(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*))(?P[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P(?:.*?)\s*[}](?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_a_image_generic                            = ctRegex!(`(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[}](?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
static smid_a_image_with_dimensions                    = ctRegex!(`(?P
(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*))(?P[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P\d+)x(?P\d+)\s*(?P(?:.*?)\s*[}](?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_a_mod_image_without_dimensions             = ctRegex!(`[{](?:~\^\s+|\s*)☼\S+\.(?:png|gif|jpg),w0h0.*[}](?:image|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
// smid_b_*: the same four patterns restricted to the `┥ ... ┝` form
static smid_b_image                                    = ctRegex!(`(?P
(?:^|[ ]|[^\S]?)[┥](?:~\^\s+|\s*))(?P[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P(?:.*?)\s*[┝](?:┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_b_image_generic                            = ctRegex!(`(?:^|[ ]|[^\S]?)[┥](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[┝](?:┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
static smid_b_image_with_dimensions                    = ctRegex!(`(?P
(?:^|[ ]|[^\S]?)[┥](?:~\^\s+|\s*))(?P[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P\d+)x(?P\d+)\s*(?P(?:.*?)\s*[┝](?:┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_b_mod_image_without_dimensions             = ctRegex!(`[┥](?:~\^\s+|\s*)☼\S+\.(?:png|gif|jpg),w0h0.*[┝](?:┤.*?├|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
// `{ ... }image` delimiters, followed by optional punctuation and a space, `)`, `]` or end of line
static smid_image_delimit                              = ctRegex!(`(?P
^|[ ]|[^\S]?)\{\s*(?P.+?)\s*\}(?:image)(?=[;:!,?.]?([ )\]]|$))`, "mg");
#+END_SRC

*** inline markup book index                             :inline:bookindex:

#+name: meta_rgx
#+BEGIN_SRC d
/+ inline markup book index +/
// a complete book-index entry on one line: `={ ... }`
// NOTE(review): `(?P` is not followed by `<name>` here; std.regex spells named
// groups `(?P<name>...)`, so the group name appears to be missing.
static book_index                                     = ctRegex!(`^=\{\s*(?P.+?)\}$`, "m");
// an entry that opens with `={` and contains no `}` up to the end of the input
static book_index_open                                = ctRegex!(`^=\{\s*([^}]*?)$`);
// a line ending in `}` (captures everything before the brace)
static book_index_close                               = ctRegex!(`^(.*?)\}$`, "m");
#+END_SRC

** switch
*** switch off auto-heading number

#+name: meta_rgx
#+BEGIN_SRC d
/+ auto-heading numbering: level markers and the per-heading "off" marker +/
// heading marker `N~` for numbered levels 1-4, at the start of a line
static auto_heading_numbering_lv1 = ctRegex!(`^1~`, "m");
static auto_heading_numbering_lv2 = ctRegex!(`^2~`, "m");
static auto_heading_numbering_lv3 = ctRegex!(`^3~`, "m");
static auto_heading_numbering_lv4 = ctRegex!(`^4~`, "m");
// heading marker (levels A-D, 1-4) whose tag part ends in `-` followed by whitespace
static auto_heading_numbering_off = ctRegex!(`^[A-D1-4]~\S*?-\s`, "m");
// the same `-` marker, restricted to a single numbered level
static auto_heading_numbering_off_lv1 = ctRegex!(`^1~\S*?-\s`, "m");
static auto_heading_numbering_off_lv2 = ctRegex!(`^2~\S*?-\s`, "m");
static auto_heading_numbering_off_lv3 = ctRegex!(`^3~\S*?-\s`, "m");
static auto_heading_numbering_off_lv4 = ctRegex!(`^4~\S*?-\s`, "m");
#+END_SRC

** no object_number object                                :ocn:off:object:

#+name: meta_rgx
#+BEGIN_SRC d
/+ object-number suppression markers on a single object +/
// `~#` at the end of a line, trailing spaces allowed
static object_number_off = ctRegex!(`~#[ ]*$`, "m");
// `-#` at the very end of a line
static object_number_off_dummy_heading = ctRegex!(`-#$`, "m");
// either marker at the very end of a line
// NOTE(review): unlike object_number_off, these two do not tolerate trailing spaces - confirm intended
static object_number_off_all = ctRegex!(`[~-]#$`, "m");
// a line made only of repeated separator characters: four or more `.`,
// or two or more of - ~ * $ # \ / (each optionally followed by spaces)
static repeated_character_line_separator = ctRegex!(`^(?:[ ]*(?:(?:[.][ ]*){4,}|(?:[-][ ]*|[~][ ]*|[*][ ]*|[$][ ]*|[#][ ]*|[\\][ ]*|[/][ ]*){2,})\s*?)+$`);
#+END_SRC

** no object_number block                                  :ocn:off:block:

#+name: meta_rgx
#+BEGIN_SRC d
/+ object-number suppression markers for a block (each is a whole-line marker) +/
// `--~#`
static object_number_off_block = ctRegex!(`^--~#$`);
// `---#`
static object_number_off_block_dummy_heading = ctRegex!(`^---#$`);
// `--+#`
static object_number_off_block_close = ctRegex!(`^--\+#$`);
// any one of the three markers above
static object_number_block_marks = ctRegex!(`^--[+~-]#$`);
#+END_SRC

** ignore outside code blocks                                   :block:code:

#+name: meta_rgx
#+BEGIN_SRC d
/+ whole-line markers skipped by the regular parse (outside code blocks) +/
// matches the object-number block marks (`--~#`, `---#`, `--+#`), the page break
// `-\\-`, and `=XX=` where each X is `.` or `\`
// NOTE(review): `[.\\]{2}` also accepts mixed pairs such as `=.\=` - confirm intended
static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`);
#+END_SRC

** line & page breaks                                                :break:

#+name: meta_rgx
#+BEGIN_SRC d
/+ line and page break markers +/
// two backslashes followed by a space or the end of the input
static break_line_within_object = ctRegex!(`[\\]{2}( |$)`);
// whole-line markers: `-\\-`, `=\\=` and `=..=`
static break_page = ctRegex!(`^-[\\]{2}-$`);
static break_page_new = ctRegex!(`^=[\\]{2}=$`);
static break_page_line_across = ctRegex!(`^=[.]{2}=$`);
// the literal character `』`
static break_string = ctRegex!(`』`);
// `<digit 0-7>:<digits>` pair; the two patterns below are identical
static parent = ctRegex!(`([0-7]):([0-9]+)`);
static header_regex_content = ctRegex!(`([0-7]):([0-9]+)`);
#+END_SRC

** json                                                               :json:

#+name: meta_rgx
#+BEGIN_SRC d
/+ json helpers +/
// a comma that is the last character of a line ("m": `$` matches at each line end)
static tailing_comma = ctRegex!(`,$`, "m");
#+END_SRC

** biblio tags                                                 :biblio:tags:

#+name: meta_rgx
#+BEGIN_SRC d
/+ bibliography tags +/
// `tag: value` at the start of the input; group 1 is the tag, group 2 the value
static biblio_tags = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`);
// a string that is exactly one of the abbreviated tag names
// NOTE(review): `pgs` and `sn` are listed here but are not accepted by biblio_tags - confirm which list is right
static biblio_abbreviations = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`);
#+END_SRC

** bookindex split                                         :bookindex:split:

#+name: meta_rgx
#+BEGIN_SRC d
/+ book index: separators and term/offset match +/
// `;` with optional surrounding whitespace
static bi_main_terms_split = ctRegex!(`\s*;\s*`);
// `:` with optional surrounding whitespace
static bi_main_term_plus_rest_split = ctRegex!(`\s*:\s*`);
// `|` with optional surrounding whitespace
static bi_sub_terms_plus_object_number_offset_split = ctRegex!(`\s*\|\s*`);
// `term+<digits>`: group 1 is the term, group 2 the number after `+`
static bi_term_and_object_numbers_match = ctRegex!(`^(.+?)\+(\d+)`);
#+END_SRC

** topic register split (document classify)

#+name: meta_rgx
#+BEGIN_SRC d
/+ topic register (document classify): separators +/
// `;`, `:` and `|`, each with optional surrounding whitespace
static topic_register_main_terms_split = ctRegex!(`\s*;\s*`);
static topic_register_main_term_plus_rest_split = ctRegex!(`\s*:\s*`);
static topic_register_sub_terms_split = ctRegex!(`\s*\|\s*`);
// `␣` followed by two or more `|`-separated items (group 1 is the whole `a|b|...` run)
static topic_register_multiple_sub_terms_split = ctRegex!(`␣([^|␣]+(?:\|[^|␣]+)+)`);
#+END_SRC

** language codes                                           :language:codes:

#+name: meta_rgx
#+BEGIN_SRC d
/+ language codes +/
// Alternation is tried left to right, so the longer code `pt_BR` has to come
// before its prefix `pt`; with `pt` first, the unanchored language_codes
// pattern could only ever report "pt" for the input "pt_BR".
// language_codes: one language code anywhere in the input (group 1).
auto language_codes                                    =
   ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt_BR|pt|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)");
// language_code_and_filename: a `<code>/` path component (at the start or after `/`)
// followed by a file name ending in `.sst` or `.ssm`; group 1 is the language code.
auto language_code_and_filename                                    =
   ctRegex!("(?:^|[/])(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt_BR|pt|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)/[A-Za-z0-9._-].+?[.](?:sst|ssm)$");
#+END_SRC

* 1. output ctRegex
[[./doc_reform.org][doc_reform]]  [[./][org/]]
http://dlang.org/phobos/std_regex.html
- Plain string, in which case it's compiled to bytecode before matching.
- Regex!char (wchar/dchar) that contains a pattern in the form of compiled bytecode.
- StaticRegex!char (wchar/dchar) that contains a pattern in the form of compiled native machine code.

** 0. module template                                        :module:output:

#+name: tangle_meta_rgx
#+BEGIN_SRC d :tangle "../src/doc_reform/output/rgx.d"
/++
  regex: regular expressions for the output side of doc_reform
  (module doc_reform.output.rgx)
+/
module doc_reform.output.rgx;
// mixin-style template wrapping a single static struct `Rgx` that holds the patterns
static template DocReformOutputRgxInit() {
  import doc_reform.output.defaults;
  static struct Rgx {
    // NOTE(review): the two `<>` placeholders below look like noweb references
    // (`<<block-name>>`) whose names are missing - presumably the
    // `sp_ch_xhtml_rgx` and `prgmkup_rgx` blocks of this file; confirm and restore.
    <>
    <>
  }
}
#+END_SRC

** special characters
*** xhtml special characters

#+name: sp_ch_xhtml_rgx
#+BEGIN_SRC d
/+ characters needing special treatment in xhtml output +/
static xhtml_ampersand = ctRegex!(`[&]`, "m"); // ampersand: &
static xhtml_quotation = ctRegex!(`["]`, "m"); // double quote: "
static xhtml_less_than = ctRegex!(`[<]`, "m"); // less than: <
static xhtml_greater_than = ctRegex!(`[>]`, "m"); // greater than: >
static xhtml_line_break = ctRegex!(` [\\]{2}`, "m"); // line-break markup: a space then two backslashes
#+END_SRC *** latex special characters #+name: sp_ch_xhtml_rgx #+BEGIN_SRC d static latex_special_char_shortlist = ctRegex!(`([%$_#&\\])`); static latex_special_char_curlybraces = ctRegex!(`([{}])`); static latex_special_char = ctRegex!(`([%${}_#&\\])`); static latex_special_char_for_escape = ctRegex!(`([%${}_#\\])`); static latex_special_char_for_escape_and_braces = ctRegex!(`([&])`); static latex_special_char_for_escape_url = ctRegex!(`([%])`); static latex_special_char_escaped = ctRegex!(`\\([%${}_#\\])`); static latex_special_char_escaped_braced = ctRegex!(`[{]\\([&])[}]`); static latex_identify_inline_link = ctRegex!(`┥.+?┝┤\S+?├`, "mg"); static latex_clean_internal_link = ctRegex!(`^(?:#|¤\S+?#)`, "m"); static latex_identify_inline_fontface = ctRegex!(`\\([_#$]┨.+?┣)\\([_#$])`, "mg"); static latex_clean_bookindex_linebreak = ctRegex!(`\s*\\\\\\\\\s*`, "m"); #+END_SRC * 2. ctRegex defaults shared by meta & output (generic) ** misc generic #+name: prgmkup_rgx #+BEGIN_SRC d static newline = ctRegex!("\n", "mg"); static strip_br = ctRegex!("^
\n|
\n*$"); static space = ctRegex!(`[ ]`, "mg"); static spaces_keep = ctRegex!(`(?P^[ ]+|[ ]{2,})`, "mg"); // code, verse, block static spaces_line_start = ctRegex!(`^(?P[ ]+)`, "mg"); static spaces_multiple = ctRegex!(`(?P[ ]{2,})`, "mg"); static two_spaces = ctRegex!(`[ ]{2}`, "mg"); static nbsp_char = ctRegex!(`░`, "mg"); static nbsp_chars_line_start = ctRegex!(`^░+`, "mg"); static nbsp_and_space = ctRegex!(` [ ]`, "mg"); static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg"); static special_markup_chars = ctRegex!(`[【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■]`, "mg"); #+END_SRC ** filename (& path) (including insert file) :insert:file:path:filename: #+name: prgmkup_rgx #+BEGIN_SRC d static src_pth_sst_or_ssm = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.](?Pss[tm]))$`); static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P[a-zA-Z0-9._-]+[.]ss[tm])$`); static src_pth_contents = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+)/pod[.]manifest$`); static src_pth_zip = ctRegex!(`^(?P[/]?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]zip)$`); static src_pth_unzip_pod = ctRegex!(`^(?Pmedia/text/[a-z]{2}/)*(?P[a-zA-Z0-9._-]+[.]ss[im])$`); static src_pth_types = ctRegex!(`^(?P[/]?[a-zA-Z0-9._-]+/)*(?P(?P[a-zA-Z0-9._-]+[.]ss[tm])|(?P[a-zA-Z0-9._-]+/pod[.]manifest)|(?P[a-zA-Z0-9._-]+[.]zip))$`); static pod_content_location = ctRegex!(`^(?P[a-zA-Z0-9._-]+[.]ss[tm])(?P(?:\s+[a-z]{2}(?:,|$))+)`, "mg"); static src_fn = ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P(?P[a-zA-Z0-9._-]+)[.](?Pss[tm]))$`); static src_fn_master = ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ssm)$`); static src_fn_text = ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]sst)$`); static src_fn_insert = ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ssi)$`); static src_fn_find_inserts = ctRegex!(`^(?P/?(?:[a-zA-Z0-9._-]+/)*)(?P[a-zA-Z0-9._-]+[.]ss[im])$`); static insert_src_fn_ssi_or_sst = 
ctRegex!(`^<<\s*(?P[a-zA-Z0-9._-]+/)*(?P[a-zA-Z0-9._-]+[.]ss[ti])$`); static src_base_parent_dir_name = ctRegex!(`[/](?P(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure static src_base_parent_path = ctRegex!(`(?P(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure static src_formalised_file_path_parts = ctRegex!(`(?P(?:[/a-zA-Z0-9._-]+?)(?P[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure #+END_SRC ** inline markup *** inline breaks #+name: prgmkup_rgx #+BEGIN_SRC d /+ line breaks +/ static empty_line = ctRegex!(`^\s*$`); static empty_block = ctRegex!(`^\s*$`, "mg"); static br_line_natural = ctRegex!(`\n`, "mg"); static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg"); static br_newlines_linebreaks = ctRegex!(`[\n┘┙]`, "mg"); static br_line = ctRegex!(`┘`, "mg"); static br_nl = ctRegex!(`┙`, "mg"); static br_paragraph = ctRegex!(`┚`, "mg"); static br_page_line = ctRegex!(`┼`, "mg"); static br_page = ctRegex!(`┿`, "mg"); static br_page_new = ctRegex!(`╂`, "mg"); #+END_SRC *** inline (internal program) markup footnotes endnotes :inline:footnote: #+name: prgmkup_rgx #+BEGIN_SRC d /+ inline markup footnotes endnotes +/ static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented static inline_notes_al_gen = ctRegex!(`【.+?】`, "m"); static inline_notes_al_regular = ctRegex!(`【(.+?)】`, "mg"); static inline_notes_al_gen_text = ctRegex!(`【(?P.+?)】`, "m"); static inline_notes_al_gen_ref = ctRegex!(`【(?P[*+]\s+)\s*(?P.+?)】`, "mg"); static inline_notes_al_all_note = ctRegex!(`【(?P\d+|(?:[*]|[+])+)\s+(?P.+?)\s*】`, "mg"); static inline_notes_al_regular_number_note = ctRegex!(`【(?P\d+)\s+(?P.+?)\s*】`, "mg"); static inline_notes_al_special_char_note = ctRegex!(`【(?P(?:[*]|[+])+)\s+(?P.+?)】`, "mg"); static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m"); static 
inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m"); static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m"); static inline_al_delimiter_close_regular = ctRegex!(`】`, "m"); static inline_al_delimiter_open_and_close_regular = ctRegex!(`【|】`, "m"); static inline_al_delimiter_open_asterisk = ctRegex!(`【\*`, "m"); static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m"); static inline_text_and_note_al = ctRegex!(`(?P.+?)【(?:[*+ ]*)(?P.+?)】`, "mg"); static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg"); #+END_SRC *** inline links #+name: prgmkup_rgx #+BEGIN_SRC d /+ inline markup links +/ static inline_image = ctRegex!(`(?P
┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+))\s*(?P.*?┝┤.*?├)`, "mg");
// NOTE(review): in the patterns below `(?P` is not followed by `<name>`; std.regex
// spells named groups `(?P<name>...)`, so the group names appear to be missing
// and need restoring before this block is tangled.
// image link `┥☼file.ext,w0h0 ...┝┤...├` (width and height both literally 0)
static inline_image_without_dimensions                = ctRegex!(`(?P
┥)☼(?P(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P0)h(?P0))\s*(?P.*?┝┤.*?├)`, "mg");
// `file.ext,w<digits>h<digits>` with an optional leading `☼`
static inline_image_info                              = ctRegex!(`☼?(?P[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P\d+)h(?P\d+)`, "mg");
// non-space text between two `┃`
static inline_link_anchor                             = ctRegex!(`┃(?P\S+?)┃`, "mg"); // TODO *~text_link_anchor
// link in internal markup: `┥text┝┤target├`
static inline_link_                                   = ctRegex!(`┥(?P.+?)┝┤(?P.+?)├`, "mg");
// as above, target is non-space with an optional leading `#`
static inline_link                                    = ctRegex!(`┥(?P.+?)┝┤(?P#?(\S+?))├`, "mg");
// link with an empty target `┤├`
static inline_link_empty                              = ctRegex!(`┥(?P.+?)┝┤├`, "mg");
// link whose target is digits only
static inline_link_number                             = ctRegex!(`┥(?P.+?)┝┤(?P[0-9]+)├`, "mg"); // not used
static inline_link_number_only                        = ctRegex!(`(┥.+?┝)┤(?P[0-9]+)├`, "mg");
static inline_link_stow_uri                           = ctRegex!(`┥(?P.+?)┝┤(?P[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
// link whose target starts with `#`
static inline_link_hash                               = ctRegex!(`┥(?P.+?)┝┤(?P#(?P\S+?))├`, "mg");
// a `┤...├` target, or a lone `┥` / `┝` delimiter
static inline_link_clean                              = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
// `┤#name├` where name is one of the listed backmatter sections
static inline_link_toc_to_backmatter                  = ctRegex!(`┤#(?Pendnotes|bibliography|bookindex|glossary|blurb)├`, "mg");
// `┤target├` split into three groups: opening mark, target (no whitespace or link delimiters), closing mark
static inline_a_url                                   = ctRegex!(`(┤)([^\s┥┝┤├]+)(├)`, "mg");
// URL scheme prefixes: http(s) only, and http(s) or git
static url                                            = ctRegex!(`https?://`, "mg");
static uri                                            = ctRegex!(`(?:https?|git)://`, "mg");
// scheme, path up to the last `/`, and the final path component
static uri_identify_components                        = ctRegex!(`(?P(?:https?|git)://)(?P\S+?/)(?P[^/]+)$`, "mg");
// line starting `5~ `, `6~ ` or `7~ ` followed by a link
static inline_link_subtoc                             = ctRegex!(`^(?P[5-7])~ ┥(?P.+?)┝┤(?P.+?)├`, "mg");
// the literal placeholder `.fnSuffix`, alone and after a `¤`-prefixed name
static fn_suffix                                      = ctRegex!(`\.fnSuffix`, "mg");
static inline_link_fn_suffix                          = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
static inline_seg_link                                = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
// the literal `¤` marker
static mark_internal_site_lnk                         = ctRegex!(`¤`, "mg");
// a single quote character
static quotation_mark_sql_insert_delimiter            = ctRegex!("[']", "mg");
// any one of several quote-like characters (pattern written as a `┃`-delimited string)
static quotation_mark_various                         = ctRegex!(q"┃['‘’“”"`´¨]┃", "mg");
#+END_SRC

*** inline markup font face mod                          :inline:font:face:

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ inline markup font face mod +/
// NOTE(review): in the patterns below `(?P` is not followed by `<name>`; std.regex
// spells named groups `(?P<name>...)`, so the group names appear to be missing
// and need restoring before this block is tangled.
// any font-face markup `X{text}X`, X being one of * ! / _ ^ , + # " -
// (the closing marker is matched against the same class, not against the opening marker)
static inline_mark_faces                            = ctRegex!(`(?P(?P[*!/_^,+#"-])\{(?P.+?)\}[*!/_^,+#"-])`, "mg");
static inline_mark_faces_to_mod                     = ctRegex!(`(?P[*!/_^,+#"-])\{(?P.+?)\}([*!/_^,+#"-])`, "mg");
// one pattern per face, each with its own marker on both sides of `{text}`
static inline_mark_emphasis                         = ctRegex!(`(?P[*])\{(?P.+?)\}[*]`, "mg");
static inline_mark_bold                             = ctRegex!(`(?P[!])\{(?P.+?)\}[!]`, "mg");
static inline_mark_underscore                       = ctRegex!(`(?P[_])\{(?P.+?)\}[_]`, "mg");
static inline_mark_italics                          = ctRegex!(`(?P[/])\{(?P.+?)\}[/]`, "mg");
static inline_mark_superscript                      = ctRegex!(`(?P\^)\{(?P.+?)\}\^`, "mg");
static inline_mark_subscript                        = ctRegex!(`(?P[,])\{(?P.+?)\}[,]`, "mg");
static inline_mark_strike                           = ctRegex!(`(?P[-])\{(?P.+?)\}[-]`, "mg");
static inline_mark_insert                           = ctRegex!(`(?P[+])\{(?P.+?)\}[+]`, "mg");
static inline_mark_mono                             = ctRegex!(`(?P[#])\{(?P.+?)\}[#]`, "mg");
static inline_mark_cite                             = ctRegex!(`(?P["])\{(?P.+?)\}["]`, "mg");
// just the delimiters: an opening `X{` or a closing `}X`
static inline_mark_fontface_clean                   = ctRegex!(`[*!_/^,+#■"-]\{|\}[*!_/^,+#■"-]`, "mg");
#+END_SRC

#+name: prgmkup_rgx
#+BEGIN_SRC d
// NOTE(review): in the patterns below `(?P` is not followed by `<name>`; std.regex
// spells named groups `(?P<name>...)`, so the group names appear to be missing
// and need restoring before this block is tangled.
// whole-line font face: the line starts with `X_ ` (X one of * ! / _); the tail
// allows up to two trailing markers, each either ` \\` or `~#`
static inline_faces_line                              = ctRegex!(`^[*!/_]_ (?P.+?)((?: [\\]{2}|[~]#){0,2}$)`);
// the same shape for one specific leading marker: `*_ `, `!_ `, `/_ `, `__ `
static inline_emphasis_line                           = ctRegex!(`^\*_ (?P.+?)(?P(?: [\\]{2}|[~]#){0,2}$)`);
static inline_bold_line                               = ctRegex!(`^!_ (?P.+?)(?P(?: [\\]{2}|[~]#){0,2}$)`);
static inline_italics_line                            = ctRegex!(`^/_ (?P.+?)(?P(?: [\\]{2}|[~]#){0,2}$)`);
static inline_underscore_line                         = ctRegex!(`^__ (?P.+?)(?P(?: [\\]{2}|[~]#){0,2}$)`);
// input that is exactly `=NULL`
static no_header_rgx                                  = ctRegex!(`^=NULL$`);
#+END_SRC

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ inline markup font face mod +/
// NOTE(review): in the patterns below `(?P` is not followed by `<name>`; std.regex
// spells named groups `(?P<name>...)`, so the group names appear to be missing
// and need restoring before this block is tangled.
// any font face in internal program markup `X┨text┣X`
// NOTE(review): the marker class here omits `/`, although inline_italics and
// inline_fontface_clean both use it - confirm whether italics should match here.
static inline_faces                                   = ctRegex!(`(?P(?P[*!_^,+■‖-])┨(?P.+?)┣[*!_^,+■‖-])`, "mg");
// one pattern per face, each with its own marker on both sides of `┨text┣`
static inline_emphasis                                = ctRegex!(`[*]┨(?P.+?)┣[*]`, "mg");
static inline_bold                                    = ctRegex!(`[!]┨(?P.+?)┣[!]`, "mg");
static inline_underscore                              = ctRegex!(`[_]┨(?P.+?)┣[_]`, "mg");
static inline_italics                                 = ctRegex!(`[/]┨(?P.+?)┣[/]`, "mg");
static inline_superscript                             = ctRegex!(`\^┨(?P.+?)┣\^`, "mg");
// static inline_superscript                             = ctRegex!(`[\^]┨(?P.+?)┣[\^]`, "mg");
static inline_subscript                               = ctRegex!(`[,]┨(?P.+?)┣[,]`, "mg");
static inline_strike                                  = ctRegex!(`[-]┨(?P.+?)┣[-]`, "mg");
static inline_insert                                  = ctRegex!(`[+]┨(?P.+?)┣[+]`, "mg");
static inline_mono                                    = ctRegex!(`[■]┨(?P.+?)┣[■]`, "mg");
static inline_cite                                    = ctRegex!(`[‖]┨(?P.+?)┣[‖]`, "mg");
// just the delimiters: an opening `X┨` or a closing `┣X`
static inline_fontface_clean                          = ctRegex!(`[*!_/^,+■‖-]┨|┣[*!_/^,+■‖-]`, "mg");
#+END_SRC

*** table related

#+name: prgmkup_rgx
#+BEGIN_SRC d
/+ table delimiters (internal program markup) +/
// column separator `┊` together with any spaces around it
static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg");
// row separator: a newline together with any spaces before it
static table_delimiter_row = ctRegex!("[ ]*\n", "mg");
#+END_SRC

* __END__