diff options
Diffstat (limited to 'org/default_regex.org')
| -rw-r--r-- | org/default_regex.org | 116 | 
1 files changed, 78 insertions, 38 deletions
| diff --git a/org/default_regex.org b/org/default_regex.org index d9020d4..d78409e 100644 --- a/org/default_regex.org +++ b/org/default_regex.org @@ -7,6 +7,7 @@  #+COPYRIGHT:   Copyright (C) 2015 - 2020 Ralph Amissah  #+LANGUAGE:    en  #+STARTUP:     content hideblocks hidestars noindent entitiespretty +#+OPTIONS:     H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t  #+PROPERTY:    header-args  :exports code  #+PROPERTY:    header-args+ :noweb yes  #+PROPERTY:    header-args+ :eval no @@ -37,15 +38,45 @@ http://dlang.org/phobos/std_regex.html  module doc_reform.meta.rgx;  static template spineRgxIn() {    static struct RgxI { -    <<meta_rgx>> -    <<prgmkup_rgx>> +    <<meta_rgx_0>> +    <<meta_rgx_1>> +    <<meta_rgx_2>> +    <<meta_rgx_3>> +    <<meta_rgx_4>> +    <<meta_rgx_5>> +    <<meta_rgx_6>> +    <<meta_rgx_7>> +    <<meta_rgx_8>> +    <<meta_rgx_9>> +    <<meta_rgx_10>> +    <<meta_rgx_11>> +    <<meta_rgx_12>> +    <<meta_rgx_13>> +    <<meta_rgx_14>> +    <<meta_rgx_15>> +    <<meta_rgx_16>> +    <<meta_rgx_17>> +    <<meta_rgx_18>> +    <<meta_rgx_19>> +    <<meta_rgx_20>> +    <<meta_rgx_21>> +    <<meta_rgx_22>> +    <<prgmkup_rgx_0>> +    <<prgmkup_rgx_1>> +    <<prgmkup_rgx_2>> +    <<prgmkup_rgx_3>> +    <<prgmkup_rgx_4>> +    <<prgmkup_rgx_5>> +    <<prgmkup_rgx_6>> +    <<prgmkup_rgx_7>> +    <<prgmkup_rgx_8>>    }  }  #+END_SRC  ** misc                                                                :misc: -#+NAME: meta_rgx +#+NAME: meta_rgx_0  #+BEGIN_SRC d  /+ misc +/  // static true_dollar                                    = ctRegex!(`\$`, "gm"); @@ -84,7 +115,7 @@ static numeric_col                                    = ctRegex!(`^[ 0-9,.%$£  ** comments                                                         :comment: -#+NAME: meta_rgx +#+NAME: meta_rgx_1  #+BEGIN_SRC d  /+ comments +/  static comment                                        = ctRegex!(`^%+ `); @@ -92,7 +123,7 @@ static comment                                        = ctRegex!(`^%+ `);  ** config -#+NAME: meta_rgx +#+NAME: meta_rgx_2  #+BEGIN_SRC d  /+ header +/  #+END_SRC @@ -100,7 +131,7 @@ static comment                                        = ctRegex!(`^%+ `);  ** native headers  *** native header                                             :native:header: -#+NAME: meta_rgx +#+NAME: meta_rgx_3  #+BEGIN_SRC d  /+ header +/  static variable_doc_title                             = ctRegex!(`@title`); @@ -112,7 +143,7 @@ static yaml_config                                    = ctRegex!(`^[a-z]+\s*:\s*  ** heading & paragraph operators                         :paragraph:operator: -#+NAME: meta_rgx +#+NAME: meta_rgx_4  #+BEGIN_SRC d  /+ heading & paragraph operators +/  static heading_a                                      = ctRegex!(`^:?[A][~] `, "m"); @@ -141,7 +172,7 @@ static para_inline_link_anchor                        = ctRegex!(`\*[~](?P<ancho  ** blocked markup  *** blocked markup curly & tic                                        :block: -#+NAME: meta_rgx +#+NAME: meta_rgx_5  #+BEGIN_SRC d  /+ blocked markup +/  static block_open                                     = ctRegex!("^((code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)|^`{3} (code(?:[.][a-z][0-9a-z#+_]+)?|(?:poem|group|block|quote)(?:[.][a-z][0-9a-z_]+)?|table)(?:[(][ a-zA-Z0-9;:,]*[)])?|^[{]table[(](?:h;)?(?P<columns>(?:[ ,]+[0-9]+)+)[)][}]"); @@ -150,7 +181,7 @@ static block_poem_open                                = ctRegex!("^((poem(?:[(][  *** blocked markup tic                                            :block:tic: -#+NAME: meta_rgx +#+NAME: meta_rgx_6  #+BEGIN_SRC d  /+ blocked markup tics +/  static block_tic_code_open                            = ctRegex!("^`{3} code(?:[.](?P<syntax>[a-z][0-9a-z#+_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); @@ -164,7 +195,7 @@ static block_tic_close                                = ctRegex!("^(`{3})$","m")  *** blocked markup curly                                        :block:curly: -#+NAME: meta_rgx +#+NAME: meta_rgx_7  #+BEGIN_SRC d  /+ blocked markup curly +/  static block_curly_code_open                          = ctRegex!(`^(?:code(?:[.](?P<syntax>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`); @@ -185,14 +216,14 @@ static block_curly_table_special_markup               = ctRegex!(`^[{]table[(](?  *** block sub-matches                                                 :block:  **** code -#+NAME: meta_rgx +#+NAME: meta_rgx_8  #+BEGIN_SRC d  static code_numbering                                 = ctRegex!(`(?P<number>\blinenumber\b|\bnumber\b|\blnr\b)`);  #+END_SRC  **** table -#+NAME: meta_rgx +#+NAME: meta_rgx_9  #+BEGIN_SRC d  static table_head_instructions                        = ctRegex!(`(?:(?P<c_heading>h);)?(?:[ ]+c(?P<c_num>[0-9]):)?(?P<c_widths>(?:[, ]+[0-9]+[lr]?)+)`);  static table_col_widths_and_alignment                 = ctRegex!(`(?P<width>[0-9]+)(?P<align>[lr]?)`); @@ -206,7 +237,7 @@ static table_col_separator_nl                         = ctRegex!(`[┊]$`, "mg")  ** inline markup                                            :inline:footnote:  *** footnotes & endnotes -#+NAME: meta_rgx +#+NAME: meta_rgx_10  #+BEGIN_SRC d  /+ inline markup footnotes endnotes +/  static inline_notes_curly_gen                         = ctRegex!(`~\{.+?\}~`, "m"); @@ -224,7 +255,7 @@ static note_ref                                       = ctRegex!(`^\S+?noteref_(  *** links/ urls                                             :inline:footnote: -#+NAME: meta_rgx +#+NAME: meta_rgx_11  #+BEGIN_SRC d  static webserv_url_doc_root                           = ctRegex!(`(?P<url>(?P<domain>https?:\/\/[^ /]+)\/(?P<path>\S*))`, "mg");  static smid_inline_url_generic                        = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg"); @@ -237,7 +268,7 @@ static smid_inline_link_endnote_url_helper            = ctRegex!(`\{~\^\s+(?P<co  *** images                                                           :images: -#+NAME: meta_rgx +#+NAME: meta_rgx_12  #+BEGIN_SRC d  static image                                           = ctRegex!(`([a-zA-Z0-9._-]+?\.(?:png|gif|jpg))`, "mg");  static smid_image                                      = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg"); @@ -250,7 +281,7 @@ static smid_image_delimit                              = ctRegex!(`(?P<pre>^|[ ]  *** inline markup book index                               :inline:bookindex: -#+NAME: meta_rgx +#+NAME: meta_rgx_13  #+BEGIN_SRC d  /+ inline markup book index +/  static book_index_item                                = ctRegex!(`^=\{\s*(?P<bookindex>.+?)\}$`, "m"); @@ -261,7 +292,7 @@ static book_index_item_close                          = ctRegex!(`^(.*?)\}$`, "m  ** switch  *** switch off auto-heading number -#+NAME: meta_rgx +#+NAME: meta_rgx_14  #+BEGIN_SRC d  static auto_heading_numbering_lv1                    = ctRegex!(`^1~`, "m");  static auto_heading_numbering_lv2                    = ctRegex!(`^2~`, "m"); @@ -276,7 +307,7 @@ static auto_heading_numbering_off_lv4                = ctRegex!(`^4~\S*?-\s`, "m  ** no object_number object                                    :ocn:off:object: -#+NAME: meta_rgx +#+NAME: meta_rgx_15  #+BEGIN_SRC d  /+ no object_number object +/  static object_number_off                            = ctRegex!(`~#[ ]*$`, "m"); @@ -287,7 +318,7 @@ static repeated_character_line_separator            = ctRegex!(`^(?:[ ]*(?:(?:[.  ** no object_number block                                      :ocn:off:block: -#+NAME: meta_rgx +#+NAME: meta_rgx_16  #+BEGIN_SRC d  /+ no object_number block +/  static object_number_off_block                      = ctRegex!(`^--~#$`); @@ -298,7 +329,7 @@ static object_number_block_marks                    = ctRegex!(`^--[+~-]#$`);  ** ignore outside code blocks                                    :block:code: -#+NAME: meta_rgx +#+NAME: meta_rgx_17  #+BEGIN_SRC d  /+ ignore outside code blocks +/  static skip_from_regular_parse    = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); @@ -306,7 +337,7 @@ static skip_from_regular_parse    = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`  ** line & page breaks                                                 :break: -#+NAME: meta_rgx +#+NAME: meta_rgx_18  #+BEGIN_SRC d  /+ line & page breaks +/  static break_string                                   = ctRegex!(`』`); @@ -314,7 +345,7 @@ static break_string                                   = ctRegex!(`』`);  ** biblio tags                                                  :biblio:tags: -#+NAME: meta_rgx +#+NAME: meta_rgx_19  #+BEGIN_SRC d  /+ biblio tags +/  static biblio_tags                                    = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`); @@ -323,7 +354,7 @@ static biblio_abbreviations                           = ctRegex!(`^(au|ed|ti|lng  ** bookindex split                                          :bookindex:split: -#+NAME: meta_rgx +#+NAME: meta_rgx_20  #+BEGIN_SRC d  /+ bookindex split +/  static bi_main_terms_split                            = ctRegex!(`\s*;\s*`); @@ -334,7 +365,7 @@ static bi_term_and_object_numbers_match               = ctRegex!(`^(.+?)\+(\d+)`  ** topic register split (document classify) -#+NAME: meta_rgx +#+NAME: meta_rgx_21  #+BEGIN_SRC d  static topic_register_main_terms_split                = ctRegex!(`\s*;\s*`);  static topic_register_main_term_plus_rest_split       = ctRegex!(`\s*:\s*`); @@ -344,7 +375,7 @@ static topic_register_multiple_sub_terms_split        = ctRegex!(`␣([^|␣]+(?  ** language codes                                            :language:codes: -#+NAME: meta_rgx +#+NAME: meta_rgx_22  #+BEGIN_SRC d  /+ language codes +/  auto language_code_and_filename                                    = @@ -367,8 +398,17 @@ http://dlang.org/phobos/std_regex.html  module doc_reform.io_out.rgx;  static template spineRgxOut() {    static struct RgxO { -    <<prgmkup_rgx>> -    <<sp_ch_xhtml_rgx>> +    <<prgmkup_rgx_0>> +    <<prgmkup_rgx_1>> +    <<prgmkup_rgx_2>> +    <<prgmkup_rgx_3>> +    <<prgmkup_rgx_4>> +    <<prgmkup_rgx_5>> +    <<prgmkup_rgx_6>> +    <<prgmkup_rgx_7>> +    <<prgmkup_rgx_8>> +    <<sp_ch_xhtml_rgx_0>> +    <<sp_ch_xhtml_rgx_1>>    }  }  #+END_SRC @@ -376,7 +416,7 @@ static template spineRgxOut() {  ** special characters  *** xhtml special characters -#+NAME: sp_ch_xhtml_rgx +#+NAME: sp_ch_xhtml_rgx_0  #+BEGIN_SRC d  static xhtml_ampersand                            = ctRegex!(`[&]`, "m");      // &  static xhtml_quotation                            = ctRegex!(`["]`, "m");      // " @@ -387,7 +427,7 @@ static xhtml_line_break                           = ctRegex!(` [\\]{2}`, "m"); /  *** latex special characters -#+NAME: sp_ch_xhtml_rgx +#+NAME: sp_ch_xhtml_rgx_1  #+BEGIN_SRC d  static latex_special_char                         = ctRegex!(`([%${}_#&\\])`);  static latex_special_char_for_escape              = ctRegex!(`([%${}_#\\])`); @@ -404,7 +444,7 @@ static latex_clean_bookindex_linebreak            = ctRegex!(`\s*\\\\\\\\\s*`, "  * 2. ctRegex defaults shared by meta & output (generic)  ** misc generic -#+NAME: prgmkup_rgx +#+NAME: prgmkup_rgx_0  #+BEGIN_SRC d  static newline                                        = ctRegex!("\n", "mg");  static space                                          = ctRegex!(`[ ]`, "mg"); @@ -415,7 +455,7 @@ static nbsp_char                                      = ctRegex!(`░`, "mg");  ** filename (& path) (including insert file)      :insert:file:path:filename: -#+NAME: prgmkup_rgx +#+NAME: prgmkup_rgx_1  #+BEGIN_SRC d  static src_pth_sst_or_ssm                             = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`);  static src_pth_pod_sst_or_ssm                         = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9][a-zA-Z0-9._-]*?[.]ss[tm])$`); @@ -436,7 +476,7 @@ static src_formalised_file_path_parts                 = ctRegex!(`(?P<pth>(?:[/a  *** inline breaks -#+NAME: prgmkup_rgx +#+NAME: prgmkup_rgx_2  #+BEGIN_SRC d  /+ line breaks +/  static br_empty_line                                  = ctRegex!(`\n[ ]*\n`, "mg"); @@ -447,7 +487,7 @@ static br_nl                                          = ctRegex!(`┙`, "mg");  *** inline (internal program) markup footnotes endnotes     :inline:footnote: -#+NAME: prgmkup_rgx +#+NAME: prgmkup_rgx_3  #+BEGIN_SRC d  /+ inline markup footnotes endnotes +/  static inline_notes_al                                = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg"); @@ -467,7 +507,7 @@ static inline_text_and_note_al_                       = ctRegex!(`(.+?(?:【[*+]  *** inline links -#+NAME: prgmkup_rgx +#+NAME: prgmkup_rgx_4  #+BEGIN_SRC d  /+ inline markup links +/  static inline_image                                   = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg"); @@ -495,7 +535,7 @@ static quotation_mark_sql_insert_delimiter            = ctRegex!("[']", "mg");  *** inline markup font face mod                            :inline:font:face: -#+NAME: prgmkup_rgx +#+NAME: prgmkup_rgx_5  #+BEGIN_SRC d  /+ inline markup font face mod +/  static inline_mark_emphasis                         = ctRegex!(`(?P<mark>[*])\{(?P<text>.+?)\}[*]`, "mg"); @@ -510,7 +550,7 @@ static inline_mark_mono                             = ctRegex!(`(?P<mark>[#])\{(  static inline_mark_cite                             = ctRegex!(`(?P<mark>["])\{(?P<text>.+?)\}["]`, "mg");  #+END_SRC -#+NAME: prgmkup_rgx +#+NAME: prgmkup_rgx_6  #+BEGIN_SRC d  static inline_faces_line                              = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);  static inline_emphasis_line                           = ctRegex!(`^\*_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`); @@ -519,7 +559,7 @@ static inline_italics_line                            = ctRegex!(`^/_ (?P<text>.  static inline_underscore_line                         = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);  #+END_SRC -#+NAME: prgmkup_rgx +#+NAME: prgmkup_rgx_7  #+BEGIN_SRC d  /+ inline markup font face mod +/  static inline_emphasis                                = ctRegex!(`[*]┨(?P<text>.+?)┣[*]`, "mg"); @@ -538,7 +578,7 @@ static inline_fontface_clean                          = ctRegex!(`[*!_/^,+■‖  *** table related -#+NAME: prgmkup_rgx +#+NAME: prgmkup_rgx_8  #+BEGIN_SRC d  /+ table delimiters +/  static table_delimiter_col                           = ctRegex!("[ ]*[┊][ ]*", "mg"); | 
