1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
|
/++
regex: regular expressions used in sisu document parser
+/
module doc_reform.meta.rgx;
static template DocReformRgxInit() {
import doc_reform.meta.defaults;
static struct Rgx {
/+ misc +/
static true_dollar = ctRegex!(`\$`, "gm");
static flag_action = ctRegex!(`^(--[a-z][a-z0-9-]+)$`);
static flag_action_str = ctRegex!(` (--[a-z][a-z0-9-]+)`);
static within_quotes = ctRegex!(`"(.+?)"`, "m");
static make_heading_delimiter = ctRegex!(`[;][ ]*`);
static arr_delimiter = ctRegex!(`[ ]*[;][ ]*`);
static name_delimiter = ctRegex!(`^([^,]+)[ ]*,[ ]+(.+?)$`);
static book_index_go = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)");
static book_index_go_scroll = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)");
static book_index_go_seg = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?):(?P<seg>[a-z0-9_-]+)");
static book_index_go_seg_ = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)(:(?P<seg>[a-z0-9_-]+))?");
static book_index_go_seg_anchorless = ctRegex!("(?P<link>(?P<ocn>[0-9]+)(?:-[0-9]+)?)");
static trailing_comma = ctRegex!(",[ ]*$");
static trailing_linebreak = ctRegex!(",[ ]{1,2}\\\\\\\\\n[ ]{4}$","m");
static newline_eol_delimiter = ctRegex!("\n");
static newline_eol_strip_preceding = ctRegex!("[ ]*\n");
static newline_eol_delimiter_only = ctRegex!("^\n");
static line_delimiter_ws_strip = ctRegex!("[ ]*\n[ ]*");
static para_delimiter = ctRegex!("\n[ ]*\n+");
static table_col_delimiter = ctRegex!("[ ]*\n+", "mg");
static table_row_delimiter = ctRegex!("\n[ ]*\n+", "mg");
static table_row_delimiter_special = ctRegex!("[ ]*\n", "mg");
static table_col_delimiter_special = ctRegex!("[ ]*[|][ ]*", "mg");
static levels_markup = ctRegex!(`^[A-D1-4]$`);
static levels_numbered = ctRegex!(`^[0-9]$`);
static levels_numbered_headings = ctRegex!(`^[0-7]$`);
static numeric = ctRegex!(`[ 0-9,.-]+`);
static numeric_col = ctRegex!(`^[ 0-9,.%$£₤Є€€¥()-]+$`);
/+ comments +/
static comment = ctRegex!(`^%+ `);
static comments = ctRegex!(`^%+ |^%+$`);
/+ header +/
static make_simple_substitutions_rb = ctRegex!(`(?P<substitution>/(?P<match>.+?)/,[ ]*['"](?P<replace>.+?)['"])`);
static make_simple_substitutions_d = ctRegex!(`(?P<substitution>` ~ '`' ~ `(?P<match>.+?)` ~ '`' ~ `,[ ]*['"](?P<replace>.+?)['"])`);
/+ header +/
static main_headers =
ctRegex!(`^(?:creator|title|rights|date|original|classify|identifier|notes|publisher|make|links)$`, "m");
static native_header = ctRegex!(`^@([a-z_]+):(?:\s|$)`);
static native_header_make = ctRegex!(`^@(make):(?:\s|$)`);
static native_header_meta =
ctRegex!(`^@(?:creator|title|rights|date|original|classify|identifier|notes|publisher|links):(?:\s|$)`);
static native_header_main = ctRegex!(`^@(?P<header>[a-z_]+):\s*(?P<content>.*)`, "m");
static native_header_sub = ctRegex!(`^[ ]*:(?P<subheader>[a-z_]+):\s+(?P<content>.+)`, "m");
static native_header_meta_title = ctRegex!(`^@title:\s`, "m");
static variable_doc_title = ctRegex!(`@title`);
static variable_doc_author = ctRegex!(`@author|@creator`);
static raw_author_munge = ctRegex!(`(\S.+?),\s+(.+)`,"i");
static toml_header_meta_title = ctRegex!(`^\s*(title\s*=\s*"|\[title\])`, "m");
/+ head +/
static native_subhead_creator = ctRegex!(`^(?:author|translator|illustrator)$`, "m");
static native_subhead_title = ctRegex!(`^(?:main|sub(?:title)?|full|language|edition|note)$`, "m");
static native_subhead_rights = ctRegex!(`^(?:copyright|illustrations|license|cover)$`, "m");
static native_subhead_date = ctRegex!(`^(?:published|created|issued|available|valid|modified|added_to_site)$`, "m");
static native_subhead_original = ctRegex!(`^(?:title|language|source)$`, "m");
static native_subhead_classify = ctRegex!(`^(?:topic_register|subject|keywords|loc|dewey)$`, "m");
static native_subhead_identifier = ctRegex!(`^(?:oclc|pg|isbn)$`, "m");
static native_subhead_notes = ctRegex!(`^(?:abstract|description)$`, "m");
static native_subhead_publisher = ctRegex!(`^(?:name)$`, "m");
static native_subhead_make = ctRegex!(`^(?:cover_image|home_button_image|home_button_text|footer|headings|num_top|auto_num_depth|breaks|substitute|bold|italics|emphasis|texpdf_font|css)$`, "m");
/+ heading & paragraph operators +/
static heading_a = ctRegex!(`^:?[A][~] `, "m");
static heading = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+`,"i");
static heading_seg_and_above = ctRegex!(`^:?([A-D1])[~]([a-z0-9_.-]*[?]?)\s+`,"i");
static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`);
static heading_anchor_tag = ctRegex!(`^:?[A-D1-4][~](?P<anchor>[a-z0-9_.-]+) `,"i");
static heading_identify_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i");
static heading_extract_named_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+[.:])*[0-9]+)(?=[.:;, ]|$)`,"i");
static heading_extract_unnamed_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+.)*[0-9]+)(?=[.:;, ]|$)`);
static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `);
static heading_anchor_tag_plus_colon = ctRegex!(`^:?([A-D1-4][~])([a-z0-9_.:-]+) `,"i");
static heading_marker_tag_has_colon = ctRegex!(`([:])`);
static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`);
static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`);
static heading_backmatter = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i");
static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`);
static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`);
static heading_blurb = ctRegex!(`^:?(1)[~][!](blurb)`);
static heading_biblio_glossary = ctRegex!(`^:?(?:(1)[~][!](?:(?:biblio(?:graphy)?|references?)|glossary)|[A-D1][~])`);
static heading_biblio_blurb = ctRegex!(`^:?(?:(1)[~][!](?:(?:biblio(?:graphy)?|references?)|blurb)|[A-D1][~])`);
static heading_blurb_glossary = ctRegex!(`^:?(?:(1)[~][!](?:blurb|glossary)|[A-D1][~])`);
static para_bullet = ctRegex!(`^_[*] `);
static para_bullet_indent = ctRegex!(`^_([1-9])[*] `);
static para_indent = ctRegex!(`^_(?P<indent>[1-9])[ ]`);
static para_indent_hang = ctRegex!(`^_(?P<hang>[0-9])_(?P<indent>[0-9])[ ]`);
static para_attribs = ctRegex!(`^_(?:(?:[0-9])(?:_([0-9]))?|(?:[1-9])?[*]) `);
static para_inline_link_anchor = ctRegex!(`\*[~](?P<anchor>[a-z0-9_.-]+)(?= |$)`,"i");
/+ blocked markup +/
static block_open = ctRegex!("^((code(?:[.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)|^`{3} (code(?:[.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)(?:[(][ a-zA-Z0-9;:,]*[)])?|^[{]table[(](?:h;)?(?P<columns>(?:[ ,]+[0-9]+)+)[)][}]");
static block_poem_open = ctRegex!("^((poem(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)|`{3} poem(?:[(][ a-zA-Z0-9;:,]*[)])?)");
/+ blocked markup tics +/
static block_tic_open = ctRegex!("^`{3} (code(?:[.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)");
static block_tic_code_open = ctRegex!("^`{3} (?:code)(?:[.](?P<syntax>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_poem_open = ctRegex!("^`{3} (poem)(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_group_open = ctRegex!("^`{3} (group)(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_block_open = ctRegex!("^`{3} (block)(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_quote_open = ctRegex!("^`{3} (quote)(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?");
static block_tic_table_open = ctRegex!("^`{3} table(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?"); // ctRegex!("^`{3} table(?:\(.*?\))?");
static block_tic_close = ctRegex!("^(`{3})$","m");
/+ blocked markup curly +/
static block_curly_open = ctRegex!(`^((?:code([.][a-z][0-9a-z_]+)?|poem|group|block|quote|table)(?:[(][ a-zA-Z0-9;:,]*[)])?[{][ ]*$)`);
static block_curly_code_open = ctRegex!(`^(?:code(?:[.](?P<syntax>[a-z][0-9a-z_]+))?(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_code_close = ctRegex!(`^([}]code)`);
static block_curly_poem_open = ctRegex!(`^(poem(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_poem_close = ctRegex!(`^([}]poem)`);
static block_curly_group_open = ctRegex!(`^(group(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_group_close = ctRegex!(`^([}]group)`);
static block_curly_block_open = ctRegex!(`^(block(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_block_close = ctRegex!(`^([}]block)`);
static block_curly_quote_open = ctRegex!(`^(quote(?:[(](?P<attrib>[ a-zA-Z0-9;:,]*)[)])?[{][ ]*$)`);
static block_curly_quote_close = ctRegex!(`^([}]quote)`);
static block_curly_table_open = ctRegex!(`^table(?:[(]([?P<attrib> a-zA-Z0-9;:,]*)[)])?[{][ ]*$`);
static block_curly_table_close = ctRegex!(`^([}]table)`);
static block_curly_table_special_markup = ctRegex!(`^[{]table[(](?P<attrib>(?:(h);)?(?P<columns>(?:[, ]+[0-9]+)+))[)][}]`, "mg");
static code_numbering = ctRegex!(`(?P<number>\bnumber\b)`);
static table_head_instructions = ctRegex!(`(?:(?P<c_heading>h);)?(?:[ ]+c(?P<c_num>[0-9]):)?(?P<c_widths>(?:[, ]+[0-9]+[lr]?)+)`);
static table_col_widths_and_alignment = ctRegex!(`(?P<width>[0-9]+)(?P<align>[lr]?)`);
static table_col_widths = ctRegex!(`(?P<widths>[0-9]+)`);
static table_col_align = ctRegex!(`(?P<align>[lr]?)`);
static table_col_align_match = ctRegex!(`(?P<align>[lr])`);
static table_col_separator = ctRegex!(`┊`);
static table_col_separator_nl = ctRegex!(`[┊]$`, "mg");
/+ inline markup footnotes endnotes +/
static inline_notes_curly_gen = ctRegex!(`~\{.+?\}~`, "m");
static inline_notes_curly = ctRegex!(`~\{\s*(.+?)\}~`, "mg");
static inline_curly_delimiter_open_and_close_regular = ctRegex!(`~\{\s*|\s*\}~`, "m");
static inline_notes_delimiter_curly_regular = ctRegex!(`~\{[ ]*(.+?)\}~`, "m");
static inline_notes_curly_sp = ctRegex!(`~\{[*+]+\s+(.+?)\}~`, "m");
static inline_notes_curly_sp_asterisk = ctRegex!(`~\{[*]+\s+(.+?)\}~`, "m");
static inline_notes_curly_sp_plus = ctRegex!(`~\{[+]+\s+(.+?)\}~`, "m");
static inline_note_curly_delimiters = ctRegex!(`(~\{[*+]?\s*)(.+?)(\}~)`, "mg");
static inline_notes_square = ctRegex!(`~\[\s*(.+?)\]~`, "mg");
static inline_text_and_note_square_sp = ctRegex!(`(.+?)~\[[*+]+\s+(.+?)\]~`, "mg");
static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg");
static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg");
static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m");
static inline_curly_delimiter_open_symbol_star = ctRegex!(`~\{[*]\s`, "m");
static inline_curly_delimiter_open_symbol_plus = ctRegex!(`~\{[+]\s`, "m");
static inline_curly_delimiter_open_star_or_plus = ctRegex!(`~\{[+*]`, "m");
static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m");
static inline_text_and_note_curly = ctRegex!(`(?P<text>.+?)(?:(?:[~])[{][*+ ]*)(?P<note>.+?)(?:[}][~])`, "mg");
static note_ref = ctRegex!(`^\S+?noteref_([0-9]+)`, "mg"); // {^{73.}^}#noteref_73
static webserv_url_doc_root = ctRegex!(`(?P<url>(?P<domain>https?:\/\/[^ /]+)\/(?P<path>\S*))`, "mg");
static smid_inline_url_generic = ctRegex!(`(?:^|[}(\[ ])(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_#]`, "mg");
static smid_inline_url = ctRegex!(`((?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)[a-zA-Z0-9_]\S*)`, "mg");
static smid_inline_link_naked_url = ctRegex!(`(?P<pre>^|[ (\[])(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤)\S+?)(?=[.,;:?!'"]?([ )\]]|$))`, "mg");
static smid_inline_link_markup_regular = ctRegex!(`(?P<pre>^|[ (\[])\{\s*(?P<content>.+?)\s*\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
static smid_inline_link_endnote_url_helper_punctuated = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[.,;:?!]?([ ]|$))`, "mg");
static smid_inline_link_endnote_url_helper = ctRegex!(`\{~\^\s+(?P<content>.+?)\}(?P<link>(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+)`, "mg");
static image = ctRegex!(`([a-zA-Z0-9._-]+?\.(?:png|gif|jpg))`, "mg");
static smid_image = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_image_generic = ctRegex!(`(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
static smid_image_with_dimensions = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{┥](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[}┝](?:image|┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_mod_image_without_dimensions = ctRegex!(`[{┥](?:~\^\s+|\s*)☼\S+\.(?:png|gif|jpg),w0h0.*[}┝](?:image|┤.*?├|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
static smid_a_image = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[}](?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_a_image_generic = ctRegex!(`(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[}](?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
static smid_a_image_with_dimensions = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[{](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[}](?:image|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_a_mod_image_without_dimensions = ctRegex!(`[{](?:~\^\s+|\s*)☼\S+\.(?:png|gif|jpg),w0h0.*[}](?:image|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
static smid_b_image = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[┥](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))(?P<post>(?:.*?)\s*[┝](?:┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_b_image_generic = ctRegex!(`(?:^|[ ]|[^\S]?)[┥](?:~\^\s+|\s*)\S+\.(?:png|gif|jpg).*?[┝](?:┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
static smid_b_image_with_dimensions = ctRegex!(`(?P<pre>(?:^|[ ]|[^\S]?)[┥](?:~\^\s+|\s*))(?P<image>[a-zA-Z0-9._-]+?\.(?:png|gif|jpg))\s+(?P<width>\d+)x(?P<height>\d+)\s*(?P<post>(?:.*?)\s*[┝](?:┤.*?├|(?:(?:https?|git):\/\/|¤?\.\.\/|¤?\.\/|¤|#)\S+?)(?=[;:!,?.]?([ )\]]|$)))`, "mg");
static smid_b_mod_image_without_dimensions = ctRegex!(`[┥](?:~\^\s+|\s*)☼\S+\.(?:png|gif|jpg),w0h0.*[┝](?:┤.*?├|(?:https?|git):\/\/\S+?)(?=[;:!,?.]?([ )\]]|$))`, "mg");
static smid_image_delimit = ctRegex!(`(?P<pre>^|[ ]|[^\S]?)\{\s*(?P<text>.+?)\s*\}(?:image)(?=[;:!,?.]?([ )\]]|$))`, "mg");
/+ inline markup book index +/
static book_index = ctRegex!(`^=\{\s*(.+?)\}$`, "m");
static book_index_open = ctRegex!(`^=\{\s*([^}]*?)$`);
static book_index_close = ctRegex!(`^(.*?)\}$`, "m");
static auto_heading_numbering_lv1 = ctRegex!(`^1~`, "m");
static auto_heading_numbering_lv2 = ctRegex!(`^2~`, "m");
static auto_heading_numbering_lv3 = ctRegex!(`^3~`, "m");
static auto_heading_numbering_lv4 = ctRegex!(`^4~`, "m");
static auto_heading_numbering_off = ctRegex!(`^[A-D1-4]~\S*?-\s`, "m");
static auto_heading_numbering_off_lv1 = ctRegex!(`^1~\S*?-\s`, "m");
static auto_heading_numbering_off_lv2 = ctRegex!(`^2~\S*?-\s`, "m");
static auto_heading_numbering_off_lv3 = ctRegex!(`^3~\S*?-\s`, "m");
static auto_heading_numbering_off_lv4 = ctRegex!(`^4~\S*?-\s`, "m");
/+ no object_number object +/
static object_number_off = ctRegex!(`~#[ ]*$`, "m");
static object_number_off_dh = ctRegex!(`-#$`, "m");
static object_number_off_all = ctRegex!(`[~-]#$`, "m");
static repeated_character_line_separator = ctRegex!(`^(?:(?:(?:[.][ ]?){4,}|(?:[-][ ]?|[~][ ]?|[*][ ]?|[$][ ]?|[#][ ]?|[\\][ ]?|[/][ ]?){2,})\s*?)*$`);
/+ no object_number block +/
static object_number_off_block = ctRegex!(`^--~#$`);
static object_number_off_block_dh = ctRegex!(`^---#$`);
static object_number_off_block_close = ctRegex!(`^--\+#$`);
static object_number_block_marks = ctRegex!(`^--[+~-]#$`);
/+ ignore outside code blocks +/
static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`);
/+ line & page breaks +/
static break_line_within_object = ctRegex!(`[\\]{2}( |$)`);
static break_page = ctRegex!(`^-[\\]{2}-$`);
static break_page_new = ctRegex!(`^=[\\]{2}=$`);
static break_page_line_across = ctRegex!(`^=[.]{2}=$`);
static break_string = ctRegex!(`』`);
static parent = ctRegex!(`([0-7]):([0-9]+)`);
static header_regex_content = ctRegex!(`([0-7]):([0-9]+)`);
/+ json +/
static tailing_comma = ctRegex!(`,$`, "m");
/+ biblio tags +/
static biblio_tags = ctRegex!(`^(is|au|author_raw|author|author_arr|editor_raw|ed|editor_arr|ti|title|subtitle|fulltitle|lng|language|trans|src|jo|journal|in|vol|volume|edn|edition|yr|year|pl|place|pb|pub|publisher|url|pg|pages|note|short_name|id):\s+(.+)`);
static biblio_abbreviations = ctRegex!(`^(au|ed|ti|lng|jo|vol|edn|yr|pl|pb|pub|pg|pgs|sn)$`);
/+ bookindex split +/
static bi_main_terms_split = ctRegex!(`\s*;\s*`);
static bi_main_term_plus_rest_split = ctRegex!(`\s*:\s*`);
static bi_sub_terms_plus_object_number_offset_split = ctRegex!(`\s*\|\s*`);
static bi_term_and_object_numbers_match = ctRegex!(`^(.+?)\+(\d+)`);
/+ language codes +/
auto language_codes =
ctRegex!("(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)");
auto language_code_and_filename =
ctRegex!("(?:^|[/])(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)/[A-Za-z0-9._-].+?[.](?:sst|ssm)$");
static newline = ctRegex!("\n", "mg");
static strip_br = ctRegex!("^<br>\n|<br>\n*$");
static space = ctRegex!(`[ ]`, "mg");
static spaces_keep = ctRegex!(`(?P<keep_spaces>^[ ]+|[ ]{2,})`, "mg"); // code, verse, block
static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg");
static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg");
static two_spaces = ctRegex!(`[ ]{2}`, "mg");
static nbsp_char = ctRegex!(`░`, "mg");
static nbsp_chars_line_start = ctRegex!(`^░+`, "mg");
static nbsp_and_space = ctRegex!(` [ ]`, "mg");
static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg");
static special_markup_chars = ctRegex!(`[【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■]`, "mg");
static src_pth_sst_or_ssm = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`);
static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])$`);
static src_pth_contents = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+)/pod[.]manifest$`);
static src_pth_pod_root = ctRegex!(`^(?P<podroot>(?:[/]?(?:[a-zA-Z0-9._-]+/)*)(pod))$`);
static src_pth_zip = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`);
static src_pth_unzip_pod = ctRegex!(`^(?P<path>media/text/[a-z]{2}/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
static src_pth_types =
ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/pod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`);
static pod_content_location =
ctRegex!(`^(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])(?P<languages>(?:\s+[a-z]{2}(?:,|$))+)`, "mg");
static src_fn =
ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`);
static src_fn_master = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`);
static src_fn_text = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`);
static src_fn_insert = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`);
static src_fn_find_inserts = ctRegex!(`^(?P<path>/?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`);
static src_base_parent_dir_name = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
static src_base_parent_path = ctRegex!(`(?P<dir>(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
static src_formalised_file_path_parts = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
/+ line breaks +/
static empty_line = ctRegex!(`^\s*$`);
static empty_block = ctRegex!(`^\s*$`, "mg");
static br_line_natural = ctRegex!(`\n`, "mg");
static br_empty_line = ctRegex!(`\n[ ]*\n`, "mg");
static br_newlines_linebreaks = ctRegex!(`[\n┘┙]`, "mg");
static br_line = ctRegex!(`┘`, "mg");
static br_nl = ctRegex!(`┙`, "mg");
static br_paragraph = ctRegex!(`┚`, "mg");
static br_page_line = ctRegex!(`┼`, "mg");
static br_page = ctRegex!(`┿`, "mg");
static br_page_new = ctRegex!(`╂`, "mg");
/+ inline markup footnotes endnotes +/
static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
static inline_notes_al_gen = ctRegex!(`【.+?】`, "m");
static inline_notes_al_regular = ctRegex!(`【(.+?)】`, "mg");
static inline_notes_al_gen_text = ctRegex!(`【(?P<text>.+?)】`, "m");
static inline_notes_al_gen_ref = ctRegex!(`【(?P<ref>[*+]\s+)\s*(?P<text>.+?)】`, "mg");
static inline_notes_al_regular_number_note = ctRegex!(`【(?P<num>\d+)\s+(?P<note>.+?)\s*】`, "mg");
static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m");
static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m");
static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m");
static inline_al_delimiter_close_regular = ctRegex!(`】`, "m");
static inline_al_delimiter_open_and_close_regular = ctRegex!(`【|】`, "m");
static inline_al_delimiter_open_asterisk = ctRegex!(`【\*`, "m");
static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m");
static inline_text_and_note_al = ctRegex!(`(?P<text>.+?)【(?:[*+ ]*)(?P<note>.+?)】`, "mg");
static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg");
/+ inline markup links +/
static inline_image = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_without_dimensions = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.*?├)`, "mg");
static inline_image_info = ctRegex!(`☼?(?P<img>[a-zA-Z0-9._-]+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+)`, "mg");
static inline_link_anchor = ctRegex!(`┋(?P<anchor>\S+?)┋`, "mg"); // TODO *~text_link_anchor
static inline_link_ = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
static inline_link = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>\S+?)├`, "mg");
static inline_link_empty = ctRegex!(`┥(?P<text>.+?)┝┤├`, "mg");
static inline_link_number = ctRegex!(`┥(?P<text>.+?)┝┤(?P<num>[0-9]+)├`, "mg"); // not used
static inline_link_number_only = ctRegex!(`(┥.+?┝)┤(?P<num>[0-9]+)├`, "mg");
static inline_link_stow_uri = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>[^ 0-9#┥┝┤├][^ 0-9┥┝┤├]+)├`, "mg"); // will not stow (stowed links) or object number internal links
static inline_link_hash = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>#(?P<segname>\S+?))├`, "mg");
static inline_link_clean = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
static inline_a_url = ctRegex!(`(┤)([^\s┥┝┤├]+)(├)`, "mg");
static url = ctRegex!(`https?://`, "mg");
static uri = ctRegex!(`(?:https?|git)://`, "mg");
static inline_link_subtoc = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
static fn_suffix = ctRegex!(`\.fnSuffix`, "mg");
static inline_link_fn_suffix = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
static inline_seg_link = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
static mark_internal_site_lnk = ctRegex!(`¤`, "mg");
static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg");
static quotation_mark_various = ctRegex!(q"┋['‘’“”"`´¨]┋", "mg");
/+ inline markup font face mod +/
static inline_mark_faces = ctRegex!(`(?P<markup>(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}[*!/_^,+#"-])`, "mg");
static inline_mark_faces_to_mod = ctRegex!(`(?P<mod>[*!/_^,+#"-])\{(?P<text>.+?)\}([*!/_^,+#"-])`, "mg");
static inline_mark_emphasis = ctRegex!(`([*])\{(?P<text>.+?)\}[*]`, "mg");
static inline_mark_bold = ctRegex!(`([!])\{(?P<text>.+?)\}[!]`, "mg");
static inline_mark_underscore = ctRegex!(`([_])\{(?P<text>.+?)\}[_]`, "mg");
static inline_mark_italics = ctRegex!(`([/])\{(?P<text>.+?)\}[/]`, "mg");
static inline_mark_superscript = ctRegex!(`(\^)\{(?P<text>.+?)\}\^`, "mg");
static inline_mark_subscript = ctRegex!(`([,])\{(?P<text>.+?)\}[,]`, "mg");
static inline_mark_strike = ctRegex!(`([-])\{(?P<text>.+?)\}[-]`, "mg");
static inline_mark_insert = ctRegex!(`([+])\{(?P<text>.+?)\}[+]`, "mg");
static inline_mark_mono = ctRegex!(`([#])\{(?P<text>.+?)\}[#]`, "mg");
static inline_mark_cite = ctRegex!(`(["])\{(?P<text>.+?)\}["]`, "mg");
static inline_mark_fontface_clean = ctRegex!(`[*!_/^,+#■"-]\{|\}[*!_/^,+#■"-]`, "mg");
static inline_faces_line = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
static inline_emphasis_line = ctRegex!(`^\*_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static inline_bold_line = ctRegex!(`^!_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static inline_italics_line = ctRegex!(`^/_ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static inline_underscore_line = ctRegex!(`^__ (?P<text>.+?)(?P<tail>(?: [\\]{2}|[~]#){0,2}$)`);
static no_header_rgx = ctRegex!(`^=NULL$`);
/+ inline markup font face mod +/
static inline_faces = ctRegex!(`(?P<markup>(?P<mod>[*!_^,+■‖-])┨(?P<text>.+?)┣[*!_^,+■‖-])`, "mg");
static inline_emphasis = ctRegex!(`[*]┨(?P<text>.+?)┣[*]`, "mg");
static inline_bold = ctRegex!(`[!]┨(?P<text>.+?)┣[!]`, "mg");
static inline_underscore = ctRegex!(`[_]┨(?P<text>.+?)┣[_]`, "mg");
static inline_italics = ctRegex!(`[/]┨(?P<text>.+?)┣[/]`, "mg");
static inline_superscript = ctRegex!(`\^┨(?P<text>.+?)┣\^`, "mg");
// static inline_superscript = ctRegex!(`[\^]┨(?P<text>.+?)┣[\^]`, "mg");
static inline_subscript = ctRegex!(`[,]┨(?P<text>.+?)┣[,]`, "mg");
static inline_strike = ctRegex!(`[-]┨(?P<text>.+?)┣[-]`, "mg");
static inline_insert = ctRegex!(`[+]┨(?P<text>.+?)┣[+]`, "mg");
static inline_mono = ctRegex!(`[■]┨(?P<text>.+?)┣[■]`, "mg");
static inline_cite = ctRegex!(`[‖]┨(?P<text>.+?)┣[‖]`, "mg");
static inline_fontface_clean = ctRegex!(`[*!_/^,+■‖-]┨|┣[*!_/^,+■‖-]`, "mg");
/+ table delimiters +/
static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg");
static table_delimiter_row = ctRegex!("[ ]*\n", "mg");
}
}
|