diff options
author | Ralph Amissah <ralph@amissah.com> | 2017-05-06 11:33:10 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2019-04-10 15:14:14 -0400 |
commit | 7a73aa10276b0c1151500d26b194336b56af7aba (patch) | |
tree | e267901f73375393cc429c5c17ce661216de111a /src/sdp/ao_rgx.d | |
parent | xmls work particularly with epub output (diff) |
cleaning doc-reform_v0.0.14
Diffstat (limited to 'src/sdp/ao_rgx.d')
-rw-r--r-- | src/sdp/ao_rgx.d | 25 |
1 files changed, 12 insertions, 13 deletions
```diff
diff --git a/src/sdp/ao_rgx.d b/src/sdp/ao_rgx.d
index 8c73423..38c7fae 100644
--- a/src/sdp/ao_rgx.d
+++ b/src/sdp/ao_rgx.d
@@ -26,8 +26,8 @@ template SiSUrgxInit() {
  static para_delimiter = ctRegex!("\n[ ]*\n+");
  static table_col_delimiter = ctRegex!("[ ]*\n+", "mg");
  static table_row_delimiter = ctRegex!("\n[ ]*\n+", "mg");
- static table_row_delimiter_special = ctRegex!("[ ]*\n", "mg"); //
- static table_col_delimiter_special = ctRegex!("[ ]*[|][ ]*", "mg"); //
+ static table_row_delimiter_special = ctRegex!("[ ]*\n", "mg");
+ static table_col_delimiter_special = ctRegex!("[ ]*[|][ ]*", "mg");
  static levels_markup = ctRegex!(`^[A-D1-4]$`);
  static levels_numbered = ctRegex!(`^[0-9]$`);
  static levels_numbered_headings = ctRegex!(`^[0-7]$`);
@@ -67,12 +67,11 @@ template SiSUrgxInit() {
  static heading_marker = ctRegex!(`^:?([A-D1-4])[~]`);
  static heading_anchor_tag = ctRegex!(`^:?[A-D1-4][~]([a-z0-9_.-]+) `,"i");
  static heading_identify_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(?:(?:(?:chapter|article|section|clause)\s+[0-9.]+)|(?:[0-9]+))`,"i");
- // unless dob.obj =~/^:?[A-D1-4]~\s+(?:|(?:chapter|article|section|clause)\s+)([0-9.]+)/i
  static heading_extract_named_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+(chapter|article|section|clause)\s+((?:[0-9]+.)*[0-9]+)(?:[.:;, ]|$)`,"i");
  static heading_extract_unnamed_anchor_tag = ctRegex!(`^:?[A-D1-4][~]\s+((?:[0-9]+.)*[0-9]+)(?:[.:;, ]|$)`);
  static heading_marker_missing_tag = ctRegex!(`^:?([A-D1-4])[~] `);
  static heading_title = ctRegex!(`^:?[A-D1-4][~][a-z0-9_.-]*[?]?\s+(.+?)$`);
- static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`); // test, particularly [2] name/hashtag which may or may not be, does this affect title [3]
+ static heading_all = ctRegex!(`^:?([A-D1-4])[~]([a-z0-9_.-]*[?]?)\s+(.+?)$`);
  static heading_backmatter = ctRegex!(`^:?1[~][!](glossary|bibliography|biblio|blurb)\s+`,"i");
  static heading_biblio = ctRegex!(`^:?(1)[~][!](biblio(?:graphy)?|references?)`);
  static heading_glossary = ctRegex!(`^:?(1)[~][!](glossary)`);
@@ -111,7 +110,7 @@ template SiSUrgxInit() {
  static block_curly_quote_close = ctRegex!(`^([}]quote)`);
  static block_curly_table_open = ctRegex!(`^table[{](.*)`);
  static block_curly_table_close = ctRegex!(`^([}]table)`);
- static block_curly_table_special_markup = ctRegex!(`^[{]table((~h)?(?P<columns>(?:[ ]+[0-9]+;)+))[}]`, "mg"); // sepcial table block markup
+ static block_curly_table_special_markup = ctRegex!(`^[{]table((~h)?(?P<columns>(?:[ ]+[0-9]+;)+))[}]`, "mg");
  static table_head_instructions = ctRegex!(`(?P<c_heading>h)?(?:[ ]+c(?P<c_num>[0-9]);)?(?P<c_widths>(?:[ ]+[0-9]+[lr]?;)+)`);
  static table_col_widths_and_alignment = ctRegex!(`(?P<width>[0-9]+)(?P<align>[lr]?)`);
  static table_col_widths = ctRegex!(`(?P<widths>[0-9]+)`);
@@ -133,9 +132,9 @@ template SiSUrgxInit() {
  static inline_text_and_note_square = ctRegex!(`(.+?)~\[\s*(.+?)\]~`, "mg");
  static inline_note_square_delimiters = ctRegex!(`(~\[\s*)(.+?)(\]~)`, "mg");
  static inline_curly_delimiter_open_regular = ctRegex!(`~\{\s*`, "m");
- static inline_curly_delimiter_open_symbol_star = ctRegex!(`~\{[*]\s`, "m"); //
- static inline_curly_delimiter_open_symbol_plus = ctRegex!(`~\{[+]\s`, "m"); //
- static inline_curly_delimiter_open_star_or_plus = ctRegex!(`~\{[+*]`, "m"); //
+ static inline_curly_delimiter_open_symbol_star = ctRegex!(`~\{[*]\s`, "m");
+ static inline_curly_delimiter_open_symbol_plus = ctRegex!(`~\{[+]\s`, "m");
+ static inline_curly_delimiter_open_star_or_plus = ctRegex!(`~\{[+*]`, "m");
  static inline_curly_delimiter_close_regular = ctRegex!(`\s*\}~`, "m");
  static inline_text_and_note_curly = ctRegex!(`(?P<text>.+?)(?:(?:[~])[{][*+ ]*)(?P<note>.+?)(?:[}][~])`, "mg");
  static note_ref = ctRegex!(`^\S+?noteref_([0-9]+)`, "mg"); // {^{73.}^}#noteref_73
@@ -149,7 +148,7 @@ template SiSUrgxInit() {
  /+ inline markup book index +/
  static book_index = ctRegex!(`^=\{\s*(.+?)\}$`, "m");
  static book_index_open = ctRegex!(`^=\{\s*([^}]+?)$`);
- static book_index_close = ctRegex!(`^(.*?)\}$`, "m"); // strip
+ static book_index_close = ctRegex!(`^(.*?)\}$`, "m");
  /+ no obj_cite_number object +/
  static obj_cite_number_off = ctRegex!(`~#$`, "m");
  static obj_cite_number_off_dh = ctRegex!(`-#$`, "m");
@@ -160,7 +159,7 @@ template SiSUrgxInit() {
  static obj_cite_number_off_block_close = ctRegex!(`^--\+#$`);
  static obj_cite_number_block_marks = ctRegex!(`^--[+~-]#$`);
  /+ ignore outside code blocks +/
- static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`); // not structural info
+ static skip_from_regular_parse = ctRegex!(`^(--[+~-]#|-[\\]{2}-|=[.\\]{2}=)$`);
  /+ line & page breaks +/
  static break_line_within_object = ctRegex!(`[\\]{2}( |$)`);
  static break_page = ctRegex!(`^-[\\]{2}-$`);
@@ -187,7 +186,7 @@ template SiSUrgxInit() {
  static strip_br = ctRegex!("^<br>\n|<br>\n*$");
  static space = ctRegex!(`[ ]`, "mg");
  static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg");
- static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg"); // could be issues for endnotes
+ static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg");
  static two_spaces = ctRegex!(`[ ]{2}`, "mg");
  static nbsp_char = ctRegex!(`░`, "mg");
  static nbsp_chars_line_start = ctRegex!(`^░+`, "mg");
@@ -240,7 +239,7 @@ template SiSUrgxInit() {
  static inline_italics_line = ctRegex!(`^/_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
  static inline_underscore_line = ctRegex!(`^__ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
  /+ table delimiters +/
- static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg"); //
- static table_delimiter_row = ctRegex!("[ ]*\n", "mg"); //
+ static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg");
+ static table_delimiter_row = ctRegex!("[ ]*\n", "mg");
  }
  }
```