diff options
Diffstat (limited to 'src/doc_reform/output/rgx.d')
-rw-r--r-- | src/doc_reform/output/rgx.d | 106 |
1 files changed, 106 insertions, 0 deletions
/++
  regex: regular expressions used in sisu document parser
+/
module doc_reform.output.rgx;

/++
  Declares `Rgx`, a struct whose static members are compile-time compiled
  regular expressions (`ctRegex`) used by the output code.

  Members are grouped by purpose: whitespace and placeholder characters,
  source file paths, line/page break markers, inline notes, images and
  links, font-face markup, table delimiters and XHTML special characters.

  NOTE(review): presumably instantiated with `mixin SiSUoutputRgxInit;`
  followed by `Rgx()` at the use site - verify against callers.
+/
static template SiSUoutputRgxInit() {
  import doc_reform.output.defaults;
  // ctRegex comes from std.regex; importing it here means the template does
  // not depend on the instantiating scope having imported std.regex itself.
  import std.regex;
  static struct Rgx {
    /+ whitespace and placeholder characters +/
    static newline = ctRegex!("\n", "mg");
    static strip_br = ctRegex!("^<br>\n|<br>\n*$");
    static space = ctRegex!(`[ ]`, "mg");
    static spaces_line_start = ctRegex!(`^(?P<opening_spaces>[ ]+)`, "mg");
    static spaces_multiple = ctRegex!(`(?P<multiple_spaces>[ ]{2,})`, "mg");
    static two_spaces = ctRegex!(`[ ]{2}`, "mg");
    static nbsp_char = ctRegex!(`░`, "mg");
    static nbsp_chars_line_start = ctRegex!(`^░+`, "mg");
    // NOTE(review): the name implies a no-break space before "[ ]"; confirm
    // that the first pattern character is the intended one and not a plain
    // space (as written it would match the same text as two_spaces).
    static nbsp_and_space = ctRegex!(` [ ]`, "mg");
    static nbsp_char_and_space = ctRegex!(`░[ ]`, "mg");
    static special_markup_chars = ctRegex!(`[【】〖〗┥┝┤├¤░┘┙┚┼┿╂┊┏┚┆■]`, "mg");
    /+ source file paths and file names (.sst, .ssm, .ssi, sisupod, zip) +/
    static src_pth_sst_or_ssm = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.](?P<extension>ss[tm]))$`);
    static src_pth_pod_sst_or_ssm = ctRegex!(`^(?P<podpath>[/]?(?:[a-zA-Z0-9._-]+/)*)media/text/[a-z]{2}/(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])$`);
    static src_pth_contents = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+)/sisupod[.]manifest$`);
    static src_pth_pod_root = ctRegex!(`^(?P<podroot>(?:[/]?(?:[a-zA-Z0-9._-]+/)*)(sisupod))$`);
    static src_pth_zip = ctRegex!(`^(?P<path>[/]?(?:[a-zA-Z0-9._-]+/)*)(?P<filename>[a-zA-Z0-9._-]+[.]zip)$`);
    static src_pth_unzip_pod = ctRegex!(`^(?P<path>media/text/[a-z]{2}/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
    static src_pth_types =
      ctRegex!(`^(?P<path>[/]?[a-zA-Z0-9._-]+/)*(?P<gotfile>(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])|(?P<filelist>[a-zA-Z0-9._-]+/sisupod[.]manifest)|(?P<filezip>[a-zA-Z0-9._-]+[.]zip))$`);
    static pod_content_location =
      ctRegex!(`^(?P<filename>[a-zA-Z0-9._-]+[.]ss[tm])(?P<languages>(?:\s+[a-z]{2}(?:,|$))+)`, "mg");
    static src_fn =
      ctRegex!(`^([/]?(?:[a-zA-Z0-9._-]+/)*)(?P<fn_src>(?P<fn_base>[a-zA-Z0-9._-]+)[.](?P<fn_src_suffix>ss[tm]))$`);
    static src_fn_master = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ssm)$`);
    static src_fn_text = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]sst)$`);
    static src_fn_insert = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ssi)$`);
    static src_fn_find_inserts = ctRegex!(`^(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[im])$`);
    static insert_src_fn_ssi_or_sst = ctRegex!(`^<<\s*(?P<path>[a-zA-Z0-9._-]+/)*(?P<filename>[a-zA-Z0-9._-]+[.]ss[ti])$`);
    static src_base_parent_dir_name = ctRegex!(`[/](?P<dir>(?:[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    static src_base_parent_path = ctRegex!(`(?P<dir>(?:[/a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    static src_formalised_file_path_parts = ctRegex!(`(?P<pth>(?:[/a-zA-Z0-9._-]+?)(?P<dir>[a-zA-Z0-9._-]+))(?:/media/text/[a-z]{2})$`); // formalizes dir structure
    /+ line breaks +/
    static br_line = ctRegex!(`┘`, "mg");
    static br_nl = ctRegex!(`┙`, "mg");
    static br_paragraph = ctRegex!(`┚`, "mg");
    static br_page_line = ctRegex!(`┼`, "mg");
    static br_page = ctRegex!(`┿`, "mg");
    static br_page_new = ctRegex!(`╂`, "mg");
    /+ inline markup footnotes endnotes +/
    static inline_notes_al = ctRegex!(`【(?:[*+]\s+|\s*)(.+?)】`, "mg");
    static inline_notes_al_special = ctRegex!(`【(?:[*+]\s+)(.+?)】`, "mg"); // TODO remove match when special footnotes are implemented
    static inline_notes_al_gen = ctRegex!(`【.+?】`, "m");
    static inline_notes_al_gen_text = ctRegex!(`【(?P<text>.+?)】`, "m");
    static inline_notes_al_gen_ref = ctRegex!(`【(?P<ref>[*+]\s+)\s*(?P<text>.+?)】`, "mg");
    static inline_al_delimiter_open_regular = ctRegex!(`【\s`, "m");
    static inline_al_delimiter_open_symbol_star = ctRegex!(`【[*]\s`, "m");
    static inline_al_delimiter_open_symbol_plus = ctRegex!(`【[+]\s`, "m");
    static inline_al_delimiter_close_regular = ctRegex!(`】`, "m");
    static inline_al_delimiter_open_and_close_regular = ctRegex!(`【|】`, "m");
    static inline_notes_delimiter_al_regular = ctRegex!(`【(.+?)】`, "mg");
    static inline_notes_delimiter_al_regular_number_note = ctRegex!(`【(\d+)\s+(.+?)】`, "mg");
    static inline_al_delimiter_open_asterisk = ctRegex!(`【\*`, "m");
    static inline_al_delimiter_open_plus = ctRegex!(`【\+`, "m");
    static inline_text_and_note_al = ctRegex!(`(?P<text>.+?)【(?:[*+ ]*)(?P<note>.+?)】`, "mg");
    static inline_text_and_note_al_ = ctRegex!(`(.+?(?:【[*+]*\s+.+?】|$))`, "mg");
    /+ inline markup images, links, urls, quotation marks +/
    static inline_image = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>\S+?\.(?:jpg|gif|png)),w(?P<width>\d+)h(?P<height>\d+))\s*(?P<post>.*?┝┤.+?├)`, "mg");
    static inline_image_without_dimensions = ctRegex!(`(?P<pre>┥)☼(?P<imginf>(?P<img>\S+?\.(?:jpg|gif|png)),w(?P<width>0)h(?P<height>0))\s*(?P<post>.*?┝┤.+?├)`, "mg");
    static inline_link = ctRegex!(`┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
    static inline_link_clean = ctRegex!(`┤(?:.+?)├|[┥┝]`, "mg");
    static inline_a_url = ctRegex!(`(┤)(\S+?)(├)`, "mg");
    static url = ctRegex!(`https?://`, "mg");
    static inline_link_subtoc = ctRegex!(`^(?P<level>[5-7])~ ┥(?P<text>.+?)┝┤(?P<link>.+?)├`, "mg");
    static fn_suffix = ctRegex!(`\.fnSuffix`, "mg");
    static inline_link_fn_suffix = ctRegex!(`¤(.+?)(\.fnSuffix)`, "mg");
    static inline_seg_link = ctRegex!(`(¤)(?:.+?)\.fnSuffix`, "mg");
    static mark_internal_site_lnk = ctRegex!(`¤`, "mg");
    static quotation_mark_sql_insert_delimiter = ctRegex!("[']", "mg");
    static quotation_mark_various = ctRegex!(q"¶['‘’“”"`´¨]¶", "mg");
    /+ inline markup font face mod +/
    // NOTE(review): the modifier class below omits '/' (italics), whereas
    // inline_fontface_clean includes it - confirm this is intended.
    static inline_faces = ctRegex!(`(?P<markup>(?P<mod>[*!_^,+#-])\{(?P<text>.+?)\}[*!_^,+#-])`, "mg");
    static inline_emphasis = ctRegex!(`\*\{(?P<text>.+?)\}\*`, "mg");
    static inline_bold = ctRegex!(`!\{(?P<text>.+?)\}!`, "mg");
    static inline_underscore = ctRegex!(`_\{(?P<text>.+?)\}_`, "mg");
    static inline_italics = ctRegex!(`/\{(?P<text>.+?)\}/`, "mg");
    static inline_superscript = ctRegex!(`\^\{(?P<text>.+?)\}\^`, "mg");
    static inline_subscript = ctRegex!(`,\{(?P<text>.+?)\},`, "mg");
    static inline_strike = ctRegex!(`-\{(?P<text>.+?)\}-`, "mg");
    static inline_insert = ctRegex!(`\+\{(?P<text>.+?)\}\+`, "mg");
    static inline_mono = ctRegex!(`#\{(?P<text>.+?)\}#`, "mg");
    static inline_mono_box = ctRegex!(`■\{(?P<text>.+?)\}■`, "mg");
    static inline_cite = ctRegex!(`"\{(?P<text>.+?)\}"`, "mg");
    // whole-line font face markup: marker, underscore, space, then the text
    static inline_faces_line = ctRegex!(`^[*!/_]_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
    static inline_emphasis_line = ctRegex!(`^\*_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
    static inline_bold_line = ctRegex!(`^!_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
    static inline_italics_line = ctRegex!(`^/_ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
    static inline_underscore_line = ctRegex!(`^__ (?P<text>.+?)((?: [\\]{2}|[~]#){0,2}$)`);
    static inline_fontface_clean = ctRegex!(`[*!_/^,+#■"-]\{|\}[*!_/^,+#■"-]`, "mg");
    static no_header_rgx = ctRegex!(`^=NULL$`);
    /+ table delimiters +/
    static table_delimiter_col = ctRegex!("[ ]*[┊][ ]*", "mg");
    static table_delimiter_row = ctRegex!("[ ]*\n", "mg");
    /+ characters with special meaning in xhtml +/
    static xhtml_ampersand = ctRegex!(`[&]`); // & (ampersand)
    static xhtml_quotation = ctRegex!(`["]`); // " (double quote)
    static xhtml_less_than = ctRegex!(`[<]`); // < (less than)
    static xhtml_greater_than = ctRegex!(`[>]`); // > (greater than)
    static xhtml_line_break = ctRegex!(` [\\]{2}`); // <br /> (matches a space followed by two backslashes)
  }
}