/+ - Name: SisuDoc Spine, Doc Reform [a part of] - Description: documents, structuring, processing, publishing, search - static content generator - Author: Ralph Amissah [ralph.amissah@gmail.com] - Copyright: (C) 2015 - 2025 Ralph Amissah, All Rights Reserved. - License: AGPL 3 or later: Spine (SiSU), a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU AFFERO General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see [https://www.gnu.org/licenses/]. If you have Internet connection, the latest version of the AGPL should be available at these locations: [https://www.fsf.org/licensing/licenses/agpl.html] [https://www.gnu.org/licenses/agpl.html] - Spine (by Doc Reform, related to SiSU) uses standard: - docReform markup syntax - standard SiSU markup syntax with modified headers and minor modifications - docReform object numbering - standard SiSU object citation numbering & system - Homepages: [https://www.sisudoc.org] [https://www.doc-reform.org] - Git [https://git.sisudoc.org/] +/ /++ module source_read_source_files;
- open markup files
- if master file scan for additional files to import/insert
+/
module sisudoc.io_in.read_source_files;
@safe:
/++
  Reads a markup source file and splits it into raw header and body lines;
  for master files the body is expanded with the content of the files it
  inserts. Also gathers the list of inserted files, the list of images
  referenced (only when the source or pod options are set) and sha256
  digests of the document parts.
+/
template spineRawMarkupContent() {
  import
    std.digest.sha,
    std.file,
    std.path;
  import
    sisudoc.meta,
    sisudoc.io_in.paths_source,
    sisudoc.meta.rgx_files,
    sisudoc.meta.rgx;
  mixin spineRgxIn;
  static auto rgx = RgxI();
  mixin spineRgxFiles;
  static auto rgx_files = RgxFiles();
  /++
    Returns capture group 1 of every match of rgx.image found in
    content_block.

    content_block is first converted with to!string, so a single line as
    well as an array of lines may be passed. All matches are collected, not
    just the first, so a line (or block) that references several images
    contributes all of them.
  +/
  string[] _extract_images(S)(S content_block) {
    string[] images_;
    string _content_block = content_block.to!string;
    foreach (m; _content_block.matchAll(rgx.image)) {
      images_ ~= m[1].to!string;
    }
    return images_;
  }
  auto rawsrc = RawMarkupContent();
  /++ result of scanning a document body: body lines, inserted files, images +/
  struct ST_contents_inserts_images {
    char[][] contents;
    string[] insert_files;
    string[] images;
  }
  /++ raw header, body lines and (initially empty) insert-file and image lists +/
  struct ST_header_content_inserts_images {
    char[]   header;
    char[][] src_txt;
    string[] insert_files;
    string[] images;
  }
  /++ sha256 digests of the whole markup source, its header and its body +/
  struct ST_doc_digest {
    ubyte[32] markup_doc;
    ubyte[32] header;
    ubyte[32] text;
  }
  /++ everything this module hands back for one source document +/
  struct ST_doc_parts {
    char[]        header_raw;
    char[][]      sourcefile_body_content;
    string[]      insert_file_list;
    string[]      images_list;
    ST_doc_digest doc_digest;
  }
  /++
    Entry point: read the markup file fn_src and return its header, body
    lines, insert file list, image list and digests.

    Throws (via enforce in the helpers called) if fn_src is not a markup
    filename, does not exist, or has no level A~ heading to split on.
  +/
  ST_doc_parts spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) {
    ST_doc_parts _0_header_1_body_content_2_insert_filelist_struct
      = rawsrc.sourceContentSplitIntoHeaderAndBody(
        _opt_action,
        rawsrc.sourceContent(fn_src),
        fn_src
      );
    return _0_header_1_body_content_2_insert_filelist_struct;
  }
  struct RawMarkupContent {
    /++ read the markup source file fn_src and return its text +/
    final sourceContent(in string fn_src) {
      auto raw = MarkupRawUnit();
      string source_txt_str = raw.markupSourceReadIn(fn_src);
      return source_txt_str;
    }
    /++
      sha256 over the text of the body lines.

      Each line's bytes are hashed followed by a newline byte, so the digest
      depends on the text and on where the line boundaries fall. (Passing the
      char[][] straight to sha256Of would hash the array's slice descriptors,
      i.e. pointers and lengths, rather than the text.)
    +/
    private ubyte[32] bodyTextDigest(in char[][] body_lines) {
      SHA256 hasher;
      hasher.start();
      foreach (line; body_lines) {
        hasher.put(cast(const(ubyte)[]) line);
        hasher.put(cast(ubyte) '\n');
      }
      return hasher.finish();
    }
    /++
      Split source_txt_str into header and body lines, expand inserts when
      fn_src is a master file, collect the image list and compute digests.

      Params:
        _opt_action    = options; only its source and pod members are read here
        source_txt_str = complete text of the markup source
        fn_src         = filename (with path) of the markup source
      Returns: the populated ST_doc_parts
    +/
    final ST_doc_parts sourceContentSplitIntoHeaderAndBody(O)(
      O         _opt_action,
      in string source_txt_str,
      in string fn_src=""
    ) {
      auto raw = MarkupRawUnit();
      ST_header_content_inserts_images st
        = raw.markupSourceHeaderContentRawLineStructArray(source_txt_str);
      char[][] sourcefile_body_content = st.src_txt;
      string[] insert_file_list_get;
      string[] images_list_get;
      if (fn_src.match(rgx_files.src_fn_master)) {
        // filename with path needed if master file (.ssm) not otherwise
        auto ins = Inserts();
        ST_contents_inserts_images _cii
          = ins.scan_master_src_for_insert_files_and_import_content(
            _opt_action,
            sourcefile_body_content,
            fn_src
          );
        sourcefile_body_content = _cii.contents;
        insert_file_list_get    = _cii.insert_files.dup;
        images_list_get         = _cii.images.dup;
      } else if (_opt_action.source || _opt_action.pod) {
        // not a master file: the scan is run only to obtain the image list,
        // the body content is left as read
        auto ins = Inserts();
        ST_contents_inserts_images _cii
          = ins.scan_master_src_for_insert_files_and_import_content(
            _opt_action,
            sourcefile_body_content,
            fn_src
          );
        images_list_get = _cii.images.dup;
      }
      // image_list, if path could take sha256 digests already here?
      ST_doc_digest dig;
      {
        dig.markup_doc = source_txt_str.sha256Of;
        dig.header     = st.header.sha256Of;
        dig.text       = bodyTextDigest(sourcefile_body_content);
      }
      ST_doc_parts ret;
      {
        ret.header_raw              = st.header;
        ret.sourcefile_body_content = sourcefile_body_content;
        ret.insert_file_list        = insert_file_list_get;
        ret.images_list             = images_list_get;
        ret.doc_digest              = dig;
      }
      return ret;
    }
  }
  struct MarkupRawUnit {
    import std.digest.sha, std.file;
    /++
      Read the text of fn_src.

      Throws if fn_src does not exist. The read itself is best effort:
      ErrnoException, UTFException and FileException raised while reading
      are swallowed, and a path that is not a regular file is skipped, in
      which cases an empty string is returned.
      NOTE(review): getLinkAttributes does not follow symbolic links, so a
      symlinked source file looks like "not a regular file" here - confirm
      that this is intended.
    +/
    final private string readInMarkupSource(in char[] fn_src) {
      enforce(
        exists(fn_src),
        "file not found: «" ~
        fn_src ~ "»"
      );
      string source_txt_str;
      try {
        // re-checked: the file may have disappeared since the enforce above
        if (exists(fn_src) && fn_src.getLinkAttributes.attrIsFile) {
          source_txt_str = fn_src.readText;
        }
      } catch (ErrnoException ex) {
      } catch (UTFException ex) {
        // Handle validation errors
      } catch (FileException ex) {
        // Handle errors
      }
      std.utf.validate(source_txt_str);
      return source_txt_str;
    }
    /++
      Split src_text on the _first_ match of rgx.heading_a (the level A~
      heading) into [header, content]; the heading itself starts content.

      Throws if src_text has no level A~ heading, as there is then nothing
      to split the header from the body on.
    +/
    @trusted final private char[][] header0Content1(in string src_text) { // cast(char[])
      auto m = (cast(char[]) src_text).matchFirst(rgx.heading_a);
      enforce(
        !m.empty,
        "document markup is broken, no level A~ heading found to split header from body; " ~
        "(header / body array split should == 2 (split is on level A~))"
      );
      char[][] header_and_content = [m.pre, m.hit ~ m.post];
      return header_and_content;
    }
    /++ split src_text into lines on rgx.newline_eol_strip_preceding +/
    @trusted final private char[][] markupSourceLineArray(in char[] src_text) { // cast(char[])
      char[][] source_line_arr
        = (cast(char[]) src_text).split(rgx.newline_eol_strip_preceding);
      return source_line_arr;
    }
    /++
      Read the markup source file fn_src.

      Throws if fn_src does not match rgx_files.src_pth_sst_or_ssm or does
      not exist.
    +/
    string markupSourceReadIn(in string fn_src) {
      static auto rgx_files = RgxFiles();
      enforce(
        fn_src.match(rgx_files.src_pth_sst_or_ssm),
        "not a dr markup filename: «" ~
        fn_src ~ "»"
      );
      string source_txt_str = readInMarkupSource(fn_src);
      return source_txt_str;
    }
    /++
      Split source_txt_str into its raw header and an array of body lines;
      the insert-file and image lists of the result are left empty.
    +/
    ST_header_content_inserts_images markupSourceHeaderContentRawLineStructArray(in string source_txt_str) {
      char[][] hc = header0Content1(source_txt_str);
      ST_header_content_inserts_images ret;
      {
        ret.header       = hc[0];
        ret.src_txt      = markupSourceLineArray(hc[1]);
        ret.insert_files = [];
        ret.images       = [];
      }
      return ret;
    }
    /++
      Read the insert file fn_src_insert and return it as an array of lines.

      Throws if fn_src_insert does not match rgx_file or does not exist.
    +/
    final char[][] getInsertMarkupSourceContentRawLineArray(
      in char[]     fn_src_insert,
      Regex!(char)  rgx_file
    ) {
      enforce(
        fn_src_insert.match(rgx_file),
        "not a dr markup filename: «" ~
        fn_src_insert  ~ "»"
      );
      string source_txt_str = readInMarkupSource(fn_src_insert);
      char[][] source_line_arr = markupSourceLineArray(source_txt_str);
      return source_line_arr;
    }
  }
  struct Inserts {
    /++ lines contributed by an inserted file and the images found in it +/
    struct ST_contents_and_images {
      char[][] insert_contents;
      string[] images;
    }
    /++
      Scan the lines of an inserted (sub) document.

      Lines inside curly or tic code blocks are copied unchanged. A line
      that is itself an insert directive causes the named file to be read
      (and, with the source or pod options, searched for images), but the
      content of that file is not added to the result and the directive
      line is dropped.
      NOTE(review): the outline kept below ("repeat 1") suggests nested
      inserts were meant to be expanded recursively - confirm before
      relying on nested inserts.

      The image list returned holds only the images found during this call.

      Params:
        _opt_action = options; only its source and pod members are read
        markup_sourcefile_insert_content = lines of the inserted file
        fn_src = filename (with path) of the inserted file
    +/
    ST_contents_and_images scan_subdoc_source(O)(
      O        _opt_action,
      char[][] markup_sourcefile_insert_content,
      string   fn_src
    ) {
      enum codeBlock { off, curly, tic, }
      codeBlock code_block_status = codeBlock.off;
      char[][] contents_insert;
      string[] images_found; // local, so nothing carries over between calls
      auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
      auto markup_src_file_path = fn_pth_full.captures[1];
      foreach (line; markup_sourcefile_insert_content) {
        if (code_block_status == codeBlock.curly) {
          if (line.matchFirst(rgx.block_curly_code_close)) {
            code_block_status = codeBlock.off;
          }
          contents_insert ~= line;
        } else if (line.matchFirst(rgx.block_curly_code_open)) {
          code_block_status = codeBlock.curly;
          contents_insert ~= line;
        } else if (code_block_status == codeBlock.tic) {
          if (line.matchFirst(rgx.block_tic_close)) {
            code_block_status = codeBlock.off;
          }
          contents_insert ~= line;
        } else if (line.matchFirst(rgx.block_tic_code_open)) {
          code_block_status = codeBlock.tic;
          contents_insert ~= line;
        } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
          auto insert_fn = m.captures[2];
          auto insert_sub_pth = m.captures[1];
          auto fn_src_insert
            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
          auto raw = MarkupRawUnit();
          auto markup_sourcesubfile_insert_content
            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
          debug(insert_file) {
            writeln(line);
            writeln(fn_src_insert);
            writeln(
              "  length contents insert array: ",
              markup_sourcesubfile_insert_content.length
            );
          }
          if (_opt_action.source || _opt_action.pod) {
            images_found ~= _extract_images(markup_sourcesubfile_insert_content);
          }
          /+
            - 1. load file
            - 2. read lines
            - 3. scan lines
              - a. if filename insert, and insert filename - repeat 1
              - b. else
                - add line to new array;
                - build image list, search for any image files to add to image list
          +/
        } else {
          contents_insert ~= line;
          // images to extract for image list?
          if (_opt_action.source || _opt_action.pod) {
            images_found ~= _extract_images(line);
          }
        }
      } // end src subdoc (inserts) loop
      ST_contents_and_images ret;
      {
        ret.insert_contents = contents_insert;
        ret.images          = images_found;
      }
      return ret;
    }
    /++
      Scan the body lines of a (master) document, replacing each insert
      directive with the scanned content of the file it names.

      Lines inside curly or tic code blocks are copied unchanged. With the
      source or pod options set, images referenced in the document and in
      its inserted files are collected.

      Params:
        _opt_action = options; only its source and pod members are read
        sourcefile_body_content = body lines of the document
        fn_src = filename (with path) of the document
      Returns: the expanded body lines, the list of inserted files in the
        order met, and the sorted list of images without duplicates
    +/
    ST_contents_inserts_images scan_master_src_for_insert_files_and_import_content(O)(
      O        _opt_action,
      char[][] sourcefile_body_content,
      string   fn_src
    ) {
      import std.algorithm;
      enum codeBlock { off, curly, tic, }
      codeBlock code_block_status = codeBlock.off;
      char[][] contents;
      string[] _images = [];
      string[] insert_file_list = [];
      auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
      auto markup_src_file_path = fn_pth_full.captures[1];
      foreach (line; sourcefile_body_content) {
        if (code_block_status == codeBlock.curly) {
          if (line.matchFirst(rgx.block_curly_code_close)) {
            code_block_status = codeBlock.off;
          }
          contents ~= line;
        } else if (line.matchFirst(rgx.block_curly_code_open)) {
          code_block_status = codeBlock.curly;
          contents ~= line;
        } else if (code_block_status == codeBlock.tic) {
          if (line.matchFirst(rgx.block_tic_close)) {
            code_block_status = codeBlock.off;
          }
          contents ~= line;
        } else if (line.matchFirst(rgx.block_tic_code_open)) {
          code_block_status = codeBlock.tic;
          contents ~= line;
        } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
          auto insert_fn = m.captures[2];
          auto insert_sub_pth = m.captures[1];
          auto fn_src_insert
            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
          insert_file_list ~= fn_src_insert.to!string;
          auto raw = MarkupRawUnit();
          /+ TODO +/
          auto markup_sourcefile_insert_content
            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
          debug(insert_file) {
            writeln(line);
            writeln(fn_src_insert);
            writeln(
              "  length contents insert array: ",
              markup_sourcefile_insert_content.length
            );
          }
          ST_contents_and_images contents_insert_st
            = scan_subdoc_source(
              _opt_action,
              markup_sourcefile_insert_content,
              fn_src_insert.to!string
            );
          contents ~= contents_insert_st.insert_contents;
          if (_opt_action.source || _opt_action.pod) {
            // already a list of image names, take it as it is
            _images ~= contents_insert_st.images;
          }
          /+
            - 1. load file
            - 2. read lines
            - 3. scan lines
              - a. if filename insert, and insert filename - repeat 1
              - b. else
                - add line to new array;
                - build image list, search for any image files to add to image list
          +/
        } else {
          contents ~= line;
          if (_opt_action.source || _opt_action.pod) {
            _images ~= _extract_images(line);
          }
        }
      } // end src doc loop
      // sorted, each image listed once
      string[] images = _images.sort.uniq.array;
      debug(insert_file) {
        writeln(__LINE__);
        writeln(contents.length);
      }
      ST_contents_inserts_images ret;
      {
        ret.contents     = contents;
        ret.insert_files = insert_file_list;
        ret.images       = images;
      }
      return ret;
    }
  }
}