/+
- Name: SisuDoc Spine, Doc Reform [a part of]
- Description: documents, structuring, processing, publishing, search
- static content generator
- Author: Ralph Amissah
[ralph.amissah@gmail.com]
- Copyright: (C) 2015 - 2025 Ralph Amissah, All Rights Reserved.
- License: AGPL 3 or later:
Spine (SiSU), a framework for document structuring, publishing and
search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU AFFERO General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for
more details.
You should have received a copy of the GNU Affero General Public License along with
this program. If not, see [https://www.gnu.org/licenses/].
If you have Internet connection, the latest version of the AGPL should be
available at these locations:
[https://www.fsf.org/licensing/licenses/agpl.html]
[https://www.gnu.org/licenses/agpl.html]
- Spine (by Doc Reform, related to SiSU) uses standard:
- docReform markup syntax
- standard SiSU markup syntax with modified headers and minor modifications
- docReform object numbering
- standard SiSU object citation numbering & system
- Homepages:
[https://www.sisudoc.org]
[https://www.doc-reform.org]
- Git
[https://git.sisudoc.org/]
+/
/++
module source_read_source_files;
- open markup files
- if master file scan for additional files to import/insert
+/
module sisudoc.io_in.read_source_files;
@safe:
template spineRawMarkupContent() {
import
std.digest.sha,
std.file,
std.path;
import
sisudoc.meta,
sisudoc.io_in.paths_source,
sisudoc.meta.rgx_files,
sisudoc.meta.rgx;
// Regex tables shared by everything declared in this template.
// `rgx` is consulted for markup patterns (image, heading_a, code block open/close,
// newline splitting); `rgx_files` for source file name and path patterns.
mixin spineRgxIn;
static auto rgx = RgxI();
mixin spineRgxFiles;
static auto rgx_files = RgxFiles();
// template-scope list of image names, initially empty
string[] _images=[];
/++
  Collect the image references found in a block of markup.

  The block is converted to a single string with `to!string` and scanned with
  `rgx.image`; capture group 1 of _every_ match is recorded, in order of
  occurrence (previously only the first match was kept, so any further image
  in the same block was silently dropped).

  Params:
    content_block = markup to scan (a line, or anything `to!string` accepts)
  Returns: the captured image names; empty when nothing matches
+/
string[] _extract_images(S)(S content_block) {
  string[] images_;
  string _content_block = content_block.to!string;
  foreach (m; _content_block.matchAll(rgx.image)) {
    images_ ~= m[1].to!string;
  }
  return images_;
}
// instance of RawMarkupContent used by the spineRawMarkupContent entry point
auto rawsrc = RawMarkupContent();
/++
  Result of scanning a document body for insert files:
  returned by Inserts.scan_master_src_for_insert_files_and_import_content
+/
struct ST_contents_inserts_images {
char[][] contents;      // body lines, with the lines of inserted files spliced in
string[] insert_files;  // paths of the insert files that were found
string[] images;        // image names collected during the scan (sorted, duplicates removed)
}
/++
  Header / body split of a markup source text:
  returned by MarkupRawUnit.markupSourceHeaderContentRawLineStructArray
+/
struct ST_header_content_inserts_images {
char[] header;          // text preceding the first heading_a match
char[][] src_txt;       // remaining text (from the heading on), split into lines
string[] insert_files;  // left empty by markupSourceHeaderContentRawLineStructArray
string[] images;        // left empty by markupSourceHeaderContentRawLineStructArray
}
/++ SHA-256 digests (32 bytes each) taken of a document and its parts +/
struct ST_doc_digest {
ubyte[32] markup_doc;  // digest of the complete source text as read in
ubyte[32] header;      // digest of the header part
ubyte[32] text;        // digest computed for the body content
}
/++
  Parts of a document as handed back by spineRawMarkupContent /
  RawMarkupContent.sourceContentSplitIntoHeaderAndBody
+/
struct ST_doc_parts {
char[] header_raw;                  // document header, unprocessed
char[][] sourcefile_body_content;   // body lines (including imported insert content for master files)
string[] insert_file_list;          // insert files found while scanning a master file
string[] images_list;               // image names found while scanning
ST_doc_digest doc_digest;           // digests of whole document, header and body
}
/++
  Entry point: read the markup source file fn_src and return it split into
  header, body lines, insert file list, image list and digests.

  Params:
    _opt_action = processing options, passed through to the splitter
    fn_src      = markup source file name (with path)
  Returns: the document parts as an ST_doc_parts
+/
ST_doc_parts spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) {
  // 1. read the file in, 2. split what was read into its parts
  auto markup_text = rawsrc.sourceContent(fn_src);
  return rawsrc.sourceContentSplitIntoHeaderAndBody(_opt_action, markup_text, fn_src);
}
/++
  Reads a markup source file and splits it into header and body, resolving
  insert files and collecting image names where required.
+/
struct RawMarkupContent {
  /++
    Read in the markup source file fn_src.

    Params:
      fn_src = markup source file name (with path)
    Returns: the file content as a string (see MarkupRawUnit.markupSourceReadIn)
  +/
  final sourceContent(in string fn_src) {
    auto raw = MarkupRawUnit();
    string source_txt_str = raw.markupSourceReadIn(fn_src);
    return source_txt_str;
  }
  /++
    Split source text into header and body lines; scan the body for insert
    files and images; take digests of the whole text, the header and the body.

    Params:
      _opt_action    = processing options; `.source` and `.pod` are read here
      source_txt_str = complete markup source text
      fn_src         = source file name with path; needed for master files
                       (it decides whether inserts are imported)
    Returns: the document parts as an ST_doc_parts
  +/
  final ST_doc_parts sourceContentSplitIntoHeaderAndBody(O)(
    O         _opt_action,
    in string source_txt_str,
    in string fn_src=""
  ) {
    auto raw = MarkupRawUnit();
    string[] insert_file_list_get;
    string[] images_list_get;
    ST_header_content_inserts_images st
      = raw.markupSourceHeaderContentRawLineStructArray(source_txt_str);
    char[][] sourcefile_body_content = st.src_txt;
    if (fn_src.match(rgx_files.src_fn_master)) { // filename with path needed if master file (.ssm) not otherwise
      // master file: body is replaced by the body with insert content imported
      auto ins = Inserts();
      ST_contents_inserts_images _cii
        = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
      sourcefile_body_content = _cii.contents;
      insert_file_list_get    = _cii.insert_files.dup;
      images_list_get         = _cii.images.dup;
    } else if (_opt_action.source || _opt_action.pod) {
      // not a master file: the scan is run for its image list only, body is left as read
      auto ins = Inserts();
      ST_contents_inserts_images _cii
        = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
      images_list_get = _cii.images.dup;
    } // image_list, if path could take sha256 digests already here?
    ST_doc_digest dig;
    {
      dig.markup_doc = source_txt_str.sha256Of;
      dig.header     = st.header.sha256Of;
      /+ The body is an array of lines (char[][]). Passing that array straight
         to sha256Of would digest the bytes of the slice descriptors (pointer
         and length of each line), not the text, so the result would change
         from run to run. Feed the characters of each line to the hash
         instead; a newline is added after every line so that line boundaries
         are part of the digest. +/
      SHA256 text_hash;
      text_hash.start();
      foreach (line; sourcefile_body_content) {
        text_hash.put(cast(const(ubyte)[]) line);
        text_hash.put(cast(ubyte) '\n');
      }
      dig.text = text_hash.finish();
    }
    ST_doc_parts ret;
    {
      ret.header_raw              = st.header;
      ret.sourcefile_body_content = sourcefile_body_content;
      ret.insert_file_list        = insert_file_list_get;
      ret.images_list             = images_list_get;
      ret.doc_digest              = dig;
    }
    return ret;
  }
}
/++
  Low level reading of markup source files: read a file in, split the text
  into header and body, split text into lines.
+/
struct MarkupRawUnit {
  import
    std.digest.sha,
    std.file;
  /++
    Read the whole of the file fn_src into a string.

    Throws if the file does not exist. Errors raised while reading
    (ErrnoException, UTFException, FileException) are deliberately swallowed,
    in which case (as when the path is not a regular file) an empty string is
    returned.
    NOTE(review): attributes are taken with getLinkAttributes, which does not
    follow symbolic links - confirm that symlinked sources are meant to yield
    an empty string.

    Params:
      fn_src = file name (with path) of the file to read
    Returns: the file content, or an empty string
  +/
  final private string readInMarkupSource(in char[] fn_src) {
    enforce(
      exists(fn_src),
      "file not found: «" ~
      fn_src ~ "»"
    );
    string source_txt_str;
    try {
      if (exists(fn_src) && fn_src.getLinkAttributes.attrIsFile) {
        source_txt_str = fn_src.readText;
      }
    } catch (ErrnoException ex) {
      // best effort: fall through with what there is (empty string)
    } catch (UTFException ex) {
      // Handle validation errors
    } catch (FileException ex) {
      // Handle errors
    }
    std.utf.validate(source_txt_str);
    return source_txt_str;
  }
  /++
    Split the source text on the _first_ match of rgx.heading_a
    (noted in the original as "^:?A~\s") into [header, content]: the header is
    everything before the match, the content is the match and all that follows.

    Params:
      src_text = complete markup source text
    Returns: array of exactly two elements, [header, content]
    Throws: Exception if the text has no level A~ heading to split on
            (the former length assertion could never fail, and a missing
            heading went on to read the hit of an empty match)
  +/
  @trusted final private char[][] header0Content1(in string src_text) { // cast(char[])
    auto m = (cast(char[]) src_text).matchFirst(rgx.heading_a);
    enforce(
      !m.empty,
      "document markup is broken, no level A~ heading found"
      ~ "; (header / body split is on level A~)"
    );
    char[][] header_and_content;
    header_and_content ~= m.pre;
    header_and_content ~= m.hit ~ m.post;
    return header_and_content;
  }
  /++
    Split text into lines using rgx.newline_eol_strip_preceding.

    Params:
      src_text = text to split
    Returns: the lines of src_text
  +/
  @trusted final private char[][] markupSourceLineArray(in char[] src_text) { // cast(char[])
    return (cast(char[]) src_text).split(rgx.newline_eol_strip_preceding);
  }
  /++
    Read in a markup source file, after checking that its name matches
    rgx_files.src_pth_sst_or_ssm.

    Params:
      fn_src = markup source file name (with path)
    Returns: the file content (see readInMarkupSource)
    Throws: Exception if fn_src is not a markup file name, or the file is not found
  +/
  string markupSourceReadIn(in string fn_src) {
    static auto rgx_files = RgxFiles();
    enforce(
      fn_src.match(rgx_files.src_pth_sst_or_ssm),
      "not a dr markup filename: «" ~
      fn_src ~ "»"
    );
    return readInMarkupSource(fn_src);
  }
  /++
    Split source text into its header and an array of body lines.

    Params:
      source_txt_str = complete markup source text
    Returns: struct with `header` and `src_txt` (body lines) set;
             `insert_files` and `images` are left empty here
    Throws: Exception if the text cannot be split (see header0Content1)
  +/
  ST_header_content_inserts_images markupSourceHeaderContentRawLineStructArray(in string source_txt_str) {
    char[][] hc = header0Content1(source_txt_str); // [0] header, [1] content
    ST_header_content_inserts_images ret;
    {
      ret.header  = hc[0];
      ret.src_txt = markupSourceLineArray(hc[1]);
    }
    return ret;
  }
  /++
    Read in an insert file and return its content as an array of lines.

    Params:
      fn_src_insert = file name (with path) of the insert file
      rgx_file      = pattern the file name has to match
    Returns: the lines of the insert file
    Throws: Exception if the name does not match rgx_file, or the file is not found
  +/
  final char[][] getInsertMarkupSourceContentRawLineArray(
    in char[]    fn_src_insert,
    Regex!(char) rgx_file
  ) {
    enforce(
      fn_src_insert.match(rgx_file),
      "not a dr markup filename: «" ~
      fn_src_insert ~ "»"
    );
    string source_txt_str = readInMarkupSource(fn_src_insert);
    return markupSourceLineArray(source_txt_str);
  }
}
/++
  Scanning of document bodies for insert files: the content of insert files
  is imported in place, and (when source or pod output is requested) the
  names of images referred to are collected.
+/
struct Inserts {
  /++ lines and image names gathered from one inserted (sub) document +/
  struct ST_contents_and_images {
    char[][] insert_contents;
    string[] images;
  }
  /++
    Scan the lines of an inserted (sub) document.

    Lines within code blocks (curly or tic) are passed through untouched.
    Image names are collected only if `_opt_action.source` or
    `_opt_action.pod` is set; they are gathered in a list local to this call
    (previously a template-scope list that was never reset was appended to,
    so that names from earlier inserts and documents were carried along).

    NOTE(review): a line naming a further insert file has that file read (and
    scanned for images), but neither the line nor the file content is added
    to the returned lines - nested inserts are not imported, as before; see
    the outline kept in the body.

    Params:
      _opt_action                      = processing options
      markup_sourcefile_insert_content = lines of the inserted document
      fn_src                           = file name (with path) of the inserted document
    Returns: the lines kept, and the image names found
  +/
  ST_contents_and_images scan_subdoc_source(O)(
    O        _opt_action,
    char[][] markup_sourcefile_insert_content,
    string   fn_src
  ) {
    enum codeBlock { off, curly, tic, }
    int code_block_status = codeBlock.off;
    char[][] contents_insert;
    string[] images_found;
    immutable bool collect_images = (_opt_action.source || _opt_action.pod);
    auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
    auto markup_src_file_path = fn_pth_full.captures[1];
    foreach (line; markup_sourcefile_insert_content) {
      if (code_block_status == codeBlock.curly) {
        if (line.matchFirst(rgx.block_curly_code_close)) {
          code_block_status = codeBlock.off;
        }
        contents_insert ~= line;
      } else if (line.matchFirst(rgx.block_curly_code_open)) {
        code_block_status = codeBlock.curly;
        contents_insert ~= line;
      } else if (code_block_status == codeBlock.tic) {
        if (line.matchFirst(rgx.block_tic_close)) {
          code_block_status = codeBlock.off;
        }
        contents_insert ~= line;
      } else if (line.matchFirst(rgx.block_tic_code_open)) {
        code_block_status = codeBlock.tic;
        contents_insert ~= line;
      } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
        auto insert_fn = m.captures[2];
        auto insert_sub_pth = m.captures[1];
        auto fn_src_insert
          = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
        auto raw = MarkupRawUnit();
        auto markup_sourcesubfile_insert_content
          = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
        debug(insert_file) {
          writeln(line);
          writeln(fn_src_insert);
          writeln(
            "  length contents insert array: ",
            markup_sourcesubfile_insert_content.length
          );
        }
        if (collect_images) {
          images_found ~= _extract_images(markup_sourcesubfile_insert_content);
        }
        /+
          - 1. load file
          - 2. read lines
          - 3. scan lines
            - a. if filename insert, and insert filename
              - repeat 1
            - b. else
              - add line to new array;
              - build image list, search for any image files to add to image list
        +/
      } else {
        contents_insert ~= line; // images to extract for image list?
        if (collect_images) {
          images_found ~= _extract_images(line);
        }
      }
    } // end src subdoc (inserts) loop
    ST_contents_and_images ret;
    {
      ret.insert_contents = contents_insert;
      ret.images          = images_found;
    }
    return ret;
  }
  /++
    Scan the body lines of a (master) document; where a line names an insert
    file, import the content of that file in place of the line.

    Lines within code blocks (curly or tic) are passed through untouched.
    Image names are collected only if `_opt_action.source` or
    `_opt_action.pod` is set. The image names reported by the scan of an
    inserted document are taken over as they are (previously that list was
    run through the image extraction a second time, which kept at most one
    name of each insert).

    Params:
      _opt_action             = processing options
      sourcefile_body_content = body lines of the document
      fn_src                  = file name (with path) of the document
    Returns: body lines with insert content imported, the insert files found,
             and the image names found (sorted, duplicates removed)
  +/
  ST_contents_inserts_images scan_master_src_for_insert_files_and_import_content(O)(
    O        _opt_action,
    char[][] sourcefile_body_content,
    string   fn_src
  ) {
    import std.algorithm;
    enum codeBlock { off, curly, tic, }
    int code_block_status = codeBlock.off;
    char[][] contents;
    string[] images_found;
    string[] insert_file_list;
    immutable bool collect_images = (_opt_action.source || _opt_action.pod);
    auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
    auto markup_src_file_path = fn_pth_full.captures[1];
    foreach (line; sourcefile_body_content) {
      if (code_block_status == codeBlock.curly) {
        if (line.matchFirst(rgx.block_curly_code_close)) {
          code_block_status = codeBlock.off;
        }
        contents ~= line;
      } else if (line.matchFirst(rgx.block_curly_code_open)) {
        code_block_status = codeBlock.curly;
        contents ~= line;
      } else if (code_block_status == codeBlock.tic) {
        if (line.matchFirst(rgx.block_tic_close)) {
          code_block_status = codeBlock.off;
        }
        contents ~= line;
      } else if (line.matchFirst(rgx.block_tic_code_open)) {
        code_block_status = codeBlock.tic;
        contents ~= line;
      } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
        auto insert_fn = m.captures[2];
        auto insert_sub_pth = m.captures[1];
        auto fn_src_insert
          = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
        insert_file_list ~= fn_src_insert.to!string;
        auto raw = MarkupRawUnit();
        /+ TODO +/
        auto markup_sourcefile_insert_content
          = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
        debug(insert_file) {
          writeln(line);
          writeln(fn_src_insert);
          writeln(
            "  length contents insert array: ",
            markup_sourcefile_insert_content.length
          );
        }
        auto ins = Inserts();
        ST_contents_and_images contents_insert_st = ins.scan_subdoc_source(
          _opt_action,
          markup_sourcefile_insert_content,
          fn_src_insert.to!string
        );
        contents ~= contents_insert_st.insert_contents;
        if (collect_images) {
          // already a list of image names, no need to extract again
          images_found ~= contents_insert_st.images;
        }
        /+
          - 1. load file
          - 2. read lines
          - 3. scan lines
            - a. if filename insert, and insert filename
              - repeat 1
            - b. else
              - add line to new array;
              - build image list, search for any image files to add to image list
        +/
      } else {
        contents ~= line;
        if (collect_images) {
          images_found ~= _extract_images(line);
        }
      }
    } // end src doc loop
    // sorted, each image name once
    string[] images;
    foreach (i; uniq(images_found.sort())) {
      images ~= i;
    }
    debug(insert_file) {
      writeln(__LINE__);
      writeln(contents.length);
    }
    ST_contents_inserts_images ret;
    {
      ret.contents     = contents;
      ret.insert_files = insert_file_list;
      ret.images       = images;
    }
    return ret;
  }
}
}