-*- mode: org -*- #+TITLE: spine (doc_reform) markup source raw #+DESCRIPTION: documents - structuring, publishing in multiple formats & search #+FILETAGS: :spine:sourcefile:read: #+AUTHOR: Ralph Amissah #+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] #+COPYRIGHT: Copyright (C) 2015 - 2023 Ralph Amissah #+LANGUAGE: en #+STARTUP: content hideblocks hidestars noindent entitiespretty #+PROPERTY: header-args :exports code #+PROPERTY: header-args+ :noweb yes #+PROPERTY: header-args+ :results no #+PROPERTY: header-args+ :cache no #+PROPERTY: header-args+ :padline no #+PROPERTY: header-args+ :mkdirp yes #+OPTIONS: H:3 num:nil toc:t \n:t ::t |:t ^:nil -:t f:t *:t - [[./doc-reform.org][doc-reform.org]] [[./][org/]] * A. get _config file_, read in (.readText) [#A] ** _module template_ :module:config_files: #+HEADER: :tangle "../src/doc_reform/io_in/read_config_files.d" #+HEADER: :noweb yes #+BEGIN_SRC d <<doc_header_including_copyright_and_license>> /++ read configuration files<BR> - read config files<BR> meta_config_files.d +/ module doc_reform.io_in.read_config_files; import std.file, std.path; import doc_reform.meta, doc_reform.io_in.paths_source, doc_reform.meta.rgx_files, doc_reform.meta.rgx; <<meta_config_file_in>> <<meta_config_file_hub_read_site_config>> <<meta_config_file_hub_read_document_config>> <<meta_config_file_hub_read_site_yaml>> #+END_SRC *** read config files (config local site & dr document make) (yaml) **** site configuration SEE NOTES on configuration hierarchy in spine.org #+NAME: meta_config_file_hub_read_site_config #+BEGIN_SRC d template readConfigSite() { @system final auto readConfigSite(Cf,O,Cfg)(Cf _conf_file_details, O _opt_action, Cfg _cfg) { mixin spineRgxIn; static auto rgx = RgxI(); string conf_filename = "NONE"; string config_file_str; string default_config_file_str = format(q"┃ flag: act0: "--html" act1: "--html --epub" output: path: "%s" default: language: "en" papersize: "a4" text_wrap: "80" digest: "sha256" webserv: http: "%s" host: "%s" data_http: "%s" data_host: "%s" data_root_url: "%s" data_root_path: "%s" data_root_part: "" images_root_part: "image" cgi_search_form_title: "%s" cgi_http: "%s" cgi_host: "%s" cgi_bin_url: "%s" cgi_bin_subpath: "%s" cgi_bin_path: "%s" cgi_search_script: "%s" cgi_port: "" cgi_user: "" cgi_action: "%s" db_sqlite_path: "%s" db_sqlite_filename: "%s" db_pg_table: "" db_pg_user: "" ┃", _cfg.processing_path_doc_root, // doc root _cfg.http_request_type, // http _cfg.http_host, // host / domain _cfg.http_request_type, // data "http" or "https" _cfg.http_host, // data domain "localhost" _cfg.www_url_doc_root, // data root url "http://locahost" "https://sisudoc.org" _cfg.processing_path_doc_root, // data root path _cfg.cgi_search_form_title, // cgi title // e.g. 
"≅ SiSU Spine search" _cfg.http_request_type, // cgi http _cfg.http_host, // cgi host _cfg.cgi_url_root, // cgi bin url _cfg.cgi_bin_subpath, // cgi bin path _cfg.cgi_bin_root, // cgi bin path _cfg.cgi_filename, // cgi filename _cfg.cgi_url_action, // cgi action _cfg.db_sqlite_path, // sqlite db path _cfg.db_sqlite_filename, // sqlite db filename ); foreach(conf_fn; [_conf_file_details.config_filename_site]) { foreach(pth; _conf_file_details.possible_config_path_locations.config_local_site) { char[] conf_file; conf_filename = conf_fn; if (exists(pth)) { auto f_attrib = pth.getLinkAttributes; if ( _conf_file_details.possible_config_path_locations.config_local_site.length == 1 && f_attrib.attrIsFile ) { conf_file = pth.to!(char[]); conf_filename = pth.baseName; } else if (f_attrib.attrIsDir) { conf_file = ((chainPath(pth.to!string, conf_fn)).asNormalizedPath).array; conf_filename = conf_fn; } try { if (exists(conf_file)) { if (conf_file.getLinkAttributes.attrIsFile) { if (_opt_action.vox_gt1 || _opt_action.debug_do) { writeln("config file used: \"", conf_file, "\" (cli flag settings override config file's individual settings)"); } config_file_str = conf_file.readText; break; } } } catch (ErrnoException ex) { } catch (FileException ex) { } } } if (config_file_str.length > 0) { break; } } if (config_file_str.length > 0) { import dyaml; Node yaml_root; try { yaml_root = Loader.fromString(config_file_str).load(); } catch (Throwable) { import std.stdio; writeln("ERROR failed to read config file content, not parsed as yaml, program default used"); conf_filename = "VIRTUAL"; config_file_str = default_config_file_str; } } if (config_file_str.length == 0) { /+ use dummy default config file +/ // writeln("WARNING config file NOT found, default provided"); conf_filename = "VIRTUAL"; config_file_str = default_config_file_str; } struct _ConfContent { string filename() { return conf_filename; } string filetype() { string _ft = ""; if (content.match(rgx.yaml_config)) { _ft = "yaml"; } return _ft; } string content() { return config_file_str; } } return _ConfContent(); } } #+END_SRC **** document make/config #+NAME: meta_config_file_hub_read_document_config #+HEADER: :noweb yes #+BEGIN_SRC d static template readConfigDoc() { import std.file, std.path; import doc_reform.meta, doc_reform.io_in.paths_source, doc_reform.meta.rgx_files, doc_reform.meta.rgx; @system final auto readConfigDoc(M,E)(M _manifested, E _env) { mixin spineRgxIn; static auto rgx = RgxI(); mixin spineRgxFiles; static auto rgx_files = RgxFiles(); string config_file_str; string conf_filename = "NONE"; auto _conf_file_details = configFilePaths!()(_manifested, _env); string[] possible_config_path_locations = _conf_file_details.possible_config_path_locations.dr_document_make; foreach(conf_fn; [_conf_file_details.config_filename_document]) { foreach(pth; possible_config_path_locations) { char[] conf_file = ((chainPath(pth.to!string, conf_fn)).asNormalizedPath).array; conf_filename = conf_fn; if (config_file_str.length > 0) { break; } try { if (exists(conf_file)) { if (conf_file.getLinkAttributes.attrIsFile) { config_file_str = conf_file.readText; break; } } } catch (ErrnoException ex) { } catch (FileException ex) { } } if (config_file_str.length > 0) { break; } } struct _ConfContent { @safe string filename() { return conf_filename; } @safe string content() { return config_file_str; } @safe string filetype() { string _ft = ""; if (content.match(rgx.yaml_config)) { _ft = "yaml"; } return _ft; } } return _ConfContent(); } } #+END_SRC *** YAML config 
(config local site & dr document make) :file:config:hub: #+NAME: meta_config_file_hub_read_site_yaml #+HEADER: :noweb yes #+BEGIN_SRC d static template configReadSiteYAML() { import std.file, std.path; import doc_reform.meta, doc_reform.io_in.paths_source, doc_reform.meta.rgx_files, doc_reform.meta.rgx; @safe final YAMLDocument configReadSiteYAML(M,E)(M _manifested, E _env) { string _configuration = configReadInSiteYAML!()(_manifested, _env); auto _conf_file_details = configFilePaths!()(_manifested, _env); string _conf_yaml_fn = _conf_file_details.config_filename_site; YAMLDocument _yaml_conf = configYAML!()(_configuration, _conf_yaml_fn); return _yaml_conf; } } static template configReadDocYAML() { import std.file, std.path; import doc_reform.meta, doc_reform.io_in.paths_source; @safe final YAMLDocument configReadDocYAML(M,E)(M _manifested, E _env) { string _configuration = configReadInDocYAML!()(_manifested, _env); auto _conf_file_details = configFilePaths!()(_manifested, _env); string _conf_yaml_fn = _conf_file_details.config_filename_document; YAMLDocument _yaml_conf = configYAML!()(_configuration, _conf_yaml_fn); return _yaml_conf; } } #+END_SRC * B. get _markup source_, read file (.readText) [#A]:module:source_files: ** _module template_ (includes tuple) #+HEADER: :tangle "../src/doc_reform/io_in/read_source_files.d" #+HEADER: :noweb yes #+BEGIN_SRC d <<doc_header_including_copyright_and_license>> /++ module source_read_source_files;<BR> - open markup files<BR> - if master file scan for addional files to import/insert +/ module doc_reform.io_in.read_source_files; template spineRawMarkupContent() { import std.file, std.path; import doc_reform.meta, doc_reform.io_in.paths_source, doc_reform.meta.rgx_files, doc_reform.meta.rgx; mixin spineRgxIn; static auto rgx = RgxI(); mixin spineRgxFiles; static auto rgx_files = RgxFiles(); string[] _images=[]; @safe string[] _extract_images(S)(S content_block) { string[] images_; string _content_block = content_block.to!string; if (auto m = _content_block.matchAll(rgx.image)) { images_ ~= m.captures[1].to!string; } return images_; } auto rawsrc = RawMarkupContent(); alias ContentsInsertsImages = Tuple!( char[][], "contents", string[], "insert_files", string[], "images" ); alias HeaderContentInsertsImages = Tuple!( char[], "header", char[][], "src_txt", string[], "insert_files", string[], "images" ); @safe auto spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) { auto _0_header_1_body_content_2_insert_filelist_tuple = rawsrc.sourceContentSplitIntoHeaderAndBody(_opt_action, rawsrc.sourceContent(fn_src), fn_src); return _0_header_1_body_content_2_insert_filelist_tuple; } struct RawMarkupContent { @safe final sourceContent(in string fn_src) { auto raw = MarkupRawUnit(); string source_txt_str = raw.markupSourceReadIn(fn_src); return source_txt_str; } @safe final auto sourceContentSplitIntoHeaderAndBody(O)( O _opt_action, in string source_txt_str, in string fn_src="" ) { auto raw = MarkupRawUnit(); string[] insert_file_list; string[] images_list; HeaderContentInsertsImages t = raw.markupSourceHeaderContentRawLineTupleArray(source_txt_str); char[] header_raw = t.header; char[][] sourcefile_body_content = t.src_txt; if (fn_src.match(rgx_files.src_fn_master)) { // filename with path needed if master file (.ssm) not otherwise auto ins = Inserts(); ContentsInsertsImages tu = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src); sourcefile_body_content = tu.contents; insert_file_list = tu.insert_files.dup; 
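// retain the image references gathered while scanning the master file and its inserts (used when assembling pod/source output)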
images_list = tu.images.dup; } else if (_opt_action.source || _opt_action.pod) { auto ins = Inserts(); ContentsInsertsImages tu = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src); images_list = tu.images.dup; } string header_type = ""; t = tuple( header_raw, sourcefile_body_content, insert_file_list, images_list ); return t; } } struct MarkupRawUnit { import std.file; <<meta_markup_source_raw_read_file_source_string>> <<meta_markup_source_raw_doc_header_and_content_split>> <<meta_markup_source_raw_source_line_array>> <<meta_markup_source_raw_read_in_file>> <<meta_markup_source_raw_tuple_of_header_and_body>> <<meta_markup_source_raw_get_insert_source_line_array>> } struct Inserts { alias ContentsAndImages = Tuple!( char[][], "insert_contents", string[], "images" ); @safe ContentsAndImages scan_subdoc_source(O)( O _opt_action, char[][] markup_sourcefile_insert_content, string fn_src ) { <<meta_inserts_scan>> foreach (line; markup_sourcefile_insert_content) { <<meta_inserts_scan_loop>> } // end src subdoc (inserts) loop <<meta_inserts_scan_post>> } @safe ContentsInsertsImages scan_master_src_for_insert_files_and_import_content(O)( O _opt_action, char[][] sourcefile_body_content, string fn_src ) { import std.algorithm; <<meta_master_doc_scan_for_insert_filenames>> foreach (line; sourcefile_body_content) { <<meta_master_doc_scan_for_insert_filenames_loop>> } // end src doc loop <<meta_master_doc_scan_for_insert_filenames_post>> } } } #+END_SRC ** get markup source, read file :source:markup: *** read file, source string [#A] :string: #+NAME: meta_markup_source_raw_read_file_source_string #+BEGIN_SRC d @safe final private string readInMarkupSource(in char[] fn_src) { enforce( exists(fn_src) != 0, "file not found: «" ~ fn_src ~ "»" ); string source_txt_str; try { if (exists(fn_src)) { if (fn_src.getLinkAttributes.attrIsFile) { source_txt_str = fn_src.readText; } else { } } } catch (ErrnoException ex) { } catch (UTFException ex) { // Handle validation errors } catch (FileException ex) { // Handle errors } std.utf.validate(source_txt_str); return source_txt_str; } #+END_SRC *** document header & content, array.length == 2 [#A] :array: here you split document header and body, an array.length == 2 split is on first match of level A~ (which is required) #+NAME: meta_markup_source_raw_doc_header_and_content_split #+BEGIN_SRC d @trusted final private char[][] header0Content1(in string src_text) { // cast(char[]) /+ split string on _first_ match of "^:?A~\s" into [header, content] array/tuple +/ char[][] header_and_content; auto m = (cast(char[]) src_text).matchFirst(rgx.heading_a); header_and_content ~= m.pre; header_and_content ~= m.hit ~ m.post; assert(header_and_content.length == 2, "document markup is broken, header body split == " ~ header_and_content.length.to!string ~ "; (header / body array split should == 2 (split is on level A~))" ); return header_and_content; } #+END_SRC *** source line array :array: #+NAME: meta_markup_source_raw_source_line_array #+BEGIN_SRC d @trusted final private char[][] markupSourceLineArray(in char[] src_text) { // cast(char[]) char[][] source_line_arr = (cast(char[]) src_text).split(rgx.newline_eol_strip_preceding); return source_line_arr; } #+END_SRC *** source content raw line array :array: - used for regular .sst files; master .ssm files and; .ssi inserts - regex is passed for relevant enforce match **** read in file #+NAME: meta_markup_source_raw_read_in_file #+BEGIN_SRC d @safe string markupSourceReadIn(in 
string fn_src) { static auto rgx_files = RgxFiles(); enforce( fn_src.match(rgx_files.src_pth_sst_or_ssm), "not a dr markup filename: «" ~ fn_src ~ "»" ); string source_txt_str = readInMarkupSource(fn_src); return source_txt_str; } #+END_SRC **** tuple (a) header, (b) body content, (c) file insert list & (d) image list? - header - body content - file insert list - [image list?] #+NAME: meta_markup_source_raw_tuple_of_header_and_body #+BEGIN_SRC d @safe HeaderContentInsertsImages markupSourceHeaderContentRawLineTupleArray(in string source_txt_str) { string[] file_insert_list = []; string[] images_list = []; char[][] hc = header0Content1(source_txt_str); char[] header = hc[0]; char[] source_txt = hc[1]; char[][] source_line_arr = markupSourceLineArray(source_txt); HeaderContentInsertsImages t = tuple( header, source_line_arr, file_insert_list, images_list ); return t; } #+END_SRC **** get insert source line array #+NAME: meta_markup_source_raw_get_insert_source_line_array #+BEGIN_SRC d @safe final char[][] getInsertMarkupSourceContentRawLineArray( in char[] fn_src_insert, Regex!(char) rgx_file ) { enforce( fn_src_insert.match(rgx_file), "not a dr markup filename: «" ~ fn_src_insert ~ "»" ); string source_txt_str = readInMarkupSource(fn_src_insert); char[][] source_line_arr = markupSourceLineArray(source_txt_str); return source_line_arr; } #+END_SRC ** get markup source, master file & inserts :masterfile:inserts: - [[./doc-reform.org][doc-reform.org]] [[./][org/]] *** scan inserts (sub-document) source :scan_insert_src: **** scan subdoc source #+NAME: meta_inserts_scan #+BEGIN_SRC d char[][] contents_insert; int code_block_status = 0; enum codeBlock { off, curly, tic, } auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm); auto markup_src_file_path = fn_pth_full.captures[1]; #+END_SRC **** loop insert (sub-document) #+NAME: meta_inserts_scan_loop #+BEGIN_SRC d if (code_block_status == codeBlock.curly) { if (line.matchFirst(rgx.block_curly_code_close)) { code_block_status = codeBlock.off; } contents_insert ~= line; } else if (line.matchFirst(rgx.block_curly_code_open)) { code_block_status = codeBlock.curly; contents_insert ~= line; } else if (code_block_status == codeBlock.tic) { if (line.matchFirst(rgx.block_tic_close)) { code_block_status = codeBlock.off; } contents_insert ~= line; } else if (line.matchFirst(rgx.block_tic_code_open)) { code_block_status = codeBlock.tic; contents_insert ~= line; } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) { auto insert_fn = m.captures[2]; auto insert_sub_pth = m.captures[1]; auto fn_src_insert = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array; auto raw = MarkupRawUnit(); auto markup_sourcesubfile_insert_content = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts); debug(insert_file) { writeln(line); writeln(fn_src_insert); writeln( " length contents insert array: ", markup_sourcesubfile_insert_content.length ); } if (_opt_action.source || _opt_action.pod) { _images ~= _extract_images(markup_sourcesubfile_insert_content); } auto ins = Inserts(); /+ - 1. load file - 2. read lines - 3. scan lines - a. if filename insert, and insert filename - repeat 1 - b. else - add line to new array; - build image list, search for any image files to add to image list +/ } else { contents_insert ~= line; // images to extract for image list? 
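// when generating source or pod output, harvest any image references on this line into the document image list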
if (_opt_action.source || _opt_action.pod) { string[] _image_linelist = _extract_images(line); if (_image_linelist.length > 0) { _images ~= _image_linelist; } } } #+END_SRC **** post loop #+NAME: meta_inserts_scan_post #+BEGIN_SRC d ContentsAndImages t = tuple( contents_insert, _images ); return t; #+END_SRC *** scan document source :scan_src: **** scan doc source #+NAME: meta_master_doc_scan_for_insert_filenames #+BEGIN_SRC d char[][] contents; int code_block_status = 0; enum codeBlock { off, curly, tic, } auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm); auto markup_src_file_path = fn_pth_full.captures[1]; char[][] contents_insert; string[] _images =[]; string[] insert_file_list =[]; #+END_SRC **** include inserts: _loop master_ scan for inserts (insert documents) #+NAME: meta_master_doc_scan_for_insert_filenames_loop #+BEGIN_SRC d if (code_block_status == codeBlock.curly) { if (line.matchFirst(rgx.block_curly_code_close)) { code_block_status = codeBlock.off; } contents ~= line; } else if (line.matchFirst(rgx.block_curly_code_open)) { code_block_status = codeBlock.curly; contents ~= line; } else if (code_block_status == codeBlock.tic) { if (line.matchFirst(rgx.block_tic_close)) { code_block_status = codeBlock.off; } contents ~= line; } else if (line.matchFirst(rgx.block_tic_code_open)) { code_block_status = codeBlock.tic; contents ~= line; } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) { auto insert_fn = m.captures[2]; auto insert_sub_pth = m.captures[1]; auto fn_src_insert = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array; insert_file_list ~= fn_src_insert.to!string; auto raw = MarkupRawUnit(); /+ TODO +/ auto markup_sourcefile_insert_content = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts); debug(insert_file) { writeln(line); writeln(fn_src_insert); writeln( " length contents insert array: ", markup_sourcefile_insert_content.length ); } auto ins = Inserts(); ContentsAndImages contents_insert_tu = ins.scan_subdoc_source( _opt_action, markup_sourcefile_insert_content, fn_src_insert.to!string ); contents ~= contents_insert_tu.insert_contents; if (_opt_action.source || _opt_action.pod) { string[] _image_linelist = _extract_images(contents_insert_tu.images); if (_image_linelist.length > 0) { _images ~= _image_linelist; } } /+ - 1. load file - 2. read lines - 3. scan lines - a. if filename insert, and insert filename - repeat 1 - b. else - add line to new array; - build image list, search for any image files to add to image list +/ } else { contents ~= line; if (_opt_action.source || _opt_action.pod) { string[] _image_linelist = _extract_images(line); if (_image_linelist.length > 0) { _images ~= _image_linelist; } } } #+END_SRC **** post loop #+NAME: meta_master_doc_scan_for_insert_filenames_post #+BEGIN_SRC d string[] images = []; foreach(i; uniq(_images.sort())) { images ~= i; } debug(insert_file) { writeln(__LINE__); writeln(contents.length); } ContentsInsertsImages t = tuple( contents, insert_file_list, images ); return t; #+END_SRC * document header including copyright & license #+NAME: doc_header_including_copyright_and_license #+BEGIN_SRC txt /+ - Name: Spine, Doc Reform [a part of] - Description: documents, structuring, processing, publishing, search - static content generator - Author: Ralph Amissah [ralph.amissah@gmail.com] - Copyright: (C) 2015 - 2023 Ralph Amissah, All Rights Reserved. 
- License: AGPL 3 or later: Spine (SiSU), a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more details. You should have received a copy of the GNU Affero General Public License along with this program. If not, see [https://www.gnu.org/licenses/]. If you have an Internet connection, the latest version of the AGPL should be available at these locations: [https://www.fsf.org/licensing/licenses/agpl.html] [https://www.gnu.org/licenses/agpl.html] - Spine (by Doc Reform, related to SiSU) uses standard: - docReform markup syntax - standard SiSU markup syntax with modified headers and minor modifications - docReform object numbering - standard SiSU object citation numbering & system - Homepages: [https://www.doc_reform.org] [https://www.sisudoc.org] - Git [https://git.sisudoc.org/projects/?p=software/spine.git;a=summary] +/ #+END_SRC * __END__