/+
- Name: SisuDoc Spine, Doc Reform [a part of]
  - Description: documents, structuring, processing, publishing, search
    - static content generator

  - Author: Ralph Amissah
    [ralph.amissah@gmail.com]

  - Copyright: (C) 2015 - 2025 Ralph Amissah, All Rights Reserved.

  - License: AGPL 3 or later:

    Spine (SiSU), a framework for document structuring, publishing and
    search

    Copyright (C) Ralph Amissah

    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU Affero General Public License as published by the
    Free Software Foundation, either version 3 of the License, or (at your
    option) any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.

    You should have received a copy of the GNU General Public License along with
    this program. If not, see [https://www.gnu.org/licenses/].

    If you have Internet connection, the latest version of the AGPL should be
    available at these locations:
    [https://www.fsf.org/licensing/licenses/agpl.html]
    [https://www.gnu.org/licenses/agpl.html]

  - Spine (by Doc Reform, related to SiSU) uses standard:
    - docReform markup syntax
      - standard SiSU markup syntax with modified headers and minor modifications
    - docReform object numbering
      - standard SiSU object citation numbering & system

  - Homepages:
    [https://www.sisudoc.org]
    [https://www.doc-reform.org]

  - Git
    [https://git.sisudoc.org/]

+/
/++
  module sisudoc.io_in.read_source_files;<BR>
  - open markup files<BR>
  - if master file, scan for additional files to import/insert
+/
module sisudoc.io_in.read_source_files;
@safe:
template spineRawMarkupContent() {
  import
    std.digest.sha,
    std.file,
    std.path;
  import
    sisudoc.meta,
    sisudoc.io_in.paths_source,
    sisudoc.meta.rgx_files,
    sisudoc.meta.rgx;
  /+ regex set used when scanning markup text (headings, code block markers, images) +/
  mixin spineRgxIn;
  static auto rgx = RgxI();
  /+ regex set used for matching source file names and insert-file directives +/
  mixin spineRgxFiles;
  static auto rgx_files = RgxFiles();
  /+ template-scope list of image names; persists between calls +/
  string[] _images=[];
  /++
    Collect the image names referenced in a block of markup.

    The argument is converted to a string with `to!string` and every match of
    `rgx.image` is visited; capture group 1 of each match is added to the
    result, so a block that references several images yields all of them
    (not only the first).

    Params:
      content_block = markup text (anything convertible with `to!string`)
    Returns: capture group 1 of each `rgx.image` match, in order of
      appearance; empty when nothing matches
  +/
  string[] _extract_images(S)(S content_block) {
    string[] images_;
    string _content_block = content_block.to!string;
    foreach (m; _content_block.matchAll(rgx.image)) {
      images_ ~= m[1].to!string;
    }
    return images_;
  }
  auto rawsrc = RawMarkupContent();
  /+ result of Inserts.scan_master_src_for_insert_files_and_import_content +/
  struct ST_contents_inserts_images {
    char[][]   contents;     // body lines, with the lines of inserted files spliced in
    string[]   insert_files; // paths of the insert files that were read
    string[]   images;       // image names found while scanning
  }
  /+ result of MarkupRawUnit.markupSourceHeaderContentRawLineStructArray +/
  struct ST_header_content_inserts_images {
    char[]     header;       // text preceding the first level A heading
    char[][]   src_txt;      // remaining text (from the level A heading on), split into lines
    string[]   insert_files; // insert file list
    string[]   images;       // image list
  }
  /+ 32 byte (sha256 sized) digests, set in RawMarkupContent.sourceContentSplitIntoHeaderAndBody +/
  struct ST_doc_digest {
    ubyte[32]  markup_doc; // digest of the complete markup source text
    ubyte[32]  header;     // digest of the document header
    ubyte[32]  text;       // digest taken over the document body
  }
  /+ document parts returned by spineRawMarkupContent / sourceContentSplitIntoHeaderAndBody +/
  struct ST_doc_parts {
    char[]        header_raw;              // raw document header
    char[][]      sourcefile_body_content; // document body as an array of lines
    string[]      insert_file_list;        // insert files read (filled for master documents)
    string[]      images_list;             // image names found in the document
    ST_doc_digest doc_digest;              // digests of source, header and body
  }
  /++
    Entry point: read the markup source file `fn_src` and return it split
    into header, body lines, insert-file list, image list and digests.
  +/
  ST_doc_parts spineRawMarkupContent(O,Fn)(O _opt_action, Fn fn_src) {
    auto markup_source_text = rawsrc.sourceContent(fn_src);
    return rawsrc.sourceContentSplitIntoHeaderAndBody(
      _opt_action,
      markup_source_text,
      fn_src
    );
  }
  /++
    Reads a markup source document and splits it into its parts
    (header, body lines, insert-file list, image list, digests).
  +/
  struct RawMarkupContent {
    /++
      Read the markup source file `fn_src` and return its text.
      Delegates to MarkupRawUnit.markupSourceReadIn, which enforces that the
      name is a markup source filename and that the file exists.
    +/
    final sourceContent(in string fn_src) {
      auto raw = MarkupRawUnit();
      string source_txt_str
        = raw.markupSourceReadIn(fn_src);
      return source_txt_str;
    }
    /++
      Split markup source text into header and body, pull in insert files for
      master documents, gather the image list and compute digests.

      Params:
        _opt_action    = option flags; `.source` and `.pod` are read to decide
                         whether an image list is needed
        source_txt_str = complete text of the markup source document
        fn_src         = filename (with path) of the source document; needed
                         to recognise and process a master (.ssm) document
      Returns: ST_doc_parts with header, body lines, insert-file list, image
        list and digests.
    +/
    final ST_doc_parts sourceContentSplitIntoHeaderAndBody(O)(
      O         _opt_action,
      in string source_txt_str,
      in string fn_src=""
    ) {
      auto raw = MarkupRawUnit();
      string[] insert_file_list_get;
      string[] images_list_get;
      ST_header_content_inserts_images st
        = raw.markupSourceHeaderContentRawLineStructArray(source_txt_str);
      char[][] sourcefile_body_content = st.src_txt;
      if (fn_src.match(rgx_files.src_fn_master)) { // filename with path needed if master file (.ssm) not otherwise
        auto ins = Inserts();
        ST_contents_inserts_images _cii
          = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
        sourcefile_body_content = _cii.contents;
        insert_file_list_get = _cii.insert_files.dup;
        images_list_get = _cii.images.dup;
      } else if (_opt_action.source || _opt_action.pod) {
        /+ not a master document: body is left as read, only the image list is taken from the scan +/
        auto ins = Inserts();
        ST_contents_inserts_images _cii
          = ins.scan_master_src_for_insert_files_and_import_content(_opt_action, sourcefile_body_content, fn_src);
        images_list_get = _cii.images.dup;
      } // image_list, if path could take sha256 digests already here?
      ST_doc_digest dig;
      {
        dig.markup_doc = source_txt_str.sha256Of;
        dig.header = st.header.sha256Of;
        /+
          The body is an array of lines (char[][]). Passing that array to
          sha256Of directly would digest the in-memory slice descriptors
          (pointer/length pairs) rather than the text, giving a value that
          depends on memory addresses. Feed the characters of each line
          instead, followed by a newline so that line boundaries contribute
          to the digest.
        +/
        SHA256 body_hash;
        body_hash.start();
        foreach (body_line; sourcefile_body_content) {
          body_hash.put(cast(const(ubyte)[]) body_line);
          body_hash.put(cast(ubyte) '\n');
        }
        dig.text = body_hash.finish();
      }
      ST_doc_parts ret;
      {
        ret.header_raw = st.header;
        ret.sourcefile_body_content = sourcefile_body_content;
        ret.insert_file_list = insert_file_list_get;
        ret.images_list = images_list_get;
        ret.doc_digest = dig;
      }
      return ret;
    }
  }
  /++
    Low level reading of markup source files: read file text, split a
    document into header and body, split text into lines.
  +/
  struct MarkupRawUnit {
    import
      std.digest.sha,
      std.file;
    /++
      Read the text of `fn_src`.

      Throws (via enforce) if the path does not exist. The text is only read
      when `getLinkAttributes` reports a regular file; otherwise, or when
      reading raises one of the exceptions caught below, an empty string is
      returned.
    +/
    final private string readInMarkupSource(in char[] fn_src) {
      enforce(
        exists(fn_src) != 0,
        "file not found: «" ~
        fn_src ~ "»"
      );
      string source_txt_str;
      try {
        if (exists(fn_src)) {
          if (fn_src.getLinkAttributes.attrIsFile) {
            source_txt_str = fn_src.readText;
          } else {
            /+ not a regular file (getLinkAttributes does not follow symlinks): nothing is read +/
          }
        }
      } catch (ErrnoException ex) {
        /+ deliberately ignored: empty text is returned +/
      } catch (UTFException ex) {
        /+ invalid UTF in file, deliberately ignored: empty text is returned +/
      } catch (FileException ex) {
        /+ file could not be read, deliberately ignored: empty text is returned +/
      }
      std.utf.validate(source_txt_str);
      return source_txt_str;
    }
    /++
      Split document text on the _first_ match of the level A heading
      (rgx.heading_a) into [header, content]; the heading itself starts the
      content part.

      Throws (via enforce) if the text contains no level A heading, as there
      is then nothing to split on.
    +/
    @trusted final private char[][] header0Content1(in string src_text) { // cast(char[])
      auto m = (cast(char[]) src_text).matchFirst(rgx.heading_a);
      /+ without a match m.hit is not valid, so check before using the captures +/
      enforce(
        !m.empty,
        "document markup is broken, no level A~ heading found"
        ~ "; (header / body split is on level A~)"
      );
      char[][] header_and_content;
      header_and_content ~= m.pre;
      header_and_content ~= m.hit ~ m.post;
      return header_and_content;
    }
    /++ split text into an array of lines on rgx.newline_eol_strip_preceding +/
    @trusted final private char[][] markupSourceLineArray(in char[] src_text) { // cast(char[])
      char[][] source_line_arr
        = (cast(char[]) src_text).split(rgx.newline_eol_strip_preceding);
      return source_line_arr;
    }
    /++
      Read a markup source document.
      Throws (via enforce) if `fn_src` does not match
      rgx_files.src_pth_sst_or_ssm, or if the file does not exist.
    +/
    string markupSourceReadIn(in string fn_src) {
      static auto rgx_files = RgxFiles();
      enforce(
        fn_src.match(rgx_files.src_pth_sst_or_ssm),
        "not a dr markup filename: «" ~
        fn_src ~ "»"
      );
      string source_txt_str = readInMarkupSource(fn_src);
      return source_txt_str;
    }
    /++
      Split document text into header and an array of body lines.
      The insert-file and image lists of the result are left empty here.
    +/
    ST_header_content_inserts_images markupSourceHeaderContentRawLineStructArray(in string source_txt_str) {
      char[][] hc = header0Content1(source_txt_str);
      ST_header_content_inserts_images ret;
      {
        ret.header          = hc[0];
        ret.src_txt         = markupSourceLineArray(hc[1]);
      }
      return ret;
    }
    /++
      Read an insert file and return its text as an array of lines.
      Throws (via enforce) if `fn_src_insert` does not match `rgx_file`, or if
      the file does not exist.
    +/
    final char[][] getInsertMarkupSourceContentRawLineArray(
      in char[]    fn_src_insert,
      Regex!(char) rgx_file
    ) {
      enforce(
        fn_src_insert.match(rgx_file),
        "not a dr markup filename: «" ~
        fn_src_insert  ~ "»"
      );
      string source_txt_str = readInMarkupSource(fn_src_insert);
      char[][] source_line_arr = markupSourceLineArray(source_txt_str);
      return source_line_arr;
    }
  }
  /++
    Scans document lines for insert-file directives, splices in the content
    of insert files and gathers the list of images referenced.
  +/
  struct Inserts {
    struct ST_contents_and_images {
      char[][]   insert_contents;
      string[]   images;
    }
    /+ code block kinds tracked while scanning lines +/
    enum codeBlock { off, curly, tic, quotemarks }
    /++
      Track code block state for one line.

      Updates `code_block_status` when the line opens or closes a code block.
      The order of the checks is the order both scan functions have always
      used.
      NOTE(review): the curly-open check is made before the quotemarks / tic
      status checks, so a curly-open line inside such a block switches the
      state to curly - confirm whether that is intended.

      Returns: true if the line belongs to a code block (including its
        opening and closing lines), so must be passed through untouched;
        false if the line is ordinary markup.
    +/
    private bool _line_is_code_block(ref int code_block_status, char[] line) {
      if (code_block_status == codeBlock.curly) {
        if (line.matchFirst(rgx.block_curly_code_close)) {
          code_block_status = codeBlock.off;
        }
        return true;
      }
      if (line.matchFirst(rgx.block_curly_code_open)) {
        code_block_status = codeBlock.curly;
        return true;
      }
      if (code_block_status == codeBlock.quotemarks) {
        if (line.matchFirst(rgx.block_quotemarks_close)) {
          code_block_status = codeBlock.off;
        }
        return true;
      }
      if (code_block_status == codeBlock.tic) {
        if (line.matchFirst(rgx.block_tic_close)) {
          code_block_status = codeBlock.off;
        }
        return true;
      }
      if (line.matchFirst(rgx.block_quotemarks_code_open)) {
        code_block_status = codeBlock.quotemarks;
        return true;
      }
      if (line.matchFirst(rgx.block_tic_code_open)) {
        code_block_status = codeBlock.tic;
        return true;
      }
      return false;
    }
    /++
      Scan the lines of an insert file (sub-document).

      Code block lines and ordinary lines are passed through to the result.
      When `_opt_action.source` or `_opt_action.pod` is set, image names are
      collected from ordinary lines and from any file named by a nested
      insert directive.

      The image list returned holds only the images found by this call.

      NOTE(review): a nested insert directive is read (so a bad or missing
      file is reported) but neither the directive line nor the content of the
      nested file is added to the result; the step list below suggests
      recursion was intended - confirm.

      Params:
        _opt_action = option flags; `.source` and `.pod` are read
        markup_sourcefile_insert_content = lines of the insert file
        fn_src = path of the insert file; nested insert paths are resolved
                 relative to its directory
    +/
    ST_contents_and_images scan_subdoc_source(O)(
      O        _opt_action,
      char[][] markup_sourcefile_insert_content,
      string   fn_src
    ) {
      char[][] contents_insert;
      string[] images_found; // local: a template-scope list would carry over images from earlier calls
      int code_block_status     = codeBlock.off;
      const bool collect_images = (_opt_action.source || _opt_action.pod);
      auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
      auto markup_src_file_path = fn_pth_full.captures[1];
      foreach (line; markup_sourcefile_insert_content) {
        if (_line_is_code_block(code_block_status, line)) {
          contents_insert ~= line;
        } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
          auto insert_fn = m.captures[2];
          auto insert_sub_pth = m.captures[1];
          auto fn_src_insert
            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
          auto raw = MarkupRawUnit();
          auto markup_sourcesubfile_insert_content
            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
          debug(insert_file) {
            writeln(line);
            writeln(fn_src_insert);
            writeln(
              "  length contents insert array: ",
              markup_sourcesubfile_insert_content.length
            );
          }
          if (collect_images) {
            foreach (sub_line; markup_sourcesubfile_insert_content) {
              images_found ~= _extract_images(sub_line);
            }
          }
          /+
            - 1. load file
            - 2. read lines
            - 3. scan lines
              - a. if filename insert, and insert filename
                 - repeat 1
              - b. else
                 - add line to new array;
                 - build image list, search for any image files to add to image list
          +/
        } else {
          contents_insert ~= line; // images to extract for image list?
          if (collect_images) {
            images_found ~= _extract_images(line);
          }
        }
      } // end src subdoc (inserts) loop
      ST_contents_and_images ret;
      {
        ret.insert_contents = contents_insert;
        ret.images          = images_found;
      }
      return ret;
    }
    /++
      Scan the body lines of a document for insert-file directives.

      Code block lines and ordinary lines are passed through. For each insert
      directive the named file is read, scanned with scan_subdoc_source and
      its lines are spliced in at that point; the file path is recorded.
      When `_opt_action.source` or `_opt_action.pod` is set, image names are
      collected from ordinary lines and from the inserted files.

      Params:
        _opt_action = option flags; `.source` and `.pod` are read
        sourcefile_body_content = body lines of the document
        fn_src = path of the document; insert paths are resolved relative to
                 its directory
      Returns: content lines with inserts spliced in, the list of insert
        files read, and the sorted list of unique image names.
    +/
    ST_contents_inserts_images scan_master_src_for_insert_files_and_import_content(O)(
      O        _opt_action,
      char[][] sourcefile_body_content,
      string   fn_src
    ) {
      import std.algorithm;
      char[][] contents;
      string[] images_found;
      string[] insert_file_list;
      int code_block_status     = codeBlock.off;
      const bool collect_images = (_opt_action.source || _opt_action.pod);
      auto fn_pth_full = fn_src.match(rgx_files.src_pth_sst_or_ssm);
      auto markup_src_file_path = fn_pth_full.captures[1];
      foreach (line; sourcefile_body_content) {
        if (_line_is_code_block(code_block_status, line)) {
          contents ~= line;
        } else if (auto m = line.match(rgx_files.insert_src_fn_ssi_or_sst)) {
          auto insert_fn      = m.captures[2];
          auto insert_sub_pth = m.captures[1];
          auto fn_src_insert
            = chainPath(markup_src_file_path, insert_sub_pth ~ insert_fn).array;
          insert_file_list ~= fn_src_insert.to!string;
          auto raw = MarkupRawUnit();
          auto markup_sourcefile_insert_content
            = raw.getInsertMarkupSourceContentRawLineArray(fn_src_insert, rgx_files.src_fn_find_inserts);
          debug(insert_file) {
            writeln(line);
            writeln(fn_src_insert);
            writeln(
              "  length contents insert array: ",
              markup_sourcefile_insert_content.length
            );
          }
          ST_contents_and_images contents_insert_st = scan_subdoc_source(
            _opt_action,
            markup_sourcefile_insert_content,
            fn_src_insert.to!string
          );
          contents ~= contents_insert_st.insert_contents;
          if (collect_images) {
            /+ already a list of image names: take it as is rather than matching it again +/
            images_found ~= contents_insert_st.images;
          }
          /+
            - 1. load file
            - 2. read lines
            - 3. scan lines
              - a. if filename insert, and insert filename
                 - repeat 1
              - b. else
                 - add line to new array;
                 - build image list, search for any image files to add to image list
          +/
        } else {
          contents ~= line;
          if (collect_images) {
            images_found ~= _extract_images(line);
          }
        }
      } // end src doc loop
      /+ sorted, duplicates removed +/
      string[] images = images_found.sort().uniq.array;
      debug(insert_file) {
        writeln(__LINE__);
        writeln(contents.length);
      }
      ST_contents_inserts_images ret;
      {
        ret.contents = contents;
        ret.insert_files = insert_file_list;
        ret.images = images;
      }
      return ret;
    }
  }
}