diff options
Diffstat (limited to 'org/ao_header_extract.org')
-rw-r--r-- | org/ao_header_extract.org | 438 |
1 files changed, 438 insertions, 0 deletions
diff --git a/org/ao_header_extract.org b/org/ao_header_extract.org
new file mode 100644
index 0000000..d075c7c
--- /dev/null
+++ b/org/ao_header_extract.org
@@ -0,0 +1,438 @@
+#+TITLE: sdp header extract
+#+AUTHOR: Ralph Amissah
+#+EMAIL: ralph.amissah@gmail.com
+#+STARTUP: indent
+#+LANGUAGE: en
+#+OPTIONS: H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
+#+OPTIONS: TeX:t LaTeX:t skip:nil d:nil todo:t pri:nil tags:not-in-toc
+#+OPTIONS: author:nil email:nil creator:nil timestamp:nil
+#+PROPERTY: header-args :padline no :exports code :noweb yes
+#+EXPORT_SELECT_TAGS: export
+#+EXPORT_EXCLUDE_TAGS: noexport
+#+FILETAGS: :sdp:niu:ao:
+#+TAGS: assert(a) class(c) debug(d) mixin(m) sdp(s) tangle(T) template(t) WEB(W) noexport(n)
+
+[[./sdp.org][sdp]] [[./][org/]]
+* header
+
+// mixin SiSUheader;
+// auto set_header = HeaderDocMetadataMakeJson(); // reintroduce
+
+** header document metadata in json :json:
+
+#+name: ao_markup_header_extract
+#+BEGIN_SRC d
+/+
+  Merge one header block into the document metadata / make JSON objects.
+  header holds a main header line (rgx.head_main) optionally followed by
+  sub-header lines (rgx.head_sub). Sub-header values are stored only where
+  the target JSON entry has type STRING; sub-headers that fail the regex
+  for their main header are reported on stdout and skipped.
+  Returns tuple(dochead_meta, dochead_make).
++/
+auto header_metadata_and_make_jsonstr(
+  string header,
+  JSONValue[string] dochead_meta,
+  JSONValue[string] dochead_make
+)
+in { }
+body {
+  scope(exit) {
+    destroy(header);
+    destroy(dochead_meta);
+    destroy(dochead_make);
+  }
+  if (auto t = match(header, rgx.head_main)) {
+    char[][] obj_spl = split(
+      cast(char[]) header,
+      rgx.line_delimiter_ws_strip
+    );
+    // hm keys every sub-header lookup below, so it has to stay intact
+    // until all lines of this block have been processed
+    auto hm = to!string(t.captures[1]);
+    if (match(hm, rgx.main_headers)) {
+      foreach (line; obj_spl) {
+        if (auto m = match(line, rgx.head_main)) {
+          if (!empty(m.captures[2])) {
+            if (hm == "creator") {
+              dochead_meta[hm]["author"].str =
+                to!string(m.captures[2]);
+            } else if (hm == "title") {
+              dochead_meta[hm]["main"].str =
+                to!string(m.captures[2]);
+            } else if (hm == "publisher") {
+              dochead_meta[hm]["name"].str =
+                to!string(m.captures[2]);
+            }
+          }
+        } else if (auto s = match(line, rgx.head_sub)) {
+          if (!empty(s.captures[2])) {
+            auto hs = to!string(s.captures[1]);
+            if ((hm == "make" )
+            && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) {
+              switch (hm) {
+              case "make":
+                if (match(hs, rgx.subhead_make)) {
+                  if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_make[hm][hs].str = to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              default:
+                break;
+              }
+            } else if (dochead_meta[hm].type() == JSON_TYPE.OBJECT) {
+              switch (hm) {
+              case "creator":
+                if (match(hs, rgx.subhead_creator)) {
+                  if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_meta[hm][hs].str =
+                      to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              case "title":
+                if (match(hs, rgx.subhead_title)) {
+                  if ((hs == "subtitle")
+                  && (dochead_meta[hm]["sub"].type() == JSON_TYPE.STRING)) {
+                    dochead_meta[hm]["sub"].str =
+                      to!string(s.captures[2]);
+                  } else if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_meta[hm][hs].str =
+                      to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              case "rights":
+                if (match(hs, rgx.subhead_rights)) {
+                  if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_meta[hm][hs].str =
+                      to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              case "date":
+                if (match(hs, rgx.subhead_date)) {
+                  if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_meta[hm][hs].str =
+                      to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              case "original":
+                if (match(hs, rgx.subhead_original)) {
+                  if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_meta[hm][hs].str =
+                      to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              case "classify":
+                if (match(hs, rgx.subhead_classify)) {
+                  if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_meta[hm][hs].str =
+                      to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              case "identifier":
+                if (match(hs, rgx.subhead_identifier)) {
+                  if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_meta[hm][hs].str =
+                      to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              case "notes":
+                if (match(hs, rgx.subhead_notes)) {
+                  if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_meta[hm][hs].str =
+                      to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              case "publisher":
+                if (match(hs, rgx.subhead_publisher)) {
+                  if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                    dochead_meta[hm][hs].str =
+                      to!string(s.captures[2]);
+                  }
+                } else {
+                  writeln("not a valid header type:", hm, ":", hs);
+                }
+                break;
+              case "links":
+                // "links" sub-headers are currently ignored
+                // if (match(hs, rgx.subhead_links)) {
+                //   if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) {
+                //     dochead_meta[hm][hs].str = to!string(s.captures[2]);
+                //   }
+                // } else {
+                //   writeln("not a valid header type:", hm, ":", hs);
+                //   destroy(hm);
+                //   destroy(hs);
+                // }
+                break;
+              default:
+                break;
+              }
+            }
+          }
+        }
+      }
+    } else {
+      writeln("not a valid header type:", hm);
+    }
+  }
+  auto t = tuple(dochead_meta, dochead_make);
+  static assert(!isTypeTuple!(t));
+  return t;
+}
+#+END_SRC
+
+** header extract
+#+name: ao_markup_header_extract
+#+BEGIN_SRC d
+/+
+  Append line to an_object["obj"] when it starts a make/meta header block
+  or is a sub-header of the block currently open, updating the type flags
+  and line_occur counters along the way. Returns an_object.
++/
+private auto header_extract(
+  char[] line,
+  ref int[string] line_occur,
+  ref string[string] an_object,
+  ref int[string] type
+) {
+  if (matchFirst(line, rgx.header_make)) {
+    /+ matched header_make +/
+    debug(header1) { // header
+      // tell_l("yellow", line);
+    }
+    type["header"] = State.on;
+    type["header_make"] = State.on;
+    type["header_meta"] = State.off;
+    ++line_occur["header_make"];
+    an_object["obj"] ~= line ~= "\n";
+  } else if (matchFirst(line, rgx.header_meta)) {
+    /+ matched header_metadata +/
+    debug(header1) { // header
+      // tell_l("yellow", line);
+    }
+    type["header"] = State.on;
+    type["header_make"] = State.off;
+    type["header_meta"] = State.on;
+    ++line_occur["header_meta"];
+    an_object["obj"] ~= line ~= "\n";
+  } else if (type["header_make"] == State.on
+  && (line_occur["header_make"] > State.off)) {
+    /+ header_make flag set +/
+    if (matchFirst(line, rgx.header_sub)) {
+      /+ sub-header +/
+      debug(header1) {
+        // tell_l("yellow", line);
+      }
+      // type["header"] = State.on;
+      ++line_occur["header_make"];
+      an_object["obj"] ~= line ~= "\n";
+    }
+  } else if (type["header_meta"] == State.on
+  && (line_occur["header_meta"] > State.off)) {
+    /+ header_metadata flag set +/
+    if (matchFirst(line, rgx.header_sub)) {
+      /+ sub-header +/
+      debug(header1) {
+        // tell_l("yellow", line);
+      }
+      ++line_occur["header_meta"];
+      an_object["obj"] ~= line ~= "\n";
+    }
+  }
+  // return 0;
+  return an_object;
+}
+#+END_SRC
+
+** header array :header:
+#+name: ao_markup_header_extract
+#+BEGIN_SRC d
+/+ reset the header line counters and "header" flag, drop the collected object +/
+auto header_set_common(
+  ref int[string] line_occur,
+  ref string[string] an_object,
+  ref int[string] type
+) {
+  // line_occur["header"] = State.off;
+  line_occur["header_make"] = State.off;
+  line_occur["header_meta"] = State.off;
+  type["header"] = State.off;
+  // type["header_make"] = State.off;
+  // type["header_meta"] = State.off;
+  an_object.remove("obj");
+  an_object.remove("is");
+  an_object.remove("attrib");
+}
+/+
+  Walk src_header line by line, collect each header block and, on the empty
+  line following it, merge the block into the make / meta JSON objects.
+  Returns tuple(dochead_make, dochead_meta), i.e. make first.
++/
+private auto headerContentJSON(in char[] src_header) {
+  auto type = flags_type_init;
+  type = [
+   "header" : State.off,
+   "header_make" : State.off,
+   "header_meta" : State.off,
+  ];
+  string[string] an_object;
+  int[string] line_occur;
+  auto dochead_make = parseJSON(header_make_jsonstr).object;
+  auto dochead_meta = parseJSON(header_meta_jsonstr).object;
+  auto set_header = HeaderDocMetadataMakeJson();
+  char[][] source_header_arr =
+    split(cast(char[]) src_header, rgx.line_delimiter);
+  foreach(header_line; source_header_arr) {
+    if (auto m = matchFirst(header_line, rgx.comment)) {
+      /+ matched comment +/
+      debug(comment) {
+        // tell_l("blue", header_line);
+      }
+      header_set_common(line_occur, an_object, type);
+      // type["header_make"] = State.off;
+      // type["header_meta"] = State.off;
+    } else if ((matchFirst(header_line, rgx.header))
+    || (type["header_make"] == State.on
+    && (line_occur["header_make"] > State.off))
+    || (type["header_meta"] == State.on
+    && (line_occur["header_meta"] > State.off))) {
+      if (header_line.length == 0) {
+        /+ header_make instructions (current line empty) +/
+        auto dochead_metadata_and_make =
+          set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make);
+        static assert(!isTypeTuple!(dochead_metadata_and_make));
+        dochead_meta = dochead_metadata_and_make[0];
+        dochead_make = dochead_metadata_and_make[1];
+        header_set_common(line_occur, an_object, type);
+        type["header_make"] = State.off;
+        type["header_meta"] = State.off;
+        writeln(dochead_metadata_and_make);
+      } else {
+        an_object = header_extract(header_line, line_occur, an_object, type);
+      }
+    } else {
+      // writeln(__LINE__);
+    }
+  }
+  auto t = tuple(
+    dochead_make,
+    dochead_meta,
+  );
+  return t;
+}
+#+END_SRC
+
+** +header document metadata+ :document:metadata:
+*** +within abstraction loop+
+
+**** +line exist: header make+ :header:make:
+# #+name: abs_in_loop_body_not_block_obj
+# #+BEGIN_SRC d
+# } else if (line_occur["header_make"] > State.off) {
+#   /+ header_make +/
+#   // should be caught by sub-header
+#   debug(header) {
+#     tell_l("red", line);
+#   }
+#   an_object["obj"] ~= line ~= "\n";
+#   ++line_occur["header_make"];
+# #+END_SRC
+
+**** +line exist: header metadata+ :header:metadata:
+# #+name: abs_in_loop_body_not_block_obj
+# #+BEGIN_SRC d
+# } else if (line_occur["header_meta"] > State.off) {
+#   /+ header_metadata +/
+#   // should be caught by sub-header
+#   debug(header) { // para
+#     tell_l("red", line);
+#   }
+#   an_object["obj"] ~= line ~= "\n";
+#   ++line_occur["header_meta"];
+# #+END_SRC
+
+**** +header_make instructions+ :header:make:instructions:
+
+# #+name: abs_in_loop_body_not_block_obj_line_empty
+# #+BEGIN_SRC d
+# if ((type["header_make"] == State.on)
+# && (line_occur["header_make"] > State.off)) {
+#   /+ header_make instructions (current line empty) +/
+#   auto dochead_metadata_and_make =
+#     set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make);
+#   static assert(!isTypeTuple!(dochead_metadata_and_make));
+#   dochead_meta = dochead_metadata_and_make[0];
+#   dochead_make = dochead_metadata_and_make[1];
+#   header_set_common(line_occur, an_object, type);
+#   processing.remove("verse");
+# #+END_SRC
+
+**** +header_metadata+ :header:metadata:
+
+# #+name: abs_in_loop_body_not_block_obj_line_empty
+# #+BEGIN_SRC d
+# } else if ((type["header_meta"] == State.on)
+# && (line_occur["header_meta"] > State.off)) {
+#   /+ header_meta (current line empty) +/
+#   auto dochead_metadata_and_make =
+#     set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make);
+#   static assert(!isTypeTuple!(dochead_metadata_and_make));
+#   dochead_meta = dochead_metadata_and_make[0];
+#   dochead_make = dochead_metadata_and_make[1];
+#   header_set_common(line_occur, an_object, type);
+#   type["header_make"] = State.off;
+#   type["header_meta"] = State.off;
+#   processing.remove("verse");
+# #+END_SRC
+
+* tangles (code structure) :tangle:
+** ao_markup_header_extract.d: :ao_markup_header_extract.d:
+#+BEGIN_SRC d :tangle ../src/sdp/ao_header_extract.d
+/+
+  extract header return json
++/
+template SiSUheaderExtract() {
+  private import
+    std.exception,
+    std.regex,
+    std.utf,
+    std.conv : to;
+  private import
+    ao_rgx; // ao_defaults.d
+  struct HeaderDocMetadataMakeJson {
+    mixin SiSUrgxInitFlags;
+    mixin RgxInit;
+    auto rgx = Rgx();
+    enum State { off, on }
+    string hm, hs;
+    <<ao_markup_header_extract>>
+  }
+}
+#+END_SRC