diff options
author | Ralph Amissah <ralph@amissah.com> | 2016-06-25 06:37:22 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2019-04-04 14:48:18 -0400 |
commit | f0c845eba3718fd0c732aebf25eb33e689798e03 (patch) | |
tree | c2ea36e199950647b1d3cce153691bb8f4fec378 /src/sdp/ao_header_extract_native.d | |
parent | step5.1 headers sdlang (like native headers) converted to json for common int... (diff) |
step6 headers (&config), native & sdlang to json internally
Diffstat (limited to 'src/sdp/ao_header_extract_native.d')
-rw-r--r-- | src/sdp/ao_header_extract_native.d | 320 |
1 files changed, 320 insertions, 0 deletions
diff --git a/src/sdp/ao_header_extract_native.d b/src/sdp/ao_header_extract_native.d new file mode 100644 index 0000000..49555ef --- /dev/null +++ b/src/sdp/ao_header_extract_native.d @@ -0,0 +1,320 @@ +/+ + extract native/orig header return json ++/ +template SiSUheaderExtractNative() { + private import + std.exception, + std.regex, + std.utf, + std.conv : to; + private import + ao_rgx; + struct HeaderDocMetadataAndMakeNativeToJson { + mixin SiSUheaderRegister; + mixin SiSUrgxInitFlags; + mixin RgxInit; + auto rgx = Rgx(); + enum State { off, on } + string hm, hs; + auto header_metadata_and_make_jsonstr( + string header, + JSONValue[string] dochead_meta, + JSONValue[string] dochead_make + ) + in { } + body { + scope(exit) { + destroy(header); + destroy(dochead_meta); + destroy(dochead_make); + } + if (auto t = match(header, rgx.native_header_main)) { + char[][] header_obj_spl = split( + cast(char[]) header, + rgx.line_delimiter_ws_strip + ); + auto hm = to!string(t.captures[1]); + if (match(hm, rgx.main_headers)) { + foreach (line; header_obj_spl) { + if (auto m = match(line, rgx.native_header_main)) { + if (!empty(m.captures[2])) { + if (hm == "creator") { + dochead_meta[hm]["author"].str = + to!string(m.captures[2]); + } else if (hm == "title") { + dochead_meta[hm]["main"].str = + to!string(m.captures[2]); + } else if (hm == "publisher") { + dochead_meta[hm]["name"].str = + to!string(m.captures[2]); + } + } + } else if (auto s = match(line, rgx.native_header_sub)) { + if (!empty(s.captures[2])) { + auto hs = to!string(s.captures[1]); + if ((hm == "make" ) + && (dochead_make[hm].type() == JSON_TYPE.OBJECT)) { + switch (hm) { + case "make": + if (match(hs, rgx.native_subhead_make)) { + if (dochead_make[hm][hs].type() == JSON_TYPE.STRING) { + dochead_make[hm][hs].str = to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + default: + break; + } + } else if (dochead_meta[hm].type() == JSON_TYPE.OBJECT) { + switch (hm) { + case "creator": + if (match(hs, rgx.native_subhead_creator)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "title": + if (match(hs, rgx.native_subhead_title)) { + if ((hs == "subtitle") + && (dochead_meta[hm]["sub"].type() == JSON_TYPE.STRING)) { + dochead_meta[hm]["sub"].str = + to!string(s.captures[2]); + } else if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "rights": + if (match(hs, rgx.native_subhead_rights)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "date": + if (match(hs, rgx.native_subhead_date)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "original": + if (match(hs, rgx.native_subhead_original)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "classify": + if (match(hs, rgx.native_subhead_classify)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "identifier": + if (match(hs, rgx.native_subhead_identifier)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "notes": + if (match(hs, rgx.native_subhead_notes)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "publisher": + if (match(hs, rgx.native_subhead_publisher)) { + if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + dochead_meta[hm][hs].str = + to!string(s.captures[2]); + } + } else { + writeln("not a valid header type:", hm, ":", hs); + destroy(hm); + destroy(hs); + } + break; + case "links": + destroy(hm); + destroy(hs); + // if (match(hs, rgx.native_subhead_links)) { + // if (dochead_meta[hm][hs].type() == JSON_TYPE.STRING) { + // dochead_meta[hm][hs].str = to!string(s.captures[2]); + // } + // } else { + // writeln("not a valid header type:", hm, ":", hs); + // destroy(hm); + // destroy(hs); + // } + break; + default: + break; + } + } + } + } + } + } else { + writeln("not a valid header type:", hm); + } + } + auto t = tuple(dochead_meta, dochead_make); + static assert(!isTypeTuple!(t)); + return t; + } + private auto native_header_extract( + char[] line, + ref int[string] line_occur, + ref string[string] an_object, + ref int[string] type + ) { + if (matchFirst(line, rgx.native_header_make)) { /+ matched header_make +/ + debug(header1) { /+ writeln(line); +/ } + type["header"] = State.on; + type["header_make"] = State.on; + type["header_meta"] = State.off; + ++line_occur["header_make"]; + an_object["obj"] ~= line ~= "\n"; + } else if (matchFirst(line, rgx.native_header)) { /+ matched header_metadata +/ + /+ (generic header match and not previously caught by header_make) +/ + debug(header1) { /+ writeln(line); +/ } + type["header"] = State.on; + type["header_make"] = State.off; + type["header_meta"] = State.on; + ++line_occur["header_meta"]; + an_object["obj"] ~= line ~= "\n"; + } else if (type["header_make"] == State.on + && (line_occur["header_make"] > State.off)) { /+ header_make flag set +/ + if (matchFirst(line, rgx.native_header_sub)) { /+ sub-header +/ + debug(header1) { /+ writeln(line); +/ } + ++line_occur["header_make"]; + an_object["obj"] ~= line ~= "\n"; + } + } else if (type["header_meta"] == State.on + && (line_occur["header_meta"] > State.off)) { /+ header_metadata flag set +/ + if (matchFirst(line, rgx.native_header_sub)) { /+ sub-header +/ + debug(header1) { /+ writeln(line); +/ } + ++line_occur["header_meta"]; + an_object["obj"] ~= line ~= "\n"; + } + } + return an_object; + } + auto header_reset_states_common( + ref int[string] line_occur, + ref string[string] an_object, + ref int[string] type + ) { + // line_occur["header"] = State.off; + line_occur["header_make"] = State.off; + line_occur["header_meta"] = State.off; + type["header"] = State.off; + // type["header_make"] = State.off; + // type["header_meta"] = State.off; + an_object.remove("obj"); + an_object.remove("is"); + an_object.remove("attrib"); + } + private auto headerNativeToJSON(in char[] src_header) { + auto type = flags_type_init; + type = [ + "header" : State.off, + "header_make" : State.off, + "header_meta" : State.off, + ]; + string[string] an_object; + int[string] line_occur; + auto dochead_make = parseJSON(header_make_jsonstr).object; + auto dochead_meta = parseJSON(header_meta_jsonstr).object; + auto set_header = HeaderDocMetadataAndMakeNativeToJson(); + char[][] source_header_arr = + split(cast(char[]) src_header, rgx.line_delimiter); + foreach(header_line; source_header_arr) { + if (auto m = matchFirst(header_line, rgx.comment)) { + /+ matched comment +/ + debug(comment) { + // tell_l("blue", header_line); + } + header_reset_states_common(line_occur, an_object, type); + // type["header_make"] = State.off; + // type["header_meta"] = State.off; + } else if ((matchFirst(header_line, rgx.native_header)) + || (type["header_make"] == State.on + && (line_occur["header_make"] > State.off)) + || (type["header_meta"] == State.on + && (line_occur["header_meta"] > State.off))) { + if (header_line.length == 0) { + /+ header_make instructions (current line empty) +/ + auto dochead_metadata_and_make = + set_header.header_metadata_and_make_jsonstr(strip(an_object["obj"]), dochead_meta, dochead_make); + static assert(!isTypeTuple!(dochead_metadata_and_make)); + dochead_meta = dochead_metadata_and_make[0]; + dochead_make = dochead_metadata_and_make[1]; + header_reset_states_common(line_occur, an_object, type); + type["header_make"] = State.off; + type["header_meta"] = State.off; + writeln(dochead_metadata_and_make); + } else { + an_object = native_header_extract(header_line, line_occur, an_object, type); + } + } else { + // writeln(__LINE__); + } + } + auto t = tuple( + dochead_make, + dochead_meta, + ); + return t; + } + } +} |