diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2019-06-20 13:45:58 -0400 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2019-10-17 19:07:20 -0400 |
commit | 2dd7d2083c23fe9e79a984c534c2283fc4d7e581 (patch) | |
tree | 22deaa43f6888ea6c324a27b5fd4ec9b3546fb63 /org | |
parent | harvest html output (diff) |
harvest reorganized
Diffstat (limited to 'org')
-rw-r--r-- | org/default_misc.org | 35 | ||||
-rw-r--r-- | org/default_paths.org | 12 | ||||
-rw-r--r-- | org/doc_reform.org | 1368 |
3 files changed, 748 insertions, 667 deletions
diff --git a/org/default_misc.org b/org/default_misc.org index b55e651..18bacfa 100644 --- a/org/default_misc.org +++ b/org/default_misc.org @@ -26,6 +26,7 @@ module doc_reform.meta.defaults; <<meta_defaults_template_init_flags>> <<meta_defaults_template_node>> +<<meta_defaults_template_harvest>> <<meta_defaults_template_biblio>> <<defaults_template_markup>> <<defaults_template_language_codes>> @@ -129,6 +130,40 @@ template DocReformNode() { } #+END_SRC +** template: harvest + +#+name: meta_defaults_template_harvest +#+BEGIN_SRC d +template DocReformHarvest() { + auto DocReformHarvest() { + struct _Harvest { + struct Harvest { + string title = ""; + string[] author_arr = []; + string author = ""; + string author_surname = ""; + string author_surname_fn = ""; + string language = ""; + string language_original = ""; + string uid = ""; + string date_published = ""; + string[] topic_register_arr = []; + string path_html_seg = ""; + string path_html_scroll = ""; + string path_epub = ""; + string url_html_seg = ""; + string url_html_scroll = ""; + string url_epub = ""; + } + Harvest harvest; + Harvest[] harvests; + Harvest[][string][string][string][string] subject_trees; + } + return _Harvest(); + } +} +#+END_SRC + ** template: bibliography :biblio: #+name: meta_defaults_template_biblio diff --git a/org/default_paths.org b/org/default_paths.org index 859415c..7bf838e 100644 --- a/org/default_paths.org +++ b/org/default_paths.org @@ -253,13 +253,6 @@ template PathMatters() { return _uid; } string docname_composite_unique_per_src_doc() { - /+ - z pod name if any + src filename + lng code - filename ~ "." ~ lng - * unique per src doc - used by - - sqlite discrete index (multilingual, each language of a document) - +/ string _fn; if (pod_name_with_path.baseName == filename_base) { _fn = filename_base ~ mkup.sep ~ filename_extension ~ mkup.sep ~ lng; @@ -1131,7 +1124,7 @@ template DocReformPathsEPUB() { Po output_pth_root, Lng lng, ) { - auto out_pth = DocReformOutPaths!()( output_pth_root, lng); + auto out_pth = DocReformOutPaths!()(output_pth_root, lng); string base_dir = "epub"; struct _PathsStruct { string base() { @@ -1241,9 +1234,6 @@ template DocReformPathsODT() { string base_pth() { // dir will contain odt document file (also debug file tree) return asNormalizedPath((out_pth.output_base).chainPath(base_dir)).array; } - // string base_filename() { - // return doc_matters.src.filename_base; - // } string odt_file() { return asNormalizedPath(base_pth.chainPath(doc_matters.src.filename_base ~ ".odt")).array; } diff --git a/org/doc_reform.org b/org/doc_reform.org index 3cd5d9c..4d39e53 100644 --- a/org/doc_reform.org +++ b/org/doc_reform.org @@ -117,610 +117,12 @@ void main(string[] args) { } } } - if (_opt_action.very_verbose - && harvests.length > 0 - ) { - auto min_repeat_number = 42; - string[] _document_topic_register; - string[] _topic_register; - string[] _sub_topic_register; - Harvest[][string][string][string][string] subject_trees; - string[] topics = []; - string _auth = ""; - foreach(k, doc_harvest; harvests) { - _topic_register = []; - foreach(topic; doc_harvest.topic_register_arr.sort) { - _sub_topic_register = []; - string _spaces; - string[] subject_tree = topic.split(mkup.sep); - switch (subject_tree.length) { - case 1: - subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] ~= doc_harvest; - break; - case 2: - subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] ~= doc_harvest; - break; - case 3: - subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] ~= doc_harvest; - break; - case 4: - subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] ~= doc_harvest; - break; - default: - break; - } - _topic_register ~= _sub_topic_register.join("\n"); - } - auto char_repeat_number = (doc_harvest.title.length - + doc_harvest.author.length + 16); - char_repeat_number = (char_repeat_number > min_repeat_number) - ? char_repeat_number - : min_repeat_number; - _document_topic_register ~= format( - "\"%s\", %s%s\n%s", - doc_harvest.title, - doc_harvest.author, - (doc_harvest.date_published.length > 0) ? " (" ~ doc_harvest.date_published ~ ")" : "", - _topic_register.sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable).release.join("\n"), - ); - } - topics ~= format(q"┃<!DOCTYPE html> -<html> -<head> -<meta charset="utf-8"> -<title>Metadata Harvest - Topics</title> -<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> -<meta name="dc.title" content= "metadata harvest, Topics - information Structuring Universe, Structured information Serialised Units" /> -<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" /> -<meta name="generator" content="doc_reform" /> -<link rel="generator" href="http://sisudoc.org" /> -<link href="../../_sisu/css/harvest.css" rel="stylesheet"> -<style TYPE="text/css"> -/* DocReform harvest css default stylesheet */ - body { - color: black; - background: #ffffff; - background-color: #ffffff; - } - a:link { - color: #003399; - text-decoration: none; - } - a:visited { - color: #003399; - text-decoration: none; - } - a:hover { - color: #000000; - background-color: #f9f9aa; - } - a:hover img { - background-color: #ffffff; - } - a:active { - color: #003399; - text-decoration: underline; - } - - .norm, .bold { - line-height: 150%%; - margin-left: 1em; - margin-right: 2em; - margin-top: 10px; - margin-bottom: 0px; - text-indent: 0mm; - } - p, h0, h1, h2, h3, h4, h5, h6, h7 { - display: block; - font-family: verdana, arial, georgia, tahoma, sans-serif, helvetica, times, roman; - font-size: 100%%; - font-weight: normal; - line-height: 150%%; - /* text-align: justify; */ - margin-left: 1em; - text-indent: 0mm; - margin-top: 2px; - margin-bottom: 2px; - margin-right: 6px; - text-align: left; - } - h1 { - font-size: 120%%; - font-weight: bold; - color: white; - background: #000088; - margin-left: 0em; - } - p.work { - font-size: 80%%; - margin-left: 5em; - margin-top: 0px; - margin-bottom: 0px; - margin-right: 6px; - text-align: left; - } - p.author { - font-size: 100%%; - margin-left: 2em; - margin-top: 0px; - margin-bottom: 0px; - margin-right: 6px; - text-align: left; - } - p.publication { - font-size: 80%%; - margin-left: 4em; - margin-top: 0px; - margin-bottom: 0px; - margin-right: 6px; - text-align: left; - } - p.letter { - font-weight: bold; - font-size: 60%%; - margin-left: 1em; - margin-top: 0px; - margin-bottom: 0px; - margin-right: 6px; - text-align: left; - color: white; - background: #880000; - } - p.lev0 { - font-size: 120%%; - margin-left: 1em; - color: white; - background: #000000; - } - - p.lev1 { - font-size: 110%%; - margin-left: 2em; - color: white; - background: #444444; - } - p.lev2 { - font-size: 100%%; - margin-left: 3em; - background: #888888; - } - p.lev3 { - font-size: 90%%; - margin-left: 4em; - background: #bbbbbb; - } - p.lev4 { - font-size: 80%%; - margin-left: 5em; - background: #eeeeee; - } - p.lev5 { - font-size: 80%%; - margin-left: 6em; - } -</style> -<link rel="shortcut icon" href="../_sisu/image/rb7.ico" /> -</head> -<body lang="en" xml:lang="en"> -<a name="top" id="top"></a> -<a name="up" id="up"></a> -<a name="start" id="start"></a> -<h1>Metadata Harvest - Topics (output organised by language & filetype)</h1> -<p>[<a href="../../index.html"> HOME </a>] also see <a href="authors.html">Metadata Harvest - Authors</a></p> -<p><a href="#A">A</a>, <a href="#B">B</a>, <a href="#C">C</a>, <a href="#D">D</a>, <a href="#E">E</a>, <a href="#F">F</a>, <a href="#G">G</a>, <a href="#H">H</a>, <a href="#I">I</a>, <a href="#J">J</a>, <a href="#K">K</a>, <a href="#L">L</a>, <a href="#M">M</a>, <a href="#N">N</a>, <a href="#O">O</a>, <a href="#P">P</a>, <a href="#Q">Q</a>, <a href="#R">R</a>, <a href="#S">S</a>, <a href="#T">T</a>, <a href="#U">U</a>, <a href="#V">V</a>, <a href="#W">W</a>, <a href="#X">X</a>, <a href="#Y">Y</a>, <a href="#Z">Z</a>, -<p></p> -<hr /> -<p class="tiny"><a href="../../en/manifest/topics.html">English</a> </p> -<hr /> -┃") ~ "\n"; - char _prev_k = "_".to!char; - int _kn; - foreach(k0; - subject_trees.keys - .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) - ) { - if (k0.toUpper.to!(char[])[0] != _prev_k) { - topics ~= format(q"┃<p class="letter"><a name="%s">%s</a></p><p class="book_index_lev1"><a name="a"></a></p>┃", - k0.toUpper.to!(char[])[0], - k0.toUpper.to!(char[])[0], - ); - _prev_k = k0.toUpper.to!(char[])[0]; - } - if (k0 != "_a") { - topics ~= format(q"┃<p class="lev0"><a name="%s">%s</a></p>┃", - k0, k0,) ~ "\n"; - writeln("", k0); - if ("_a" in subject_trees[k0]) { - foreach (t_a_; - subject_trees[k0]["_a"]["_a"]["_a"] - .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) - ) { - _auth = []; - if (t_a_.author_arr.length < 2) { - _auth = format(q"┃ <a href="authors.html#%s">%s</a>┃", - t_a_.author_surname, - t_a_.author, - ); - } else { - foreach (a; t_a_.author_arr) { - _auth ~= format(q"┃ <a href="authors.html#%s">%s</a>,┃", - t_a_.author_surname, - a, - ); - } - } - topics ~= format(q"┃<p class="work"><a href="%s">"%s"</a> -%s┃", - "url", - t_a_.title, - _auth, - ) ~ "\n"; - writeln("- ", t_a_.title, " - ", t_a_.author); - } - } - foreach(k1; - subject_trees[k0].keys - .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) - ) { - if (k1 != "_a") { - topics ~= format(q"┃<p class="lev1"><a name="%s">%s</a></p>┃", - k1, k1,) ~ "\n"; - writeln(" ", k1); - if ("_a" in subject_trees[k0][k1]) { - foreach (t_a_; - subject_trees[k0][k1]["_a"]["_a"] - .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) - ) { - _auth = []; - if (t_a_.author_arr.length < 2) { - _auth = format(q"┃ <a href="authors.html#%s">%s</a>┃", - t_a_.author_surname, - t_a_.author, - ); - } else { - foreach (a; t_a_.author_arr) { - _auth ~= format(q"┃ <a href="authors.html#%s">%s</a>,┃", - t_a_.author_surname, - a, - ); - } - } - topics ~= format(q"┃<p class="work"><a href="%s">%s</a> -%s┃", - "url", - t_a_.title, - _auth, - ) ~ "\n"; - writeln(" - ", t_a_.title, " - ", t_a_.author); - } - } - } - foreach(k2; - subject_trees[k0][k1].keys - .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) - ) { - if (k2 != "_a") { - topics ~= format(q"┃<p class="lev2"><a name="%s">%s</a></p>┃", - k2, k2,) ~ "\n"; - writeln(" ", k2); - if ("_a" in subject_trees[k0][k1][k2]) { - foreach (t_a_; - subject_trees[k0][k1][k2]["_a"] - .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) - ) { - _auth = []; - if (t_a_.author_arr.length < 2) { - _auth = format(q"┃ <a href="authors.html#%s">%s</a>┃", - t_a_.author_surname, - t_a_.author, - ); - } else { - foreach (a; t_a_.author_arr) { - _auth ~= format(q"┃ <a href="authors.html#%s">%s</a>,┃", - t_a_.author_surname, - a, - ); - } - } - topics ~= format(q"┃<p class="work"><a href="%s">%s</a> -%s┃", - "url", - t_a_.title, - _auth, - ) ~ "\n"; - writeln(" - ", t_a_.title, " - ", t_a_.author); - } - } - } - foreach(k3; - subject_trees[k0][k1][k2].keys - .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) - ) { - if (k3 != "_a") { - topics ~= format(q"┃<p class="lev3"><a name="%s">%s</a></p>┃", - k3, k3,) ~ "\n"; - writeln(" ", k3); - { - foreach (t_a_; - subject_trees[k0][k1][k2][k3] - .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) - ) { - _auth = []; - if (t_a_.author_arr.length < 2) { - _auth = format(q"┃<a href="authors.html#%s">%s</a>┃", - t_a_.author_surname, - t_a_.author, - ); - } else { - foreach (a; t_a_.author_arr) { - _auth ~= format(q"┃ <a href="authors.html#%s">%s</a>,┃", - t_a_.author_surname, - a, - ); - } - } - topics ~= format(q"┃ <p class="work"><a href="%s">%s</a> -%s┃", - "url", - t_a_.title, - _auth, - ) ~ "\n"; - writeln(" - ", t_a_.title, " - ", t_a_.author); - } - } - } - } - } - } - } + if (hvst.harvests.length > 0) { + if (_opt_action.harvest_topics) { + DocReformMetaDocHarvestsTopics!()(hvst, _opt_action); } - topics ~= format(q"┃ -<hr /> -<a name="bottom" id="bottom"></a> -<a name="down" id="down"></a> -<a name="end" id="end"></a> -<a name="finish" id="finish"></a> -<a name="stop" id="stop"></a> -<a name="credits"></a> -</body> -</html> -┃") ~ "\n"; - try { - auto f = File("topics.html", "w"); - foreach (o; topics) { - f.writeln(o); - } - } catch (ErrnoException ex) { - // Handle error - } - } - if ((_opt_action.verbose - || _opt_action.very_verbose) - && harvests.length > 0 - ) { - string[] authors = []; - authors ~= format(q"┃ -<!DOCTYPE html> -<html> -<head> -<meta charset="utf-8"> -<title>Metadata Harvest - Authors</title> -<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> -<meta name="dc.title" content= "metadata harvest, Authors - information Structuring Universe, Structured information Serialised Units" /> -<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" /> -<meta name="generator" content="doc_reform" /> -<link rel="generator" href="http://sisudoc.org" /> -<link href="../../_sisu/css/harvest.css" rel="stylesheet" > -<style TYPE="text/css"> -/* DocReform harvest css default stylesheet */ - body { - color: black; - background: #ffffff; - background-color: #ffffff; - } - a:link { - color: #003399; - text-decoration: none; - } - a:visited { - color: #003399; - text-decoration: none; - } - a:hover { - color: #000000; - background-color: #f9f9aa; - } - a:hover img { - background-color: #ffffff; - } - a:active { - color: #003399; - text-decoration: underline; - } - - .norm, .bold { - line-height: 150%%; - margin-left: 1em; - margin-right: 2em; - margin-top: 10px; - margin-bottom: 0px; - text-indent: 0mm; - } - p, h0, h1, h2, h3, h4, h5, h6, h7 { - display: block; - font-family: verdana, arial, georgia, tahoma, sans-serif, helvetica, times, roman; - font-size: 100%%; - font-weight: normal; - line-height: 150%%; - /* text-align: justify; */ - margin-left: 1em; - text-indent: 0mm; - margin-top: 2px; - margin-bottom: 2px; - margin-right: 6px; - text-align: left; - } - h1 { - font-size: 120%%; - font-weight: bold; - color: white; - background: #000088; - margin-left: 0em; - } - p.work { - font-size: 80%%; - margin-left: 5em; - margin-top: 0px; - margin-bottom: 0px; - margin-right: 6px; - text-align: left; - } - p.author { - font-size: 100%%; - margin-left: 2em; - margin-top: 0px; - margin-bottom: 0px; - margin-right: 6px; - text-align: left; - } - p.publication { - font-size: 80%%; - margin-left: 4em; - margin-top: 0px; - margin-bottom: 0px; - margin-right: 6px; - text-align: left; - } - p.letter { - font-weight: bold; - font-size: 60%%; - margin-left: 1em; - margin-top: 0px; - margin-bottom: 0px; - margin-right: 6px; - text-align: left; - color: white; - background: #880000; - } - p.lev0 { - font-size: 120%%; - margin-left: 1em; - color: white; - background: #000000; - } - - p.lev1 { - font-size: 110%%; - margin-left: 2em; - color: white; - background: #444444; - } - p.lev2 { - font-size: 100%%; - margin-left: 3em; - background: #888888; - } - p.lev3 { - font-size: 90%%; - margin-left: 4em; - background: #bbbbbb; - } - p.lev4 { - font-size: 80%%; - margin-left: 5em; - background: #eeeeee; - } - p.lev5 { - font-size: 80%%; - margin-left: 6em; - } -</style> -<link rel="shortcut icon" href="../_sisu/image/rb7.ico" /> -</head> -<body lang="en" xml:lang="en"> -<a name="top" id="top"></a> -<a name="up" id="up"></a> -<a name="start" id="start"></a> -<h1>Metadata Harvest - Authors (output organised by language & filetype)</h1> -<p>[<a href="../../index.html"> HOME </a>] also see <a href="topics.html">Metadata Harvest - Topics</a></p> -<p></p> -<hr /> -<p class="tiny"><a href="../../en/manifest/authors.html">English</a> </p> -<hr /> -<p><a href="#A">A</a>, <a href="#B">B</a>, <a href="#C">C</a>, <a href="#D">D</a>, <a href="#E">E</a>, <a href="#F">F</a>, <a href="#G">G</a>, <a href="#H">H</a>, <a href="#I">I</a>, <a href="#J">J</a>, <a href="#K">K</a>, <a href="#L">L</a>, <a href="#M">M</a>, <a href="#N">N</a>, <a href="#O">O</a>, <a href="#P">P</a>, <a href="#Q">Q</a>, <a href="#R">R</a>, <a href="#S">S</a>, <a href="#T">T</a>, <a href="#U">U</a>, <a href="#V">V</a>, <a href="#W">W</a>, <a href="#X">X</a>, <a href="#Y">Y</a>, <a href="#Z">Z</a>, -┃") ~ "\n"; - string[string] _au; - string[] _auth_date_title; - string[] _author_date_title; - string _prev_auth = ""; - char _prev_k = "_".to!char; - foreach(doc_harvest; - harvests - .multiSort!( - "toUpper(a.author_surname_fn) < toUpper(b.author_surname_fn)", - "a.date_published < b.date_published", - "a.title < b.title", - SwapStrategy.unstable - ) - ) { - if (doc_harvest.author_surname_fn != _prev_auth) { - _au[doc_harvest.author_surname_fn] - = format(q"┃<p class="author"><a name="%s">%s</a></p> <p class="publication">%s "<a href="%s">%s</a>" [%s]</p>┃", - doc_harvest.author_surname, - doc_harvest.author_surname_fn, - (doc_harvest.date_published.length > 0) - ? doc_harvest.date_published : "", - "url", - doc_harvest.title, - doc_harvest.language, - ); - _prev_auth = doc_harvest.author_surname_fn; - } else { - _au[doc_harvest.author_surname_fn] - ~= format(q"┃<p class="publication">%s "<a href="%s">%s</a>" [%s]</p>┃", - (doc_harvest.date_published.length > 0) - ? doc_harvest.date_published : "", - "url", - doc_harvest.title, - doc_harvest.language, - ); - } - _author_date_title ~= format(q"┃%s %s "%s" [%s]┃", - doc_harvest.author_surname_fn, - (doc_harvest.date_published.length > 0) - ? "(" ~ doc_harvest.date_published ~ ")" : "", - doc_harvest.title, - doc_harvest.language, - ); - // writeln(doc_harvest.author_date_title); - } - foreach (k; _au.keys.sort) { - if (k.toUpper.to!(char[])[0] != _prev_k) { - authors ~= format(q"┃<p class="letter"><a name="%s">%s</a></p><p class="book_index_lev1"><a name="a"></a></p>┃", - k.toUpper.to!(char[])[0], - k.toUpper.to!(char[])[0], - ); - _prev_k = k.toUpper.to!(char[])[0]; - } - authors ~= _au[k]; - } - authors ~= format(q"┃ -<hr /> -<a name="bottom" id="bottom"></a> -<a name="down" id="down"></a> -<a name="end" id="end"></a> -<a name="finish" id="finish"></a> -<a name="stop" id="stop"></a> -<a name="credits"></a> -</body> -</html> -┃") ~ "\n"; - try { - auto f = File("authors.html", "w"); - foreach (o; authors) { - f.writeln(o); - } - } catch (ErrnoException ex) { - // Handle error - } - foreach(_adt; _author_date_title.sort) { - writeln(_adt); + if (_opt_action.harvest_authors) { + DocReformMetaDocHarvestsAuthors!()(hvst.harvests, _opt_action); } } } @@ -798,6 +200,8 @@ import std.process; import doc_reform.meta, + doc_reform.meta.metadoc_harvests_authors, + doc_reform.meta.metadoc_harvests_topics, doc_reform.meta.metadoc_summary, doc_reform.meta.metadoc_harvest, doc_reform.meta.metadoc_from_src, @@ -875,26 +279,7 @@ mixin outputHub; #+NAME: doc_reform_init #+BEGIN_SRC d -struct Harvest { - string title = ""; - string[] author_arr = []; - string author = ""; - string author_surname = ""; - string author_surname_fn = ""; - string language = ""; - string language_original = ""; - string uid = ""; - string date_published = ""; - string[] topic_register_arr = []; - string path_html_seg = ""; - string path_html_scroll = ""; - string path_epub = ""; - string url_html_seg = ""; - string url_html_scroll = ""; - string url_epub = ""; -} -Harvest harvested; -Harvest[] harvests; +auto hvst = DocReformHarvest!(); #+END_SRC **** args :args: @@ -1094,15 +479,24 @@ struct OptActions { || opts["harvest-authors"] || opts["harvest-topics"] ) - ? true - : false; + ? true : false; return _is; } bool harvest_authors() { - return opts["harvest-authors"]; + bool _is = ( + opts["harvest"] + || opts["harvest-authors"] + ) + ? true : false; + return _is; } bool harvest_topics() { - return opts["harvest-topics"]; + bool _is = ( + opts["harvest"] + || opts["harvest-topics"] + ) + ? true : false; + return _is; } bool html() { bool _is; @@ -1253,6 +647,7 @@ struct OptActions { _is = false; } else if (opts["abstraction"] || concordance + || harvest || html || epub || odt @@ -1306,6 +701,7 @@ struct OptActions { || concordance || source || pod + || harvest || html || epub || odt @@ -1555,19 +951,7 @@ if (doc_matters.opt.action.verbose) { #+NAME: doc_reform_each_file_do_debugs_checkdoc #+BEGIN_SRC d if (doc_matters.opt.action.harvest) { - if (doc_matters.opt.action.harvest_authors) { - } - if (doc_matters.opt.action.harvest_topics) { - } - Harvest[] DocReformMetaDocHarvests()( - Harvest harvested, - Harvest[] harvests, - ) { - harvests ~= harvested; - return harvests; - } - harvested = DocReformMetaDocHarvest!()(doc_matters, harvested); - harvests = DocReformMetaDocHarvests!()(harvested, harvests); + hvst.harvests ~= DocReformMetaDocHarvest!()(doc_matters, hvst); } #+END_SRC @@ -1848,7 +1232,7 @@ struct DocumentMatters { #+NAME: doc_reform_each_file_do_document_matters #+BEGIN_SRC d - auto conf_make_meta() { // TODO meld with all make instructions + auto conf_make_meta() { return _make_and_meta_struct; } auto has() { @@ -1947,6 +1331,72 @@ if ((_opt_action.debug_do) } #+END_SRC +**** H. abridged doc matters, for harvest (from doc head only, doc abstraction not performed) +- harvest abridged doc matters gathered +***** doc matters shared + +#+NAME: doc_reform_each_file_do_document_matters_abridged +#+BEGIN_SRC d +if ((_opt_action.debug_do) +|| (_opt_action.verbose) +) { + writeln("step4 commence → (doc_matters)"); +} +struct DocumentMattersShared { + auto env() { + struct Env_ { + auto pwd() { + return _manifest.env.pwd; + } + auto home() { + return _manifest.env.home; + } + } + return Env_(); + } + auto opt() { + struct Opt_ { + auto action() { + return _opt_action; + } + } + return Opt_(); + } +} +DocumentMattersShared doc_matters_shared = DocumentMattersShared(); +#+END_SRC + +***** abridged doc matters, for harvest (from doc head only, doc abstraction not performed) TODO + +#+NAME: doc_reform_each_file_do_document_matters_abridged +#+BEGIN_SRC d +struct DocumentMattersAbridged { + auto conf_make_meta() { + return _make_and_meta_struct; + } + auto src() { + return _manifest.src; + } + auto src_path_info() { + return DocReformPathsSRC!()(_manifest.env.pwd, _manifest.src.file_with_absolute_path); + } + auto pod() { + return _manifest.pod; + } + auto sqlite() { + struct SQLite_ { + string filename() { + return _opt_action.sqlite_filename; + } + } + return SQLite_(); + } + auto output_path() { + return _manifest.output.path; + } +} +#+END_SRC + * 3. document abstraction _summary_ :module:doc_reform:metadoc_summary: ** 0. module template metadoc summary - document summary from abstraction @@ -2100,7 +1550,7 @@ module doc_reform.meta.metadoc_harvest; template DocReformMetaDocHarvest() { auto DocReformMetaDocHarvest(T,H)( T doc_matters, - H harvest, + H hvst, ) { <<metadoc_harvest_imports>> mixin InternalMarkup; @@ -2164,20 +1614,626 @@ writefln( #+name: meta_metadoc_harvest #+BEGIN_SRC d import doc_reform.output.paths_output; -auto pth_html = DocReformPathsHTML!()(doc_matters.output_path, doc_matters.src.language); -harvest.title = doc_matters.conf_make_meta.meta.title_full; -harvest.author = doc_matters.conf_make_meta.meta.creator_author; -harvest.author_surname = doc_matters.conf_make_meta.meta.creator_author_surname; -harvest.author_surname_fn = doc_matters.conf_make_meta.meta.creator_author_surname_fn; -harvest.author_arr = doc_matters.conf_make_meta.meta.creator_author_arr; -harvest.language_original = doc_matters.conf_make_meta.meta.original_language; -harvest.language = doc_matters.src.language; -harvest.uid = doc_matters.src.doc_uid; -harvest.date_published = doc_matters.conf_make_meta.meta.date_published; -harvest.topic_register_arr = doc_matters.conf_make_meta.meta.classify_topic_register_arr; -harvest.path_html_scroll = pth_html.fn_scroll(doc_matters.src.filename); -harvest.path_html_seg = pth_html.fn_seg(doc_matters.src.filename, "toc"); -return harvest; +auto pth_html = DocReformPathsHTML!()(doc_matters.output_path, doc_matters.src.language); +hvst.harvest.title = doc_matters.conf_make_meta.meta.title_full; +hvst.harvest.author = doc_matters.conf_make_meta.meta.creator_author; +hvst.harvest.author_surname = doc_matters.conf_make_meta.meta.creator_author_surname; +hvst.harvest.author_surname_fn = doc_matters.conf_make_meta.meta.creator_author_surname_fn; +hvst.harvest.author_arr = doc_matters.conf_make_meta.meta.creator_author_arr; +hvst.harvest.language_original = doc_matters.conf_make_meta.meta.original_language; +hvst.harvest.language = doc_matters.src.language; +hvst.harvest.uid = doc_matters.src.doc_uid; +hvst.harvest.date_published = doc_matters.conf_make_meta.meta.date_published; +hvst.harvest.topic_register_arr = doc_matters.conf_make_meta.meta.classify_topic_register_arr; +hvst.harvest.path_html_scroll = pth_html.fn_scroll(doc_matters.src.filename); +hvst.harvest.path_html_seg = pth_html.fn_seg(doc_matters.src.filename, "toc"); +return hvst.harvest; +#+END_SRC + +** 0. module template metadoc harvest topics +*** 0. module template metadoc harvest topics template + +#+BEGIN_SRC d :tangle "../src/doc_reform/meta/metadoc_harvests_topics.d" +module doc_reform.meta.metadoc_harvests_topics; + import + std.algorithm, + std.array, + std.exception, + std.regex, + std.stdio, + std.string, + std.conv : to; + import + doc_reform.meta.defaults, + doc_reform.meta.rgx; + mixin DocReformHarvest; + mixin InternalMarkup; + mixin DocReformRgxInit; +template DocReformMetaDocHarvestsTopics() { + auto mkup = InlineMarkup(); + void DocReformMetaDocHarvestsTopics(H,O)( + H hvst, + O _opt_action, + ) { + <<harvested_topics>> +<<harvested_topics_html_head_1>> +<<harvested_html_head>> +<<harvested_topics_html_head_2>> + <<harvested_topics_html>> + topics +<<harvested_html_bottom>> + <<harvested_topics_html_write>> + } +} +#+END_SRC + +*** order topic register + +#+NAME: harvested_topics +#+BEGIN_SRC d +auto min_repeat_number = 42; +string[] _document_topic_register; +string[] _topic_register; +string[] _sub_topic_register; +string[] topics = []; +string _auth = ""; +foreach(k, doc_harvest; hvst.harvests) { + _topic_register = []; + foreach(topic; doc_harvest.topic_register_arr.sort) { + _sub_topic_register = []; + string _spaces; + string[] subject_tree = topic.split(mkup.sep); + switch (subject_tree.length) { + case 1: + hvst.subject_trees[subject_tree[0]]["_a"]["_a"]["_a"] ~= doc_harvest; + break; + case 2: + hvst.subject_trees[subject_tree[0]][subject_tree[1]]["_a"]["_a"] ~= doc_harvest; + break; + case 3: + hvst.subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]]["_a"] ~= doc_harvest; + break; + case 4: + hvst.subject_trees[subject_tree[0]][subject_tree[1]][subject_tree[2]][subject_tree[3]] ~= doc_harvest; + break; + default: + break; + } + _topic_register ~= _sub_topic_register.join("\n"); + } + auto char_repeat_number = (doc_harvest.title.length + + doc_harvest.author.length + 16); + char_repeat_number = (char_repeat_number > min_repeat_number) + ? char_repeat_number + : min_repeat_number; + _document_topic_register ~= format( + "\"%s\", %s%s\n%s", + doc_harvest.title, + doc_harvest.author, + (doc_harvest.date_published.length > 0) ? " (" ~ doc_harvest.date_published ~ ")" : "", + _topic_register.sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable).release.join("\n"), + ); +} +#+END_SRC + +*** harvested topics html head + +#+NAME: harvested_topics_html_head_1 +#+BEGIN_SRC d + topics ~= format(q"┃<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8"> +<title>Metadata Harvest - Topics</title> +#+END_SRC + +*** harvested topics html head + +#+NAME: harvested_topics_html_head_2 +#+BEGIN_SRC d +</head> +<body lang="en" xml:lang="en"> +<a name="top" id="top"></a> +<a name="up" id="up"></a> +<a name="start" id="start"></a> +<h1>Metadata Harvest - Topics (output organised by language & filetype)</h1> +<p>[<a href="../../index.html"> HOME </a>] also see <a href="authors.html">Metadata Harvest - Authors</a></p> +<p><a href="#A">A</a>, <a href="#B">B</a>, <a href="#C">C</a>, <a href="#D">D</a>, <a href="#E">E</a>, <a href="#F">F</a>, <a href="#G">G</a>, <a href="#H">H</a>, <a href="#I">I</a>, <a href="#J">J</a>, <a href="#K">K</a>, <a href="#L">L</a>, <a href="#M">M</a>, <a href="#N">N</a>, <a href="#O">O</a>, <a href="#P">P</a>, <a href="#Q">Q</a>, <a href="#R">R</a>, <a href="#S">S</a>, <a href="#T">T</a>, <a href="#U">U</a>, <a href="#V">V</a>, <a href="#W">W</a>, <a href="#X">X</a>, <a href="#Y">Y</a>, <a href="#Z">Z</a>, +<p></p> +<hr /> +<p class="tiny"><a href="../../en/manifest/topics.html">English</a> </p> +<hr /> +┃") ~ "\n"; +#+END_SRC + +*** harvested topics html + +#+NAME: harvested_topics_html +#+BEGIN_SRC d +char _prev_k = "_".to!char; +int _kn; +foreach(k0; + hvst.subject_trees.keys + .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) +) { + if (k0.toUpper.to!(char[])[0] != _prev_k) { + topics ~= format(q"┃<p class="letter"><a name="%s">%s</a></p><p class="book_index_lev1"><a name="a"></a></p>┃", + k0.toUpper.to!(char[])[0], + k0.toUpper.to!(char[])[0], + ); + _prev_k = k0.toUpper.to!(char[])[0]; + } + if (k0 != "_a") { + topics ~= format(q"┃<p class="lev0"><a name="%s">%s</a></p>┃", + k0, k0,) ~ "\n"; + if (_opt_action.very_verbose) { + writeln("", k0); + } + if ("_a" in hvst.subject_trees[k0]) { + foreach (t_a_; + hvst.subject_trees[k0]["_a"]["_a"]["_a"] + .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) + ) { + _auth = []; + if (t_a_.author_arr.length < 2) { + _auth = format(q"┃ <a href="authors.html#%s">%s</a>┃", + t_a_.author_surname, + t_a_.author, + ); + } else { + foreach (a; t_a_.author_arr) { + _auth ~= format(q"┃ <a href="authors.html#%s">%s</a>,┃", + t_a_.author_surname, + a, + ); + } + } + topics ~= format(q"┃<p class="work"><a href="%s">"%s"</a> -%s┃", + "url", + t_a_.title, + _auth, + ) ~ "\n"; + if (_opt_action.very_verbose) { + writeln("- ", t_a_.title, " - ", t_a_.author); + } + } + } + foreach(k1; + hvst.subject_trees[k0].keys + .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) + ) { + if (k1 != "_a") { + topics ~= format(q"┃<p class="lev1"><a name="%s">%s</a></p>┃", + k1, k1,) ~ "\n"; + if (_opt_action.very_verbose) { + writeln(" ", k1); + } + if ("_a" in hvst.subject_trees[k0][k1]) { + foreach (t_a_; + hvst.subject_trees[k0][k1]["_a"]["_a"] + .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) + ) { + _auth = []; + if (t_a_.author_arr.length < 2) { + _auth = format(q"┃ <a href="authors.html#%s">%s</a>┃", + t_a_.author_surname, + t_a_.author, + ); + } else { + foreach (a; t_a_.author_arr) { + _auth ~= format(q"┃ <a href="authors.html#%s">%s</a>,┃", + t_a_.author_surname, + a, + ); + } + } + topics ~= format(q"┃<p class="work"><a href="%s">%s</a> -%s┃", + "url", + t_a_.title, + _auth, + ) ~ "\n"; + if (_opt_action.very_verbose) { + writeln(" - ", t_a_.title, " - ", t_a_.author); + } + } + } + } + foreach(k2; + hvst.subject_trees[k0][k1].keys + .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) + ) { + if (k2 != "_a") { + topics ~= format(q"┃<p class="lev2"><a name="%s">%s</a></p>┃", + k2, k2,) ~ "\n"; + if (_opt_action.very_verbose) { + writeln(" ", k2); + } + if ("_a" in hvst.subject_trees[k0][k1][k2]) { + foreach (t_a_; + hvst.subject_trees[k0][k1][k2]["_a"] + .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) + ) { + _auth = []; + if (t_a_.author_arr.length < 2) { + _auth = format(q"┃ <a href="authors.html#%s">%s</a>┃", + t_a_.author_surname, + t_a_.author, + ); + } else { + foreach (a; t_a_.author_arr) { + _auth ~= format(q"┃ <a href="authors.html#%s">%s</a>,┃", + t_a_.author_surname, + a, + ); + } + } + topics ~= format(q"┃<p class="work"><a href="%s">%s</a> -%s┃", + "url", + t_a_.title, + _auth, + ) ~ "\n"; + if (_opt_action.very_verbose) { + writeln(" - ", t_a_.title, " - ", t_a_.author); + } + } + } + } + foreach(k3; + hvst.subject_trees[k0][k1][k2].keys + .sort!("toUpper(a) < toUpper(b)", SwapStrategy.unstable) + ) { + if (k3 != "_a") { + topics ~= format(q"┃<p class="lev3"><a name="%s">%s</a></p>┃", + k3, k3,) ~ "\n"; + if (_opt_action.very_verbose) { + writeln(" ", k3); + } + { + foreach (t_a_; + hvst.subject_trees[k0][k1][k2][k3] + .multiSort!("toUpper(a.title) < toUpper(b.title)", "a.author < b.author", SwapStrategy.unstable) + ) { + _auth = []; + if (t_a_.author_arr.length < 2) { + _auth = format(q"┃<a href="authors.html#%s">%s</a>┃", + t_a_.author_surname, + t_a_.author, + ); + } else { + foreach (a; t_a_.author_arr) { + _auth ~= format(q"┃ <a href="authors.html#%s">%s</a>,┃", + t_a_.author_surname, + a, + ); + } + } + topics ~= format(q"┃ <p class="work"><a href="%s">%s</a> -%s┃", + "url", + t_a_.title, + _auth, + ) ~ "\n"; + if (_opt_action.very_verbose) { + writeln(" - ", t_a_.title, " - ", t_a_.author); + } + } + } + } + } + } + } + } +} +#+END_SRC + +*** harvested topics write + +#+NAME: harvested_topics_html_write +#+BEGIN_SRC d +try { + auto f = File("topics.html", "w"); + foreach (o; topics) { + f.writeln(o); + } +} catch (ErrnoException ex) { + // Handle error +} +#+END_SRC + +** 0. module template metadoc harvests authors +*** 0. module template metadoc harvest authors + +#+BEGIN_SRC d :tangle "../src/doc_reform/meta/metadoc_harvests_authors.d" +module doc_reform.meta.metadoc_harvests_authors; + import + std.algorithm, + std.array, + std.exception, + std.regex, + std.stdio, + std.string, + std.conv : to; + import + doc_reform.meta.defaults, + doc_reform.meta.rgx; + mixin DocReformHarvest; + mixin InternalMarkup; + mixin DocReformRgxInit; +template DocReformMetaDocHarvestsAuthors() { + auto mkup = InlineMarkup(); + void DocReformMetaDocHarvestsAuthors(H,O)( + H harvests, + O _opt_action, + ) { +<<harvested_authors_html_head_1>> +<<harvested_html_head>> +<<harvested_authors_html_head_2>> + authors +<<harvested_html_bottom>> + <<harvested_authors_html_write>> + } +} +#+END_SRC + +*** harvested authors html head + +#+NAME: harvested_authors_html_head_1 +#+BEGIN_SRC d + string[] authors = []; + authors ~= format(q"┃ +<!DOCTYPE html> +<html> +<head> +<meta charset="utf-8"> +<title>Metadata Harvest - Authors</title> +#+END_SRC + +*** harvested authors html head + +#+NAME: harvested_authors_html_head_2 +#+BEGIN_SRC d +</head> +<body lang="en" xml:lang="en"> +<a name="top" id="top"></a> +<a name="up" id="up"></a> +<a name="start" id="start"></a> +<h1>Metadata Harvest - Authors (output organised by language & filetype)</h1> +<p>[<a href="../../index.html"> HOME </a>] also see <a href="topics.html">Metadata Harvest - Topics</a></p> +<p></p> +<hr /> +<p class="tiny"><a href="../../en/manifest/authors.html">English</a> </p> +<hr /> +<p><a href="#A">A</a>, <a href="#B">B</a>, <a href="#C">C</a>, <a href="#D">D</a>, <a href="#E">E</a>, <a href="#F">F</a>, <a href="#G">G</a>, <a href="#H">H</a>, <a href="#I">I</a>, <a href="#J">J</a>, <a href="#K">K</a>, <a href="#L">L</a>, <a href="#M">M</a>, <a href="#N">N</a>, <a href="#O">O</a>, <a href="#P">P</a>, <a href="#Q">Q</a>, <a href="#R">R</a>, <a href="#S">S</a>, <a href="#T">T</a>, <a href="#U">U</a>, <a href="#V">V</a>, <a href="#W">W</a>, <a href="#X">X</a>, <a href="#Y">Y</a>, <a href="#Z">Z</a>, +┃") ~ "\n"; + string[string] _au; + string[] _auth_date_title; + string[] _author_date_title; + string _prev_auth = ""; + char _prev_k = "_".to!char; + foreach(doc_harvest; + harvests + .multiSort!( + "toUpper(a.author_surname_fn) < toUpper(b.author_surname_fn)", + "a.date_published < b.date_published", + "a.title < b.title", + SwapStrategy.unstable + ) + ) { + if (doc_harvest.author_surname_fn != _prev_auth) { + _au[doc_harvest.author_surname_fn] + = format(q"┃<p class="author"><a name="%s">%s</a></p> <p class="publication">%s "<a href="%s">%s</a>" [%s]</p>┃", + doc_harvest.author_surname, + doc_harvest.author_surname_fn, + (doc_harvest.date_published.length > 0) + ? doc_harvest.date_published : "", + "url", + doc_harvest.title, + doc_harvest.language, + ); + _prev_auth = doc_harvest.author_surname_fn; + } else { + _au[doc_harvest.author_surname_fn] + ~= format(q"┃<p class="publication">%s "<a href="%s">%s</a>" [%s]</p>┃", + (doc_harvest.date_published.length > 0) + ? doc_harvest.date_published : "", + "url", + doc_harvest.title, + doc_harvest.language, + ); + } + _author_date_title ~= format(q"┃%s %s "%s" [%s]┃", + doc_harvest.author_surname_fn, + (doc_harvest.date_published.length > 0) + ? "(" ~ doc_harvest.date_published ~ ")" : "", + doc_harvest.title, + doc_harvest.language, + ); + } + foreach (k; _au.keys.sort) { + if (k.toUpper.to!(char[])[0] != _prev_k) { + authors ~= format(q"┃<p class="letter"><a name="%s">%s</a></p><p class="book_index_lev1"><a name="a"></a></p>┃", + k.toUpper.to!(char[])[0], + k.toUpper.to!(char[])[0], + ); + _prev_k = k.toUpper.to!(char[])[0]; + } + authors ~= _au[k]; + } +#+END_SRC + +*** harvested authors write + +#+NAME: harvested_authors_html_write +#+BEGIN_SRC d +try { + auto f = File("authors.html", "w"); + foreach (o; authors) { + f.writeln(o); + } +} catch (ErrnoException ex) { + // Handle error +} +if (_opt_action.verbose + || _opt_action.very_verbose +) { + foreach(_adt; _author_date_title.sort) { + writeln(_adt); + } +} +#+END_SRC + +** harvested authors & topics shared html +*** harvested html head + +#+NAME: harvested_html_head +#+BEGIN_SRC d +<meta http-equiv="Content-Type" content="text/html;charset=utf-8" /> +<meta name="dc.title" content= "metadata harvest, Authors & Topics - information Structuring Universe, Structured information Serialised Units" /> +<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" /> +<meta name="generator" content="doc_reform" /> +<link rel="generator" href="http://sisudoc.org" /> +<link href="../../_sisu/css/harvest.css" rel="stylesheet"> +<style TYPE="text/css"> +/* DocReform harvest css default stylesheet */ + body { + color: black; + background: #ffffff; + background-color: #ffffff; + } + a:link { + color: #003399; + text-decoration: none; + } + a:visited { + color: #003399; + text-decoration: none; + } + a:hover { + color: #000000; + background-color: #f9f9aa; + } + a:hover img { + background-color: #ffffff; + } + a:active { + color: #003399; + text-decoration: underline; + } + + .norm, .bold { + line-height: 150%%; + margin-left: 1em; + margin-right: 2em; + margin-top: 10px; + margin-bottom: 0px; + text-indent: 0mm; + } + p, h0, h1, h2, h3, h4, h5, h6, h7 { + display: block; + font-family: verdana, arial, georgia, tahoma, sans-serif, helvetica, times, roman; + font-size: 100%%; + font-weight: normal; + line-height: 150%%; + /* text-align: justify; */ + margin-left: 1em; + text-indent: 0mm; + margin-top: 2px; + margin-bottom: 2px; + margin-right: 6px; + text-align: left; + } + h1 { + font-size: 120%%; + font-weight: bold; + color: white; + background: #000088; + margin-left: 0em; + } + p.work { + font-size: 80%%; + margin-left: 5em; + margin-top: 0px; + margin-bottom: 0px; + margin-right: 6px; + text-align: left; + } + p.author { + font-size: 100%%; + margin-left: 2em; + margin-top: 0px; + margin-bottom: 0px; + margin-right: 6px; + text-align: left; + } + p.publication { + font-size: 80%%; + margin-left: 4em; + margin-top: 0px; + margin-bottom: 0px; + margin-right: 6px; + text-align: left; + } + p.letter { + font-weight: bold; + font-size: 60%%; + margin-left: 1em; + margin-top: 0px; + margin-bottom: 0px; + margin-right: 6px; + text-align: left; + color: white; + background: #880000; + } + p.lev0 { + font-size: 120%%; + margin-left: 1em; + color: white; + background: #000000; + } + + p.lev1 { + font-size: 110%%; + margin-left: 2em; + color: white; + background: #444444; + } + p.lev2 { + font-size: 100%%; + margin-left: 3em; + background: #888888; + } + p.lev3 { + font-size: 90%%; + margin-left: 4em; + background: #bbbbbb; + } + p.lev4 { + font-size: 80%%; + margin-left: 5em; + background: #eeeeee; + } + p.lev5 { + font-size: 80%%; + margin-left: 6em; + } +</style> +<link rel="shortcut icon" href="../_sisu/image/rb7.ico" /> +#+END_SRC + +*** harvested html bottom + +#+NAME: harvested_html_bottom +#+BEGIN_SRC d + ~= format(q"┃ +<hr /> +<a name="bottom" id="bottom"></a> +<a name="down" id="down"></a> +<a name="end" id="end"></a> +<a name="finish" id="finish"></a> +<a name="stop" id="stop"></a> +<a name="credits"></a> +</body> +</html> +┃") ~ "\n"; #+END_SRC * __END__ |