diff options
Diffstat (limited to 'org/meta_abstraction.org')
-rw-r--r-- | org/meta_abstraction.org | 244 |
1 files changed, 158 insertions, 86 deletions
diff --git a/org/meta_abstraction.org b/org/meta_abstraction.org index 43cd85d..f74c8d6 100644 --- a/org/meta_abstraction.org +++ b/org/meta_abstraction.org @@ -36,10 +36,11 @@ template SiSUdocAbstraction() { /+ ↓ abstraction struct init +/ <<abs_top_init_struct>> /+ ↓ abstract marked up document +/ - auto SiSUdocAbstraction(Src,CMM,Opt)( + auto SiSUdocAbstraction(Src,CMM,Opt,Mfst)( Src markup_sourcefile_content, CMM conf_make_meta, Opt opt_action, + Mfst manifest_matter, ) { static auto rgx = Rgx(); debug(asserts) { @@ -49,7 +50,7 @@ template SiSUdocAbstraction() { <<abs_init_rest>> /+ abstraction init ↑ +/ <<make_tests>> - /+ ↓ loop markup document/text line by line +/ + /+ ↓ ↻ loop markup document/text line by line +/ srcDocLoop: foreach (line; markup_sourcefile_content) { // "line" variable can be empty but should never be null @@ -84,6 +85,7 @@ template SiSUdocAbstraction() { } /+ ← closed: loop markup document/text line by line +/ /+ ↓ post loop markup document/text +/ <<abs_post>> + <<abs_return_tuple>> /+ post loop markup document/text ↑ +/ } /+ ← closed: abstract doc source +/ /+ ↓ abstraction functions +/ @@ -409,7 +411,7 @@ string[][string][string] bookindex_unordered_hashes; /+ node +/ ObjGenericComposite comp_obj_heading, comp_obj_location, comp_obj_block, comp_obj_code, comp_obj_poem_ocn, comp_obj_comment; auto node_construct = NodeStructureMetadata(); -enum sObj { content, anchor_tags, notes_reg, notes_star, links } +enum sObj { content, anchor_tags, notes_reg, notes_star, links, image_no_dimensions } #+END_SRC *** scope @@ -566,8 +568,8 @@ debug (substitutions) { } #+END_SRC -** 2. _loop: process document body_ [+6] :loop: -*** loop scope :scope: +** 2. 
↻ *LOOP* _loop: process document body_ [+6] :loop: +*** Loop scope :scope: #+name: abs_in_loop_body_00 #+BEGIN_SRC d @@ -1297,6 +1299,7 @@ if ((obj_type_status["heading"] == State.on) comp_obj_para.inline_notes_reg = substantive_obj_misc_tuple[sObj.notes_reg]; comp_obj_para.inline_notes_star = substantive_obj_misc_tuple[sObj.notes_star]; comp_obj_para.inline_links = substantive_obj_misc_tuple[sObj.links]; + comp_obj_para.contains_image_without_dimensions = substantive_obj_misc_tuple[sObj.image_no_dimensions]; the_document_body_section ~= comp_obj_para; _common_reset_(line_occur, an_object, obj_type_status); indent=[ @@ -1782,11 +1785,11 @@ the_document_head_section ~= the_document_body_section[0]; the_document_body_section=the_document_body_section[1..$]; #+END_SRC -*** _post main-loop loops_ :post: -**** 1. _loop backmatter:_ loop up to lev4, extract html_segnames, set pointers +*** ↻ *LOOPs* _post main-loop loops_ :post: +**** 1. ↻ _Loop backmatter:_ loop up to lev4, extract html_segnames, set pointers -this extra loop is needed to determine pre and (in particular) next segment for -html, that is then used in a subsequent loop +this extra loop is used/needed to determine pre and (in particular) next segment +for html, that is then used in a subsequent loop NOTE there are issues attempting to do this on first pass as: - backmatter is created out of sequence and @@ -1795,10 +1798,9 @@ NOTE there are issues attempting to do this on first pass as: - it is quite neat to have all in one place as we have here - could optimise a bit by - - skipping second (and third) loop unless the html seg or epub output is - selected + - skipping this loop unless the html seg or epub output is selected -***** section: endnotes +***** ↻ Loop section: endnotes #+name: abs_post #+BEGIN_SRC d @@ -1815,7 +1817,7 @@ if (the_endnotes_section.length > 1) { } #+END_SRC -***** section: glossary +***** ↻ Loop section: glossary #+name: abs_post #+BEGIN_SRC d @@ -1832,7 +1834,7 @@ if 
(the_glossary_section.length > 1) { } #+END_SRC -***** section: bibliography +***** ↻ Loop section: bibliography #+name: abs_post #+BEGIN_SRC d @@ -1849,7 +1851,7 @@ if (the_bibliography_section.length > 1) { } #+END_SRC -***** section: book index +***** ↻ Loop section: book index #+name: abs_post #+BEGIN_SRC d @@ -1872,7 +1874,7 @@ if (the_bookindex_section["scroll"].length > 1) { } #+END_SRC -***** section: blurb +***** ↻ Loop section: blurb #+name: abs_post #+BEGIN_SRC d @@ -1889,7 +1891,7 @@ if (the_blurb_section.length > 1) { } #+END_SRC -**** 2. _loop all objects:_ encode _structural relationships_ (sections, segments, objects) +**** 2. ↻ _Loop all objects:_ encode _structural relationships_ (sections, segments, objects) needed for DOM structure, segnames & subtoc, backmatter pointers & unique image list @@ -1898,25 +1900,36 @@ if used minimally only for DOM structure, segnames, subtoc, could optimise by - skipping second and third pass unless the output html seg or epub is being made! 
-or could conveniently be used more extensively for ancestors as well (though -this can be extracted earlier) +- this loop could conveniently be used more extensively for ancestors as well + (though this information can be extracted earlier) Build here: - DOM structure -- ancestors and decendants - - ancestors could be determined earlier, but convenient to have here - - descendants could be in the form of: headings contained under current - heading, and/or; the range of objects under the current heading -- you could decide on a sequential object list, containing all objects (both - substantive and non-substantive objects), in addition to ocn, which are for - substantive/ citable objects within the document + - ancestors & descendants + - ancestors could be determined earlier, but convenient to have here + - descendants could be in the form of: headings contained under current + heading, and/or; the range of objects under the current heading +- numbering + - already given + - substantive object numbers + - endnote + - provide + - glossary + - bibliography + - book index + - blurb + - other non-substantive objects (prefix & other stuff) + - you could also decide on a sequential object list, containing all objects + (both substantive and non-substantive objects), in addition to ocn, which + are for substantive/ citable objects within the document (as needed) up to document heading 1~, lev4 html: -during the third pass all previous and next segment names are known -next are not yet known for backmatter during the second pass +- during this (the third) pass all previous and next segment names are known +- next are not yet known for backmatter during the second pass -***** images +***** Methods +****** images: extract #+name: abs_post #+BEGIN_SRC d @@ -1931,7 +1944,47 @@ auto extract_images(S)(S content_block) { string[] segnames_0_4; #+END_SRC -***** section: head +****** images: dimensions + +#+name: abs_post +#+BEGIN_SRC d +auto _image_dimensions(M,O)(M manifest_matter, 
O obj) { + if (obj.contains_image_without_dimensions) { + import std.math; + import imageformats; + int w, h, chans; + real _w, _h; + int max_width = 640; + foreach (m; obj.text.matchAll(rgx.inline_image_without_dimensions)) { + debug(images) { + writeln(manifest_matter.src.image_dir_path ~ "/" ~ m.captures["img"]); + } + read_image_info(manifest_matter.src.image_dir_path ~ "/" ~ m.captures["img"], w, h, chans); + // calculate, decide max width and proportionally reduce to keep w & h within it + debug(images) { + writeln("width: ", w, ", height: ", h); + } + if (w > max_width) { + _w = max_width; + _h = round((max_width / w.to!real) * h.to!real); + } else { + _w = w; + _h = h; + } + obj.text = obj.text.replaceFirst( + rgx.inline_image_without_dimensions, + ("$1☼$3,w" ~ _w.to!string ~ "h" ~ _h.to!string ~ " $6") + ); + } + debug(images) { + writeln("image without dimensions: ", obj.text); + } + } + return obj; +} +#+END_SRC + +***** ↻ Loop section: head #+name: abs_post #+BEGIN_SRC d @@ -1968,7 +2021,7 @@ foreach (ref obj; the_document_head_section) { } #+END_SRC -***** section: toc +***** ↻ Loop section: toc #+name: abs_post #+BEGIN_SRC d @@ -2029,7 +2082,7 @@ if (the_table_of_contents_section["scroll"].length > 1) { } #+END_SRC -***** section: document body +***** ↻ Loop section: document body #+name: abs_post #+BEGIN_SRC d @@ -2066,13 +2119,16 @@ if (the_document_body_section.length > 1) { obj = obj_heading_ancestors(obj, lv_ancestors_txt); } else if (obj.is_a == "para") { _images ~= extract_images(obj.text); + obj = _image_dimensions(manifest_matter, obj); } } } auto images=uniq(_images.sort()); #+END_SRC -***** section: endnotes +***** ↻ Loop section: endnotes + +- endnotes have their own number, (also use in node) and they belong to calling object #+name: abs_post #+BEGIN_SRC d @@ -2118,7 +2174,9 @@ if (the_endnotes_section.length > 1) { } #+END_SRC -***** section: glossary +***** ↻ Loop section: glossary + +- add glossary numbering, (also use in node) no need 
to show in text #+name: abs_post #+BEGIN_SRC d @@ -2164,7 +2222,9 @@ if (the_glossary_section.length > 1) { } #+END_SRC -***** section: bibliography +***** ↻ Loop section: bibliography + +- add bibliography numbering, (also use in node) no need to show in text #+name: abs_post #+BEGIN_SRC d @@ -2210,7 +2270,9 @@ if (the_bibliography_section.length > 1) { } #+END_SRC -***** section: book index (scroll, seg) +***** ↻ Loop section: book index (scroll, seg) + +- add book index numbering?, (also use in node) no need to show in text #+name: abs_post #+BEGIN_SRC d @@ -2305,7 +2367,7 @@ if (the_bookindex_section["scroll"].length > 1) { } #+END_SRC -***** section: blurb +***** ↻ Loop section: blurb #+name: abs_post #+BEGIN_SRC d @@ -2449,6 +2511,7 @@ if ((opt_action.html) document_section_keys_sequenced["seg"] ~= "tail"; document_section_keys_sequenced["scroll"] ~= "tail"; } +auto sequenced_document_keys = docSectKeysSeq!()(document_section_keys_sequenced); #+END_SRC *** dup @@ -2487,11 +2550,11 @@ dom_collapsed_buffer = [ 0, 0, 0, 0, 0, 0, 0, 0, 0,]; *** [#A] _return document tuple_ :return:tuple: -#+name: abs_post +#+name: abs_return_tuple #+BEGIN_SRC d auto t = tuple( document_the, - docSectKeysSeq!()(document_section_keys_sequenced), + sequenced_document_keys, segnames, segnames_0_4, images, @@ -4940,6 +5003,7 @@ static struct ObjInlineMarkupMunge { body { obj_txt_out = ""; bool urls = false; + bool images_without_dimensions = false; tail = ""; /+ special endnotes +/ obj_txt_in = obj_txt_in.replaceAll( @@ -4954,6 +5018,9 @@ static struct ObjInlineMarkupMunge { /+ image matched +/ if (obj_txt_in.match(rgx.smid_image_generic)) { obj_txt_in = images(obj_txt_in); + if (obj_txt_in.match(rgx.smid_mod_image_without_dimensions)) { + images_without_dimensions = true; + } } /+ url matched +/ if (obj_txt_in.match(rgx.smid_inline_url)) { @@ -4979,6 +5046,7 @@ static struct ObjInlineMarkupMunge { ftn[2], ftn[3], urls, + images_without_dimensions, ); return t; } @@ -5249,9 +5317,10 
@@ static struct ObjInlineMarkup { static __gshared string[] anchor_tags_ = []; auto x = munge.init; bool[string] obj_notes_and_links; - obj_notes_and_links["notes_reg"] = false; - obj_notes_and_links["notes_star"] = false; - obj_notes_and_links["links"] = false; + obj_notes_and_links["notes_reg"] = false; + obj_notes_and_links["notes_star"] = false; + obj_notes_and_links["links"] = false; + obj_notes_and_links["image_no_dimensions"] = false; switch (obj_["is"]) { case "heading": static __gshared string anchor_tag = ""; @@ -5300,6 +5369,7 @@ static struct ObjInlineMarkup { obj_notes_and_links["notes_star"] = x[2]; obj_notes_and_links["notes_plus"] = x[3]; obj_notes_and_links["links"] = x[4]; + obj_notes_and_links["image_no_dimensions"] = x[5]; break; } auto t = tuple( @@ -5308,6 +5378,7 @@ static struct ObjInlineMarkup { obj_notes_and_links["notes_reg"], obj_notes_and_links["notes_star"], obj_notes_and_links["links"], + obj_notes_and_links["image_no_dimensions"], ); anchor_tags_=[]; return t; @@ -7311,50 +7382,51 @@ struct HeadingAttrib { #+BEGIN_SRC d struct ObjGenericComposite { // size_t id; - string of_part = ""; - string of_section = ""; - string is_of = ""; - string is_a = ""; - string text = ""; - string obj_cite_number = ""; - string obj_cite_number_off = ""; - string obj_cite_number_bkidx = ""; - int obj_cite_number_type = 0; - string[] anchor_tags = []; - int indent_base = 0; - int indent_hang = 0; - bool bullet = false; - bool inline_links = false; - bool inline_notes_reg = false; - bool inline_notes_star = false; - string language = ""; // not implemented, consider - string code_block_syntax = ""; - int table_number_of_columns = 0; - double[] table_column_widths = []; - string[] table_column_aligns = []; - bool table_heading = false; - bool table_walls = false; // not implemented - int ocn = 0; - string segment_anchor_tag = ""; - string segname_prev = ""; - string segname_next = ""; - int parent_lev_markup = 0; - int parent_ocn = 0; - int[] ancestors = 
[]; - string marked_up_level = "9"; - int heading_lev_markup = 9; - int heading_lev_collapsed = 9; - int[] dom_markedup = [ 0, 0, 0, 0, 0, 0, 0, 0,]; - int[] dom_collapsed = [ 0, 0, 0, 0, 0, 0, 0, 0,]; - int[] heading_ancestors = [ 0, 0, 0, 0, 0, 0, 0, 0,]; - string[] heading_ancestors_text = [ "", "", "", "", "", "", "", "", ]; - string[] lev4_subtoc = []; - int heading_array_ptr = 0; - int ptr_doc_object = 0; - int ptr_html_segnames = 0; - int ptr_heading = 0; - int array_ptr = 0; - int heading_array_ptr_segments = 0; + string of_part = ""; + string of_section = ""; + string is_of = ""; + string is_a = ""; + string text = ""; + string obj_cite_number = ""; + string obj_cite_number_off = ""; + string obj_cite_number_bkidx = ""; + int obj_cite_number_type = 0; + string[] anchor_tags = []; + int indent_base = 0; + int indent_hang = 0; + bool bullet = false; + bool inline_links = false; + bool inline_notes_reg = false; + bool inline_notes_star = false; + bool contains_image_without_dimensions = false; + string language = ""; // not implemented, consider + string code_block_syntax = ""; + int table_number_of_columns = 0; + double[] table_column_widths = []; + string[] table_column_aligns = []; + bool table_heading = false; + bool table_walls = false; // not implemented + int ocn = 0; + string segment_anchor_tag = ""; + string segname_prev = ""; + string segname_next = ""; + int parent_lev_markup = 0; + int parent_ocn = 0; + int[] ancestors = []; + string marked_up_level = "9"; + int heading_lev_markup = 9; + int heading_lev_collapsed = 9; + int[] dom_markedup = [ 0, 0, 0, 0, 0, 0, 0, 0,]; + int[] dom_collapsed = [ 0, 0, 0, 0, 0, 0, 0, 0,]; + int[] heading_ancestors = [ 0, 0, 0, 0, 0, 0, 0, 0,]; + string[] heading_ancestors_text = [ "", "", "", "", "", "", "", "", ]; + string[] lev4_subtoc = []; + int heading_array_ptr = 0; + int ptr_doc_object = 0; + int ptr_html_segnames = 0; + int ptr_heading = 0; + int array_ptr = 0; + int heading_array_ptr_segments = 0; 
string[string][string] node; } #+END_SRC |