From 6fd685918309318549009821d73d87d85c558017 Mon Sep 17 00:00:00 2001
From: Ralph Amissah <ralph@amissah.com>
Date: Thu, 5 Sep 2013 23:45:58 -0400
Subject: v5: dal, objects, store book index as hash (simplify downstream
 processing)

---
 data/doc/sisu/CHANGELOG_v5 |   2 +
 lib/sisu/v5/dal_doc_str.rb |  53 +++++++++++++++++++++++-
 lib/sisu/v5/dal_idx.rb     | 101 ++++++++++++++-------------------------------
 3 files changed, 84 insertions(+), 72 deletions(-)

diff --git a/data/doc/sisu/CHANGELOG_v5 b/data/doc/sisu/CHANGELOG_v5
index 466513b2..9ca1e163 100644
--- a/data/doc/sisu/CHANGELOG_v5
+++ b/data/doc/sisu/CHANGELOG_v5
@@ -30,6 +30,8 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_5.0.15.orig.tar.xz
   sisu_5.0.15.orig.tar.xz
   sisu_5.0.15-1.dsc
 
+* dal, objects, store book index as hash (simplify downstream processing)
+
 * dal, book index markup, allow spaces on both sides of delimiters, or a space
   before and newline following a delimiter \s+[:|;](\s+|\n)
   [else parses as before no spaces]
diff --git a/lib/sisu/v5/dal_doc_str.rb b/lib/sisu/v5/dal_doc_str.rb
index 83493c48..5b6c03bf 100644
--- a/lib/sisu/v5/dal_doc_str.rb
+++ b/lib/sisu/v5/dal_doc_str.rb
@@ -168,6 +168,54 @@ module SiSU_DAL_DocumentStructureExtract
       end
       [str,tags]
     end
+    def rgx_idx_ocn_seg # returns the 'text+N' splitting regex, also storing it in @rgx_idx_ocn_seg
+      @rgx_idx_ocn_seg=/(.+?)\s*[+](\d+)/ # group 1: shortest text before optional whitespace and a literal '+'; group 2: the digits after the '+'
+    end
+    def construct_idx_array_and_hash(idxraw)
+      idx_array_raw=idxraw.scan(/[^;]+/)
+      idx_hash,idx_array,idx_lst={},[],[]
+      idx_array_raw.each do |idx|
+        idx_lst=case idx
+        when /\S+?\s*:/
+          idx_couplet_tmp=[]
+          idx_couplet=idx.scan(/\s*[^:]+\s*/)
+          if idx_couplet[1] =~/[|]/
+            idx_couplet_tmp << idx_couplet[0] << idx_couplet[1].scan(/\s*[^|]+\s*/)
+          else
+            idx_couplet_tmp << idx_couplet[0] << [idx_couplet[1]]
+          end
+          idx_couplet=idx_couplet_tmp
+        else [idx]
+        end
+        term_nodes=[]
+        idx_lst.each do |term_node|
+          case term_node
+          when String
+            term_node=term_node[0].chr.capitalize + term_node[1,term_node.length]
+            term_node=(term_node =~/.+?[+]\d+/) \
+            ? term_node
+            : (term_node + '+0')
+            term_nodes << term_node
+            @use,plus=rgx_idx_ocn_seg.match(term_node)[1,2]
+            idx_hash[@use]={ sub: [], plus: plus } unless idx_hash[@use] and defined? idx_hash[@use]
+          when Array
+            subterm_nodes=[]
+            term_node.each do |subterm_node|
+              subterm_node=(subterm_node =~/.+?[+]\d+/) \
+              ? subterm_node
+              : (subterm_node + '+0')
+              subterm_nodes << subterm_node
+              sub,sub_plus=rgx_idx_ocn_seg.match(subterm_node)[1,2]
+              idx_hash[@use]={ sub: [], plus: 0 } unless idx_hash[@use] and defined? idx_hash[@use]
+              idx_hash[@use][:sub] << {sub.strip => { plus: sub_plus }}
+            end
+            term_nodes << subterm_nodes
+          end
+        end
+        idx_array << term_nodes
+      end
+      { hash: idx_hash, array: idx_array }
+    end
     def identify_parts
       tuned_file=[]
       @tuned_block,@tuned_code=[],[]
@@ -205,9 +253,10 @@ module SiSU_DAL_DocumentStructureExtract
             idx=if t_o=~/^=\{(.+)\}\s*$\Z/m; m=$1
               m=m.split(/\n/).join(' ').
                 gsub(/\s+([|:;])\s+/,'\1').
-                gsub(/\s+([+])\s+/,'\1')
+                gsub(/\s+([+]\d+)\s+/,'\1')
               t_o=t_o.gsub(/\n=\{.+\}\s*$\Z/m,'')
-              m
+              idx_array_and_hash=construct_idx_array_and_hash(m)
+              idx_array_and_hash[:hash]
             else nil
             end
           end
diff --git a/lib/sisu/v5/dal_idx.rb b/lib/sisu/v5/dal_idx.rb
index b87797bc..86ef06f8 100644
--- a/lib/sisu/v5/dal_idx.rb
+++ b/lib/sisu/v5/dal_idx.rb
@@ -84,13 +84,11 @@ module SiSU_DAL_BookIndex
           @seg=dob.name
         end
         if defined? dob.idx \
-        and dob.idx.is_a?(String) \
-        and not dob.idx.empty?
-          idx_array << "#{dob.idx}~#{dob.ocn}~#{@seg}"
+        and dob.idx.is_a?(Hash)
+          idx_array << {idx: dob.idx, ocn: dob.ocn, seg: @seg }
         end
         tuned_file << dob if dob
       end
-      idx_array=construct_idx_array(idx_array) if idx_array.length > 0
       if idx_array.length > 0
         the_idx=construct_book_index(idx_array)
         sisu_markup_idx_rel,sisu_markup_idx_rel_html_seg,html_idx,xhtml_idx=nil,nil,nil,nil
@@ -101,61 +99,28 @@ module SiSU_DAL_BookIndex
       end
       [tuned_file,sisu_markup_idx_rel,sisu_markup_idx_rel_html_seg,html_idx,xhtml_idx]
     end
-    def construct_idx_array(idx_array)
-      idx_lst=[]
-      idx_array.each do |idx|
-        idx_list,ocn,seg=@rgx_idx_ocn_seg.match(idx)[1..3]
-        idx_lst <<=if idx_list =~/;/
-          g=idx_list.scan(/[^;]+/)
-          idxl=[]
-          g.each do |i|
-            i=i.strip
-            idxl << { rough_idx: i, ocn: ocn, seg: seg }
-          end
-          idxl
-        else { rough_idx: idx_list, ocn: ocn, seg: seg }
-        end
-      end
-      idx_lst=idx_lst.flatten
-    end
     def construct_book_index(idx_array)
       the_idx={}
       idx_array.each do |idx|
-        if idx[:rough_idx] =~/[|]/ \
-        && idx[:rough_idx] !~/[:]/
-          if @md.opt.cmd =~/[MVv]/
-            p 'book index error? --> ' + idx[:rough_idx]
-          end
-        else
-          idx_lst=idx[:rough_idx].scan(/[^|:]+/)
-          idx_lst[0]=idx_lst[0].strip
-          if idx_lst[0] =~/.+?\+\d+/
-            use=/(.+?)\+(?:\d+)/.match(idx_lst[0])[1]
-          else use=idx_lst[0]
-          end
-          use=use[0].chr.capitalize + use[1,use.length]
-          the_idx[use]={} unless the_idx[use] and defined? the_idx[use]
-          idx_lst.each do |i|
-            i=i.strip
-            i,r=/(.+?)\+(\d+)/.match(i)[1,2] if i =~/.+?\+\d+/
-            x=if idx_lst.length==1 or idx_lst[0].gsub(/\+\d+/,'')==i
-              the_idx[use]['term_node_lev1']=[] unless the_idx[use]['term_node_lev1'] and defined? the_idx[use]['term_node_lev1']
-              x=if r
-                the_idx[use]['term_node_lev1'] << { ocn: idx[:ocn], range: "#{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}", seg: idx[:seg] }
-                "#{i} #{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}"
-              else
-                the_idx[use]['term_node_lev1'] << { ocn: idx[:ocn], seg: idx[:seg] }
-                "#{i} #{idx[:ocn]}"
-              end
-            else
-              the_idx[use]['term_node_lev2']={} unless the_idx[use]['term_node_lev2'] and defined? the_idx[use]['term_node_lev2']
-              the_idx[use]['term_node_lev2'][i]=[] unless the_idx[use]['term_node_lev2'][i] and defined? the_idx[use]['term_node_lev2'][i]
-              x=if r
-                the_idx[use]['term_node_lev2'][i] << { ocn: idx[:ocn], range: "#{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}", seg: idx[:seg] }
-                "#{idx_lst[0]}:#{i} #{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}"
-              else
-                the_idx[use]['term_node_lev2'][i] << { ocn: idx[:ocn], seg: idx[:seg] }
-                "#{idx_lst[0]}:#{i} #{idx[:ocn]}"
+        idx[:idx].each_pair do |term,term_info|
+          location=(term_info[:plus].to_i > 0) \
+          ? (%{#{idx[:ocn]}-#{idx[:ocn].to_i + term_info[:plus].to_i}})
+          : idx[:ocn].to_s
+          the_idx[term]={} unless the_idx[term] and defined? the_idx[term]
+          the_idx[term]['node_0_terms']=[] unless the_idx[term]['node_0_terms'] and defined? the_idx[term]['node_0_terms']
+          the_idx[term]['node_0_terms'] << { ocn: idx[:ocn], range: location, seg: idx[:seg] }
+          if term_info[:sub].is_a?(Array) \
+          and term_info[:sub].length > 0
+            term_info[:sub].each do |y|
+              y.each_pair do |subterm,subterm_info|
+                location=(subterm_info[:plus].to_i > 0) \
+                ? (%{#{idx[:ocn]}-#{idx[:ocn].to_i + subterm_info[:plus].to_i}})
+                : idx[:ocn].to_s
+                the_idx[term]={} unless the_idx[term] and defined? the_idx[term]
+                the_idx[term]['node_0_terms']=[] unless the_idx[term]['node_0_terms'] and defined? the_idx[term]['node_0_terms']
+                the_idx[term]['node_1_subterms']={} unless the_idx[term]['node_1_subterms'] and defined? the_idx[term]['node_1_subterms']
+                the_idx[term]['node_1_subterms'][subterm]=[] unless the_idx[term]['node_1_subterms'][subterm] and defined? the_idx[term]['node_1_subterms'][subterm]
+                the_idx[term]['node_1_subterms'][subterm] << { ocn: idx[:ocn], range: location, seg: idx[:seg] }
               end
             end
           end
@@ -230,8 +195,8 @@ module SiSU_DAL_BookIndex
             p 'array error? -->'
             print x
           elsif x.is_a?(Hash)
-            if x['term_node_lev1'].is_a?(Array)
-              x['term_node_lev1'].each do |a|
+            if x['node_0_terms'].is_a?(Array)
+              x['node_0_terms'].each do |a|
                 if a[:range]
                   idx[:sst_rel_html_seg][@o]=idx[:sst_rel_html_seg][@o] + %{#{Mx[:lnk_o]}#{a[:range]}#{Mx[:lnk_c]}#{Mx[:rel_o]}/#{a[:seg]}.html##{a[:ocn]}#{Mx[:rel_c]}, }
                   idx[:sst_rel][@t]=idx[:sst_rel][@t] + %{#{Mx[:lnk_o]}#{a[:range]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{a[:ocn]}#{Mx[:rel_c]}, }
@@ -250,11 +215,9 @@ module SiSU_DAL_BookIndex
               idx[:html][@q]=idx[:html][@q] + '</p>'
               idx[:xhtml][@r]=idx[:xhtml][@r] + '</p>'
             end
-            if x['term_node_lev2']
-              m=x['term_node_lev2']
-              m=m.sort
-              m.each do |k,y|
-                if k !~/term_node_lev1/
+            if x['node_1_subterms']
+             x['node_1_subterms'].sort.each do |k,y|
+                if k !~/node_0_terms/
                   idx[:sst_rel_html_seg][@o]=idx[:sst_rel_html_seg][@o] + %{#{k}, }
                   idx[:sst_rel][@t]=idx[:sst_rel][@t] + %{#{k}, }
                   idx[:html][@q]=idx[:html][@q] + %{\n<p class="book_index_lev2">#{k}, }
@@ -298,8 +261,8 @@ module SiSU_DAL_BookIndex
             p 'array error? -->'
             print x
           elsif x.is_a?(Hash)
-            if x['term_node_lev1'].is_a?(Array)
-              x['term_node_lev1'].each do |a|
+            if x['node_0_terms'].is_a?(Array)
+              x['node_0_terms'].each do |a|
                 if a[:range]
                   print a[:range] + ', '
                 elsif a[:ocn]
@@ -308,11 +271,9 @@ module SiSU_DAL_BookIndex
                 end
               end
             end
-            if x['term_node_lev2']
-              m=x['term_node_lev2']
-              m=m.sort
-              m.each do |k,y|
-                if k !~/term_node_lev1/
+            if x['node_1_subterms']
+              x['node_1_subterms'].sort.each do |k,y|
+                if k !~/node_0_terms/
                   print "\n\t" + k + ', '
                   y.each do |z|
                     if z[:range]
-- 
cgit v1.2.3