cd24854f99a3374539e870c3031cd6b4a38b581a
[software/sisu] / lib / sisu / develop / ao_idx.rb
1 # encoding: utf-8
2 =begin
3
4 * Name: SiSU
5
6 ** Description: documents, structuring, processing, publishing, search
7 *** system environment, resource control and configuration details
8
9 ** Author: Ralph Amissah
10 <ralph@amissah.com>
11 <ralph.amissah@gmail.com>
12
13 ** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
14 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
15 All Rights Reserved.
16
17 ** License: GPL 3 or later:
18
19 SiSU, a framework for document structuring, publishing and search
20
21 Copyright (C) Ralph Amissah
22
23 This program is free software: you can redistribute it and/or modify it
24 under the terms of the GNU General Public License as published by the Free
25 Software Foundation, either version 3 of the License, or (at your option)
26 any later version.
27
28 This program is distributed in the hope that it will be useful, but WITHOUT
29 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
30 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
31 more details.
32
33 You should have received a copy of the GNU General Public License along with
34 this program. If not, see <http://www.gnu.org/licenses/>.
35
36 If you have Internet connection, the latest version of the GPL should be
37 available at these locations:
38 <http://www.fsf.org/licensing/licenses/gpl.html>
39 <http://www.gnu.org/licenses/gpl.html>
40
41 <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
42
43 ** SiSU uses:
44 * Standard SiSU markup syntax,
45 * Standard SiSU meta-markup syntax, and the
46 * Standard SiSU object citation numbering and system
47
48 ** Homepages:
49 <http://www.jus.uio.no/sisu>
50 <http://www.sisudoc.org>
51
52 ** Git
53 <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
54 <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/ao_idx.rb;hb=HEAD>
55
56 =end
57 module SiSU_AO_BookIndex
58 class BookIndex
# Prepares a book-index builder.
#
# md   - document metadata object, kept as @md
# data - the document objects to scan, kept as @data
# env  - optional environment object; when none is given a
#        SiSU_Env::InfoEnv is created from md.fns
def initialize(md,data,env=nil)
  @md=md
  @data=data
  @env=env
  # index markup: opening marker, shortest possible content, closing marker,
  # then any trailing whitespace
  idx_open,idx_close=Mx[:idx_o],Mx[:idx_c]
  @rgx_idx=/#{idx_open}(?:.+?)#{idx_close}\s*/
  @rgx_idx_ocn_seg=/(.+?)~(\d+)~(\S+)/
  @rgx_idx_ocn=/(.+?)~(\d+)/
  settings=SiSU_Env::ProcessingSettings.new(@md)
  @ocn_html_identifier=settings.ocn_html_identifier
  @env=SiSU_Env::InfoEnv.new(@md.fns) unless @env
end
# Runs the complete book-index pass over @data: extracts the index terms,
# then inserts the segment-linked index markup into the document objects.
#
# Returns [objects, sst_rel, sst_rel_html_seg, html_idx, xhtml_idx], where
# objects is the document with the index inserted and the remaining values
# are passed through unchanged from #extract_book_index.
def indexing_song
  objects,rel_markup,seg_markup,html_idx,xhtml_idx=extract_book_index(@data)
  objects=clean_and_insert_index(objects,seg_markup)
  [objects,rel_markup,seg_markup,html_idx,xhtml_idx]
end
# Collects the index terms attached to the document objects.
#
# While walking data, @seg is updated to the name of the most recent
# level-4 heading (or inserted heading); every object whose #idx is a Hash
# is recorded together with its ocn and the current @seg. When at least
# one entry was found the entries are passed to #construct_book_index,
# and, if @md.book_idx is set, the result is rendered by #index.
#
# Returns [objects, sst_rel, sst_rel_html_seg, html_idx, xhtml_idx]; the
# last four are nil when nothing was rendered.
def extract_book_index(data)
  objects=[]
  entries=[]
  data.each do |dob|
    kind=dob.is
    @seg=dob.name if (kind==:heading || kind==:heading_insert) && dob.ln==4
    if defined?(dob.idx) && dob.idx.is_a?(Hash)
      entries << { idx: dob.idx, ocn: dob.ocn, seg: @seg }
    end
    objects << dob if dob
  end
  rel_markup=seg_markup=html_idx=xhtml_idx=nil
  unless entries.empty?
    sorted=construct_book_index(entries)
    if @md.book_idx
      rendered=index(sorted)
      rel_markup,seg_markup=rendered[:sst_rel],rendered[:sst_rel_html_seg]
      html_idx,xhtml_idx=rendered[:html],rendered[:xhtml]
    end
  end
  [objects,rel_markup,seg_markup,html_idx,xhtml_idx]
end
# Merges the per-object index entries into one index sorted by term.
#
# idx_array - array of { idx:, ocn:, seg: } hashes; idx maps each term to
#             its info hash, which may carry :plus (how many further
#             objects the reference spans) and :sub (an array of
#             { subterm => info } hashes)
#
# Returns an array of [term, nodes] pairs sorted by term. nodes always has
# 'node_0_terms' (locations of the term itself) and, when subterms exist,
# 'node_1_subterms' (subterm => locations). Each location is
# { ocn:, range:, seg: } with range either "ocn" or "ocn-(ocn+plus)".
def construct_book_index(idx_array)
  # range label for one reference: a single ocn, or a span when :plus > 0
  span=lambda do |ocn,info|
    plus=info[:plus].to_i
    plus > 0 ? "#{ocn}-#{ocn.to_i + plus}" : ocn.to_s
  end
  the_idx={}
  idx_array.each do |entry|
    ocn,seg=entry[:ocn],entry[:seg]
    entry[:idx].each_pair do |term,term_info|
      node=(the_idx[term] ||= {})
      (node['node_0_terms'] ||= []) <<
        { ocn: ocn, range: span.call(ocn,term_info), seg: seg }
      subs=term_info[:sub]
      next unless subs.is_a?(Array) && !subs.empty?
      subs.each do |sub|
        sub.each_pair do |subterm,subterm_info|
          subterms=(node['node_1_subterms'] ||= {})
          (subterms[subterm] ||= []) <<
            { ocn: ocn, range: span.call(ocn,subterm_info), seg: seg }
        end
      end
    end
  end
  the_idx.sort
end
# Returns a copy of str with every ampersand replaced by &amp;
# (str itself is left untouched).
def clean_xml(str)
  str.gsub('&','&amp;')
end
# Renders the sorted book index in four parallel forms.
#
# the_idx - array of [term, nodes] pairs as returned by
#           #construct_book_index; nodes holds 'node_0_terms' (locations
#           of the term itself) and optionally 'node_1_subterms'
#           (subterm => locations)
#
# Returns a Hash of arrays:
#   :sst_rel_html_seg - page break and heading objects, then one markup
#                       string per term with links into segment files
#   :sst_rel          - the same objects, then one markup string per term
#                       with links relative to the ocn
#   :html             - HTML fragments: letter navigation, letter headings
#                       and one string per term
#   :xhtml            - XHTML fragments laid out like :html
#
# Every term starts a new string at the end of each array; the links of
# the term and of its subterms are then appended to that string, which is
# addressed through the cursors @o, @t, @q and @r.
# When the verbose_plus option is on, the index is echoed to stdout.
def index(the_idx)
  @x=1
  verbose=@md.opt.act[:verbose_plus][:set]==:on
  idx={}
  idx[:sst_rel_html_seg],idx[:sst_rel],idx[:html],idx[:xhtml]=
    [], [], [], []
  h={
    obj: Mx[:br_page]
  }
  o=SiSU_AO_DocumentStructure::ObjectLayout.new.break(h)
  idx[:sst_rel_html_seg] << o
  idx[:sst_rel] << o
  h={
    lv: '1',
    name: 'index',
    obj: "Index"
  }
  o=SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h)
  idx[:sst_rel_html_seg] << o
  idx[:sst_rel] << o
  h={
    lv: '4',
    name: 'idx',
    obj: " [Index] #{Mx[:pa_non_object_dummy_heading]}"
  }
  o=SiSU_AO_DocumentStructure::ObjectHeading.new.heading(h)
  idx[:sst_rel_html_seg] << o
  idx[:sst_rel] << o
  # letter navigation bar; the leading '9' stands for the numeral section
  # and contributes an empty fragment
  alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
  idx[:html] << '<p>'
  idx[:xhtml] << '<p>'
  alph.each do |ch|
    if ch =~/[0-9]/
      idx[:html] << ''
      idx[:xhtml] << ''
    else
      idx[:html] <<
        %{<a href="##{ch}">#{ch}</a>,#{$ep[:hsp]}}
      idx[:xhtml] <<
        %{<a href="##{ch.downcase}">#{ch}</a>,#{$ep[:hsp]}}
    end
  end
  idx[:html] << '</p>'
  idx[:xhtml] << '</p>'
  letter=alph.shift
  idx[:html] <<
    %{\n<p class="book_index_lev1"><a name="numeral"></a></p>}
  idx[:xhtml] <<
    %{\n<p class="letter" id="numeral">0 - 9</p>}
  # Appends one location link to the current term's string in all four
  # outputs; label is the visible link text. seg_lead is put in front of
  # the segment file name in the :sst_rel_html_seg link only.
  add_link=lambda do |loc,label,seg_lead|
    idx[:sst_rel_html_seg][@o] +=
      %{#{Mx[:lnk_o]}#{label}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{seg_lead}#{loc[:seg]}.html##{@ocn_html_identifier}#{loc[:ocn]}#{Mx[:rel_c]}, }
    idx[:sst_rel][@t] +=
      %{#{Mx[:lnk_o]}#{label}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{loc[:ocn]}#{Mx[:rel_c]}, }
    idx[:html][@q] +=
      %{<a href="#{loc[:seg]}.html##{@ocn_html_identifier}#{loc[:ocn]}">#{label}</a>, }
    # the XHTML string is addressed through its own cursor @r
    idx[:xhtml][@r] +=
      %{<a href="#{loc[:seg]}.xhtml#o#{loc[:ocn]}">#{label}</a>, }
    # interpolation, so an Integer label cannot raise on String concatenation
    print "#{label}, " if verbose
  end
  the_idx.each do |i|
    i.each do |x|
      case x
      when String
        # emit letter headings until the heading letter catches up with the
        # first character of the term; a term without a leading non-space
        # character gets no heading change
        first=x[/^(\S)/,1]
        while first && letter < first && !alph.empty?
          letter=alph.shift
          idx[:html] <<
            %{\n<p class="letter"><a name="#{letter}">#{letter}</a></p><p class="book_index_lev1"><a name="#{letter.downcase}"> </a></p>}
          idx[:xhtml] <<
            %{\n<p class="letter" id="#{letter.downcase}">#{letter}</p>}
        end
        idx[:sst_rel_html_seg] <<
          %{\n\n#{Mx[:fa_bold_o]}#{x},#{Mx[:fa_bold_c]} }
        idx[:sst_rel] <<
          %{\n\n#{Mx[:fa_bold_o]}#{x},#{Mx[:fa_bold_c]} }
        aname=x.gsub(/\s+/,'_')
        idx[:html] <<
          %{\n<p class="book_index_lev1"><a name="#{aname}"><b>#{x}</b></a>, }
        c=clean_xml(x.dup)
        idx[:xhtml] <<
          %{\n<p class="book_index_lev1"><b>#{c}</b>, }
        # cursors point at the strings just pushed (always the last slot)
        @o=idx[:sst_rel_html_seg].length - 1
        @t=idx[:sst_rel].length - 1
        @q=idx[:html].length - 1
        @r=idx[:xhtml].length - 1
        print "\n" + x + ', ' if verbose
      when Array
        p 'array error? -->'
        print x
      when Hash
        if x['node_0_terms'].is_a?(Array)
          x['node_0_terms'].each do |a|
            if a[:range]
              # NOTE(review): only this case puts a leading "/" before the
              # segment file name in the :sst_rel_html_seg link; kept as found,
              # confirm whether it is intended
              add_link.call(a,a[:range],'/')
            elsif a[:ocn]
              add_link.call(a,a[:ocn],'')
            else p 'error'
            end
          end
          idx[:html][@q] += '</p>'
          idx[:xhtml][@r] += '</p>'
        end
        if x['node_1_subterms']
          x['node_1_subterms'].sort.each do |k,y|
            next if k =~/node_0_terms/
            idx[:sst_rel_html_seg][@o] += %{#{k}, }
            idx[:sst_rel][@t] += %{#{k}, }
            idx[:html][@q] += %{\n<p class="book_index_lev2">#{k}, }
            c=clean_xml(k.dup)
            idx[:xhtml][@r] += %{\n<p class="book_index_lev2">#{c}, }
            print "\n\t" + k + ', ' if verbose
            y.each do |z|
              if z[:range]
                add_link.call(z,z[:range],'')
              elsif z[:ocn]
                add_link.call(z,z[:ocn],'')
              else p 'error'
              end
            end
            idx[:html][@q] += '</p>'
            idx[:xhtml][@r] += '</p>'
          end
        end
        @x +=1
      end
    end
  end
  print "\n" if verbose
  idx
end
# Prints the index to standard output: each term on its own line followed
# by its locations, each subterm on a tab-indented line of its own.
#
# the_idx - array of [term, nodes] pairs as returned by
#           #construct_book_index
#
# A location is shown by its :range when present, otherwise by its :ocn;
# a location with neither is reported with p 'error'.
def screen_print(the_idx)
  show=lambda do |locations|
    locations.each do |loc|
      label=loc[:range] || loc[:ocn]
      # interpolation rather than +, so an Integer ocn prints instead of raising
      if label then print "#{label}, "
      else p 'error'
      end
    end
  end
  the_idx.each do |i|
    i.each do |x|
      case x
      when String
        print "\n" + x + ', '
      when Array
        p 'array error? -->'
        print x
      when Hash
        show.call(x['node_0_terms']) if x['node_0_terms'].is_a?(Array)
        next unless x['node_1_subterms']
        x['node_1_subterms'].sort.each do |k,y|
          next if k =~/node_0_terms/
          print "\n\t" + k + ', '
          show.call(y)
        end
      end
    end
  end
end
# Writes the HTML book index to
# <@env.path.output>/<@md.fnb>/<@md.fn[:book_idx_html]>, creating the
# document directory when it does not exist yet. Does nothing unless
# @md.book_idx is set. The target path is echoed to stdout.
#
# idx - Hash whose :html entry is the array of HTML fragments to write
#
# Returns nil.
def output_idx(idx)
  return unless @md.book_idx
  path="#{@env.path.output}/#{@md.fnb}"
  Dir.mkdir(path) unless FileTest.directory?(path)
  target="#{path}/#{@md.fn[:book_idx_html]}"
  puts "#{target} #{__FILE__}::#{__LINE__}"
  # block form: the file is closed even if writing a fragment raises
  File.open(target,'w') do |html_index_file|
    idx[:html].each { |x| html_index_file << x }
  end
  nil
end
# Returns a copy of data with the index markup inserted directly after
# every object whose #obj matches the endnotes break marker
# (Mx[:br_endnotes]).
#
# data            - the document objects
# sisu_markup_idx - the index objects/strings to insert; when nil the
#                   objects are copied unchanged
def clean_and_insert_index(data,sisu_markup_idx)
  data.each_with_object([]) do |dob,out|
    out << dob
    next unless dob.obj =~/#{Mx[:br_endnotes]}/ && sisu_markup_idx
    sisu_markup_idx.each { |entry| out << entry }
  end
end
# Returns a new array in which every paragraph has its index markup
# removed: anything matching @rgx_idx together with the newlines directly
# in front of it.
# Original author's note: check on use of dob.
def clean_index(data)
  data.map { |para| para.gsub(/\n*#{@rgx_idx}/m,'') }
end
425 end
426 end
427 __END__