b33c29f5002777e521d812de0664b9788b9d2acc
[software/sisu] / lib / sisu / develop / shared_markup_alt.rb
1 # encoding: utf-8
2 =begin
3
4 * Name: SiSU
5
6 ** Description: documents, structuring, processing, publishing, search
7 *** system environment, resource control and configuration details
8
9 ** Author: Ralph Amissah
10 <ralph@amissah.com>
11 <ralph.amissah@gmail.com>
12
13 ** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
14 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
15 All Rights Reserved.
16
17 ** License: GPL 3 or later:
18
19 SiSU, a framework for document structuring, publishing and search
20
21 Copyright (C) Ralph Amissah
22
23 This program is free software: you can redistribute it and/or modify it
24 under the terms of the GNU General Public License as published by the Free
25 Software Foundation, either version 3 of the License, or (at your option)
26 any later version.
27
28 This program is distributed in the hope that it will be useful, but WITHOUT
29 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
30 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
31 more details.
32
33 You should have received a copy of the GNU General Public License along with
34 this program. If not, see <http://www.gnu.org/licenses/>.
35
36 If you have Internet connection, the latest version of the GPL should be
37 available at these locations:
38 <http://www.fsf.org/licensing/licenses/gpl.html>
39 <http://www.gnu.org/licenses/gpl.html>
40
41 <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
42
43 ** SiSU uses:
44 * Standard SiSU markup syntax,
45 * Standard SiSU meta-markup syntax, and the
46 * Standard SiSU object citation numbering and system
47
48 ** Homepages:
49 <http://www.jus.uio.no/sisu>
50 <http://www.sisudoc.org>
51
52 ** Git
53 <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
54 <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/shared_markup_alt.rb;hb=HEAD>
55
56 =end
57 module SiSU_TextRepresentation
class Alter
  # Rewrites the internal (Mx-delimited) markup of a piece of text into other
  # textual representations: whitespace-tidied, markup-free, SiSU source-like
  # markup, or a light HTML form.
  #
  # All marker strings are read from the Mx table at call time; Mx is defined
  # outside this file.

  # Glyph codes (the digits following '#' between Mx[:gl_o] and Mx[:gl_c]) and
  # the plain character each becomes in strip_clean_of_markup. Applied in this
  # order, after the lt/gt/amp glyphs.
  GLYPH_CODES=[
    ['033','!'],
    ['035','#'],
    ['042','*'],
    ['045','-'],
    ['047','/'],
    ['095','_'],
    ['123','{'],
    ['125','}'],
    ['126','~'],
    ['169','©'],
  ].freeze

  # Font faces (in application order) and the SiSU source-style markup that
  # semi_revert_markup writes them back as.
  SOURCE_FACE_MARKUP=[
    [:bold,       '*{\1}*'],
    [:italics,    '/{\1}/'],
    [:underscore, '_{\1}_'],
    [:cite,       '"{\1}"'],
    [:insert,     '+{\1}+'],
    [:strike,     '-{\1}-'],
    [:superscript,'^{\1}^'],
    [:subscript,  ',{\1},'],
  ].freeze

  # Font faces (in application order) and the replacement html_lite uses.
  # Insert and strike have no HTML form here and keep SiSU-style markup.
  HTML_FACE_MARKUP=[
    [:bold,       '<b>\1</b>'],
    [:italics,    '<i>\1</i>'],
    [:underscore, '<u>\1</u>'],
    [:cite,       '"\1"'],
    [:insert,     '+{\1}+'],
    [:strike,     '-{\1}-'],
    [:superscript,'<sup>\1</sup>'],
    [:subscript,  '<sub>\1</sub>'],
  ].freeze

  # x is either a String (text only), or an object responding to #obj whose
  # text is copied; in the latter case the object itself is kept as @t_o and
  # later queried for #is, #lv, #bullet_ and #indent.
  def initialize(x)
    if x.is_a?(String)
      @t_o,@s=nil,x
    else
      @t_o,@s=x,x.obj.dup
    end
  end

  # Tidies spacing: removes spaces before , . ; : ? (only when the text holds
  # no endnote opening marker), collapses runs of spaces to one, and closes
  # the gap in "<bold|italics close>' s ". Stores and returns the result.
  def strip_clean_of_extra_spaces # dal output tuned
    s=@s.dup
    unless s =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
      s=s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1')
    end
    # NOTE(review): the two anchored substitutions need at least two spaces,
    # which cannot exist once runs have been collapsed, so single leading and
    # trailing spaces survive - confirm whether that is intended.
    @s=s.gsub(/ [ ]+/,' ').
      gsub(/^ [ ]+/,'').
      gsub(/ [ ]+$/,'').
      gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2')
  end

  # Returns the text with font-face markers removed (content kept), numeric
  # superscripts bracketed, endnotes deleted, nbsp/break markers and glyph
  # codes replaced by plain characters, and whitespace collapsed and trimmed.
  # Stores the result in @s as well.
  def strip_clean_of_markup # text form used in sql db search, used for digest, define rules, make same as in db clean
    gl_o,gl_c=Mx[:gl_o],Mx[:gl_c]
    s=@s.dup #% same as db clean -->
    [:bold,:italics,:underscore,:cite,:insert,:strike].each do |face|
      s=s.gsub(font_face_rgx(face),'\1')
    end
    # all-digit superscripts become [n]; must run before the generic rule
    s=s.gsub(font_face_rgx(:superscript,'\d+'),'[\1]')
    [:superscript,:subscript,:hilite].each do |face|
      s=s.gsub(font_face_rgx(face),'\1')
    end
    s=s.gsub(/#{gl_o}#(?:126|152)#{gl_c}/i,'~').
      gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,''). # endnote removed
      gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,''). # endnote removed
      gsub(/(?:#{Mx[:nbsp]})+/,' ').
      gsub(/(?:#{Mx[:br_nl]})+/,"\n").
      gsub(/(?:#{Mx[:br_paragraph]})+/,"\n").
      gsub(/(?:#{Mx[:br_line]})+/,"\n").
      gsub(/#{gl_o}(?:#lt|#060)#{gl_c}/,'<').
      gsub(/#{gl_o}(?:#gt|#062)#{gl_c}/,'>').
      gsub(/#{gl_o}#(?:038|amp)#{gl_c}/,'&')
    GLYPH_CODES.each do |code,char|
      s=s.gsub(/#{gl_o}##{code}#{gl_c}/,char)
    end
    @s=s.gsub(/\s\s+/,' ').strip
  end

  # Rewrites internal markers as SiSU source-style markup. Only acts when
  # constructed from an object (not a String); otherwise the text is returned
  # untouched. Headings are prefixed with "<lv>~ ", paragraphs with bullet
  # and/or indent markers, and block/group/code objects are wrapped in
  # "<type>{ ... }<type>" with line breaks doubled.
  def semi_revert_markup # used for digest, define rules, make same as in db clean
    return @s unless @t_o
    s=@s
    SOURCE_FACE_MARKUP.each do |face,markup|
      s=s.gsub(font_face_rgx(face),markup)
    end
    s=s.gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~').
      gsub(/#{Mx[:en_a_o]}([\d*+]+\s+.+?)#{Mx[:en_a_c]}/,'~{\1}~'). # endnote marker marked up
      gsub(/#{Mx[:en_b_o]}([\d*+]+\s+.+?)#{Mx[:en_b_c]}/,'~[\1]~') # endnote marker marked up
    kind=@t_o.is
    if kind==:heading || kind==:para
      s=s.gsub(/ [ ]+/,' ').
        gsub(/(?:#{Mx[:nbsp]})+/,' ')
      s=@t_o.lv + '~ ' + s if kind==:heading
      if kind==:para
        s='_* ' + s if @t_o.bullet_
        if @t_o.indent.to_i > 0
          # "_1 _* text" (indent + bullet) is merged into "_1* text"
          s=("_#{@t_o.indent} " + s).gsub(/^(_[1-9])\s_\*\s/,'\1* ')
        end
      end
    end
    if kind==:block || kind==:group || kind==:code
      s=s.gsub(/#{Mx[:nbsp]}/,' ')
      s="#{kind}{\n\n#{s}\n\n}#{kind}"
      s=s.gsub(/(?:#{Mx[:br_nl]}|\n)+/m,"\n\n")
    end
    #dealing with poem and verse calls for change in dal, where start and end verse of poem are marked as such
    @s=s.strip
  end

  # Light HTML rendering. Only acts when constructed from an object; for a
  # String it prints this file's name and a line number and returns the text
  # unchanged. Code objects just get < and > escaped (after the font-face
  # substitutions); everything else gets glyph entities and url anchors.
  #
  # NOTE(review): the non-code path calls urls(...) and reads @url_brace,
  # neither of which is defined or assigned anywhere in this class, so that
  # path presumably raises NoMethodError unless they are supplied from
  # elsewhere - confirm before relying on this method.
  def html_lite #test whether eventually can be used in db_import replacing shared_html_lite (search for SiSU_FormatShared)
    unless @t_o
      p "#{__FILE__}:#{__LINE__}"
      return @s
    end
    HTML_FACE_MARKUP.each do |face,markup|
      @s=@s.gsub(font_face_rgx(face),markup)
    end
    @s=@s.gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~')
    if @t_o.is !=:code
      if @s =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/
        wm=@s.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)|\S+/)
        words=urls(wm)
        @s=@s.gsub(/.+/m,words)
      end
      # the '\3' below has no matching group and expands to nothing
      @s=@s.gsub(/#{Mx[:gl_o]}(#[0-9]{3})#{Mx[:gl_c]}/u,'&\1;').
        gsub(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;').
        gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'<a href="\1" target="_top">\1</a>'). #http ftp matches escaped, no decoration
        gsub(/(#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1<a href="\2" target="_top">\2</a>\3'). #special case \{ e.g. \}http://url
        gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}}) #http ftp matches with decoration
    else
      @s=@s.gsub(/</m,'&lt;').gsub(/>/m,'&gt;')
    end
    # No per-type decoration (bullets, indents, headings) is applied here yet.
    @s
  end

  private

  # Regexp matching one font-face span: the face's opening marker, a captured
  # body (non-greedy "anything" unless another pattern source is given), and
  # the face's closing marker.
  def font_face_rgx(face,inner='.+?')
    /#{Mx[:"fa_#{face}_o"]}(#{inner})#{Mx[:"fa_#{face}_c"]}/
  end
end
class ModifiedTextPlusHashDigest
  # Produces alternative text representations of a document object (via
  # SiSU_TextRepresentation::Alter) together with their hex digests.
  #
  # The public entry points strip_clean_of_markup, semi_revert_markup and
  # composite each define helper methods with nested `def` and return self.
  # Nested `def` installs the helpers on the class, not on the instance:
  # txt/dgst are shared names, so the entry point called most recently
  # (on any instance) decides which txt/dgst implementation is active.

  # Library to require for each supported digest type.
  DIGEST_LIB={
    sha512: 'digest/sha2',
    sha256: 'digest/sha2',
    md5:    'digest/md5',
  }.freeze

  # md:: document metadata; #fns and #opt are read here, #opt.act later
  # x::  a String, or an object responding to #obj (its text is copied)
  #
  # The digest type is taken from the environment's digest settings and the
  # matching library is loaded lazily; a missing library is reported through
  # SiSU_Utils::CodeMarker rather than raised.
  def initialize(md,x)
    @md=md
    if x.is_a?(String)
      @t_o,@s=nil,x
    else
      @t_o,@s=x,x.obj.dup
    end
    @env ||=SiSU_Env::InfoEnv.new(@md.fns)
    @sha_ = @env.digest(@md.opt).type
    digest_lib=DIGEST_LIB[@sha_]
    begin
      require digest_lib if digest_lib
    rescue LoadError
      SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).error(digest_lib + ' NOT FOUND')
    end
  end

  # Hex digest of txt using the configured type (:sha512, :sha256 or :md5);
  # nil for any other type.
  def digest(txt)
    case @sha_
    when :sha512 then Digest::SHA512.hexdigest(txt)
    when :sha256 then Digest::SHA256.hexdigest(txt)
    when :md5    then Digest::MD5.hexdigest(txt)
    end
  end

  # After calling: #txt is the markup-free text, #dgst a hash of that text and
  # its digest.
  def strip_clean_of_markup
    def txt
      SiSU_TextRepresentation::Alter.new(@s).strip_clean_of_markup
    end
    def dgst
      clean=txt
      { txt: clean, dgst_txt: digest(clean) }
    end
    self
  end

  # After calling: #txt is the semi-reverted text, #dgst a hash of that text
  # and its digest.
  #
  # NOTE(review): Alter is built from the String @s here, and
  # Alter#semi_revert_markup only rewrites text when built from an object, so
  # #txt presumably comes back unmodified - confirm whether @t_o was meant.
  def semi_revert_markup
    def txt
      SiSU_TextRepresentation::Alter.new(@s).semi_revert_markup
    end
    def dgst
      reverted=txt
      { txt: reverted, dgst_txt: digest(reverted) }
    end
    self
  end

  # After calling: #dgst returns the combined digest record for the object
  # this instance was built from (requires an object, not a String); the
  # other helpers defined here support it.
  def composite
    # Markup-free form of txt (a String or an object responding to #obj).
    def stripped_clean(txt)
      SiSU_TextRepresentation::Alter.new(txt).strip_clean_of_markup
    end
    # Semi-reverted form of txt (a String or an object responding to #obj).
    def markup_reverted(txt)
      SiSU_TextRepresentation::Alter.new(txt).semi_revert_markup
    end
    # For each image file name: locate it in the local, remote, then general
    # image source directory and digest the file. Returns an array of
    # { img_dgst:, img_name:, img_type: } hashes; a missing image is reported
    # (unless quiet) and listed with a nil digest and a marked name.
    def images(imgs)
      sys=SiSU_Env::SystemCall.new
      return [] unless imgs and imgs.length > 0
      @image_name,@image_dgst,@img=[],[],[]
      imgs.map do |i|
        image_source=if FileTest.file?("#{@env.path.image_source_include_local}/#{i}")
          @env.path.image_source_include_local
        elsif FileTest.file?("#{@env.path.image_source_include_remote}/#{i}")
          @env.path.image_source_include_remote
        elsif FileTest.file?("#{@env.path.image_source_include}/#{i}")
          @env.path.image_source_include
        else
          SiSU_Screen::Ansi.new(
            @md.opt.act[:color_state][:set],
            "ERROR - image:",
            %{"#{i}" missing},
            "search locations: #{@env.path.image_source_include_local}, #{@env.path.image_source_include_remote} and #{@env.path.image_source_include}"
          ).error2 unless @md.opt.act[:quiet][:set]==:on
          nil
        end
        # nil (rather than an exception) when the extension is not png/jpg/gif
        type_match=/\S+\.(png|jpg|gif)/.match(i)
        img_type=type_match ? type_match[1] : nil
        if image_source
          para_image=image_source + '/' + i
          # @sha_ is a symbol, so it must be compared, not tested for truth;
          # anything other than :md5 uses sha256
          image_dgst=(@sha_==:md5) ? sys.md5(para_image) : sys.sha256(para_image)
          # element [1] of the system call result is taken as the digest
          # (presumably [label, digest] - verify against SystemCall)
          { img_dgst: image_dgst[1], img_name: i, img_type: img_type }
        else
          { img_dgst: nil, img_name: i + ' [image missing]', img_type: img_type }
        end
      end
    end
    # For each endnote string ("<number> <text>", possibly nested in arrays)
    # returns { note_number:, note_dgst: }.
    #
    # NOTE(review): the stored value is the digest of the digest of the
    # cleaned note text; kept as is so existing digests stay stable - confirm
    # whether the double hashing is intended.
    def endnotes(en)
      return [] unless en and en.length > 0
      en.flatten.map do |e|
        note_no=e.gsub(/^([\d*+]+)\s+.+/,'\1')
        { note_number: note_no, note_dgst: digest(digest(stripped_clean(e))) }
      end
    end
    # Digest record for the object: its type, ocn, digests of the stripped and
    # of the semi-reverted text, plus endnote and image digests when present.
    # Returns nil for comment, structure and layout objects.
    def dgst
      return nil if [:comment,:structure,:layout].include?(@t_o.of)
      txt_stripped_dgst=digest(stripped_clean(@t_o))
      txt_markup_reverted_dgst=digest(markup_reverted(@t_o))
      rgx_notes=/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
      endnotes_dgst=endnotes(@t_o.obj.scan(rgx_notes))
      rgx_image=/#{Mx[:lnk_o]}(\S+\.(?:png|jpg|gif))\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/
      line_image=nil
      if (@t_o.is==:para || @t_o.is==:image) and @t_o.obj =~rgx_image
        line_image=images(@t_o.obj.scan(rgx_image).flatten)
      end
      record={ is: @t_o.is, ocn: @t_o.ocn, dgst_stripped_txt: txt_stripped_dgst, dgst_markedup_txt: txt_markup_reverted_dgst }
      record[:endnotes]=endnotes_dgst if endnotes_dgst and endnotes_dgst.length > 0
      record[:images]=line_image if line_image and line_image.length > 0
      record
    end
    self
  end
end
336 end
337 __END__