d: po4a, continue reorganization (translation request dev stopped)
[software/sisu] / lib / sisu / develop / object_munge.rb
1 # encoding: utf-8
2 =begin
3
4 * Name: SiSU
5
6 ** Description: documents, structuring, processing, publishing, search
7 *** object munge
8
9 ** Author: Ralph Amissah
10 <ralph@amissah.com>
11 <ralph.amissah@gmail.com>
12
13 ** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
14 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
15 All Rights Reserved.
16
17 ** License: GPL 3 or later:
18
19 SiSU, a framework for document structuring, publishing and search
20
21 Copyright (C) Ralph Amissah
22
23 This program is free software: you can redistribute it and/or modify it
24 under the terms of the GNU General Public License as published by the Free
25 Software Foundation, either version 3 of the License, or (at your option)
26 any later version.
27
28 This program is distributed in the hope that it will be useful, but WITHOUT
29 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
30 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
31 more details.
32
33 You should have received a copy of the GNU General Public License along with
34 this program. If not, see <http://www.gnu.org/licenses/>.
35
36 If you have Internet connection, the latest version of the GPL should be
37 available at these locations:
38 <http://www.fsf.org/licensing/licenses/gpl.html>
39 <http://www.gnu.org/licenses/gpl.html>
40
41 <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
42
43 ** SiSU uses:
44 * Standard SiSU markup syntax,
45 * Standard SiSU meta-markup syntax, and the
46 * Standard SiSU object citation numbering and system
47
48 ** Homepages:
49 <http://www.jus.uio.no/sisu>
50 <http://www.sisudoc.org>
51
52 ** Git
53 <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
54 <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/object_munge.rb;hb=HEAD>
55
56 =end
# Text "munging" helpers: strip or restore inline markup in document object
# text. All delimiters are read from the project-wide Mx table at call time
# (Mx is defined elsewhere; its values are interpolated into regexes
# unescaped, exactly as given).
module SiSU_Object_Munge
  # Escaped glyph codes (found between Mx[:gl_o] and Mx[:gl_c]) and the plain
  # character each one stands for. Entries are regex source fragments and are
  # applied in this order.
  MUNGE_GLYPHS = [
    ['(?:#lt|#060)', '<'],
    ['(?:#gt|#062)', '>'],
    ['#(?:038|amp)', '&'],
    ['#033', '!'],
    ['#035', '#'],
    ['#042', '*'],
    ['#045', '-'],
    ['#047', '/'],
    ['#095', '_'],
    ['#123', '{'],
    ['#125', '}'],
    ['#126', '~'],
    ['#169', '©']
  ].freeze

  # Strips source-level (Mx[:srcrgx_*]) text-face markup, removes endnotes,
  # converts break markers to newlines and restores escaped glyphs.
  # Delimited spans may run across newlines.
  #
  # @param txtobj [String] text to clean (not modified in place)
  # @return [String] plain text, stripped of leading/trailing whitespace
  def i_src_o_strip_markup(txtobj)
    munge_strip_markup_(txtobj, :srcrgx, Regexp::MULTILINE)
  end

  # Same as #i_src_o_strip_markup but for the internal (Mx[:fa_*]) text-face
  # markers; delimited spans must sit on a single line.
  #
  # @param txtobj [String] text to clean (not modified in place)
  # @return [String] plain text, stripped of leading/trailing whitespace
  def i_ao_o_strip_markup(txtobj)
    munge_strip_markup_(txtobj, :fa, 0)
  end

  # Stores the text to be restored and returns self, so a call can be chained:
  #   i_ao_o_src_markup_restore(text).textface_marks
  #
  # @param txtobj [String] text carrying internal (Mx[:fa_*]) markers
  # @return [self]
  def i_ao_o_src_markup_restore(txtobj)
    @txtobj = txtobj
    self
  end

  # Rewrites the internal text-face markers of the stored text (see
  # #i_ao_o_src_markup_restore) back into source markup, e.g. bold => *{..}*.
  # Endnotes delimited by Mx[:en_a_*] are restored as ~{mark text}~; those
  # delimited by Mx[:en_b_*] are removed.
  #
  # @return [String] restored text, stripped of leading/trailing whitespace
  def textface_marks
    restored = @txtobj.
      gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/, '*{\1}*').
      gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/, '/{\1}/').
      gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/, '_{\1}_').
      gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/, '"{\1}"').
      gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/, '+{\1}+').
      gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/, '-{\1}-').
      gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/, '^{[\1]}^').
      gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/, '^{\1}^').
      gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/, ',{\1},').
      gsub(/#{Mx[:fa_hilite_o]}(.+?)#{Mx[:fa_hilite_c]}/, '\1').
      gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i, '~').
      # the note text must be captured, otherwise \2 expands to nothing
      gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(.+?)#{Mx[:en_a_c]}/, '~{\1 \2}~').
      gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/, '') # endnote removed
    munge_glyphs_(munge_breaks_(restored)).gsub(/[ ]{2,}/, ' ').strip
  end

  # @return [Object] the text last stored by #i_ao_o_src_markup_restore
  def object_marks
    @txtobj
  end

  # Cleans markup from a String, or from the +obj+ attribute of an object
  # whose class lives under SiSU_AO_DocumentStructure (that object is
  # modified in place).
  #
  # @param txtobj [String, Object] text or document object
  # @param markup [Symbol] :ao (internal markers, default) or :src
  # @return [String, Object] cleaned String, or the document object passed in;
  #   anything unrecognised is reported on stderr and returned unchanged
  def clean_text(txtobj, markup = :ao)
    if txtobj.is_a?(String)
      case markup
      when :ao then i_ao_o_strip_markup(txtobj)
      when :src then i_src_o_strip_markup(txtobj)
      else
        # do not let a diagnostic value replace the text being processed
        warn "#{__FILE__}:#{__LINE__}: clean_text: unknown markup #{markup.inspect}, text left as is"
        txtobj
      end
    elsif txtobj.class.inspect =~ /^SiSU_AO_DocumentStructure::/
      txtobj.obj = i_ao_o_strip_markup(txtobj.obj)
      txtobj
    else
      warn "#{__FILE__}:#{__LINE__}: clean_text: cannot clean #{txtobj.class}"
      txtobj
    end
  end

  # Placeholder, not implemented; returns nil.
  def footnotes_inline(txtobj)
  end

  # Placeholder, not implemented; returns nil.
  def footnotes_ref_and_note(txtobj)
  end

  # Returns its argument unchanged.
  def src_markup(txtobj)
    txtobj
  end

  # Extracts endnotes from paragraph text, or marks them up inline.
  #
  # With endnotes_ == :separate, numbered Mx[:en_a_*] endnotes are collected
  # (each rewritten as "^~N text", break markers replaced) and every one is
  # replaced in the text by the marker "~^". Otherwise endnotes are rewritten
  # inline as ~[ .. ]~ / ~{ .. }~ and no notes are collected.
  #
  # @param doc_obj_txt [String] paragraph text
  # @param endnotes_ [Symbol] :separate, or anything else for inline
  # @return [Array] [text, notes]; notes is an Array of String for :separate,
  #   nil otherwise
  def extract_endnotes(doc_obj_txt, endnotes_) #% used for extraction of endnotes from paragraphs
    notes_a = nil
    if endnotes_ == :separate
      found = doc_obj_txt.scan(/#{Mx[:en_a_o]}([\d]+\s+.+?)#{Mx[:en_a_c]}/).flatten
      notes_a = found.map do |note| # high cost to deal with <br> appropriately within plaintext
        note.to_s.
          gsub(/^([\d]+)\s+/, '^~\1 ').
          gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/, ' \\\\\\ ')
      end
      doc_obj_txt = doc_obj_txt.
        gsub(/#{Mx[:en_a_o]}([\d]+)\s+(?:.+?)#{Mx[:en_a_c]}/, '~^') # endnote marker marked up
    else
      doc_obj_txt = doc_obj_txt.
        gsub(/#{Mx[:en_b_o]}[\d]+\s+(.+?)#{Mx[:en_b_c]}/, '~[ \1 ]~'). # inline endnote with marker marked up
        gsub(/#{Mx[:en_a_o]}([*+]+)\s+(.+?)#{Mx[:en_a_c]}/, '~{\1 \2 }~'). # inline endnote with marker marked up
        gsub(/#{Mx[:en_b_o]}([*+]+)\s+(.+?)#{Mx[:en_b_c]}/, '~[\1 \2 ]~') # inline endnote with marker marked up
    end
    [doc_obj_txt, notes_a]
  end

  # Entry point for the document-object helpers (#code_, #block_,
  # #textface_marks_po4a); returns self so calls can be chained:
  #   objects.textface_marks_po4a(dob)
  #
  # @return [self]
  def objects
    self
  end

  # For code objects (dob.is == :code) drops the underscore in front of
  # "<" / ">" unless it follows "}". Two passes, so adjacent sequences such
  # as "_<_<" are both handled.
  #
  # @param dob [Object] document object responding to is, obj, obj=
  # @return [Object] dob (modified in place)
  def code_(dob)
    if dob.is == :code
      unescape = /(^|[^}])_([<>])/m
      dob.obj = dob.obj.gsub(unescape, '\1\2'). # _> _<
        gsub(unescape, '\1\2')                  # _<_<
    end
    dob
  end

  # Converts break markers to newlines: one per marker (plus bullet glyphs
  # turned into "* ") when dob.of == :block, two per marker otherwise.
  #
  # @param dob [Object] document object responding to of, obj, obj=
  # @return [Object] dob (modified in place)
  def block_(dob)
    breaks = /#{Mx[:br_line]}|#{Mx[:br_nl]}/
    dob.obj = if dob.of == :block # watch
      dob.obj.gsub(/#{Mx[:gl_o]}●#{Mx[:gl_c]}/, "* ").gsub(breaks, "\n")
    else
      dob.obj.gsub(breaks, "\n\n")
    end
    dob
  end

  # Rewrites a document object's text for po4a output: internal text-face
  # markers become Mx[:src_*] markers; for non-code objects links are
  # flattened, endnotes handled via #extract_endnotes and escaped glyphs
  # restored; finally breaks, page breaks, name anchors, nbsp and image
  # links are normalised.
  #
  # @param dob [Object] document object responding to is, of, obj, obj=
  # @param endnotes_ [Symbol] passed to #extract_endnotes (default :inline)
  # @return [Array] [dob, notes]; notes is '' for code objects, otherwise
  #   whatever #extract_endnotes returned as notes
  def textface_marks_po4a(dob, endnotes_ = :inline)
    notes = ''
    text = dob.obj
    %w[bold italics underscore subscript superscript insert cite strike monospace].each do |face|
      text = text.gsub(munge_delimited_rgx_("fa_#{face}", '(.+?)'),
        Mx[:"src_#{face}_o"] + '\1' + Mx[:"src_#{face}_c"])
    end
    dob.obj = text
    unless dob.is == :code
      text = dob.obj.
        gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/, '\1').
        gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, '\1').
        gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, '\1 [link: <\2>]').
        gsub(/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}image/, '\1 [link: local image]').
        gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/, '\1')
      text, notes = extract_endnotes(text, endnotes_)
      dob.obj = munge_glyphs_(text)
    end
    dob = block_(dob)
    dob = code_(dob)
    dob.obj = dob.obj.gsub(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/, ''). # remove page breaks
      gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/, '\1').
      gsub(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/, ''). # remove name links
      gsub(/&nbsp;|#{Mx[:nbsp]}/, ' '). # decide on
      gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,
        ' [ \1 ]').
      gsub(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,
        ' [ \1 ]').
      gsub(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,
        '[image: "\1"]')
    [dob, notes]
  end

  private

  # Builds the regex <Mx[:NAME_o]> inner <Mx[:NAME_c]> with the given options.
  def munge_delimited_rgx_(name, inner, opts = 0)
    opener = Mx[:"#{name}_o"]
    closer = Mx[:"#{name}_c"]
    Regexp.new("#{opener}#{inner}#{closer}", opts)
  end

  # Replaces runs of nbsp markers with a space and runs of each break marker
  # with a single newline.
  def munge_breaks_(txt)
    txt.gsub(/(?:#{Mx[:nbsp]})+/, ' ').
      gsub(/(?:#{Mx[:br_nl]})+/, "\n").
      gsub(/(?:#{Mx[:br_paragraph]})+/, "\n").
      gsub(/(?:#{Mx[:br_line]})+/, "\n")
  end

  # Replaces every escaped glyph listed in MUNGE_GLYPHS by its character.
  def munge_glyphs_(txt)
    MUNGE_GLYPHS.inject(txt) do |acc, (code, char)|
      acc.gsub(/#{Mx[:gl_o]}#{code}#{Mx[:gl_c]}/, char)
    end
  end

  # Shared body of the two strip methods; +prefix+ selects the marker family
  # (:srcrgx or :fa), +opts+ the regex options for delimited spans.
  def munge_strip_markup_(txt, prefix, opts)
    any = '(.+?)'
    %w[bold italics underscore cite insert strike].each do |face|
      txt = txt.gsub(munge_delimited_rgx_("#{prefix}_#{face}", any, opts), '\1')
    end
    # purely numeric superscripts are kept visible as [n]
    txt = txt.gsub(munge_delimited_rgx_("#{prefix}_superscript", '(\d+)', opts), '[\1]')
    %w[superscript subscript hilite].each do |face|
      txt = txt.gsub(munge_delimited_rgx_("#{prefix}_#{face}", any, opts), '\1')
    end
    txt = txt.gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i, '~')
    %w[en_a en_b].each do |note|
      txt = txt.gsub(munge_delimited_rgx_(note, '([\d*+]+)\s+(?:.+?)', opts), '') # endnote removed
    end
    # collapse runs of spaces (newlines are left alone)
    munge_glyphs_(munge_breaks_(txt)).gsub(/[ ]{2,}/, ' ').strip
  end
end
307 __END__