# coding: utf-8
# Provenance: recovered from the patch that removed lib/sisu/v2/shared_markup_alt.rb
# (commit 804a103722aa7731ca7f2062ee2ebf533607e6aa, Ralph Amissah, 2012-10-03,
# subject: v4: 4.0.0 new branch & version & changelog "opened").
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   [addresses lost when this copy was extracted]

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   [addresses lost when this copy was extracted]

 * Download:
   [address lost when this copy was extracted]

 * Ralph Amissah
   [addresses lost when this copy was extracted]

 ** Description: system environment, resource control and configuration details

=end
module SiSU_text_representation
  # Rewrites the internal marker notation of a text object (or a bare String)
  # into other textual forms. All markers are looked up in the Mx table, which
  # is defined outside this file.
  class Alter
    # Numeric glyph codes and the literal character each one stands for,
    # applied in this order by #strip_clean_of_markup.
    GLYPHS=[
      ['#033','!'],
      ['#035','#'],
      ['#042','*'],
      ['#045','-'],
      ['#047','/'],
      ['#095','_'],
      ['#123','{'],
      ['#125','}'],
      ['#126','~'],
      ['#169','©'],
    ].freeze
    # x is either a String, used as the text itself, or a document object
    # responding to #obj (its text, which is duplicated) and, depending on the
    # method called later, #is, #lv, #bullet_ and #indent.
    def initialize(x)
      if x.is_a?(String)
        @t_o,@s=nil,x
      else
        @t_o,@s=x,x.obj.dup
      end
    end
    # dal output tuned: collapses runs of spaces and closes the gap in
    # constructs such as "<bold close>' s ". Space before punctuation is only
    # removed when the text contains no endnote opening marker.
    # Returns the cleaned String.
    def strip_clean_of_extra_spaces
      unless @s =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
        @s=@s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1')
      end
      # The possessive rule used to be applied twice; one pass leaves nothing
      # for a second pass to match.
      substitute([
        [/ [ ]+/,' '],
        [/^ [ ]+/,''],
        [/ [ ]+$/,''],
        [/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2'],
      ])
    end
    # Plain-text form used in sql db search and for digests (same as db clean):
    # font-face markers and endnotes are removed, glyph codes are decoded, line
    # and paragraph break markers become newlines, and whitespace runs are
    # collapsed. Returns the stripped String.
    def strip_clean_of_markup
      rules=%w[bold italics underscore cite insert strike].map do |face|
        [font_face(face),'\1']
      end
      rules += [
        [/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]'],
        [font_face('superscript'),'\1'],
        [font_face('subscript'),'\1'],
        [font_face('hilite'),'\1'],
        [/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~'],
        [/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,''], # endnote removed
        [/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,''], # endnote removed
        [/(?:#{Mx[:nbsp]})+/,' '],
        [/(?:#{Mx[:br_nl]})+/,"\n"],
        [/(?:#{Mx[:br_paragraph]})+/,"\n"],
        [/(?:#{Mx[:br_line]})+/,"\n"],
        [glyph('(?:#lt|#060)'),'<'],
        [glyph('(?:#gt|#062)'),'>'],
        [glyph('#(?:038|amp)'),'&'],
      ]
      rules += GLYPHS.map { |code,char| [glyph(code),char] }
      # A single pass suffices: afterwards no two whitespace characters are adjacent.
      rules << [/\s\s+/,' ']
      @s=substitute(rules).strip
    end
    # Used for digests: converts internal markers back to SiSU source-like
    # markup (*{bold}*, /{italics}/, ~{endnote}~ ...) and re-adds the heading
    # level, bullet and indent prefixes, or group/code block wrappers.
    # When constructed from a bare String the text is returned unchanged.
    def semi_revert_markup
      return @s unless @t_o
      substitute([
        [font_face('bold'),'*{\1}*'],
        [font_face('italics'),'/{\1}/'],
        [font_face('underscore'),'_{\1}_'],
        [font_face('cite'),'"{\1}"'],
        [font_face('insert'),'+{\1}+'],
        [font_face('strike'),'-{\1}-'],
        [font_face('superscript'),'^{\1}^'],
        [font_face('subscript'),',{\1},'],
        [/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~'],
        [/#{Mx[:en_a_o]}([\d*+]+\s+.+?)#{Mx[:en_a_c]}/,'~{\1}~'], # endnote marker marked up
        [/#{Mx[:en_b_o]}([\d*+]+\s+.+?)#{Mx[:en_b_c]}/,'~[\1]~'], # endnote marker marked up
      ])
      if @t_o.is=='heading' || @t_o.is=='para'
        substitute([
          [/ [ ]+/,' '],
          [/(?:#{Mx[:nbsp]})+/,' '],
        ])
        @s=@t_o.lv + '~ ' + @s if @t_o.is=='heading'
        if @t_o.is=='para'
          @s='_* ' + @s if @t_o.bullet_
          if @t_o.indent.to_i > 0
            @s="_#{@t_o.indent} " + @s
            # merge "_1 _* " into the combined indent+bullet form "_1* "
            @s=@s.gsub(/^(_[1-9])\s_\*\s/,'\1* ')
          end
        end
      end
      if @t_o.is=='group' || @t_o.is=='code'
        @s=@s.gsub(/#{Mx[:nbsp]}/,' ')
        @s="#{@t_o.is}{\n\n#{@s}\n\n}#{@t_o.is}"
        @s=@s.gsub(/(?:#{Mx[:br_nl]}|\n)+/m,"\n\n")
      end
      #dealing with poem and verse calls for change in dal, where start and end verse of poem are marked as such
      @s=@s.strip
    end
    # Experimental (original note: test whether eventually can be used in
    # db_import replacing shared_html_lite, search for SiSU_Format_Shared).
    # NOTE(review): the replacement strings below are reproduced exactly as
    # found; several look as if HTML tags were stripped from them (bold,
    # italics, underscore, super/subscript map to a bare '\1', and '\1\2\3'
    # refers to a third group that the pattern does not have) - confirm
    # against an intact copy before relying on this method.
    # NOTE(review): `urls` and `@url_brace` are not defined anywhere in this
    # file, so the non-code branch raises unless something outside supplies them.
    def html_lite
      if @t_o
        substitute([
          [font_face('bold'),'\1'],
          [font_face('italics'),'\1'],
          [font_face('underscore'),'\1'],
          [font_face('cite'),'"\1"'],
          [font_face('insert'),'+{\1}+'],
          [font_face('strike'),'-{\1}-'],
          [font_face('superscript'),'\1'],
          [font_face('subscript'),'\1'],
          [/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~'],
        ])
        if @t_o.is !='code'
          if @s =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/
            wm=@s.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)|\S+/)
            words=urls(wm)
            @s=@s.gsub(/.+/m,words)
          end
          @s=@s.gsub(/#{Mx[:gl_o]}(#[0-9]{3})#{Mx[:gl_c]}/u,'&\1;')
          @s=@s.gsub(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;')
          @s=@s.gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1') #http ftp matches escaped, no decoration
          @s=@s.gsub(/(#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1\2\3') #special case \{ e.g. \}http://url
          @s=@s.gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}}) #http ftp matches with decoration
        else
          # code-block: angle brackets special characters. Both passes are kept
          # on purpose: the second one can still match (e.g. "a__b").
          @s=@s.gsub(/(^|[^}])_/m,'\1>')
          @s=@s.gsub(/(^|[^}])_/m,'\1>')
        end
      else
        p __FILE__ +':'+ __LINE__.to_s
      end
      @s
    end
    private
    # Applies each [pattern, replacement] pair to @s in order and returns @s.
    def substitute(rules)
      rules.each { |pattern,replacement| @s=@s.gsub(pattern,replacement) }
      @s
    end
    # Pattern capturing the text between the opening and closing marker of the
    # named font face; both keys derive from one name, so they cannot drift apart.
    def font_face(name)
      /#{Mx[:"fa_#{name}_o"]}(.+?)#{Mx[:"fa_#{name}_c"]}/
    end
    # Pattern for a glyph code (given as regexp source) inside the glyph markers.
    def glyph(code)
      /#{Mx[:gl_o]}#{code}#{Mx[:gl_c]}/
    end
  end
  # Produces text representations of an object together with their digests
  # (SHA256 when the environment's digest type is 'sha256', otherwise MD5).
  # Usage: select a mode, then ask for the result, e.g.
  #   Modified_text_plus_Hash_digest.new(md,x).composite.dgst
  class Modified_text_plus_Hash_digest
    # md must respond to #fns (and #cmd when images are processed);
    # x is a String or a document object responding to #obj.
    def initialize(md,x)
      @md=md
      if x.is_a?(String)
        @t_o,@s=nil,x
      else
        @t_o,@s=x,x.obj.dup
      end
      @env ||=SiSU_Env::Info_env.new(@md.fns)
      @sha_ =(@env.digest.type =='sha256')
      # load only the digest implementation that will be used
      @sha_ ? (require 'digest/sha2') : (require 'digest/md5')
    end
    # Hex digest of txt using the configured algorithm.
    def digest(txt)
      (@sha_ ? Digest::SHA256 : Digest::MD5).hexdigest(txt)
    end
    # Mode selectors. Each returns self so that #txt / #dgst can be chained.
    # The mode is held per instance: previously these methods re-defined #txt
    # and #dgst on the class, so the last selector called on any instance
    # changed the behaviour of every instance.
    def strip_clean_of_markup
      @mode=:strip_clean_of_markup
      self
    end
    def semi_revert_markup
      @mode=:semi_revert_markup
      self
    end
    def composite
      @mode=:composite
      self
    end
    # Text for the selected mode (strip_clean_of_markup or semi_revert_markup).
    # NOTE(review): the text is handed to Alter as a String, so in
    # semi_revert_markup mode Alter returns it unchanged; this is the existing
    # behaviour and digests depend on it.
    def txt
      case @mode
      when :strip_clean_of_markup
        SiSU_text_representation::Alter.new(@s).strip_clean_of_markup
      when :semi_revert_markup
        SiSU_text_representation::Alter.new(@s).semi_revert_markup
      else
        raise NoMethodError,"txt requires strip_clean_of_markup or semi_revert_markup to be selected first"
      end
    end
    # Digest hash for the selected mode:
    #   text modes -> {:txt, :dgst_txt}
    #   composite  -> {:is, :ocn, :dgst_stripped_txt, :dgst_markedup_txt}
    #                 plus :endnotes / :images when present, or nil for
    #                 comment, structure and layout objects.
    def dgst
      case @mode
      when :strip_clean_of_markup, :semi_revert_markup
        text=txt
        {:txt=>text,:dgst_txt=>digest(text)}
      when :composite
        composite_digests
      else
        raise NoMethodError,"dgst requires strip_clean_of_markup, semi_revert_markup or composite to be selected first"
      end
    end
    def stripped_clean(txt)
      SiSU_text_representation::Alter.new(txt).strip_clean_of_markup
    end
    def markup_reverted(txt)
      SiSU_text_representation::Alter.new(txt).semi_revert_markup
    end
    # One hash per image name: {:img_dgst, :img_name, :img_type}.
    # A missing image is reported (unless the command string contains 'q')
    # and listed with a nil digest and a ' [image missing]' suffix.
    def images(imgs)
      sys=SiSU_Env::System_call.new
      return [] unless imgs && imgs.length > 0
      imgs.map do |img|
        dir=image_source_dir(img)
        report_missing_image(img) unless dir
        img_type=/\S+\.(png|jpg|gif)/.match(img)[1]
        if dir
          file=dir + '/' + img
          checksum=(@sha_ ? sys.sha256(file) : sys.md5(file))
          # presumably the call returns a pair whose second element is the
          # digest - confirm against SiSU_Env::System_call
          {:img_dgst=>checksum[1],:img_name=>img,:img_type=>img_type}
        else
          {:img_dgst=>nil,:img_name=>img + ' [image missing]',:img_type=>img_type}
        end
      end
    end
    # One hash per endnote: {:note_number, :note_dgst}. The note digest is the
    # digest of the digest of the stripped note text (kept as found, since
    # stored digests depend on it).
    def endnotes(en)
      return [] unless en && en.length > 0
      en.flatten.map do |note|
        note_no=note.gsub(/^([\d*+]+)\s+.+/,'\1')
        {:note_number=>note_no,:note_dgst=>digest(digest(stripped_clean(note)))}
      end
    end
    private
    # Digests of the stripped and the markup-reverted text of @t_o, plus
    # digests of any endnotes and images found in it.
    def composite_digests
      return nil if @t_o.of =='comment' || @t_o.of =='structure' || @t_o.of =='layout'
      txt_stripped_dgst=digest(stripped_clean(@t_o))
      txt_markup_reverted_dgst=digest(markup_reverted(@t_o))
      rgx_notes=/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
      endnotes_dgst=endnotes(@t_o.obj.scan(rgx_notes))
      rgx_image=/#{Mx[:lnk_o]}(\S+\.(?:png|jpg|gif))\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/
      line_image=if (@t_o.is=='para' || @t_o.is=='image') && @t_o.obj =~rgx_image
        images(@t_o.obj.scan(rgx_image).flatten)
      end
      result={:is=>@t_o.is,:ocn=>@t_o.ocn,:dgst_stripped_txt=>txt_stripped_dgst,:dgst_markedup_txt=>txt_markup_reverted_dgst}
      result[:endnotes]=endnotes_dgst if endnotes_dgst && endnotes_dgst.length > 0
      result[:images]=line_image if line_image && line_image.length > 0
      result
    end
    # First of the local, remote and default image directories that contains
    # the named image file, or nil when none does.
    def image_source_dir(img)
      [:image_source_local_tex,:image_source_remote_tex,:image_source_tex].each do |location|
        dir=@env.path.public_send(location)
        return dir if FileTest.file?("#{dir}/#{img}")
      end
      nil
    end
    def report_missing_image(img)
      return if @md.cmd =~/q/
      SiSU_Screen::Ansi.new(@md.cmd,"ERROR - image:", %{"#{img}" missing}, "search locations: #{@env.path.image_source_local_tex}, #{@env.path.image_source_remote_tex} and #{@env.path.image_source_tex}").error2
    end
  end
end