From 6811ac91f21a434fc7d967c11e1b20f33918c6ea Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 19 Mar 2012 22:07:29 -0400 Subject: v3: 3.2 branch is main (v3dv --> v3); dev (v3dv) branch directories removed * v3dv (3.2) "merged" into v3 (previously 3.1) (& removed) * conf/sisu/v3dv --> conf/sisu/v3 * data/sisu/v3dv --> data/sisu/v3 * lib/sisu/v3dv --> lib/sisu/v3 * bin/sisu* (v3dv references changed to v3) * (--dev modifier (superfluous for the time being) runs main v3 branch) --- lib/sisu/v3/shared_markup_alt.rb | 174 ++++++++++++++++++++------------------- 1 file changed, 89 insertions(+), 85 deletions(-) (limited to 'lib/sisu/v3/shared_markup_alt.rb') diff --git a/lib/sisu/v3/shared_markup_alt.rb b/lib/sisu/v3/shared_markup_alt.rb index 12cb8393..dabb765e 100644 --- a/lib/sisu/v3/shared_markup_alt.rb +++ b/lib/sisu/v3/shared_markup_alt.rb @@ -56,7 +56,7 @@ ** Description: system environment, resource control and configuration details =end -module SiSU_text_representation +module SiSU_TextRepresentation class Alter def initialize(x) if x.class==String @@ -68,68 +68,69 @@ module SiSU_text_representation def strip_clean_of_extra_spaces # dal output tuned @s=@s.dup @s=@s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') unless @s =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/ - @s=@s.gsub(/ [ ]+/,' ') - @s=@s.gsub(/^ [ ]+/,'') - @s=@s.gsub(/ [ ]+$/,'') - @s=@s.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2') - @s=@s.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2') + @s=@s.gsub(/ [ ]+/,' '). + gsub(/^ [ ]+/,''). + gsub(/ [ ]+$/,''). + gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2'). + gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2') end def strip_clean_of_markup # text form used in sql db search, used for digest, define rules, make same as in db clean @s=@s.dup #% same as db clean --> - @s=@s.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strke_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') - @s=@s.gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_hilite_o]}(.+?)#{Mx[:fa_hilite_c]}/,'\1') - @s=@s.gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~') - @s=@s.gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,'') # endnote removed - @s=@s.gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,'') # endnote removed - @s=@s.gsub(/(?:#{Mx[:nbsp]})+/,' ') - @s=@s.gsub(/(?:#{Mx[:br_nl]})+/,"\n") - @s=@s.gsub(/(?:#{Mx[:br_paragraph]})+/,"\n") - @s=@s.gsub(/(?:#{Mx[:br_line]})+/,"\n") - @s=@s.gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<') - @s=@s.gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>') - @s=@s.gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&') - @s=@s.gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!') - @s=@s.gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#') - @s=@s.gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*') - @s=@s.gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-') - @s=@s.gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/') - @s=@s.gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_') - @s=@s.gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{') - @s=@s.gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}') - @s=@s.gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~') - @s=@s.gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©') - @s=@s.gsub(/\s\s+/,' ') - @s=@s.gsub(/\s\s+/,' ') - @s=@s.strip + @s=@s.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1'). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strke_c]}/,'\1'). + gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]'). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1'). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1'). + gsub(/#{Mx[:fa_hilite_o]}(.+?)#{Mx[:fa_hilite_c]}/,'\1'). + gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~'). + gsub(/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,''). # endnote removed + gsub(/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,''). # endnote removed + gsub(/(?:#{Mx[:nbsp]})+/,' '). + gsub(/(?:#{Mx[:br_nl]})+/,"\n"). + gsub(/(?:#{Mx[:br_paragraph]})+/,"\n"). + gsub(/(?:#{Mx[:br_line]})+/,"\n"). + gsub(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<'). + gsub(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>'). + gsub(/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&'). + gsub(/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'). + gsub(/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'). + gsub(/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'). + gsub(/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'). + gsub(/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'). + gsub(/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'). + gsub(/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'). + gsub(/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'). + gsub(/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~'). + gsub(/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©'). + gsub(/\s\s+/,' '). + gsub(/\s\s+/,' '). + strip end def semi_revert_markup # used for digest, define rules, make same as in db clean if @t_o - @s=@s.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*{\1}*') - @s=@s.gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/{\1}/') - @s=@s.gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_{\1}_') - @s=@s.gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"{\1}"') - @s=@s.gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+{\1}+') - @s=@s.gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strke_c]}/,'-{\1}-') - @s=@s.gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^{\1}^') - @s=@s.gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,',{\1},') - @s=@s.gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~') - @s=@s.gsub(/#{Mx[:en_a_o]}([\d*+]+\s+.+?)#{Mx[:en_a_c]}/,'~{\1}~') # endnote marker marked up - @s=@s.gsub(/#{Mx[:en_b_o]}([\d*+]+\s+.+?)#{Mx[:en_b_c]}/,'~[\1]~') # endnote marker marked up - if @t_o.is=='heading' or @t_o.is=='para' + @s=@s.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*{\1}*'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/{\1}/'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_{\1}_'). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"{\1}"'). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+{\1}+'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strke_c]}/,'-{\1}-'). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^{\1}^'). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,',{\1},'). + gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~'). + gsub(/#{Mx[:en_a_o]}([\d*+]+\s+.+?)#{Mx[:en_a_c]}/,'~{\1}~'). # endnote marker marked up + gsub(/#{Mx[:en_b_o]}([\d*+]+\s+.+?)#{Mx[:en_b_c]}/,'~[\1]~') # endnote marker marked up + if @t_o.is==:heading \ + || @t_o.is==:para @s=@s.gsub(/ [ ]+/,' ') @s=@s.gsub(/(?:#{Mx[:nbsp]})+/,' ') - if @t_o.is=='heading' + if @t_o.is==:heading @s=@t_o.lv + '~ ' + @s end - if @t_o.is=='para' + if @t_o.is==:para if @t_o.bullet_ @s='_* ' + @s end @@ -139,11 +140,11 @@ module SiSU_text_representation end end end - if @t_o.is=='block' \ - or @t_o.is=='group' \ - or @t_o.is=='code' + if @t_o.is==:block \ + || @t_o.is==:group \ + || @t_o.is==:code @s=@s.gsub(/#{Mx[:nbsp]}/,' ') - @s="#{@t_o.is}{\n\n#{@s}\n\n}#{@t_o.is}" + @s="#{@t_o.is.to_s}{\n\n#{@s}\n\n}#{@t_o.is.to_s}" @s=@s.gsub(/(?:#{Mx[:br_nl]}|\n)+/m,"\n\n") end #dealing with poem and verse calls for change in dal, where start and end verse of poem are marked as such @@ -151,32 +152,32 @@ module SiSU_text_representation end @s end - def html_lite #test whether eventually can be used in db_import replacing shared_html_lite (search for SiSU_Format_Shared) + def html_lite #test whether eventually can be used in db_import replacing shared_html_lite (search for SiSU_FormatShared) if @t_o - @s=@s.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"\1"') - @s=@s.gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+{\1}+') - @s=@s.gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strke_c]}/,'-{\1}-') - @s=@s.gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') - @s=@s.gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') - @s=@s.gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~') - if @t_o.is !='code' + @s=@s.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'). + gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'). + gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'). + gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"\1"'). + gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+{\1}+'). + gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strke_c]}/,'-{\1}-'). + gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1'). + gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1'). + gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~') + if @t_o.is !=:code if @s =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/ wm=@s.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)|\S+/) words=urls(wm) @s=@s.gsub(/.+/m,words) end - @s=@s.gsub(/#{Mx[:gl_o]}(#[0-9]{3})#{Mx[:gl_c]}/u,'&\1;') - @s=@s.gsub(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;') - @s=@s.gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1') #http ftp matches escaped, no decoration - @s=@s.gsub(/(#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1\2\3') #special case \{ e.g. \}http://url - @s=@s.gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}}) #http ftp matches with decoration + @s=@s.gsub(/#{Mx[:gl_o]}(#[0-9]{3})#{Mx[:gl_c]}/u,'&\1;'). + gsub(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;'). + gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1'). #http ftp matches escaped, no decoration + gsub(/(#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1\2\3'). #special case \{ e.g. \}http://url + gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}}) #http ftp matches with decoration else - @s=@s.gsub(//m,'>') + @s=@s.gsub(//m,'>') end - if @t_o.is=='paragraph' + if @t_o.is==:paragraph if @t_o.bullet_ @s=@s end @@ -184,7 +185,7 @@ module SiSU_text_representation @s=@s end end - if @t_o.is=='heading' + if @t_o.is==:heading @s=@s end else @@ -193,7 +194,7 @@ module SiSU_text_representation @s end end - class Modified_text_plus_Hash_digest + class ModifiedTextPlusHashDigest def initialize(md,x) @md=md if x.class==String @@ -201,7 +202,7 @@ module SiSU_text_representation else @t_o,@s=x,x.obj.dup end - @env ||=SiSU_Env::Info_env.new(@md.fns) + @env ||=SiSU_Env::InfoEnv.new(@md.fns) @sha_ =((@env.digest.type =='sha256') ? true : false) @sha_ ? (require 'digest/sha2') : (require 'digest/md5') end @@ -220,7 +221,7 @@ module SiSU_text_representation end def strip_clean_of_markup def txt - SiSU_text_representation::Alter.new(@s).strip_clean_of_markup + SiSU_TextRepresentation::Alter.new(@s).strip_clean_of_markup end def dgst en_dgst,img_dgst={},{} @@ -231,7 +232,7 @@ module SiSU_text_representation end def semi_revert_markup def txt - SiSU_text_representation::Alter.new(@s).semi_revert_markup + SiSU_TextRepresentation::Alter.new(@s).semi_revert_markup end def dgst txt_dgst=digest(txt) @@ -241,13 +242,13 @@ module SiSU_text_representation end def composite def stripped_clean(txt) - SiSU_text_representation::Alter.new(txt).strip_clean_of_markup + SiSU_TextRepresentation::Alter.new(txt).strip_clean_of_markup end def markup_reverted(txt) - SiSU_text_representation::Alter.new(txt).semi_revert_markup + SiSU_TextRepresentation::Alter.new(txt).semi_revert_markup end def images(imgs) - sys=SiSU_Env::System_call.new + sys=SiSU_Env::SystemCall.new line_image=[] img_dgst={} if imgs and imgs.length > 0 @@ -291,7 +292,9 @@ module SiSU_text_representation en_dgst end def dgst - if @t_o.of !='comment' and @t_o.of !='structure' and @t_o.of !='layout' + if @t_o.of !=:comment \ + && @t_o.of !=:structure \ + && @t_o.of !=:layout en_dgst,img_dgst={},{} txt_stripped_dgst=digest(stripped_clean(@t_o)) txt_markup_reverted_dgst=digest(markup_reverted(@t_o)) @@ -300,7 +303,8 @@ module SiSU_text_representation notes=@t_o.obj.scan(rgx_notes) endnotes_dgst=endnotes(notes) rgx_image=/#{Mx[:lnk_o]}(\S+\.(?:png|jpg|gif))\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/ - imgs=if (@t_o.is=='para' or @t_o.is=='image') \ + imgs=if (@t_o.is==:para \ + || @t_o.is==:image) \ and @t_o.obj =~rgx_image imgs=@t_o.obj.scan(rgx_image).flatten line_image=images(imgs) -- cgit v1.2.3