about | summary | refs | log | tree | commit | diff | homepage
path: root/lib/sisu/v3dv/db_sqltxt.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/v3dv/db_sqltxt.rb')
-rw-r--r-- lib/sisu/v3dv/db_sqltxt.rb 60
1 files changed, 29 insertions, 31 deletions
diff --git a/lib/sisu/v3dv/db_sqltxt.rb b/lib/sisu/v3dv/db_sqltxt.rb
index 49dcf10a..e65a8521 100644
--- a/lib/sisu/v3dv/db_sqltxt.rb
+++ b/lib/sisu/v3dv/db_sqltxt.rb
@@ -59,46 +59,45 @@
module SiSU_DbText
class Prepare
def special_character_escape(str)
- str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
- str.gsub!(/(\\)/m,'\1\1') #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql
- str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n")
- str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
- str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2')
- str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
- str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
- str
+ str=str.gsub(/'/,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
+ gsub(/(\\)/m,'\1\1'). #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql
+ gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n").
+ gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,''). #check
+ gsub(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2').
+ gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2').
+ gsub(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
end
def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source
txt_arr,en=[],[]
arr=arr.class==String ? arr.split(/\n+/m) : arr
arr.each do |s|
- s.gsub!(/([*\/_-])\{(.+?)\}\1/m,'\2')
- s.gsub!(/^(?:block|group|poem|code)\{/m,''); s.gsub!(/^\}(?:block|group|poem|code)/m,'')
- s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'')
+ s=s.gsub(/([*\/_-])\{(.+?)\}\1/m,'\2').
+ gsub(/^(?:block|group|poem|code)\{/m,'').gsub(/^\}(?:block|group|poem|code)/m,'').
+ gsub(/\A(?:@\S+:\s+.+)\Z/m,'')
if s =~/^:A~/
if defined? @md.creator \
and defined? @md.creator.author \
and not @md.creator.author.empty?
- s.gsub!(/@author/,@md.creator.author)
+ s=s.gsub(/@author/,@md.creator.author)
else
SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:',@md.fnb).warn unless @md.opt.cmd.inspect =~/q/
end
if defined? @md.title \
and defined? @md.title.full \
and not @md.title.full.empty?
- s.gsub!(/@title/,@md.title.full)
+ s=s.gsub(/@title/,@md.title.full)
else
SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:',@md.fnb).warn unless @md.opt.cmd.inspect =~/q/
end
end
- s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/m,'')
- s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/m,'')
- s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/m,'')
- s.gsub!(/^%{1,3} .+/m,'') #removed even if contained in code block
- s.gsub!(/<br>/m,' ')
+ s=s.gsub(/^(?:_[1-9]\*?|_\*)\s+/m,'').
+ gsub(/^(?:[1-9]\~(\S+)?)\s+/m,'').
+ gsub(/^(?::?[A-C]\~(\S+)?)\s+/m,'').
+ gsub(/^%{1,3} .+/m,''). #removed even if contained in code block
+ gsub(/<br>/m,' ')
en << s.scan(/~\{\s*(.+?)\s*\}~/m)
- s.gsub!(/~\{.+?\}~/m,'')
- s.gsub!(/ \s+/m,' ')
+ s=s.gsub(/~\{.+?\}~/m,'').
+ gsub(/ \s+/m,' ')
#special_character_escape(s)
s
end
@@ -109,17 +108,16 @@ module SiSU_DbText
txt
end
def strip_markup(str) #define rules, make same as in dal clean
- str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
- str.gsub!(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ')
- str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables
- str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables
- str.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later
- str.gsub!(/<.+?>/,'')
- str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search
- str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search
- str.gsub!(/\s\s+/,' ')
- str.strip!
- str
+ str=str.gsub(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]').
+ gsub(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ').
+ gsub(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1'). #tables
+ gsub(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' '). #tables
+ gsub(/#{Mx[:tc_p]}/u,' '). #tables tidy later
+ gsub(/<.+?>/,'').
+ gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] '). # else image names found in search
+ gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]'). # else image names found in search
+ gsub(/\s\s+/,' ').
+ strip
end
def unique_words(str)
a=str.scan(/[a-zA-Z0-9\\\/_-]{2,}/) #a=str.scan(/\S+{2,}/)