From a407aff43ba7b58caa7c018343d95c58e317abb8 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 28 Jul 2007 14:04:32 +0100 Subject: url matching, refinement necessary, use of positive lookahead not avoided, correction and rebuilt --- lib/sisu/v0/html_tune.rb | 2 +- lib/sisu/v0/odf.rb | 6 ++++-- lib/sisu/v0/shared_html_lite.rb | 2 +- lib/sisu/v0/shared_xml.rb | 6 ++++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index 66c45aed..ac8d6594 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -327,7 +327,7 @@ module SiSU_Tune end para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) #http ftp matches with decoration + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/) para.gsub!(/(\.\.\/\S+)/,'\1') end diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index fbd4cc62..ff788116 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -299,8 +299,10 @@ module SiSU_ODF %{\\1\\2}) #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, %{\\1\\2\\3}) #special case \{ e.g. \}http://url - para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) #http ftp matches with decoration + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/, + %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, also works + #%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, %{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}}) para=case para diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 16491ebf..43fb4446 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -133,7 +133,7 @@ module SiSU_Format_Shared end para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) #http ftp matches with decoration + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration para end def paragraph diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 249085a1..914391b7 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -358,8 +358,10 @@ module SiSU_XML_munge para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1\\2}) para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, '\1\3') #watch, compare html_tune - para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/, + %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) + #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, #also works + #%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2') #escaped urls not linked, deal with later #para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later para.gsub!(/ /,' ') #clean -- cgit v1.2.3