From a99e0de5885441989c2ae9ae6fad15fd35d0bb97 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 30 Jul 2007 09:06:51 +0100 Subject: url matching, semi-colon as possible terminator, in dal match https --- CHANGELOG | 10 ++++++++++ lib/sisu/v0/dal_doc_str_code.rb | 2 +- lib/sisu/v0/dal_syntax.rb | 6 +++--- lib/sisu/v0/html_tune.rb | 10 +++++----- lib/sisu/v0/odf.rb | 20 ++++++++++---------- lib/sisu/v0/shared_html_lite.rb | 10 +++++----- lib/sisu/v0/shared_xml.rb | 4 ++-- lib/sisu/v0/texpdf_format.rb | 9 ++++----- 8 files changed, 40 insertions(+), 31 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 733ce3ba..c7f444aa 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,16 @@ Reverse Chronological: %% STABLE MANIFEST +%% sisu_0.55.7.orig.tar.gz (2007-07-30:31/1) +http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.7.orig.tar.gz + sisu_0.55.7.orig.tar.gz + sisu_0.55.7-1.dsc + sisu_0.55.7-1.diff.gz + + * url matching refinement + * add semi-colon as possible url terminator + * dal match https + %% sisu_0.55.6.orig.tar.gz (2007-07-28:30/6) http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz 69368f8eb4da28d07f3a1ee1ea5b89f3 1271022 sisu_0.55.6.orig.tar.gz diff --git a/lib/sisu/v0/dal_doc_str_code.rb b/lib/sisu/v0/dal_doc_str_code.rb index e6a3ae1e..18ac03d8 100644 --- a/lib/sisu/v0/dal_doc_str_code.rb +++ b/lib/sisu/v0/dal_doc_str_code.rb @@ -147,7 +147,7 @@ module SiSU_document_structure_code if line =~/\S/ and line !~/^(?:alt|code|group|poem)\{|^\}(?:alt|code|group|poem)|<:(?:code|verse|alt|group).+/ line.gsub!(/\s\s/,'  ') line.gsub!(/^/,'<:codeline>') if type=='code' # try sort for texpdf special case - if line =~/http:\/\/\S+$/ + if line =~/https?:\/\/\S+$/ line.gsub!(/$/,' <:br>') else line.gsub!(/$/,'<:br>') #unless type=='code' diff --git a/lib/sisu/v0/dal_syntax.rb b/lib/sisu/v0/dal_syntax.rb index ce5fdc72..4fb0f5d3 100644 --- a/lib/sisu/v0/dal_syntax.rb +++ b/lib/sisu/v0/dal_syntax.rb @@ -65,7 +65,7 @@ module Syntax @data,@md=data,md @vz=SiSU_Env::Get_init.instance.skin @data_new=[] - @http_m='\{.+?\}http://\S+|http:\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^0~.+|<:(?:code|group|alt|verse)(?:-end)?>|<:br>' + @http_m='\{.+?\}https?://\S+|https?:\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^0~.+|<:(?:code|group|alt|verse)(?:-end)?>|<:br>' @manmkp_ital='[i/]\\{.+?\\}[i/]' tail_m_ital=%q{(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$)} tail_m_bold=%q{(?:(?:<\/i>)?(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$))?} @@ -283,8 +283,8 @@ module Syntax else line.gsub!(/(
)/i,"\\1\n") end else #code blocks - line.gsub!(/(^|\s)(http:\/\/\S+)/,'\1_\2') #line.gsub!(/(^|\s)(http:\/\/\S+)/,"\\1\\\\\\2") #escape urls - line.gsub!(/(^|\s)<(http:\/\/\S+)>([\s,.]|$)/,'\1\2\3') #clean/unescape urls with decoration, re-apply decoration later + line.gsub!(/(^|\s)(https?:\/\/\S+)/,'\1_\2') #line.gsub!(/(^|\s)(http:\/\/\S+)/,"\\1\\\\\\2") #escape urls + line.gsub!(/(^|\s)<(https?:\/\/\S+)>([\s,.]|$)/,'\1\2\3') #clean/unescape urls with decoration, re-apply decoration later line.gsub!(/<:codeline>/,"\n") end line diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index ac8d6594..cca41056 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -245,8 +245,8 @@ module SiSU_Tune @words=[] data.each do |word| @words << if word=~/\{(.+?)\}((?:https?|ftp)\S+|image)/ - if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/ - m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/.match(word).captures + if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/ + m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/.match(word).captures else m,u=/\{(.+?)\}((?:https?|ftp)\S+|image)/.match(word).captures d='' end @@ -325,9 +325,9 @@ module SiSU_Tune if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/) para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'<\1>\2') end - para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration - para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration + para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/) para.gsub!(/(\.\.\/\S+)/,'\1') end diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index ff788116..6025dfb2 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -272,21 +272,21 @@ module SiSU_ODF end para end - def text_link_odf(txt,url) + def text_link_odf(txt,url,trail) txt.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-( url.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-( - %{#{txt}} + %{#{txt.strip}#{trail}} end def text_link(para) para.gsub!(@serial,'') - m=para.scan(/(\{([^}]+?)\}((?:https?|ftp)\S+))/) #sort + m=para.scan(/(\{([^}]+?)\}((?:https?|ftp)\S+?))([;.,]?$)/) #sort if m m.each do |i| - txt,url=i[1],i[2] + txt,url,trail=i[1],i[2] txt.gsub!(/([)(\]\[])/,"\\\\\\1") txt.gsub!(/([+?])/,"\\\\\\1") # problems with + url.gsub!(/([+?])/,"\\\\\\1") # problems with + - para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url)) #make sure trailing ']' are not caught in url + para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url,trail)) #make sure trailing ']' are not caught in url para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix end m=nil @@ -295,13 +295,13 @@ module SiSU_ODF end def normal(para) #P1 - P3 para.gsub!(@serial,'') - para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, %{\\1\\2}) #http ftp matches escaped, no decoration - para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, %{\\1\\2\\3}) #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/, + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration - #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, also works + #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, also works #%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, %{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}}) @@ -377,7 +377,7 @@ module SiSU_ODF parray=[] para.split(/<:?br(?: \/)?>/).each do |parablock| parablock=group_clean(parablock) - parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, %{\\1\\2}) #http ftp matches escaped, no decoration parray << %{#{parablock}} if parablock =~/\S+/ end diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 43fb4446..1218aa79 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -88,8 +88,8 @@ module SiSU_Format_Shared @words=[] data.each do |word| @words << if word=~/\{(.+?)\}((?:https?|ftp)\S+|image)/ - if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/ - m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/.match(word).captures + if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/ + m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/.match(word).captures else m,u=/\{(.+?)\}((?:https?|ftp)\S+|image)/.match(word).captures d='' end @@ -131,9 +131,9 @@ module SiSU_Format_Shared words=word_mode.join(' ') para.gsub!(/.+/,words) end - para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration - para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration + para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration para end def paragraph diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index c30dc5db..c54ab42d 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -360,11 +360,11 @@ module SiSU_XML_munge '\1\2\4') #watch, compare html_tune #para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, # '\1\3') #watch, compare html_tune - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/, + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, #also works #%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) - para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2') #escaped urls not linked, deal with later + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\2') #escaped urls not linked, deal with later #para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later para.gsub!(/ /,' ') #clean para diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index 81646f23..92333d28 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -423,8 +423,7 @@ WOK end @string end - def special_characters_1(para) - # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list + def special_characters_1(para) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list #p @@utf_8.list #@string=Iconv.conv('ISO-8859-1', 'UTF-8', @string) word=@string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ @@ -791,9 +790,9 @@ WOK @words=[] @string.each do |word| @words << if word=~/\{.+?\}(?:https?|ftp):\S+/ - if word =~/\\\{(.+?)\\\}((?:https?|ftp)\S+?)([.,](?:\s|$))/ - r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+?)(?:[.,](?:\s|$)|(?:\s|$))/ - d=/\\\{.+?\\?\}(?:https?|ftp):\S+([.,](?:\s|$))/.match(word).captures.to_s + if word =~/\\\{(.+?)\\\}((?:https?|ftp)\S+?)([;.,](?:\s|$))/ + r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+?)(?:[;.,](?:\s|$)|(?:\s|$))/ + d=/\\\{.+?\\?\}(?:https?|ftp):\S+([;.,](?:\s|$))/.match(word).captures.to_s else r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+)/ d='' -- cgit v1.2.3