diff options
author | Ralph Amissah <ralph@amissah.com> | 2007-07-30 09:07:16 +0100 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2007-07-30 09:07:16 +0100 |
commit | 30144be628ce04818c75e9ce9e389fa93e707dd8 (patch) | |
tree | 774cefff14665adf821cbaf8c67bef858bb94738 | |
parent | Updated sisu-0.55.6 (diff) | |
parent | url matching, semi-colon as possible terminator, in dal match https (diff) |
Merge branch 'upstream' into debian/sid
-rw-r--r-- | CHANGELOG | 16 | ||||
-rw-r--r-- | lib/sisu/v0/dal_doc_str_code.rb | 2 | ||||
-rw-r--r-- | lib/sisu/v0/dal_syntax.rb | 6 | ||||
-rw-r--r-- | lib/sisu/v0/html_tune.rb | 10 | ||||
-rw-r--r-- | lib/sisu/v0/odf.rb | 20 | ||||
-rw-r--r-- | lib/sisu/v0/shared_html_lite.rb | 10 | ||||
-rw-r--r-- | lib/sisu/v0/shared_xml.rb | 10 | ||||
-rw-r--r-- | lib/sisu/v0/texpdf_format.rb | 9 |
8 files changed, 47 insertions, 36 deletions
@@ -6,11 +6,21 @@ Reverse Chronological: %% STABLE MANIFEST +%% sisu_0.55.7.orig.tar.gz (2007-07-30:31/1) +http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.7.orig.tar.gz + sisu_0.55.7.orig.tar.gz + sisu_0.55.7-1.dsc + sisu_0.55.7-1.diff.gz + + * url matching refinement + * add semi-colon as possible url terminator + * dal match https + %% sisu_0.55.6.orig.tar.gz (2007-07-28:30/6) http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz - sisu_0.55.6.orig.tar.gz - sisu_0.55.6-1.dsc - sisu_0.55.6-1.diff.gz + 69368f8eb4da28d07f3a1ee1ea5b89f3 1271022 sisu_0.55.6.orig.tar.gz + 1d5fa22bdad75c1c27c2171d2a054d66 606 sisu_0.55.6-1.dsc + 6fc6d8abf18aef047bb15e0079ae0f1c 142211 sisu_0.55.6-1.diff.gz * matching multiple and consecutive urls within a paragraph, refinements * db html (html_lite), multiple urls, bug fix diff --git a/lib/sisu/v0/dal_doc_str_code.rb b/lib/sisu/v0/dal_doc_str_code.rb index e6a3ae1e..18ac03d8 100644 --- a/lib/sisu/v0/dal_doc_str_code.rb +++ b/lib/sisu/v0/dal_doc_str_code.rb @@ -147,7 +147,7 @@ module SiSU_document_structure_code if line =~/\S/ and line !~/^(?:alt|code|group|poem)\{|^\}(?:alt|code|group|poem)|<:(?:code|verse|alt|group).+/ line.gsub!(/\s\s/,' ') line.gsub!(/^/,'<:codeline>') if type=='code' # try sort for texpdf special case - if line =~/http:\/\/\S+$/ + if line =~/https?:\/\/\S+$/ line.gsub!(/$/,' <:br>') else line.gsub!(/$/,'<:br>') #unless type=='code' diff --git a/lib/sisu/v0/dal_syntax.rb b/lib/sisu/v0/dal_syntax.rb index ce5fdc72..4fb0f5d3 100644 --- a/lib/sisu/v0/dal_syntax.rb +++ b/lib/sisu/v0/dal_syntax.rb @@ -65,7 +65,7 @@ module Syntax @data,@md=data,md @vz=SiSU_Env::Get_init.instance.skin @data_new=[] - @http_m='\{.+?\}http://\S+|http:\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^0~.+|<:(?:code|group|alt|verse)(?:-end)?>|<:br>' + @http_m='\{.+?\}https?://\S+|https?:\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^0~.+|<:(?:code|group|alt|verse)(?:-end)?>|<:br>' @manmkp_ital='[i/]\\{.+?\\}[i/]' tail_m_ital=%q{(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$)} tail_m_bold=%q{(?:(?:<\/i>)?(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$))?} @@ -283,8 +283,8 @@ module Syntax else line.gsub!(/(<br \/>)/i,"\\1\n") end else #code blocks - line.gsub!(/(^|\s)(http:\/\/\S+)/,'\1_\2') #line.gsub!(/(^|\s)(http:\/\/\S+)/,"\\1\\\\\\2") #escape urls - line.gsub!(/(^|\s)<(http:\/\/\S+)>([\s,.]|$)/,'\1\2\3') #clean/unescape urls with decoration, re-apply decoration later + line.gsub!(/(^|\s)(https?:\/\/\S+)/,'\1_\2') #line.gsub!(/(^|\s)(http:\/\/\S+)/,"\\1\\\\\\2") #escape urls + line.gsub!(/(^|\s)<(https?:\/\/\S+)>([\s,.]|$)/,'\1\2\3') #clean/unescape urls with decoration, re-apply decoration later line.gsub!(/<:codeline>/,"\n") end line diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index ac8d6594..cca41056 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -245,8 +245,8 @@ module SiSU_Tune @words=[] data.each do |word| @words << if word=~/\{(.+?)\}((?:https?|ftp)\S+|image)/ - if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/ - m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/.match(word).captures + if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/ + m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/.match(word).captures else m,u=/\{(.+?)\}((?:https?|ftp)\S+|image)/.match(word).captures d='' end @@ -325,9 +325,9 @@ module SiSU_Tune if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/) para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'<<a href="mailto:\1">\1</a>>\2') end - para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration - para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration + para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/) para.gsub!(/(\.\.\/\S+)/,'<a href="\1">\1</a>') end diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index ff788116..6025dfb2 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -272,21 +272,21 @@ module SiSU_ODF end para end - def text_link_odf(txt,url) + def text_link_odf(txt,url,trail) txt.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-( url.gsub!(/(\\\+)/,'+') #this is convoluted, and risky :-( - %{<text:a xlink:type="simple" xlink:href="#{url}">#{txt}</text:a>} + %{<text:a xlink:type="simple" xlink:href="#{url}">#{txt.strip}</text:a>#{trail}} end def text_link(para) para.gsub!(@serial,'') - m=para.scan(/(\{([^}]+?)\}((?:https?|ftp)\S+))/) #sort + m=para.scan(/(\{([^}]+?)\}((?:https?|ftp)\S+?))([;.,]?$)/) #sort if m m.each do |i| - txt,url=i[1],i[2] + txt,url,trail=i[1],i[2] txt.gsub!(/([)(\]\[])/,"\\\\\\1") txt.gsub!(/([+?])/,"\\\\\\1") # problems with + url.gsub!(/([+?])/,"\\\\\\1") # problems with + - para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url)) #make sure trailing ']' are not caught in url + para.gsub!(/\{\s*#{txt}\}#{url}/m,text_link_odf(txt,url,trail)) #make sure trailing ']' are not caught in url para.gsub!(/\\([)(\]\[?])/,'\1') #clumsy fix end m=nil @@ -295,13 +295,13 @@ module SiSU_ODF end def normal(para) #P1 - P3 para.gsub!(@serial,'') - para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration - para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/, + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration - #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, also works + #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, also works #%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}}) @@ -377,7 +377,7 @@ module SiSU_ODF parray=[] para.split(/<:?br(?: \/)?>/).each do |parablock| parablock=group_clean(parablock) - parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/ end diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 43fb4446..1218aa79 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -88,8 +88,8 @@ module SiSU_Format_Shared @words=[] data.each do |word| @words << if word=~/\{(.+?)\}((?:https?|ftp)\S+|image)/ - if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/ - m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([.,](?:\s|$))/.match(word).captures + if word =~/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/ + m,u,d=/\{(.+?)\}((?:https?|ftp)\S+|image)([;.,](?:\s|$))/.match(word).captures else m,u=/\{(.+?)\}((?:https?|ftp)\S+|image)/.match(word).captures d='' end @@ -131,9 +131,9 @@ module SiSU_Format_Shared words=word_mode.join(' ') para.gsub!(/.+/,words) end - para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration - para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration + para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration para end def paragraph diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 914391b7..c54ab42d 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -356,13 +356,15 @@ module SiSU_XML_munge #para.gsub!(/^_\*\s+/,'<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="/usr/share/sisu/image/bullet_red.png" width="12" height="12" alt="*" /> ') para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2" width="\\3" height="\\4" />[\\2] \\5}) para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2"/>\\2}) - para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, - '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>\3') #watch, compare html_tune - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/, + para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/, + '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune + #para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, + # '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>\3') #watch, compare html_tune + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3}) #para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, #also works #%{#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\1">\\1</link>#{@url_brace.xml_close}\\2}) - para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later #para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\2</link>\3') #escaped urls not linked, deal with later para.gsub!(/ /,' ') #clean para diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index 81646f23..92333d28 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -423,8 +423,7 @@ WOK end @string end - def special_characters_1(para) - # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list + def special_characters_1(para) # ~ ^ $ & % _ { } #LaTeX special characters - KEEP list #p @@utf_8.list #@string=Iconv.conv('ISO-8859-1', 'UTF-8', @string) word=@string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ @@ -791,9 +790,9 @@ WOK @words=[] @string.each do |word| @words << if word=~/\{.+?\}(?:https?|ftp):\S+/ - if word =~/\\\{(.+?)\\\}((?:https?|ftp)\S+?)([.,](?:\s|$))/ - r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+?)(?:[.,](?:\s|$)|(?:\s|$))/ - d=/\\\{.+?\\?\}(?:https?|ftp):\S+([.,](?:\s|$))/.match(word).captures.to_s + if word =~/\\\{(.+?)\\\}((?:https?|ftp)\S+?)([;.,](?:\s|$))/ + r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+?)(?:[;.,](?:\s|$)|(?:\s|$))/ + d=/\\\{.+?\\?\}(?:https?|ftp):\S+([;.,](?:\s|$))/.match(word).captures.to_s else r=%r/\\\{(.+?)\\?\}((?:https?|ftp):\S+)/ d='' |