aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2007-07-26 17:51:16 +0100
committer Ralph Amissah <ralph@amissah.com> 2007-07-26 17:51:16 +0100
commit a1fd226ef8ae434f81f010ee8681fc059dbbe6f2 (patch)
tree b39b4f69f633da655998b89ba100ab3224bed7f8
parent changelog, info touch (diff)
multiple url matching refinements, open archive initiative
-rw-r--r-- CHANGELOG 13
-rw-r--r-- lib/sisu/v0/html_tune.rb 4
-rw-r--r-- lib/sisu/v0/odf.rb 12
-rw-r--r-- lib/sisu/v0/shared_html_lite.rb 4
-rw-r--r-- lib/sisu/v0/shared_xml.rb 10
-rw-r--r-- lib/sisu/v0/texpdf_format.rb 4
-rw-r--r-- lib/sisu/v0/xml_md_oai_pmh_dc.rb 5
7 files changed, 28 insertions, 24 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 93d3ed72..2db96703 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,20 +12,18 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz
sisu_0.55.6-1.dsc
sisu_0.55.6-1.diff.gz
- * db html, fix related to match of multiple urls within paragraph
+ * matching of multiple urls within a paragraph
+ * db html (html_lite), bug fix
+ * multiple urls listed, refinement: html, html_lite, xml, odf, texpdf
* open archive initiative for metadata harvesting, initial implementation,
Dublin Core, XML output available (-O), decide use later (filenames, output
- dir etc.), look at later and refine accordingly:
- http://www.openarchives.org/pmh/
- http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore
- http://es.dublincore.org/documents/usageguide/elements.shtml
- http://dublincore.org/documents/dces/
- see also http://dublincore.org/documents/dcmes-xml/
+ dir etc.)
* debian vim
* moved vim install back to addons
* added recommends vim-addon-manager
+ (thanks zack)
%% sisu_0.55.5.orig.tar.gz (2007-07-22:29/7)
http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz
@@ -47,6 +45,7 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz
open standards
* debian vim, syntax and ftplugin install moved to /usr/share/vim-scripts
+ (syntax file synced with Bram, thanks)
%% sisu_0.55.4.orig.tar.gz (2007-07-20:29/5)
http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.4.orig.tar.gz
diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb
index 1d3461c3..66c45aed 100644
--- a/lib/sisu/v0/html_tune.rb
+++ b/lib/sisu/v0/html_tune.rb
@@ -325,9 +325,9 @@ module SiSU_Tune
if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/)
para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'&lt;<a href="mailto:\1">\1</a>&gt;\2')
end
- para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration
para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
+ para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration
if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/)
para.gsub!(/(\.\.\/\S+)/,'<a href="\1">\1</a>')
end
diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb
index cff57888..fbd4cc62 100644
--- a/lib/sisu/v0/odf.rb
+++ b/lib/sisu/v0/odf.rb
@@ -295,12 +295,12 @@ module SiSU_ODF
end
def normal(para) #P1 - P3
para.gsub!(@serial,'')
- para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
- %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration
para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
%{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #special case \{ e.g. \}http://url
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
- %{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
+ para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration
para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/,
%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}})
para=case para
@@ -375,8 +375,8 @@ module SiSU_ODF
parray=[]
para.split(/<:?br(?: \/)?>/).each do |parablock|
parablock=group_clean(parablock)
- parablock.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
- %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration
+ parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration
parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/
end
para=parray.join + '<text:p text:style-name="Standard"/>'
diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb
index 2bcea532..16491ebf 100644
--- a/lib/sisu/v0/shared_html_lite.rb
+++ b/lib/sisu/v0/shared_html_lite.rb
@@ -131,9 +131,9 @@ module SiSU_Format_Shared
words=word_mode.join(' ')
para.gsub!(/.+/,words)
end
- para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration
para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
+ para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration
para
end
def paragraph
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb
index 995044db..249085a1 100644
--- a/lib/sisu/v0/shared_xml.rb
+++ b/lib/sisu/v0/shared_xml.rb
@@ -356,11 +356,11 @@ module SiSU_XML_munge
#para.gsub!(/^_\*\s+/,'<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="/usr/share/sisu/image/bullet_red.png" width="12" height="12" alt="*" /> ')
para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2" width="\\3" height="\\4" />[\\2] \\5})
para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2"/>\\2})
- para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,
- '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
- %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3})
- para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later
+ para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,
+ '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>\3') #watch, compare html_tune
+ para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ %{#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\1">\\1</link>#{@url_brace.xml_close}\\2})
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls linked, no decoration
#para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\2</link>\3') #escaped urls not linked, deal with later
para.gsub!(/&nbsp;/,' ') #clean
para
diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb
index 4a8d2cb5..81646f23 100644
--- a/lib/sisu/v0/texpdf_format.rb
+++ b/lib/sisu/v0/texpdf_format.rb
@@ -501,8 +501,8 @@ WOK
@string.gsub!(/<\/a>/,' ')
@string.gsub!(/[^\}>_]((?:https?|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case
@string.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url
- @string.gsub!(/(^|\s)(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #specially escaped url no decoration
- @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url>
+ @string.gsub!(/\B(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration
+ @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start
@string.gsub!(/<:ee>/,'')
@string.gsub!(/<!>/,' ')
#proposed change, insert, but may be redundant
diff --git a/lib/sisu/v0/xml_md_oai_pmh_dc.rb b/lib/sisu/v0/xml_md_oai_pmh_dc.rb
index 7ac7c3a6..1d7008a1 100644
--- a/lib/sisu/v0/xml_md_oai_pmh_dc.rb
+++ b/lib/sisu/v0/xml_md_oai_pmh_dc.rb
@@ -182,6 +182,11 @@ WOK
end
end
__END__
+http://www.openarchives.org/pmh/
+http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore
+http://es.dublincore.org/documents/usageguide/elements.shtml
+http://dublincore.org/documents/dces/
+see also http://dublincore.org/documents/dcmes-xml/
#http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore
#sample implementation, e.g. 2
<?xml version="1.0" encoding="UTF-8"?>