diff options
author | Ralph Amissah <ralph@amissah.com> | 2007-07-26 17:51:16 +0100 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2007-07-26 17:51:16 +0100 |
commit | a1fd226ef8ae434f81f010ee8681fc059dbbe6f2 (patch) | |
tree | b39b4f69f633da655998b89ba100ab3224bed7f8 | |
parent | changelog, info touch (diff) |
multiple url matching refinements, open archive initiative
-rw-r--r-- | CHANGELOG | 13 | ||||
-rw-r--r-- | lib/sisu/v0/html_tune.rb | 4 | ||||
-rw-r--r-- | lib/sisu/v0/odf.rb | 12 | ||||
-rw-r--r-- | lib/sisu/v0/shared_html_lite.rb | 4 | ||||
-rw-r--r-- | lib/sisu/v0/shared_xml.rb | 10 | ||||
-rw-r--r-- | lib/sisu/v0/texpdf_format.rb | 4 | ||||
-rw-r--r-- | lib/sisu/v0/xml_md_oai_pmh_dc.rb | 5 |
7 files changed, 28 insertions, 24 deletions
@@ -12,20 +12,18 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz sisu_0.55.6-1.dsc sisu_0.55.6-1.diff.gz - * db html, fix related to match of multiple urls within paragraph + * matching of multiple urls within a paragraph + * db html (html_lite), bug fix + * multiple uls listed, refinement: html, html_lite, xml, odf, texpdf * open archive initiative for metadata harvesting, initial implementation, Dublin Core, XML output available (-O), decide use later (filenames, output - dir etc.), look at later and refine accordingly: - http://www.openarchives.org/pmh/ - http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore - http://es.dublincore.org/documents/usageguide/elements.shtml - http://dublincore.org/documents/dces/ - see also http://dublincore.org/documents/dcmes-xml/ + dir etc.) * debian vim * moved vim install back to addons * added recommends vim-addon-manager + (thanks zack) %% sisu_0.55.5.orig.tar.gz (2007-07-22:29/7) http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz @@ -47,6 +45,7 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz open standards * debian vim, syntax and ftplugin install moved to /usr/share/vim-scripts + (syntax file synced with Bram, thanks) %% sisu_0.55.4.orig.tar.gz (2007-07-20:29/5) http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.4.orig.tar.gz diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index 1d3461c3..66c45aed 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -325,9 +325,9 @@ module SiSU_Tune if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/) para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'<<a href="mailto:\1">\1</a>>\2') end - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/) para.gsub!(/(\.\.\/\S+)/,'<a href="\1">\1</a>') end diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index cff57888..fbd4cc62 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -295,12 +295,12 @@ module SiSU_ODF end def normal(para) #P1 - P3 para.gsub!(@serial,'') - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}}) para=case para @@ -375,8 +375,8 @@ module SiSU_ODF parray=[] para.split(/<:?br(?: \/)?>/).each do |parablock| parablock=group_clean(parablock) - parablock.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration + parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/ end para=parray.join + '<text:p text:style-name="Standard"/>' diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 2bcea532..16491ebf 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -131,9 +131,9 @@ module SiSU_Format_Shared words=word_mode.join(' ') para.gsub!(/.+/,words) end - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para end def paragraph diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 995044db..249085a1 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -356,11 +356,11 @@ module SiSU_XML_munge #para.gsub!(/^_\*\s+/,'<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="/usr/share/sisu/image/bullet_red.png" width="12" height="12" alt="*" /> ') para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2" width="\\3" height="\\4" />[\\2] \\5}) para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2"/>\\2}) - para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, - '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3}) - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later + para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, + '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>\3') #watch, compare html_tune + para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + %{#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\1">\\1</link>#{@url_brace.xml_close}\\2}) + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later #para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\2</link>\3') #escaped urls not linked, deal with later para.gsub!(/ /,' ') #clean para diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index 4a8d2cb5..81646f23 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -501,8 +501,8 @@ WOK @string.gsub!(/<\/a>/,' ') @string.gsub!(/[^\}>_]((?:https?|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case @string.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url - @string.gsub!(/(^|\s)(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #specially escaped url no decoration - @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> + @string.gsub!(/\B(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration + @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start @string.gsub!(/<:ee>/,'') @string.gsub!(/<!>/,' ') #proposed change, insert, but may be redundant diff --git a/lib/sisu/v0/xml_md_oai_pmh_dc.rb b/lib/sisu/v0/xml_md_oai_pmh_dc.rb index 7ac7c3a6..1d7008a1 100644 --- a/lib/sisu/v0/xml_md_oai_pmh_dc.rb +++ b/lib/sisu/v0/xml_md_oai_pmh_dc.rb @@ -182,6 +182,11 @@ WOK end end __END__ +http://www.openarchives.org/pmh/ +http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore +http://es.dublincore.org/documents/usageguide/elements.shtml +http://dublincore.org/documents/dces/ +see also http://dublincore.org/documents/dcmes-xml/ #http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore #sample implementation, e.g. 2 <?xml version="1.0" encoding="UTF-8"?> |