diff options
| author | Ralph Amissah <ralph@amissah.com> | 2007-07-26 17:51:16 +0100 | 
|---|---|---|
| committer | Ralph Amissah <ralph@amissah.com> | 2007-07-26 17:51:16 +0100 | 
| commit | a1fd226ef8ae434f81f010ee8681fc059dbbe6f2 (patch) | |
| tree | b39b4f69f633da655998b89ba100ab3224bed7f8 | |
| parent | changelog, info touch (diff) | |
multiple url matching refinements, open archive initiative
| -rw-r--r-- | CHANGELOG | 13 | ||||
| -rw-r--r-- | lib/sisu/v0/html_tune.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/odf.rb | 12 | ||||
| -rw-r--r-- | lib/sisu/v0/shared_html_lite.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/shared_xml.rb | 10 | ||||
| -rw-r--r-- | lib/sisu/v0/texpdf_format.rb | 4 | ||||
| -rw-r--r-- | lib/sisu/v0/xml_md_oai_pmh_dc.rb | 5 | 
7 files changed, 28 insertions, 24 deletions
| @@ -12,20 +12,18 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz    sisu_0.55.6-1.dsc    sisu_0.55.6-1.diff.gz -  * db html, fix related to match of multiple urls within paragraph +  * matching of multiple urls within a paragraph +    * db html (html_lite), bug fix +    * multiple uls listed, refinement: html, html_lite, xml, odf, texpdf    * open archive initiative for metadata harvesting, initial implementation,      Dublin Core, XML output available (-O), decide use later (filenames, output -    dir etc.), look at later and refine accordingly: -      http://www.openarchives.org/pmh/ -      http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore -      http://es.dublincore.org/documents/usageguide/elements.shtml -      http://dublincore.org/documents/dces/ -      see also http://dublincore.org/documents/dcmes-xml/ +    dir etc.)    * debian vim      * moved vim install back to addons      * added recommends vim-addon-manager +    (thanks zack)  %% sisu_0.55.5.orig.tar.gz (2007-07-22:29/7)  http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz @@ -47,6 +45,7 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz      open standards    * debian vim, syntax and ftplugin install moved to /usr/share/vim-scripts +    (syntax file synced with Bram, thanks)  %% sisu_0.55.4.orig.tar.gz (2007-07-20:29/5)  http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.4.orig.tar.gz diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index 1d3461c3..66c45aed 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -325,9 +325,9 @@ module SiSU_Tune            if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/)              para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'<<a href="mailto:\1">\1</a>>\2')            end -          para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration +          para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration            para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url -          para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration +          para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration            if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/)              para.gsub!(/(\.\.\/\S+)/,'<a href="\1">\1</a>')            end diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index cff57888..fbd4cc62 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -295,12 +295,12 @@ module SiSU_ODF        end        def normal(para)                                                           #P1 - P3          para.gsub!(@serial,'') -        para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, -          %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration +        para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +          %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration          para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,            %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #special case \{ e.g. \}http://url -        para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, -          %{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration +        para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +          %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration          para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/,            %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}})          para=case para @@ -375,8 +375,8 @@ module SiSU_ODF          parray=[]          para.split(/<:?br(?: \/)?>/).each do |parablock|            parablock=group_clean(parablock) -          parablock.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, -            %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration +          parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +            %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration            parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/          end          para=parray.join + '<text:p text:style-name="Standard"/>' diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 2bcea532..16491ebf 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -131,9 +131,9 @@ module SiSU_Format_Shared          words=word_mode.join(' ')          para.gsub!(/.+/,words)        end -      para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration +      para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration        para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url -      para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration +      para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration        para      end      def paragraph diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 995044db..249085a1 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -356,11 +356,11 @@ module SiSU_XML_munge        #para.gsub!(/^_\*\s+/,'<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="/usr/share/sisu/image/bullet_red.png" width="12" height="12" alt="*" /> ')        para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2" width="\\3" height="\\4" />[\\2] \\5})        para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2"/>\\2}) -      para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, -        '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune -      para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, -        %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3}) -      para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later +      para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, +        '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>\3') #watch, compare html_tune +      para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, +        %{#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\1">\\1</link>#{@url_brace.xml_close}\\2}) +      para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later        #para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\2</link>\3') #escaped urls not linked, deal with later        para.gsub!(/ /,' ') #clean        para diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index 4a8d2cb5..81646f23 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -501,8 +501,8 @@ WOK        @string.gsub!(/<\/a>/,' ')        @string.gsub!(/[^\}>_]((?:https?|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case        @string.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url -      @string.gsub!(/(^|\s)(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #specially escaped url no decoration -      @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> +      @string.gsub!(/\B(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration +      @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start        @string.gsub!(/<:ee>/,'')        @string.gsub!(/<!>/,' ')        #proposed change, insert, but may be redundant diff --git a/lib/sisu/v0/xml_md_oai_pmh_dc.rb b/lib/sisu/v0/xml_md_oai_pmh_dc.rb index 7ac7c3a6..1d7008a1 100644 --- a/lib/sisu/v0/xml_md_oai_pmh_dc.rb +++ b/lib/sisu/v0/xml_md_oai_pmh_dc.rb @@ -182,6 +182,11 @@ WOK    end  end  __END__ +http://www.openarchives.org/pmh/ +http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore +http://es.dublincore.org/documents/usageguide/elements.shtml +http://dublincore.org/documents/dces/ +see also http://dublincore.org/documents/dcmes-xml/  #http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore  #sample implementation, e.g. 2  <?xml version="1.0" encoding="UTF-8"?> | 
