From 55d549807a443531fdd20d61d90078e3b5638abc Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 24 Jul 2007 01:55:42 +0100 Subject: matching of multiple urls within paragraph, fix --- CHANGELOG | 2 ++ lib/sisu/v0/db_import.rb | 2 +- lib/sisu/v0/shared_html_lite.rb | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index a791c3f2..bb53e377 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,6 +12,8 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz sisu_0.55.6-1.dsc sisu_0.55.6-1.diff.gz + * db html, fix related to match of multiple urls within paragraph + * debian vim * moved vim install back to addons * added recommends vim-addon-manager diff --git a/lib/sisu/v0/db_import.rb b/lib/sisu/v0/db_import.rb index 91360613..5ef26466 100644 --- a/lib/sisu/v0/db_import.rb +++ b/lib/sisu/v0/db_import.rb @@ -125,7 +125,7 @@ module SiSU_DB_import string.gsub!(/<:(?:code|alt|group|verse)(?:-end)?>/,'') string.gsub!(/<:name#\S+?>/,'') string.gsub!(/\{\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)\}\S+/,'[image: \1] \2') - string.gsub!(/\{\s*(.+?)\s*\}http:\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') + string.gsub!(/\{\s*(.+?)\s*\}https?:\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') end def unicode_special_character_escape(string) #string.gsub!(/(["';:,])/, %{\\\\\\1}) diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 76def67d..50abb284 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -127,7 +127,7 @@ module SiSU_Format_Shared def markup(para) if para =~/\{.+?\}((?:http|ftp)\S+|image)/ @word_mode=para.scan(/\{.+?\}(?:(?:https?|ftp)\S+|image)|\S+/) - word_mode=urls(para) + word_mode=urls(@word_mode) words=word_mode.join(' ') para.gsub!(/.+/,words) end -- cgit v1.2.3 From b0481de5c3a378f3e1d6cec774cd7ce36d7fcac8 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 24 Jul 2007 15:13:50 +0100 Subject: open archive initiative for metadata harvesting, initial implementation, decide use later -O --- CHANGELOG | 3 + lib/sisu/v0/hub.rb | 33 ++++--- lib/sisu/v0/manifest.rb | 1 - lib/sisu/v0/param.rb | 3 +- lib/sisu/v0/shared_html_lite.rb | 4 +- lib/sisu/v0/sysenv.rb | 5 + lib/sisu/v0/xml_md_oai_pmh_dc.rb | 204 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 234 insertions(+), 19 deletions(-) create mode 100644 lib/sisu/v0/xml_md_oai_pmh_dc.rb diff --git a/CHANGELOG b/CHANGELOG index bb53e377..8493671c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -14,6 +14,9 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz * db html, fix related to match of multiple urls within paragraph + * open archive initiative for metadata harvesting, initial implementation, + XML output available (-O), decide use later + * debian vim * moved vim install back to addons * added recommends vim-addon-manager diff --git a/lib/sisu/v0/hub.rb b/lib/sisu/v0/hub.rb index 35160d23..125a0500 100644 --- a/lib/sisu/v0/hub.rb +++ b/lib/sisu/v0/hub.rb @@ -150,6 +150,7 @@ module SiSU when /^plaintext$/; SiSU_Plaintext::Source.new(@opt).read # -a -A -e -E -f when /^wikispeak$/; SiSU_Wikispeak::Source.new(@opt).read # -g when /^odf$/; SiSU_ODF::Source.new(@opt).read # -o + when /^xml_md_oai_pmh_dc$/; SiSU_XML_metadata::OAI_PMH.new(@opt).read # -O when /^texpdf$/; SiSU_TeX::Source.new(@opt).read # -p when /^texinfo$/; SiSU_TexInfo::Source.new(@opt).read # -I #when /^docbook$/; SiSU_Docbook::Source.new(@opt).read # -B @@ -283,7 +284,7 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/ def actions if @opt.mod.inspect =~/--convert|--to|--from/; require "#{SiSU_lib}/sst_convert_markup" end - if @opt.cmd =~/([AabCcDdEeFfgGHhIiLMmNnoprRSsTtQqUuVvwWXxYyZ_0-9])/ and + if @opt.cmd =~/([AabCcDdEeFfgGHhIiLMmNnOoprRSsTtQqUuVvwWXxYyZ_0-9])/ and @opt.cmd =~/^-/ and @opt.mod.inspect !~/--(?:sitemaps|query|identify)/ or @opt.mod.inspect =~/--(?:(?:sq)?lite|pg(?:sql)?)/ #and @@ -292,7 +293,7 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/ flag=SiSU_Env::Info_processing_flag.new extra='' if @opt.cmd !~/[mn]/ - extra+=if @opt.cmd =~/[abeghHhINoptTwXxz]/ and @opt.cmd !~/[mn]/; 'm' #% add dal + extra+=if @opt.cmd =~/[abeghHhINOoptTwXxz]/ and @opt.cmd !~/[mn]/; 'm' #% add dal elsif ((@opt.cmd =~/[Dd]/ or (@opt.mod.inspect =~/--(?:(?:sq)?lite|pg(?:sql)?)/)) \ and @opt.mod.inspect !~/(?:remove|(?:(?:re)?create(?:all)?|dropall|drop)$)/) \ and @opt.cmd !~/[mn]/ @@ -447,32 +448,34 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/ end @opt.files=@opt.files.collect {|x| x=x.gsub(/\.ssm$/,'._sst') } if @opt.cmd =~/S/ - op('sisupod_make','sisupod (zip)') #% -S make sisupod + op('sisupod_make','sisupod (zip)') #% -S make sisupod if @opt.fns=~/\.kdi._sst/ - op('share_src_kdissert','kdissert (kdi)') #% -S share kdissert source + op('share_src_kdissert','kdissert (kdi)') #% -S share kdissert source end end - if @opt.cmd =~/N/; op('digests','digests') #% -N digest tree + if @opt.cmd =~/N/; op('digests','digests') #% -N digest tree end - if @opt.cmd =~/[hHz]/; op('html','html') #% -h -H -z html css + if @opt.cmd =~/[hHz]/; op('html','html') #% -h -H -z html css end - if @opt.cmd =~/[aAfeE]/; op('plaintext','plaintext') #% -a -A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file + if @opt.cmd =~/[aAfeE]/; op('plaintext','plaintext') #% -a -A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file end - if @opt.cmd =~/g/; op('wikispeak','wikispeak') #% -g wiki + if @opt.cmd =~/g/; op('wikispeak','wikispeak') #% -g wiki end - if @opt.cmd =~/o/; op('odf','OpenDocument') #% -o opendocument + if @opt.cmd =~/o/; op('odf','OpenDocument') #% -o opendocument end - if @opt.cmd =~/x/; op('xml','xml sax') #% -x xml sax type + if @opt.cmd =~/x/; op('xml','xml sax') #% -x xml sax type end - if @opt.cmd =~/X/; op('xml_dom','xml dom') #% -X xml dom type + if @opt.cmd =~/X/; op('xml_dom','xml dom') #% -X xml dom type end - if @opt.cmd =~/b/; op('xhtml','xhtml sax') #% -b xhtml sax type + if @opt.cmd =~/b/; op('xhtml','xhtml sax') #% -b xhtml sax type end - #if @opt.cmd =~/B/; op('docbook','docbook xml') #% -B docbook xml + #if @opt.cmd =~/B/; op('docbook','docbook xml') #% -B docbook xml #end - if @opt.cmd =~/w/; op('concordance','Concordance') #% -w concordance + if @opt.cmd =~/w/; op('concordance','Concordance') #% -w concordance end - if @opt.cmd =~/t/ #% -t termsheet/standard form + if @opt.cmd =~/O/; op('xml_md_oai_pmh_dc','OAI PMH') #% -O open archive initiative, metadata harvesting + end + if @opt.cmd =~/t/ #% -t termsheet/standard form SiSU_Help::Help.new('termsheet').help_request @opt.files.each do |fns| if FileTest.file?(fns) diff --git a/lib/sisu/v0/manifest.rb b/lib/sisu/v0/manifest.rb index 931ea96b..998d3c59 100644 --- a/lib/sisu/v0/manifest.rb +++ b/lib/sisu/v0/manifest.rb @@ -445,7 +445,6 @@ module SiSU_Manifest begin id,file='','' vz=SiSU_Env::Get_init.instance.skin - #vz=SiSU_Viz::Skin.new banner_table=if vz.banner_home_button_only !~ /http:\/\/www\.jus\.uio\.no\/sisu/ and vz.banner_home_button_only !~ /sisu\.home\.png/ < diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb index 2ab37afb..2264c48a 100644 --- a/lib/sisu/v0/param.rb +++ b/lib/sisu/v0/param.rb @@ -338,7 +338,7 @@ module SiSU_Param end when /^(?:0~type|@type:)\s+(.+?)$/m; @dc_type=$1 #% metainfo DC when /^(?:0~format|@format:)\s+(.+?)$/m; @dc_format=$1 #% metainfo DC - when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1 #% metainfo DC + #when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1 #% metainfo DC when /^(?:0~source|@source:)\s+(.+?)$/m; @dc_source=$1 #% metainfo DC when /^(?:0~language(?:\.document)?|@language(?:\.document)?:)\s+(.+?)$/m #% metainfo DC x=$1.strip @@ -655,6 +655,7 @@ module SiSU_Param end if @flv @lang.uniq! @fn=SiSU_Env::Env_call.new(@fns).lang(fn_set_lang[:c]) + @dc_identifier="#{@env.url.root}/#@fnb/#{@fn[:toc]}" #DC note constructed dc identifier if @en[:note] > 0 and @en[:sum] > 0 if @en[:sum] > 0 else tell=SiSU_Screen::Ansi.new(@cmd,'both endnote styles used',"~{ #{@en[:sum]} }~ and ^~ #{@en[:mark]}") diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 50abb284..2bcea532 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -126,8 +126,8 @@ module SiSU_Format_Shared end def markup(para) if para =~/\{.+?\}((?:http|ftp)\S+|image)/ - @word_mode=para.scan(/\{.+?\}(?:(?:https?|ftp)\S+|image)|\S+/) - word_mode=urls(@word_mode) + wm=para.scan(/\{.+?\}(?:(?:https?|ftp)\S+|image)|\S+/) + word_mode=urls(wm) words=word_mode.join(' ') para.gsub!(/.+/,words) end diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb index e9e39268..91e03459 100644 --- a/lib/sisu/v0/sysenv.rb +++ b/lib/sisu/v0/sysenv.rb @@ -454,6 +454,7 @@ module SiSU_Env :digest => filename(code,'digest','.txt'), :metadata => filename(code,'metadata','.html'), #chk :manifest => filename(code,'sisu_manifest','.html'), + :oai_pmh => filename(code,'oai_pmh','.xml'), :sitemap => filename(code,'sitemap','.xml'), :sitemap_touch => filename(code,"sitemap_#@fnb",'.xml'), :sxs => filename(code,@fnb,'.sxs.xml'), @@ -1780,6 +1781,8 @@ module SiSU_Env end if @md.cmd =~ /o/; ft << @md.fn[:odf] end + if @md.cmd =~ /O/; ft << @md.fn[:oai_pmh] + end if @md.cmd =~ /s/; ft << @md.fns end if @md.cmd =~ /S/; ft << 'sisupod.zip' << '.kdi' @@ -1808,6 +1811,8 @@ module SiSU_Env end if @opt.cmd =~ /o/; ft << 'opendocument.odt' << '??.opendocument.odt' << 'opendocument.??.odt' end + if @opt.cmd =~ /O/; ft << 'oai_pmh.xml' + end if @opt.cmd =~ /s/; ft << '.sst' << '.ssi' << '.ssm' end if @opt.cmd =~ /S/; ft << 'sisupod.zip' << '.kdi' diff --git a/lib/sisu/v0/xml_md_oai_pmh_dc.rb b/lib/sisu/v0/xml_md_oai_pmh_dc.rb new file mode 100644 index 00000000..7ac7c3a6 --- /dev/null +++ b/lib/sisu/v0/xml_md_oai_pmh_dc.rb @@ -0,0 +1,204 @@ +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 Ralph Amissah All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007 Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: summary of generated outputs and metadata + +=end +module SiSU_XML_metadata + require "#{SiSU_lib}/sysenv" + include SiSU_Env + require "#{SiSU_lib}/param" + include SiSU_Param + class OAI_PMH + def initialize(opt) + @md=SiSU_Param::Parameters.new(opt).get + @oai_pmh=[] + end + def read + output + end + def pre +< + +WOK + end + def body + if @md.dc_title # DublinCore 1 - title + @oai_pmh << %{ #{@md.dc_title}\n} + #@oai_pmh << %{ #{seg_name}#{@md.dc_title}\n} + end + if @md.dc_creator # DublinCore 2 - creator/author (author) + txt=meta_content_clean(@md.dc_creator) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_subject # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) + txt=meta_content_clean(@md.dc_subject) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_description # DublinCore 4 - description + txt=meta_content_clean(@md.dc_description) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_publisher # DublinCore 5 - publisher (current copy published by) + txt=meta_content_clean(@md.dc_publisher) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_contributor # DublinCore 6 - contributor + txt=meta_content_clean(@md.dc_contributor) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_date # DublinCore 7 - date year-mm-dd + @oai_pmh << %{ #{@md.dc_date}\n} + end + if @md.dc_date_created # DublinCore 7 - date.created + @oai_pmh << %{ #{@md.dc_date_created}\n} + end + if @md.dc_date_issued # DublinCore 7 - date.issued + @oai_pmh << %{ #{@md.dc_date_issued}\n} + end + if @md.dc_date_available # DublinCore 7 - date.available + @oai_pmh << %{ #{@md.dc_date_available}\n} + end + if @md.dc_date_valid # DublinCore 7 - date.valid + @oai_pmh << %{ #{@md.dc_date_valid}\n} + end + if @md.dc_date_modified # DublinCore 7 - date.modified + @oai_pmh << %{ #{@md.dc_date_modified}\n} + end + if @md.dc_type # DublinCore 8 - type + txt=meta_content_clean(@md.dc_type) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_format # DublinCore 9 - format + txt=meta_content_clean(@md.dc_format) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_identifier # DublinCore 10 - identifier + txt=meta_content_clean(@md.dc_identifier) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_source # DublinCore 11 - source + txt=meta_content_clean(@md.dc_source) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_language[:name] # DublinCore 12 - language (English) + @oai_pmh << %{ #{@md.dc_language[:name]}\n} + end + if @md.language_original[:name] + @oai_pmh << %{ #{@md.language_original[:name]}\n} + end + if @md.dc_relation # DublinCore 13 - relation + txt=meta_content_clean(@md.dc_relation) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_coverage # DublinCore 14 - coverage + txt=meta_content_clean(@md.dc_coverage) + @oai_pmh << %{ #{txt}\n} + end + if @md.dc_rights # DublinCore 15 - rights + txt=meta_content_clean(@md.dc_rights) + @oai_pmh << %{ #{txt}\n} + end + if @md.keywords + txt=meta_content_clean(@md.keywords) + @oai_pmh << %{ #{txt}\n} + end + @oai_pmh + end + def meta_content_clean(content='') + unless content.nil? + content.tr!('"',"'") + end + content + end + def post + '' + end + def output + SiSU_Env::SiSU_file.new(@md).mkdir + oai_pmh=SiSU_Env::SiSU_file.new(@md,@md.fn[:oai_pmh]).mkfile #implement in param + oai_pmh << pre + body.each do |x| + oai_pmh << x + end + oai_pmh << post + end + end +end +__END__ +#http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore +#sample implementation, e.g. 2 + + + Grassmann's space analysis + Hyde, E. W. (Edward Wyllys) + LCSH:Ausdehnungslehre; LCCN QA205.H99 + J. Wiley & Sons + Created: 1906; Available: 1991 + text + http://resolver.library.cornell.edu/math/1796949 + + english + Public Domain + -- cgit v1.2.3 From 169af66867011286903df18fd260ce4df473cb93 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 24 Jul 2007 15:15:43 +0100 Subject: changelog, info touch --- CHANGELOG | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 8493671c..0f13f9f8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,7 +15,7 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz * db html, fix related to match of multiple urls within paragraph * open archive initiative for metadata harvesting, initial implementation, - XML output available (-O), decide use later + XML output available (-O), decide use later (filenames, output dir etc.) * debian vim * moved vim install back to addons -- cgit v1.2.3 From d91b99e2f39d6f8f8a2952e04a69a44c76179214 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 24 Jul 2007 15:25:12 +0100 Subject: changelog, info touch --- CHANGELOG | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 0f13f9f8..93d3ed72 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,7 +15,13 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz * db html, fix related to match of multiple urls within paragraph * open archive initiative for metadata harvesting, initial implementation, - XML output available (-O), decide use later (filenames, output dir etc.) + Dublin Core, XML output available (-O), decide use later (filenames, output + dir etc.), look at later and refine accordingly: + http://www.openarchives.org/pmh/ + http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore + http://es.dublincore.org/documents/usageguide/elements.shtml + http://dublincore.org/documents/dces/ + see also http://dublincore.org/documents/dcmes-xml/ * debian vim * moved vim install back to addons -- cgit v1.2.3 From a1fd226ef8ae434f81f010ee8681fc059dbbe6f2 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Thu, 26 Jul 2007 17:51:16 +0100 Subject: multiple url matching refinements, open archive initiative --- CHANGELOG | 13 ++++++------- lib/sisu/v0/html_tune.rb | 4 ++-- lib/sisu/v0/odf.rb | 12 ++++++------ lib/sisu/v0/shared_html_lite.rb | 4 ++-- lib/sisu/v0/shared_xml.rb | 10 +++++----- lib/sisu/v0/texpdf_format.rb | 4 ++-- lib/sisu/v0/xml_md_oai_pmh_dc.rb | 5 +++++ 7 files changed, 28 insertions(+), 24 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 93d3ed72..2db96703 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,20 +12,18 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz sisu_0.55.6-1.dsc sisu_0.55.6-1.diff.gz - * db html, fix related to match of multiple urls within paragraph + * matching of multiple urls within a paragraph + * db html (html_lite), bug fix + * multiple uls listed, refinement: html, html_lite, xml, odf, texpdf * open archive initiative for metadata harvesting, initial implementation, Dublin Core, XML output available (-O), decide use later (filenames, output - dir etc.), look at later and refine accordingly: - http://www.openarchives.org/pmh/ - http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore - http://es.dublincore.org/documents/usageguide/elements.shtml - http://dublincore.org/documents/dces/ - see also http://dublincore.org/documents/dcmes-xml/ + dir etc.) * debian vim * moved vim install back to addons * added recommends vim-addon-manager + (thanks zack) %% sisu_0.55.5.orig.tar.gz (2007-07-22:29/7) http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz @@ -47,6 +45,7 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz open standards * debian vim, syntax and ftplugin install moved to /usr/share/vim-scripts + (syntax file synced with Bram, thanks) %% sisu_0.55.4.orig.tar.gz (2007-07-20:29/5) http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.4.orig.tar.gz diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index 1d3461c3..66c45aed 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -325,9 +325,9 @@ module SiSU_Tune if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/) para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'<\1>\2') end - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #http ftp matches escaped, no decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) #http ftp matches with decoration if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/) para.gsub!(/(\.\.\/\S+)/,'\1') end diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index cff57888..fbd4cc62 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -295,12 +295,12 @@ module SiSU_ODF end def normal(para) #P1 - P3 para.gsub!(@serial,'') - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{\\1\\2\\3}) #http ftp matches escaped, no decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + %{\\1\\2}) #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, %{\\1\\2\\3}) #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + %{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/, %{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}}) para=case para @@ -375,8 +375,8 @@ module SiSU_ODF parray=[] para.split(/<:?br(?: \/)?>/).each do |parablock| parablock=group_clean(parablock) - parablock.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{\\1\\2\\3}) #http ftp matches escaped, no decoration + parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + %{\\1\\2}) #http ftp matches escaped, no decoration parray << %{#{parablock}} if parablock =~/\S+/ end para=parray.join + '' diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb index 2bcea532..16491ebf 100644 --- a/lib/sisu/v0/shared_html_lite.rb +++ b/lib/sisu/v0/shared_html_lite.rb @@ -131,9 +131,9 @@ module SiSU_Format_Shared words=word_mode.join(' ') para.gsub!(/.+/,words) end - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #http ftp matches escaped, no decoration + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2') #http ftp matches escaped, no decoration para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #special case \{ e.g. \}http://url - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) #http ftp matches with decoration + para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) #http ftp matches with decoration para end def paragraph diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 995044db..249085a1 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -356,11 +356,11 @@ module SiSU_XML_munge #para.gsub!(/^_\*\s+/,'* ') para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1[\\2] \\5}) para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1\\2}) - para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, - '\1\2\4') #watch, compare html_tune - para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, - %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) - para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later + para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, + '\1\3') #watch, compare html_tune + para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + %{#{@url_brace.xml_open}\\1#{@url_brace.xml_close}\\2}) + para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2') #escaped urls not linked, deal with later #para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later para.gsub!(/ /,' ') #clean para diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index 4a8d2cb5..81646f23 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -501,8 +501,8 @@ WOK @string.gsub!(/<\/a>/,' ') @string.gsub!(/[^\}>_]((?:https?|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case @string.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url - @string.gsub!(/(^|\s)(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #specially escaped url no decoration - @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration + @string.gsub!(/\B(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration + @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration positive lookahead, sequence issue with { linked }http://url cannot use \b at start @string.gsub!(/<:ee>/,'') @string.gsub!(//,' ') #proposed change, insert, but may be redundant diff --git a/lib/sisu/v0/xml_md_oai_pmh_dc.rb b/lib/sisu/v0/xml_md_oai_pmh_dc.rb index 7ac7c3a6..1d7008a1 100644 --- a/lib/sisu/v0/xml_md_oai_pmh_dc.rb +++ b/lib/sisu/v0/xml_md_oai_pmh_dc.rb @@ -182,6 +182,11 @@ WOK end end __END__ +http://www.openarchives.org/pmh/ +http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore +http://es.dublincore.org/documents/usageguide/elements.shtml +http://dublincore.org/documents/dces/ +see also http://dublincore.org/documents/dcmes-xml/ #http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore #sample implementation, e.g. 2 -- cgit v1.2.3