aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorRalph Amissah <ralph@amissah.com>2007-07-27 12:26:02 +0100
committerRalph Amissah <ralph@amissah.com>2007-07-27 12:26:02 +0100
commitf4b6f637ab695ab1e7ff879f5d10d4f4bceaa603 (patch)
tree7f3d3ebba89ce94f61b99d4e114a4c202d21c6d9
parentMerge branch 'upstream' into debian/sid (diff)
parentmultiple url matching refinements, open archive initiative (diff)
Merge branch 'upstream' into debian/sid
-rw-r--r--CHANGELOG10
-rw-r--r--lib/sisu/v0/db_import.rb2
-rw-r--r--lib/sisu/v0/html_tune.rb4
-rw-r--r--lib/sisu/v0/hub.rb33
-rw-r--r--lib/sisu/v0/manifest.rb1
-rw-r--r--lib/sisu/v0/odf.rb12
-rw-r--r--lib/sisu/v0/param.rb3
-rw-r--r--lib/sisu/v0/shared_html_lite.rb8
-rw-r--r--lib/sisu/v0/shared_xml.rb10
-rw-r--r--lib/sisu/v0/sysenv.rb5
-rw-r--r--lib/sisu/v0/texpdf_format.rb4
-rw-r--r--lib/sisu/v0/xml_md_oai_pmh_dc.rb209
12 files changed, 264 insertions, 37 deletions
diff --git a/CHANGELOG b/CHANGELOG
index a791c3f2..2db96703 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,9 +12,18 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.6.orig.tar.gz
sisu_0.55.6-1.dsc
sisu_0.55.6-1.diff.gz
+ * matching of multiple urls within a paragraph
+ * db html (html_lite), bug fix
+ * multiple urls listed, refinement: html, html_lite, xml, odf, texpdf
+
+ * open archive initiative for metadata harvesting, initial implementation,
+ Dublin Core, XML output available (-O), decide use later (filenames, output
+ dir etc.)
+
* debian vim
* moved vim install back to addons
* added recommends vim-addon-manager
+ (thanks zack)
%% sisu_0.55.5.orig.tar.gz (2007-07-22:29/7)
http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz
@@ -36,6 +45,7 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.5.orig.tar.gz
open standards
* debian vim, syntax and ftplugin install moved to /usr/share/vim-scripts
+ (syntax file synced with Bram, thanks)
%% sisu_0.55.4.orig.tar.gz (2007-07-20:29/5)
http://www.jus.uio.no/sisu/pkg/src/sisu_0.55.4.orig.tar.gz
diff --git a/lib/sisu/v0/db_import.rb b/lib/sisu/v0/db_import.rb
index 91360613..5ef26466 100644
--- a/lib/sisu/v0/db_import.rb
+++ b/lib/sisu/v0/db_import.rb
@@ -125,7 +125,7 @@ module SiSU_DB_import
string.gsub!(/<:(?:code|alt|group|verse)(?:-end)?>/,'')
string.gsub!(/<:name#\S+?>/,'')
string.gsub!(/\{\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)\}\S+/,'[image: \1] \2')
- string.gsub!(/\{\s*(.+?)\s*\}http:\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
+ string.gsub!(/\{\s*(.+?)\s*\}https?:\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
end
def unicode_special_character_escape(string)
#string.gsub!(/(["';:,])/, %{\\\\\\1})
diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb
index 1d3461c3..66c45aed 100644
--- a/lib/sisu/v0/html_tune.rb
+++ b/lib/sisu/v0/html_tune.rb
@@ -325,9 +325,9 @@ module SiSU_Tune
if (para =~/\b\S+\@\S+?\.\S+/ and para !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/)
para.gsub!(/\b(\S+\@\S+?\.\S+)(\s)/,'&lt;<a href="mailto:\1">\1</a>&gt;\2')
end
- para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration
para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
+ para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration
if (para =~/..\/\S+/ and para !~/(\"..\/\S+?\"|>\s*..\/\S+<)/)
para.gsub!(/(\.\.\/\S+)/,'<a href="\1">\1</a>')
end
diff --git a/lib/sisu/v0/hub.rb b/lib/sisu/v0/hub.rb
index 35160d23..125a0500 100644
--- a/lib/sisu/v0/hub.rb
+++ b/lib/sisu/v0/hub.rb
@@ -150,6 +150,7 @@ module SiSU
when /^plaintext$/; SiSU_Plaintext::Source.new(@opt).read # -a -A -e -E -f
when /^wikispeak$/; SiSU_Wikispeak::Source.new(@opt).read # -g
when /^odf$/; SiSU_ODF::Source.new(@opt).read # -o
+ when /^xml_md_oai_pmh_dc$/; SiSU_XML_metadata::OAI_PMH.new(@opt).read # -O
when /^texpdf$/; SiSU_TeX::Source.new(@opt).read # -p
when /^texinfo$/; SiSU_TexInfo::Source.new(@opt).read # -I
#when /^docbook$/; SiSU_Docbook::Source.new(@opt).read # -B
@@ -283,7 +284,7 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/
def actions
if @opt.mod.inspect =~/--convert|--to|--from/; require "#{SiSU_lib}/sst_convert_markup"
end
- if @opt.cmd =~/([AabCcDdEeFfgGHhIiLMmNnoprRSsTtQqUuVvwWXxYyZ_0-9])/ and
+ if @opt.cmd =~/([AabCcDdEeFfgGHhIiLMmNnOoprRSsTtQqUuVvwWXxYyZ_0-9])/ and
@opt.cmd =~/^-/ and
@opt.mod.inspect !~/--(?:sitemaps|query|identify)/ or
@opt.mod.inspect =~/--(?:(?:sq)?lite|pg(?:sql)?)/ #and
@@ -292,7 +293,7 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/
flag=SiSU_Env::Info_processing_flag.new
extra=''
if @opt.cmd !~/[mn]/
- extra+=if @opt.cmd =~/[abeghHhINoptTwXxz]/ and @opt.cmd !~/[mn]/; 'm' #% add dal
+ extra+=if @opt.cmd =~/[abeghHhINOoptTwXxz]/ and @opt.cmd !~/[mn]/; 'm' #% add dal
elsif ((@opt.cmd =~/[Dd]/ or (@opt.mod.inspect =~/--(?:(?:sq)?lite|pg(?:sql)?)/)) \
and @opt.mod.inspect !~/(?:remove|(?:(?:re)?create(?:all)?|dropall|drop)$)/) \
and @opt.cmd !~/[mn]/
@@ -447,32 +448,34 @@ p "here #{__FILE__} #{__LINE__}" if @opt =~/M/
end
@opt.files=@opt.files.collect {|x| x=x.gsub(/\.ssm$/,'._sst') }
if @opt.cmd =~/S/
- op('sisupod_make','sisupod (zip)') #% -S make sisupod
+ op('sisupod_make','sisupod (zip)') #% -S make sisupod
if @opt.fns=~/\.kdi._sst/
- op('share_src_kdissert','kdissert (kdi)') #% -S share kdissert source
+ op('share_src_kdissert','kdissert (kdi)') #% -S share kdissert source
end
end
- if @opt.cmd =~/N/; op('digests','digests') #% -N digest tree
+ if @opt.cmd =~/N/; op('digests','digests') #% -N digest tree
end
- if @opt.cmd =~/[hHz]/; op('html','html') #% -h -H -z html css
+ if @opt.cmd =~/[hHz]/; op('html','html') #% -h -H -z html css
end
- if @opt.cmd =~/[aAfeE]/; op('plaintext','plaintext') #% -a -A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file
+ if @opt.cmd =~/[aAfeE]/; op('plaintext','plaintext') #% -a -A -f -e -E plaintext -a creates ms-dos type; -A creates unix type, plaintext file
end
- if @opt.cmd =~/g/; op('wikispeak','wikispeak') #% -g wiki
+ if @opt.cmd =~/g/; op('wikispeak','wikispeak') #% -g wiki
end
- if @opt.cmd =~/o/; op('odf','OpenDocument') #% -o opendocument
+ if @opt.cmd =~/o/; op('odf','OpenDocument') #% -o opendocument
end
- if @opt.cmd =~/x/; op('xml','xml sax') #% -x xml sax type
+ if @opt.cmd =~/x/; op('xml','xml sax') #% -x xml sax type
end
- if @opt.cmd =~/X/; op('xml_dom','xml dom') #% -X xml dom type
+ if @opt.cmd =~/X/; op('xml_dom','xml dom') #% -X xml dom type
end
- if @opt.cmd =~/b/; op('xhtml','xhtml sax') #% -b xhtml sax type
+ if @opt.cmd =~/b/; op('xhtml','xhtml sax') #% -b xhtml sax type
end
- #if @opt.cmd =~/B/; op('docbook','docbook xml') #% -B docbook xml
+ #if @opt.cmd =~/B/; op('docbook','docbook xml') #% -B docbook xml
#end
- if @opt.cmd =~/w/; op('concordance','Concordance') #% -w concordance
+ if @opt.cmd =~/w/; op('concordance','Concordance') #% -w concordance
end
- if @opt.cmd =~/t/ #% -t termsheet/standard form
+ if @opt.cmd =~/O/; op('xml_md_oai_pmh_dc','OAI PMH') #% -O open archive initiative, metadata harvesting
+ end
+ if @opt.cmd =~/t/ #% -t termsheet/standard form
SiSU_Help::Help.new('termsheet').help_request
@opt.files.each do |fns|
if FileTest.file?(fns)
diff --git a/lib/sisu/v0/manifest.rb b/lib/sisu/v0/manifest.rb
index 931ea96b..998d3c59 100644
--- a/lib/sisu/v0/manifest.rb
+++ b/lib/sisu/v0/manifest.rb
@@ -445,7 +445,6 @@ module SiSU_Manifest
begin
id,file='',''
vz=SiSU_Env::Get_init.instance.skin
- #vz=SiSU_Viz::Skin.new
banner_table=if vz.banner_home_button_only !~ /http:\/\/www\.jus\.uio\.no\/sisu/ and vz.banner_home_button_only !~ /sisu\.home\.png/
<<WOK
<table summary="band" width="100%" border="0" cellpadding="3" cellspacing="0">
diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb
index cff57888..fbd4cc62 100644
--- a/lib/sisu/v0/odf.rb
+++ b/lib/sisu/v0/odf.rb
@@ -295,12 +295,12 @@ module SiSU_ODF
end
def normal(para) #P1 - P3
para.gsub!(@serial,'')
- para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
- %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration
para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
%{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #special case \{ e.g. \}http://url
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
- %{\\1#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
+ para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ %{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration
para.gsub!(/([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+)/,
%{#{@url_brace.xml_open}<text:a xlink:type="simple" xlink:href="mailto:\\1">\\1</text:a>#{@url_brace.xml_close}})
para=case para
@@ -375,8 +375,8 @@ module SiSU_ODF
parray=[]
para.split(/<:?br(?: \/)?>/).each do |parablock|
parablock=group_clean(parablock)
- parablock.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
- %{\\1<text:a xlink:type="simple" xlink:href="\\2">\\2</text:a>\\3}) #http ftp matches escaped, no decoration
+ parablock.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ %{<text:a xlink:type="simple" xlink:href="\\1">\\1</text:a>\\2}) #http ftp matches escaped, no decoration
parray << %{<text:p text:style-name="P5">#{parablock}</text:p>} if parablock =~/\S+/
end
para=parray.join + '<text:p text:style-name="Standard"/>'
diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb
index 2ab37afb..2264c48a 100644
--- a/lib/sisu/v0/param.rb
+++ b/lib/sisu/v0/param.rb
@@ -338,7 +338,7 @@ module SiSU_Param
end
when /^(?:0~type|@type:)\s+(.+?)$/m; @dc_type=$1 #% metainfo DC
when /^(?:0~format|@format:)\s+(.+?)$/m; @dc_format=$1 #% metainfo DC
- when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1 #% metainfo DC
+ #when /^(?:0~identifier|@identifier:)\s+(.+?)$/m; @dc_identifier=$1 #% metainfo DC
when /^(?:0~source|@source:)\s+(.+?)$/m; @dc_source=$1 #% metainfo DC
when /^(?:0~language(?:\.document)?|@language(?:\.document)?:)\s+(.+?)$/m #% metainfo DC
x=$1.strip
@@ -655,6 +655,7 @@ module SiSU_Param
end if @flv
@lang.uniq!
@fn=SiSU_Env::Env_call.new(@fns).lang(fn_set_lang[:c])
+ @dc_identifier="#{@env.url.root}/#@fnb/#{@fn[:toc]}" #DC note constructed dc identifier
if @en[:note] > 0 and @en[:sum] > 0
if @en[:sum] > 0
else tell=SiSU_Screen::Ansi.new(@cmd,'both endnote styles used',"~{ #{@en[:sum]} }~ and ^~ #{@en[:mark]}")
diff --git a/lib/sisu/v0/shared_html_lite.rb b/lib/sisu/v0/shared_html_lite.rb
index 76def67d..16491ebf 100644
--- a/lib/sisu/v0/shared_html_lite.rb
+++ b/lib/sisu/v0/shared_html_lite.rb
@@ -126,14 +126,14 @@ module SiSU_Format_Shared
end
def markup(para)
if para =~/\{.+?\}((?:http|ftp)\S+|image)/
- @word_mode=para.scan(/\{.+?\}(?:(?:https?|ftp)\S+|image)|\S+/)
- word_mode=urls(para)
+ wm=para.scan(/\{.+?\}(?:(?:https?|ftp)\S+|image)|\S+/)
+ word_mode=urls(wm)
words=word_mode.join(' ')
para.gsub!(/.+/,words)
end
- para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #http ftp matches escaped, no decoration
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<a href="\1" target="_top">\1</a>\2') #http ftp matches escaped, no decoration
para.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<a href="\2" target="_top">\2</a>\3') #special case \{ e.g. \}http://url
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}<a href="\\2" target="_top">\\2</a>#{@url_brace.xml_close}\\3}) #http ftp matches with decoration
+ para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}\\2}) #http ftp matches with decoration
para
end
def paragraph
diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb
index 995044db..249085a1 100644
--- a/lib/sisu/v0/shared_xml.rb
+++ b/lib/sisu/v0/shared_xml.rb
@@ -356,11 +356,11 @@ module SiSU_XML_munge
#para.gsub!(/^_\*\s+/,'<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="/usr/share/sisu/image/bullet_red.png" width="12" height="12" alt="*" /> ')
para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2" width="\\3" height="\\4" />[\\2] \\5})
para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1<image xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:actuate="onLoad" xlink:show="embed" xlink:href="#{@dir.url.images_local}/\\2"/>\\2})
- para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,
- '\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\3">\2</link>\4') #watch, compare html_tune
- para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
- %{\\1#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\2">\\2</link>#{@url_brace.xml_close}\\3})
- para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later
+ para.gsub!(/\B\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,
+ '<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\1</link>\3') #watch, compare html_tune
+ para.gsub!(/\b((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,
+ %{#{@url_brace.xml_open}<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\\1">\\1</link>#{@url_brace.xml_close}\\2})
+ para.gsub!(/\b[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\1">\1</link>\2') #escaped urls not linked, deal with later
#para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1<link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="\2">\2</link>\3') #escaped urls not linked, deal with later
para.gsub!(/&nbsp;/,' ') #clean
para
diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb
index e9e39268..91e03459 100644
--- a/lib/sisu/v0/sysenv.rb
+++ b/lib/sisu/v0/sysenv.rb
@@ -454,6 +454,7 @@ module SiSU_Env
:digest => filename(code,'digest','.txt'),
:metadata => filename(code,'metadata','.html'), #chk
:manifest => filename(code,'sisu_manifest','.html'),
+ :oai_pmh => filename(code,'oai_pmh','.xml'),
:sitemap => filename(code,'sitemap','.xml'),
:sitemap_touch => filename(code,"sitemap_#@fnb",'.xml'),
:sxs => filename(code,@fnb,'.sxs.xml'),
@@ -1780,6 +1781,8 @@ module SiSU_Env
end
if @md.cmd =~ /o/; ft << @md.fn[:odf]
end
+ if @md.cmd =~ /O/; ft << @md.fn[:oai_pmh]
+ end
if @md.cmd =~ /s/; ft << @md.fns
end
if @md.cmd =~ /S/; ft << 'sisupod.zip' << '.kdi'
@@ -1808,6 +1811,8 @@ module SiSU_Env
end
if @opt.cmd =~ /o/; ft << 'opendocument.odt' << '??.opendocument.odt' << 'opendocument.??.odt'
end
+ if @opt.cmd =~ /O/; ft << 'oai_pmh.xml'
+ end
if @opt.cmd =~ /s/; ft << '.sst' << '.ssi' << '.ssm'
end
if @opt.cmd =~ /S/; ft << 'sisupod.zip' << '.kdi'
diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb
index 4a8d2cb5..81646f23 100644
--- a/lib/sisu/v0/texpdf_format.rb
+++ b/lib/sisu/v0/texpdf_format.rb
@@ -501,8 +501,8 @@ WOK
@string.gsub!(/<\/a>/,' ')
@string.gsub!(/[^\}>_]((?:https?|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case
@string.gsub!(/((?:^|\s)[}])((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url
- @string.gsub!(/(^|\s)(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #specially escaped url no decoration
- @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url>
+ @string.gsub!(/\B(?:\\_|\\)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration
+ @string.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start
@string.gsub!(/<:ee>/,'')
@string.gsub!(/<!>/,' ')
#proposed change, insert, but may be redundant
diff --git a/lib/sisu/v0/xml_md_oai_pmh_dc.rb b/lib/sisu/v0/xml_md_oai_pmh_dc.rb
new file mode 100644
index 00000000..1d7008a1
--- /dev/null
+++ b/lib/sisu/v0/xml_md_oai_pmh_dc.rb
@@ -0,0 +1,209 @@
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007 Ralph Amissah All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007 Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licenses/gpl.html>
+ <http://www.gnu.org/copyleft/gpl.html>
+ <http://www.jus.uio.no/sisu/gpl.fsf>
+
+ * SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+ * Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+ * Download:
+ <http://www.jus.uio.no/sisu/SiSU/download.html>
+
+ * Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+ ** Description: OAI-PMH (open archive initiative, metadata harvesting) Dublin Core XML metadata output
+
+=end
+module SiSU_XML_metadata
+ require "#{SiSU_lib}/sysenv"
+ include SiSU_Env
+ require "#{SiSU_lib}/param"
+ include SiSU_Param
+ class OAI_PMH
+ def initialize(opt)
+ @md=SiSU_Param::Parameters.new(opt).get
+ @oai_pmh=[]
+ end
+ def read
+ output
+ end
+ def pre
+<<WOK
+<?xml version="1.0" encoding="UTF-8"?>
+<oai_dc:dc
+ xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/
+ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+WOK
+ end
+ def body
+ if @md.dc_title # DublinCore 1 - title
+ @oai_pmh << %{ <dc:title xml:lang="en">#{@md.dc_title}</dc:title>\n}
+ #@oai_pmh << %{ <dc:title xml:lang="en">#{seg_name}#{@md.dc_title}</dc:title>\n}
+ end
+ if @md.dc_creator # DublinCore 2 - creator/author (author)
+ txt=meta_content_clean(@md.dc_creator)
+ @oai_pmh << %{ <dc:creator>#{txt}</dc:creator>\n}
+ end
+ if @md.dc_subject # DublinCore 3 - subject (us library of congress, eric or udc, or schema???)
+ txt=meta_content_clean(@md.dc_subject)
+ @oai_pmh << %{ <dc:subject>#{txt}</dc:subject>\n}
+ end
+ if @md.dc_description # DublinCore 4 - description
+ txt=meta_content_clean(@md.dc_description)
+ @oai_pmh << %{ <dc:description>#{txt}</dc:description>\n}
+ end
+ if @md.dc_publisher # DublinCore 5 - publisher (current copy published by)
+ txt=meta_content_clean(@md.dc_publisher)
+ @oai_pmh << %{ <dc:publisher>#{txt}</dc:publisher>\n}
+ end
+ if @md.dc_contributor # DublinCore 6 - contributor
+ txt=meta_content_clean(@md.dc_contributor)
+ @oai_pmh << %{ <dc:contributor>#{txt}</dc:contributor>\n}
+ end
+ if @md.dc_date # DublinCore 7 - date year-mm-dd
+ @oai_pmh << %{ <dc:date>#{@md.dc_date}</dc:date>\n}
+ end
+ if @md.dc_date_created # DublinCore 7 - date.created
+ @oai_pmh << %{ <dc:date_created>#{@md.dc_date_created}</dc:date_created>\n}
+ end
+ if @md.dc_date_issued # DublinCore 7 - date.issued
+ @oai_pmh << %{ <dc:date_issued>#{@md.dc_date_issued}</dc:date_issued>\n}
+ end
+ if @md.dc_date_available # DublinCore 7 - date.available
+ @oai_pmh << %{ <dc:date_available>#{@md.dc_date_available}</dc:date_available>\n}
+ end
+ if @md.dc_date_valid # DublinCore 7 - date.valid
+ @oai_pmh << %{ <dc:date_valid>#{@md.dc_date_valid}</dc:date_valid>\n}
+ end
+ if @md.dc_date_modified # DublinCore 7 - date.modified
+ @oai_pmh << %{ <dc:date_modified>#{@md.dc_date_modified}</dc:date_modified>\n}
+ end
+ if @md.dc_type # DublinCore 8 - type
+ txt=meta_content_clean(@md.dc_type)
+ @oai_pmh << %{ <dc:type>#{txt}</dc:type>\n}
+ end
+ if @md.dc_format # DublinCore 9 - format
+ txt=meta_content_clean(@md.dc_format)
+ @oai_pmh << %{ <dc:format>#{txt}</dc:format>\n}
+ end
+ if @md.dc_identifier # DublinCore 10 - identifier
+ txt=meta_content_clean(@md.dc_identifier)
+ @oai_pmh << %{ <dc:identifier>#{txt}</dc:identifier>\n}
+ end
+ if @md.dc_source # DublinCore 11 - source
+ txt=meta_content_clean(@md.dc_source)
+ @oai_pmh << %{ <dc:source>#{txt}</dc:source>\n}
+ end
+ if @md.dc_language[:name] # DublinCore 12 - language (English)
+ @oai_pmh << %{ <dc:language>#{@md.dc_language[:name]}</dc:language>\n}
+ end
+ if @md.language_original[:name]
+ @oai_pmh << %{ <dc:language>#{@md.language_original[:name]}</dc:language>\n}
+ end
+ if @md.dc_relation # DublinCore 13 - relation
+ txt=meta_content_clean(@md.dc_relation)
+ @oai_pmh << %{ <dc:relation>#{txt}</dc:relation>\n}
+ end
+ if @md.dc_coverage # DublinCore 14 - coverage
+ txt=meta_content_clean(@md.dc_coverage)
+ @oai_pmh << %{ <dc:coverage>#{txt}</dc:coverage>\n}
+ end
+ if @md.dc_rights # DublinCore 15 - rights
+ txt=meta_content_clean(@md.dc_rights)
+ @oai_pmh << %{ <dc:rights>#{txt}</dc:rights>\n}
+ end
+ if @md.keywords
+ txt=meta_content_clean(@md.keywords)
+ @oai_pmh << %{ <dc:keywords>#{txt}</dc:keywords>\n}
+ end
+ @oai_pmh
+ end
+ def meta_content_clean(content='')
+ unless content.nil?
+ content.tr!('"',"'")
+ end
+ content
+ end
+ def post
+ '</oai_dc:dc>'
+ end
+ def output
+ SiSU_Env::SiSU_file.new(@md).mkdir
+ oai_pmh=SiSU_Env::SiSU_file.new(@md,@md.fn[:oai_pmh]).mkfile #implement in param
+ oai_pmh << pre
+ body.each do |x|
+ oai_pmh << x
+ end
+ oai_pmh << post
+ end
+ end
+end
+__END__
+http://www.openarchives.org/pmh/
+http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore
+http://es.dublincore.org/documents/usageguide/elements.shtml
+http://dublincore.org/documents/dces/
+see also http://dublincore.org/documents/dcmes-xml/
+#http://www.openarchives.org/OAI/2.0/openarchivesprotocol.htm#dublincore
+#sample implementation, e.g. 2
+<?xml version="1.0" encoding="UTF-8"?>
+<oai_dc:dc
+ xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/
+ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
+ <dc:title xml:lang="en">Grassmann's space analysis</dc:title>
+ <dc:creator>Hyde, E. W. (Edward Wyllys)</dc:creator>
+ <dc:subject>LCSH:Ausdehnungslehre; LCCN QA205.H99</dc:subject>
+ <dc:publisher>J. Wiley &amp; Sons</dc:publisher>
+ <dc:date>Created: 1906; Available: 1991</dc:date>
+ <dc:type>text</dc:type>
+ <dc:identifier>http://resolver.library.cornell.edu/math/1796949
+ </dc:identifier>
+ <dc:language>english</dc:language>
+ <dc:rights xml:lang="en">Public Domain</dc:rights>
+</oai_dc:dc>