From 0e6fc15ada3c5d9a86b227163f35a54993b32529 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 2 Dec 2008 23:54:23 -0500 Subject: sisu harvest, introduce module along with header syntax addition & modification * sisu markup, additional header and new format rule: * @creator: / @author: header field, introduced author name format rules for more usable metadata harvesting: surname comma other names, additional authors separated by semi-colon * param added meta-tag, @topic_register: formatting topic levels are separated from sub-levels by a colon, a semi-colon separates main topics if there are multiple topics at lowest sub-level, a pipe can be used to create multiple headings * harvest module, harvests metadata from document set currently extracts: (i) authors and their writings from document set; (ii) topics and associated writings from document set (topics use topic_register header). harvest (when run against documents common to a directory of a site) extracts metadata and organises the documents on a site by author and topic information provided (there is a new "topic_register" header, with formatting rules similar to those of the book index), results are placed in [output_path]/sisu_site_metadata. sisu --harvest *.sst * by author (see change in param @creator: / @author: header field) * by topic / subject index (see addition in param of @topic_register: header field) initially there should be example samples here: http://www.jus.uio.no/sisu/sisu_site_metadata/harvest_authors.html http://www.jus.uio.no/sisu/sisu_site_metadata/harvest_topics.html together with update markup source files The authors and their writings list will be made to take on a more bibliographical form, with the use of additional fields as required. 
(concept example, suitable for medium sized sites [to remove size constraint: implement SQL equivalent]) make feature more robust * css, for harvest output added * remote placement of sisu_site_metadata (output produced by metadata harvest) * sisu markup, update document samples accordingly * tidy copyright marks in program headers, remove repetition of dates [version bump because formatting rule introduced to author / creator header - where new site metadata harvest feature is used, (at present changes should not be noticed except when using metadata harvest)] --- lib/sisu/v0/sysenv.rb | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) (limited to 'lib/sisu/v0/sysenv.rb') diff --git a/lib/sisu/v0/sysenv.rb b/lib/sisu/v0/sysenv.rb index 2481cbdc..57695904 100644 --- a/lib/sisu/v0/sysenv.rb +++ b/lib/sisu/v0/sysenv.rb @@ -14,8 +14,7 @@ SiSU, a framework for document structuring, publishing and search - Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, - 2007, 2008 Ralph Amissah + Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -846,6 +845,7 @@ module SiSU_Env @stub_pwd=@@pwd[m,1] @stub_src=@stub_pwd + '/src' @stub_pod=@stub_pwd + '/pod' + @stub_md=@stub_pwd + '/sisu_site_metadata' end def user @sys.user @@ -879,6 +879,9 @@ module SiSU_Env def stub_pwd #200412 @stub_pwd end + def stub_md_harvest + @stub_md + end def stub_src @stub_src end @@ -1263,6 +1266,9 @@ WOK def stub_pod @stub_pod end + def stub_md_harvest + @stub_md + end def etc defaults[:sisu_etc] #live/dynamic # @sys.sisu_etc #broken: live/dynamic @@ -1359,9 +1365,15 @@ WOK def output_src #web/webserv output directory... 
subdirectory into which further subdirectories are made based on file names "#{path.output}/src" end + def output_md_harvest + "#{path.output}/sisu_site_metadata" + end def output_pod "#{path.output}/pod" end + def output_harvest + "#{path.output}/sisu_site_metadata" + end def manpage "#{path.output}/man" end @@ -2269,6 +2281,11 @@ WOK "#{@env.path.output}/pod" else @env.path.output_pod end + @source_path_harvest=if @fnb \ + and not @fnb.empty? + "#{@env.path.output}/sisu_site_metadata" + else @env.path.output_harvest + end @local_sisu_source=if @filetypes =~/\S/; "#@source_path/#@filetypes" else @source_path end @@ -2486,7 +2503,7 @@ WOK if FileTest.file?("#{local_pod}/#{src_pod}") System_call.new("#{local_pod}/#{src_pod}",remote_pod,@opt.cmd).rsync end - elsif @opt.cmd =~/U/ + elsif @opt.cmd =~/U/ puts "#{__FILE__} #{__LINE__}" if @opt.cmd =~/M/ puts "#{local_gen} -> #{remote_gen}" if FileTest.file?("#{local_src}/#{src_doc}") \ @@ -2603,6 +2620,20 @@ WOK end end end + def rsync_harvest + self.remote_host_base.each do |remote_conn| + local=@source_path_harvest + remote="#{remote_conn[:name]}/#{@env.path.stub_pwd}/." + if @@flag_remote + delete_extra_files='--delete' # '--delete-after' + System_call.new(local,remote).rsync(delete_extra_files) + elsif @opt.cmd =~/U/ + puts "#{__FILE__} #{__LINE__}" if @opt.cmd =~/M/ + puts "rsync_sitemaps: #{local} -> #{remote}" + else puts "permission not granted #{__FILE__} #{__LINE__}" if @opt.cmd =~/M/ + end + end + end end class Info_version