aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/html_harvest.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/html_harvest.rb')
-rw-r--r--lib/sisu/html_harvest.rb109
1 files changed, 109 insertions, 0 deletions
diff --git a/lib/sisu/html_harvest.rb b/lib/sisu/html_harvest.rb
new file mode 100644
index 00000000..ea4da0a3
--- /dev/null
+++ b/lib/sisu/html_harvest.rb
@@ -0,0 +1,109 @@
+# encoding: utf-8
+=begin
+
+* Name: SiSU
+
+** Description: documents, structuring, processing, publishing, search
+*** metadata harvest, harvest metadata from document corpus
+
+** Author: Ralph Amissah
+ <ralph@amissah.com>
+ <ralph.amissah@gmail.com>
+
+** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
+ All Rights Reserved.
+
+** License: GPL 3 or later:
+
+ SiSU, a framework for document structuring, publishing and search
+
+ Copyright (C) Ralph Amissah
+
+ This program is free software: you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation, either version 3 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ If you have Internet connection, the latest version of the GPL should be
+ available at these locations:
+ <http://www.fsf.org/licensing/licenses/gpl.html>
+ <http://www.gnu.org/licenses/gpl.html>
+
+ <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
+
+** SiSU uses:
+ * Standard SiSU markup syntax,
+ * Standard SiSU meta-markup syntax, and the
+ * Standard SiSU object citation numbering and system
+
+** Homepages:
+ <http://www.jus.uio.no/sisu>
+ <http://www.sisudoc.org>
+
+** Git
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
+ <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/harvest.rb;hb=HEAD>
+
+=end
+module SiSU_Harvest
+ @@the_idx_topics,@@the_idx_authors={},{}
+ class Source
+ require_relative 'hub_options' # hub_options.rb
+ require_relative 'html_harvest_topics' # html_harvest_topics.rb
+ require_relative 'html_harvest_authors' # html_harvest_authors.rb
+ require_relative 'se' # se.rb
+ include SiSU_Env
+ def initialize(opt)
+ @opt=opt
+ @env=SiSU_Env::InfoEnv.new
+ end
+ def read
+ begin
+ harvest_pth=@env.path.webserv + '/' + @opt.base_stub
+ FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth)
+ cases(@opt,@env)
+ rescue
+ ensure
+ SiSU_Env::CreateSite.new(@opt).cp_css
+ end
+ end
+ def help
+ puts <<WOK
+ harvest --harvest extracts document index metadata
+
+WOK
+ end
+ def css(opt)
+ require_relative 'css' # css.rb
+ css=SiSU_Style::CSS.new
+ fn_css=SiSU_Env::CSS_Default.new
+ style=File.new("#{@env.path.pwd}/#{fn_css.harvest}",'w')
+ style << css.harvest
+ style.close
+ end
+ def cases(opt,env)
+ case opt.selections.str.inspect
+ when/--harvest/i
+ css(opt) if @opt.act[:maintenance][:set]==:on
+ SiSU_HarvestAuthors::Songsheet.new(opt,env).songsheet
+ SiSU_HarvestTopics::Songsheet.new(opt,env).songsheet
+ if @opt.act[:rsync][:set]==:on
+ require_relative 'remote' # remote.rb
+ SiSU_Remote::Put.new(opt).rsync_harvest
+ end
+ else
+ help
+ end
+ end
+ end
+end
+__END__