From 7372f56054259457f77c64cbdb34e736531cfc0e Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 4 Jul 2009 11:57:29 -0400 Subject: move lib to version 1 directory, (lib/sisu/v1) and make related changes --- lib/sisu/v0/harvest_authors.rb | 331 ----------------------------------------- 1 file changed, 331 deletions(-) delete mode 100644 lib/sisu/v0/harvest_authors.rb (limited to 'lib/sisu/v0/harvest_authors.rb') diff --git a/lib/sisu/v0/harvest_authors.rb b/lib/sisu/v0/harvest_authors.rb deleted file mode 100644 index c7ca80fb..00000000 --- a/lib/sisu/v0/harvest_authors.rb +++ /dev/null @@ -1,331 +0,0 @@ -# coding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - metadata harvest, extract authors and their writings from document set - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Download: - - - * Ralph Amissah - - - - ** Description: simple xml representation (sax style) - -=end -module HARVEST_authors - require "#{SiSU_lib}/author_format" - @@the_idx_authors=[] - class Songsheet - def initialize(opt) - @opt=opt - @file_list=opt.files - @env=SiSU_Env::Info_env.new - end - def songsheet - files,idx_array=[],[] - @file_list.each do |f| - if f =~/.+?\.ss[tm]$/ - files << f[/(.+?\.ss[tm])$/,1] - else - print "not .sst or .ssm ? << #{f} >> " - end - end - files.each do |filename| - file_array=[] - File.open(filename,'r') do |file| - file.each_line("\n\n") do |line| - if line =~/^@\S+?: / - file_array << line - elsif line =~/^(?:\s*\n|%+ )/ - else break - end - end - end - idx_array=HARVEST_authors::Harvest.new(file_array,filename,idx_array).extract_harvest - end - the_idx=HARVEST_authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index - #HARVEST_authors::Output_index.new(the_idx).screen_print.cycle - HARVEST_authors::Output_index.new(@opt,the_idx).html_print.html_songsheet - puts "file://#{@env.path.output_md_harvest}/harvest_authors.html" - puts "file://#{@env.path.pwd}/harvest_authors.html" if @opt.cmd.inspect =~/-M/ - end - end - class Harvest - def initialize(data,filename,idx_array) - @data,@filename,@idx_array=data,filename,idx_array - end - def extract_harvest - data,filename,idx_array=@data,@filename,@idx_array - @orig_pub,@title,@subtitle,@fulltitle,@author,@author_format=nil,nil,nil,nil,nil,nil - @authors=[] - rgx={} - rgx[:author]=/^@(?:author|creator):\s+(.+)/ - rgx[:title]=/^@title:\s+(.+)/ - rgx[:subtitle]=/^@subtitle:\s+(.+)/ - rgx[:date]=/^@subtitle:\s+(.+)/ - rgx[:date]=/^@date:\s+(\d{4})/ - rgx[:orig_pub]=/^@original_publication:\s+(.+)/ - data.each do |para| - if para=~ rgx[:orig_pub] - @orig_pub=rgx[:orig_pub].match(para)[1] - end - if para=~ rgx[:title] - @title=rgx[:title].match(para)[1] - end - if para=~ rgx[:subtitle] - @subtitle=rgx[:subtitle].match(para)[1] - end - if para=~ rgx[:author] - @author_format=rgx[:author].match(para)[1] - end - if para=~ rgx[:date] - @date=rgx[:date].match(para)[1] - end - break if @title and @subtitle and @author and @date and @orig_pub - end - @fulltitle=if @subtitle - @title + ' - ' + @subtitle - else @title - end - if @title and @author_format #and @orig_pub (publication details) - creator=FORMAT::Author.new(@author_format.strip).author_details - @authors,@authorship=creator[:authors],creator[:authorship] - file=if filename=~/~[a-z]{2,3}\.ss[mt]$/ - lang='.' + /~([a-z]{2,3})\.ss[mt]$/.match(filename)[1] - filename.sub(/~[a-z]{2,3}\.ss[mt]$/,'') - else - lang='' - filename.sub(/\.ss[mt]$/,'') - end - page="sisu_manifest#{lang}.html" - idx_array <<= { :filename => filename, :file => file, :orig_pub => @orig_pub, :date => @date, :title => @fulltitle, :author => creator, :page => page } - else - #p "missing author field: #@filename title: #@title; author: #@author_format; idx: #@orig_pub" - end - idx_array.flatten! - idx_array - end - end - class Index - def initialize(idx_array,the_idx) - @idx_array,@the_idx=idx_array,the_idx - @@the_idx_authors=@the_idx - end - def capital(txt) - txt[0].chr.capitalize + txt[1,txt.length] - end - def construct_book_author_index - idx_array=@idx_array - idx_array.each do |idx| - idx[:author][:last_first_format_a].each do |author| - author.strip! - if @@the_idx_authors[author].class==NilClass - @@the_idx_authors[author]={:md => []} - end - @@the_idx_authors[author][:md] << { :filename => idx[:filename], :file => idx[:file], :author => idx[:author], :title => idx[:title], :date => idx[:date], :page => idx[:page] } - end - end - @the_idx=@@the_idx_authors - end - end - class Output_index - def initialize(opt,the_idx) - @opt,@the_idx=opt,the_idx - @env=SiSU_Env::Info_env.new - @rc=Get_init.instance.yamlrc - @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] - @letter=@alph.shift - @vz=SiSU_Env::Get_init.instance.skin - end - def html_file_open - @output={} - @output[:html]=File.new("#{@env.path.output_md_harvest}/harvest_authors.html",'w') - @output[:html_mnt]= if @opt.cmd.inspect =~/-M/ - File.new("#{@env.path.pwd}/harvest_authors.html",'w') - else nil - end - end - def html_file_close - @output[:html].close - @output[:html_mnt].close if @output[:html_mnt].class == File - end - def html_print - def html_songsheet - html_file_open - html_head - html_alph - html_body - html_tail - html_file_close - end - def html_head_adjust(type='') - css_path=if type !~/maintenance/ - '../_sisu/css/harvest.css' - else 'harvest.css' - end - sv=SiSU_Env::Info_version.instance.get_version - < - - -SiSU Metadata Harvest - Authors - - - - - - - - - - - - -

SiSU Metadata Harvest - Authors

-

[ HOME ] also see SiSU Metadata Harvest - Topics

-
-WOK - end - def html_head - @output[:html_mnt] << html_head_adjust('maintenance') if @opt.cmd.inspect =~/-M/ - @output[:html] << html_head_adjust - end - def html_alph - a=[] - a << '

' - @alph.each do |x| - a << if x =~/[0-9]/; '' - else - %{#{x}, } - end - end - @output[:html_mnt] << a.join if @output[:html_mnt].class == File - @output[:html] << a.join - end - def html_tail - a=[] - a <<< - - - - - - -#{@vz.credits_sisu} - - -WOK - @output[:html_mnt] << a if @output[:html_mnt].class == File - @output[:html] << a - end - def do_html(html) - @output[:html_mnt] << html if @output[:html_mnt].class == File - @output[:html] << html - end - def do_string(attrib,string) - html=%{

#{string}

} - do_html(html) - end - def do_string_name(attrib,string) - f=/^(\S)/.match(string[0])[1] - if @letter < f - while @letter < f - if @alph.length > 0 - @letter=@alph.shift - if @output[:html_mnt].class == File - @output[:html_mnt] << %{\n

#{@letter}

} - end - @output[:html] << %{\n

#{@letter}

} - else break - end - end - end - end - def html_body - the_idx=@the_idx - the_idx.sort.each do |a| - do_string_name('',a) - name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') - x = %{

#{a[0]}

} - if @output[:html_mnt].class == File - @output[:html_mnt] << x - end - @output[:html] << x - works=[] - a[1][:md].each do |x| - work=[ "#{x[:date]} #{x[:title]}", %{

#{x[:date]} #{x[:title]}, #{x[:author][:authors_s]}

} ] - works<<=if @output[:html_mnt].class == File - work.concat([%{

[src]  #{x[:date]} #{x[:title]}, #{x[:author][:authors_s]} -- [#{x[:file]}.sst]

}]) - else work - end - end - works.sort_by {|x| x[0]}.each do |x| - @output[:html] << x[1] - @output[:html_mnt] << x[2] if @output[:html_mnt].class == File - end - end - end - self - end - def screen_print - def cycle - the_idx=@the_idx - the_idx.sort.each do |a| - puts a[0] - a[1][:md].each do |x| - puts "\t" + x[:file] - end - end - end - self - end - end -end -__END__ -- cgit v1.2.3