# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search
   metadata harvest, extract authors and their writings from document set

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:

 * Download:

 * Ralph Amissah

 ** Description: simple xml representation (sax style)

=end
module HARVEST_authors
  require "#{SiSU_lib}/author_format"                   # author_format.rb
  # Module-wide author index: author name => {:md => [work, ...]}.
  # It must be a Hash: Index keys it by author-name strings and
  # Output_index iterates it as [name, data] pairs.
  @@the_idx_authors={}
  # Entry point: reads the headers of the .sst/.ssm files named on the command
  # line, builds the author index and writes harvest_authors.html.
  class Songsheet
    # opt must respond to #files (list of file names) and #cmd.
    def initialize(opt)
      @opt=opt
      @file_list=opt.files
      @env=SiSU_Env::Info_env.new
    end
    # Harvests every .sst/.ssm file in the file list, renders the html index
    # and prints the file:// location(s) of the generated page.
    def songsheet
      files,idx_array=[],[]
      @file_list.each do |f|
        if f =~/.+?\.ss[tm]$/
          files << f[/(.+?\.ss[tm])$/,1]
        else
          print "not .sst or .ssm ? \n<< #{f} >> "
        end
      end
      files.each do |filename|
        file_array=[]
        File.open(filename,'r') do |file|
          # paragraph mode: one header block (blank-line separated) at a time
          file.each_line("\n\n") do |line|
            if line =~/^@(?:title|creator|date):(?:\s|$)/m
              file_array << line
            elsif line =~/^@\S+?:(?:\s|$)/m \
            or line =~/^(?:\s*\n|%+ )/
              # other header field, blank paragraph or comment: keep scanning
            else
              break # first paragraph that is not header material ends the scan
            end
          end
        end
        idx_array=HARVEST_authors::Harvest.new(file_array,filename,idx_array).extract_harvest
      end
      # A class variable set in the module body belongs to the module and is
      # not visible as @@the_idx_authors from inside this class, so fetch it
      # from the module explicitly.
      shared_idx=HARVEST_authors.class_variable_get(:@@the_idx_authors)
      the_idx=HARVEST_authors::Index.new(idx_array,shared_idx).construct_book_author_index
      HARVEST_authors::Output_index.new(@opt,the_idx).html_print.html_songsheet
      puts "file://#{@env.path.output_md_harvest}/harvest_authors.html"
      puts "file://#{@env.path.pwd}/harvest_authors.html" if @opt.cmd.inspect =~/M/
    end
  end
  # Extracts title, subtitle, author and publication year from the header
  # paragraphs of a single document.
  class Harvest
    # data:      header paragraphs (strings) collected from the file
    # filename:  name of the source file
    # idx_array: accumulator of per-document entries, appended to and returned
    def initialize(data,filename,idx_array)
      @data,@filename,@idx_array=data,filename,idx_array
    end
    # Appends one entry hash (:filename, :file, :date, :title, :author, :page)
    # to idx_array when both a title and a creator were found; returns
    # idx_array either way.
    def extract_harvest
      data,filename,idx_array=@data,@filename,@idx_array
      @title,@subtitle,@fulltitle,@author_format,@date=nil,nil,nil,nil,nil
      @authors=[]
      rgx={}
      rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m
      rgx[:title]=/^@title:[ ]+(.+)/
      rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m
      rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m
      data.each do |para|
        m=rgx[:title].match(para)
        @title=m[1] if m
        m=rgx[:subtitle].match(para)
        @subtitle=m[1] if m
        m=rgx[:author].match(para)
        @author_format=m[1] if m
        m=rgx[:date].match(para)
        @date=m[1] if m
        # stop once every field is known (the author is held in @author_format)
        break if @title and @subtitle and @author_format and @date
      end
      if @title and @author_format
        # built only once a title is known, so a subtitle without a main title
        # cannot raise on nil + String
        @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title
        creator=FORMAT::Author.new(@author_format.strip).author_details
        @authors,@authorship=creator[:authors],creator[:authorship]
        # "name~xx.sst" carries a language code: it becomes ".xx" in the
        # manifest page name and is dropped from the base file name
        lang_code=filename[/~([a-z]{2,3})\.ss[mt]$/,1]
        lang=lang_code ? ('.' + lang_code) : ''
        file=filename.sub(/(?:~[a-z]{2,3})?\.ss[mt]$/,'')
        page="sisu_manifest#{lang}.html"
        idx_array << {
          :filename => filename,
          :file => file,
          :date => @date,
          :title => @fulltitle,
          :author => creator,
          :page => page
        }
      else
        #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}"
      end
      idx_array.flatten!
      idx_array
    end
  end
  # Groups the per-document entries by author.
  class Index
    # idx_array: entries produced by Harvest#extract_harvest
    # the_idx:   Hash (author name => {:md => [...]}) that is added to in place
    def initialize(idx_array,the_idx)
      @idx_array,@the_idx=idx_array,the_idx
      @@the_idx_authors=@the_idx
    end
    # Returns txt with its first character capitalised; nil and empty strings
    # are returned unchanged.
    def capital(txt)
      return txt if txt.nil? or txt.empty?
      txt[0].chr.capitalize + txt[1,txt.length]
    end
    # Files every entry under each of its authors (the :last_first_format_a
    # list of the entry's :author hash) and returns the index.
    def construct_book_author_index
      @idx_array.each do |idx|
        idx[:author][:last_first_format_a].each do |author|
          author.strip!
          @@the_idx_authors[author] ||={:md => []}
          @@the_idx_authors[author][:md] << {
            :filename => idx[:filename],
            :file => idx[:file],
            :author => idx[:author],
            :title => idx[:title],
            :date => idx[:date],
            :page => idx[:page]
          }
        end
      end
      @the_idx=@@the_idx_authors
    end
  end
  # Writes the author index as harvest_authors.html into the harvest output
  # directory and, when the command string contains "M", also into the
  # working directory (the "maintenance" copy).
  #
  # NOTE(review): the copy of this file under review had every HTML tag
  # stripped out of its heredocs and %{} literals. The markup below is a
  # minimal reconstruction around the text and interpolations that survived;
  # element names, class names and href targets are assumptions and must be
  # checked against a pristine copy before release.
  class Output_index
    def initialize(opt,the_idx)
      @opt,@the_idx=opt,the_idx
      @env=SiSU_Env::Info_env.new
      @rc=SiSU_Env::Get_init.instance.yamlrc
      @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
      @letter=@alph.shift # start on "9" so the first author forces a letter heading
      @vz=SiSU_Env::Get_init.instance.skin
    end
    # Opens the output file(s); @output[:html_mnt] stays nil unless the
    # maintenance copy was requested.
    def html_file_open
      @output={}
      @output[:html]=File.new("#{@env.path.output_md_harvest}/harvest_authors.html",'w')
      @output[:html_mnt]=(@opt.cmd.inspect =~/M/) \
      ? File.new("#{@env.path.pwd}/harvest_authors.html",'w') \
      : nil
    end
    # Closes whatever html_file_open managed to open.
    def html_file_close
      return unless @output
      @output[:html].close if @output[:html]
      @output[:html_mnt].close if @output[:html_mnt]
    end
    # Kept for the call chain Output_index.new(...).html_print.html_songsheet
    def html_print
      self
    end
    # Writes the complete page; the files are closed even if rendering raises.
    def html_songsheet
      html_file_open
      begin
        html_head
        html_alph
        html_body
        html_tail
      ensure
        html_file_close
      end
    end
    # Returns the page head; type 'maintenance' selects the stylesheet path
    # used by the copy written to the working directory.
    def html_head_adjust(type='')
      css_path=(type !~/maintenance/) \
      ? '../_sisu/css/harvest.css' \
      : 'harvest.css'
      # NOTE(review): the original assigned the version info here and
      # presumably interpolated it in the stripped markup; how is unknown.
      sv=SiSU_Env::Info_version.instance.get_version
<<WOK
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>SiSU Metadata Harvest - Authors</title>
<link rel="stylesheet" href="#{css_path}" type="text/css" />
</head>
<body>
<h1>SiSU Metadata Harvest - Authors</h1>
<p>[ <a href="../index.html">HOME</a> ] also see <a href="harvest_topics.html">SiSU Metadata Harvest - Topics</a></p>
<p>#{@env.widget_static.search_form}</p>
<hr />
WOK
    end
    def html_head
      @output[:html_mnt] << html_head_adjust('maintenance') if @output[:html_mnt]
      @output[:html] << html_head_adjust
    end
    # Writes the A-Z jump list; targets are the anchors written by
    # do_string_name.
    def html_alph
      a=['<p>']
      @alph.each do |x|
        a << ((x =~/[0-9]/) ? '' : %{<a href="##{x}">#{x}</a>, })
      end
      a << "</p>\n"
      do_html(a.join)
    end
    # Writes the page footer. A String is written: handing an Array to IO#<<
    # emits its inspect form on Ruby 1.9 and later.
    def html_tail
      a=<<WOK
<p>#{@vz.credits_sisu}</p>
</body>
</html>
WOK
      do_html(a)
    end
    # Writes html to the main page and, when open, to the maintenance copy.
    def do_html(html)
      @output[:html_mnt] << html if @output[:html_mnt]
      @output[:html] << html
    end
    def do_string(attrib,string)
      html=%{<p class="#{attrib}">#{string}</p>}
      do_html(html)
    end
    # Emits a heading for every letter between the current one and the first
    # character of the author name in string[0]; attrib is not used.
    def do_string_name(attrib,string)
      first=/^(\S)/.match(string[0])
      return unless first # empty name: no letter to advance to
      f=first[1]
      while @letter < f and @alph.length > 0
        @letter=@alph.shift
        do_html(%{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p>})
      end
    end
    # Writes, author by author in sorted order, the author line followed by
    # that author's works sorted on "date title".
    def html_body
      @the_idx.sort.each do |a|
        do_string_name('',a)
        name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
        do_html(%{<p class="author"><a name="#{name}">#{a[0]}</a></p>})
        works=a[1][:md].map do |x|
          work=[
            "#{x[:date]} #{x[:title]}",
            %{<p class="publication">#{x[:date]} <a href="../#{x[:file]}/#{x[:page]}">#{x[:title]}</a>, #{x[:author][:authors_s]}</p>}
          ]
          if @output[:html_mnt]
            work << %{<p class="publication">[<a href="#{x[:filename]}">src</a>]  #{x[:date]} #{x[:title]}, #{x[:author][:authors_s]} -- [#{x[:file]}.sst]</p>}
          end
          work
        end
        works.sort_by {|w| w[0] }.each do |w|
          @output[:html] << w[1]
          @output[:html_mnt] << w[2] if @output[:html_mnt]
        end
      end
    end
    # Kept for the call chain Output_index.new(...).screen_print.cycle
    def screen_print
      self
    end
    # Prints every author followed by a tab-indented list of their files.
    def cycle
      @the_idx.sort.each do |a|
        puts a[0]
        a[1][:md].each do |x|
          puts "\t" + x[:file]
        end
      end
    end
  end
end
__END__