From 6811ac91f21a434fc7d967c11e1b20f33918c6ea Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 19 Mar 2012 22:07:29 -0400 Subject: v3: 3.2 branch is main (v3dv --> v3); dev (v3dv) branch directories removed * v3dv (3.2) "merged" into v3 (previously 3.1) (& removed) * conf/sisu/v3dv --> conf/sisu/v3 * data/sisu/v3dv --> data/sisu/v3 * lib/sisu/v3dv --> lib/sisu/v3 * bin/sisu* (v3dv references changed to v3) * (--dev modifier (superfluous for the time being) runs main v3 branch) --- lib/sisu/v3dv/harvest_authors.rb | 378 --------------------------------------- 1 file changed, 378 deletions(-) delete mode 100644 lib/sisu/v3dv/harvest_authors.rb (limited to 'lib/sisu/v3dv/harvest_authors.rb') diff --git a/lib/sisu/v3dv/harvest_authors.rb b/lib/sisu/v3dv/harvest_authors.rb deleted file mode 100644 index 5bb702be..00000000 --- a/lib/sisu/v3dv/harvest_authors.rb +++ /dev/null @@ -1,378 +0,0 @@ -# encoding: utf-8 -=begin - - * Name: SiSU - - * Description: a framework for document structuring, publishing and search - metadata harvest, extract authors and their writings from document set - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Download: - - - * Ralph Amissah - - - - ** Description: simple xml representation (sax style) - -=end -module SiSU_Harvest_Authors - require_relative 'author_format' # author_format.rb - class Songsheet - @@the_idx_authors={} - def initialize(opt,env) - @opt,@env=opt,env - @file_list=opt.files - end - def songsheet - puts 'authors:' - idx_array={} - @opt.f_pths.each do |y| - lang_hash_file_array={} - name=y[:f] - filename=y[:pth] + '/' + y[:f] - File.open(filename,'r') do |file| - file.each_line("\n\n") do |line| - if line =~/^@(?:title|creator|date):(?:\s|$)/m - lang_hash_file_array[y[:lng_is]] ||= [] - lang_hash_file_array[y[:lng_is]] << line - elsif line =~/^@\S+?:(?:\s|$)/m \ - or line =~/^(?:\s*\n|%+ )/ - else break - end - end - end - lang_hash_file_array.each_pair do |lang,a| - idx_array[lang] ||= [] - idx_array=SiSU_Harvest_Authors::Harvest.new(@opt,@env,a,filename,name,idx_array,lang).extract_harvest - end - end - the_idx=SiSU_Harvest_Authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index - SiSU_Harvest_Authors::OutputIndex.new(@opt,the_idx).html_print.html_songsheet - end - end - class Harvest - def initialize(opt,env,data,filename,name,idx_array,lang) - @opt,@env,@data,@filename,@name,@idx_array,@lang=opt,env,data,filename,name,idx_array,lang - end - def extract_harvest - data,filename,name,idx_array,lang=@data,@filename,@name,@idx_array,@lang - @title,@subtitle,@fulltitle,@author,@author_format,@date=nil,nil,nil,nil,nil,nil - @authors=[] - rgx={} - rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m - rgx[:title]=/^@title:[ ]+(.+)/ - rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m - rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m - data.each do |para| - if para=~ rgx[:title] - @title=rgx[:title].match(para)[1] - end - if para=~ rgx[:subtitle] - @subtitle=rgx[:subtitle].match(para)[1] - end - if para=~ rgx[:author] - @author_format=rgx[:author].match(para)[1] - end - if para=~ rgx[:date] - @date=rgx[:date].match(para)[1] - end - break if @title and @subtitle and @author and @date - end - @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title - if @title \ - and @author_format - creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details - @authors,@authorship=creator[:authors],creator[:authorship] - file=if name=~/~[a-z]{2,3}\.ss[mt]$/ - name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') - else - name.sub(/\.ss[mt]$/,'') - end - page=if @env.output_dir_structure.by_language_code? - "#{lang}/sisu_manifest.html" - else - "sisu_manifest.#{lang}.html" - end - idx_array[lang] <<= { filename: filename, file: file, date: @date, title: @fulltitle, author: creator, page: page, lang: lang } - else - #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" - end - idx_array[lang]=idx_array[lang].flatten - idx_array - end - end - class Index - def initialize(idx_array,the_idx) - @idx_array,@the_idx=idx_array,the_idx - @@the_idx_authors=@the_idx - end - def capital(txt) - txt[0].chr.capitalize + txt[1,txt.length] - end - def construct_book_author_index - idx_array=@idx_array - idx_array.each_pair do |lang,idx_array| - @@the_idx_authors[lang] ||= {} - idx_array.each do |idx| - idx[:author][:last_first_format_a].each do |author| - author=author.strip - if @@the_idx_authors[lang][author].class==NilClass - @@the_idx_authors[lang][author]={ md: [] } - end - @@the_idx_authors[lang][author][:md] << { filename: idx[:filename], file: idx[:file], author: idx[:author], title: idx[:title], date: idx[:date], page: idx[:page], lang: idx[:lang] } - end - end - end - @the_idx=@@the_idx_authors - end - end - class OutputIndex - require_relative 'i18n' # i18n.rb - def initialize(opt,the_idx) - @opt,@the_idx=opt,the_idx - @env=SiSU_Env::InfoEnv.new - @rc=SiSU_Env::GetInit.instance.sisu_yaml.rc - @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] - @letter=@alph.shift - @vz=SiSU_Env::GetInit.instance.skin - end - def html_file_open - @the_idx.keys.each do |lng| - @output ||={} - @output[lng] ||={} - harvest_pth,file='','' - if @env.output_dir_structure.by_language_code? - harvest_pth="#{@env.path.webserv}/#{@opt.base_stub}/#{lng}/manifest" - file="#{harvest_pth}/authors.html" - else - harvest_pth="#{@env.path.webserv}/#{@opt.base_stub}/manifest" - file="#{harvest_pth}/authors.#{lng}.html" - end - FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth) - puts "file://#{file}" - @output[lng][:html]=File.new(file,'w') - end - end - def html_file_close - @the_idx.keys.each do |lng| - @output[lng][:html].close - @output[lng][:html_mnt].close if @output[lng][:html_mnt].class==File - end - end - def html_print - def html_songsheet - html_file_open - html_head - html_alph - html_body - html_tail - html_file_close - end - def html_head_adjust(lng,type='') - css_path,topics='','' - if @env.output_dir_structure.by_language_code? - css_path=(type !~/maintenance/) \ - ? '../../_sisu/css/harvest.css' - : 'harvest.css' - topics='topics.html' - elsif @env.output_dir_structure.by_filetype? - css_path=(type !~/maintenance/) \ - ? '../_sisu/css/harvest.css' - : 'harvest.css' - topics="topics.#{lng}.html" - elsif @env.output_dir_structure.by_filename? - css_path=(type !~/maintenance/) \ - ? '../_sisu/css/harvest.css' - : 'harvest.css' - topics="topics.#{lng}.html" - end - ln=SiSU_i18n::Languages.new.language.list - harvest_languages='' - @the_idx.keys.each do |lng| - if @env.output_dir_structure.by_language_code? - harvest_pth="../../#{lng}/manifest" - file="#{harvest_pth}/authors.html" - else @env.output_dir_structure.by_filetype? - harvest_pth='.' - file="#{harvest_pth}/authors.#{lng}.html" - end - l=ln[lng][:t] - harvest_languages += %{#{l}   } - end - sv=SiSU_Env::InfoVersion.instance.get_version - < - - -SiSU Metadata Harvest - Authors - - - - - - - - - - - - -

SiSU Metadata Harvest - Authors

-

[ HOME ] also see SiSU Metadata Harvest - Topics

-

#{@env.widget_static.search_form}

-
-

#{harvest_languages}

-
-WOK - end - def html_head - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << html_head_adjust(lng,'maintenance') if @opt.cmd.inspect =~/M/ - @output[lng][:html] << html_head_adjust(lng) - end - end - def html_alph - a=[] - a << '

' - @alph.each do |x| - a << ((x =~/[0-9]/) \ - ? '' - : %{#{x}, }) - end - a=a.join - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a if @opt.cmd.inspect =~/M/ - @output[lng][:html] << a - end - end - def html_tail - a =< - - - - - - -#{@vz.credits_sisu} - - -WOK - @the_idx.keys.each do |lng| - @output[lng][:html_mnt] << a if @output[lng][:html_mnt].class==File - @output[lng][:html] << a - end - end - def do_html(lng,html) - @output[lng][:html_mnt] << html if @output[lng][:html_mnt].class==File - @output[lng][:html] << html - end - def do_string_name(lng,attrib,string) - f=/^(\S)/.match(string[0])[1] - if @letter < f - while @letter < f - if @alph.length > 0 - @letter=@alph.shift - if @output[lng][:html_mnt].class==File - @output[lng][:html_mnt] << %{\n

#{@letter}

} - end - @output[lng][:html] << %{\n

#{@letter}

} - else break - end - end - end - end - def html_body - the_idx=@the_idx - the_idx.each_pair do |lng,lng_array| - lng_array.sort.each do |a| - do_string_name(lng,'',a) - name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') - x = %{

#{a[0]}

} - if @output[lng][:html_mnt].class==File - @output[lng][:html_mnt] << x - end - @output[lng][:html] << x - works=[] - a[1][:md].each do |x| - if @env.output_dir_structure.by_language_code? - manifest_pth="#{@env.path.output}/#{x[:file]}" - manifest_at=x[:file] + '.html' - elsif @env.output_dir_structure.by_filetype? - manifest_name=x[:file] - manifest_at=x[:file] + '.' + lng + '.html' - elsif @env.output_dir_structure.by_filename? - manifest_at="../#{x[:file]}/#{x[:page]}" - end - work=[ "#{x[:date]} #{x[:title]}", %{

#{x[:date]} #{x[:title]}, #{x[:author][:authors_s]}

} ] - works<<=(@output[lng][:html_mnt].class==File) \ - ? (work.concat([%{

[src]  #{x[:date]} #{x[:title]}, #{x[:author][:authors_s]} -- [#{x[:file]}.sst]

}])) - : work - end - works.sort_by {|x| x[0]}.each do |x| - @output[lng][:html] << x[1] - @output[lng][:html_mnt] << x[2] if @output[lng][:html_mnt].class==File - end - end - end - end - self - end - def screen_print - def cycle - the_idx=@the_idx - the_idx.sort.each do |a| - puts a[0] - a[1][:md].each do |x| - puts "\t" + x[:file] - end - end - end - self - end - end -end -__END__ -- cgit v1.2.3