From 75e3bf86382edf99275a25895b362647158e25c1 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 10 Jan 2012 22:37:26 -0500 Subject: v3dv, add dev branch (use to make some changes to module & class names & test) * (intended as) short term branch, merge back into v3 once tested * sisu --dev (to invoke) --- lib/sisu/v3dv/harvest_authors.rb | 378 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 lib/sisu/v3dv/harvest_authors.rb (limited to 'lib/sisu/v3dv/harvest_authors.rb') diff --git a/lib/sisu/v3dv/harvest_authors.rb b/lib/sisu/v3dv/harvest_authors.rb new file mode 100644 index 00000000..996b748c --- /dev/null +++ b/lib/sisu/v3dv/harvest_authors.rb @@ -0,0 +1,378 @@ +# encoding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + metadata harvest, extract authors and their writings from document set + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: simple xml representation (sax style) + +=end +module HARVEST_authors + require_relative 'author_format' # author_format.rb + class Songsheet + @@the_idx_authors={} + def initialize(opt,env) + @opt,@env=opt,env + @file_list=opt.files + end + def songsheet + puts 'authors:' + idx_array={} + @opt.f_pths.each do |y| + lang_hash_file_array={} + name=y[:f] + filename=y[:pth] + '/' + y[:f] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|date):(?:\s|$)/m + lang_hash_file_array[y[:lng_is]] ||= [] + lang_hash_file_array[y[:lng_is]] << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|%+ )/ + else break + end + end + end + lang_hash_file_array.each_pair do |lang,a| + idx_array[lang] ||= [] + idx_array=HARVEST_authors::Harvest.new(@opt,@env,a,filename,name,idx_array,lang).extract_harvest + end + end + the_idx=HARVEST_authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index + HARVEST_authors::Output_index.new(@opt,the_idx).html_print.html_songsheet + end + end + class Harvest + def initialize(opt,env,data,filename,name,idx_array,lang) + @opt,@env,@data,@filename,@name,@idx_array,@lang=opt,env,data,filename,name,idx_array,lang + end + def extract_harvest + data,filename,name,idx_array,lang=@data,@filename,@name,@idx_array,@lang + @title,@subtitle,@fulltitle,@author,@author_format,@date=nil,nil,nil,nil,nil,nil + @authors=[] + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m + data.each do |para| + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + if para=~ rgx[:date] + @date=rgx[:date].match(para)[1] + end + break if @title and @subtitle and @author and @date + end + @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title + if @title \ + and @author_format + creator=FORMAT::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if name=~/~[a-z]{2,3}\.ss[mt]$/ + name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + name.sub(/\.ss[mt]$/,'') + end + page=if @env.output_dir_structure.by_language_code? + "#{lang}/sisu_manifest.html" + else + "sisu_manifest.#{lang}.html" + end + idx_array[lang] <<= { filename: filename, file: file, date: @date, title: @fulltitle, author: creator, page: page, lang: lang } + else + #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" + end + idx_array[lang].flatten! + idx_array + end + end + class Index + def initialize(idx_array,the_idx) + @idx_array,@the_idx=idx_array,the_idx + @@the_idx_authors=@the_idx + end + def capital(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def construct_book_author_index + idx_array=@idx_array + idx_array.each_pair do |lang,idx_array| + @@the_idx_authors[lang] ||= {} + idx_array.each do |idx| + idx[:author][:last_first_format_a].each do |author| + author.strip! + if @@the_idx_authors[lang][author].class==NilClass + @@the_idx_authors[lang][author]={ md: [] } + end + @@the_idx_authors[lang][author][:md] << { filename: idx[:filename], file: idx[:file], author: idx[:author], title: idx[:title], date: idx[:date], page: idx[:page], lang: idx[:lang] } + end + end + end + @the_idx=@@the_idx_authors + end + end + class Output_index + require_relative 'i18n' # i18n.rb + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::Info_env.new + @rc=SiSU_Env::Get_init.instance.sisu_yaml.rc + @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @letter=@alph.shift + @vz=SiSU_Env::Get_init.instance.skin + end + def html_file_open + @the_idx.keys.each do |lng| + @output ||={} + @output[lng] ||={} + harvest_pth,file='','' + if @env.output_dir_structure.by_language_code? + harvest_pth="#{@env.path.webserv}/#{@opt.base_stub}/#{lng}/manifest" + file="#{harvest_pth}/authors.html" + else + harvest_pth="#{@env.path.webserv}/#{@opt.base_stub}/manifest" + file="#{harvest_pth}/authors.#{lng}.html" + end + FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth) + puts "file://#{file}" + @output[lng][:html]=File.new(file,'w') + end + end + def html_file_close + @the_idx.keys.each do |lng| + @output[lng][:html].close + @output[lng][:html_mnt].close if @output[lng][:html_mnt].class==File + end + end + def html_print + def html_songsheet + html_file_open + html_head + html_alph + html_body + html_tail + html_file_close + end + def html_head_adjust(lng,type='') + css_path,topics='','' + if @env.output_dir_structure.by_language_code? + css_path=(type !~/maintenance/) \ + ? '../../_sisu/css/harvest.css' + : 'harvest.css' + topics='topics.html' + elsif @env.output_dir_structure.by_filetype? + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + elsif @env.output_dir_structure.by_filename? + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + end + ln=SiSU_i18n::Languages.new.language.list + harvest_languages='' + @the_idx.keys.each do |lng| + if @env.output_dir_structure.by_language_code? + harvest_pth="../../#{lng}/manifest" + file="#{harvest_pth}/authors.html" + else @env.output_dir_structure.by_filetype? + harvest_pth='.' + file="#{harvest_pth}/authors.#{lng}.html" + end + l=ln[lng][:t] + harvest_languages += %{#{l}   } + end + sv=SiSU_Env::Info_version.instance.get_version + < + + +SiSU Metadata Harvest - Authors + + + + + + + + + + + + +

SiSU Metadata Harvest - Authors

+

[ HOME ] also see SiSU Metadata Harvest - Topics

+

#{@env.widget_static.search_form}

+
+

#{harvest_languages}

+
+WOK + end + def html_head + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << html_head_adjust(lng,'maintenance') if @opt.cmd.inspect =~/M/ + @output[lng][:html] << html_head_adjust(lng) + end + end + def html_alph + a=[] + a << '

' + @alph.each do |x| + a << ((x =~/[0-9]/) \ + ? '' + : %{#{x}, }) + end + a=a.join + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a if @opt.cmd.inspect =~/M/ + @output[lng][:html] << a + end + end + def html_tail + a =< + + + + + + +#{@vz.credits_sisu} + + +WOK + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a if @output[lng][:html_mnt].class==File + @output[lng][:html] << a + end + end + def do_html(lng,html) + @output[lng][:html_mnt] << html if @output[lng][:html_mnt].class==File + @output[lng][:html] << html + end + def do_string_name(lng,attrib,string) + f=/^(\S)/.match(string[0])[1] + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[lng][:html_mnt].class==File + @output[lng][:html_mnt] << %{\n

#{@letter}

} + end + @output[lng][:html] << %{\n

#{@letter}

} + else break + end + end + end + end + def html_body + the_idx=@the_idx + the_idx.each_pair do |lng,lng_array| + lng_array.sort.each do |a| + do_string_name(lng,'',a) + name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + x = %{

#{a[0]}

} + if @output[lng][:html_mnt].class==File + @output[lng][:html_mnt] << x + end + @output[lng][:html] << x + works=[] + a[1][:md].each do |x| + if @env.output_dir_structure.by_language_code? + manifest_pth="#{@env.path.output}/#{x[:file]}" + manifest_at=x[:file] + '.html' + elsif @env.output_dir_structure.by_filetype? + manifest_name=x[:file] + manifest_at=x[:file] + '.' + lng + '.html' + elsif @env.output_dir_structure.by_filename? + manifest_at="../#{x[:file]}/#{x[:page]}" + end + work=[ "#{x[:date]} #{x[:title]}", %{

#{x[:date]} #{x[:title]}, #{x[:author][:authors_s]}

} ] + works<<=(@output[lng][:html_mnt].class==File) \ + ? (work.concat([%{

[src]  #{x[:date]} #{x[:title]}, #{x[:author][:authors_s]} -- [#{x[:file]}.sst]

}])) + : work + end + works.sort_by {|x| x[0]}.each do |x| + @output[lng][:html] << x[1] + @output[lng][:html_mnt] << x[2] if @output[lng][:html_mnt].class==File + end + end + end + end + self + end + def screen_print + def cycle + the_idx=@the_idx + the_idx.sort.each do |a| + puts a[0] + a[1][:md].each do |x| + puts "\t" + x[:file] + end + end + end + self + end + end +end +__END__ -- cgit v1.2.3 From 51ba2d70af41137ddc3323d79e01b28fbcb40082 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 10 Jan 2012 22:44:24 -0500 Subject: v3dv: module names, use more CamelCase --- lib/sisu/v3dv/harvest_authors.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/sisu/v3dv/harvest_authors.rb') diff --git a/lib/sisu/v3dv/harvest_authors.rb b/lib/sisu/v3dv/harvest_authors.rb index 996b748c..8b820c9b 100644 --- a/lib/sisu/v3dv/harvest_authors.rb +++ b/lib/sisu/v3dv/harvest_authors.rb @@ -57,7 +57,7 @@ ** Description: simple xml representation (sax style) =end -module HARVEST_authors +module SiSU_Harvest_Authors require_relative 'author_format' # author_format.rb class Songsheet @@the_idx_authors={} @@ -85,11 +85,11 @@ module HARVEST_authors end lang_hash_file_array.each_pair do |lang,a| idx_array[lang] ||= [] - idx_array=HARVEST_authors::Harvest.new(@opt,@env,a,filename,name,idx_array,lang).extract_harvest + idx_array=SiSU_Harvest_Authors::Harvest.new(@opt,@env,a,filename,name,idx_array,lang).extract_harvest end end - the_idx=HARVEST_authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index - HARVEST_authors::Output_index.new(@opt,the_idx).html_print.html_songsheet + the_idx=SiSU_Harvest_Authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index + SiSU_Harvest_Authors::Output_index.new(@opt,the_idx).html_print.html_songsheet end end class Harvest @@ -123,7 +123,7 @@ module HARVEST_authors @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title if @title \ and @author_format - creator=FORMAT::Author.new(@author_format.strip).author_details + creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details @authors,@authorship=creator[:authors],creator[:authorship] file=if name=~/~[a-z]{2,3}\.ss[mt]$/ name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') -- cgit v1.2.3 From df67ee1079d074dc2af54aa87187c6538e73d5d7 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 10 Jan 2012 22:50:12 -0500 Subject: v3dv: class names, use more CamelCase --- lib/sisu/v3dv/harvest_authors.rb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/sisu/v3dv/harvest_authors.rb') diff --git a/lib/sisu/v3dv/harvest_authors.rb b/lib/sisu/v3dv/harvest_authors.rb index 8b820c9b..3d3c2378 100644 --- a/lib/sisu/v3dv/harvest_authors.rb +++ b/lib/sisu/v3dv/harvest_authors.rb @@ -89,7 +89,7 @@ module SiSU_Harvest_Authors end end the_idx=SiSU_Harvest_Authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index - SiSU_Harvest_Authors::Output_index.new(@opt,the_idx).html_print.html_songsheet + SiSU_Harvest_Authors::OutputIndex.new(@opt,the_idx).html_print.html_songsheet end end class Harvest @@ -168,15 +168,15 @@ module SiSU_Harvest_Authors @the_idx=@@the_idx_authors end end - class Output_index + class OutputIndex require_relative 'i18n' # i18n.rb def initialize(opt,the_idx) @opt,@the_idx=opt,the_idx - @env=SiSU_Env::Info_env.new - @rc=SiSU_Env::Get_init.instance.sisu_yaml.rc + @env=SiSU_Env::InfoEnv.new + @rc=SiSU_Env::GetInit.instance.sisu_yaml.rc @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] @letter=@alph.shift - @vz=SiSU_Env::Get_init.instance.skin + @vz=SiSU_Env::GetInit.instance.skin end def html_file_open @the_idx.keys.each do |lng| @@ -241,7 +241,7 @@ module SiSU_Harvest_Authors l=ln[lng][:t] harvest_languages += %{#{l}   } end - sv=SiSU_Env::Info_version.instance.get_version + sv=SiSU_Env::InfoVersion.instance.get_version < -- cgit v1.2.3