From 960c3088bc88f2db879154053280b06c160d4d70 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 21 Apr 2015 14:45:52 -0400 Subject: lib/sisu/*, single libs directory (c&d gone) (7) * removed lib/sisu/{current,develop} dir branches v7 (v5 & v6 retired) * simplify dir structure, offer single version per snapshot * have enjoyed carrying stable and development versions v5 & v6 in a single tarball, may return to this structure --- lib/sisu/html_harvest_authors.rb | 466 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 466 insertions(+) create mode 100644 lib/sisu/html_harvest_authors.rb (limited to 'lib/sisu/html_harvest_authors.rb') diff --git a/lib/sisu/html_harvest_authors.rb b/lib/sisu/html_harvest_authors.rb new file mode 100644 index 00000000..c6895ec8 --- /dev/null +++ b/lib/sisu/html_harvest_authors.rb @@ -0,0 +1,466 @@ +# encoding: utf-8 +=begin + +* Name: SiSU + +** Description: documents, structuring, processing, publishing, search +*** metadata harvest, extract authors and their writings from document set + +** Author: Ralph Amissah + + + +** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, + 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah, + All Rights Reserved. + +** License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + +** SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + +** Hompages: + + + +** Git + + + +=end +module SiSU_HarvestAuthors + require_relative 'html_harvest_author_format' # html_harvest_author_format.rb + require_relative 'html_parts' # html_parts.rb + class Songsheet + @@the_idx_authors={} + def initialize(opt,env) + @opt,@env=opt,env + @file_list=opt.files + end + def songsheet + idx_array={} + @opt.f_pths.each do |y| + lang_hash_file_array={} + name=y[:f] + filename=y[:pth] + '/' + y[:f] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|date):(?:\s|$)/m + lang_hash_file_array[y[:lng_is]] ||= [] + lang_hash_file_array[y[:lng_is]] << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|%+ )/ + else break + end + end + end + lang_hash_file_array.each_pair do |lang,a| + idx_array[lang] ||= [] + idx_array=SiSU_HarvestAuthors::Harvest.new( + @opt, + @env, + a, + filename, + name, + idx_array, + lang + ).extract_harvest + end + end + the_idx=SiSU_HarvestAuthors::Index.new( + idx_array, + @@the_idx_authors + ).construct_book_author_index + SiSU_HarvestAuthors::OutputIndex.new( + @opt, + the_idx + ).html_print.html_songsheet + end + end + class Harvest + def initialize(opt,env,data,filename,name,idx_array,lang) + @opt, @env,@data,@filename,@name,@idx_array,@lang= + opt,env, data, filename, name, idx_array, lang + end + def extract_harvest + data, filename, name, idx_array, lang = + @data,@filename,@name,@idx_array,@lang + @title=@subtitle=@fulltitle=@author=@author_format=@date=nil + @authors=[] + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m + data.each do |para| + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + if para=~ rgx[:date] + @date=rgx[:date].match(para)[1] + end + break if @title && @subtitle && @author && @date + end + @fulltitle=@subtitle \ + ? (@title + ' - ' + @subtitle) + : @title + if @title \ + and @author_format + creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if name=~/~[a-z]{2,3}\.ss[mt]$/ + name.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + name.sub(/\.ss[mt]$/,'') + end + page=if @env.output_dir_structure.by? == :language + "#{lang}/sisu_manifest.html" + else + "sisu_manifest.#{lang}.html" + end + idx_array[lang] <<= { + filename: filename, + file: file, + date: @date, + title: @fulltitle, + author: creator, + page: page, + lang: lang + } + else + #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" + end + idx_array[lang]=idx_array[lang].flatten + idx_array + end + end + class Index + def initialize(idx_array,the_idx) + @idx_array,@the_idx=idx_array,the_idx + @@the_idx_authors=@the_idx + end + def capital(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def construct_book_author_index + idx_array=@idx_array + idx_array.each_pair do |lang,idx_arr| + @@the_idx_authors[lang] ||= {} + idx_arr.each do |idx| + idx[:author][:last_first_format_a].each do |author| + author=author.strip + if @@the_idx_authors[lang][author].is_a?(NilClass) + @@the_idx_authors[lang][author]={ md: [] } + end + @@the_idx_authors[lang][author][:md] << { + filename: idx[:filename], + file: idx[:file], + author: idx[:author], + title: idx[:title], + date: idx[:date], + page: idx[:page], + lang: idx[:lang] + } + end + end + end + @the_idx=@@the_idx_authors + end + end + class OutputIndex + require_relative 'i18n' # i18n.rb + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::InfoEnv.new + @rc=SiSU_Env::GetInit.new.sisu_yaml.rc + @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @alph=@alphabet_list.dup + @letter=@alph.shift + end + def html_file_open + @the_idx.keys.each do |lng| + @output ||={} + @output[lng] ||={} + harvest_pth,file='','' + if @env.output_dir_structure.by? == :language + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + lng + '/' \ + + 'manifest' + file="#{harvest_pth}/authors.html" + elsif @env.output_dir_structure.by? == :filetype + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + '/' \ + + 'manifest' + file="#{harvest_pth}/authors.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + harvest_pth=@env.path.webserv + '/' \ + + @opt.base_stub + file="#{harvest_pth}/authors.#{lng}.html" + end + FileUtils::mkdir_p(harvest_pth) \ + unless FileTest.directory?(harvest_pth) + fileinfo=(@opt.act[:verbose][:set]==:on \ + || @opt.act[:verbose_plus][:set]==:on \ + || @opt.act[:urls_selected][:set]==:on \ + || @opt.act[:maintenance][:set]==:on) \ + ? ("file://#{file}") : '' + SiSU_Screen::Ansi.new( + @opt.act[:color_state][:set], + "harvest authors (#{@opt.files.length} files)", + fileinfo + ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on + @output[lng][:html]=File.new(file,'w') + end + end + def html_file_close + @the_idx.keys.each do |lng| + @output[lng][:html].close + @output[lng][:html_mnt].close \ + if @output[lng][:html_mnt].is_a?(File) + end + end + def html_print + def html_songsheet + html_file_open + html_head + html_alph + html_body + html_tail + html_file_close + end + def html_head_adjust(lng,type='') + css_path,topics='','' + if @env.output_dir_structure.by? == :language + css_path=(type !~/maintenance/) \ + ? '../../_sisu/css/harvest.css' + : 'harvest.css' + topics='topics.html' + elsif @env.output_dir_structure.by? == :filetype + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + elsif @env.output_dir_structure.by? == :filename + css_path=(type !~/maintenance/) \ + ? './_sisu/css/harvest.css' + : 'harvest.css' + topics="topics.#{lng}.html" + end + ln=SiSU_i18n::Languages.new.language.list + harvest_languages='' + @the_idx.keys.each do |lg| + if @env.output_dir_structure.by? == :language + harvest_pth="../../#{lg}/manifest" + file="#{harvest_pth}/authors.html" + elsif @env.output_dir_structure.by? == :filetype + harvest_pth='.' + file="#{harvest_pth}/authors.#{lg}.html" + elsif @env.output_dir_structure.by? == :filename + harvest_pth='.' + file="#{harvest_pth}/authors.#{lg}.html" + end + l=ln[lg][:t] + harvest_languages += + %{#{l}   } + end + sv=SiSU_Env::InfoVersion.instance.get_version + if @env.output_dir_structure.by? == :language + home_pth='../..' + output_structure_by= + '(output organised by language & filetype)' + elsif @env.output_dir_structure.by? == :filetype + home_pth='..' + output_structure_by= + '(output organised by filetype)' + elsif @env.output_dir_structure.by? == :filename + home_pth='.' + output_structure_by= + '(output organised by filename)' + else + home_pth='.' + output_structure_by='(output organised by ?)' + end + < + + + +SiSU Metadata Harvest - Authors + + + + + + + + + + + + +

SiSU Metadata Harvest - Authors #{output_structure_by}

+

[ HOME ] also see SiSU Metadata Harvest - Topics

+

#{@env.widget_static.search_form}

+
+

#{harvest_languages}

+
+WOK + end + def html_head + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] \ + << html_head_adjust(lng,'maintenance') \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] \ + << html_head_adjust(lng) + end + end + def html_alph + a=[] + a << '

' + @alph.each do |x| + a << ((x =~/[0-9]/) \ + ? '' + : %{#{x}, }) + end + a=a.join + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @opt.act[:maintenance][:set]==:on + @output[lng][:html] << a + end + end + def html_tail + a =< + + + + + + +#{SiSU_Proj_HTML::Bits.new.credits_sisu} + + +WOK + @the_idx.keys.each do |lng| + @output[lng][:html_mnt] << a \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << a + end + end + def do_html(lng,html) + @output[lng][:html_mnt] << html \ + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html] << html + end + def do_string_name(lng,attrib,string) + f=/^(\S)/.match(string[0])[1] + if @lng != lng + @alph=@alphabet_list.dup + @letter=@alph.shift + @lng = lng + end + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] \ + << %{\n

#{@letter}

} + end + @output[lng][:html] \ + << %{\n

#{@letter}

} + else break + end + end + end + end + def html_body + the_idx=@the_idx + the_idx.each_pair do |lng,lng_array| + lng_array.sort.each do |a| + do_string_name(lng,'',a) + name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + x = %{

#{a[0]}

} + if @output[lng][:html_mnt].is_a?(File) + @output[lng][:html_mnt] << x + end + @output[lng][:html] << x + lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert + works=[] + a[1][:md].each do |i| + manifest_at=if @env.output_dir_structure.by? == :language + i[:file] + Sfx[:html] + elsif @env.output_dir_structure.by? == :filetype + i[:file] + lang_code_insert + Sfx[:html] + elsif @env.output_dir_structure.by? == :filename + './' + i[:file] + '/' + i[:page] + else '' #error + end + work=[ + "#{i[:date]} #{i[:title]}", + %{

#{i[:date]} #{i[:title]}, #{i[:author][:authors_s]}

} + ] + works<<=(@output[lng][:html_mnt].is_a?(File)) \ + ? (work.concat([%{

[src]  #{i[:date]} #{i[:title]}, #{i[:author][:authors_s]} -- [#{i[:file]}.sst]

}])) + : work + end + works.sort_by {|y| y[0]}.each do |z| + @output[lng][:html] << z[1] + @output[lng][:html_mnt] << z[2] \ + if @output[lng][:html_mnt].is_a?(File) + end + end + end + end + self + end + def screen_print + def cycle + the_idx=@the_idx + the_idx.sort.each do |a| + puts a[0] + a[1][:md].each do |x| + puts "\t" + x[:file] + end + end + end + self + end + end +end +__END__ -- cgit v1.2.3