aboutsummaryrefslogtreecommitdiffhomepage
path: root/lib/sisu/develop/html_harvest_authors.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sisu/develop/html_harvest_authors.rb')
-rw-r--r--lib/sisu/develop/html_harvest_authors.rb466
1 files changed, 0 insertions, 466 deletions
diff --git a/lib/sisu/develop/html_harvest_authors.rb b/lib/sisu/develop/html_harvest_authors.rb
deleted file mode 100644
index 8298b1af..00000000
--- a/lib/sisu/develop/html_harvest_authors.rb
+++ /dev/null
@@ -1,466 +0,0 @@
-# encoding: utf-8
-=begin
-
-* Name: SiSU
-
-** Description: documents, structuring, processing, publishing, search
-*** metadata harvest, extract authors and their writings from document set
-
-** Author: Ralph Amissah
- <ralph@amissah.com>
- <ralph.amissah@gmail.com>
-
-** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
- 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
- All Rights Reserved.
-
-** License: GPL 3 or later:
-
- SiSU, a framework for document structuring, publishing and search
-
- Copyright (C) Ralph Amissah
-
- This program is free software: you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation, either version 3 of the License, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program. If not, see <http://www.gnu.org/licenses/>.
-
- If you have Internet connection, the latest version of the GPL should be
- available at these locations:
- <http://www.fsf.org/licensing/licenses/gpl.html>
- <http://www.gnu.org/licenses/gpl.html>
-
- <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>
-
-** SiSU uses:
- * Standard SiSU markup syntax,
- * Standard SiSU meta-markup syntax, and the
- * Standard SiSU object citation numbering and system
-
-** Hompages:
- <http://www.jus.uio.no/sisu>
- <http://www.sisudoc.org>
-
-** Git
- <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
- <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/develop/harvest_authors.rb;hb=HEAD>
-
-=end
-module SiSU_HarvestAuthors
- require_relative 'html_harvest_author_format' # html_harvest_author_format.rb
- require_relative 'html_parts' # html_parts.rb
- class Songsheet
- @@the_idx_authors={}
- def initialize(opt,env)
- @opt,@env=opt,env
- @file_list=opt.files
- end
- def songsheet
- idx_array={}
- @opt.f_pths.each do |y|
- lang_hash_file_array={}
- name=y[:f]
- filename=y[:pth] + '/' + y[:f]
- File.open(filename,'r') do |file|
- file.each_line("\n\n") do |line|
- if line =~/^@(?:title|creator|date):(?:\s|$)/m
- lang_hash_file_array[y[:lng_is]] ||= []
- lang_hash_file_array[y[:lng_is]] << line
- elsif line =~/^@\S+?:(?:\s|$)/m \
- or line =~/^(?:\s*\n|%+ )/
- else break
- end
- end
- end
- lang_hash_file_array.each_pair do |lang,a|
- idx_array[lang] ||= []
- idx_array=SiSU_HarvestAuthors::Harvest.new(
- @opt,
- @env,
- a,
- filename,
- name,
- idx_array,
- lang
- ).extract_harvest
- end
- end
- the_idx=SiSU_HarvestAuthors::Index.new(
- idx_array,
- @@the_idx_authors
- ).construct_book_author_index
- SiSU_HarvestAuthors::OutputIndex.new(
- @opt,
- the_idx
- ).html_print.html_songsheet
- end
- end
- class Harvest
- def initialize(opt,env,data,filename,name,idx_array,lang)
- @opt, @env,@data,@filename,@name,@idx_array,@lang=
- opt,env, data, filename, name, idx_array, lang
- end
- def extract_harvest
- data, filename, name, idx_array, lang =
- @data,@filename,@name,@idx_array,@lang
- @title=@subtitle=@fulltitle=@author=@author_format=@date=nil
- @authors=[]
- rgx={}
- rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m
- rgx[:title]=/^@title:[ ]+(.+)/
- rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m
- rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m
- data.each do |para|
- if para=~ rgx[:title]
- @title=rgx[:title].match(para)[1]
- end
- if para=~ rgx[:subtitle]
- @subtitle=rgx[:subtitle].match(para)[1]
- end
- if para=~ rgx[:author]
- @author_format=rgx[:author].match(para)[1]
- end
- if para=~ rgx[:date]
- @date=rgx[:date].match(para)[1]
- end
- break if @title && @subtitle && @author && @date
- end
- @fulltitle=@subtitle \
- ? (@title + ' - ' + @subtitle)
- : @title
- if @title \
- and @author_format
- creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details
- @authors,@authorship=creator[:authors],creator[:authorship]
- file=if name=~/~[a-z]{2,3}\.ss[mt]$/
- name.sub(/~[a-z]{2,3}\.ss[mt]$/,'')
- else
- name.sub(/\.ss[mt]$/,'')
- end
- page=if @env.output_dir_structure.by? == :language
- "#{lang}/sisu_manifest.html"
- else
- "sisu_manifest.#{lang}.html"
- end
- idx_array[lang] <<= {
- filename: filename,
- file: file,
- date: @date,
- title: @fulltitle,
- author: creator,
- page: page,
- lang: lang
- }
- else
- #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}"
- end
- idx_array[lang]=idx_array[lang].flatten
- idx_array
- end
- end
- class Index
- def initialize(idx_array,the_idx)
- @idx_array,@the_idx=idx_array,the_idx
- @@the_idx_authors=@the_idx
- end
- def capital(txt)
- txt[0].chr.capitalize + txt[1,txt.length]
- end
- def construct_book_author_index
- idx_array=@idx_array
- idx_array.each_pair do |lang,idx_arr|
- @@the_idx_authors[lang] ||= {}
- idx_arr.each do |idx|
- idx[:author][:last_first_format_a].each do |author|
- author=author.strip
- if @@the_idx_authors[lang][author].is_a?(NilClass)
- @@the_idx_authors[lang][author]={ md: [] }
- end
- @@the_idx_authors[lang][author][:md] << {
- filename: idx[:filename],
- file: idx[:file],
- author: idx[:author],
- title: idx[:title],
- date: idx[:date],
- page: idx[:page],
- lang: idx[:lang]
- }
- end
- end
- end
- @the_idx=@@the_idx_authors
- end
- end
- class OutputIndex
- require_relative 'i18n' # i18n.rb
- def initialize(opt,the_idx)
- @opt,@the_idx=opt,the_idx
- @env=SiSU_Env::InfoEnv.new
- @rc=SiSU_Env::GetInit.new.sisu_yaml.rc
- @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
- @alph=@alphabet_list.dup
- @letter=@alph.shift
- end
- def html_file_open
- @the_idx.keys.each do |lng|
- @output ||={}
- @output[lng] ||={}
- harvest_pth,file='',''
- if @env.output_dir_structure.by? == :language
- harvest_pth=@env.path.webserv + '/' \
- + @opt.base_stub + '/' \
- + lng + '/' \
- + 'manifest'
- file="#{harvest_pth}/authors.html"
- elsif @env.output_dir_structure.by? == :filetype
- harvest_pth=@env.path.webserv + '/' \
- + @opt.base_stub + '/' \
- + 'manifest'
- file="#{harvest_pth}/authors.#{lng}.html"
- elsif @env.output_dir_structure.by? == :filename
- harvest_pth=@env.path.webserv + '/' \
- + @opt.base_stub
- file="#{harvest_pth}/authors.#{lng}.html"
- end
- FileUtils::mkdir_p(harvest_pth) \
- unless FileTest.directory?(harvest_pth)
- fileinfo=(@opt.act[:verbose][:set]==:on \
- || @opt.act[:verbose_plus][:set]==:on \
- || @opt.act[:urls_selected][:set]==:on \
- || @opt.act[:maintenance][:set]==:on) \
- ? ("file://#{file}") : ''
- SiSU_Screen::Ansi.new(
- @opt.act[:color_state][:set],
- "harvest authors (#{@opt.files.length} files)",
- fileinfo
- ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on
- @output[lng][:html]=File.new(file,'w')
- end
- end
- def html_file_close
- @the_idx.keys.each do |lng|
- @output[lng][:html].close
- @output[lng][:html_mnt].close \
- if @output[lng][:html_mnt].is_a?(File)
- end
- end
- def html_print
- def html_songsheet
- html_file_open
- html_head
- html_alph
- html_body
- html_tail
- html_file_close
- end
- def html_head_adjust(lng,type='')
- css_path,topics='',''
- if @env.output_dir_structure.by? == :language
- css_path=(type !~/maintenance/) \
- ? '../../_sisu/css/harvest.css'
- : 'harvest.css'
- topics='topics.html'
- elsif @env.output_dir_structure.by? == :filetype
- css_path=(type !~/maintenance/) \
- ? '../_sisu/css/harvest.css'
- : 'harvest.css'
- topics="topics.#{lng}.html"
- elsif @env.output_dir_structure.by? == :filename
- css_path=(type !~/maintenance/) \
- ? './_sisu/css/harvest.css'
- : 'harvest.css'
- topics="topics.#{lng}.html"
- end
- ln=SiSU_i18n::Languages.new.language.list
- harvest_languages=''
- @the_idx.keys.each do |lg|
- if @env.output_dir_structure.by? == :language
- harvest_pth="../../#{lg}/manifest"
- file="#{harvest_pth}/authors.html"
- elsif @env.output_dir_structure.by? == :filetype
- harvest_pth='.'
- file="#{harvest_pth}/authors.#{lg}.html"
- elsif @env.output_dir_structure.by? == :filename
- harvest_pth='.'
- file="#{harvest_pth}/authors.#{lg}.html"
- end
- l=ln[lg][:t]
- harvest_languages +=
- %{<a href="#{file}">#{l}</a>&nbsp;&nbsp;&nbsp;}
- end
- sv=SiSU_Env::InfoVersion.instance.get_version
- if @env.output_dir_structure.by? == :language
- home_pth='../..'
- output_structure_by=
- '(output organised by language &amp; filetype)'
- elsif @env.output_dir_structure.by? == :filetype
- home_pth='..'
- output_structure_by=
- '(output organised by filetype)'
- elsif @env.output_dir_structure.by? == :filename
- home_pth='.'
- output_structure_by=
- '(output organised by filename)'
- else
- home_pth='.'
- output_structure_by='(output organised by ?)'
- end
- <<WOK
-<!DOCTYPE html>
-<html>
-<head>
-<meta charset="utf-8">
-<title>SiSU Metadata Harvest - Authors</title>
-<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
-<meta name="dc.title" content= "SiSU metadata harvest, Authors - SiSU information Structuring Universe, Structured information Serialised Units" />
-<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
-<meta name="generator" content="#{sv.project} #{sv.version} of #{sv.date_stamp} (n*x and Ruby!)" />
-<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
-<link href="#{css_path}" rel="stylesheet" >
-<link rel="shortcut icon" href="../_sisu/image/rb7.ico" />
-</head>
-<body lang="en" xml:lang="en">
-<a name="top" id="top"></a>
-<a name="up" id="up"></a>
-<a name="start" id="start"></a>
-<h1>SiSU Metadata Harvest - Authors #{output_structure_by}</h1>
-<p>[<a href="#{home_pth}/index.html">&nbsp;HOME&nbsp;</a>] also see <a href="#{topics}">SiSU Metadata Harvest - Topics</a></p>
-<p>#{@env.widget_static.search_form}</p>
-<hr />
-<p class="tiny">#{harvest_languages}</p>
-<hr />
-WOK
- end
- def html_head
- @the_idx.keys.each do |lng|
- @output[lng][:html_mnt] \
- << html_head_adjust(lng,'maintenance') \
- if @opt.act[:maintenance][:set]==:on
- @output[lng][:html] \
- << html_head_adjust(lng)
- end
- end
- def html_alph
- a=[]
- a << '<p>'
- @alph.each do |x|
- a << ((x =~/[0-9]/) \
- ? ''
- : %{<a href="##{x}">#{x}</a>,&nbsp;})
- end
- a=a.join
- @the_idx.keys.each do |lng|
- @output[lng][:html_mnt] << a \
- if @opt.act[:maintenance][:set]==:on
- @output[lng][:html] << a
- end
- end
- def html_tail
- a =<<WOK
-<hr />
-<a name="bottom" id="bottom"></a>
-<a name="down" id="down"></a>
-<a name="end" id="end"></a>
-<a name="finish" id="finish"></a>
-<a name="stop" id="stop"></a>
-<a name="credits"></a>
-#{SiSU_Proj_HTML::Bits.new.credits_sisu}
-</body>
-</html>
-WOK
- @the_idx.keys.each do |lng|
- @output[lng][:html_mnt] << a \
- if @output[lng][:html_mnt].is_a?(File)
- @output[lng][:html] << a
- end
- end
- def do_html(lng,html)
- @output[lng][:html_mnt] << html \
- if @output[lng][:html_mnt].is_a?(File)
- @output[lng][:html] << html
- end
- def do_string_name(lng,attrib,string)
- f=/^(\S)/.match(string[0])[1]
- if @lng != lng
- @alph=@alphabet_list.dup
- @letter=@alph.shift
- @lng = lng
- end
- if @letter < f
- while @letter < f
- if @alph.length > 0
- @letter=@alph.shift
- if @output[lng][:html_mnt].is_a?(File)
- @output[lng][:html_mnt] \
- << %{\n<p class="letter"><a name="#{@letter}"></p>#{@letter}</a><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
- end
- @output[lng][:html] \
- << %{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
- else break
- end
- end
- end
- end
- def html_body
- the_idx=@the_idx
- the_idx.each_pair do |lng,lng_array|
- lng_array.sort.each do |a|
- do_string_name(lng,'',a)
- name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
- x = %{<p class="author"><a name="#{name}">#{a[0]}</a></p>}
- if @output[lng][:html_mnt].is_a?(File)
- @output[lng][:html_mnt] << x
- end
- @output[lng][:html] << x
- lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert
- works=[]
- a[1][:md].each do |i|
- manifest_at=if @env.output_dir_structure.by? == :language
- i[:file] + Sfx[:html]
- elsif @env.output_dir_structure.by? == :filetype
- i[:file] + lang_code_insert + Sfx[:html]
- elsif @env.output_dir_structure.by? == :filename
- './' + i[:file] + '/' + i[:page]
- else '' #error
- end
- work=[
- "#{i[:date]} #{i[:title]}",
- %{<p class="publication">#{i[:date]} <a href="#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]}</p>}
- ]
- works<<=(@output[lng][:html_mnt].is_a?(File)) \
- ? (work.concat([%{<p class="publication">[<a href="#{i[:file]}.sst">src</a>]&nbsp;&nbsp;#{i[:date]} <a href="file://#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]} -- [<a href="#{i[:file]}.sst">#{i[:file]}.sst</a>]</p>}]))
- : work
- end
- works.sort_by {|y| y[0]}.each do |z|
- @output[lng][:html] << z[1]
- @output[lng][:html_mnt] << z[2] \
- if @output[lng][:html_mnt].is_a?(File)
- end
- end
- end
- end
- self
- end
- def screen_print
- def cycle
- the_idx=@the_idx
- the_idx.sort.each do |a|
- puts a[0]
- a[1][:md].each do |x|
- puts "\t" + x[:file]
- end
- end
- end
- self
- end
- end
-end
-__END__