-*- mode: org -*-
#+TITLE:       sisu harvest
#+DESCRIPTION: documents - structuring, various output representations & search
#+FILETAGS:    :sisu:harvest:
#+AUTHOR:      Ralph Amissah
#+EMAIL:       [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+COPYRIGHT:   Copyright (C) 2015 - 2021 Ralph Amissah
#+LANGUAGE:    en
#+STARTUP:     content hideblocks hidestars noindent entitiespretty
#+OPTIONS:     H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
#+PROPERTY:    header-args  :exports code
#+PROPERTY:    header-args+ :noweb yes
#+PROPERTY:    header-args+ :eval no
#+PROPERTY:    header-args+ :results no
#+PROPERTY:    header-args+ :cache no
#+PROPERTY:    header-args+ :padline no

* harvest
** html_harvest.rb

#+BEGIN_SRC ruby  :tangle "../lib/sisu/html_harvest.rb"
# <<sisu_document_header>>
module SiSU_Harvest
  @@the_idx_topics,@@the_idx_authors={},{}
  # Entry point of the metadata harvest: prepares the output directory and
  # dispatches to the author and topic harvesters.
  class Source
    require_relative 'hub_options'                          # hub_options.rb
    require_relative 'html_harvest_topics'                  # html_harvest_topics.rb
    require_relative 'html_harvest_authors'                 # html_harvest_authors.rb
    require_relative 'se'                                   # se.rb
      include SiSU_Env
    # opt:: the command line options object handed on to the harvesters
    def initialize(opt)
      @opt=opt
      @env=SiSU_Env::InfoEnv.new
    end
    # Creates the harvest output directory (webserv path + base stub) when
    # missing and runs the selected action. The css is copied in all cases.
    def read
      harvest_pth=@env.path.webserv + '/' + @opt.base_stub
      FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth)
      cases(@opt,@env)
    rescue StandardError => e
      # the harvest stays best-effort (no re-raise), but the failure is now
      # reported on stderr instead of being discarded without a trace
      warn "harvest failed: #{e.class}: #{e.message}"
    ensure
      SiSU_Env::CreateSite.new(@opt).cp_css
    end
    # Prints the one-line usage note for the harvest option.
    def help
      puts <<WOK
      harvest --harvest   extracts document index metadata

WOK
    end
    # Writes the harvest stylesheet into the present working directory.
    # opt is accepted for interface compatibility and is not used.
    def css(opt)
      require_relative 'css'                                # css.rb
      css=SiSU_Style::CSS.new
      fn_css=SiSU_Env::CSS_Default.new
      # block form closes the file even if the write raises
      File.open("#{@env.path.pwd}/#{fn_css.harvest}",'w') do |style|
        style << css.harvest
      end
    end
    # Runs the author and topic harvests when --harvest was selected
    # (optionally followed by an rsync of the result), otherwise prints help.
    def cases(opt,env)
      case opt.selections.str.inspect
      when/--harvest/i
        css(opt) if @opt.act[:maintenance][:set]==:on
        SiSU_HarvestAuthors::Songsheet.new(opt,env).songsheet
        SiSU_HarvestTopics::Songsheet.new(opt,env).songsheet
        if @opt.act[:rsync][:set]==:on
          require_relative 'remote'                         # remote.rb
          SiSU_Remote::Put.new(opt).rsync_harvest
        end
      else
        help
      end
    end
  end
end
#+END_SRC

** topics
*** html_harvest_topics.rb

#+BEGIN_SRC ruby  :tangle "../lib/sisu/html_harvest_topics.rb"
# <<sisu_document_header>>
module SiSU_HarvestTopics
  require_relative 'html_harvest_author_format'          # html_harvest_author_format.rb
  require_relative 'html_parts'                          # html_parts.rb
  # Drives the topic harvest: gathers the header paragraphs of every source
  # file, has them parsed into index entries, builds the topic tree and
  # writes it out as html.
  class Songsheet
    @@the_idx_topics={}
    def initialize(opt,env)
      @opt,@env=opt,env
      @file_list=opt.files
    end
    # Reads each file paragraph by paragraph (blank-line separated), keeping
    # the @title, @creator and @classify paragraphs; reading of a file stops
    # at the first paragraph that is neither a header field, blank nor a
    # comment.
    def songsheet
      idx_array={}
      @opt.f_pths.each do |f_pth|
        name=f_pth[:f]
        filename=f_pth[:pth] + '/' + f_pth[:f]
        headers_by_lang={}
        File.open(filename,'r') do |file|
          file.each_line("\n\n") do |para|
            if para =~/^@(?:title|creator|classify):(?:\s|$)/m
              headers_by_lang[f_pth[:lng_is]] ||= []
              headers_by_lang[f_pth[:lng_is]] << para
              next
            end
            skippable=(para =~/^@\S+?:(?:\s|$)/m) ||
              (para =~/^(?:\s*\n|\s*$|%+ )/)
            break unless skippable
          end
        end
        headers_by_lang.each_pair do |lang,paras|
          idx_array[lang] ||=[]
          harvest=SiSU_HarvestTopics::Harvest.new(
            @opt,@env,paras,filename,name,idx_array,lang
          )
          idx_array=harvest.extract_harvest
        end
      end
      index=SiSU_HarvestTopics::Index.new(@opt,@env,idx_array,@@the_idx_topics)
      the_hash=index.song
      SiSU_HarvestTopics::OutputIndex.new(@opt,the_hash).html_print.html_songsheet
    end
  end
  # Shared base class of Index and OutputIndex.
  class Mix
    # Returns the value stored under :spaces in the Ax constant; the
    # traverse methods repeat it to indent their output.
    def spaces
      indent_unit=Ax[:spaces]
      indent_unit
    end
  end
  # Parses the header paragraphs of one document (title, subtitle, author,
  # topic register) and appends one index entry per registered topic.
  class Harvest
    # data::      array of header paragraphs (strings) of one document
    # filename::  path of the source file; name:: its basename
    # idx_array:: hash of language code => array of entries built so far
    # lang::      language code the entries are filed under
    def initialize(opt,env,data,filename,name,idx_array,lang)
      @opt, @env,@data,@filename,@name,@idx_array,@lang=
        opt,env, data, filename, name, idx_array, lang
    end
    # Returns idx_array with the entries of this document added under lang.
    # A document lacking title, author or topic register adds nothing (and
    # is reported in verbose/maintenance mode).
    def extract_harvest
      data,   filename, name, idx_array, lang=
        @data,@filename,@name,@idx_array,@lang
      @idx_list=@title=@subtitle=@fulltitle=@author_format=nil
      rgx={}
      rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m
      rgx[:title]=/^@title:[ ]+(.+)/
      rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m
      rgx[:idx]=/^@classify:.+?:topic_register:[ ]+(.+?)(?:\n\n|\n\s+:\S|\n%)/m
      data.each do |para|
        if (found=rgx[:idx].match(para))
          # a register may be wrapped over several lines, rejoin it
          @idx_list=found[1].split(/\s*\n\s*/).join
        end
        if (found=rgx[:title].match(para))
          @title=found[1]
        end
        if (found=rgx[:subtitle].match(para))
          @subtitle=found[1]
        end
        if (found=rgx[:author].match(para))
          @author_format=found[1]
        end
        # stop once every field is known; this tests the variables that are
        # actually assigned above, so the first occurrence of a field wins
        break if @title && @subtitle && @author_format && @idx_list
      end
      # a subtitle without a main title must not raise on nil + String
      @fulltitle=(@title && @subtitle) ? (@title + ' - ' + @subtitle) : @title
      idx_array[lang] ||=[]
      if @title \
      and @author_format \
      and @idx_list
        creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details
        @authors,@authorship=creator[:authors],creator[:authorship]
        # strip the markup suffix, and a ~lang filename marker where present
        file=if name=~/~[a-z]{2,3}\.ss[mt]$/
          name.sub(/~[a-z]{2,3}\.ss[mt]$/,'')
        else
          name.sub(/\.ss[mt]$/,'')
        end
        page=if @env.output_dir_structure.by? == :language
          "#{lang}/sisu_manifest.html"
        else
          "sisu_manifest.#{lang}.html"
        end
        # ';' separates several topic paths within one register
        topics=if @idx_list =~/;/
          @idx_list.scan(/[^;]+/).map { |topic| topic.strip }
        else
          [@idx_list]
        end
        topics.each do |topic|
          idx_array[lang] << {
            filename: filename,
            file: file,
            rough_idx: topic,
            title: @fulltitle,
            author: creator,
            page: page,
            lang: lang
          }
        end
      else
        if (@opt.act[:verbose_plus][:set]==:on \
        || @opt.act[:maintenance][:set]==:on)
          p "missing required field in #{@filename} - [title]: <#{@title}>; [author]: <#{@author_format}>; [idx]: <#{@idx_list}>"
        end
      end
      idx_array[lang]=idx_array[lang].flatten
      idx_array
    end
  end
  class Index < Mix
    # idx_array:: hash of language code => harvested entries
    # the_idx::   hash that is also installed as the class-level topic index
    def initialize(opt,env,idx_array,the_idx)
      @opt=opt
      @env=env
      @idx_array=idx_array
      @the_idx=the_idx
      @@the_idx_topics=@the_idx
    end
    # Builds the sorted topic keys and folds them into the nested topic
    # hash, which is returned.
    def song
      construct_book_topic_hash(construct_book_topic_keys)
    end
    # Upcases the first character of every whitespace separated word and
    # rejoins the words with single spaces; the rest of each word is kept.
    def capital(txt)
      txt.scan(/\S+/).map do |word|
        word[0].chr.capitalize + word[1..-1]
      end.join(' ')
    end
    # Upcases the first character of txt and leaves the remainder as is.
    # NOTE(review): an identical capital_ is defined again further down in
    # this class; Ruby keeps the later definition.
    def capital_(txt)
      first=txt[0].chr.capitalize
      rest=txt[1..-1]
      first + rest
    end
    # Reduces a harvested entry to what the topic page needs: file details,
    # title, manifest page and the authors rendered as links into the
    # authors index (anchor = surname, whitespace replaced by '_').
    # Each link is followed by ", " (also the last one).
    def contents(idx,lang)
      links=idx[:author][:last_first_format_a].map do |person|
        anchor=person.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
        target=if @env.output_dir_structure.by? == :language
          "authors.html"
        else
          "authors.#{lang}.html"
        end
        %{<a href="#{target}##{anchor}">#{person}</a>, }
      end
      {
        filename: idx[:filename],
        file: idx[:file],
        author: links.join,
        title: idx[:title],
        page: idx[:page]
      }
    end
    # Upcases the first character of txt and leaves the remainder as is.
    # An empty string is returned unchanged (previously txt[0] was nil and
    # the call raised NoMethodError).
    # NOTE(review): this repeats the capital_ defined earlier in this class;
    # being the later definition, this is the one Ruby keeps.
    def capital_(txt)
      return txt if txt.empty?
      txt[0].chr.capitalize + txt[1,txt.length]
    end
    # Builds the '|' separated sort key for one topic path.
    # c::   [lang, level-1 terms, level-2 terms, ..., details hash]
    # alt:: the term used for the deepest level of the key
    # The key is the language, the capitalised first term of each level
    # between, and the capitalised alt. Returns nil unless c has between
    # 3 and 6 elements.
    def key_create(c,alt)
      return nil unless (3..6).include?(c.length)
      upper_levels=(1..(c.length - 3)).map { |lev| capital(c[lev][0].to_s) }
      ([c[0].to_s] + upper_levels + [capital(alt.to_s)]).join('|')
    end
    # Turns every harvested entry into [sort key, details] pairs and returns
    # them sorted by key.
    # An entry's rough index "a|a2:b:c" is split on ':' into levels and each
    # level on '|' into alternative terms; a row is
    # [lang, level-1 terms, ..., details hash]. For the level directly in
    # front of the details hash one key is created per alternative term.
    # Entries without a topic register are reported and skipped.
    def construct_book_topic_keys
      idx_array=@idx_array
      @idx_a=[]
      @the_a=[]
      idx_array.each_pair do |lang,entries|
        @@the_idx_topics[lang] ||= {}
        entries.each do |entry|
          unless entry[:rough_idx]
            puts "no topic register in: << #{entry[:filename]} >>"
            next
          end
          levels=entry[:rough_idx].scan(/[^:]+/).map do |level|
            level.scan(/[^|\n]+/m)
          end
          levels << contents(entry,lang)
          @idx_a << [lang] + levels
        end
      end
      @idx_a.each do |row|
        # positions 2..6 may hold the details hash, the terms precede it
        (2..6).each do |at|
          next unless row.length > (at - 1) and row.is_a?(Array)
          next unless row[at].is_a?(Hash)
          row[at - 1].each do |alt|
            key=key_create(row,alt)
            @the_a << [key, row[at]] if key
          end
        end
      end
      @the_a.sort_by { |pair| pair[0] }
    end
    # Folds [key, details] pairs into a nested hash and returns it.
    # t:: array of pairs; key is a '|' separated path such as
    #     "en|Law|Contract", details is the document details hash.
    # Every path segment becomes a hash level; the details are appended to
    # the array under :md of the last segment, e.g.
    #   { "en" => { "Law" => { md: [..], "Contract" => { md: [..] } } } }
    # The result no longer depends on the order of t (a parent key arriving
    # after one of its sub-topics used to raise on nil) nor on the path
    # depth (more than five segments used to raise as well).
    def construct_book_topic_hash(t)
      @the_h={}
      t.each do |pair|
        path=pair[0].scan(/[^|]+/)
        next if path.empty?
        # walk down to the last segment, creating levels as needed
        node=path.inject(@the_h) { |level,term| level[term] ||={} }
        (node[:md] ||=[]) << pair[1]
      end
      @the_h
    end
    # Debug aid: prints the string keys of the topic hash (@the_h), indented
    # by one unit of spaces per level, six levels deep at most.
    def traverse_base
      walk=lambda do |node,depth|
        node.each_pair do |term,sub|
          puts spaces*depth + term if term.is_a?(String)
          walk.call(sub,depth+1) if depth < 5 && sub.is_a?(Hash)
        end
      end
      walk.call(@the_h,0)
    end
    # Debug aid: prints the topic hash (@the_h) as an indented outline.
    # Each string key is indented by one unit of spaces per level; the
    # titles of the documents held under a topic's :md follow the topic,
    # all at indent 5. At most six levels are printed.
    # The innermost level now prints its own key (it used to repeat the key
    # of the level above).
    def traverse
      walk=lambda do |node,depth|
        node.each_pair do |term,sub|
          puts spaces*depth + term if term.is_a?(String)
          # the sixth level is printed but not descended into
          next unless depth < 5 && sub.is_a?(Hash)
          if sub.has_key?(:md)
            sub[:md].each { |work| puts spaces*5 + work[:title] }
          end
          walk.call(sub,depth+1)
        end
      end
      walk.call(@the_h,0)
    end
  end
  class OutputIndex < Mix
    require_relative 'i18n'                               # i18n.rb
    # the_idx:: nested topic hash, keyed by language code at the top level
    # Also prepares the letter list used for the alphabetical anchors:
    # '9' followed by 'A'..'Z', with @letter starting at '9'.
    def initialize(opt,the_idx)
      @opt=opt
      @the_idx=the_idx
      @env=SiSU_Env::InfoEnv.new
      @rc=SiSU_Env::GetInit.new.sisu_yaml.rc
      @alphabet_list=['9'] + ('A'..'Z').to_a
      @alph=@alphabet_list.dup
      @letter=@alph.shift
    end
    # Opens, per language of the index, the topics html file for writing
    # (creating its directory when missing) and, in maintenance mode, a
    # topics.html in the present working directory. The handles are kept in
    # @output[lng][:html] and @output[lng][:html_mnt].
    def html_file_open
      @the_idx.keys.each do |lng|
        @output ||={}
        @output[lng] ||={}
        harvest_pth,file=case @env.output_dir_structure.by?
        when :language
          dir=@env.path.webserv + '/' + @opt.base_stub + '/' + lng + '/' + 'manifest'
          [dir, dir + '/' + 'topics.html']
        when :filetype
          dir=@env.path.webserv + '/' + @opt.base_stub + '/' + 'manifest'
          [dir, dir + '/' + 'topics.' + lng + '.html']
        when :filename
          dir=@env.path.webserv + '/' + @opt.base_stub
          [dir, dir + '/' + 'topics.' + lng + '.html']
        else
          ['','']
        end
        unless FileTest.directory?(harvest_pth)
          FileUtils::mkdir_p(harvest_pth)
        end
        # the output url is only shown for the more talkative options
        show_url=[:verbose,:verbose_plus,:urls_selected,:maintenance].any? do |flag|
          @opt.act[flag][:set]==:on
        end
        fileinfo=if show_url
          "file://#{file}"
        else
          ''
        end
        unless @opt.act[:quiet][:set]==:on
          SiSU_Screen::Ansi.new(
            @opt.act[:color_state][:set],
            "harvest topics(#{@opt.files.length} files)",
            fileinfo
          ).dark_grey_title_hi
        end
        @output[lng][:html]=File.new(file,'w')
        next unless @opt.act[:maintenance][:set]==:on
        @output[lng][:html_mnt]=File.new("#{@env.path.pwd}/topics.html",'w')
      end
    end
    # Closes the output file of every language, and the maintenance file
    # where one was opened.
    def html_file_close
      @the_idx.keys.each do |lng|
        files=@output[lng]
        files[:html].close
        mnt=files[:html_mnt]
        mnt.close if mnt.is_a?(File)
      end
    end
    def html_print
      # Writes the complete topics page(s): opens the files, emits the page
      # head, the A-Z link bar, the topic tree and the page tail, and
      # closes the files again.
      def html_songsheet
        html_file_open          # one output file per language
        html_head               # document head and page title
        html_alph               # alphabetical jump links
        html_body_traverse      # the topic index itself
        html_tail               # bottom anchors, credits, closing tags
        html_file_close
      end
      # Walks the index (language => nested topic hash) and emits it as
      # html. Topics directly below a language are written with
      # do_string_name as 'lev0'; deeper topics with do_string as 'lev1' to
      # 'lev4'. The documents under a topic's :md are written with do_hash
      # right after the topic heading and before its sub-topics; the fifth
      # topic level is printed but not descended into.
      def html_body_traverse
        descend=lambda do |lng,node,depth|
          node.each_pair do |term,sub|
            if term.is_a?(String)
              if depth == 1
                do_string_name(lng,'lev0',term)
              else
                do_string(lng,"lev#{depth-1}",term)
              end
            end
            next unless depth < 5 && sub.is_a?(Hash)
            if sub.has_key?(:md)
              sub[:md].each { |work| do_hash(lng,depth-1,work) }
            end
            descend.call(lng,sub,depth+1)
          end
        end
        @the_idx.each_pair do |lng,topics|
          # the language key itself and any :md directly under it produce
          # no output
          descend.call(lng,topics,1) if topics.is_a?(Hash)
        end
      end
      # Returns the html page head (doctype up to the language link bar)
      # for the topics page of language lng.
      # type:: a value matching /maintenance/ selects the local
      #        'harvest.css' instead of the site stylesheet
      # All relative links depend on the output directory structure
      # (:language, :filetype or :filename). The favicon link now follows
      # the same site root as the home link; it used to be '../_sisu/...'
      # for every structure, which only resolves for :filetype.
      def html_head_adjust(lng,type='')
        structure=@env.output_dir_structure.by?
        site_css=(type !~/maintenance/)
        css_path,authors='',''
        case structure
        when :language
          css_path=site_css ? '../../_sisu/css/harvest.css' : 'harvest.css'
          authors='authors.html'
          home_pth='../..'
          output_structure_by='(output organised by language &amp; filetype)'
        when :filetype
          css_path=site_css ? '../_sisu/css/harvest.css' : 'harvest.css'
          authors="authors.#{lng}.html"
          home_pth='..'
          output_structure_by='(output organised by filetype)'
        when :filename
          css_path=site_css ? './_sisu/css/harvest.css' : 'harvest.css'
          authors="authors.#{lng}.html"
          home_pth='.'
          output_structure_by='(output organised by filename)'
        else
          home_pth='.'
          output_structure_by='(output organised by ?)'
        end
        ln=SiSU_i18n::Languages.new.language.list
        # one link per harvested language to that language's topics page
        harvest_languages=''
        @the_idx.keys.each do |lg|
          file=case structure
          when :language
            "../../#{lg}/manifest" + '/' + 'topics.html'
          when :filetype, :filename
            './topics.' + lg + '.html'
          end
          l=ln[lg][:t]
          harvest_languages +=
            %{<a href="#{file}">#{l}</a>&nbsp;&nbsp;&nbsp;}
        end
        sv=SiSU_Env::InfoVersion.instance.get_version
        <<WOK
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>SiSU Metadata Harvest - Topics</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="dc.title" content= "SiSU metadata harvest, Topics - SiSU information Structuring Universe, Structured information Serialised Units" />
<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
<meta name="generator" content="#{sv.project} #{sv.version} of #{sv.date_stamp} (n*x and Ruby!)" />
<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
<link href="#{css_path}" rel="stylesheet">
<link rel="shortcut icon" href="#{home_pth}/_sisu/image/rb7.ico" />
</head>
<body lang="en" xml:lang="en">
<a name="top" id="top"></a>
<a name="up" id="up"></a>
<a name="start" id="start"></a>
<h1>SiSU Metadata Harvest - Topics #{output_structure_by}</h1>
<p>[<a href="#{home_pth}/index.html">&nbsp;HOME&nbsp;</a>] also see <a href="#{authors}">SiSU Metadata Harvest - Authors</a></p>
<p>#{@env.widget_static.search_form}</p>
<hr />
<p class="tiny">#{harvest_languages}</p>
<hr />
WOK
      end
      # Writes the page head to the output of every language; in
      # maintenance mode the maintenance variant is written first, to the
      # maintenance file.
      def html_head
        @the_idx.keys.each do |lng|
          files=@output[lng]
          if @opt.act[:maintenance][:set]==:on
            files[:html_mnt] << html_head_adjust(lng,'maintenance')
          end
          files[:html] << html_head_adjust(lng)
        end
      end
      # Writes the bar of jump links to the letter anchors, one link per
      # entry of @alph that contains no digit, to every language's output
      # (and to the maintenance file in maintenance mode).
      def html_alph
        links=@alph.map do |letter|
          if letter =~/[0-9]/
            ''
          else
            %{<a href="##{letter}">#{letter}</a>,&nbsp;}
          end
        end
        bar='<p>' + links.join
        @the_idx.keys.each do |lng|
          files=@output[lng]
          files[:html_mnt] << bar if @opt.act[:maintenance][:set]==:on
          files[:html] << bar
        end
      end
      # Writes the page tail (bottom anchors, project credits and the
      # closing body/html tags) to every language's output, and to the
      # maintenance file where one is open.
      def html_tail
        tail =<<WOK
<hr />
<a name="bottom" id="bottom"></a>
<a name="down" id="down"></a>
<a name="end" id="end"></a>
<a name="finish" id="finish"></a>
<a name="stop" id="stop"></a>
<a name="credits"></a>
#{SiSU_Proj_HTML::Bits.new.credits_sisu}
</body>
</html>
WOK
        @the_idx.keys.each do |lng|
          files=@output[lng]
          files[:html_mnt] << tail if files[:html_mnt].is_a?(File)
          files[:html] << tail
        end
      end
      # Appends html to the output file of language lng.
      def do_html(lng,html)
        page=@output[lng][:html]
        page << html
      end
      # Appends html to the maintenance file of language lng; does nothing
      # when no such file is open.
      def do_html_maintenance(lng,html)
        return unless @output[lng][:html_mnt].is_a?(File)
        @output[lng][:html_mnt] << html
      end
      # Wraps string in a paragraph of css class attrib and writes it to the
      # output of lng and, where a maintenance file is open, to that too.
      def do_string(lng,attrib,string)
        para=%{<p class="#{attrib}">#{string}</p>}
        do_html(lng,para)
        return unless @output[lng][:html_mnt].is_a?(File)
        do_html_maintenance(lng,para)
      end
      # Wraps string in a paragraph of css class attrib and writes it to the
      # regular output of lng only.
      def do_string_default(lng,attrib,string)
        do_html(lng,%{<p class="#{attrib}">#{string}</p>})
      end
      # Wraps string in a paragraph of css class attrib and writes it to the
      # maintenance file of lng, where one is open.
      def do_string_maintenance(lng,attrib,string)
        para=%{<p class="#{attrib}">#{string}</p>}
        return unless @output[lng][:html_mnt].is_a?(File)
        do_html_maintenance(lng,para)
      end
      # Writes a top level topic heading carrying an anchor named after the
      # topic (whitespace replaced by '_').
      # Before the heading, a letter heading with anchors is written for
      # every letter of the alphabet list from the current letter up to the
      # first character of the topic; the letter list starts over whenever
      # the language changes.
      def do_string_name(lng,attrib,string)
        initial=/^(\S)/.match(string)[1]
        unless @lng == lng
          @alph=@alphabet_list.dup
          @letter=@alph.shift
          @lng = lng
        end
        while @letter < initial && @alph.length > 0
          @letter=@alph.shift
          letter_html=%{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
          mnt=@output[lng][:html_mnt]
          mnt << letter_html if mnt.is_a?(File)
          @output[lng][:html] << letter_html
        end
        anchor=string.strip.gsub(/\s+/,'_')
        heading=%{<p class="#{attrib}"><a name="#{anchor}">#{string}</a></p>}
        do_html(lng,heading)
        return unless @output[lng][:html_mnt].is_a?(File)
        do_html_maintenance(lng,heading)
      end
      # Passes every element of array to do_case, one level deeper than lv.
      def do_array(lng,lv,array)
        deeper=lv + 1
        array.each { |element| do_case(lng,deeper,element) }
      end
      # Writes the link line of one document ("title - authors") to the
      # regular output. The link target depends on the output directory
      # structure; an unknown structure yields an empty href.
      def do_hash_md(lng,attrib,hash)
        lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert
        manifest_at=case @env.output_dir_structure.by?
        when :language
          hash[:file] + Sfx[:html]
        when :filetype
          hash[:file] + lang_code_insert +  Sfx[:html]
        when :filename
          "./#{hash[:file]}/#{hash[:page]}"
        else
          '' #error
        end
        do_string_default(
          lng,
          attrib,
          %{<a href="#{manifest_at}">#{hash[:title]}</a> - #{hash[:author]}}
        )
      end
      # Maintenance counterpart of do_hash_md: writes a link to the source
      # file plus a file:// link to the document's page below the output
      # path. Only acts when a maintenance file is open, it is not meant
      # for the presentation output.
      def do_hash_md_maintenance(lng,attrib,hash)
        return unless @output[lng][:html_mnt].is_a?(File)
        src=%{[<a href="#{hash[:file]}.sst">src</a>]}
        doc=%{<a href="file://#{@env.path.output}/#{hash[:file]}/#{hash[:page]}">#{hash[:title]}</a>}
        do_string_maintenance(lng,attrib,"#{src}&nbsp;&nbsp;#{doc} - #{hash[:author]}")
      end
      # Renders one hash of the index, one level below lv.
      # :md is handed to do_case; :title triggers the document link lines
      # (regular and maintenance); the other document detail keys are
      # ignored. Every remaining key is treated as a sub-heading: these are
      # written in sorted order, each followed by its value via do_case.
      def do_hash(lng,lv,hash)
        lv+=1
        headings=[]
        hash.each_key do |m|
          case m
          when :md
            do_case(lng,lv,hash[m])
          when :title
            do_hash_md(lng,'work',hash)
            do_hash_md_maintenance(lng,'work',hash)
          when :author, :filename, :file, :rough_idx, :page
            # document details, rendered as part of the :title line
          else
            headings << m
          end
        end
        unless headings.empty?
          headings.sort.each do |m|
            attrib="lev#{lv}"
            if lv==0
              do_string_name(lng,attrib,m)
            else
              do_string(lng,attrib,m)
            end
            do_case(lng,lv,hash[m])
          end
        end
      end
      # Dispatches one element of the index on its type: a string is
      # written as a heading of level lv (with a name anchor at level 0),
      # an array or hash is handed on to do_array or do_hash. Anything else
      # is ignored.
      def do_case(lng,lv,a)
        case a
        when String
          attrib="lev#{lv}"
          # skip blank strings; the test used to be /S/, which only let
          # strings containing a capital 'S' through
          if a=~/\S/
            lv==0 ? do_string_name(lng,attrib,a) : do_string(lng,attrib,a)
          end
        when Array
          do_array(lng,lv,a)
        when Hash
          do_hash(lng,lv,a)
        end
      end
      #def html_body
      #  the_idx=@the_idx
      #  the_idx.each_pair do |lng,lng_array|
      #    lng_array.sort.each do |a|
      #      do_case(lng,-1,a)
      #    end
      #  end
      #end
      self
    end
  end
end
__END__
terms -|_  t{tl1} -|_ {fa}[fa]{filenames and other details}
       |           |_ {tl2} -|_ {fa}[fa]{filenames and other details}
       |           |         |_{tl3} -|_ {fa}[fa]{filenames and other details}
       |           |         |        |_{tl4} - {fa}[fa]{filenames and other details}
       |           |         |        |
       |           |         |        |_{tl4a} - {fa}[fa]{filenames and other details}
       |           |         |        |
       |           |         |        |_{tl4b} - {fa}[fa]{filenames and other details}
       |           |         |        |
       |           |         |        |_ ...
       |           |         |
       |           |         |_{tl3a} - {fa}[fa]{filenames and other details}
       |           |
       |           |_{tl2a} - {fa}[fa]{filenames and other details}
       |
       |_ t{tl1a} -|_ {fa}[fa]{filenames and other details}
                   |_ ...
#+END_SRC

** authors
*** html_harvest_authors.rb

#+BEGIN_SRC ruby  :tangle "../lib/sisu/html_harvest_authors.rb"
# <<sisu_document_header>>
module SiSU_HarvestAuthors
  require_relative 'html_harvest_author_format'          # html_harvest_author_format.rb
  require_relative 'html_parts'                          # html_parts.rb
  # Drives the authors harvest: reads the header paragraphs of every source
  # file, hands them to Harvest, builds the author index with Index and
  # writes it out through OutputIndex.
  class Songsheet
    @@the_idx_authors={}
    # opt:: options object; opt.files and opt.f_pths are read
    # env:: environment object, passed on to Harvest
    def initialize(opt,env)
      @opt=opt
      @env=env
      @file_list=opt.files
    end
    # Runs the whole harvest and returns whatever
    # OutputIndex#html_print.html_songsheet returns.
    def songsheet
      idx_array={}
      @opt.f_pths.each do |src|
        name=src[:f]
        filename=src[:pth] + '/' + src[:f]
        headers_by_lang={}
        File.open(filename,'r') do |file|
          # the file is read in chunks terminated by a blank line
          file.each_line("\n\n") do |para|
            if para =~/^@(?:title|creator|date):(?:\s|$)/m
              # one of the headers the harvest is interested in
              (headers_by_lang[src[:lng_is]] ||= []) << para
              next
            end
            # other headers, blank chunks and "% " comments are skipped;
            # the first chunk matching none of the patterns ends the scan
            skippable=(para =~/^@\S+?:(?:\s|$)/m) \
              || (para =~/^(?:\s*\n|%+ )/)
            break unless skippable
          end
        end
        headers_by_lang.each_pair do |lang,paras|
          idx_array[lang] ||= []
          harvest=SiSU_HarvestAuthors::Harvest.new(
            @opt,
            @env,
            paras,
            filename,
            name,
            idx_array,
            lang
          )
          idx_array=harvest.extract_harvest
        end
      end
      index=SiSU_HarvestAuthors::Index.new(idx_array,@@the_idx_authors)
      the_idx=index.construct_book_author_index
      output=SiSU_HarvestAuthors::OutputIndex.new(@opt,the_idx)
      output.html_print.html_songsheet
    end
  end
  # Harvests title, subtitle, author and year from the header paragraphs of
  # one document and appends a record for it to the per-language index.
  class Harvest
    # opt::       options object (stored; not read by extract_harvest)
    # env::       environment; env.output_dir_structure.by? is consulted
    # data::      array of header paragraphs (strings) of one document
    # filename::  path of the source file, copied into the record
    # name::      source file name; its .sst/.ssm suffix (and an optional
    #             ~xx language marker before it) is stripped for :file
    # idx_array:: hash of lang => array of records; extended by this object
    # lang::      language key under which the record is filed
    def initialize(opt,env,data,filename,name,idx_array,lang)
      @opt, @env,@data,@filename,@name,@idx_array,@lang=
        opt,env, data, filename, name, idx_array, lang
    end
    # Scans the header paragraphs and, when both a title and an author were
    # found, appends a record hash (:filename, :file, :date, :title, :author,
    # :page, :lang) to idx_array[lang].
    #
    # Returns idx_array, with idx_array[lang] flattened.
    def extract_harvest
      data,filename,name,idx_array,lang=
        @data,@filename,@name,@idx_array,@lang
      @title=@subtitle=@fulltitle=@author_format=@date=nil
      rgx={
        author: /^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m,
        title: /^@title:[ ]+(.+)/,
        subtitle: /^@title:.+?:subtitle:[ ]+(.+?)\n/m,
        date: /^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m
      }
      data.each do |para|
        found=rgx[:title].match(para)
        @title=found[1] if found
        found=rgx[:subtitle].match(para)
        @subtitle=found[1] if found
        found=rgx[:author].match(para)
        @author_format=found[1] if found
        found=rgx[:date].match(para)
        @date=found[1] if found
        # Stop once everything has been found. This used to test @author,
        # which is never assigned, so the early exit could never trigger.
        break if @title && @subtitle && @author_format && @date
      end
      # A subtitle can be matched without an inline title (e.g. "@title:"
      # followed directly by a " :subtitle:" line); only join when both
      # exist instead of calling + on nil.
      @fulltitle=(@title && @subtitle) \
      ? (@title + ' - ' + @subtitle)
      : @title
      # make sure there is a list to append to / flatten for this language
      idx_array[lang] ||= []
      if @title \
      and @author_format
        creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details
        # drop the source suffix, and a "~xx" language marker if present
        file=name.sub(/(?:~[a-z]{2,3})?\.ss[mt]$/,'')
        page=if @env.output_dir_structure.by? == :language
          "#{lang}/sisu_manifest.html"
        else
          "sisu_manifest.#{lang}.html"
        end
        idx_array[lang] << {
          filename: filename,
          file: file,
          date: @date,
          title: @fulltitle,
          author: creator,
          page: page,
          lang: lang
        }
      else
        #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}"
      end
      idx_array[lang]=idx_array[lang].flatten
      idx_array
    end
  end
  # Turns the per-language list of harvested document records into an index
  # keyed by language and then by author name.
  class Index
    # idx_array:: hash of lang => array of document records
    # the_idx::   hash the index is accumulated into (also kept in the
    #             class variable @@the_idx_authors)
    def initialize(idx_array,the_idx)
      @idx_array=idx_array
      @the_idx=the_idx
      @@the_idx_authors=@the_idx
    end
    # Returns txt with its first character capitalised and the remainder
    # left untouched.
    def capital(txt)
      head,rest=txt[0].chr,txt[1,txt.length]
      head.capitalize + rest
    end
    # Files every record under each of its authors' names
    # (record[:author][:last_first_format_a], stripped) and returns the
    # accumulated index: lang => author => { md: [records] }.
    def construct_book_author_index
      fields=[:filename,:file,:author,:title,:date,:page,:lang]
      @idx_array.each_pair do |lang,records|
        by_author=(@@the_idx_authors[lang] ||= {})
        records.each do |record|
          record[:author][:last_first_format_a].each do |raw_name|
            author=raw_name.strip
            # first work seen for this author: start an empty list
            by_author[author]={ md: [] } if by_author[author].nil?
            by_author[author][:md] << fields.each_with_object({}) do |key,entry|
              entry[key]=record[key]
            end
          end
        end
      end
      @the_idx=@@the_idx_authors
    end
  end
  class OutputIndex
    require_relative 'i18n'                               # i18n.rb
    # opt::     options object
    # the_idx:: the author index to be written out
    #
    # Also sets up the alphabet used for the letter headings: the full list
    # ('9' followed by 'A'..'Z'), a working copy, and the current letter,
    # which starts as the first entry taken off the working copy.
    def initialize(opt,the_idx)
      @opt=opt
      @the_idx=the_idx
      @env=SiSU_Env::InfoEnv.new
      @rc=SiSU_Env::GetInit.new.sisu_yaml.rc
      @alphabet_list=['9'] + ('A'..'Z').to_a
      @alph=@alphabet_list.dup
      @letter=@alph.shift
    end
    # Opens one authors output file per language of the index, creating the
    # target directory when needed, and stores the handle in
    # @output[lng][:html]. Unless quiet is set, a title line is sent to the
    # screen for each file.
    def html_file_open
      @the_idx.keys.each do |lng|
        @output ||={}
        @output[lng] ||={}
        # directory and file name depend on how output is organised
        harvest_pth,file=case @env.output_dir_structure.by?
        when :language
          dir=@env.path.webserv + '/' \
          + @opt.base_stub + '/' \
          + lng + '/' \
          + 'manifest'
          [dir,"#{dir}/authors.html"]
        when :filetype
          dir=@env.path.webserv + '/' \
          + @opt.base_stub + '/' \
          + 'manifest'
          [dir,"#{dir}/authors.#{lng}.html"]
        when :filename
          dir=@env.path.webserv + '/' \
          + @opt.base_stub
          [dir,"#{dir}/authors.#{lng}.html"]
        else
          ['','']
        end
        FileUtils::mkdir_p(harvest_pth) \
          unless FileTest.directory?(harvest_pth)
        # the file url is only reported for the more talkative run modes
        show_url=[:verbose,:verbose_plus,:urls_selected,:maintenance].any? do |flag|
          @opt.act[flag][:set]==:on
        end
        fileinfo=show_url ? ("file://#{file}") : ''
        unless @opt.act[:quiet][:set]==:on
          SiSU_Screen::Ansi.new(
            @opt.act[:color_state][:set],
            "harvest authors (#{@opt.files.length} files)",
            fileinfo
          ).dark_grey_title_hi
        end
        @output[lng][:html]=File.new(file,'w')
      end
    end
    # Closes the output handle of every language in the index, and the
    # maintenance handle as well when one is open as a File.
    def html_file_close
      @the_idx.keys.each do |lng|
        handles=@output[lng]
        handles[:html].close
        maintenance=handles[:html_mnt]
        maintenance.close if maintenance.is_a?(File)
      end
    end
    def html_print
      # Writes the complete authors pages: opens the files, emits head,
      # alphabet bar, body and tail in that order, then closes the files.
      # Returns the result of html_file_close.
      def html_songsheet
        html_file_open
        [:html_head,:html_alph,:html_body,:html_tail].each { |part| send(part) }
        html_file_close
      end
      # Builds the HTML head and page banner of the authors page for one
      # language.
      #
      # lng::  language key of the page being written
      # type:: when it matches /maintenance/ the stylesheet is referenced as
      #        a local 'harvest.css' instead of the shared _sisu/css copy
      #
      # All relative links depend on @env.output_dir_structure.by?
      # (:language, :filetype or :filename). Returns the markup as a String.
      def html_head_adjust(lng,type='')
        structure=@env.output_dir_structure.by?
        # relative path to the output root, topics page and banner note
        home_pth,topics,output_structure_by=case structure
        when :language
          ['../..','topics.html','(output organised by language &amp; filetype)']
        when :filetype
          ['..',"topics.#{lng}.html",'(output organised by filetype)']
        when :filename
          ['.',"topics.#{lng}.html",'(output organised by filename)']
        else
          ['.','','(output organised by ?)']
        end
        css_path=if ! [:language,:filetype,:filename].include?(structure)
          ''
        elsif type =~/maintenance/
          'harvest.css'
        else
          "#{home_pth}/_sisu/css/harvest.css"
        end
        ln=SiSU_i18n::Languages.new.language.list
        # one link per language present in the index
        harvest_languages=@the_idx.keys.map do |lg|
          file=case structure
          when :language then "../../#{lg}/manifest/authors.html"
          when :filetype,:filename then "./authors.#{lg}.html"
          end
          %{<a href="#{file}">#{ln[lg][:t]}</a>&nbsp;&nbsp;&nbsp;}
        end.join
        sv=SiSU_Env::InfoVersion.instance.get_version
        # The favicon link used to be hard-coded as "../_sisu/image/rb7.ico",
        # which only resolves for the :filetype layout; it now follows
        # home_pth like the stylesheet and HOME links do.
        <<WOK
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>SiSU Metadata Harvest - Authors</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="dc.title" content= "SiSU metadata harvest, Authors - SiSU information Structuring Universe, Structured information Serialised Units" />
<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
<meta name="generator" content="#{sv.project} #{sv.version} of #{sv.date_stamp} (n*x and Ruby!)" />
<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
<link href="#{css_path}" rel="stylesheet" >
<link rel="shortcut icon" href="#{home_pth}/_sisu/image/rb7.ico" />
</head>
<body lang="en" xml:lang="en">
<a name="top" id="top"></a>
<a name="up" id="up"></a>
<a name="start" id="start"></a>
<h1>SiSU Metadata Harvest - Authors #{output_structure_by}</h1>
<p>[<a href="#{home_pth}/index.html">&nbsp;HOME&nbsp;</a>] also see <a href="#{topics}">SiSU Metadata Harvest - Topics</a></p>
<p>#{@env.widget_static.search_form}</p>
<hr />
<p class="tiny">#{harvest_languages}</p>
<hr />
WOK
      end
      # Writes the document head to the output file of every language in the
      # index, and a 'maintenance' variant to the maintenance file when
      # maintenance mode is on and such a file is actually open.
      def html_head
        @the_idx.keys.each do |lng|
          # html_file_open in this class only opens :html, so the
          # maintenance handle may be nil even with the flag set; guard the
          # write the same way html_tail and do_html do.
          if @opt.act[:maintenance][:set]==:on \
          and @output[lng][:html_mnt].is_a?(File)
            @output[lng][:html_mnt] \
            << html_head_adjust(lng,'maintenance')
          end
          @output[lng][:html] \
          << html_head_adjust(lng)
        end
      end
      # Writes the alphabet navigation bar (one "#X" anchor link per entry
      # of @alph that is not a digit) to every language's output file, and
      # to the maintenance file when maintenance mode is on and such a file
      # is actually open.
      def html_alph
        links=@alph.reject { |x| x =~/[0-9]/ }.map do |x|
          %{<a href="##{x}">#{x}</a>,&nbsp;}
        end
        a='<p>' + links.join
        @the_idx.keys.each do |lng|
          # html_file_open in this class only opens :html, so the
          # maintenance handle may be nil even with the flag set; guard the
          # write the same way html_tail and do_html do.
          if @opt.act[:maintenance][:set]==:on \
          and @output[lng][:html_mnt].is_a?(File)
            @output[lng][:html_mnt] << a
          end
          @output[lng][:html] << a
        end
      end
      # Writes the closing part of the page (end anchors, project credits,
      # closing body/html tags) to every language's output file, and to the
      # maintenance file when one is open.
      def html_tail
        tail=[
          '<hr />',
          '<a name="bottom" id="bottom"></a>',
          '<a name="down" id="down"></a>',
          '<a name="end" id="end"></a>',
          '<a name="finish" id="finish"></a>',
          '<a name="stop" id="stop"></a>',
          '<a name="credits"></a>',
          "#{SiSU_Proj_HTML::Bits.new.credits_sisu}",
          '</body>',
          '</html>'
        ].join("\n") + "\n"
        @the_idx.keys.each do |lng|
          handles=@output[lng]
          handles[:html_mnt] << tail if handles[:html_mnt].is_a?(File)
          handles[:html] << tail
        end
      end
      # Appends html to the output file of language lng, and to its
      # maintenance file when one is open. Returns the result of the
      # append to the :html handle.
      def do_html(lng,html)
        handles=@output[lng]
        maintenance=handles[:html_mnt]
        maintenance << html if maintenance.is_a?(File)
        handles[:html] << html
      end
      # Emits the letter headings needed before an index entry: walks the
      # working alphabet forward, writing one heading per letter, until the
      # current letter is no longer below the entry's first character or
      # the alphabet is used up. The alphabet restarts whenever lng differs
      # from the language of the previous call.
      #
      # lng::    language key selecting the output handles
      # attrib:: not used by this method
      # string:: index entry; string[0] supplies the name whose first
      #          non-blank leading character is compared
      #
      # Returns nil.
      def do_string_name(lng,attrib,string)
        first=/^(\S)/.match(string[0])
        # nothing to compare against (empty or blank-led name): no headings
        return unless first
        f=first[1]
        if @lng != lng
          @alph=@alphabet_list.dup
          @letter=@alph.shift
          @lng = lng
        end
        while @letter < f && @alph.length > 0
          @letter=@alph.shift
          # The maintenance copy used to be written with mis-nested tags
          # (<a ...></p>X</a>); both files now get the same heading.
          heading=%{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
          if @output[lng][:html_mnt].is_a?(File)
            @output[lng][:html_mnt] << heading
          end
          @output[lng][:html] << heading
        end
      end
      # Writes the index body: for every language, each author (in sorted
      # order) preceded by any letter headings that are due, followed by
      # that author's works ordered by "date title". The maintenance file,
      # when open, gets the same author heading and an extended line per
      # work.
      def html_body
        @the_idx.each_pair do |lng,authors|
          authors.sort.each do |entry|
            author_name,details=entry[0],entry[1]
            do_string_name(lng,'',entry)
            # anchor: the part of the name before the first comma, with
            # whitespace runs replaced by underscores
            anchor=author_name.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
            heading=%{<p class="author"><a name="#{anchor}">#{author_name}</a></p>}
            maintenance=@output[lng][:html_mnt]
            maintenance << heading if maintenance.is_a?(File)
            @output[lng][:html] << heading
            lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert
            # each work becomes [sort key, public line(, maintenance line)]
            works=details[:md].map do |i|
              manifest_at=case @env.output_dir_structure.by?
              when :language
                i[:file] + Sfx[:html]
              when :filetype
                i[:file] + lang_code_insert + Sfx[:html]
              when :filename
                './' + i[:file] + '/' + i[:page]
              else '' #error
              end
              work=[
                "#{i[:date]} #{i[:title]}",
                %{<p class="publication">#{i[:date]} <a href="#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]}</p>}
              ]
              if @output[lng][:html_mnt].is_a?(File)
                work << %{<p class="publication">[<a href="#{i[:file]}.sst">src</a>]&nbsp;&nbsp;#{i[:date]} <a href="file://#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]} -- [<a href="#{i[:file]}.sst">#{i[:file]}.sst</a>]</p>}
              end
              work
            end
            works.sort_by { |work| work[0] }.each do |work|
              @output[lng][:html] << work[1]
              if @output[lng][:html_mnt].is_a?(File)
                @output[lng][:html_mnt] << work[2]
              end
            end
          end
        end
      end
      self
    end
    # Defines the screen-output helper cycle and returns self, so that it
    # can be chained as screen_print.cycle.
    def screen_print
      # Prints every author followed by that author's files (tab-indented)
      # to standard output, in sorted order.
      #
      # The index built by Index#construct_book_author_index is keyed by
      # language first (lang => author => { md: [...] }). This method used
      # to treat the top level as authors and so looked up :md on a
      # language's author hash, which is nil. Both shapes are now accepted:
      # an entry carrying :md is printed as an author, anything else is
      # walked as a language.
      def cycle
        show=->(author,details) do
          puts author
          details[:md].each do |x|
            puts "\t" + x[:file]
          end
        end
        @the_idx.sort.each do |key,value|
          if value.key?(:md)
            show.call(key,value)
          else
            value.sort.each { |author,details| show.call(author,details) }
          end
        end
      end
      self
    end
  end
end
__END__
#+END_SRC

*** html_harvest_author_format.rb

#+BEGIN_SRC ruby  :tangle "../lib/sisu/html_harvest_author_format.rb"
# <<sisu_document_header>>
module SiSU_FormatAuthor
  # Parses an author string in which authors are separated by ";" and each
  # author is written either as "Last, First", as a single name, or as a
  # double-quoted name that is taken as is.
  class Author
    # author_param:: the raw author string
    def initialize(author_param)
      @author_param=author_param
    end
    # Returns a hash describing the authors:
    #
    # last_first_a::        the raw ";"-separated pieces
    # last_first_format_a:: one "LAST, First" (or "NAME") string per author
    # authors_h::           one hash per author, { the: } or
    #                       { the:, others: }
    # authors_s::           "First Last" names joined with ", "
    # authors_param::       the raw author string
    #
    # Pieces that are blank (e.g. after a trailing ";") and names with more
    # than one comma are left out of all but last_first_a.
    def author_details
      @authors,@author_array=[],[]
      authors=@author_param.scan(/[^;]+/)
      authors.each do |a|
        a=a.strip
        quoted=/"(.+?)"/.match(a)
        if quoted
          @authors << { the: quoted[1] }
          @author_array << quoted[1].upcase
        else
          x=a.scan(/[^,]+/).map { |part| part.strip }
          # a blank piece yields no parts at all; it used to raise on
          # nil.strip here
          next if x.empty?
          if x.length==1
            @authors << { the: x[0] }
            @author_array << x[0].upcase
          elsif x.length==2
            @authors << { the: x[0], others: x[1] }
            @author_array << "#{x[0].upcase}, #{x[1]}"
          else #p x.length
          end
        end
      end
      # Every name but the last is followed by ", ". Single-part names used
      # to get the separator only when more than two names remained, which
      # produced e.g. "PlatoAristotle".
      authors_string=@authors.map do |a|
        a[:others] ? "#{a[:others]} #{a[:the]}" : "#{a[:the]}"
      end.join(', ')
      {
        last_first_a: authors,
        last_first_format_a: @author_array,
        authors_h: @authors,
        authors_s: authors_string,
        authors_param: @author_param
      }
    end
  end
end
__END__
#+END_SRC

* document header

#+NAME: sisu_document_header
#+BEGIN_SRC text
encoding: utf-8
- Name: SiSU

  - Description: documents, structuring, processing, publishing, search
    harvest

  - Author: Ralph Amissah
    <ralph.amissah@gmail.com>

  - Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
    2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019,
    2020, 2021, Ralph Amissah,
    All Rights Reserved.

  - License: GPL 3 or later:

    SiSU, a framework for document structuring, publishing and search

    Copyright (C) Ralph Amissah

    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation, either version 3 of the License, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.

    You should have received a copy of the GNU General Public License along with
    this program. If not, see <http://www.gnu.org/licenses/>.

    If you have Internet connection, the latest version of the GPL should be
    available at these locations:
    <http://www.fsf.org/licensing/licenses/gpl.html>
    <http://www.gnu.org/licenses/gpl.html>

    <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

  - SiSU uses:
    - Standard SiSU markup syntax,
    - Standard SiSU meta-markup syntax, and the
    - Standard SiSU object citation numbering and system

  - Homepages:
    <http://www.sisudoc.org>

  - Git
    <https://git.sisudoc.org/projects/>
    <https://git.sisudoc.org/projects/?p=software/sisu.git;a=summary>
    <https://git.sisudoc.org/projects/?p=markup/sisu-markup-samples.git;a=summary>
#+END_SRC