-*- mode: org -*-
#+TITLE:       sisu harvest
#+DESCRIPTION: documents - structuring, various output representations & search
#+FILETAGS:    :sisu:harvest:
#+AUTHOR:      Ralph Amissah
#+EMAIL:       [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]]
#+COPYRIGHT:   Copyright (C) 2015 - 2021 Ralph Amissah
#+LANGUAGE:    en
#+STARTUP:     content hideblocks hidestars noindent entitiespretty
#+OPTIONS:     H:3 num:nil toc:t \n:nil @:t ::t |:t ^:nil _:nil -:t f:t *:t <:t
#+PROPERTY:    header-args  :exports code
#+PROPERTY:    header-args+ :noweb yes
#+PROPERTY:    header-args+ :eval no
#+PROPERTY:    header-args+ :results no
#+PROPERTY:    header-args+ :cache no
#+PROPERTY:    header-args+ :padline no

* harvest
** html_harvest.rb

#+BEGIN_SRC ruby  :tangle "../lib/sisu/html_harvest.rb"
# <<sisu_document_header>>
module SiSU_Harvest
  @@the_idx_topics,@@the_idx_authors={},{}
  # Entry point for "--harvest": builds the author and topic index pages
  # from the metadata headers of the selected documents.
  class Source
    require_relative 'hub_options'                         # hub_options.rb
    require_relative 'html_harvest_topics'                 # html_harvest_topics.rb
    require_relative 'html_harvest_authors'                # html_harvest_authors.rb
    require_relative 'se'                                  # se.rb
    include SiSU_Env
    # opt: the parsed command-line options object (see hub_options.rb)
    def initialize(opt)
      @opt=opt
      @env=SiSU_Env::InfoEnv.new
    end
    # Ensures the web output directory exists, runs the harvest and always
    # copies the site css afterwards.
    def read
      begin
        harvest_pth=@env.path.webserv + '/' + @opt.base_stub
        FileUtils::mkdir_p(harvest_pth) unless FileTest.directory?(harvest_pth)
        cases(@opt,@env)
      rescue => e
        # harvest stays best-effort (no re-raise), but a failure is no
        # longer swallowed silently
        warn "harvest: #{e.class}: #{e.message}"
      ensure
        SiSU_Env::CreateSite.new(@opt).cp_css
      end
    end
    # Prints the one-line usage note for the harvest command.
    def help
      puts <<WOK
harvest --harvest extracts document index metadata
WOK
    end
    # Writes the harvest stylesheet into the present working directory
    # (called only in maintenance mode, see #cases).
    def css(opt)
      require_relative 'css'                               # css.rb
      css=SiSU_Style::CSS.new
      fn_css=SiSU_Env::CSS_Default.new
      # block form closes the file even if the write raises
      File.open("#{@env.path.pwd}/#{fn_css.harvest}",'w') do |style|
        style << css.harvest
      end
    end
    # Dispatches on the selected command string: on --harvest runs the
    # authors and topics harvests (and rsync if requested), otherwise
    # prints help.
    def cases(opt,env)
      case opt.selections.str.inspect
      when /--harvest/i
        css(opt) if @opt.act[:maintenance][:set]==:on
        SiSU_HarvestAuthors::Songsheet.new(opt,env).songsheet
        SiSU_HarvestTopics::Songsheet.new(opt,env).songsheet
        if @opt.act[:rsync][:set]==:on
          require_relative 'remote'                        # remote.rb
          SiSU_Remote::Put.new(opt).rsync_harvest
        end
      else help
      end
    end
  end
end
#+END_SRC

** 
topics

*** html_harvest_topics.rb

#+BEGIN_SRC ruby  :tangle "../lib/sisu/html_harvest_topics.rb"
# <<sisu_document_header>>
module SiSU_HarvestTopics
  require_relative 'html_harvest_author_format'           # html_harvest_author_format.rb
  require_relative 'html_parts'                           # html_parts.rb
  # Drives the topic harvest: reads document headers, builds the topic
  # index and writes the html pages.
  class Songsheet
    @@the_idx_topics={}
    def initialize(opt,env)
      @opt,@env=opt,env
      @file_list=opt.files
    end
    # Reads the leading header paragraphs of every file in @opt.f_pths,
    # keeps the @title/@creator/@classify ones per language, and hands
    # them on to Harvest, Index and OutputIndex.
    def songsheet
      idx_array={}
      @opt.f_pths.each do |y|
        lang_hash_file_array={}
        name=y[:f]
        filename=y[:pth] + '/' + y[:f]
        File.open(filename,'r') do |file|
          file.each_line("\n\n") do |line|
            if line =~/^@(?:title|creator|classify):(?:\s|$)/m
              lang_hash_file_array[y[:lng_is]] ||= []
              lang_hash_file_array[y[:lng_is]] << line
            elsif line =~/^@\S+?:(?:\s|$)/m \
            or line =~/^(?:\s*\n|\s*$|%+ )/
              # other header field, blank or comment paragraph: skip
            else break # first non-header paragraph ends the header
            end
          end
        end
        lang_hash_file_array.each_pair do |lang,a|
          idx_array[lang] ||=[]
          idx_array=SiSU_HarvestTopics::Harvest.new(
            @opt,
            @env,
            a,
            filename,
            name,
            idx_array,
            lang
          ).extract_harvest
        end
      end
      the_hash=SiSU_HarvestTopics::Index.new(
        @opt,
        @env,
        idx_array,
        @@the_idx_topics
      ).song
      SiSU_HarvestTopics::OutputIndex.new(
        @opt,
        the_hash
      ).html_print.html_songsheet
    end
  end
  # Shared helper for the indentation unit used by the debug printers.
  class Mix
    def spaces
      Ax[:spaces]
    end
  end
  # Extracts title, author and topic register from one document's header
  # paragraphs and appends the result to idx_array[lang].
  class Harvest
    def initialize(opt,env,data,filename,name,idx_array,lang)
      @opt, @env,@data,@filename,@name,@idx_array,@lang=
        opt,env, data, filename, name, idx_array, lang
    end
    # Returns idx_array with one entry per topic-register branch (split
    # on ';') added under lang; documents lacking title, author or topic
    # register are skipped (reported in verbose_plus/maintenance mode).
    def extract_harvest
      data, filename, name, idx_array, lang=
        @data,@filename,@name,@idx_array,@lang
      @idx_list=@title=@subtitle=@fulltitle=@author=@author_format=nil
      rgx={}
      rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m
      rgx[:title]=/^@title:[ ]+(.+)/
      rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m
      rgx[:idx]=/^@classify:.+?:topic_register:[ ]+(.+?)(?:\n\n|\n\s+:\S|\n%)/m
      data.each do |para|
        if para=~ rgx[:idx]
          @idx_list=(rgx[:idx].match(para)[1]).split(/\s*\n\s*/).join
        end
        if para=~ rgx[:title]
          @title=rgx[:title].match(para)[1]
        end
        if para=~ rgx[:subtitle]
          @subtitle=rgx[:subtitle].match(para)[1]
        end
        if para=~ rgx[:author]
          @author_format=rgx[:author].match(para)[1]
        end
        # stop once every field has been found (tests the variables that
        # are actually assigned above)
        break if @title && @subtitle && @author_format && @idx_list
      end
      # a subtitle without a matched title must not raise on nil + String
      @fulltitle=(@title && @subtitle) ? (@title + ' - ' + @subtitle) : @title
      if @title \
      and @author_format \
      and @idx_list
        creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details
        @authors,@authorship=creator[:authors],creator[:authorship]
        file=if name=~/~[a-z]{2,3}\.ss[mt]$/
          name.sub(/~[a-z]{2,3}\.ss[mt]$/,'')
        else
          name.sub(/\.ss[mt]$/,'')
        end
        page=if @env.output_dir_structure.by? == :language
          "#{lang}/sisu_manifest.html"
        else
          "sisu_manifest.#{lang}.html"
        end
        idx_array[lang] <<=if @idx_list =~/;/
          g=@idx_list.scan(/[^;]+/)
          g.each.map do |i|
            i=i.strip
            {
              filename: filename,
              file: file,
              rough_idx: i,
              title: @fulltitle,
              author: creator,
              page: page,
              lang: lang
            }
          end
        else {
            filename: filename,
            file: file,
            rough_idx: @idx_list,
            title: @fulltitle,
            author: creator,
            page: page,
            lang: lang,
          }
        end
      else
        if (@opt.act[:verbose_plus][:set]==:on \
        || @opt.act[:maintenance][:set]==:on)
          p "missing required field in #{@filename} - [title]: <#{@title}>; [author]: <#{@author_format}>; [idx]: <#{@idx_list}>"
        end
      end
      idx_array[lang]=idx_array[lang].flatten
      idx_array
    end
  end
  # Turns the flat harvest entries into a nested hash keyed by
  # language, then topic levels; documents sit in the :md array of
  # their deepest topic.
  class Index < Mix
    def initialize(opt,env,idx_array,the_idx)
      @opt, @env,@idx_array,@the_idx=
        opt,env, idx_array, the_idx
      @@the_idx_topics=@the_idx
    end
    # Builds and returns the nested topic hash.
    def song
      the_idx=construct_book_topic_keys
      construct_book_topic_hash(the_idx)
    end
    # Capitalizes the first character of every whitespace-separated word.
    def capital(txt)
      txt_a=txt.scan(/\S+/)
      tx=''
      txt_a.each do |t|
        tx += t[0].chr.capitalize + t[1,txt.length] + ' '
      end
      tx.strip
    end
    # Capitalizes the first character of txt only.
    def capital_(txt)
      txt[0].chr.capitalize + txt[1,txt.length]
    end
    # Document details stored at a topic leaf; :author is an html string
    # of links into the authors page.
    def contents(idx,lang)
      names=''
      idx[:author][:last_first_format_a].each do |n|
        s=n.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
        names=if @env.output_dir_structure.by? == :language
          names += %{<a href="authors.html##{s}">#{n}</a>, }
        else
          names += %{<a href="authors.#{lang}.html##{s}">#{n}</a>, }
        end
      end
      {
        filename: idx[:filename],
        file: idx[:file],
        author: names,
        title: idx[:title],
        page: idx[:page]
      }
    end
    # Joins language, the first alternative of each parent level and alt
    # into a '|' separated sort key; returns nil when c.length is not 3..6.
    def key_create(c,alt)
      x=nil
      x=if c.length==6
        c[0].to_s + '|' +
          capital(c[1][0].to_s) + '|' +
          capital(c[2][0].to_s) + '|' +
          capital(c[3][0].to_s) + '|' +
          capital(alt.to_s)
      elsif c.length==5
        c[0].to_s + '|' +
          capital(c[1][0].to_s) + '|' +
          capital(c[2][0].to_s) + '|' +
          capital(alt.to_s)
      elsif c.length==4
        c[0].to_s + '|' +
          capital(c[1][0].to_s) + '|' +
          capital(alt.to_s)
      elsif c.length==3
        c[0].to_s + '|' +
          capital(alt.to_s)
      end
    end
    # Returns [key, contents] pairs sorted by key, one pair per
    # alternative term of the level that directly precedes the contents
    # hash.
    def construct_book_topic_keys
      idx_array=@idx_array
      @idx_a=[]
      @the_a=[]
      idx_array.each_pair do |lang,idx_arr|
        @@the_idx_topics[lang] ||= {}
        idx_arr.each do |idx|
          if idx[:rough_idx]
            idx_lst=idx[:rough_idx].scan(/[^:]+/)
          else
            puts "no topic register in: << #{idx[:filename]} >>"
            next
          end
          idx_a=[]
          idx_lst.each do |c|
            idx_a << c.scan(/[^|\n]+/m)
          end
          idx_a << contents(idx,lang)
          @idx_a << [lang] + idx_a
        end
      end
      @idx_a.each do |c|
        # only the last element of c is a Hash, so at most one level matches
        (1..5).each do |lv|
          next unless c.length > lv && c[lv+1].is_a?(Hash)
          c[lv].each do |alt|
            v=key_create(c,alt)
            @the_a << [v, c[lv+1]] if v
          end
        end
      end
      @the_a.sort_by { |x| x[0] }
    end
    # Folds the sorted [key, contents] pairs into a nested hash: every
    # '|' separated key segment becomes one level, and contents are
    # appended to the :md array of the last segment. Works for any depth.
    def construct_book_topic_hash(t)
      @the_h={}
      t.each do |z|
        x=z[0].scan(/[^|]+/)
        next if x.empty?
        node=@the_h
        x[0..-2].each { |k| node=(node[k] ||={}) }
        leaf=(node[x.last] ||={ md: [] })
        leaf[:md] ||=[]
        leaf[:md] << z[1]
      end
      @the_h
    end
    # Debug aid: prints the topic terms, indented by level (levels 0-5).
    def traverse_base(node=@the_h,level=0)
      node.each_pair do |k,v|
        puts spaces*level + k if k.is_a?(String)
        traverse_base(v,level+1) if v.is_a?(Hash) && level < 5
      end
    end
    # Debug aid: like traverse_base, additionally printing the titles of
    # the documents held at each term.
    def traverse(node=@the_h,level=0)
      node.each_pair do |k,v|
        puts spaces*level + k if k.is_a?(String)
        next unless v.is_a?(Hash) && level < 5
        v[:md].each { |x| puts spaces*5 + x[:title] } if v.has_key?(:md)
        traverse(v,level+1)
      end
    end
  end
  # Writes the nested topic hash as one html page per language.
  class OutputIndex < Mix
    require_relative 'i18n'                                # i18n.rb
    def initialize(opt,the_idx)
      @opt,@the_idx=opt,the_idx
      @env=SiSU_Env::InfoEnv.new
      @rc=SiSU_Env::GetInit.new.sisu_yaml.rc
      @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
      @alph=@alphabet_list.dup
      @letter=@alph.shift
    end
    # Opens the output file for every language (path depends on the
    # output directory structure) and, in maintenance mode, topics.html
    # in the present working directory.
    def html_file_open
      @the_idx.keys.each do |lng|
        @output ||={}
        @output[lng] ||={}
        harvest_pth,file='',''
        if @env.output_dir_structure.by? == :language
          harvest_pth=@env.path.webserv + '/' \
            + @opt.base_stub + '/' \
            + lng + '/' \
            + 'manifest'
          file=harvest_pth + '/' + 'topics.html'
        elsif @env.output_dir_structure.by? == :filetype
          harvest_pth=@env.path.webserv + '/' \
            + @opt.base_stub + '/' \
            + 'manifest'
          file=harvest_pth + '/' + 'topics.' + lng + '.html'
        elsif @env.output_dir_structure.by? == :filename
          harvest_pth=@env.path.webserv + '/' \
            + @opt.base_stub
          file=harvest_pth + '/' + 'topics.' + lng + '.html'
        end
        FileUtils::mkdir_p(harvest_pth) \
          unless FileTest.directory?(harvest_pth)
        fileinfo=(@opt.act[:verbose][:set]==:on \
        || @opt.act[:verbose_plus][:set]==:on \
        || @opt.act[:urls_selected][:set]==:on \
        || @opt.act[:maintenance][:set]==:on) \
        ? ("file://#{file}")
        : ''
        SiSU_Screen::Ansi.new(
          @opt.act[:color_state][:set],
          "harvest topics(#{@opt.files.length} files)",
          fileinfo
        ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on
        @output[lng][:html]=File.new(file,'w')
        if @opt.act[:maintenance][:set]==:on
          @output[lng][:html_mnt]=File.new("#{@env.path.pwd}/topics.html",'w')
        end
      end
    end
    def html_file_close
      @the_idx.keys.each do |lng|
        @output[lng][:html].close
        @output[lng][:html_mnt].close if @output[lng][:html_mnt].is_a?(File)
      end
    end
    # Defines the page-building methods below (nested defs) and returns
    # self, so that callers chain html_print.html_songsheet.
    def html_print
      # Writes the complete page: head, alphabet bar, body, tail.
      def html_songsheet
        #traverse
        html_file_open
        html_head
        html_alph
        html_body_traverse
        html_tail
        html_file_close
      end
      # Walks the topic hash: top-level terms get a named anchor (lev0),
      # sub-terms lev1..lev4, and the documents of each term are listed
      # below it.
      def html_body_traverse
        @the_idx.each_pair do |lng,topics|
          html_body_level(lng,topics,0) if topics.is_a?(Hash)
        end
      end
      # One level of html_body_traverse; depth 0 is the first topic level
      # below the language key, descent stops after lev4.
      def html_body_level(lng,node,depth)
        node.each_pair do |term,sub|
          if term.is_a?(String)
            depth==0 \
            ? do_string_name(lng,'lev0',term)
            : do_string(lng,"lev#{depth}",term)
          end
          next unless sub.is_a?(Hash) && depth < 4
          sub[:md].each { |x| do_hash(lng,depth,x) } if sub.has_key?(:md)
          html_body_level(lng,sub,depth+1)
        end
      end
      # Returns the html head for lng; type 'maintenance' selects the
      # local css path.
      def html_head_adjust(lng,type='')
        css_path,authors='',''
        if @env.output_dir_structure.by? == :language
          css_path=(type !~/maintenance/) \
          ? '../../_sisu/css/harvest.css'
          : 'harvest.css'
          authors='authors.html'
        elsif @env.output_dir_structure.by? == :filetype
          css_path=(type !~/maintenance/) \
          ? '../_sisu/css/harvest.css'
          : 'harvest.css'
          authors="authors.#{lng}.html"
        elsif @env.output_dir_structure.by? == :filename
          css_path=(type !~/maintenance/) \
          ? './_sisu/css/harvest.css'
          : 'harvest.css'
          authors="authors.#{lng}.html"
        end
        ln=SiSU_i18n::Languages.new.language.list
        harvest_languages=''
        @the_idx.keys.each do |lg|
          if @env.output_dir_structure.by? == :language
            harvest_pth="../../#{lg}/manifest"
            file=harvest_pth + '/' + 'topics.html'
          elsif @env.output_dir_structure.by? == :filetype
            harvest_pth='.'
            file=harvest_pth + '/' + 'topics.' + lg + '.html'
          elsif @env.output_dir_structure.by? == :filename
            harvest_pth='.'
            file=harvest_pth + '/topics.' + lg + '.html'
          end
          l=ln[lg][:t]
          harvest_languages += %{<a href="#{file}">#{l}</a> }
        end
        sv=SiSU_Env::InfoVersion.instance.get_version
        if @env.output_dir_structure.by? == :language
          home_pth='../..'
          output_structure_by='(output organised by language & filetype)'
        elsif @env.output_dir_structure.by? == :filetype
          home_pth='..'
          output_structure_by='(output organised by filetype)'
        elsif @env.output_dir_structure.by? == :filename
          home_pth='.'
          output_structure_by='(output organised by filename)'
        else
          home_pth='.'
          output_structure_by='(output organised by ?)'
        end
<<WOK
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>SiSU Metadata Harvest - Topics</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="dc.title" content= "SiSU metadata harvest, Topics - SiSU information Structuring Universe, Structured information Serialised Units" />
<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
<meta name="generator" content="#{sv.project} #{sv.version} of #{sv.date_stamp} (n*x and Ruby!)" />
<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
<link href="#{css_path}" rel="stylesheet">
<link rel="shortcut icon" href="../_sisu/image/rb7.ico" />
</head>
<body lang="en" xml:lang="en">
<a name="top" id="top"></a>
<a name="up" id="up"></a>
<a name="start" id="start"></a>
<h1>SiSU Metadata Harvest - Topics #{output_structure_by}</h1>
<p>[<a href="#{home_pth}/index.html"> HOME </a>] also see <a href="#{authors}">SiSU Metadata Harvest - Authors</a></p>
<p>#{@env.widget_static.search_form}</p>
<hr />
<p class="tiny">#{harvest_languages}</p>
<hr />
WOK
      end
      def html_head
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] \
            << html_head_adjust(lng,'maintenance') \
            if @opt.act[:maintenance][:set]==:on
          @output[lng][:html] << html_head_adjust(lng)
        end
      end
      # Writes the A-Z jump bar (the leading '9' bucket gets no link).
      def html_alph
        a=[]
        a << '<p>'
        @alph.each do |x|
          a << ((x =~/[0-9]/) \
          ? ''
          : %{<a href="##{x}">#{x}</a>, })
        end
        a=a.join
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] << a \
            if @opt.act[:maintenance][:set]==:on
          @output[lng][:html] << a
        end
      end
      def html_tail
        a =<<WOK
<hr />
<a name="bottom" id="bottom"></a>
<a name="down" id="down"></a>
<a name="end" id="end"></a>
<a name="finish" id="finish"></a>
<a name="stop" id="stop"></a>
<a name="credits"></a>
#{SiSU_Proj_HTML::Bits.new.credits_sisu}
</body>
</html>
WOK
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] << a \
            if @output[lng][:html_mnt].is_a?(File)
          @output[lng][:html] << a
        end
      end
      def do_html(lng,html)
        @output[lng][:html] << html
      end
      def do_html_maintenance(lng,html)
        @output[lng][:html_mnt] << html \
          if @output[lng][:html_mnt].is_a?(File)
      end
      # Writes string as a paragraph of class attrib to both outputs.
      def do_string(lng,attrib,string)
        html=%{<p class="#{attrib}">#{string}</p>}
        do_html(lng,html)
        do_html_maintenance(lng,html) \
          if @output[lng][:html_mnt].is_a?(File)
      end
      def do_string_default(lng,attrib,string)
        html=%{<p class="#{attrib}">#{string}</p>}
        do_html(lng,html)
      end
      def do_string_maintenance(lng,attrib,string)
        html=%{<p class="#{attrib}">#{string}</p>}
        do_html_maintenance(lng,html) \
          if @output[lng][:html_mnt].is_a?(File)
      end
      # Writes a top-level term with a named anchor, first emitting the
      # letter headings passed since the previous term; the alphabet is
      # restarted for every new language.
      def do_string_name(lng,attrib,string)
        f=/^(\S)/.match(string)[1]
        if @lng != lng
          @alph=@alphabet_list.dup
          @letter=@alph.shift
          @lng = lng
        end
        if @letter < f
          while @letter < f
            if @alph.length > 0
              @letter=@alph.shift
              if @output[lng][:html_mnt].is_a?(File)
                @output[lng][:html_mnt] \
                  << %{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
              end
              @output[lng][:html] \
                << %{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
            else break
            end
          end
        end
        name=string.strip.gsub(/\s+/,'_')
        html=%{<p class="#{attrib}"><a name="#{name}">#{string}</a></p>}
        do_html(lng,html)
        do_html_maintenance(lng,html) \
          if @output[lng][:html_mnt].is_a?(File)
      end
      def do_array(lng,lv,array)
        lv+=1
        array.each do |b|
          do_case(lng,lv,b)
        end
      end
      # Writes the link to a document's manifest page, with its authors.
      def do_hash_md(lng,attrib,hash)
        lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert
        manifest_at=if @env.output_dir_structure.by? == :language
          hash[:file] + Sfx[:html]
        elsif @env.output_dir_structure.by? == :filetype
          hash[:file] + lang_code_insert + Sfx[:html]
        elsif @env.output_dir_structure.by? == :filename
          "./#{hash[:file]}/#{hash[:page]}"
        else '' #error
        end
        html=%{<a href="#{manifest_at}">#{hash[:title]}</a> - #{hash[:author]}}
        do_string_default(lng,attrib,html)
      end
      def do_hash_md_maintenance(lng,attrib,hash)
        if @output[lng][:html_mnt].is_a?(File) #should not be run for presentation output
          html=%{[<a href="#{hash[:file]}.sst">src</a>] <a href="file://#{@env.path.output}/#{hash[:file]}/#{hash[:page]}">#{hash[:title]}</a> - #{hash[:author]}}
          do_string_maintenance(lng,attrib,html)
        end
      end
      # Writes one hash: a document-details hash (has :title) becomes a
      # 'work' entry, any other keys are treated as sub-terms.
      def do_hash(lng,lv,hash)
        lv+=1
        key=[]
        hash.each_key do |m|
          if m == :md
            do_case(lng,lv,hash[m])
          elsif m != :title \
          and m != :author \
          and m != :filename \
          and m != :file \
          and m != :rough_idx \
          and m != :page
            key << m
          elsif m == :title
            do_hash_md(lng,'work',hash)
            do_hash_md_maintenance(lng,'work',hash)
          end
        end
        if key.length > 0
          key.sort.each do |m|
            attrib="lev#{lv}"
            lv==0 \
            ? do_string_name(lng,attrib,m)
            : do_string(lng,attrib,m)
            do_case(lng,lv,hash[m])
          end
        end
      end
      def do_case(lng,lv,a)
        case a
        when String
          attrib="lev#{lv}"
          if a=~/\S/ # any non-blank string (was /S/, a literal capital S)
            lv==0 \
            ? do_string_name(lng,attrib,a)
            : do_string(lng,attrib,a)
          end
        when Array
          do_array(lng,lv,a)
        when Hash
          do_hash(lng,lv,a)
        end
      end
      #def html_body
      #  the_idx=@the_idx
      #  the_idx.each_pair do |lng,lng_array|
      #    lng_array.sort.each do |a|
      #      do_case(lng,-1,a)
      #    end
      #  end
      #end
      self
    end
  end
end
__END__
terms -|_ t{tl1} -|_ {fa}[fa]{filenames and other details}
       |          |_ {tl2} -|_ {fa}[fa]{filenames and other details}
       |          |         |_{tl3} -|_ {fa}[fa]{filenames and other details}
       |          |         |        |_{tl4} - {fa}[fa]{filenames and other details}
       |          |         |        |
       |          |         |        |_{tl4a} - {fa}[fa]{filenames and other details}
       |          |         |        |
       |          |         |        |_{tl4b} - {fa}[fa]{filenames and other details}
       |          |         |        |
       |          |         |        |_ ...
       |          |         |
       |          |         |_{tl3a} - {fa}[fa]{filenames and other details}
       |          |
       |          |_{tl2a} - {fa}[fa]{filenames and other details}
       |
       |_ t{tl1a} -|_ {fa}[fa]{filenames and other details}
       |_ ...
#+END_SRC

** authors

*** html_harvest_authors.rb

#+BEGIN_SRC ruby  :tangle "../lib/sisu/html_harvest_authors.rb"
# <<sisu_document_header>>
module SiSU_HarvestAuthors
  require_relative 'html_harvest_author_format'           # html_harvest_author_format.rb
  require_relative 'html_parts'                           # html_parts.rb
  # Drives the author harvest: reads document headers, builds the author
  # index and writes the html pages.
  class Songsheet
    @@the_idx_authors={}
    def initialize(opt,env)
      @opt,@env=opt,env
      @file_list=opt.files
    end
    # Reads the leading header paragraphs of every file in @opt.f_pths,
    # keeps the @title/@creator/@date ones per language, and hands them
    # on to Harvest, Index and OutputIndex.
    def songsheet
      idx_array={}
      @opt.f_pths.each do |y|
        lang_hash_file_array={}
        name=y[:f]
        filename=y[:pth] + '/' + y[:f]
        File.open(filename,'r') do |file|
          file.each_line("\n\n") do |line|
            if line =~/^@(?:title|creator|date):(?:\s|$)/m
              lang_hash_file_array[y[:lng_is]] ||= []
              lang_hash_file_array[y[:lng_is]] << line
            elsif line =~/^@\S+?:(?:\s|$)/m \
            or line =~/^(?:\s*\n|%+ )/
              # other header field, blank or comment paragraph: skip
            else break # first non-header paragraph ends the header
            end
          end
        end
        lang_hash_file_array.each_pair do |lang,a|
          idx_array[lang] ||= []
          idx_array=SiSU_HarvestAuthors::Harvest.new(
            @opt,
            @env,
            a,
            filename,
            name,
            idx_array,
            lang
          ).extract_harvest
        end
      end
      the_idx=SiSU_HarvestAuthors::Index.new(
        idx_array,
        @@the_idx_authors
      ).construct_book_author_index
      SiSU_HarvestAuthors::OutputIndex.new(
        @opt,
        the_idx
      ).html_print.html_songsheet
    end
  end
  # Extracts title, author and publication year from one document's
  # header paragraphs and appends the result to idx_array[lang].
  class Harvest
    def initialize(opt,env,data,filename,name,idx_array,lang)
      @opt, @env,@data,@filename,@name,@idx_array,@lang=
        opt,env, data, filename, name, idx_array, lang
    end
    # Returns idx_array with this document added under lang; documents
    # lacking title or author are skipped.
    def extract_harvest
      data, filename, name, idx_array, lang =
        @data,@filename,@name,@idx_array,@lang
      @title=@subtitle=@fulltitle=@author=@author_format=@date=nil
      @authors=[]
      rgx={}
      rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m
      rgx[:title]=/^@title:[ ]+(.+)/
      rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m
      rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m
      data.each do |para|
        if para=~ rgx[:title]
          @title=rgx[:title].match(para)[1]
        end
        if para=~ rgx[:subtitle]
          @subtitle=rgx[:subtitle].match(para)[1]
        end
        if para=~ rgx[:author]
          @author_format=rgx[:author].match(para)[1]
        end
        if para=~ rgx[:date]
          @date=rgx[:date].match(para)[1]
        end
        # stop once every field has been found (tests the variable that
        # is actually assigned above)
        break if @title && @subtitle && @author_format && @date
      end
      # a subtitle without a matched title must not raise on nil + String
      @fulltitle=(@title && @subtitle) \
      ? (@title + ' - ' + @subtitle)
      : @title
      if @title \
      and @author_format
        creator=SiSU_FormatAuthor::Author.new(@author_format.strip).author_details
        @authors,@authorship=creator[:authors],creator[:authorship]
        file=if name=~/~[a-z]{2,3}\.ss[mt]$/
          name.sub(/~[a-z]{2,3}\.ss[mt]$/,'')
        else
          name.sub(/\.ss[mt]$/,'')
        end
        page=if @env.output_dir_structure.by? == :language
          "#{lang}/sisu_manifest.html"
        else
          "sisu_manifest.#{lang}.html"
        end
        idx_array[lang] <<= {
          filename: filename,
          file: file,
          date: @date,
          title: @fulltitle,
          author: creator,
          page: page,
          lang: lang
        }
      else
        #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}"
      end
      idx_array[lang]=idx_array[lang].flatten
      idx_array
    end
  end
  # Groups the harvested documents by language and author.
  class Index
    def initialize(idx_array,the_idx)
      @idx_array,@the_idx=idx_array,the_idx
      @@the_idx_authors=@the_idx
    end
    def capital(txt)
      txt[0].chr.capitalize + txt[1,txt.length]
    end
    # Returns { lang => { author => { md: [document details, ...] } } };
    # a document is listed under each of its authors.
    def construct_book_author_index
      idx_array=@idx_array
      idx_array.each_pair do |lang,idx_arr|
        @@the_idx_authors[lang] ||= {}
        idx_arr.each do |idx|
          idx[:author][:last_first_format_a].each do |author|
            author=author.strip
            if @@the_idx_authors[lang][author].is_a?(NilClass)
              @@the_idx_authors[lang][author]={ md: [] }
            end
            @@the_idx_authors[lang][author][:md] << {
              filename: idx[:filename],
              file: idx[:file],
              author: idx[:author],
              title: idx[:title],
              date: idx[:date],
              page: idx[:page],
              lang: idx[:lang]
            }
          end
        end
      end
      @the_idx=@@the_idx_authors
    end
  end
  # Writes the author index as one html page per language.
  class OutputIndex
    require_relative 'i18n'                                # i18n.rb
    def initialize(opt,the_idx)
      @opt,@the_idx=opt,the_idx
      @env=SiSU_Env::InfoEnv.new
      @rc=SiSU_Env::GetInit.new.sisu_yaml.rc
      @alphabet_list=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
      @alph=@alphabet_list.dup
      @letter=@alph.shift
    end
    # Opens the output file for every language (path depends on the
    # output directory structure). No maintenance file is opened here,
    # so every :html_mnt write below is guarded by is_a?(File).
    def html_file_open
      @the_idx.keys.each do |lng|
        @output ||={}
        @output[lng] ||={}
        harvest_pth,file='',''
        if @env.output_dir_structure.by? == :language
          harvest_pth=@env.path.webserv + '/' \
            + @opt.base_stub + '/' \
            + lng + '/' \
            + 'manifest'
          file="#{harvest_pth}/authors.html"
        elsif @env.output_dir_structure.by? == :filetype
          harvest_pth=@env.path.webserv + '/' \
            + @opt.base_stub + '/' \
            + 'manifest'
          file="#{harvest_pth}/authors.#{lng}.html"
        elsif @env.output_dir_structure.by? == :filename
          harvest_pth=@env.path.webserv + '/' \
            + @opt.base_stub
          file="#{harvest_pth}/authors.#{lng}.html"
        end
        FileUtils::mkdir_p(harvest_pth) \
          unless FileTest.directory?(harvest_pth)
        fileinfo=(@opt.act[:verbose][:set]==:on \
        || @opt.act[:verbose_plus][:set]==:on \
        || @opt.act[:urls_selected][:set]==:on \
        || @opt.act[:maintenance][:set]==:on) \
        ? ("file://#{file}")
        : ''
        SiSU_Screen::Ansi.new(
          @opt.act[:color_state][:set],
          "harvest authors (#{@opt.files.length} files)",
          fileinfo
        ).dark_grey_title_hi unless @opt.act[:quiet][:set]==:on
        @output[lng][:html]=File.new(file,'w')
      end
    end
    def html_file_close
      @the_idx.keys.each do |lng|
        @output[lng][:html].close
        @output[lng][:html_mnt].close \
          if @output[lng][:html_mnt].is_a?(File)
      end
    end
    # Defines the page-building methods below (nested defs) and returns
    # self, so that callers chain html_print.html_songsheet.
    def html_print
      # Writes the complete page: head, alphabet bar, body, tail.
      def html_songsheet
        html_file_open
        html_head
        html_alph
        html_body
        html_tail
        html_file_close
      end
      # Returns the html head for lng; type 'maintenance' selects the
      # local css path.
      def html_head_adjust(lng,type='')
        css_path,topics='',''
        if @env.output_dir_structure.by? == :language
          css_path=(type !~/maintenance/) \
          ? '../../_sisu/css/harvest.css'
          : 'harvest.css'
          topics='topics.html'
        elsif @env.output_dir_structure.by? == :filetype
          css_path=(type !~/maintenance/) \
          ? '../_sisu/css/harvest.css'
          : 'harvest.css'
          topics="topics.#{lng}.html"
        elsif @env.output_dir_structure.by? == :filename
          css_path=(type !~/maintenance/) \
          ? './_sisu/css/harvest.css'
          : 'harvest.css'
          topics="topics.#{lng}.html"
        end
        ln=SiSU_i18n::Languages.new.language.list
        harvest_languages=''
        @the_idx.keys.each do |lg|
          if @env.output_dir_structure.by? == :language
            harvest_pth="../../#{lg}/manifest"
            file="#{harvest_pth}/authors.html"
          elsif @env.output_dir_structure.by? == :filetype
            harvest_pth='.'
            file="#{harvest_pth}/authors.#{lg}.html"
          elsif @env.output_dir_structure.by? == :filename
            harvest_pth='.'
            file="#{harvest_pth}/authors.#{lg}.html"
          end
          l=ln[lg][:t]
          harvest_languages += %{<a href="#{file}">#{l}</a> }
        end
        sv=SiSU_Env::InfoVersion.instance.get_version
        if @env.output_dir_structure.by? == :language
          home_pth='../..'
          output_structure_by=
            '(output organised by language & filetype)'
        elsif @env.output_dir_structure.by? == :filetype
          home_pth='..'
          output_structure_by=
            '(output organised by filetype)'
        elsif @env.output_dir_structure.by? == :filename
          home_pth='.'
          output_structure_by=
            '(output organised by filename)'
        else
          home_pth='.'
          output_structure_by='(output organised by ?)'
        end
<<WOK
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>SiSU Metadata Harvest - Authors</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<meta name="dc.title" content= "SiSU metadata harvest, Authors - SiSU information Structuring Universe, Structured information Serialised Units" />
<meta name="dc.subject" content= "document structuring, ebook, publishing, PDF, LaTeX, XML, ODF, SQL, postgresql, sqlite, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, granular search, digital library" />
<meta name="generator" content="#{sv.project} #{sv.version} of #{sv.date_stamp} (n*x and Ruby!)" />
<link rel="generator" href="http://www.jus.uio.no/sisu/SiSU" />
<link href="#{css_path}" rel="stylesheet" >
<link rel="shortcut icon" href="../_sisu/image/rb7.ico" />
</head>
<body lang="en" xml:lang="en">
<a name="top" id="top"></a>
<a name="up" id="up"></a>
<a name="start" id="start"></a>
<h1>SiSU Metadata Harvest - Authors #{output_structure_by}</h1>
<p>[<a href="#{home_pth}/index.html"> HOME </a>] also see <a href="#{topics}">SiSU Metadata Harvest - Topics</a></p>
<p>#{@env.widget_static.search_form}</p>
<hr />
<p class="tiny">#{harvest_languages}</p>
<hr />
WOK
      end
      def html_head
        @the_idx.keys.each do |lng|
          # guard on the handle itself: html_file_open opens no
          # maintenance file, so testing the maintenance flag raised on nil
          @output[lng][:html_mnt] \
            << html_head_adjust(lng,'maintenance') \
            if @output[lng][:html_mnt].is_a?(File)
          @output[lng][:html] \
            << html_head_adjust(lng)
        end
      end
      # Writes the A-Z jump bar (the leading '9' bucket gets no link).
      def html_alph
        a=[]
        a << '<p>'
        @alph.each do |x|
          a << ((x =~/[0-9]/) \
          ? ''
          : %{<a href="##{x}">#{x}</a>, })
        end
        a=a.join
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] << a \
            if @output[lng][:html_mnt].is_a?(File)
          @output[lng][:html] << a
        end
      end
      def html_tail
        a =<<WOK
<hr />
<a name="bottom" id="bottom"></a>
<a name="down" id="down"></a>
<a name="end" id="end"></a>
<a name="finish" id="finish"></a>
<a name="stop" id="stop"></a>
<a name="credits"></a>
#{SiSU_Proj_HTML::Bits.new.credits_sisu}
</body>
</html>
WOK
        @the_idx.keys.each do |lng|
          @output[lng][:html_mnt] << a \
            if @output[lng][:html_mnt].is_a?(File)
          @output[lng][:html] << a
        end
      end
      def do_html(lng,html)
        @output[lng][:html_mnt] << html \
          if @output[lng][:html_mnt].is_a?(File)
        @output[lng][:html] << html
      end
      # Emits the letter headings passed since the previous author;
      # string is the [author, details] pair, the alphabet is restarted
      # for every new language.
      def do_string_name(lng,attrib,string)
        f=/^(\S)/.match(string[0])[1]
        if @lng != lng
          @alph=@alphabet_list.dup
          @letter=@alph.shift
          @lng = lng
        end
        if @letter < f
          while @letter < f
            if @alph.length > 0
              @letter=@alph.shift
              # same, correctly nested, markup for both outputs
              letter_html=%{\n<p class="letter"><a name="#{@letter}">#{@letter}</a></p><p class="book_index_lev1"><a name="#{@letter.downcase}"></a></p>}
              if @output[lng][:html_mnt].is_a?(File)
                @output[lng][:html_mnt] << letter_html
              end
              @output[lng][:html] << letter_html
            else break
            end
          end
        end
      end
      # Writes every author (sorted) followed by their works, sorted by
      # "date title".
      def html_body
        the_idx=@the_idx
        the_idx.each_pair do |lng,lng_array|
          lng_array.sort.each do |a|
            do_string_name(lng,'',a)
            name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_')
            x = %{<p class="author"><a name="#{name}">#{a[0]}</a></p>}
            if @output[lng][:html_mnt].is_a?(File)
              @output[lng][:html_mnt] << x
            end
            @output[lng][:html] << x
            lang_code_insert=SiSU_Env::FilenameLanguageCodeInsert.new(@opt,lng).language_code_insert
            works=[]
            a[1][:md].each do |i|
              manifest_at=if @env.output_dir_structure.by? == :language
                i[:file] + Sfx[:html]
              elsif @env.output_dir_structure.by? == :filetype
                i[:file] + lang_code_insert + Sfx[:html]
              elsif @env.output_dir_structure.by? == :filename
                './' + i[:file] + '/' + i[:page]
              else '' #error
              end
              # work: [sort key, presentation html(, maintenance html)]
              work=[
                "#{i[:date]} #{i[:title]}",
                %{<p class="publication">#{i[:date]} <a href="#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]}</p>}
              ]
              works<<=(@output[lng][:html_mnt].is_a?(File)) \
              ? (work.concat([%{<p class="publication">[<a href="#{i[:file]}.sst">src</a>] #{i[:date]} <a href="file://#{manifest_at}">#{i[:title]}</a>, #{i[:author][:authors_s]} -- [<a href="#{i[:file]}.sst">#{i[:file]}.sst</a>]</p>}]))
              : work
            end
            works.sort_by {|y| y[0]}.each do |z|
              @output[lng][:html] << z[1]
              @output[lng][:html_mnt] << z[2] \
                if @output[lng][:html_mnt].is_a?(File)
            end
          end
        end
      end
      self
    end
    # Defines #cycle (prints each index key with the files below it) and
    # returns self.
    def screen_print
      def cycle
        the_idx=@the_idx
        the_idx.sort.each do |a|
          puts a[0]
          a[1][:md].each do |x|
            puts "\t" + x[:file]
          end
        end
      end
      self
    end
  end
end
__END__
#+END_SRC

*** html_harvest_author_format.rb

#+BEGIN_SRC ruby  :tangle "../lib/sisu/html_harvest_author_format.rb"
# <<sisu_document_header>>
module SiSU_FormatAuthor
  # Parses a ';' separated author string in which each entry is either
  # "Last, First" or a quoted name taken as is.
  class Author
    def initialize(author_param)
      @author_param=author_param
    end
    # Returns a hash with
    #   last_first_a:        the raw entries as split on ';'
    #   last_first_format_a: "LAST, First" (or upcased name) per author
    #   authors_h:           { the: last(, others: first) } per author
    #   authors_s:           "First Last, First Last" display string
    #   authors_param:       the original string
    # Empty entries (e.g. after a trailing ';') are ignored.
    def author_details
      @authors,@author_array=[],[]
      authors=@author_param.scan(/[^;]+/)
      authors.each do |a|
        a=a.strip
        if a =~/"(.+?)"/
          @authors << { the: $1 }
          @author_array << $1.upcase
        else #if a =~/,/
          x=a.scan(/[^,]+/)
          next if x.empty? # nothing but blanks/commas: would raise on nil.strip
          x[0]=x[0].strip
          x[1]=x[1].strip if x[1]
          if x.length==1
            @authors << { the: x[0] }
            @author_array << x[0].upcase
          elsif x.length==2
            @authors << { the: x[0], others: x[1] }
            @author_array << "#{x[0].upcase}, #{x[1]}"
          else #p x.length
          end
        end
      end
      l = @authors.length
      authors_string=''
      @authors.each_with_index do |a,i|
        # every author but the last is followed by ', '
        authors_string += if a[:others]
          if (l - i) > 1
            "#{a[:others]} #{a[:the]}, "
          else
            "#{a[:others]} #{a[:the]}"
          end
        else
          if (l - i) > 1
            "#{a[:the]}, "
          else
            "#{a[:the]}"
          end
        end
      end
      {
        last_first_a: authors,
        last_first_format_a: @author_array,
        authors_h: @authors,
        authors_s: authors_string,
        authors_param: @author_param
      }
    end
  end
end
__END__
#+END_SRC

* document header

#+NAME: sisu_document_header
#+BEGIN_SRC text
encoding: utf-8 - Name: SiSU - Description: documents, structuring, processing, publishing, search harvest - Author: Ralph Amissah <ralph.amissah@gmail.com> - Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019, 2020, 2021, Ralph Amissah, All Rights Reserved. - License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. If you have Internet connection, the latest version of the GPL should be available at these locations: <http://www.fsf.org/licensing/licenses/gpl.html> <http://www.gnu.org/licenses/gpl.html> <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html> - SiSU uses: - Standard SiSU markup syntax, - Standard SiSU meta-markup syntax, and the - Standard SiSU object citation numbering and system - Homepages: <http://www.sisudoc.org> - Git <https://git.sisudoc.org/projects/> <https://git.sisudoc.org/projects/?p=software/sisu.git;a=summary> <https://git.sisudoc.org/projects/?p=markup/sisu-markup-samples.git;a=summary> #+END_SRC