# encoding: utf-8
=begin

* Name: SiSU

** Description: documents, structuring, processing, publishing, search
*** system environment, resource control and configuration details

** Author: Ralph Amissah
  [ralph@amissah.com]
  [ralph.amissah@gmail.com]

** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
  2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Ralph Amissah,
  All Rights Reserved.

** License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program. If not, see [http://www.gnu.org/licenses/].

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
  [http://www.fsf.org/licensing/licenses/gpl.html]
  [http://www.gnu.org/licenses/gpl.html]

** SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

** Homepages:
  [http://www.jus.uio.no/sisu]
  [http://www.sisudoc.org]

** Git
  [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary]
  [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/ao_appendices.rb;hb=HEAD]

=end
module SiSU_AO_Appendices
  # Separates a document's glossary section from the rest of its source lines.
  class Glossary
    # md::   metadata object; only its +flag_glossary+ reader is used here
    # data:: collection of source text items, each tested with regexes
    def initialize(md,data)
      @md=md
      @data=data
    end
    # Walks the source once, pulling glossary entries out of the text.
    #
    # Nothing is extracted while inside a code block (code{ ... }code or a
    # "``` code" ... "```" fence) or when +@md.flag_glossary+ is not set.
    # Otherwise:
    # * a "1~!glossary" heading opens the glossary and is dropped,
    # * any B~/C~/D~ heading (optionally prefixed ":") is dropped,
    # * a "1~" heading closes the glossary and is kept,
    # * inside the glossary, every item that is not a "% " comment is moved
    #   to the glossary list.
    #
    # Replaces +@data+ with the remaining items and returns
    # [remaining_items, glossary_items].
    def glossary_extraction
      glossary=[]
      in_glossary=false
      in_curly_code=false
      in_tics_code=false
      remaining=[]
      @data.each do |line|
        # track whether we are inside either flavour of code block
        if line =~/^code\{/
          in_curly_code=true
        elsif line =~/^\}code/
          in_curly_code=false
        elsif line =~/^``` code/
          in_tics_code=true
        elsif in_tics_code && line =~/^```/
          in_tics_code=false
        end
        keep=true
        if !(in_curly_code || in_tics_code) && @md.flag_glossary
          if line =~/^1~!glossary/
            in_glossary=true
            keep=false
          elsif line =~/^:?[B-D]~/
            keep=false
          elsif line =~/^:?[B-D1]~/
            in_glossary=false
          elsif in_glossary && line !~/\A%+ /
            glossary << line
            keep=false
          end
        end
        # nil/false items never survive, whatever branch they took
        remaining << line if keep && line
      end
      @data=remaining
      [@data,glossary]
    end
  end
  # Pulls the bibliography section out of a document's source lines and, in
  # "auto" mode, parses every entry into a citation hash, sorts the entries
  # and renders each one as a formatted reference string.
  class Bibliography
    # Keys present (initially nil) in every parsed citation hash.
    # :is is reserved for the kind of work (:book, :article, :magazine,
    # :newspaper, :blog, :other); nothing in this class sets it.
    CITATION_KEYS=[
      :is,
      :author_raw, :author, :author_arr,
      :editor_raw, :editor, :editor_arr,
      :title, :subtitle, :fulltitle,
      :language, :trans, :src, :journal, :in, :volume, :edition, :year,
      :place, :publisher, :url, :pages, :note,
      :short_name, :id,
    ].freeze
    # "prefix: value" line formats of a citation block, keyed by the
    # citation field the value is stored under. Capture 1 is the value (the
    # rest of the line after the prefix and its whitespace).
    CITATION_FIELDS={
      author_raw: /^(?:au|author):\s+(\S.*)/,      #req
      title:      /^(?:ti|title):\s+(\S.*)/,       #req
      subtitle:   /^(?:st|subtitle):\s+(\S.*)/,
      language:   /^(?:lng|language):\s+(\S.*)/,
      editor_raw: /^(?:edr?|editor):\s+(\S.*)/,
      trans:      /^(?:tr|trans(?:lator)?):\s+(\S.*)/,
      publisher:  /^(?:pb|publisher):\s+(\S.*)/,
      edition:    /^(?:edn|edition):\s+(\S.*)/,
      year:       /^(?:yr|year):\s+(\S.*)/,        #req?
      place:      /^(?:pl|publisher_state):\s+(\S.*)/,
      journal:    /^(?:jo|journal):\s+(\S.*)/,     #req?
      volume:     /^(?:vol?|volume):\s+(\S.*)/,
      in:         /^in:\s+(\S.*)/,
      src:        /^src:\s+(\S.*)/,
      pages:      /^(?:pg|pages?):\s+(\S.*)/,
      url:        /^url:\s+(\S.*)/,
      note:       /^note:\s+(\S.*)/,
      short_name: /^(?:sn|shortname):\s+(\S.*)/,   # substitution: (/#{id}/,"#{sn}")
      id:         /^id:\s+(\S.*)/,                 # substitution: (/#{id}/,"#{sn}")
    }.freeze
    # md::   metadata object; +flag_auto_biblio+ and +flag_biblio+ are read
    # data:: array of source text items, each tested with regexes
    def initialize(md,data)
      @md,@data=md,data
    end
    # Returns the non-nil citations of +bib+ ordered by deemed author, then
    # year, then title; returns nil when +bib+ is nil.
    # Keys are compared as strings so that a missing title (or any other nil
    # key) cannot make the array comparison fail.
    def sort_bibliography_array_by_deemed_author_year_title(bib)
      return unless bib
      bib.compact.sort_by do |c|
        [c[:deemed_author].to_s,c[:ymd].to_s,c[:title].to_s]
      end
    end
    # Remembers the raw citation block +cite+ (newline separated
    # "prefix: value" lines) and returns self, so that the call can be
    # chained: citation_in_prepared_bibliography(text).citation_metadata
    def citation_in_prepared_bibliography(cite)
      @cite=cite
      self
    end
    # Parses the citation block stored by citation_in_prepared_bibliography
    # into a hash holding every key of CITATION_KEYS plus:
    # :fulltitle::     "title - subtitle", or whichever of the two is present
    # :author/:editor:: display names ("Given Family", comma separated)
    # :ymd::           the year if it starts with four digits, else '9999'
    # :deemed_author:: first raw author, else first raw editor, else '000'
    #                  (a warning is emitted through SiSU_Utils::CodeMarker)
    # :short_name::    defaults to: author, "title" (year)
    def citation_metadata
      citation=blank_citation
      @cite.split("\n").each do |line|
        CITATION_FIELDS.each do |key,pattern|
          found=pattern.match(line)
          next unless found
          citation[key]=found[1]
          break
        end
      end
      title_parts=[citation[:title],citation[:subtitle]].compact
      citation[:fulltitle]=title_parts.join(' - ') unless title_parts.empty?
      if citation[:author_raw]
        citation[:author_arr],citation[:author]=personal_names(citation[:author_raw])
      end
      if citation[:editor_raw]
        citation[:editor_arr],citation[:editor]=personal_names(citation[:editor_raw])
      end
      citation[:ymd]=(citation[:year] =~/^[0-9]{4}/) ? citation[:year] : '9999'
      citation[:deemed_author]=if citation[:author_raw]
        citation[:author_arr][0]
      elsif citation[:editor_raw]
        citation[:editor_arr][0]
      else
        SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).
          warn(%{Citation needs an author or editor, title: "#{citation[:title]}"})
        '000'
      end
      unless citation[:short_name]
        citation[:short_name]=%{#{citation[:author]}, "#{citation[:title]}" (#{citation[:year]})}
      end
      citation
    end
    # Returns self; kept so that callers can write
    # biblio_format.generic(c) / biblio_format.generic_editor(c).
    def biblio_format
      self
    end
    # Renders citation hash +c+ with the author leading. Only fields that
    # are set are appended. The source is read from :source, falling back to
    # :src (the key citation_metadata fills in).
    def generic(c)
      source=c[:source] || c[:src]
      cite=%{#{c[:author]}. /{"#{c[:fulltitle]}".}/}
      cite += %{ #{c[:journal]},}       if c[:journal]
      cite += %{ #{source},}            if source
      cite += %{ in #{c[:in]},}         if c[:in]
      cite += %{ #{c[:volume]},}        if c[:volume]
      cite += %{ trans. #{c[:trans]},}  if c[:trans]
      cite += %{ ed. #{c[:editor]},}    if c[:editor]
      cite += %{ #{c[:place]},}         if c[:place]
      cite += %{ #{c[:publisher]},}     if c[:publisher]
      cite += %{ (#{c[:year]})}         if c[:year]
      cite += %{ #{c[:pages]}}          if c[:pages]
      cite += %{ #{c[:url]}}            if c[:url]
      cite += %{ #{c[:note]}}           if c[:note]
      cite
    end
    # Renders citation hash +c+ with the editor leading (for works that have
    # no author). Same field handling as #generic, without a separate
    # "ed." segment.
    def generic_editor(c)
      source=c[:source] || c[:src]
      cite=%{#{c[:editor]} ed. /{"#{c[:fulltitle]}".}/}
      cite += %{ #{c[:journal]}, }      if c[:journal]
      cite += %{ #{source}, }           if source
      cite += %{ in #{c[:in]},}         if c[:in]
      cite += %{ #{c[:volume]},}        if c[:volume]
      cite += %{ trans. #{c[:trans]},}  if c[:trans]
      cite += %{ #{c[:place]},}         if c[:place]
      cite += %{ #{c[:publisher]}}      if c[:publisher]
      cite += %{ (#{c[:year]})}         if c[:year]
      cite += %{ #{c[:pages]}}          if c[:pages]
      cite += %{ #{c[:url]}}            if c[:url]
      cite += %{ #{c[:note]}}           if c[:note]
      cite
    end
    # Formats +cite+: editor-led only when there is an editor and no author,
    # author-led in every other case.
    def biblio_make(cite)
      if !cite[:author] && cite[:editor]
        biblio_format.generic_editor(cite)
      else
        biblio_format.generic(cite)
      end
    end
    # Removes the bibliography section from +@data+.
    #
    # Nothing is touched inside code blocks (code{ ... }code or a
    # "``` code" ... "```" fence). Outside them a "1~!biblio" /
    # "1~!bibliography" heading opens the section and a later "1~" heading
    # closes it; "% " comment items are always kept.
    #
    # With +@md.flag_auto_biblio+ every entry of the section is parsed with
    # citation_metadata and removed; B~/C~/D~ headings also close the
    # section. If at least one entry was found the opening heading is
    # removed too and the entries are returned sorted, each with its
    # formatted text stored under :obj.
    #
    # Otherwise, with +@md.flag_biblio+, the heading, any B~/C~/D~ heading
    # and the section's entries are simply dropped.
    #
    # Returns [remaining_items, citations]; citations is nil unless auto
    # mode produced at least one entry.
    def biblio_extraction
      bibliography=[]
      heading_positions=[] # indexes (within kept) of auto-mode section headings
      in_biblio=false
      in_curly_code=false
      in_tics_code=false
      kept=[]
      @data.each do |t_o|
        # track whether we are inside either flavour of code block
        if t_o =~/^code\{/
          in_curly_code=true
        elsif t_o =~/^\}code/
          in_curly_code=false
        elsif t_o =~/^``` code/
          in_tics_code=true
        elsif in_tics_code && t_o =~/^```/
          in_tics_code=false
        end
        keep=true
        unless in_curly_code || in_tics_code
          if @md.flag_auto_biblio
            if t_o =~/^1~!biblio(?:graphy)?/
              in_biblio=true
              heading_positions << kept.length # removed later if entries follow
            elsif t_o =~/^:?[B-D1]~/
              in_biblio=false
            elsif in_biblio && t_o && t_o !~/\A%+ /
              bibliography << citation_in_prepared_bibliography(t_o).citation_metadata
              keep=false
            end
          elsif @md.flag_biblio
            if t_o =~/^1~!biblio(?:graphy)?/
              in_biblio=true
              keep=false
            elsif t_o =~/^:?[B-D]~/
              keep=false
            elsif t_o =~/^:?[B-D1]~/
              in_biblio=false
            elsif in_biblio && t_o !~/\A%+ /
              bibliography << t_o
              keep=false
            end
          end
        end
        kept << t_o if keep && t_o
      end
      biblio_done=nil
      if @md.flag_auto_biblio && !bibliography.empty?
        # delete from the back so earlier indexes stay valid
        heading_positions.reverse_each { |position| kept.delete_at(position) }
        biblio_done=sort_bibliography_array_by_deemed_author_year_title(bibliography)
        biblio_done.each { |c| c[:obj]=biblio_make(c) }
      end
      @data=kept
      [@data,biblio_done]
    end
    private
    # A citation hash with every known key present and set to nil.
    def blank_citation
      Hash[CITATION_KEYS.map { |key| [key,nil] }]
    end
    # Splits a ";" separated list of names and returns
    # [raw_names, display_string], where each "Family, Given" name is turned
    # into "Given Family" and the names are joined with ", ".
    def personal_names(raw)
      names=raw.split(/;\s*/)
      display=names.map do |name|
        name.gsub(/(.+?),\s+(.+)/,'\2 \1').strip
      end.join(', ').strip
      [names,display]
    end
  end
  # Finds inline citations (text wrapped in ".: ... :." markers) in document
  # objects, strips the markers from the text and breaks each citation into
  # its parts (authors, year, title, publication, editor).
  class Citations
    # Object types whose text is never scanned for citations.
    UNSCANNED_OBJECTS=[:meta,:comment,:code,:table].freeze
    # md::   metadata object; +opt.act+ is consulted for verbose reporting
    # data:: array of document objects responding to +is+, +obj+ and +obj=+
    def initialize(md='',data='')
      @md,@data=md,data
      #@biblio=[]
    end
    # Scans +@data+; returns [document_objects, citation_hashes].
    def songsheet
      citations_scan(@data)
    end
    # Returns the citation hashes of +bib+ ordered by raw author string,
    # then year.
    def sort_bibliography_array_by_author_year(bib)
      bib.sort_by do |c|
        [c[:author_raw],c[:year]]
        #[c[:author_arr][0],c[:year],c[:title]]
      end
    end
    # Returns self; kept so that callers can write
    # citations_regex.<pattern_name>. The pattern methods below each build a
    # fresh Regexp; the italics delimiters come from the Mx constant, which
    # is defined outside this file.
    def citations_regex
      self
    end
    # Optional page range, e.g. ", pp. 10-20 " (always followed by space).
    def pages_pattern
      %r{(?:[,.:]?\s+(?:p{1,2}\.?\s+)?(?:\d+--?\d+)[,.]?\s+)?}
    end
    # "editor ..." or "edited by ..." (captured as editor).
    def editor_pattern
      %r{(?<editor>(?:editor|edited by)\s+.+?)}
    end
    # Four digit year, optionally bracketed and followed by "." or ",".
    def year_pattern
      %r{[(\[]?(?<year>\d{4})[\])]?[.,]?}
    end
    # note: in the patterns below "ed." is usually edition rather than editor
    def authors_year_title_publication_editor_pages
      /(?<authors>.+?)\s+#{year_pattern}\s+"(?<title>.+?)"\s+#{italicised_publication}\s+#{editor_pattern}#{pages_pattern}/m
    end
    def authors_title_publication_year_editor_pages
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{italicised_publication}\s+#{year_pattern}\s+#{editor_pattern}#{pages_pattern}/m
    end
    # NOTE(review): the "." in "ed." is unescaped (matches any character) and
    # the pattern then also requires "editor"/"edited by" - confirm intent.
    def authors_title_publication_editor_year_pages ###
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{italicised_publication}\s+ed.\s+#{editor_pattern}#{year_pattern}#{pages_pattern}/m
    end
    def authors_title_publication_editor_pages_year ###
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{italicised_publication}\s+#{editor_pattern}#{pages_pattern}#{year_pattern}/m
    end
    def authors_year_title_publication_pages
      /(?<authors>.+?)\s+#{year_pattern}\s+"(?<title>.+?)"\s+#{italicised_publication}[,.;]?#{pages_pattern}/m
    end
    def authors_title_publication_year_pages
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{italicised_publication}\s+#{year_pattern}\s+#{pages_pattern}/m
    end
    # NOTE(review): defined but not tried by citation_detail - confirm
    # whether that omission is intended.
    def authors_title_publication_pages_year ###
      /(?<authors>.+?)\s+"(?<title>.+?)"\s+#{italicised_publication}#{pages_pattern}#{year_pattern}/m
    end
    def authors_year_publication_pages
      /(?<authors>.+?)\s+#{year_pattern}\s+#{italicised_publication}#{pages_pattern}/m
    end
    # Also captures an optional publisher, which citation_detail does not
    # store.
    def authors_publication_year_pages
      /(?<authors>.+?)\s+#{italicised_publication}[,.;]?\s+(?<publisher>.+?)?#{year_pattern}#{pages_pattern}[.;]?/m
    end
    # Maps each citation string to a hash. A string of the form
    # "<citation> ::<shortref>:" yields :citation, :shortref and an extra
    # entry keyed by the shortref string itself; any other value yields
    # { citation: value }.
    def long_and_short_ref?(citations) #could be useful, keep ... extract shortref
      citations.map do |b|
        c=/^(?<citation>.+?)\s+::(?<shortref>.+?):$/.match(b)
        if c
          {
            citation: c[:citation],
            shortref: c[:shortref],
            c[:shortref].to_s => c[:citation]
          }
        else { citation: b }
        end
      end
    end
    # Same behaviour under the second name this class exposes.
    alias_method :authors?, :long_and_short_ref?
    # Breaks each citation string into a hash by trying the citation
    # patterns in a fixed order (forms with an editor first, then articles,
    # then books) and using the first that matches. Strings that match no
    # pattern are left out of the result.
    #
    # When verbose_plus or maintenance is switched on in +@md.opt.act+,
    # every recognised citation is printed as "[n] ..." and every
    # unrecognised one as "<n> ...".
    #
    # Returns the hashes sorted by raw author, then year.
    def citation_detail(citations)
      return [] if citations.empty?
      # built once per call; each pattern method constructs a new Regexp
      patterns=[
        authors_year_title_publication_editor_pages,
        authors_title_publication_year_editor_pages,
        authors_title_publication_editor_year_pages,
        authors_title_publication_editor_pages_year,
        authors_year_title_publication_pages,
        authors_title_publication_year_pages,
        authors_year_publication_pages,
        authors_publication_year_pages,
      ]
      verbose=verbose_reporting?
      number=0
      missed=0
      bibahash=[]
      citations.each do |b|
        found=nil
        patterns.find { |pattern| found=pattern.match(b) }
        if found
          entry=citation_entry(found)
          bibahash << entry
          number +=1
          p "[#{number}] #{entry}" if verbose
        else
          missed +=1
          p "<#{missed}> #{b}" if verbose
        end
      end
      sort_bibliography_array_by_author_year(bibahash)
    end
    # Collects every ".: ... :." citation found in the text of +data+'s
    # objects (skipping nil entries and meta, comment, code and table
    # objects) and removes the ".:" / ":." markers from that text in place.
    # Returns [non_nil_objects, citation_hashes].
    def citations_scan(data)
      citations=[]
      #short_ref=[]
      tuned_file=data.compact
      tuned_file.each do |dob|
        next if UNSCANNED_OBJECTS.include?(dob.is)
        next unless dob.obj =~/\.:.+?:\./
        citations.concat(dob.obj.scan(/\.:\s*(.+?)\s*:\./m).flatten)
        #short_ref << dob.obj.scan(/\.:\s+(.+?)\s+::([^:]+)::\./m) #look at later
        ##short_ref << dob.obj.scan(/\.:\s+(.+?)\s+::(.+?)::\./m) #look at later
        #short_ref << dob.obj.scan(/\.:\s*(.+?)\s*(::(.+?):)?:\./m) #look at later
        dob.obj=dob.obj.   #remove citations delimiter & helpers from text
          gsub(/\.:|:\./,'')
      end
      #bib=long_and_short_ref?(citations) #could be useful, keep ... extract shortref
      [tuned_file,citation_detail(citations)]
    end
    private
    # Regexp source for a publication name between italics delimiters
    # (captured as publication). Returned as a String, not a Regexp, so that
    # the enclosing pattern's /m option still applies to it.
    def italicised_publication
      "(?:#{Mx[:fa_italics_o]}|#{Mx[:srcrgx_italics_o]})(?<publication>.+?)(?:#{Mx[:fa_italics_c]}|#{Mx[:srcrgx_italics_c]})"
    end
    # Builds the citation hash for a successful pattern match: an :article
    # when the pattern captured a title, a :book otherwise; :editor only
    # when the pattern has an editor group. Author display names are
    # "Given Family", joined with ", ".
    def citation_entry(found)
      has_title=found.names.include?('title')
      entry={
        is: (has_title ? :article : :book),
        author_raw: found[:authors],
        year: found[:year],
      }
      entry[:title]=found[:title] if has_title
      entry[:publication]=found[:publication]
      entry[:editor]=found[:editor] if found.names.include?('editor')
      unless entry[:author_raw].empty?
        entry[:author_arr]=entry[:author_raw].split(/;\s*/)
        entry[:author]=entry[:author_arr].map do |author|
          author.gsub(/(.+?),\s+(.+)/,'\2 \1').strip
        end.join(', ').strip
      end
      entry
    end
    # True when verbose_plus or maintenance is :on; false when +@md+ offers
    # no +opt+ (e.g. the default empty-string md).
    def verbose_reporting?
      return false unless @md.respond_to?(:opt)
      act=@md.opt.act
      act[:verbose_plus][:set]==:on || act[:maintenance][:set]==:on
    end
  end
end
__END__