# encoding: utf-8
=begin

* Name: SiSU

** Description: documents, structuring, processing, publishing, search
*** system environment, resource control and configuration details

** Author: Ralph Amissah
  <ralph@amissah.com>
  <ralph.amissah@gmail.com>

** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
  2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Ralph Amissah,
  All Rights Reserved.

** License: GPL 3 or later:

  SiSU, a framework for document structuring, publishing and search

  Copyright (C) Ralph Amissah

  This program is free software: you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published by the Free
  Software Foundation, either version 3 of the License, or (at your option)
  any later version.

  This program is distributed in the hope that it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program. If not, see <http://www.gnu.org/licenses/>.

  If you have Internet connection, the latest version of the GPL should be
  available at these locations:
  <http://www.fsf.org/licensing/licenses/gpl.html>
  <http://www.gnu.org/licenses/gpl.html>

  <http://www.sisudoc.org/sisu/en/manifest/gpl.fsf.html>

** SiSU uses:
  * Standard SiSU markup syntax,
  * Standard SiSU meta-markup syntax, and the
  * Standard SiSU object citation numbering and system

** Homepages:
  <http://www.jus.uio.no/sisu>
  <http://www.sisudoc.org>

** Git
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary>
  <http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/current/shared_markup_alt.rb;hb=HEAD>

=end
module SiSU_TextRepresentation
  class Alter
    # Sets up the text to be altered.
    #
    # x:: either a String, which becomes the working text @s as-is (no backing
    #     object, @t_o is nil), or an object responding to +obj+, which is kept
    #     in @t_o while a duplicate of its +obj+ text becomes @s.
    def initialize(x)
      plain_text=x.is_a?(String)
      @t_o=plain_text ? nil : x
      @s=plain_text ? x : x.obj.dup
    end
    # Tidies spacing in @s and returns the result (also stored back in @s).
    #
    # Steps, in order:
    # 1. spaces in front of , . ; : ? (when the punctuation is followed by
    #    whitespace or end of line) are dropped, unless the text contains an
    #    endnote opening marker, in which case this step is skipped;
    # 2. runs of two or more spaces are collapsed to one;
    # 3. a closing bold/italics marker followed by an apostrophe, spaces and
    #    "s " is rejoined as a possessive ('s).
    #
    # The possessive substitution used to be applied twice with an identical
    # pattern; one pass already rewrites every occurrence, so it is done once.
    def strip_clean_of_extra_spaces                                              # dal output tuned
      @s=@s.dup
      @s=@s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') unless @s =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
      # NOTE(review): once runs of spaces are collapsed, the next two patterns
      # (which need at least two spaces) cannot match, so a single leading or
      # trailing space survives; kept as-is so existing output does not change.
      @s=@s.gsub(/ [ ]+/,' ').
        gsub(/^ [ ]+/,'').
        gsub(/ [ ]+$/,'').
        gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2')
    end
    # Reduces @s to plain text and returns it (the result is also stored in @s).
    # Per the original note this is the text form used for sql db search and
    # for digests, and is meant to match the db "clean" form.
    #
    # The rules below are [pattern, replacement] pairs applied to a copy of @s
    # strictly in the listed order; the final text is stripped of leading and
    # trailing whitespace.
    def strip_clean_of_markup                                                  # text form used in sql db search, used for digest, define rules, make same as in db clean
      rules=[                                                                    #% same as db clean -->
        # inline font-face markers: keep only the enclosed text
        [/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'],
        [/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'],
        [/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1'],
        [/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1'],
        [/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1'],
        [/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1'],
        # purely numeric superscripts become [n]; others are left untouched
        [/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]'],
        [/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1'],
        [/#{Mx[:fa_hilite_o]}(.+?)#{Mx[:fa_hilite_c]}/,'\1'],
        [/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~'],
        [/#{Mx[:en_a_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_a_c]}/,''], # endnote removed
        [/#{Mx[:en_b_o]}([\d*+]+)\s+(?:.+?)#{Mx[:en_b_c]}/,''], # endnote removed
        # spacing and break markers
        [/(?:#{Mx[:nbsp]})+/,' '],
        [/(?:#{Mx[:br_nl]})+/,"\n"],
        [/(?:#{Mx[:br_paragraph]})+/,"\n"],
        [/(?:#{Mx[:br_line]})+/,"\n"],
        # escaped glyph codes back to literal characters
        [/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<'],
        [/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'>'],
        [/#{Mx[:gl_o]}#(?:038|amp)#{Mx[:gl_c]}/,'&'],
        [/#{Mx[:gl_o]}#033#{Mx[:gl_c]}/,'!'],
        [/#{Mx[:gl_o]}#035#{Mx[:gl_c]}/,'#'],
        [/#{Mx[:gl_o]}#042#{Mx[:gl_c]}/,'*'],
        [/#{Mx[:gl_o]}#045#{Mx[:gl_c]}/,'-'],
        [/#{Mx[:gl_o]}#047#{Mx[:gl_c]}/,'/'],
        [/#{Mx[:gl_o]}#095#{Mx[:gl_c]}/,'_'],
        [/#{Mx[:gl_o]}#123#{Mx[:gl_c]}/,'{'],
        [/#{Mx[:gl_o]}#125#{Mx[:gl_c]}/,'}'],
        [/#{Mx[:gl_o]}#126#{Mx[:gl_c]}/,'~'],
        [/#{Mx[:gl_o]}#169#{Mx[:gl_c]}/,'©'],
        # any run of whitespace (newlines included) collapses to one space;
        # the pass is applied twice, as in the original rule set
        [/\s\s+/,' '],
        [/\s\s+/,' '],
      ]
      @s=rules.inject(@s.dup) { |text,(rgx,substitute)| text.gsub(rgx,substitute) }.strip
    end
    # Converts the internal markers in @s back to an approximation of the
    # original markup and returns the result (also stored in @s).
    #
    # Only acts when the instance was built from a document object (@t_o); when
    # built from a plain String, @s is returned unchanged.
    #
    # - inline font faces become *{..}* /{..}/ _{..}_ "{..}" +{..}+ -{..}-
    #   ^{..}^ ,{..}, and endnotes become ~{..}~ or ~[..]~
    # - headings are prefixed with "<lv>~ "
    # - paragraphs get their bullet ("_* ") and indent ("_N ") prefixes, merged
    #   to "_N* " when both apply
    # - block, group and code objects are wrapped as "<is>{ ... }<is>" with
    #   line breaks normalised to blank-line separation
    #
    # Fix: the strike pattern used the misspelt key :fa_strke_c, which is looked
    # up as nil and interpolates as an empty string, so only the first character
    # after the opening marker was wrapped and the closing marker was left in
    # the text. It now uses :fa_strike_c.
    def semi_revert_markup                                             # used for digest, define rules, make same as in db clean
      return @s unless @t_o
      @s=@s.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'*{\1}*').
        gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'/{\1}/').
        gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'_{\1}_').
        gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"{\1}"').
        gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+{\1}+').
        gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-{\1}-').
        gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'^{\1}^').
        gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,',{\1},').
        gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~').
        gsub(/#{Mx[:en_a_o]}([\d*+]+\s+.+?)#{Mx[:en_a_c]}/,'~{\1}~'). # endnote marker marked up
        gsub(/#{Mx[:en_b_o]}([\d*+]+\s+.+?)#{Mx[:en_b_c]}/,'~[\1]~') # endnote marker marked up
      kind=@t_o.is
      if kind==:heading || kind==:para
        @s=@s.gsub(/ [ ]+/,' ')
        @s=@s.gsub(/(?:#{Mx[:nbsp]})+/,' ')
        @s=@t_o.lv + '~ ' + @s if kind==:heading
        if kind==:para
          @s='_* ' + @s if @t_o.bullet_
          if @t_o.indent.to_i > 0
            @s="_#{@t_o.indent} " + @s
            @s=@s.gsub(/^(_[1-9])\s_\*\s/,'\1* ')                               # "_N _* " -> "_N* "
          end
        end
      end
      if kind==:block || kind==:group || kind==:code
        @s=@s.gsub(/#{Mx[:nbsp]}/,' ')
        @s="#{kind.to_s}{\n\n#{@s}\n\n}#{kind.to_s}"
        @s=@s.gsub(/(?:#{Mx[:br_nl]}|\n)+/m,"\n\n")
      end
      #dealing with poem and verse calls for change in dal, where start and end verse of poem are marked as such
      @s=@s.strip
      @s
    end
    # Experimental light HTML rendering of @s; returns the result (also stored
    # in @s). Only acts when the instance was built from a document object
    # (@t_o); otherwise it prints this file/line as a debug marker and returns
    # @s unchanged.
    #
    # - inline font faces become <b>, <i>, <u>, "..", +{..}+, -{..}-, <sup>, <sub>
    # - for objects other than :code, glyph codes become HTML entities and url
    #   markers become <a href=".." target="_top"> links
    # - for :code objects, < and > are escaped instead
    #
    # Changes from the previous version:
    # - the strike pattern used the misspelt key :fa_strke_c (nil, interpolated
    #   as an empty string), so only one character was wrapped and the closing
    #   marker stayed in the output; it now uses :fa_strike_c
    # - a dangling \3 backreference (no third group exists, so it expanded to
    #   nothing) was dropped from the link special-case replacement
    # - placeholder branches for :paragraph/:heading that only did @s=@s were
    #   removed
    #
    # NOTE(review): +urls+ and @url_brace are not defined or assigned anywhere
    # in this file; unless they are supplied elsewhere, the link-rewriting step
    # and every non-:code object will raise. Confirm before relying on this.
    def html_lite #test whether eventually can be used in db_import replacing shared_html_lite (search for SiSU_FormatShared)
      unless @t_o
        p "#{__FILE__}:#{__LINE__}"
        return @s
      end
      @s=@s.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'<b>\1</b>').
        gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'<i>\1</i>').
        gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'<u>\1</u>').
        gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'"\1"').
        gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'+{\1}+').
        gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'-{\1}-').
        gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'<sup>\1</sup>').
        gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'<sub>\1</sub>').
        gsub(/#{Mx[:gl_o]}#(?:126|152)#{Mx[:gl_c]}/i,'~')
      if @t_o.is !=:code
        if @s =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/
          # split into link constructs and plain words, then let +urls+ rebuild the text
          wm=@s.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)|\S+/)
          words=urls(wm)
          @s=@s.gsub(/.+/m,words)
        end
        @s=@s.gsub(/#{Mx[:gl_o]}(#[0-9]{3})#{Mx[:gl_c]}/u,'&\1;').
          gsub(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;').
          gsub(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'<a href="\1" target="_top">\1</a>'). #http ftp matches escaped, no decoration
          gsub(/(#{Mx[:lnk_c]})#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,'\1<a href="\2" target="_top">\2</a>'). #special case \{ e.g. \}http://url
          gsub(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,%{#{@url_brace.xml_open}<a href="\\1" target="_top">\\1</a>#{@url_brace.xml_close}}) #http ftp matches with decoration
      else
        @s=@s.gsub(/</m,'&lt;').gsub(/>/m,'&gt;')
      end
      @s
    end
  end
  class ModifiedTextPlusHashDigest
    # md:: document metadata object; this method reads +fns+ and +opt+ from it.
    # x::  either a String (used directly as the working text @s, with @t_o
    #      nil) or an object responding to +obj+ (kept in @t_o, with a duplicate
    #      of its +obj+ text in @s).
    #
    # Reads the configured digest type into @sha_ (the code handles :sha512,
    # :sha256 and :md5) and loads the matching Digest library. If the library
    # cannot be loaded, an error is reported through SiSU_Utils::CodeMarker.
    #
    # Fix: the error text was chosen with "@sha_ ? .. : ..", but @sha_ is a
    # Symbol and therefore always truthy, so a missing digest/md5 was reported
    # as "digest/sha2 NOT FOUND". The message now names the library that was
    # actually required.
    def initialize(md,x)
      @md=md
      if x.is_a?(String)
        @t_o,@s=nil,x
      else
        @t_o,@s=x,x.obj.dup
      end
      @env ||=SiSU_Env::InfoEnv.new(@md.fns)
      @sha_ = @env.digest(@md.opt).type
      digest_lib=case @sha_
      when :sha512, :sha256 then 'digest/sha2'
      when :md5             then 'digest/md5'
      end
      begin
        require digest_lib if digest_lib
      rescue LoadError
        SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).error(digest_lib + ' NOT FOUND')
      end
    end
    # Returns the hex digest of +txt+ using the algorithm selected by @sha_
    # (:sha512, :sha256 or :md5), or nil when @sha_ is none of those.
    #
    # txt:: String to be digested.
    def digest(txt)
      hash_class=case @sha_
      when :sha512 then Digest::SHA512
      when :sha256 then Digest::SHA256
      when :md5    then Digest::MD5
      end
      hash_class ? hash_class.hexdigest(txt) : nil
    end
    # Selects the "stripped clean" text representation and returns self, so the
    # call can be chained: strip_clean_of_markup.txt / strip_clean_of_markup.dgst
    #
    # The nested defs (re)define +txt+ and +dgst+ on the class each time this
    # method runs; semi_revert_markup defines methods of the same names, so the
    # most recently called selector determines what +txt+ and +dgst+ do.
    def strip_clean_of_markup
      # working text with markup removed
      def txt
        SiSU_TextRepresentation::Alter.new(@s).strip_clean_of_markup
      end
      # { txt: <clean text>, dgst_txt: <digest of that text> }
      def dgst
        clean_text=txt
        { txt: clean_text, dgst_txt: digest(clean_text) }
      end
      self
    end
    # Selects the "markup reverted" text representation and returns self, so
    # the call can be chained: semi_revert_markup.txt / semi_revert_markup.dgst
    #
    # The nested defs (re)define +txt+ and +dgst+ on the class each time this
    # method runs; strip_clean_of_markup defines methods of the same names, so
    # the most recently called selector determines what +txt+ and +dgst+ do.
    #
    # NOTE(review): +txt+ hands the String @s to Alter, and
    # Alter#semi_revert_markup only transforms text when it was given a
    # document object, so as written +txt+ returns @s unchanged. Confirm
    # whether @t_o was meant to be passed here (as +composite+ does).
    def semi_revert_markup
      # working text passed through Alter#semi_revert_markup
      def txt
        SiSU_TextRepresentation::Alter.new(@s).semi_revert_markup
      end
      # { txt: <text>, dgst_txt: <digest of that text> }
      def dgst
        reverted_text=txt
        { txt: reverted_text, dgst_txt: digest(reverted_text) }
      end
      self
    end
    # Selects the composite digest representation and returns self, so the call
    # can be chained: composite.dgst
    #
    # The nested defs (re)define the helper methods below on the class each
    # time this method runs.
    #
    # Fix: the image digest was chosen with "@sha_ ? sha256 : md5", but @sha_ is
    # a Symbol and always truthy, so md5 was never used for images even when
    # :md5 was the configured digest. :md5 (or an unset @sha_) now selects
    # md5; every other type keeps using sha256 as before.
    def composite
      # Plain-text form of +txt+ (String or document object).
      def stripped_clean(txt)
        SiSU_TextRepresentation::Alter.new(txt).strip_clean_of_markup
      end
      # Markup-reverted form of +txt+ (String or document object).
      def markup_reverted(txt)
        SiSU_TextRepresentation::Alter.new(txt).semi_revert_markup
      end
      # imgs:: list of image file names (may be nil or empty).
      # Returns an Array of { img_dgst:, img_name:, img_type: } hashes, one per
      # image. Each image is looked for in the local, remote and general image
      # source directories, in that order; a missing image is reported (unless
      # quiet is on) and listed with " [image missing]" and a nil digest.
      def images(imgs)
        sys=SiSU_Env::SystemCall.new
        line_image=[]
        return line_image unless imgs && imgs.length > 0
        @image_name,@image_dgst,@img=[],[],[]                                    # not read anywhere in this file
        use_md5=(!@sha_ || @sha_==:md5)
        imgs.each do |i|
          image_source=if FileTest.file?("#{@env.path.image_source_include_local}/#{i}")
            @env.path.image_source_include_local
          elsif FileTest.file?("#{@env.path.image_source_include_remote}/#{i}")
            @env.path.image_source_include_remote
          elsif FileTest.file?("#{@env.path.image_source_include}/#{i}")
            @env.path.image_source_include
          else
            SiSU_Screen::Ansi.new(
              @md.opt.act[:color_state][:set],
              "ERROR - image:",
              %{"#{i}" missing},
              "search locations: #{@env.path.image_source_include_local}, #{@env.path.image_source_include_remote} and #{@env.path.image_source_include}"
            ).error2 unless @md.opt.act[:quiet][:set]==:on
            nil
          end
          img_type = /\S+\.(png|jpg|gif)/.match(i)[1]
          if image_source
            para_image = image_source + '/' + i
            image_name = i
            image_dgst = use_md5 ? sys.md5(para_image) : sys.sha256(para_image)
          else
            image_name = i + ' [image missing]'
            image_dgst = ''
          end
          # element 1 of the system call result is stored as the digest
          # (presumably the digest field of the command output - TODO confirm);
          # for a missing image ''[1] is nil
          line_image << { img_dgst: image_dgst[1], img_name: image_name, img_type: img_type }
        end
        line_image
      end
      # en:: endnote texts as returned by String#scan (nested arrays allowed;
      #      may be nil or empty).
      # Returns an Array of { note_number:, note_dgst: } hashes.
      def endnotes(en)
        return [] unless en && en.length > 0
        en.flatten.map do |e|
          note_no=e.gsub(/^([\d*+]+)\s+.+/,'\1')
          # NOTE(review): the stored value is the digest of the hex digest of
          # the cleaned note text (two rounds); kept unchanged so that digests
          # already produced stay comparable.
          inner_dgst=digest(stripped_clean(e))
          { note_number: note_no, note_dgst: digest(inner_dgst) }
        end
      end
      # Returns a Hash with :is, :ocn, :dgst_stripped_txt and
      # :dgst_markedup_txt for the current object, plus :endnotes and :images
      # when there are any. Returns nil for objects whose +of+ is :comment,
      # :structure or :layout.
      def dgst
        result=nil
        of=@t_o.of
        if of !=:comment && of !=:structure && of !=:layout
          txt_stripped_dgst=digest(stripped_clean(@t_o))
          txt_markup_reverted_dgst=digest(markup_reverted(@t_o))
          rgx_notes=/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+\s+.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/
          endnotes_dgst=endnotes(@t_o.obj.scan(rgx_notes))
          rgx_image=/#{Mx[:lnk_o]}(\S+\.(?:png|jpg|gif))\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/
          line_image=if (@t_o.is==:para || @t_o.is==:image) && @t_o.obj =~rgx_image
            images(@t_o.obj.scan(rgx_image).flatten)
          end
          result={ is: @t_o.is, ocn: @t_o.ocn, dgst_stripped_txt: txt_stripped_dgst, dgst_markedup_txt: txt_markup_reverted_dgst }
          result[:endnotes]=endnotes_dgst if endnotes_dgst && endnotes_dgst.length > 0
          result[:images]=line_image if line_image && line_image.length > 0
        end
        result
      end
      self
    end
  end
end
__END__