# encoding: utf-8
=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997 - 2012, Ralph Amissah, All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: system environment, resource control and configuration details
=end
module SiSU_DAL_Hash
require_relative 'shared_markup_alt.rb' #shared_markup_alt.rb
# Walks a list of document objects and passes the numbered (ocn bearing)
# ones through #stamped; also provides the endnote digest helper that
# #stamped calls.
class ObjectDigest
  # md:   document metadata object; within this class only md.fns is read
  #       directly (to build the default env), md is otherwise handed on to
  #       SiSU_TextRepresentation::ModifiedTextPlusHashDigest
  # data: list of document objects, nil entries are tolerated
  # env:  object answering digest.type; SiSU_Env::InfoEnv.new(md.fns) is
  #       used when none (or a false value) is given
  def initialize(md,data,env=nil)
    @md,@data=md,data
    @env=env || SiSU_Env::InfoEnv.new(@md.fns)
  end
  # Returns (and keeps in @tuned_file) the flattened list of document
  # objects with nil entries removed. Each object has its obj stripped of
  # surrounding whitespace unless obj is an Array. Objects whose of is not
  # structure, comment or layout and whose ocn is an integer are passed
  # through #stamped, all others are passed on as they are.
  #
  # Design notes kept from the original implementation:
  # 1. clean/stripped text without any markup, paragraph, headings etc. without endnotes
  # 2. endnotes clean/stripped text digest only (there may be several endnotes within a paragraph)
  # 3. whole object, text with markup and any endnotes, (question: with or without the endnote digests??? presumption better without, [however may be easier to check with?])
  # [digests should not include other digests]
  def object_digest
    sha_=(@env.digest.type=='sha256')
    # the digest library is loaded on demand, only the one that is needed
    sha_ ? (require 'digest/sha2') : (require 'digest/md5')
    hash_class=sha_ ? Digest::SHA256 : Digest::MD5
    tuned=@data.compact.map do |t_o|
      t_o.obj=t_o.obj.strip unless t_o.obj.is_a?(Array)
      # Integer rather than Fixnum: Fixnum was deprecated in ruby 2.4 and
      # removed in ruby 3.2, where referring to it raises NameError
      if t_o.of !~/structure|comment|layout/ && t_o.ocn.is_a?(Integer)
        stamped(t_o,hash_class)
      else t_o
      end
    end
    @tuned_file=tuned.flatten
  end
  # data: list of strings (nil entries are tolerated)
  # Returns the entries joined into a single string. An entry that contains
  # an endnote open mark has Mx[:id_o] + digest + Mx[:id_c] inserted just
  # before the endnote close mark, the digest (SHA256 when
  # @env.digest.type matches sha256, MD5 otherwise) being taken of the
  # endnote text after strip_clean_of_markup. Only the text up to and
  # including the first endnote close mark of an entry is kept.
  # An entry with an open mark but no complete open/close pair on a single
  # line is reported on stdout and contributes nothing to the result.
  # The Digest class used must already be loaded (#object_digest does so).
  def endnote_digest(data)
    en_open_rgx=/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
    en_pair_rgx=/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/
    en_parts_rgx=/(.*?)(#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(.+?)(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m
    t_o_bit=data.map do |en_plus|
      case en_plus
      when en_open_rgx
        if en_plus =~ en_pair_rgx
          t_o_txt,en_open,en_txt,en_close=en_parts_rgx.match(en_plus)[1..4]
          stripped_en=SiSU_TextRepresentation::Alter.new(en_txt).strip_clean_of_markup
          digest_en_strip=if @env.digest.type =~/sha256/
            Digest::SHA256.hexdigest(stripped_en)
          else
            Digest::MD5.hexdigest(stripped_en)
          end
          t_o_txt + en_open + en_txt + Mx[:id_o] + digest_en_strip + Mx[:id_c] + en_close
        else puts "Error Exception - problem encountered with:\n#{en_plus}" #arbitrary exception, tidy up
        end
      else en_plus
      end
    end
    t_o_bit.join # nil entries (puts result, nil input) join as empty strings
  end
  # t_o:        document object, its obj text is rewritten in place
  # hash_class: digest class selected by #object_digest, not used in the
  #             body at present
  # Returns t_o itself. Visible effects on t_o: obj is replaced by the
  # result of strip_clean_of_extra_spaces and, for non code objects that
  # contain endnotes, whitespace before each endnote close mark is
  # normalised to a single space.
  # NOTE(review): the stripped/markup/digest values and the endnote digest
  # text assembled below are computed but never attached to t_o or
  # returned (the lines that did so are commented out). The calls are kept
  # as they are because whether the SiSU_TextRepresentation helpers have
  # side effects cannot be seen from this file - confirm before removing.
  def stamped(t_o,hash_class) #decide what hash information is most useful, is compromise necessary?
    t_o.obj=SiSU_TextRepresentation::Alter.new(t_o).strip_clean_of_extra_spaces
    t_obj=t_o.inspect.sub(/:0x[0-9a-f]{8}\s/,': ')
    stripped=SiSU_TextRepresentation::Alter.new(t_o).strip_clean_of_markup
    markup=SiSU_TextRepresentation::Alter.new(t_o).semi_revert_markup
    digests=SiSU_TextRepresentation::ModifiedTextPlusHashDigest.new(@md,t_o).composite.dgst
    unless t_o.is=='code'
      case t_o.obj
      when /#{Mx[:en_a_o]}[\d*+]+\s+.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}[*+]\d+\s+.+?#{Mx[:en_b_c]}/m
        en_and_t_o_digest=[]
        t_o.obj=t_o.obj.gsub(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch
        # one segment per endnote: leading text plus the endnote itself
        t_o_plus_en=t_o.obj.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m)
        # NOTE(review): the regex used with match below has no capture
        # group, so [1] is always nil here and the text following the last
        # endnote is not added; harmless while the result is unused
        t_o_tail=if t_o.obj =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m
          /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+.*/m.match(t_o.obj)[1]
        else ''
        end
        t_o_plus_en << t_o_tail
        en_and_t_o_digest << endnote_digest(t_o_plus_en)
        t_o_new=en_and_t_o_digest.join(' ')
        #@tuned << t_o_new + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless t_o.nil?
      else #@tuned << t_o + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless t_o.nil?
      end
    else #@tuned << t_o + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless t_o.nil?
    end
    t_o #KEEP intact
  end
  # s: string, left unmodified (every step builds a new string)
  # Returns a copy of s with runs of two or more spaces collapsed to one
  # and the spaces between a closing bold/italics mark followed by an
  # apostrophe and a following "s " removed. Spaces before , . ; : ? are
  # removed as well, but only when s contains no endnote open mark.
  def strip_clean_extra_spaces(s) # dal output tuned
    s=s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') unless s =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/
    # NOTE(review): after the first gsub no run of two or more spaces is
    # left, so the following two (which need at least two spaces) cannot
    # match; if leading/trailing spaces were meant to be removed the
    # patterns need revisiting - kept as they were to preserve output
    s.gsub(/ [ ]+/,' ').
      gsub(/^ [ ]+/,'').
      gsub(/ [ ]+$/,'').
      gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2')
  end
end
end
__END__