# encoding: utf-8 =begin * Name: SiSU ** Description: documents, structuring, processing, publishing, search *** system environment, resource control and configuration details ** Author: Ralph Amissah [ralph@amissah.com] [ralph.amissah@gmail.com] ** Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Ralph Amissah, All Rights Reserved. ** License: GPL 3 or later: SiSU, a framework for document structuring, publishing and search Copyright (C) Ralph Amissah This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see [http://www.gnu.org/licenses/]. If you have Internet connection, the latest version of the GPL should be available at these locations: [http://www.fsf.org/licensing/licenses/gpl.html] [http://www.gnu.org/licenses/gpl.html] ** SiSU uses: * Standard SiSU markup syntax, * Standard SiSU meta-markup syntax, and the * Standard SiSU object citation numbering and system ** Hompages: [http://www.jus.uio.no/sisu] [http://www.sisudoc.org] ** Git [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/ao_hash_digest.rb;hb=HEAD] =end module SiSU_AO_Hash require_relative 'shared_markup_alt.rb' #shared_markup_alt.rb class ObjectDigest def initialize(md,data,env=nil) @md,@data,@env=md,data,env @env ||=SiSU_Env::InfoEnv.new(@md.fns,@md) end def object_digest # 1. clean/stripped text without any markup, paragraph, headings etc. without endnotes # 2. endnotes clean/stripped text digest only (there may be several endnotes within a paragraph) # 3. whole object, text with markup and any endnotes, (question: with or without the endnote digests??? presumption better without, [however may be easier to check with?]) # [digests should not include other digests] data=@data.compact @tuned_file=[] sha_ =@env.digest(@md.opt).type begin sha_ ? (require 'digest/sha2') : (require 'digest/md5') rescue LoadError SiSU_Utils::CodeMarker.new(__LINE__,__FILE__,:fuchsia).error(sha_ + ' NOT FOUND') end data.each do |t_o| unless t_o.obj.is_a?(Array) t_o.obj=t_o.obj.strip end if (t_o.of !=:structure \ && t_o.of !=:comment \ && t_o.of !=:layout) \ && t_o.ocn.is_a?(Fixnum) case sha_ when :sha512 for hash_class in [ Digest::SHA512 ] @tuned_file << stamped(t_o,hash_class) end when :sha256 for hash_class in [ Digest::SHA256 ] @tuned_file << stamped(t_o,hash_class) end when :md5 for hash_class in [ Digest::MD5 ] @tuned_file << stamped(t_o,hash_class) end end else @tuned_file << t_o unless t_o.nil? end end @tuned_file=@tuned_file.flatten #use md5 or to create hash of each ao object including ocn, & add into to each ao object end def endnote_digest(data) data.each.map do |en_plus| case en_plus when /#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/ if en_plus =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/ t_o_txt,en_open,en_txt,en_close= /(.*?)(#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(.+?)(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m. match(en_plus)[1..4] stripped_en=SiSU_TextRepresentation::Alter.new(en_txt).strip_clean_of_markup digest_en_strip=case @env.digest(@md.opt).type when :sha512 Digest::SHA512.hexdigest(stripped_en) when :sha256 Digest::SHA256.hexdigest(stripped_en) when :md5 Digest::MD5.hexdigest(stripped_en) else Digest::SHA256.hexdigest(stripped_en) end t_o_txt + en_open + en_txt + Mx[:id_o] + digest_en_strip + Mx[:id_c] + en_close else STDERR.puts "Error Exception - problem encountered with:\n#{en_plus}" #arbitrary exception, tidy up end else en_plus end end.join end def stamped(t_o,hash_class) #decide what hash information is most useful, is compromise necessary? t_o.obj=SiSU_TextRepresentation::Alter.new(t_o).strip_clean_of_extra_spaces #SiSU_TextRepresentation::Alter.new(t_o).strip_clean_of_markup #check #SiSU_TextRepresentation::Alter.new(t_o).semi_revert_markup #check #SiSU_TextRepresentation::ModifiedTextPlusHashDigest.new(@md,t_o).composite.dgst #check unless t_o.is==:code case t_o.obj when /#{Mx[:en_a_o]}[\d*+]+\s+.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}[*+]\d+\s+.+?#{Mx[:en_b_c]}/m en_and_t_o_digest=[] t_o.obj=t_o.obj. gsub(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch t_o_plus_en=t_o.obj. scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m) t_o_tail=if t_o.obj =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+.*/m.match(t_o.obj)[1] else '' end t_o_plus_en << t_o_tail en_and_t_o_digest << endnote_digest(t_o_plus_en) en_and_t_o_digest.join(' ') else #@tuned << t_o + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless t_o.nil? end else #@tuned << t_o + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless t_o.nil? end t_o #KEEP intact end def strip_clean_extra_spaces(s) # ao output tuned s=s.dup s=s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') unless s =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/ s=s.gsub(/ [ ]+/,' '). gsub(/^ [ ]+/,''). gsub(/ [ ]+$/,''). gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2'). gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2') end end end __END__