From 58020c13d503a6c8724ac4a2f26b8d33f0a0c6f5 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 12 May 2014 21:10:22 -0400 Subject: v5: digests cleaning --- data/doc/sisu/CHANGELOG_v5 | 2 + lib/sisu/v5/digests.rb | 158 +++++---------------------------------------- 2 files changed, 18 insertions(+), 142 deletions(-) diff --git a/data/doc/sisu/CHANGELOG_v5 b/data/doc/sisu/CHANGELOG_v5 index c25d71ee..41771585 100644 --- a/data/doc/sisu/CHANGELOG_v5 +++ b/data/doc/sisu/CHANGELOG_v5 @@ -49,6 +49,8 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_5.3.5.orig.tar.xz * metadata, pdf, remove link to manifest page where --no-manifest is used (Closes: #744378) +* digests, cleaning + * added sisu.org emacs:evil:org mode notes related to sisu development %% 5.3.4.orig.tar.xz (2014-02-14:06/5) diff --git a/lib/sisu/v5/digests.rb b/lib/sisu/v5/digests.rb index 3acd51eb..b3dd40bd 100644 --- a/lib/sisu/v5/digests.rb +++ b/lib/sisu/v5/digests.rb @@ -71,7 +71,7 @@ module SiSU_DigestView @fnb=@opt.fnb @@endnotes_para=[] @@dg=nil - @dg=@@dg ||=SiSU_Env::InfoEnv.new.digest.type + @dg=@@dg ||=SiSU_Env::InfoEnv.new.digest(opt).type @particulars=SiSU_Particulars::CombinedSingleton.instance.get_all(opt) end def read @@ -114,8 +114,8 @@ module SiSU_DigestView @particulars=particulars @data,@env,@md=@particulars.ao_array,@particulars.env,@particulars.md SiSU_Env::FileOp.new(@md).mkdir - @@dg ||=@env.digest.type - @@dl ||=@env.digest.length + @@dg ||=@env.digest(@md.opt).type + @@dl ||=@env.digest(@md.opt).length @dg,@dl=@@dg,@@dl l=SiSU_Env::StandardiseLanguage.new(@md.opt.lng).language @language=l[:n] @@ -158,135 +158,6 @@ module SiSU_DigestView txt=txt.gsub(/([()])/,"\\\\\\1") end def message_digest - data=@data - sys=SiSU_Env::SystemCall.new - @p=[] - @g,@v,@r='','','' - data.each do |para| - x=nil - y,para_endnotes=[],[] - if para =~/#{Mx[:id_o]}~(\d+);(?:(?:\w|[0-6]:)\d+);(?:\w\d+)#{Mx[:id_c]}#{Mx[:id_o]}([0-9a-f]{#{@dl}}):([0-9a-f]{#{@dl}})#{Mx[:id_c]}/ - ocn,d_clean,d_all=$1,$2,$3 - @ocn=ocn unless ocn.to_i==0 - para=para.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1'). - gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1'). - gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') - if para=~/#{Mx[:en_a_o]}[\d*+]+.+?#{Mx[:id_o]}[0-9a-f]{#{@dl}}#{Mx[:id_c]}#{Mx[:en_a_c]}/ - para_endnotes << para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+).+?#{Mx[:id_o]}([0-9a-f]{#{@dl}})#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) - end - images=[] - if para !~/^%+\s/ \ - and para =~/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+\.(png|jpg|gif))\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/ - images=para.scan(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+\.(?:png|jpg|gif))\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/).flatten - else nil - end - x=case para - when /^#{Mx[:meta_o]}title#{Mx[:meta_c]}/ - "\n" << ' '*0 << '@' << ' '*9 - when /^#{Mx[:meta_o]}subtitle#{Mx[:meta_c]}/ - "\n" << ' '*1 << '@' << ' '*8 - when /^#{Mx[:lv_o]}1:/ #fix Mx[:lv_o] - "\n" << ' '*2 << ':A ' << ' '*6 << '- ' << ocn << ' '*(10-ocn.length) << d_clean << ' ' << d_all - when /^#{Mx[:lv_o]}2:/ #fix Mx[:lv_o] - "\n" << ' '*3 << ':B ' << ' '*5 << '- ' << ocn << ' '*(10-ocn.length) << d_clean << ' ' << d_all - when /^#{Mx[:lv_o]}3:/ #fix Mx[:lv_o] - "\n" << ' '*4 << ':C ' << ' '*4 << '- ' << ocn << ' '*(10-ocn.length) << d_clean << ' ' << d_all - when /^#{Mx[:lv_o]}4:/ #fix Mx[:lv_o] - "\n" << ' '*5 << '1' << ' '*4 << '- ' << ocn << ' '*(10-ocn.length) << d_clean << ' ' << d_all - when /^#{Mx[:lv_o]}5:/ #fix Mx[:lv_o] - "\n" << ' '*6 << '2' << ' '*3 << '- ' << ocn << ' '*(10-ocn.length) << d_clean << ' ' << d_all - when /^#{Mx[:lv_o]}6:/ #fix Mx[:lv_o] - "\n" << ' '*7 << '3' << ' '*2 << '- ' << ocn << ' '*(10-ocn.length) << d_clean << ' ' << d_all - else - if para =~/MD5\(\S+?\.sst\)=\s*([0-9a-f]{#{@dl}})<\/u>/ #watch - @n,@s=/MD5\((\S+?\.sst)\)=\s*([0-9a-f]{#{@dl}})<\/u>/.match(para)[1,2] - end - x=unless ocn =~ /^0$/ - if images \ - and images.length > 0 # then get path of image & produce digest - @image_name,@image_dgst,@img=[],[],[] - images.each do |i| - image_source=if FileTest.file?("#{@env.path.image_source_include_local}/#{i}") - @env.path.image_source_include_local - elsif FileTest.file?("#{@env.path.image_source_include_remote}/#{i}") - @env.path.image_source_include_remote - elsif FileTest.file?("#{@env.path.image_source_include}/#{i}") - @env.path.image_source_include - else - SiSU_Screen::Ansi.new(@md.opt.act[:color_state][:set],"ERROR - image:", %{"#{i}" missing}, "search locations: #{@env.path.image_source_include_local}, #{@env.path.image_source_include_remote} and #{@env.path.image_source_include}").error2 unless @md.opt.act[:quiet][:set]==:on - nil - end - @img << /\S+\.(png|jpg|gif)/.match(i)[1] - not_found_msg='image not found' - if image_source - para_image = image_source + '/' + i - @image_name << i - @image_dgst << (@dg =~/^sha(?:2|256)$/) \ - ? sys.sha256(para_image) - : sys.md5(para_image) - else - @image_name << ' '*16 + i + ' [image missing]' - @image_dgst << '' - @image_dgst[1]=not_found_msg + ' '*(32-not_found_msg.length) - end - end - line= "\n" + ' '*9 + ' - ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + "\n" - line_image=[] - c=0 - @image_name.each do |ok| - line_image << %{ #{@img[c]} #{@image_dgst[c][1]} #{@image_name[c]}} - c +=1 - end - line=line + line_image.join("\n") - else "\n" + ' '*9 + ' - ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all - end - else - prefix='' - metad=[@tr.full_title,@tr.author,@tr.translator,@tr.illustrator,@tr.prepared_by,@tr.digitized_by,@tr.description,@tr.subject,@tr.abstract,@tr.publisher,@tr.contributor,@tr.date_created,@tr.date_issued,@tr.date_available,@tr.date_modified,@tr.date_valid,@tr.date,@tr.type,@tr.format,@tr.rights,@tr.identifier,@tr.source,@tr.language,@tr.language_original,@tr.relation,@tr.coverage,@tr.keywords,@tr.comments,@tr.cls_loc,@tr.cls_dewey,@tr.cls_gutenberg,@tr.cls_isbn,@tr.prefix_a,@tr.prefix_b,@tr.sourcefile,@tr.sourcefile_digest,@tr.last_generated,@tr.sisu_version,@tr.ruby_version,@tr.sc_number,@tr.sc_date,'Generated by: ','Ruby version: '] - metad.each do |n| - m=rgx_txt(n) - if m=~/\S+/ \ - and para=~/^#{m}:/ - x,o=0,18 - while x < 2; o = o + 2 - x=o - n.length - end - space=' '*x - prefix="#{n.downcase}#{space}" - break - else prefix=' '*9 - end - end - m_title=rgx_txt(@tr.full_title) - m_author=rgx_txt(@tr.author) - m_sourcefile_digest=rgx_txt(@tr.sourcefile_digest) - m_sisu_version=rgx_txt(@tr.sisu_version) - m_last_generated=rgx_txt(@tr.last_generated) - m_ruby_version=rgx_txt(@tr.ruby_version) - case para - when /#{m_title}: / - @t=/#{m_title}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip - when /#{m_author}: / - @c=/#{m_author}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip - when /#{m_sourcefile_digest}.+?/ #watch - dgst_extra="\n" << ' '*21 << 'source' << ' '*4 << @md.dgst[1] << ' '*34 << @md.fns - when /#{m_sisu_version}: / - @v=/#{m_sisu_version}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip - when /#{m_last_generated}: / - @g=/#{m_last_generated}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip - when /#{m_ruby_version}: / - @r=/#{m_ruby_version}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip - end - dgst_extra ||='' - "\n" << prefix << ' - ' << ocn << ' '*(10-ocn.length) << d_clean << ' ' << d_all << dgst_extra << "\n" - end - end - para_endnotes[0].each { |e| y << "\n" + ' '*(28-e[0].length) + "[#{e[0].to_s}] #{e[1].to_s}" } if para_endnotes[0] - if y; digests(x,y) - else digests(x) - end - end - end manifest="#{@env.url.root}/#{@md.fnb}/sisu_manifest.html" a=%{ocn digest clean (no markup/notes),#{@sp*33}digest all (includes markup & endnotes)\n} description("#{@md.title.full}\n") @@ -340,18 +211,20 @@ module SiSU_DigestView data.each do |t_o| if t_o.is==:heading x=case t_o.ln - when 1 then l[1] +=1 #fix Mx[:lv_o] + when 1 then l[0] +=1 #fix Mx[:lv_o] ' '*0 << ':A' - when 2 then l[2] +=1 #fix Mx[:lv_o] + when 1 then l[1] +=1 #fix Mx[:lv_o] ' '*1 << ':B' - when 3 then l[3] +=1 #fix Mx[:lv_o] + when 2 then l[2] +=1 #fix Mx[:lv_o] ' '*2 << ':C' + when 3 then l[3] +=1 #fix Mx[:lv_o] + ' '*3 << ':D' when 4 then l[4] +=1 #fix Mx[:lv_o] - ' '*3 << '1' + ' '*4 << '1' when 5 then l[5] +=1 #fix Mx[:lv_o] - ' '*4 << '2' + ' '*5 << '2' when 6 then l[6] +=1 #fix Mx[:lv_o] - ' '*5 << '3' + ' '*6 << '3' else nil end end @@ -363,9 +236,10 @@ module SiSU_DigestView ao_structure_summary("document structure[*]\n") [0,1,2,3,4,5,6].each do |y| v=case y - when 1 then ':A' - when 2 then ':B' - when 3 then ':C' + when 0 then ':A' + when 1 then ':B' + when 2 then ':C' + when 3 then ':D' when 4 then '1 ' when 5 then '2 ' when 6 then '3 ' @@ -374,7 +248,7 @@ module SiSU_DigestView end ao_structure_summary("objects (ocn) = #{ocn}\n") ao_structure_summary("endnotes = #{endnotes}\n") - ao_structure_summary(" [*] number of headers (@) and of each heading level (:A to :C and 1 to 3)\n") + ao_structure_summary(" [*] number of headers (@) and of each heading level (:A to :D and 1 to 3)\n") end def supplementary if defined? @md.sc_number \ -- cgit v1.2.3