From 45acf3b71d8f04ffb608ae2c9ba9e0da6030e66d Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 16 Oct 2007 01:14:39 +0100 Subject: image list, extracting conditions and regex visited --- CHANGELOG | 3 +++ lib/sisu/v0/composite.rb | 15 +++++++++++---- lib/sisu/v0/digests.rb | 5 +++-- lib/sisu/v0/manpage.rb | 4 ++-- lib/sisu/v0/param.rb | 14 +++++++++++--- lib/sisu/v0/plaintext.rb | 5 +++-- lib/sisu/v0/remote.rb | 18 +++++++++++++----- lib/sisu/v0/shared_xml.rb | 6 +++--- lib/sisu/v0/sisupod_make.rb | 12 +++++++++--- lib/sisu/v0/wikispeak.rb | 5 +++-- 10 files changed, 61 insertions(+), 26 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 06e22317..19750853 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -36,6 +36,9 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_0.62.0.orig.tar.gz content associated with document to desired output locations, [decide how audio, video/multimedia are to be handled, initially just links to content] + * image conditions and regex for matching and extracting list of images used + by a document visited + * copy images associated with document along with document, does not take care of skins though, for all images still need to use -CC diff --git a/lib/sisu/v0/composite.rb b/lib/sisu/v0/composite.rb index ebad049e..4366b3ef 100644 --- a/lib/sisu/v0/composite.rb +++ b/lib/sisu/v0/composite.rb @@ -157,7 +157,8 @@ module SiSU_Assemble elsif i =~/^\}code/; false else @code_flag end - unless @code_flag + if not @code_flag \ + and i !~/^%+\s/ i.gsub!(/^([123]|:?[ABC])~\? /,'% [conditional heading:] \1~ ') #off conditional heading (consider syntax) if i =~/^0~|^@\S+?:/ i.gsub!(/\n/m,"\n% ") @@ -166,11 +167,17 @@ module SiSU_Assemble end end file[:prepared] << i - file[:images] << i.scan(rgx_image).uniq if i =~rgx_image #flag + if i !~/^%+\s/ \ + and i =~rgx_image + file[:images] << i.scan(rgx_image).uniq + end end file[:prepared] << "\n% end import" << "\n\n" - file[:images].uniq! if file[:images].length > 0 - file[:images].flatten! if file[:images].length > 0 + if file[:images].length > 0 + file[:images].flatten! + file[:images].uniq! + file[:images].delete_if {|x| x =~/http:\/\// } + end file end def insertions? diff --git a/lib/sisu/v0/digests.rb b/lib/sisu/v0/digests.rb index 1adc8582..d75ad7f1 100644 --- a/lib/sisu/v0/digests.rb +++ b/lib/sisu/v0/digests.rb @@ -158,8 +158,9 @@ module SiSU_Digest_view para_endnotes << para.scan(/~[{\[]([\d*+]+).+?<([0-9a-f]{#@dl})>[}\]]~/) end ima=[] - if para =~/\{(\S+\.(png|jpg|gif))\s.+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/ - images=para.scan(/\{(\S+\.(?:png|jpg|gif))\s.+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/).flatten + if para !~/^%+\s/ \ + and para =~/(?:^|[^_\\])\{(\S+\.(png|jpg|gif))\s.+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/ + images=para.scan(/(?:^|[^_\\])\{(\S+\.(?:png|jpg|gif))\s.+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/).flatten else image=nil end x=case para diff --git a/lib/sisu/v0/manpage.rb b/lib/sisu/v0/manpage.rb index e747e22a..20dd0450 100644 --- a/lib/sisu/v0/manpage.rb +++ b/lib/sisu/v0/manpage.rb @@ -373,8 +373,8 @@ WOK para.gsub!(/<:name#\S+?>/,'') # remove name links para.gsub!(/ /,' ') # decide on para.gsub!(/(["''])/,"\\\\\\1") # quotation marks need escape - para.gsub!(/\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") - para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') + para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") + para.gsub!(/^(?:^|[^_\\])\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=para.scan(/\S+/) if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta diff --git a/lib/sisu/v0/param.rb b/lib/sisu/v0/param.rb index 8a95942a..1147cd80 100644 --- a/lib/sisu/v0/param.rb +++ b/lib/sisu/v0/param.rb @@ -253,7 +253,7 @@ module SiSU_Param end @code_flag=false fns_array.each do |para| #% Scan document - if para !~/^\%+\s/ \ + if para !~/^%+\s/ \ and para =~/ 0 + @ec[:image].flatten! + @ec[:image].uniq! + @ec[:image].delete_if {|x| x =~/http:\/\// } + @ec[:image].sort! + end @ec[:audio].uniq!; @ec[:audio].flatten!; @ec[:audio].sort! @ec[:multimedia].uniq!; @ec[:multimedia].flatten!; @ec[:multimedia].sort! @man_name.gsub!(/(-)/,"\\\\\\1") diff --git a/lib/sisu/v0/plaintext.rb b/lib/sisu/v0/plaintext.rb index 18486803..1a0303cd 100644 --- a/lib/sisu/v0/plaintext.rb +++ b/lib/sisu/v0/plaintext.rb @@ -359,8 +359,9 @@ WOK para.gsub!(/(.+?)<\/a>/m,'\1') para.gsub!(/<:name#\S+?>/,'') # remove name links para.gsub!(/ /,' ') # decide on - para.gsub!(/\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") - para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') + para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") + para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') + #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=para.scan(/\S+/) if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta diff --git a/lib/sisu/v0/remote.rb b/lib/sisu/v0/remote.rb index efa49dc0..c65c619f 100644 --- a/lib/sisu/v0/remote.rb +++ b/lib/sisu/v0/remote.rb @@ -117,20 +117,25 @@ module SiSU_Remote @rgx_skin=/(?:0~|@)skin:?\s+(\S+)/ #@rgx_skin=/^0~skin\s+(\S+)/ threads=[] for requested_page in @get_s + re_fnb=/((?:https?|file):\/\/[^\/ ]+?\/[^\/ ]+?)\/\S+?\/([^\/]+?)\.ss(t)/ #revisit and remove DO threads << Thread.new(requested_page) do |url| open(url) do |f| raise "#{url} not found" unless f - re_fnb=/((?:https?|file):\/\/[^\/ ]+?\/[^\/ ]+?)\/\S+?\/([^\/]+?)\.ss(t)/ #revisit and remove DO base_uri,fnb,instr=re_fnb.match(url)[1..3] if re_fnb - imagedir= base_uri + '/_sisu/image' #check on + imagedir=base_uri + '/_sisu/image' #check on doc_skin_dir = /((?:https?|file):\/\/\S+?)\/[^\/]+?\.sst$/.match(url).captures.join + '/_sisu/skin/doc' #"Got file, and ready to process: #{fnb}.t#{instr}" downloaded_file=File.new("#{fnb}.-sst",'w+') images=SiSU_Assemble::Remote_image.new.image(imagedir) skin=SiSU_Assemble::Remote_image.new.image(doc_skin_dir) f.collect.each do |r| # work area - skin << r.scan(@rgx_skin).uniq if r =~@rgx_skin - images << r.scan(@rgx_image).uniq if r =~@rgx_image + unless r =~/^%+\s/ + skin << r.scan(@rgx_skin).uniq if r =~@rgx_skin + if r !~/^%+\s/ \ + and r =~@rgx_image + images << r.scan(@rgx_image).uniq + end + end downloaded_file << r end if skin \ @@ -139,7 +144,10 @@ module SiSU_Remote end if images \ and images.length > 1 - images.flatten!.uniq! + images.flatten! + images.uniq! + images.delete_if {|x| x =~/http:\/\// } + images.sort! @msg,@msgs='downloading images:', [ images.join(',') ] @tell.call.warn unless @opt.cmd =~/q/ SiSU_Assemble::Remote_image.new.download_images(images) diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 11dead2d..d2897a1f 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -355,8 +355,8 @@ module SiSU_XML_munge para.gsub!(/<0;\w\d+;[um]\d+><#@dp:#@dp>/,'') if para !~/^<:code>/ #embeds a red-bullet image --> - para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1[\\2] \\5}) - para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1\\2}) + para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{[\\1] \\4}) + para.gsub!(/(?:^|[^_\\])\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1}) para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?=\s|$))/, '\1\2\4') #watch, compare html_tune para.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/, @@ -381,7 +381,7 @@ module SiSU_XML_munge para.gsub!(/<[-~]#>/,'') para.gsub!(/(^|\s)&\s+/,'\1& ') #sort para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax - para.gsub!(/\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/, + para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/, "#{@dir.url.images_local}\/\\1") para.gsub!(/ /,' ') #para.gsub!(/ /,' ') #clean diff --git a/lib/sisu/v0/sisupod_make.rb b/lib/sisu/v0/sisupod_make.rb index 277358c4..af60cf4e 100644 --- a/lib/sisu/v0/sisupod_make.rb +++ b/lib/sisu/v0/sisupod_make.rb @@ -167,9 +167,12 @@ module SiSU_Doc file_array=IO.readlines(use_file,'') skin,images,doc_import=[],[],[] file_array.each do |f| #% work area - if f !~/^%\s/ + if f !~/^%+\s/ skin << f.scan(@rgx_skin).uniq.flatten if f =~@rgx_skin - images << f.scan(@rgx_image).uniq if f =~@rgx_image + if f !~/^%+\s/ \ + and f =~@rgx_image + images << f.scan(@rgx_image).uniq + end elsif f =~/^%\s/ \ and @opt.fns =~/\.ssm\.sst$/ doc_import << f.scan(@rgx_doc_import).uniq if f =~@rgx_doc_import @@ -208,7 +211,10 @@ module SiSU_Doc #2. need images used by skin, scan skin?? if images \ and images.length > 1 - images.flatten!.uniq! + images.flatten! + images.uniq! + images.delete_if {|x| x =~/http:\/\// } + #images.sort! image_path_pwd='_sisu/image' path_pod_conf="#{@env.path.processing}/sisupod/_sisu" images_path_pod="#{path_pod_conf}/image" diff --git a/lib/sisu/v0/wikispeak.rb b/lib/sisu/v0/wikispeak.rb index 7fe7ac89..0dda6ce0 100644 --- a/lib/sisu/v0/wikispeak.rb +++ b/lib/sisu/v0/wikispeak.rb @@ -257,8 +257,9 @@ WOK para.gsub!(/(.+?)<\/a>/m,'\1') para.gsub!(/<:name#\S+?>/,'') # remove name links para.gsub!(/ /,' ') # decide on - para.gsub!(/\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") - para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') + para.gsub!(/(?:^|[^_\\])\{(\S+?\.(?:png|jpg|gif)) .+?\}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") + para.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') + #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=para.scan(/\S+/) if para =~/^0~(\S+)\s+(.+?)\Z/m # for headers d_meta=SiSU_text_utils::Header_scan.new(@md,para).meta -- cgit v1.2.3