From 291080c0495f59f031bf5c0de2482f1bc7df59f2 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Wed, 10 Sep 2008 21:24:31 -0400 Subject: primarily on book index where (markup) provided * book index, html, tex/pdf, xml * texpdf no ocn if ocn is 0 * odf, plaintext, if book index? remove ... do other outputs for which not relevant * concordance, better matches * constants, dal special character for hardspace changed as (ruby) regx bug in replacing it in xml, odd but move on Note: to fix html seg headers for endnotes and for index --- lib/sisu/v0/concordance.rb | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'lib/sisu/v0/concordance.rb') diff --git a/lib/sisu/v0/concordance.rb b/lib/sisu/v0/concordance.rb index 485f3df5..fad91d14 100644 --- a/lib/sisu/v0/concordance.rb +++ b/lib/sisu/v0/concordance.rb @@ -121,8 +121,8 @@ WOK end def create head_banner=SiSU_HTML_Format::Head_toc.new(@md) -minitoc=SiSU_HTML_minitoc::Toc_mini.new(@md,@data).songsheet -toc='
' + minitoc.to_s + '
' + minitoc=SiSU_HTML_minitoc::Toc_mini.new(@md,@data).songsheet + toc='
' + minitoc.to_s + '
' < @@ -199,7 +199,8 @@ WOK @rxp_t3=Regexp.new('^T3') @rxp_excluded1=/(?:https?|file|ftp):\/\/\S+/ @rxp_excluded0=/^(?:#{Mx[:fa_bold_o]}|#{Mx[:fa_italics_o]})?(?:to\d+|\d+| |#{Mx[:br_endnotes]}|EOF|#{Mx[:br_eof]}|thumb_\S+|snap_\S+|_+|-+|[(]?(?:ii+|iv|vi+|ix|xi+|xiv|xv|xvi+|xix|xx)[).]?|\S+?_\S+|[\d_]+\w\S+|[\w\d]{1,2}|\d{1,3}\w?|#@dp|[0-9a-f]{16,64}|\d{2,3}x\d{2,3}|\S{0,2}sha\d|\S{0,3}\d{4}w\d\d|\b\w\d+|\d_all\b|e\.?g\.?)(?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})?$/mi #this regex causes and cures a stack dump in ruby 1.9 !!! - @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|#{Mx[:gr_o]}code#{Mx[:gr_o]}.+?#{Mx[:gr_o]}code-end#{Mx[:gr_o]}|<\S+?>|#{Mx[:id_o]}\S+?#{Mx[:id_c]}|\w+|[a-zA-Z]+}mi + @rgx_splitlist=%r{[—.,;:-]|#{Mx[:id_o]}\S+?#{Mx[:id_c]}}mi + @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|#{Mx[:gr_o]}code#{Mx[:gr_o]}.+?#{Mx[:gr_o]}code-end#{Mx[:gr_o]}|<\S+?>|\w+|[a-zA-Z]+}mi rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error end end @@ -223,13 +224,13 @@ WOK @sfx='.html' #used for hardlinks, previous setting @sfx='', web server takes care of suffix @word_location_seg=wordlocation.gsub(/(.+?)\#(\d+)/,"#{@md.fnl[:pre]}\\1#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}#\\2") unless wordlocation.nil? case @wordlocation - when @rxp_t1 - %{[H]#@show, } - when @rxp_t2 - %{[H]#@show, } - when @rxp_t3 - %{[H]#@show, } - else %{#@show, } + when @rxp_t1 + %{[H]#@show, } + when @rxp_t2 + %{[H]#@show, } + when @rxp_t3 + %{[H]#@show, } + else %{#@show, } end end def map_para @@ -243,6 +244,7 @@ WOK end if toy =~/\d+/ \ and toy !~/^0$/ + line=line.split(@rgx_splitlist).join(' ') #%take in word or other match for word in line.scan(@rgx_scanlist) #%take in word or other match #word.gsub!(@rxp_clean,'') word.gsub!(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}(?:http)?/,'') -- cgit v1.2.3