diff options
Diffstat (limited to 'lib/sisu/v0/concordance.rb')
-rw-r--r-- | lib/sisu/v0/concordance.rb | 71 |
1 files changed, 48 insertions, 23 deletions
diff --git a/lib/sisu/v0/concordance.rb b/lib/sisu/v0/concordance.rb index f62b20ac..babc6655 100644 --- a/lib/sisu/v0/concordance.rb +++ b/lib/sisu/v0/concordance.rb @@ -66,8 +66,9 @@ module SiSU_Concordance include SiSU_Env require "#{SiSU_lib}/defaults" include SiSU_Viz - require "#{SiSU_lib}/html_format_css" + require "#{SiSU_lib}/html_format" include SiSU_HTML_Format + require "#{SiSU_lib}/html_minitoc" class Source def initialize(opt) @opt=opt @@ -105,22 +106,23 @@ module SiSU_Concordance #revisit, both requires (html & shared_xml) needed for stand alone operation (sisu -w [filename]) require "#{SiSU_lib}/shared_xml" require "#{SiSU_lib}/html" - def initialize(lnk,env,md) - @env,@md=env,md + def initialize(particulars) + @particulars,@md=particulars,particulars.md + @data=SiSU_HTML::Source::Html_environment.new(particulars).tuned_file_instructions @vz=SiSU_Env::Get_init.instance.skin - file_array=@env.read_source_file(@md.fns) txt_path=%{#{@md.dir_out}} SiSU_Env::Info_skin.new(@md).select @md_title=@md.title @fnb=@md.fnb @lex_button=%{<a href="http://www.jus.uio.no/sisu/" target="_top"><img border="0" height="44" width="144" valign="center" src="../_sisu/image/sisu.png" alt="SiSU home -->"></a>} - @lnk=lnk @doc_details =<<WOK <table summary="links to text related to this rudimentary index" width="96%" border="0" bgcolor="white" cellpadding="0" align="center"><tr><td width="2%" align="right"> </td><td width="94%" valign="top" align="justify"><h1 class="small"><a href="#{@md.fn[:toc]}" #{@vz.js_toc}><b>#{@md.dc_title}</b></a></h1><p class="bold">#{@md.dc_creator}</p></td></tr></table> WOK end def create - head_banner=SiSU_HTML_Format_type::Head_toc.new(@md) + head_banner=SiSU_HTML_Format::Head_toc.new(@md) + minitoc=SiSU_HTML_minitoc::Toc_mini.new(@md,@data).songsheet + toc='<div class="toc">' + minitoc.to_s + '</div>' <<WOK <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> @@ -140,6 +142,8 @@ WOK <body> #{@vz.js_top} #{head_banner.concordance_navigation_band('pdf')} + #{toc} +<div class="content"> #@doc_details <p>Word index links are to html versions of the text the segmented version followed by the scroll (single document) version.<br />[For segmented text references [T1], [T2] or [T3] appearing without a link, indicates that the word appears in a title (or subtitle) of the text (that is identifiable by the appended object citation number).]</p> <p>(The word listing/index is Case sensitive: Capitalized words appear before lower case)</p> @@ -148,7 +152,7 @@ WOK [if number of occurences exceed number of references - word occurs more than once in at least one reference. Footnote/endnotes are either assigned to the paragraph from which they are referenced or ignored, so it is relevant to check the footnotes referenced from within a paragraph as well.] </p> <p> - (After the page is fully loaded) you can jump directly to a word by appending a hash (#) and the word to the url for this text, (do not forget that words are case sensitive, and may be listed twice (starting with and without an upper case letter)), #your_word # [ http://[web host]/#@fnb/concordance.html#your_word ] + (After the page is fully loaded) you can jump directly to a word by appending a hash (#) and the word to the url for this text, (do not forget that words are case sensitive, and may be listed twice (starting with and without an upper case letter)), #your_word # [ http://[web host]/#{@fnb}/concordance.html#your_word ] </p> WOK end @@ -171,12 +175,13 @@ WOK class Words require "#{SiSU_lib}/defaults" include SiSU_Viz - require "#{SiSU_lib}/html_format_css" + require "#{SiSU_lib}/html_format" include SiSU_HTML_Format require "#{SiSU_lib}/sysenv" include SiSU_Screen @@dp=nil def initialize(particulars) + @particulars=particulars begin @vz=SiSU_Env::Get_init.instance.skin @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array @@ -194,18 +199,19 @@ WOK @rxp_t3=Regexp.new('^T3') @rxp_excluded1=/(?:https?|file|ftp):\/\/\S+/ @rxp_excluded0=/^(?:#{Mx[:fa_bold_o]}|#{Mx[:fa_italics_o]})?(?:to\d+|\d+| |#{Mx[:br_endnotes]}|EOF|#{Mx[:br_eof]}|thumb_\S+|snap_\S+|_+|-+|[(]?(?:ii+|iv|vi+|ix|xi+|xiv|xv|xvi+|xix|xx)[).]?|\S+?_\S+|[\d_]+\w\S+|[\w\d]{1,2}|\d{1,3}\w?|#@dp|[0-9a-f]{16,64}|\d{2,3}x\d{2,3}|\S{0,2}sha\d|\S{0,3}\d{4}w\d\d|\b\w\d+|\d_all\b|e\.?g\.?)(?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})?$/mi #this regex causes and cures a stack dump in ruby 1.9 !!! - @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|#{Mx[:gr_o]}code#{Mx[:gr_o]}.+?#{Mx[:gr_o]}code-end#{Mx[:gr_o]}|<\S+?>|#{Mx[:id_o]}\S+?#{Mx[:id_c]}|\w+|[a-zA-Z]+}mi + @rgx_splitlist=%r{[—.,;:-]|#{Mx[:id_o]}\S+?#{Mx[:id_c]}}mi + @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|#{Mx[:gr_o]}code#{Mx[:gr_o]}.+?#{Mx[:gr_o]}code-end#{Mx[:gr_o]}|<\S+?>|\w+|[a-zA-Z]+}mi rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error end end def songsheet begin mkdir_p(@path) unless FileTest.directory?(@path) - @file_index_all=File.open("#@path/#{@md.fn[:concordance]}",'w') + @file_concordance=File.open("#@path/#{@md.fn[:concordance]}",'w') map_para rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error ensure - @file_index_all.close + @file_concordance.close end end protected @@ -218,13 +224,13 @@ WOK @sfx='.html' #used for hardlinks, previous setting @sfx='', web server takes care of suffix @word_location_seg=wordlocation.gsub(/(.+?)\#(\d+)/,"#{@md.fnl[:pre]}\\1#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}#\\2") unless wordlocation.nil? case @wordlocation - when @rxp_t1 - %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } - when @rxp_t2 - %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } - when @rxp_t3 - %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } - else %{<a href="#@word_location_seg">#@show</a>, } + when @rxp_t1 + %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } + when @rxp_t2 + %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } + when @rxp_t3 + %{[<a href="#{@md.fnl[:pre]}doc#{@md.fnl[:mid]}#@sfx#{@md.fnl[:post]}##@show">H</a>]#@show, } + else %{<a href="#@word_location_seg">#@show</a>, } end end def map_para @@ -238,8 +244,10 @@ WOK end if toy =~/\d+/ \ and toy !~/^0$/ + line=line.split(@rgx_splitlist).join(' ') #%take in word or other match for word in line.scan(@rgx_scanlist) #%take in word or other match #word.gsub!(@rxp_clean,'') + word.gsub!(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}(?:http)?/,'') word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,'') word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') word.gsub!(/#{Mx[:gl_o]}#[a-z]+#{Mx[:gl_c]}/,'') @@ -274,7 +282,8 @@ WOK word.gsub!(/^\(?[a-zA-Z]\)$/,'') word.gsub!(/^\d+(st|nd|rd|th)$/,'') word.gsub!(/^(\d+\.?)+$/, '') - word.gsub(/#{Mx[:mk_o]}|#{Mx[:mk_c]}/,'') + word.gsub!(/#{Mx[:mk_o]}|#{Mx[:mk_c]}/,'') + word.gsub!(/:name#\S+/,'') word.gsub!(/^\S$/,'') word=nil if word =~/^\S$/ word=nil if word =~/^\s*$/ #watch @@ -306,20 +315,36 @@ WOK end scr='<font size="1" color="#777777" face=times new roman><img border="0" height="15" width="15" src="../_sisu/image/b_doc.png" alt="Full Text"> scroll: </font><font size="1" color="#222222" face=times new roman>doc# </font> ' seg='' - @file_index_all << SiSU_Concordance::Source::Doc_title.new('toc',@env,@md).create + @file_concordance << SiSU_Concordance::Source::Doc_title.new(@particulars).create + alph=%W[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @file_concordance << '<p>' + alph.each {|x| @file_concordance << %{<a href="##{x}">#{x}</a>, }} + @file_concordance << '</p>' + letter=alph.shift + @file_concordance << %{\n<hr />\n<p class="book_index_lev1"><a name="A">A</a></p>} for word in @freq.keys.sort! {|a,b| a.downcase<=>b.downcase} + f=/^(\S)/.match(word)[1] + if letter < f.upcase + while letter < f.upcase + if alph.length > 0 + letter=alph.shift + @file_concordance << %{\n<hr />\n<p class="book_index_lev1"><a name="#{letter}">#{letter}</a></p>} + else break + end + end + end keyword=SiSU_Concordance::Source::Word.new(word,@freq[word]).html if keyword !~ @rxp_excluded0 if @word_map[word][0] =~ /\d+/ wm=[] - @file_index_all << %{#{keyword}#{seg}#{@word_map[word].uniq.compact.join}} + @file_concordance << %{#{keyword}#{seg}#{@word_map[word].uniq.compact.join}} end - @file_index_all << '</p>' + @file_concordance << '</p>' end # special cases endnotes and header levels 1 - 3 end credits=@vz.credits_sisu - @file_index_all << "#{credits}</body>\n</html>" # footer + @file_concordance << %{</div><div class="content">#{credits}<div></body>\n</html>} # footer tell=SiSU_Screen::Ansi.new(@md.cmd,@md.fns,"#{@env.path.output_tell}/#{@md.fn[:concordance]}") tell.flow if @md.cmd =~/[MV]/ end |