diff options
Diffstat (limited to 'lib/sisu/v3dv/concordance.rb')
-rw-r--r-- | lib/sisu/v3dv/concordance.rb | 71 |
1 files changed, 36 insertions, 35 deletions
diff --git a/lib/sisu/v3dv/concordance.rb b/lib/sisu/v3dv/concordance.rb index 2e14ab14..e51514c3 100644 --- a/lib/sisu/v3dv/concordance.rb +++ b/lib/sisu/v3dv/concordance.rb @@ -245,54 +245,55 @@ WOK ocn=line.ocn.to_s if ocn =~/\d+/ \ and ocn !~/^0$/ - line.obj.gsub!(/#{@rxp_excluded1}/,' ') + line.obj=line.obj.gsub(/#{@rxp_excluded1}/,' ') line.obj=line.obj.split(@rgx_splitlist).join(' ') #%take in word or other match for word in line.obj.scan(@rgx_scanlist) #%take in word or other match if word =~ /^([#{@alphlst[:l]}])/ firstletter=$1 flu=firstletter.tr(@alphlst[:l],@alphlst[:u]) - word.gsub!(/^#{firstletter}/,flu ) + word=word.gsub(/^#{firstletter}/,flu ) end - word.gsub!(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}|#{Mx[:url_o]}|#{Mx[:url_c]}/,'') - word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,'') - word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') - word.gsub!(/#{Mx[:gl_o]}#[a-z]+#{Mx[:gl_c]}/,'') - word.gsub!(/#{Mx[:gl_o]}#[0-9]+#{Mx[:gl_c]}/,'') - word.gsub!(/[0-9a-f]{10,}/,' ') if word =~/[0-9]/ - word.gsub!(/#{Mx[:br_line]}/,' ') - word.gsub!(/^ +/,'') - word.gsub!(/^\S$/,'') + word=word.gsub(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}|#{Mx[:url_o]}|#{Mx[:url_c]}/,''). + gsub(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,''). + gsub(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,''). + gsub(/#{Mx[:gl_o]}#[a-z]+#{Mx[:gl_c]}/,''). + gsub(/#{Mx[:gl_o]}#[0-9]+#{Mx[:gl_c]}/,'') + word=word.gsub(/[0-9a-f]{10,}/,' ') if word =~/[0-9]/ + word=word.gsub(/#{Mx[:br_line]}/,' '). + gsub(/^ +/,''). + gsub(/^\S$/,'') word=nil if word.empty? word=nil if word =~@rxp_excluded0 #watch word=nil if word =~/^\S$/ if word - word.gsub!(/#{Mx[:br_nl]}|#{Mx[:br_line]}/,' ') - word.gsub!(/#{Mx[:fa_o]}[a-z]{1,7}#{Mx[:fa_o_c]}|#{Mx[:fa_c_o]}[a-z]{1,7}#{Mx[:fa_c]}/,'') - word.gsub!(/#{Mx[:en_a_o]}(?:\d|[*+])*|#{Mx[:en_b_o]}(?:\d|[*+])*|#{Mx[:en_a_c]}|#{Mx[:en_b_c]}/mi,'') - word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,''); word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') - word.gsub!(/<\/?\S+?>/,'') - word.gsub!(/^\@+/,'') - word.strip! - word.gsub!(/#{Mx[:tc_p]}.+/,'') - word.gsub!(/[\.,;:"]$/,'') - word.gsub!(/["]/,'') - word.gsub!(/^\s*[\(]/,'') - word.gsub!(/[\(]\s*$/,'') - word.gsub!(/^(?:See|e\.?g\.?).+/,'') - word.gsub!(/^\s*[.,;:]\s*/,'') - word.strip! - word.gsub!(/^\(?[a-zA-Z]\)$/,'') - word.gsub!(/^\d+(st|nd|rd|th)$/,'') - word.gsub!(/^(\d+\.?)+$/, '') - word.gsub!(/#{Mx[:mk_o]}|#{Mx[:mk_c]}/,'') - word.gsub!(/:name#\S+/,'') - word.gsub!(/^\S$/,'') + word=word.gsub(/#{Mx[:br_nl]}|#{Mx[:br_line]}/,' '). + gsub(/#{Mx[:fa_o]}[a-z]{1,7}#{Mx[:fa_o_c]}|#{Mx[:fa_c_o]}[a-z]{1,7}#{Mx[:fa_c]}/,''). + gsub(/#{Mx[:en_a_o]}(?:\d|[*+])*|#{Mx[:en_b_o]}(?:\d|[*+])*|#{Mx[:en_a_c]}|#{Mx[:en_b_c]}/mi,''). + gsub(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,''). + gsub(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,''). + gsub(/<\/?\S+?>/,''). + gsub(/^\@+/,''). + strip. + gsub(/#{Mx[:tc_p]}.+/,''). + gsub(/[\.,;:"]$/,''). + gsub(/["]/,''). + gsub(/^\s*[\(]/,''). + gsub(/[\(]\s*$/,''). + gsub(/^(?:See|e\.?g\.?).+/,''). + gsub(/^\s*[.,;:]\s*/,''). + strip. + gsub(/^\(?[a-zA-Z]\)$/,''). + gsub(/^\d+(st|nd|rd|th)$/,''). + gsub(/^(\d+\.?)+$/, ''). + gsub(/#{Mx[:mk_o]}|#{Mx[:mk_c]}/,''). + gsub(/:name#\S+/,''). + gsub(/^\S$/,'') word=nil if word =~/^\S$/ word=nil if word =~/^\s*$/ #watch if word unless word =~/[A-Z][A-Z]/ \ or word =~/\w+\s\w+/ - word.capitalize! + word=word.capitalize end @freq[word] +=1 @word_map[word] ||= [] @@ -316,8 +317,8 @@ WOK scr='<font size="1" color="#777777" face=times new roman><img border="0" height="15" width="15" src="../_sisu/image/b_doc.png" alt="Full Text"> scroll: </font><font size="1" color="#222222" face=times new roman>doc# </font> ' seg='' head=SiSU_Concordance::Source::DocTitle.new(@particulars).create - head.gsub!(/#{Xx[:html_relative2]}/m,@file.path_rel_links.html_seg_2) - head.gsub!(/#{Xx[:html_relative1]}/m,@file.path_rel_links.html_seg_1) + head=head.gsub(/#{Xx[:html_relative2]}/m,@file.path_rel_links.html_seg_2). + gsub(/#{Xx[:html_relative1]}/m,@file.path_rel_links.html_seg_1) @file_concordance << head @file_concordance << '<p>' alph=@alph[:u] |