about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2011-08-18 23:16:25 -0400
committer Ralph Amissah <ralph@amissah.com> 2011-08-18 23:16:27 -0400
commit 6bd77e1c0f6c1d1818c5673cb5d4e1eb1c3af12d (patch)
tree b731ce1e66b5c2bbe59477c42ba13754764615d7
parent v3: epub, param, (defaults) epub tuning (diff)
v3: concordance, i18n, start adding alternative language alphabets
* Danish, Norwegian, Swedish
-rw-r--r-- data/doc/sisu/CHANGELOG_v3 3
-rw-r--r-- lib/sisu/v3/concordance.rb 13
-rw-r--r-- lib/sisu/v3/i18n.rb 34
3 files changed, 48 insertions, 2 deletions
diff --git a/data/doc/sisu/CHANGELOG_v3 b/data/doc/sisu/CHANGELOG_v3
index 1298d880..015ca6e6 100644
--- a/data/doc/sisu/CHANGELOG_v3
+++ b/data/doc/sisu/CHANGELOG_v3
@@ -26,6 +26,9 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_3.0.16.orig.tar.xz
* sync ncx with sisu internal structural logic
(correct depth, include level A, place sisu toc)
+ * concordance, i18n, start adding alternative language alphabets
+ (Danish, Norwegian, Swedish)
+
%% 3.0.15.orig.tar.gz (2011-08-08:32/1)
http://git.sisudoc.org/?p=code/sisu.git;a=log;h=refs/tags/debian/sisu_3.0.15-1
http://www.jus.uio.no/sisu/pkg/src/sisu_3.0.15.orig.tar.xz
diff --git a/lib/sisu/v3/concordance.rb b/lib/sisu/v3/concordance.rb
index 2c1c9ddf..f8118cd0 100644
--- a/lib/sisu/v3/concordance.rb
+++ b/lib/sisu/v3/concordance.rb
@@ -173,6 +173,8 @@ WOK
end
end
class Words
+ require_relative 'i18n' # i18n.rb
+ include SiSU_i18n
require_relative 'defaults' # defaults.rb
include SiSU_Viz
require_relative 'html_format' # html_format.rb
@@ -198,7 +200,9 @@ WOK
@rxp_excluded1=/#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/
@rxp_excluded0=/^(?:#{Mx[:fa_bold_o]}|#{Mx[:fa_italics_o]})?(?:to\d+|\d+|&nbsp;|#{Mx[:br_endnotes]}|EOF|#{Mx[:br_eof]}|thumb_\S+|snap_\S+|_+|-+|[(]?(?:ii+|iv|vi+|ix|xi+|xiv|xv|xvi+|xix|xx)[).]?|\S+?_\S+|[\d_]+\w\S+|[\w\d]{1,2}|\d{1,3}\w?|#{@dp}|[0-9a-f]{16,64}|\d{2,3}x\d{2,3}|\S{0,2}sha\d|\S{0,3}\d{4}w\d\d|\b\w\d+|\d_all\b|e\.?g\.?)(?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})?$/mi #this regex causes and cures a stack dump in ruby 1.9 !!!
@rgx_splitlist=%r{[—.,;:#{Mx[:nbsp]}-]}mi
- @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|<\S+?>|\w+|[a-zA-Z]+}mi
+ @alph=SiSU_i18n::Alphabet.new(@md.opt.lng).hash_arrays
+ @alphlst=SiSU_i18n::Alphabet.new(@md.opt.lng).hash_strings
+ @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[#{@alphlst[:l]}#{@alphlst[:u]}0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[#{@alphlst[:l]}#{@alphlst[:u]}0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|<\S+?>|[#{@alphlst[:l]}#{@alphlst[:u]}]+|\w+}mi
rescue; SiSU_Errors::Info_error.new($!,$@,@md.opt.cmd,@md.fns).error
end
end
@@ -244,6 +248,11 @@ WOK
line.obj.gsub!(/#{@rxp_excluded1}/,' ')
line.obj=line.obj.split(@rgx_splitlist).join(' ') #%take in word or other match
for word in line.obj.scan(@rgx_scanlist) #%take in word or other match
+ if word =~ /^([#{@alphlst[:l]}])/
+ firstletter=$1
+ flu=firstletter.tr(@alphlst[:l],@alphlst[:u])
+ word.gsub!(/^#{firstletter}/,flu )
+ end
word.gsub!(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}|#{Mx[:url_o]}|#{Mx[:url_c]}/,'')
word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,'')
word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'')
@@ -310,8 +319,8 @@ WOK
head.gsub!(/#{Xx[:html_relative2]}/m,@file.path_rel_links.html_seg_2)
head.gsub!(/#{Xx[:html_relative1]}/m,@file.path_rel_links.html_seg_1)
@file_concordance << head
- alph=%W[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z]
@file_concordance << '<p>'
+ alph=@alph[:u]
alph.each {|x| @file_concordance << %{<a href="##{x}">#{x}</a>,&nbsp;}}
@file_concordance << '</p>'
letter=alph.shift
diff --git a/lib/sisu/v3/i18n.rb b/lib/sisu/v3/i18n.rb
index 0666cede..00fe7101 100644
--- a/lib/sisu/v3/i18n.rb
+++ b/lib/sisu/v3/i18n.rb
@@ -189,6 +189,40 @@ module SiSU_i18n
self
end
end
+ class Alphabet
+ def initialize(lng_code)
+ @lng_code=lng_code
+ end
+ def hash_arrays
+ @alph=case @lng_code
+ when /en/ #english
+ {
+ u: %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z],
+ l: %w[a b c d e f g h i j k l m n o p q r s t u v w x y z]
+ }
+ when /da|no|nn/ #danish, norwegian
+ {
+ u: %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Å Æ Ø],
+ l: %w[a b c d e f g h i j k l m n o p q r s t u v w x y z å æ ø]
+ #u: %W[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Æ Ø Å],
+ #l: %w[a b c d e f g h i j k l m n o p q r s t u v w x y z æ ø å]
+ }
+ when /sv/ #swedish
+ {
+ u: %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Å Ä Ö],
+ l: %w[a b c d e f g h i j k l m n o p q r s t u v w x y z å ä ö]
+ }
+ else #english default
+ {
+ u: %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z],
+ l: %w[a b c d e f g h i j k l m n o p q r s t u v w x y z]
+ }
+ end
+ end
+ def hash_strings
+ { u: hash_arrays[:u].join, l: hash_arrays[:l].join }
+ end
+ end
end
__END__
Language Lists