From 3993be7dfe7097abfb00ff2b685ca0dc47acfe09 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 16 Jul 2007 12:46:18 +0100 Subject: refactoring, tidy --- lib/sisu/v0/db_import.rb | 180 +++++++++++++++++++++++------------------------ 1 file changed, 87 insertions(+), 93 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v0/db_import.rb b/lib/sisu/v0/db_import.rb index b9528063..91360613 100644 --- a/lib/sisu/v0/db_import.rb +++ b/lib/sisu/v0/db_import.rb @@ -364,23 +364,15 @@ module SiSU_DB_import if data[/^([123])~\s+(.+?)<~(\d+);((?:\w|[0-6]:)\d+);(\w\d+)><([0-9a-f]{#{@@dl}}):([0-9a-f]{#{@@dl}})>/] @col[:lev],txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:digest_clean],@col[:digest_all]=$1,$2,$3,$4,$5,$6,$7 @col[:lid]+=1 - if txt =~/~\{.+?\}~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en << txt.scan(/~\{(\d+).+?\}~/) - txt.gsub!(/~\{(\d+).+?\}~/,'\1') - end - if txt =~/~\[\*.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_ast << txt.scan(/~\[[*](\d+).+?\]~/) - txt.gsub!(/~\[([*]\d+).+?\]~/,'\1') - end - if txt =~/~\[\+.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_pls << txt.scan(/~\[[+](\d+).+?\]~/) - txt.gsub!(/~\[([+]\d+).+?\]~/,'\1') + if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + endnotes(txt).range + if txt =~/~\{.+?\}~/; @en << endnotes(txt).standard + end + if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + end + if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + end + txt=endnotes(txt).clean_text end @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_minus special_character_escape(@col[:body]) @@ -412,23 +404,15 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#@hname.html" - if txt =~ /~\{.+?\}~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en << txt.scan(/~\{(\d+).+?\}~/) - txt.gsub!(/~\{(\d+).+?\}~/,%{\\1}) - end - if txt =~/~\[\*.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_ast << txt.scan(/~\[[*](\d+).+?\]~/) - txt.gsub!(/~\[([*]\d+).+?\]~/,%{\\1}) - end - if txt =~/~\[\+.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_pls << txt.scan(/~\[[+](\d+).+?\]~/) - txt.gsub!(/~\[([+]\d+).+?\]~/,%{\\1}) + if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + endnotes(txt).range + if txt =~ /~\{.+?\}~/; @en << endnotes(txt).standard + end + if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + end + if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + end + txt=endnotes(txt).clean_text(@base_url) end @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_plus special_character_escape(@col[:body]) @@ -458,23 +442,15 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#@hname.html" - if txt =~ /~\{.+?\}~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en << txt.scan(/~\{(\d+).+?\}~/) - txt.gsub!(/~\{(\d+).+?\}~/,%{\\1}) - end - if txt =~/~\[\*.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_ast << txt.scan(/~\[[*](\d+).+?\]~/) - txt.gsub!(/~\[([*]\d+).+?\]~/,%{\\1}) - end - if txt =~/~\[\+.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_pls << txt.scan(/~\[[+](\d+).+?\]~/) - txt.gsub!(/~\[([+]\d+).+?\]~/,%{\\1}) + if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + endnotes(txt).range + if txt =~ /~\{.+?\}~/; @en << endnotes(txt).standard + end + if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + end + if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + end + txt=endnotes(txt).clean_text(@base_url) end @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_plus special_character_escape(@col[:body]) @@ -504,23 +480,15 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#@hname.html" - if txt =~ /~\{.+?\}~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en << txt.scan(/~\{(\d+).+?\}~/) - txt.gsub!(/~\{(\d+).+?\}~/,%{\\1}) - end - if txt =~/~\[\*.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_ast << txt.scan(/~\[[*](\d+).+?\]~/) - txt.gsub!(/~\[([*]\d+).+?\]~/,%{\\1}) - end - if txt =~/~\[\+.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_pls << txt.scan(/~\[[+](\d+).+?\]~/) - txt.gsub!(/~\[([+]\d+).+?\]~/,%{\\1}) + if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + endnotes(txt).range + if txt =~ /~\{.+?\}~/; @en << endnotes(txt).standard + end + if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + end + if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + end + txt=endnotes(txt).clean_text(@base_url) end @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_plus special_character_escape(@col[:body]) @@ -545,24 +513,15 @@ module SiSU_DB_import end @env=SiSU_Env::Info_env.new(@md.fns) @base_url="#{@env.url.root}/#{@md.fnb}/#@hname.html" - if txt =~ /~\{.+?\}~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en << txt.scan(/~\{(\d+).+?\}~/) - txt.gsub!(/~\{(\d+).+?\}~/,%{\\1}) - #txt.gsub!(/~\{(\d+).+?\}~/,'^[\1]') # remove endnote, keep endnote reference number, display as, e.g. [^1] - end - if txt =~/~\[\*.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_ast << txt.scan(/~\[[*](\d+).+?\]~/) - txt.gsub!(/~\[([*]\d+).+?\]~/,%{\\1}) - end - if txt =~/~\[\+.+?\]~/ - word_mode=txt.scan(/\S+/) - endnote_range(word_mode) - @en_pls << txt.scan(/~\[[+](\d+).+?\]~/) - txt.gsub!(/~\[([+]\d+).+?\]~/,%{\\1}) + if txt =~/~[{\[][*+]?(\d+)\s+.+?[}\]]~/ + endnotes(txt).range + if txt =~ /~\{.+?\}~/; @en << endnotes(txt).standard + end + if txt =~/~\[\*.+?\]~/; @en_ast << endnotes(txt).asterisk + end + if txt =~/~\[\+.+?\]~/; @en_pls << endnotes(txt).plus + end + txt=endnotes(txt).clean_text(@base_url) end if @sql_type=~/pg/ and txt.size > (document_clean - 1) #% examine pg build & remove limitation puts "\n\nTOO LARGE (TXT - see error log)\n\n" @@ -688,15 +647,50 @@ module SiSU_DB_import ensure end end - def endnote_range(word_array) - @col[:en_a]=@col[:en_z]=nil - word_array.each do |w| - if w[/~[{\[][*+]?(\d+)\s+.+?[}\]]~/] # not tested since change 2003w31 - @col[:en_a]=$1 unless @col[:en_a] - @col[:en_z]=@col[:en_a].dup unless @col[:en_a] - @col[:en_z]=$1 if @col[:en_a] + def endnotes(txt) + @txt=txt + def standard + x=if @txt =~ /~\{.+?\}~/; @txt.scan(/~\{(\d+).+?\}~/) + else nil + end + end + def asterisk + x=if @txt =~/~\[\*.+?\]~/; @txt.scan(/~\[[*](\d+).+?\]~/) + else nil + end + end + def plus + x=if @txt =~/~\[\+.+?\]~/; @txt.scan(/~\[[+](\d+).+?\]~/) + else nil + end + end + def clean_text(base_url=nil) + if base_url + @txt.gsub!(/~\{(\d+).+?\}~/,%{\\1}) + @txt.gsub!(/~\[([*]\d+).+?\]~/,%{\\1}) + @txt.gsub!(/~\[([+]\d+).+?\]~/,%{\\1}) + else + @txt.gsub!(/~\{(\d+).+?\}~/,'\1') + @txt.gsub!(/~\[([*]\d+).+?\]~/,'\1') + @txt.gsub!(/~\[([+]\d+).+?\]~/,'\1') + end + @txt + end + def range + @col[:en_a]=@col[:en_z]=nil + if @txt =~ /~\{.+?\}~|~\[([*]\d+).+?\]~|~\[([+]\d+).+?\]~/ + word_array=@txt.scan(/\S+/) + word_array.each do |w| + if w[/~[{\[][*+]?(\d+)\s+.+?[}\]]~/] # not tested since change 2003w31 + @col[:en_a]=$1 unless @col[:en_a] + @col[:en_z]=@col[:en_a].dup unless @col[:en_a] + @col[:en_z]=$1 if @col[:en_a] + end + end end + @col end + self end def import_db_urls(dbi_unit,meta) #% import documents OID - populate database begin -- cgit v1.2.3