diff options
author | Ralph Amissah <ralph@amissah.com> | 2008-09-16 00:36:14 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2008-09-16 00:36:14 -0400 |
commit | bc9228c42269bfb4d451ca2d2d92a6a12afb094f (patch) | |
tree | 42b666a81ef350d0ff6153a49d159b3a1c348959 /lib/sisu/v0/db_import.rb | |
parent | Updated sisu-0.68.0 (diff) | |
parent | fixes: alphabet list (concordance, dal_idx), and file types (dal_expand_inser... (diff) |
Merge branch 'upstream' into debian/sid
Diffstat (limited to 'lib/sisu/v0/db_import.rb')
-rw-r--r-- | lib/sisu/v0/db_import.rb | 38 |
1 files changed, 24 insertions, 14 deletions
diff --git a/lib/sisu/v0/db_import.rb b/lib/sisu/v0/db_import.rb index 1e788f8e..f02ccd3f 100644 --- a/lib/sisu/v0/db_import.rb +++ b/lib/sisu/v0/db_import.rb @@ -141,19 +141,19 @@ module SiSU_DB_import string.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n") string.gsub!(/#{Mx[:gr_o]}(?:code|alt|group|verse)(?:-end)?#{Mx[:gr_c]}/,'') string.gsub!(/#{Mx[:mk_o]}:name#\S+?#{Mx[:mk_c]}/,'') - string.gsub!(/\{\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)\}\S+/,'[image: \1] \2') - string.gsub!(/\{\s*(.+?)\s*\}(?:https?|file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') + string.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') + string.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:https?|file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') end def strip_markup(string) #define rules, make same as in dal clean string.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') string.gsub!(/#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/,'') - string.gsub!(/(?: \\;)+/,' ') + string.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') string.gsub!(/#{Mx[:gr_o]}T[h]?#{Mx[:tc_p]}.+?#{Mx[:gr_c]}/u,"[TABLE]\n") #tables #CHECK should take whole table string.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables string.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables string.gsub!(/#{Mx[:tc_p]}/u,' ') #tables tidy later string.gsub!(/<.+?>/,'') - string.gsub!(/\{.+?\.(?:png|jpg|gif).+?\}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search + string.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search string.gsub!(/\s\s+/,' ') string.strip! end @@ -402,7 +402,8 @@ module SiSU_DB_import end txt=endnotes(txt).clean_text end - @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_minus + txt_obj={:txt =>txt,:col =>@col} + @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).lev4_minus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup strip_markup(@col[:plaintext]) @@ -443,7 +444,8 @@ module SiSU_DB_import end txt=endnotes(txt).clean_text(@base_url) end - @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_plus + txt_obj={:txt =>txt,:col =>@col} + @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup strip_markup(@col[:plaintext]) @@ -482,7 +484,8 @@ module SiSU_DB_import end txt=endnotes(txt).clean_text(@base_url) end - @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_plus + txt_obj={:txt =>txt,:col =>@col} + @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup strip_markup(@col[:plaintext]) @@ -521,7 +524,8 @@ module SiSU_DB_import end txt=endnotes(txt).clean_text(@base_url) end - @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).lev4_plus + txt_obj={:txt =>txt,:col =>@col} + @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).lev4_plus special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup strip_markup(@col[:plaintext]) @@ -570,11 +574,14 @@ module SiSU_DB_import if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last end @col[:body]=if txt=~/#{Mx[:gr_o]}T[h]?#{Mx[:tc_p]}.+?#{Mx[:tc_p]}~\d+;\w\d+;\w\d+#{Mx[:gr_c]}/ #watch - SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).html_table + txt_obj={:txt =>txt,:col =>@col} + SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).html_table elsif txt=~/^#{Mx[:pa_o]}:i([1-9])#{Mx[:pa_c]}/ - SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).indent($1) + txt_obj={:txt =>txt,:col =>@col} + SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).indent($1) else - SiSU_Format_Shared::CSS_Format.new(@md,txt,@col).norm + txt_obj={:txt =>txt,:col =>@col} + SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).norm end special_character_escape(@col[:body]) @col[:plaintext]=@col[:body].dup @@ -593,7 +600,8 @@ module SiSU_DB_import nr,txt,digest_clean=$1,$2,$3 end @id_n+=1 - body=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col,nr).endnote + txt_obj={:txt =>txt,:col =>@col,:endnote_nr =>nr} + body=SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).endnote special_character_escape(body) special_character_escape(txt) strip_markup(txt) @@ -633,7 +641,8 @@ module SiSU_DB_import nr,txt,digest_clean=$1,$2,$3 end @id_n+=1 - body=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col,nr).endnote + txt_obj={:txt =>txt,:col =>@col,:endnote_nr =>nr} + body=SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).endnote special_character_escape(body) special_character_escape(txt) strip_markup(txt) @@ -674,7 +683,8 @@ module SiSU_DB_import nr,txt,digest_clean=$1,$2,$3 end @id_n+=1 - body=SiSU_Format_Shared::CSS_Format.new(@md,txt,@col,nr).endnote + txt_obj={:txt =>txt,:col =>@col,:endnote_nr =>nr} + body=SiSU_Format_Shared::CSS_Format.new(@md,txt_obj).endnote special_character_escape(body) special_character_escape(txt) strip_markup(txt) |