about summary refs log tree commit diff homepage
path: root/lib
diff options
context:
space:
mode:
author Ralph Amissah <ralph@amissah.com> 2014-10-19 21:13:52 -0400
committer Ralph Amissah <ralph@amissah.com> 2014-10-19 21:13:52 -0400
commit 2c73f3060f9678f751c236fe17863d443f6a650f (patch)
tree 80592d406e45eb6626f6cfdc79dbe65716cb70fc /lib
parent v5 v6: html_format, "id"s for objects & footnotes (diff)
v5 v6: db, text search & display field, footnotes moved to end of text object
* cleaner, more useful search results
* cleaner text search field
* separate footnote fields redundant for search purposes
Diffstat (limited to 'lib')
-rw-r--r-- lib/sisu/v5/db_import.rb 34
-rw-r--r-- lib/sisu/v5/db_sqltxt.rb 28
-rw-r--r-- lib/sisu/v6/db_import.rb 34
-rw-r--r-- lib/sisu/v6/db_sqltxt.rb 28
4 files changed, 78 insertions, 46 deletions
diff --git a/lib/sisu/v5/db_import.rb b/lib/sisu/v5/db_import.rb
index 59cff28a..72fb3753 100644
--- a/lib/sisu/v5/db_import.rb
+++ b/lib/sisu/v5/db_import.rb
@@ -334,17 +334,17 @@ module SiSU_DbImport
@en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
@col[:en_a],@col[:en_z]=nil,nil
ao_array.each do |data|
- data.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ')
- data.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
+ data.obj=data.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1').
+ gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1').
+ gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1').
+ gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1').
+ gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1').
+ gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1').
+ gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1').
+ gsub(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ').
+ gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
@col[:seg]=@@seg
if data.of ==:para \
|| data.of ==:heading \
@@ -374,7 +374,7 @@ module SiSU_DbImport
@col[:lid]+=1
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -425,7 +425,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -470,7 +470,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -501,7 +501,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -532,7 +532,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -603,7 +603,7 @@ module SiSU_DbImport
else
SiSU_FormatShared::CSS_Format.new(@md,data).norm
end
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
diff --git a/lib/sisu/v5/db_sqltxt.rb b/lib/sisu/v5/db_sqltxt.rb
index 6585fd66..3f6cf951 100644
--- a/lib/sisu/v5/db_sqltxt.rb
+++ b/lib/sisu/v5/db_sqltxt.rb
@@ -60,7 +60,7 @@
module SiSU_DbText
class Prepare
def special_character_escape(str)
- str=str.gsub(/'/,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
+ str=str.gsub(/'/m,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
gsub(/(\\)/m,'\1\1'). #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql
gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/m,"<br>\n").
gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m,''). #check
@@ -80,13 +80,29 @@ module SiSU_DbText
gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
gsub(/ \s+/m,' ')
#p s if s =~/[^ \nA-Za-z0-9'"`?!#@$%^&*=+,.;:\[\]()<>{}‹›|\\\/~_-]/
- s
+ txt_arr << s
end
- txt_arr << arr << en
- #txt_arr=txt_arr.flatten
+ txt_arr=txt_arr << en
txt=txt_arr.flatten.join("\n")
- txt=special_character_escape(txt)
- txt
+ special_character_escape(txt)
+ end
+ def clean_document_objects_body(arr)
+ txt_arr,en,en_arr=[],[],[]
+ arr=(arr.is_a?(String)) ? [ arr ] : arr
+ arr.each do |s|
+ en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m)
+ s=s.gsub(/#{Mx[:en_a_o]}\s*(\d+).+?#{Mx[:en_a_c]}/m,'<sup>\1</sup>').
+ gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
+ gsub(/ \s+/m,' ')
+ txt_arr << s
+ end
+ en.flatten.each do |e|
+ e=e.sub(/^(\d+)\s*/,'<sup>\1</sup> ')
+ en_arr << e
+ end
+ txt_arr=txt_arr << en_arr
+ txt=txt_arr.flatten.join("\n<br>")
+ special_character_escape(txt)
end
def clean_searchable_text_from_document_source(arr)
txt_arr,en=[],[]
diff --git a/lib/sisu/v6/db_import.rb b/lib/sisu/v6/db_import.rb
index 9473863d..5e159451 100644
--- a/lib/sisu/v6/db_import.rb
+++ b/lib/sisu/v6/db_import.rb
@@ -334,17 +334,17 @@ module SiSU_DbImport
@en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
@col[:en_a],@col[:en_z]=nil,nil
ao_array.each do |data|
- data.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ')
- data.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
+ data.obj=data.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1').
+ gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1').
+ gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1').
+ gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1').
+ gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1').
+ gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1').
+ gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1').
+ gsub(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ').
+ gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
@col[:seg]=@@seg
if data.of ==:para \
|| data.of ==:heading \
@@ -374,7 +374,7 @@ module SiSU_DbImport
@col[:lid]+=1
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -425,7 +425,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -470,7 +470,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -501,7 +501,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -532,7 +532,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -603,7 +603,7 @@ module SiSU_DbImport
else
SiSU_FormatShared::CSS_Format.new(@md,data).norm
end
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
diff --git a/lib/sisu/v6/db_sqltxt.rb b/lib/sisu/v6/db_sqltxt.rb
index 2fd39fb7..2375d5ca 100644
--- a/lib/sisu/v6/db_sqltxt.rb
+++ b/lib/sisu/v6/db_sqltxt.rb
@@ -60,7 +60,7 @@
module SiSU_DbText
class Prepare
def special_character_escape(str)
- str=str.gsub(/'/,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
+ str=str.gsub(/'/m,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
gsub(/(\\)/m,'\1\1'). #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql
gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/m,"<br>\n").
gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m,''). #check
@@ -80,13 +80,29 @@ module SiSU_DbText
gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
gsub(/ \s+/m,' ')
#p s if s =~/[^ \nA-Za-z0-9'"`?!#@$%^&*=+,.;:\[\]()<>{}‹›|\\\/~_-]/
- s
+ txt_arr << s
end
- txt_arr << arr << en
- #txt_arr=txt_arr.flatten
+ txt_arr=txt_arr << en
txt=txt_arr.flatten.join("\n")
- txt=special_character_escape(txt)
- txt
+ special_character_escape(txt)
+ end
+ def clean_document_objects_body(arr)
+ txt_arr,en,en_arr=[],[],[]
+ arr=(arr.is_a?(String)) ? [ arr ] : arr
+ arr.each do |s|
+ en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m)
+ s=s.gsub(/#{Mx[:en_a_o]}\s*(\d+).+?#{Mx[:en_a_c]}/m,'<sup>\1</sup>').
+ gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
+ gsub(/ \s+/m,' ')
+ txt_arr << s
+ end
+ en.flatten.each do |e|
+ e=e.sub(/^(\d+)\s*/,'<sup>\1</sup> ')
+ en_arr << e
+ end
+ txt_arr=txt_arr << en_arr
+ txt=txt_arr.flatten.join("\n<br>")
+ special_character_escape(txt)
end
def clean_searchable_text_from_document_source(arr)
txt_arr,en=[],[]