about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- data/doc/sisu/CHANGELOG_v5 5
-rw-r--r-- data/doc/sisu/CHANGELOG_v6 5
-rw-r--r-- lib/sisu/v5/db_import.rb 34
-rw-r--r-- lib/sisu/v5/db_sqltxt.rb 28
-rw-r--r-- lib/sisu/v6/db_import.rb 34
-rw-r--r-- lib/sisu/v6/db_sqltxt.rb 28
6 files changed, 88 insertions, 46 deletions
diff --git a/data/doc/sisu/CHANGELOG_v5 b/data/doc/sisu/CHANGELOG_v5
index 0271a637..39591639 100644
--- a/data/doc/sisu/CHANGELOG_v5
+++ b/data/doc/sisu/CHANGELOG_v5
@@ -42,6 +42,11 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_5.7.1.orig.tar.xz
* html_format, "id"s for objects & footnotes
* remove trailing backslash for empty linebreak & paragraph, <br> <p>
+* db, text search & display field, footnotes moved to end of text object
+ * cleaner, more useful search results
+ * cleaner text search field
+ * separate footnote fields redundant for search purposes
+
%% 5.7.0.orig.tar.xz (2014-10-12:40/7)
http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=log;h=refs/tags/sisu_5.7.0
http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=log;h=refs/tags/debian/sisu_5.7.0-1
diff --git a/data/doc/sisu/CHANGELOG_v6 b/data/doc/sisu/CHANGELOG_v6
index 005803bf..6a76425c 100644
--- a/data/doc/sisu/CHANGELOG_v6
+++ b/data/doc/sisu/CHANGELOG_v6
@@ -32,6 +32,11 @@ http://www.jus.uio.no/sisu/pkg/src/sisu_6.3.1.orig.tar.xz
* html_format, "id"s for objects & footnotes
* remove trailing backslash for empty linebreak & paragraph, <br> <p>
+* db, text search & display field, footnotes moved to end of text object
+ * cleaner, more useful search results
+ * cleaner text search field
+ * separate footnote fields redundant for search purposes
+
%% 6.3.0.orig.tar.xz (2014-10-12:40/7)
http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=log;h=refs/tags/sisu_6.3.0
http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=log;h=refs/tags/debian/sisu_6.3.0-1
diff --git a/lib/sisu/v5/db_import.rb b/lib/sisu/v5/db_import.rb
index 59cff28a..72fb3753 100644
--- a/lib/sisu/v5/db_import.rb
+++ b/lib/sisu/v5/db_import.rb
@@ -334,17 +334,17 @@ module SiSU_DbImport
@en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
@col[:en_a],@col[:en_z]=nil,nil
ao_array.each do |data|
- data.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ')
- data.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
+ data.obj=data.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1').
+ gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1').
+ gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1').
+ gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1').
+ gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1').
+ gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1').
+ gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1').
+ gsub(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ').
+ gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
@col[:seg]=@@seg
if data.of ==:para \
|| data.of ==:heading \
@@ -374,7 +374,7 @@ module SiSU_DbImport
@col[:lid]+=1
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -425,7 +425,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -470,7 +470,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -501,7 +501,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -532,7 +532,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -603,7 +603,7 @@ module SiSU_DbImport
else
SiSU_FormatShared::CSS_Format.new(@md,data).norm
end
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
diff --git a/lib/sisu/v5/db_sqltxt.rb b/lib/sisu/v5/db_sqltxt.rb
index 6585fd66..3f6cf951 100644
--- a/lib/sisu/v5/db_sqltxt.rb
+++ b/lib/sisu/v5/db_sqltxt.rb
@@ -60,7 +60,7 @@
module SiSU_DbText
class Prepare
def special_character_escape(str)
- str=str.gsub(/'/,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
+ str=str.gsub(/'/m,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
gsub(/(\\)/m,'\1\1'). #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql
gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/m,"<br>\n").
gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m,''). #check
@@ -80,13 +80,29 @@ module SiSU_DbText
gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
gsub(/ \s+/m,' ')
#p s if s =~/[^ \nA-Za-z0-9'"`?!#@$%^&*=+,.;:\[\]()<>{}‹›|\\\/~_-]/
- s
+ txt_arr << s
end
- txt_arr << arr << en
- #txt_arr=txt_arr.flatten
+ txt_arr=txt_arr << en
txt=txt_arr.flatten.join("\n")
- txt=special_character_escape(txt)
- txt
+ special_character_escape(txt)
+ end
+ def clean_document_objects_body(arr)
+ txt_arr,en,en_arr=[],[],[]
+ arr=(arr.is_a?(String)) ? [ arr ] : arr
+ arr.each do |s|
+ en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m)
+ s=s.gsub(/#{Mx[:en_a_o]}\s*(\d+).+?#{Mx[:en_a_c]}/m,'<sup>\1</sup>').
+ gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
+ gsub(/ \s+/m,' ')
+ txt_arr << s
+ end
+ en.flatten.each do |e|
+ e=e.sub(/^(\d+)\s*/,'<sup>\1</sup> ')
+ en_arr << e
+ end
+ txt_arr=txt_arr << en_arr
+ txt=txt_arr.flatten.join("\n<br>")
+ special_character_escape(txt)
end
def clean_searchable_text_from_document_source(arr)
txt_arr,en=[],[]
diff --git a/lib/sisu/v6/db_import.rb b/lib/sisu/v6/db_import.rb
index 9473863d..5e159451 100644
--- a/lib/sisu/v6/db_import.rb
+++ b/lib/sisu/v6/db_import.rb
@@ -334,17 +334,17 @@ module SiSU_DbImport
@en,@en_ast,@en_pls,@tuple_array=[],[],[],[]
@col[:en_a],@col[:en_z]=nil,nil
ao_array.each do |data|
- data.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1')
- data.obj.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ')
- data.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
+ data.obj=data.obj.gsub(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1').
+ gsub(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1').
+ gsub(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1').
+ gsub(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1').
+ gsub(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1').
+ gsub(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1').
+ gsub(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1').
+ gsub(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1').
+ gsub(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ').
+ gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check
@col[:seg]=@@seg
if data.of ==:para \
|| data.of ==:heading \
@@ -374,7 +374,7 @@ module SiSU_DbImport
@col[:lid]+=1
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_minus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -425,7 +425,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -470,7 +470,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -501,7 +501,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -532,7 +532,7 @@ module SiSU_DbImport
@base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html"
txt=endnotes(txt).extract_any
body=SiSU_FormatShared::CSS_Format.new(@md,data).lev4_plus
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
@@ -603,7 +603,7 @@ module SiSU_DbImport
else
SiSU_FormatShared::CSS_Format.new(@md,data).norm
end
- @col[:body]=special_character_escape(body)
+ @col[:body]=clean_document_objects_body(body)
plaintext=@col[:body].dup
plaintext=strip_markup(plaintext)
@col[:plaintext]=clean_searchable_text_from_document_objects(plaintext)
diff --git a/lib/sisu/v6/db_sqltxt.rb b/lib/sisu/v6/db_sqltxt.rb
index 2fd39fb7..2375d5ca 100644
--- a/lib/sisu/v6/db_sqltxt.rb
+++ b/lib/sisu/v6/db_sqltxt.rb
@@ -60,7 +60,7 @@
module SiSU_DbText
class Prepare
def special_character_escape(str)
- str=str.gsub(/'/,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
+ str=str.gsub(/'/m,"''"). #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
gsub(/(\\)/m,'\1\1'). #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql
gsub(/#{Mx[:br_line]}|#{Mx[:br_nl]}/m,"<br>\n").
gsub(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/m,''). #check
@@ -80,13 +80,29 @@ module SiSU_DbText
gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
gsub(/ \s+/m,' ')
#p s if s =~/[^ \nA-Za-z0-9'"`?!#@$%^&*=+,.;:\[\]()<>{}‹›|\\\/~_-]/
- s
+ txt_arr << s
end
- txt_arr << arr << en
- #txt_arr=txt_arr.flatten
+ txt_arr=txt_arr << en
txt=txt_arr.flatten.join("\n")
- txt=special_character_escape(txt)
- txt
+ special_character_escape(txt)
+ end
+ def clean_document_objects_body(arr)
+ txt_arr,en,en_arr=[],[],[]
+ arr=(arr.is_a?(String)) ? [ arr ] : arr
+ arr.each do |s|
+ en << s.scan(/#{Mx[:en_a_o]}\s*(.+?)\s*#{Mx[:en_a_c]}/m)
+ s=s.gsub(/#{Mx[:en_a_o]}\s*(\d+).+?#{Mx[:en_a_c]}/m,'<sup>\1</sup>').
+ gsub(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m,'').
+ gsub(/ \s+/m,' ')
+ txt_arr << s
+ end
+ en.flatten.each do |e|
+ e=e.sub(/^(\d+)\s*/,'<sup>\1</sup> ')
+ en_arr << e
+ end
+ txt_arr=txt_arr << en_arr
+ txt=txt_arr.flatten.join("\n<br>")
+ special_character_escape(txt)
end
def clean_searchable_text_from_document_source(arr)
txt_arr,en=[],[]