diff options
Diffstat (limited to 'lib/sisu/v2/db_import.rb')
| -rw-r--r-- | lib/sisu/v2/db_import.rb | 321 | 
1 files changed, 40 insertions, 281 deletions
| diff --git a/lib/sisu/v2/db_import.rb b/lib/sisu/v2/db_import.rb index 1f795e68..5610a1d0 100644 --- a/lib/sisu/v2/db_import.rb +++ b/lib/sisu/v2/db_import.rb @@ -60,9 +60,10 @@  module SiSU_DB_import    require "#{SiSU_lib}/db_columns"                         # db_columns.rb    require "#{SiSU_lib}/db_load_tuple"                      # db_load_tuple.rb +  require "#{SiSU_lib}/db_sqltxt"                          # db_sqltxt.rb    require "#{SiSU_lib}/shared_html_lite"                   # shared_html_lite.rb    require 'sqlite3' -  class Import < SiSU_DB_columns::Column_size +  class Import < SiSU_DB_text::Prepare      include SiSU_Param      include SiSU_Screen      @@dl=nil @@ -86,7 +87,7 @@ module SiSU_DB_import        @counter={}        @db=SiSU_Env::Info_db.new        @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) ? true : false -      sql='SELECT MAX(lid) FROM documents' +      sql='SELECT MAX(lid) FROM doc_objects'        begin          @col[:lid] ||=0          @col[:lid]=if @driver_sqlite3 @@ -122,7 +123,7 @@ module SiSU_DB_import        tell.print_grey if @opt.cmd =~/v/        file_exist=if @sql_type=~/sqlite/; nil        else -        @conn.select_one(%{ SELECT metadata.tid FROM metadata WHERE metadata.filename ~ '#{@opt.fns}'; }) +        @conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.filename = '#{@opt.fns}'; })        end        if (@sql_type!~/sqlite/ and not file_exist) \        or @sql_type=~/sqlite/ @@ -192,28 +193,6 @@ module SiSU_DB_import          end        end      end -    def special_character_escape(str) -      str.gsub!(/'/,"''") #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'") -      str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br />\n") -      str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check -      str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2') -      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2') -      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1') -      str -    end -    def strip_markup(str) #define rules, make same as in dal clean -      str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') -      str.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') -      str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1')         #tables -      str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ')                          #tables -      str.gsub!(/#{Mx[:tc_p]}/u,' ')                                                     #tables tidy later -      str.gsub!(/<.+?>/,'') -      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp)\/\/:\S+ /,' [image] ') # else image names found in search -      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search -      str.gsub!(/\s\s+/,' ') -      str.strip! -      str -    end      def pf_db_import_transaction_open      end      def pf_db_import_transaction_close @@ -222,12 +201,23 @@ module SiSU_DB_import        print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } unless @opt.cmd =~/q/        @tp={}        @md=SiSU_Param::Parameters.new(@opt).get +#% sisutxt & fulltxt +      if FileTest.exist?(@md.fns) +        txt_arr=IO.readlines(@md.fns,'') +        src=txt_arr.join("\n") +        src=special_character_escape(src) +        @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', " +        txt=clean_searchable_text(txt_arr) +        #special_character_escape(txt) +        @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', " +      end +#% title        if defined? @md.title.full \        and @md.title.full=~/\S+/                                              # DublinCore 1 - title -        @tp[:title]=@md.title.full -        special_character_escape(@tp[:title]) -        @tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " -        sql='SELECT MAX(tid) FROM metadata' +        #@tp[:title]=@md.title.full +        #special_character_escape(@tp[:title]) +        #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " +        sql='SELECT MAX(tid) FROM metadata_and_text'          begin            @@id_t ||=0            id_t=if @driver_sqlite3 @@ -242,220 +232,9 @@ module SiSU_DB_import          @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title:          puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}} unless @opt.cmd =~/q/        end -      if defined? @md.creator.author \ -      and @md.creator.author=~/\S+/                                           # DublinCore 2 - creator/author (author) -        txt=@md.creator.author #dc -        special_character_escape(txt) -        @tp[:creator_f],@tp[:creator_i]='creator, ',"'#{txt}', " -      end -      if defined? @md.creator.contributor \ -      and @md.creator.contributor=~/\S+/                                      # DublinCore 6 - contributor -        txt=@md.creator.contributor #dc -        special_character_escape(txt) -        @tp[:contributor_f],@tp[:contributor_i]='contributor, ',"'#{txt}', " -      end -      if defined? @md.creator.translator \ -      and @md.creator.translator=~/\S+/ -        txt=@md.creator.translator -        special_character_escape(txt) -        @tp[:translator_f],@tp[:translator_i]='translator, ',"'#{txt}', " -      end -      if defined? @md.creator.illustrator \ -      and @md.creator.illustrator=~/\S+/ -        txt=@md.creator.illustrator -        special_character_escape(txt) -        @tp[:illustrator_f],@tp[:illustrator_i]='illustrator, ',"'#{txt}', " -      end -      if defined? @md.publisher \ -      and @md.publisher -        txt=@md.publisher #dc -        special_character_escape(txt) -        @tp[:publisher_f],@tp[:publisher_i]='publisher, ',"'#{txt}', " -      end -      if defined? @md.creator.prepared_by \ -      and @md.creator.prepared_by=~/\S+/ -        txt=@md.creator.prepared_by -        special_character_escape(txt) -        @tp[:prepared_by_f],@tp[:prepared_by_i]='prepared_by, ',"'#{txt}', " -      end -      if defined? @md.creator.digitized_by \ -      and @md.creator.digitized_by=~/\S+/ -        txt=@md.creator.digitized_by -        special_character_escape(txt) -        @tp[:digitized_by_f],@tp[:digitized_by_i]='digitized_by, ',"'#{txt}', " -      end -      if defined? @md.classify.subject \ -      and @md.classify.subject=~/\S+/                                          # DublinCore 3 - subject (us library of congress, eric or udc, or schema???) -        txt=@md.classify.subject #dc -        special_character_escape(txt) -        @tp[:subject_f],@tp[:subject_i]='subject, ',"'#{txt}', " -      end -      if defined? @md.notes.description \ -      and @md.notes.description=~/\S+/                                         # DublinCore 4 - description -        txt=@md.notes.description #dc -        special_character_escape(txt) -        @tp[:description_f],@tp[:description_i]='description, ',"'#{txt}', " -      end -      if defined? @md.classify.subject \ -      and @md.classify.subject=~/\S+/                                         # DublinCore 8 - type (genre eg. report, convention etc) -        txt=@md.classify.abstract -        special_character_escape(txt) -        @tp[:abstract_f],@tp[:abstract_i]='abstract, ',"'#{txt}', " -      end -      if defined? @md.rights.all \ -      and @md.rights.all=~/\S+/                                               # DublinCore 15 - rights -        txt=@md.rights.all #dc -        special_character_escape(txt) -        @tp[:rights_f],@tp[:rights_i]='rights, ',"'#{txt}', " -      end -      if defined? @md.date.published \ -      and @md.date.published=~/\S+/                                           # DublinCore 7 - date year-mm-dd -        txt=@md.date.published #dc -        special_character_escape(txt) -        @tp[:date_f],@tp[:date_i]='date, ',"'#{txt}', " -      end -      if defined? @md.date.created \ -      and @md.date.created=~/\S+/ -        txt=@md.date.created #dc -        special_character_escape(txt) -        @tp[:date_created_f],@tp[:date_created_i]='date_created, ',"'#{txt}', " -      end -      if defined? @md.date.issued \ -      and @md.date.issued=~/\S+/ -        txt=@md.date.issued #dc -        special_character_escape(txt) -        @tp[:date_issued_f],@tp[:date_issued_i]='date_issued, ',"'#{txt}', " -      end -      if defined? @md.date.available \ -      and @md.date.available=~/\S+/ -        txt=@md.date.available #dc -        special_character_escape(txt) -        @tp[:date_available_f],@tp[:date_available_i]='date_available, ',"'#{txt}', " -      end -      if defined? @md.date.modified \ -      and @md.date.modified=~/\S+/ -        txt=@md.date.modified #dc -        special_character_escape(txt) -        @tp[:date_modified_f],@tp[:date_modified_i]='date_modified, ',"'#{txt}', " -      end -      if defined? @md.date.valid \ -      and @md.date.valid=~/\S+/ -        txt=@md.date.valid #dc -        special_character_escape(txt) -        @tp[:date_valid_f],@tp[:date_valid_i]='date_valid, ',"'#{txt}', " -      end -      if defined? @md.title.language \ -      and @md.title.language=~/\S+/ -        txt=@md.title.language -        special_character_escape(txt) -        @tp[:language_f],@tp[:language_i]='language, ',"'#{txt}', " -      end -      if defined? @md.original.language \ -      and @md.original.language=~/\S+/ -        txt=@md.original.language -        special_character_escape(txt) -        @tp[:language_original_f],@tp[:language_original_i]='language_original, ',"'#{txt}', " -      end -      if defined? @md.classify.format \ -      and @md.classify.format=~/\S+/                                          # DublinCore 9 - format (use your mime type) -        txt=@md.classify.format #dc -        special_character_escape(txt) -        @tp[:format_f],@tp[:format_i]='format, ',"'#{txt}', " -      end -      if defined? @md.classify.identifier \ -      and @md.classify.identifier=~/\S+/                                       # DublinCore 10 - identifier (your identifier, could use urn which is free) -        txt=@md.classify.identifier #dc -        special_character_escape(txt) -        @tp[:identifier_f],@tp[:identifier_i]='identifier, ',"'#{txt}', " -      end -      if defined? @md.original.source \ -      and @md.original.source=~/\S+/                                           # DublinCore 11 - source (document source) -        txt=@md.original.source #dc -        special_character_escape(txt) -        @tp[:source_f],@tp[:source_i]='source, ',"'#{txt}', " -      end -      if defined? @md.classify.relation \ -      and @md.classify.relation=~/\S+/                                         # DublinCore 13 - relation -        txt=@md.classify.relation #dc -        special_character_escape(txt) -        @tp[:relation_f],@tp[:relation_i]='relation, ',"'#{txt}', " -      end -      if defined? @md.classify.coverage \ -      and @md.classify.coverage=~/\S+/                                         # DublinCore 14 - coverage -        txt=@md.classify.coverage #dc -        special_character_escape(txt) -        @tp[:coverage_f],@tp[:coverage_i]='coverage, ',"'#{txt}', " -      end -      if defined? @md.classify.keywords \ -      and @md.classify.keywords=~/\S+/ -        txt=@md.classify.keywords -        special_character_escape(txt) -        @tp[:keywords_f],@tp[:keywords_i]='keywords, ',"'#{txt}', " -      end -      if defined? @md.notes.comment \ -      and @md.notes.comment=~/\S+/ -        txt=@md.notes.comments -        special_character_escape(txt) -        @tp[:comments_f],@tp[:comments_i]='comments, ',"'#{txt}', " -      end -      if defined? @md.classify.loc \ -      and @md.classify.loc=~/\S+/ -        txt=@md.classify.loc -        special_character_escape(txt) -        @tp[:cls_loc_f],@tp[:cls_loc_i]='cls_loc, ',"'#{txt}', " -      end -      if defined? @md.classify.dewey \ -      and @md.classify.dewey=~/\S+/ -        txt=@md.classify.dewey -        special_character_escape(txt) -        @tp[:cls_dewey_f],@tp[:cls_dewey_i]='cls_dewey, ',"'#{txt}', " -      end -      if defined? @md.classify.pg \ -      and @md.classify.pg=~/\S+/ -        txt=@md.classify.pg -        special_character_escape(txt) -        @tp[:cls_pg_f],@tp[:cls_pg_i]='cls_pg, ',"'#{txt}', " -      end -      if defined? @md.classify.isbn \ -      and @md.classify.isbn=~/\S+/ -        txt=@md.classify.isbn -        special_character_escape(txt) -        @tp[:cls_isbn_f],@tp[:cls_isbn_i]='cls_isbn, ',"'#{txt}', " -      end -      if defined? @md.notes.prefix_a \ -      and @md.notes.prefix_a=~/\S+/ -        txt=@md.notes.prefix_a -        special_character_escape(txt) -        @tp[:prefix_a_f],@tp[:prefix_a_i]='prefix_a, ',"'#{txt}', " -      end -      if defined? @md.notes.prefix_b \ -      and @md.notes.prefix_b=~/\S+/ -        txt=@md.notes.prefix_b -        special_character_escape(txt) -        @tp[:prefix_b_f],@tp[:prefix_b_i]='prefix_b, ',"'#{txt}', " -      end -      if defined? @md.fns \ -      and @md.fns=~/\S+/ -        txt=@md.fns -        special_character_escape(txt) -        @tp[:fns_f],@tp[:fns_i]="filename, ","'#{txt}', " -      end -      if @md.wc_words; txt=@md.wc_words -        @tp[:wc_words_f],@tp[:wc_words_i]='wc_words, ',"'#{txt}', " -      end -      if defined? @md.dgst \ -      and @md.dgst.class==Array -        txt=@md.dgst[1] -        @tp[:dgst_f],@tp[:dgst_i]='dgst, ',"'#{txt}', " -      end -      if @md.sc_date; txt=@md.sc_date -        @tp[:sc_date_f],@tp[:sc_date_i]='sc_date, ',"'#{txt}', " -      end -      if @md.generated; txt=@md.generated -        @tp[:generated_f],@tp[:generated_i]='generated, ',"'#{@txt}', " -      end +      ################ CLEAR ##############        SiSU_DB_DBI::Test.new(self,@opt).verify                          #% import title names, filenames (tuple) -      t=SiSU_DB_tuple::Load_metadata.new(@conn,@tp,@@id_t,@opt,@file) +      t=SiSU_DB_tuple::Load_metadata.new(@conn,@@id_t,@md,@file)        tuple=t.tuple        tuple      end @@ -482,13 +261,7 @@ module SiSU_DB_import              and data.ln.inspect=~/[123]/                @col[:lev],txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.ln,data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'',''                @col[:lid]+=1 -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ -                txt=endnotes(txt).clean_text -              end +              txt=endnotes(txt).extract_any                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup @@ -521,13 +294,7 @@ module SiSU_DB_import                end                @env=SiSU_Env::Info_env.new(@md.fns)                @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ -                txt=endnotes(txt).clean_text(@base_url) -              end +              txt=endnotes(txt).extract_any                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup @@ -553,13 +320,7 @@ module SiSU_DB_import                end                @env=SiSU_Env::Info_env.new(@md.fns)                @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ -                txt=endnotes(txt).clean_text(@base_url) -              end +              txt=endnotes(txt).extract_any                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup @@ -585,13 +346,7 @@ module SiSU_DB_import                end                @env=SiSU_Env::Info_env.new(@md.fns)                @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ -                txt=endnotes(txt).clean_text(@base_url) -              end +              txt=endnotes(txt).extract_any                @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus                special_character_escape(@col[:body])                @col[:plaintext]=@col[:body].dup @@ -613,15 +368,9 @@ module SiSU_DB_import                end                @env=SiSU_Env::Info_env.new(@md.fns)                @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" -              if txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ -                endnotes(txt).range -                @en << endnotes(txt).standard if txt =~ /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ -                @en_ast << endnotes(txt).asterisk if txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ -                @en_pls << endnotes(txt).plus if txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ -                txt=endnotes(txt).clean_text(@base_url) -              end +              txt=endnotes(txt).extract_any                if @sql_type=~/pg/ \ -              and txt.size > (document_clean - 1)             #% examine pg build & remove limitation +              and txt.size > (SiSU_DB_columns::Column_size.new.document_clean - 1)             #% examine pg build & remove limitation                  puts "\n\nTOO LARGE (TXT - see error log)\n\n"                  open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|                    error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}") @@ -660,7 +409,7 @@ module SiSU_DB_import                    #special_character_escape(body)                    #special_character_escape(txt)                    strip_markup(txt) -                  if txt.size > (endnote_clean - 1) +                  if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)                      puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"                      open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|                        error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -698,7 +447,7 @@ module SiSU_DB_import                    special_character_escape(txt)                    body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)                    strip_markup(txt) -                  if txt.size > (endnote_clean - 1) +                  if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)                      puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"                      open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|                        error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -736,7 +485,7 @@ module SiSU_DB_import                    special_character_escape(txt)                    body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt)                    strip_markup(txt) -                  if txt.size > (endnote_clean - 1) +                  if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1)                      puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n"                      open("#{Dir.pwd}/pg_documents_error_log",'a') do |error|                        error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") @@ -772,6 +521,16 @@ module SiSU_DB_import      end      def endnotes(txt)        @txt=txt +      def extract_any +        if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ +          endnotes(@txt).range +          @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ +          @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ +          @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_o]}/ +          @txt=endnotes(@txt).clean_text +        end +        @txt +      end        def standard          x=if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/; @txt.scan(/#{Mx[:en_a_o]}(\d+).+?#{Mx[:en_a_c]}/)          else nil | 
