From dbc227d7f5f164f1bb584295581f7a98ecac1292 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 16 Apr 2010 10:15:34 -0400 Subject: param checks metadata string lengths against set db column sizes * param, checks metadata string lengths against set db column sizes, drops entries that are too long with warning * param, classify "populated" a fix --- lib/sisu/v2/param.rb | 278 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 207 insertions(+), 71 deletions(-) diff --git a/lib/sisu/v2/param.rb b/lib/sisu/v2/param.rb index 9d1f03e9..c50a55ee 100644 --- a/lib/sisu/v2/param.rb +++ b/lib/sisu/v2/param.rb @@ -152,6 +152,21 @@ module SiSU_Param def initialize(str) @s=str end + def validate_length(s,l,n) + #s=(s.length <= l) ? s : nil + s=if s.class==String \ + and s.length <= l + s + elsif s.class==NilClass; nil + elsif s.class !=String + puts "#{n} is #{s.class}: programming error, String expected #{__FILE__}:#{__LINE__}" + s + else + tell=SiSU_Screen::Ansi.new('v',"#{n} length #{s.length} exceeds set db field length #{l}, metadata dropped") + tell.warn + nil + end + end def name_format(name) if name name.strip! @@ -214,49 +229,81 @@ module SiSU_Param a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) def main - @h['main'] + s=@h['main'] + l,n=Db[:col_title_part],'title.main' + validate_length(s,l,n) end def sub - @h['subtitle'] + s=@h['subtitle'] + l,n=Db[:col_title_part],'title.subtitle' + validate_length(s,l,n) end def edition - @h['edition'] + s=@h['edition'] + l,n=Db[:col_title_edition],'title.edition' + validate_length(s,l,n) end def note - @h['note'] + @h['note'] #TEXT end def short s=(@h['short'] ? @h['short'] : @h['main']) + l,n=Db[:col_title_part],'title.short' + validate_length(s,l,n) end def full s=(@h['subtitle'] ? (@h['main'] + ' - ' + @h['subtitle']) : @h['main']) + l,n=Db[:col_title],'title.full' + validate_length(s,l,n) + end + def language + s=@h['language'] + l,n=Db[:col_language],'title.language' + validate_length(s,l,n) + end + def language_char + s=@h['language_char'] + l,n=Db[:col_language_char],'title.language_char' + validate_length(s,l,n) end self end - def creator #there are sub categories that need to be catered for and sometimes more than one author etc. + def creator #there are sub categories that need to be catered for and sometimes more than one author etc.; implement array.to_s.length validation test later, current test on string approximate as string is not used a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) def author @h['author']=(@h['author'] ? @h['author'] : @h['main']) names=name_format(@h['author']) - names[:name_str] + s=names[:name_str] + l,n=Db[:col_name],'creator.author' + validate_length(s,l,n) end - def author_detail + def author_detail s=(@h['author'] ? @h['author'] : @h['main']) names=name_format(s) names[:name_a_h] end - def translator - names=(@h['translator'] ? name_format(@h['translator']) : nil) - (names.class==Hash) ? names[:name_str] : nil + def contributor + names=(@h['contributor'] ? name_format(@h['contributor']) : nil) + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.author' + validate_length(s,l,n) + else nil + end end - def translator_detail - names=(@h['translator'] ? name_format(@h['translator']) : nil) + def contributor_detail + names=(@h['contributor'] ? name_format(@h['contributor']) : nil) (names.class==Hash) ? names[:name_a_h] : nil end def illustrator names=(@h['illustrator'] ? name_format(@h['illustrator']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.illustrator' + validate_length(s,l,n) + else nil + end end def illustrator_detail names=(@h['illustrator'] ? name_format(@h['illustrator']) : nil) @@ -264,15 +311,38 @@ module SiSU_Param end def photographer names=(@h['photographer'] ? name_format(@h['photographer']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.photographer' + validate_length(s,l,n) + else nil + end end def photographer_detail names=(@h['photographer'] ? name_format(@h['photographer']) : nil) (names.class==Hash) ? names[:name_a_h] : nil end + def translator + names=(@h['translator'] ? name_format(@h['translator']) : nil) + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.translator' + validate_length(s,l,n) + else nil + end + end + def translator_detail + names=(@h['translator'] ? name_format(@h['translator']) : nil) + (names.class==Hash) ? names[:name_a_h] : nil + end def audio names=(@h['audio'] ? name_format(@h['audio']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.audio' + validate_length(s,l,n) + else nil + end end def audio_detail names=(@h['audio'] ? name_format(@h['audio']) : nil) @@ -280,7 +350,12 @@ module SiSU_Param end def digitized_by names=(@h['digitized_by'] ? name_format(@h['digitized_by']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.digitized_by' + validate_length(s,l,n) + else nil + end end def digitized_by_detail names=(@h['digitized_by'] ? name_format(@h['digitized_by']) : nil) @@ -288,27 +363,24 @@ module SiSU_Param end def prepared_by names=(@h['prepared_by'] ? name_format(@h['prepared_by']) : nil) - (names.class==Hash) ? names[:name_str] : nil + s=(names.class==Hash) ? names[:name_str] : nil + s=if s + l,n=Db[:col_name],'creator.prepared_by' + validate_length(s,l,n) + else nil + end end def prepared_by_detail names=(@h['prepared_by'] ? name_format(@h['prepared_by']) : nil) names=name_format(@h['prepared_by']) (names.class==Hash) ? names[:name_a_h] : nil end - def contributor - names=(@h['contributor'] ? name_format(@h['contributor']) : nil) - (names.class==Hash) ? names[:name_str] : nil - end - def contributor_detail - names=(@h['contributor'] ? name_format(@h['contributor']) : nil) - (names.class==Hash) ? names[:name_a_h] : nil - end self end def rights a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - def copyright + def copyright # TEXT used db sql def text #you may wish to expand to take from all r=if @h['copyright'] @h['copyright'] @@ -376,80 +448,127 @@ module SiSU_Param def classify a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - def topic_register - @h['topic_register'] + def coverage + s=@h['coverage'] + l,n=Db[:col_classify_short],'classify.coverage' + validate_length(s,l,n) + end + def relation + s=@h['relation'] + l,n=Db[:col_classify_short],'classify.short' + validate_length(s,l,n) end def subject - @h['subject'] + s=@h['subject'] + l,n=Db[:col_classify_txt_short],'classify.subject' + validate_length(s,l,n) + end + def topic_register + s=@h['topic_register'] + l,n=Db[:col_classify_txt_long],'classify.topic_register' + validate_length(s,l,n) end def type - @h['type'] + s=@h['type'] + l,n=Db[:col_classify_txt_short],'classify.type' + validate_length(s,l,n) end def identifier - @h['identifier'] - end - def isbn - @h['isbn'] - end - def dewey - @h['dewey'] + s=@h['identifier'] + l,n=Db[:col_classify_identify],'classify.identifier' + validate_length(s,l,n) end def loc - @h['loc'] + s=@h['loc'] + l,n=Db[:col_classify_library],'classify.loc' + validate_length(s,l,n) end def dewey - @h['dewey'] + s=@h['dewey'] + l,n=Db[:col_classify_library],'classify.dewey' + validate_length(s,l,n) end def pg - @h['pg'] - end - def relation - @h['relation'] + s=@h['pg'] + l,n=Db[:col_classify_small],'classify.pg' + validate_length(s,l,n) end - def coverage - @h['coverage'] + def isbn + s=@h['isbn'] + l,n=Db[:col_classify_small],'classify.isbn' + validate_length(s,l,n) end self end def publisher a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - @h['main'] + s=@h['main'] + l,n=Db[:col_name],'publisher' + validate_length(s,l,n) end def date a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - def published - @h['published']=(@h['published'] ? @h['published'] : @h['main']) + def added_to_site + s=@h['added_to_site'] + l,n=Db[:col_date_text],'date.added_to_site' + validate_length(s,l,n) end def available - @h['available'] + s=@h['available'] + l,n=Db[:col_date_text],'date.available' + validate_length(s,l,n) end def created - @h['created'] + s=@h['created'] + l,n=Db[:col_date_text],'date.created' + validate_length(s,l,n) end def issued - @h['issued'] + s=@h['issued'] + l,n=Db[:col_date_text],'date.issued' + validate_length(s,l,n) end def modified - @h['modified'] + s=@h['modified'] + l,n=Db[:col_date_text],'date.modified' + validate_length(s,l,n) end - def valid - @h['valid'] + def published + s=@h['published']=(@h['published'] ? @h['published'] : @h['main']) + l,n=Db[:col_date_text],'date.published' + validate_length(s,l,n) end - def added_to_site - @h['added_to_site'] + def valid + s=@h['valid'] + l,n=Db[:col_date_text],'date.valid' + validate_length(s,l,n) end self end - def language + def language # as things stand this should really be populated from title.language and original.language, resolve a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) def document - @h['document']=(@h['document'] ? @h['document'] : @h['main']) + s=@h['document']=(@h['document'] ? @h['document'] : @h['main']) + l,n=Db[:col_language],'language.document' + validate_length(s,l,n) + end + def document_char + s=@h['document_char']=(@h['document_char'] ? @h['document_char'] : nil) + l,n=Db[:col_language_char],'language.document_char' + validate_length(s,l,n) end def original - @h['original'] + s=@h['original'] + l,n=Db[:col_language],'language.original' + validate_length(s,l,n) + end + def original_char + s=@h['original_char'] + l,n=Db[:col_language_char],'language.original_char' + validate_length(s,l,n) end self end @@ -479,7 +598,9 @@ module SiSU_Param @h['num_top'] end def breaks - pagebreaks=((@h['breaks'] =~/;/) ? (@h['breaks'].split(/;\s*/)) : [ @h['breaks'] ]) + pagebreaks=((@h['breaks'] =~/;/) \ + ? (@h['breaks'].split(/;\s*/)) \ + : [ @h['breaks'] ]) page_new,page_break=nil,nil pagebreaks.each do |x| page_new=x[/(:?[\dA-C],?)+/] if x=~/new|clear/ @@ -555,19 +676,34 @@ module SiSU_Param a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) def publisher - @h['publisher'] + s=@h['publisher'] + l,n=Db[:col_name],'original.publisher' + validate_length(s,l,n) end def language - @h['language'] + s=@h['language'] + l,n=Db[:col_language],'original.language' + validate_length(s,l,n) + end + def language_char + s=@h['language_char'] + l,n=Db[:col_language_char],'original.language_char' + validate_length(s,l,n) end def source - @h['source'] + s=@h['source'] + l,n=Db[:col_name],'original.source' + validate_length(s,l,n) end def institution - @h['institution'] + s=@h['institution'] + l,n=Db[:col_name],'original.institution' + validate_length(s,l,n) end def nationality - @h['nationality'] + s=@h['nationality'] + l,n=Db[:col_language],'original.nationality' + validate_length(s,l,n) end self end @@ -578,12 +714,12 @@ module SiSU_Param def notes a=@s.split(/[ ]*\n[ ]*/m) @h=build_hash(a) - def comment - @h['comment'] - end def abstract @h['abstract'] end + def comment + @h['comment'] + end def description @h['description'] end @@ -600,9 +736,9 @@ module SiSU_Param @doc={ :lv=>[] } @doc[:fns],@doc[:fnb],@doc[:scr_suffix]='','','' @@publisher='SiSU scribe' - attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy + attr_accessor :cmd,:make,:mod,:env,:fn,:fns,:fnb,:fnn,:fnt,:fnl,:flv,:fnz,:fnstex,:ocn,:sfx_src,:pdf,:file_type,:dir_out,:dir_tex,:dir_lout,:txt_path,:site_skin,:sisu,:sisu_version,:ruby_version,:title,:subtitle,:full_title,:html_title,:subtitle_tex,:creator,:classify,:author_home,:author,:author_title,:author_nationality,:authors,:authorship,:translator,:illustrator,:prepared_by,:digitized_by,:subject,:description,:publisher,:contributor,:date,:date_created,:date_issued,:date_available,:date_valid,:date_modified,:date_translated,:date_added_to_site,:date_scheme,:date_created_scheme,:date_issued_scheme,:date_available_scheme,:date_valid_scheme,:date_modified_scheme,:type,:format,:identifier,:source,:language,:language_original,:relation,:coverage,:rights,:keywords,:comments,:abstract,:cls_loc,:cls_dewey,:cls_pg,:cls_isbn,:papersize,:papersize_array,:toc,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6,:lvs,:pagenew,:pagebreak,:num_top,:toc_lev_limit,:flag_endnotes,:flag_auto_endnotes,:flag_separate_endnotes,:flag_separate_endnotes_make,:markup,:markup_instruction,:markup_version,:markup_declared,:flag_tables,:vocabulary,:doc_skin,:doc_css,:yaml,:lnk,:prefix_a,:prefix_b,:suffix,:information,:contact,:icon,:image,:ad_url,:ad_png,:ad_alt,:ad_began,:flag_promo,:promo,:ad_home,:stmp,:stmpd,:sc_filename,:sc_number,:sc_date,:sc_time,:sc_info,:yamladdr,:locale,:wc_lines,:wc_words,:wc_bytes,:file_encoding,:file_size,:user,:home,:hostname,:pwd,:firstseg,:programs,:author_copymark,:lang,:en,:dgst,:dgst_skin,:generated,:tags,:tag_array,:concord_make,:seg_names,:seg_autoname_safe,:set_header_title,:set_heading_top,:set_heading_seg,:heading_seg_first,:heading_seg_first_flag,:base_program,:ec,:opt,:sem_tag,:book_idx,:topic_register,:topic_register_array,:original_publication,:original_publication_date,:original_publication_nationality,:original_publication_institution,:writing_focus,:audio,:daisy def initialize(fns_array,opt) - @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_register=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil + @env=@fn=@fns=@fnb=@fnn=@fnt=@fnl=@flv=@fnz=@fnstex=@ocn=@sfx_src=@pdf=@file_type=@dir_out=@dir_tex=@dir_lout=@txt_path=@make=@flag_endnotes=@flag_auto_endnotes=@flag_separate_endnotes=@flag_separate_endnotes_make=@site_skin=@sisu=@sisu_version=@ruby_version=@title=@subtitle=@full_title=@html_title=@subtitle_tex=@creator=@classify=@author_home=@author=@author_title=@author_nationality=@translator=@illustrator=@prepared_by=@digitized_by=@subject=@description=@publisher=@contributor=@date=@date_created=@date_issued=@date_available=@date_valid=@date_modified=@date_translated=@date_added_to_site=@date_scheme=@date_created_scheme=@date_issued_scheme=@date_available_scheme=@date_valid_scheme=@date_modified_scheme=@type=@format=@identifier=@source=@language=@language_original=@relation=@coverage=@rights=@keywords=@comments=@abstract=@cls_loc=@cls_dewey=@cls_pg=@cls_isbn=@papersize=@toc=@lv1=@lv2=@lv3=@lv4=@lv5=@lv6=@pagenew=@pagebreak=@num_top=@toc_lev_limit=@flag_tables=@vocabulary=@doc_skin=@doc_css=@yaml=@lnk=@prefix_a=@prefix_b=@suffix=@information=@contact=@icon=@ad_url=@ad_png=@ad_alt=@ad_began=@promo=@ad_home=@stmp=@stmpd=@sc_filename=@sc_number=@sc_date=@sc_time=@sc_info=@yamladdr=@locale=@wc_lines=@wc_words=@wc_bytes=@file_encoding=@file_size=@firstseg=@programs=@author_copymark=@lang=@en=@dgst=@dgst_skin=@generated=@heading_seg_first=@base_program=@topic_register=@original_publication_details=@original_publication=@original_publication_date=@original_publication_nationality=@original_publication_institution=@writing_focus=@audio=nil @data,@fns,@cmd,@mod,@opt=fns_array,opt.fns,opt.cmd,opt.mod,opt #@data used as data @flag_tables,@set_header_title,@set_heading_top,@set_heading_seg,@heading_seg_first_flag,@flag_promo,@book_idx=false,false,false,false,false,false,false @seg_autoname_safe=true @@ -748,7 +884,7 @@ module SiSU_Param @authorship=@author=@creator.author @authors=@creator.author_detail when /^@classify:(.+)/m; classify=$1 - Md.new($1.strip).classify + @classify=Md.new($1.strip).classify when /^@publisher:\s+(.+)/m @publisher=Md.new($1.strip).current_publisher when /^@original:(.+)/m -- cgit v1.2.3