From 5e3e7429d5f0f808be70b4c919754344554245a9 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 13 Dec 2010 10:02:52 -0500 Subject: dal_syntax, sisu syntax modification (insist on curly braces) * enforce use of curly braces in markup, remove single word markup without curly braces (requested fix, Ben Armstrong (live-manual), problematic for technical writing) * also removed some undocumented alternative markup attributes --- lib/sisu/v2/dal_syntax.rb | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/dal_syntax.rb b/lib/sisu/v2/dal_syntax.rb index 0422d7b3..ead284ce 100644 --- a/lib/sisu/v2/dal_syntax.rb +++ b/lib/sisu/v2/dal_syntax.rb @@ -249,30 +249,12 @@ module SiSU_Syntax "\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite /blockquote? dob.obj.gsub!(/(^|[^\\])\^\{(.+?)\}\^/m, "\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|\(|\>|\S)9\{(.+?)\}9/m, - "\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript dob.obj.gsub!(/(^|[^\\]),\{(.+?)\},/m, "\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") #subscript - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)6\{(.+?)\}6/m, - "\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") #subscript dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\+\{(.+?)\}\+/m, "\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") #inserted text - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)v\{(.+?)\}v/m, - "\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") #inserted text dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)-\{(.+?)\}-/m, "\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") #strikethrough - deleted text - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)x\{(.+?)\}x/m, - 
"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") #deleted text - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\*(\S+?)\*/, - "\\1#{@emph[:o]}\\2#{@emph[:c]}") #emphasise single word, watch - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\!(\S+?)\!/, - "\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([^a-zA-Z0-9]|[ ,.;:'"~$]|$)/, - "\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}\\3") #italics single word, watch - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)_(\S+?)_([.,!'")]?(?:\s|$))/, - "\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}\\3") #underscore single word, watch (made more complicated by url decoration escape tag (_url)) - dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+)-([^{]\S+?)-( |$)/, - "\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}\\3") #underscore single word, watch dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>|\d+)\^(\S+?)\^/, "\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript single word, watch digit added dob -- cgit v1.2.3 From ff4ceb260ae3931072810a0ada124841d3b8e032 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Thu, 16 Dec 2010 23:19:35 -0500 Subject: document objects: page break; object separator (introduced & used, adjust later) * object separator introduced (requested Cory Doctorow, implementation not discussed) * dal_syntax, object separator syntax (<:---> or <:ols>) * constants, object separator * vim syntax highlighting, match object separator * document objects: page break; object separator (introduced & used) * dal, page break and object separator objects * represent page break and object separator: plaintext, html, epub, odf, texpdf (adjust later) --- lib/sisu/v2/constants.rb | 1 + lib/sisu/v2/dal_doc_str.rb | 8 +++++--- 
lib/sisu/v2/dal_syntax.rb | 1 - lib/sisu/v2/epub_format.rb | 5 +++++ lib/sisu/v2/epub_segments.rb | 3 ++- lib/sisu/v2/html_format.rb | 7 ++++++- lib/sisu/v2/html_scroll.rb | 2 ++ lib/sisu/v2/html_segments.rb | 3 ++- lib/sisu/v2/odf.rb | 19 +++++++++++++++---- lib/sisu/v2/odf_format.rb | 14 ++++++++++++++ lib/sisu/v2/plaintext.rb | 12 ++++++++++-- lib/sisu/v2/texpdf.rb | 6 ++++-- 12 files changed, 66 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/constants.rb b/lib/sisu/v2/constants.rb index 9367e9cd..6c949d53 100644 --- a/lib/sisu/v2/constants.rb +++ b/lib/sisu/v2/constants.rb @@ -93,6 +93,7 @@ Mx[:nbsp]= '░' #'▭ ' Mx[:br_line]= '▌' #lB ▌ 9612 #'┘' #'¶' Mx[:br_paragraph]= '█' #FB █ 9608 # PP ∥ 8741 #'▐' #'┘' #'¶' #FB █ 9608 lB ▌ 9612 RB ▐ 9616 Mx[:br_nl]= '』' # '┘' +Mx[:obj_ln_sep]= 'obj_ln_sep'; Hx[:obj_ln_sep]= {:obj=>Mx[:obj_ln_sep]} # line sep Mx[:br_page]= 'break_page'; Hx[:br_page]= {:obj=>Mx[:br_page]} # newpage Mx[:br_page_new]= 'break_page_new'; Hx[:br_page_new]= {:obj=>Mx[:br_page_new]} # clearpage Mx[:br_endnotes]= "#{Mx[:mk_o]}ENDNOTES#{Mx[:mk_c]}" diff --git a/lib/sisu/v2/dal_doc_str.rb b/lib/sisu/v2/dal_doc_str.rb index f1282df2..bca3cf7d 100644 --- a/lib/sisu/v2/dal_doc_str.rb +++ b/lib/sisu/v2/dal_doc_str.rb @@ -182,11 +182,13 @@ module SiSU_document_structure_extract SiSU_document_structure::Object_para.new.paragraph(h) else nil end - when /^(?:?)\s*$/ - if t_o =~/^(?:?)\s*$/ + when /^?\s*$/ + if t_o =~/^?\s*$/ SiSU_document_structure::Object_layout.new.break(Hx[:br_page_new]) else SiSU_document_structure::Object_layout.new.break(Hx[:br_page]) end + when /^?\s*$/ + SiSU_document_structure::Object_layout.new.break(Hx[:obj_ln_sep]) else #paragraph image=image_test(t_o) note=endnote_test?(t_o) @@ -784,7 +786,7 @@ module SiSU_document_structure_extract @o_array=[] node=ocn=ocn_dv=ocn_sp=ocnh=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnm=ocnu=ocnk=nm=0 # h heading, o other, t table, g group, i image 
node_count_flag=false - regex_exclude_ocn_and_node = /#{Rx[:meta]}|^@\S+?:\s|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|
|^<:\#|<:- |<[:!]!4|
< ]+?)([,.;'"]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, %{\\1#{Mx[:url_o]}\\2#{Mx[:url_c]}\\3}) end - dob.obj.gsub!(/<:?p([nb])>/,"#{Mx[:fa_o]}p\\1#{Mx[:fa_c]}") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') dob=fontface(dob) dob.obj.gsub!(/<[:e]\s+(.+?)!?>/, "#{Mx[:en_a_o]}\\1#{Mx[:en_a_c]}") #not tested diff --git a/lib/sisu/v2/epub_format.rb b/lib/sisu/v2/epub_format.rb index e016b011..5b452f83 100644 --- a/lib/sisu/v2/epub_format.rb +++ b/lib/sisu/v2/epub_format.rb @@ -1744,6 +1744,11 @@ WOK end para_form_css('p','norm') end + def break + @txt.gsub!(/#{Mx[:br_page_new]}|#{Mx[:br_page]}/,'

') + @txt.gsub!(/#{Mx[:obj_ln_sep]}/,'

') + para_form_css('p','norm') + end def format(tag,attrib) para_form_css(tag,attrib) end diff --git a/lib/sisu/v2/epub_segments.rb b/lib/sisu/v2/epub_segments.rb index 4edcc93b..a09aebe3 100644 --- a/lib/sisu/v2/epub_segments.rb +++ b/lib/sisu/v2/epub_segments.rb @@ -357,7 +357,6 @@ WOK end def markup(dob) @debug=[] - dob.obj.gsub!(/(?:\s*#{Mx[:br_page]}\s*|\s*#{Mx[:br_page_new]}\s*)+/m,'') format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) if dob.is =~/(?:heading|para)/ #extend as necessary FIX @p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,dob.ocn) @@ -392,6 +391,8 @@ WOK sto.code elsif dob.is=='table' sto.table + elsif dob.is=='break' + sto.break end if @md.flag_separate_endnotes # may need to revisit, check dob.obj.gsub!(/"\s+href="#note_ref(\d+)">/,%{" href=\"endnotes#{Sfx[:epub_xhtml]}#note_ref\\1">}) #endnote- twice #removed file type diff --git a/lib/sisu/v2/html_format.rb b/lib/sisu/v2/html_format.rb index 8074a7ee..a59f82ac 100644 --- a/lib/sisu/v2/html_format.rb +++ b/lib/sisu/v2/html_format.rb @@ -299,7 +299,7 @@ WOK #{@vz.banner_band} - #{doc_types} + #{doc_types}  #{firstseg}  @@ -1023,6 +1023,11 @@ WOK end para_form_css('p','norm') end + def break + @txt.gsub!(/#{Mx[:br_page_new]}|#{Mx[:br_page]}/,'

') + @txt.gsub!(/#{Mx[:obj_ln_sep]}/,'

') + para_form_css('p','norm') + end def format(tag,attrib) para_form_css(tag,attrib) end diff --git a/lib/sisu/v2/html_scroll.rb b/lib/sisu/v2/html_scroll.rb index 049eb922..bc6b4812 100644 --- a/lib/sisu/v2/html_scroll.rb +++ b/lib/sisu/v2/html_scroll.rb @@ -174,6 +174,8 @@ module SiSU_HTML_scroll sto.code elsif dob.is=='table' sto.table + elsif dob.is=='break' + sto.break end if dob =~// \ and dob =~/^(?:\^~\d+\s|)/ # hmmm re-adjusted 200507, for alt endnote which should again be matched ^~ ... not in response to problem though diff --git a/lib/sisu/v2/html_segments.rb b/lib/sisu/v2/html_segments.rb index a516d809..c1649a88 100644 --- a/lib/sisu/v2/html_segments.rb +++ b/lib/sisu/v2/html_segments.rb @@ -357,7 +357,6 @@ module SiSU_HTML_seg end def markup(dob) @debug=[] - dob.obj.gsub!(/(?:\s*#{Mx[:br_page]}\s*|\s*#{Mx[:br_page_new]}\s*)+/m,'') format_head_seg=SiSU_HTML_Format::Head_seg.new(@md) if dob.is !~/meta/ if dob.is =~/(?:heading|para)/ #extend as necessary FIX @@ -398,6 +397,8 @@ module SiSU_HTML_seg sto.code elsif dob.is=='table' sto.table + elsif dob.is=='break' + sto.break end if @md.flag_separate_endnotes dob.obj.gsub!(/"\s+href="#_(\d+)">/,%{" href=\"endnotes#{Sfx[:html]}#_\\1">}) #endnote- twice #removed file type diff --git a/lib/sisu/v2/odf.rb b/lib/sisu/v2/odf.rb index 769870eb..d19945c0 100644 --- a/lib/sisu/v2/odf.rb +++ b/lib/sisu/v2/odf.rb @@ -411,6 +411,18 @@ module SiSU_ODF end dob end + def obj_break(dob) + if dob.is =='break' + br=SiSU_ODF_format::Format_obj_break.new(@md,dob) + if dob.obj==Mx[:br_page] \ + or dob.obj==Mx[:br_page_new] + dob=br.br_page + elsif dob.obj==Mx[:obj_ln_sep] + dob=br.obj_sep + end + end + dob + end def odf_structure(md,dob) @md,@dob=md,dob dob=if dob.is !='code' @@ -431,6 +443,8 @@ module SiSU_ODF @@odf[:body] << code(dob).obj << @br*2 elsif dob.is=='table' #elsif dob.obj =~ /') - dob.obj.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/, - ' ') dob.obj.gsub!(/©/,'©') #too arbitrary dob.obj.gsub!(/.+?<-#>/,'') # 
remove dummy headings (used by html) #check dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/, @@ -517,7 +529,6 @@ module SiSU_ODF dob.obj.gsub!(/\\copy(?:right)?\b/,'©') dob.obj.gsub!(/\\trademark\b|\\tm\b/,'®') dob.obj.gsub!(/\44/,'$') #$ watch - dob.obj.gsub!(/#{Mx[:br_page]}|#{Mx[:br_page_new]}/,'') # remove page breaks dob.obj.gsub!(/(.+?)<\/a>/,'\1') dob.obj.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links wordlist=dob.obj.scan(/\S+/) @@ -528,7 +539,7 @@ module SiSU_ODF if defined? dob.ocn and dob.ocn =~/\d+/ @p_num=SiSU_ODF_format::Paragraph_number.new(dob.ocn) end - if dob.is=~/heading|para|group|verse|code|table/ # extend, include other types + if dob.is=~/heading|para|group|verse|code|table|break/ # extend, include other types odf_structure(@md,dob) end dob.obj.gsub!(//,' ') if dob.obj ## Clean Prepared Text diff --git a/lib/sisu/v2/odf_format.rb b/lib/sisu/v2/odf_format.rb index 384b46b7..77b5bbbc 100644 --- a/lib/sisu/v2/odf_format.rb +++ b/lib/sisu/v2/odf_format.rb @@ -197,6 +197,20 @@ module SiSU_ODF_format @dob end end + class Format_obj_break + def initialize(md,t_o) + @md,@t_o=md,t_o + end + def br_page + @t_o.obj=' ' + @t_o + end + def obj_sep #center later + sep='--- ' + @t_o.obj=%{#{sep*20}} + @t_o + end + end class XML end end diff --git a/lib/sisu/v2/plaintext.rb b/lib/sisu/v2/plaintext.rb index e919af78..ba146978 100644 --- a/lib/sisu/v2/plaintext.rb +++ b/lib/sisu/v2/plaintext.rb @@ -328,13 +328,12 @@ WOK dob.obj.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _> _< dob.obj.gsub!(/(^|[^}])_([<>])/m,'\1\2') # _<_< end - dob.obj.gsub!(/#{Mx[:br_page]}\s*|#{Mx[:br_page_new]}/,'') # remove page breaks dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1') dob.obj.gsub!(/(.+?)<\/a>/m,'\1') dob.obj.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') # remove name links dob.obj.gsub!(/ |#{Mx[:nbsp]}/,' ') # decide on dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [ \1 ]') 
#"[ #{dir.url.images_local}\/\\1 ]") - dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") + dob.obj.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}image/,' [ \1 ]') dob.obj.gsub!(/(?:^|[^_\\])\{\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') wordlist=dob.obj.scan(/\S+/) if dob.obj !~/(^#{Rx[:meta]}|#{Mx[:br_eof]}|#{Mx[:br_endnotes]})/ @@ -350,6 +349,15 @@ WOK or dob.is=='code' \ or dob.is=='table' @plaintext[:body] << dob.obj << @br + elsif dob.is=='break' + sp=' ' + ln='-' + @plaintext[:body] <<=if dob.obj==Mx[:br_page] \ + or dob.obj==Mx[:br_page_new] + "#{@br}#{ln*40}#{@br*2}" + elsif dob.obj ==Mx[:obj_ln_sep] + "#{@br}#{sp*20}* * *#{@br*2}" + end # following empty line (@br) missing, fix end dob='' if (dob.obj =~// \ and dob.obj =~/^(-\{{2}~\d+|)/) # -endnote diff --git a/lib/sisu/v2/texpdf.rb b/lib/sisu/v2/texpdf.rb index 3a868cd8..a1a2813f 100644 --- a/lib/sisu/v2/texpdf.rb +++ b/lib/sisu/v2/texpdf.rb @@ -72,7 +72,7 @@ module SiSU_TeX @@tex_pattern_margin_number=/\\\\begin\\\{tiny\\\}\\\\hspace\\\{0mm\\\}\\\\end\\\{tiny\\\}\\\{\\\\marginpar.+?\s+/ @@n=@@tableheader=@@rights=nil @@date ||=SiSU_Env::Info_date.new - class Source #Songsheet # Date: Tue, 1 Feb 2011 07:53:24 -0500 Subject: prepare for v3 branch --- lib/sisu/v2/defaults.rb | 2 +- lib/sisu/v2/odf.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/sisu/v2/defaults.rb b/lib/sisu/v2/defaults.rb index a5305e4d..2c3cf207 100644 --- a/lib/sisu/v2/defaults.rb +++ b/lib/sisu/v2/defaults.rb @@ -1980,7 +1980,7 @@ outputs include: plaintext, html, XHTML, XML, ODF (OpenDocument), EPUB, LaTeX, P

-SiSU Short Description +SiSU Short Description

SiSU is a comprehensive future-proofing electronic document management system. Built-in search capabilities allow you to search across multiple documents and highlight matches in an easy-to-follow format. Paragraph numbering system allows you to cite your electronic documents in a consistent manner across multiple file formats. Multiple format outputs allow you to display your documents in plain text, PDF (portrait and horizontal), OpenDocument format, HTML, or e-book reading format (EPUB). Word mapping allows you to easily create word indexes for your documents. Future-proofing flexibility allows you to quickly adapt your documents to newer output formats as needed. All these and many other features are achieved with little or no additional work on your documents - by marking up the documents with a super simplistic markup language, leaving the SiSU engine to handle the heavy-lifting processing. diff --git a/lib/sisu/v2/odf.rb b/lib/sisu/v2/odf.rb index d19945c0..2e2a88b4 100644 --- a/lib/sisu/v2/odf.rb +++ b/lib/sisu/v2/odf.rb @@ -612,7 +612,7 @@ module SiSU_ODF %{#{@br}} + %{#{@br}} + %{#{@br}} + - %{#{@br}} + + %{#{@br}} + %{#{@br}} + %{#{@br}} + %{#{@br}} + -- cgit v1.2.3 From 4b51bc00cda70d3c118401a74f1704df38c947a3 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Tue, 1 Feb 2011 09:48:30 -0500 Subject: v3 introduced as development branch, invoked using "sisu --v3 [instructions] --- lib/sisu/v3/air.rb | 85 + lib/sisu/v3/author_format.rb | 113 + lib/sisu/v3/cgi.rb | 86 + lib/sisu/v3/cgi_pgsql.rb | 230 ++ lib/sisu/v3/cgi_sql_common.rb | 992 ++++++ lib/sisu/v3/cgi_sqlite.rb | 216 ++ lib/sisu/v3/character_encoding.rb | 379 +++ lib/sisu/v3/composite.rb | 275 ++ lib/sisu/v3/concordance.rb | 345 ++ lib/sisu/v3/conf.rb | 249 ++ lib/sisu/v3/constants.rb | 595 ++++ lib/sisu/v3/css.rb | 2085 ++++++++++++ lib/sisu/v3/dal.rb | 506 +++ lib/sisu/v3/dal_character_check.rb | 104 + lib/sisu/v3/dal_doc_objects.rb | 444 +++ lib/sisu/v3/dal_doc_str.rb | 1195 +++++++ 
lib/sisu/v3/dal_endnotes.rb | 125 + lib/sisu/v3/dal_expand_insertions.rb | 198 ++ lib/sisu/v3/dal_hash_digest.rb | 155 + lib/sisu/v3/dal_idx.rb | 357 ++ lib/sisu/v3/dal_images.rb | 155 + lib/sisu/v3/dal_metadata.rb | 79 + lib/sisu/v3/dal_numbering.rb | 465 +++ lib/sisu/v3/dal_substitutions_and_insertions.rb | 154 + lib/sisu/v3/dal_syntax.rb | 523 +++ lib/sisu/v3/db_columns.rb | 2079 ++++++++++++ lib/sisu/v3/db_create.rb | 612 ++++ lib/sisu/v3/db_dbi.rb | 93 + lib/sisu/v3/db_drop.rb | 187 ++ lib/sisu/v3/db_import.rb | 649 ++++ lib/sisu/v3/db_indexes.rb | 113 + lib/sisu/v3/db_load_tuple.rb | 331 ++ lib/sisu/v3/db_remove.rb | 109 + lib/sisu/v3/db_select.rb | 209 ++ lib/sisu/v3/db_sqltxt.rb | 134 + lib/sisu/v3/db_tests.rb | 117 + lib/sisu/v3/dbi.rb | 143 + lib/sisu/v3/defaults.rb | 2342 +++++++++++++ lib/sisu/v3/digests.rb | 388 +++ lib/sisu/v3/embedded.rb | 139 + lib/sisu/v3/epub.rb | 673 ++++ lib/sisu/v3/epub_concordance.rb | 312 ++ lib/sisu/v3/epub_format.rb | 2030 ++++++++++++ lib/sisu/v3/epub_segments.rb | 525 +++ lib/sisu/v3/epub_tune.rb | 417 +++ lib/sisu/v3/errors.rb | 81 + lib/sisu/v3/git.rb | 276 ++ lib/sisu/v3/harvest.rb | 101 + lib/sisu/v3/harvest_authors.rb | 316 ++ lib/sisu/v3/harvest_topics.rb | 559 ++++ lib/sisu/v3/help.rb | 1924 +++++++++++ lib/sisu/v3/html.rb | 654 ++++ lib/sisu/v3/html_format.rb | 1480 +++++++++ lib/sisu/v3/html_minitoc.rb | 198 ++ lib/sisu/v3/html_promo.rb | 434 +++ lib/sisu/v3/html_scroll.rb | 202 ++ lib/sisu/v3/html_segments.rb | 553 ++++ lib/sisu/v3/html_table.rb | 65 + lib/sisu/v3/html_tune.rb | 377 +++ lib/sisu/v3/hub.rb | 594 ++++ lib/sisu/v3/i18n.rb | 1781 ++++++++++ lib/sisu/v3/manifest.rb | 738 +++++ lib/sisu/v3/manpage.rb | 429 +++ lib/sisu/v3/manpage_format.rb | 108 + lib/sisu/v3/objects.txt | 42 + lib/sisu/v3/odf.rb | 751 +++++ lib/sisu/v3/odf_format.rb | 220 ++ lib/sisu/v3/options.rb | 248 ++ lib/sisu/v3/param.rb | 1403 ++++++++ lib/sisu/v3/param_identify_markup.rb | 168 + lib/sisu/v3/particulars.rb | 210 ++ 
lib/sisu/v3/plaintext.rb | 424 +++ lib/sisu/v3/plaintext_format.rb | 113 + lib/sisu/v3/po4a.rb | 974 ++++++ lib/sisu/v3/po4a_set.rb | 300 ++ lib/sisu/v3/relaxng.rb | 1161 +++++++ lib/sisu/v3/remote.rb | 211 ++ lib/sisu/v3/response.rb | 75 + lib/sisu/v3/rexml.rb | 121 + lib/sisu/v3/screen_text_color.rb | 414 +++ lib/sisu/v3/share_src.rb | 97 + lib/sisu/v3/share_src_kdissert.rb | 88 + lib/sisu/v3/shared_html.rb | 65 + lib/sisu/v3/shared_html_lite.rb | 316 ++ lib/sisu/v3/shared_markup_alt.rb | 320 ++ lib/sisu/v3/shared_metadata.rb | 1361 ++++++++ lib/sisu/v3/shared_sem.rb | 156 + lib/sisu/v3/shared_txt.rb | 232 ++ lib/sisu/v3/shared_xhtml.rb | 65 + lib/sisu/v3/shared_xml.rb | 712 ++++ lib/sisu/v3/sisupod_make.rb | 341 ++ lib/sisu/v3/sitemaps.rb | 217 ++ lib/sisu/v3/spell.rb | 101 + lib/sisu/v3/sst_convert_markup.rb | 318 ++ lib/sisu/v3/sst_do_inline_footnotes.rb | 474 +++ lib/sisu/v3/sst_from_xml.rb | 169 + lib/sisu/v3/sst_identify_markup.rb | 474 +++ lib/sisu/v3/sst_to_s_xml_sax.rb | 453 +++ lib/sisu/v3/sysenv.rb | 3993 +++++++++++++++++++++++ lib/sisu/v3/termsheet.rb | 166 + lib/sisu/v3/texinfo.rb | 385 +++ lib/sisu/v3/texinfo_format.rb | 587 ++++ lib/sisu/v3/texpdf.rb | 832 +++++ lib/sisu/v3/texpdf_format.rb | 1372 ++++++++ lib/sisu/v3/update.rb | 130 + lib/sisu/v3/urls.rb | 305 ++ lib/sisu/v3/webrick.rb | 176 + lib/sisu/v3/wikispeak.rb | 373 +++ lib/sisu/v3/xhtml.rb | 425 +++ lib/sisu/v3/xhtml_table.rb | 99 + lib/sisu/v3/xml.rb | 462 +++ lib/sisu/v3/xml_dom.rb | 536 +++ lib/sisu/v3/xml_fictionbook.rb | 300 ++ lib/sisu/v3/xml_format.rb | 1644 ++++++++++ lib/sisu/v3/xml_md_oai_pmh_dc.rb | 230 ++ lib/sisu/v3/xml_scaffold.rb | 199 ++ lib/sisu/v3/xml_tables.rb | 203 ++ lib/sisu/v3/zap.rb | 83 + 118 files changed, 57876 insertions(+) create mode 100644 lib/sisu/v3/air.rb create mode 100644 lib/sisu/v3/author_format.rb create mode 100644 lib/sisu/v3/cgi.rb create mode 100644 lib/sisu/v3/cgi_pgsql.rb create mode 100644 lib/sisu/v3/cgi_sql_common.rb create mode 100644 
lib/sisu/v3/cgi_sqlite.rb create mode 100644 lib/sisu/v3/character_encoding.rb create mode 100644 lib/sisu/v3/composite.rb create mode 100644 lib/sisu/v3/concordance.rb create mode 100644 lib/sisu/v3/conf.rb create mode 100644 lib/sisu/v3/constants.rb create mode 100644 lib/sisu/v3/css.rb create mode 100644 lib/sisu/v3/dal.rb create mode 100644 lib/sisu/v3/dal_character_check.rb create mode 100644 lib/sisu/v3/dal_doc_objects.rb create mode 100644 lib/sisu/v3/dal_doc_str.rb create mode 100644 lib/sisu/v3/dal_endnotes.rb create mode 100644 lib/sisu/v3/dal_expand_insertions.rb create mode 100644 lib/sisu/v3/dal_hash_digest.rb create mode 100644 lib/sisu/v3/dal_idx.rb create mode 100644 lib/sisu/v3/dal_images.rb create mode 100644 lib/sisu/v3/dal_metadata.rb create mode 100644 lib/sisu/v3/dal_numbering.rb create mode 100644 lib/sisu/v3/dal_substitutions_and_insertions.rb create mode 100644 lib/sisu/v3/dal_syntax.rb create mode 100644 lib/sisu/v3/db_columns.rb create mode 100644 lib/sisu/v3/db_create.rb create mode 100644 lib/sisu/v3/db_dbi.rb create mode 100644 lib/sisu/v3/db_drop.rb create mode 100644 lib/sisu/v3/db_import.rb create mode 100644 lib/sisu/v3/db_indexes.rb create mode 100644 lib/sisu/v3/db_load_tuple.rb create mode 100644 lib/sisu/v3/db_remove.rb create mode 100644 lib/sisu/v3/db_select.rb create mode 100644 lib/sisu/v3/db_sqltxt.rb create mode 100644 lib/sisu/v3/db_tests.rb create mode 100644 lib/sisu/v3/dbi.rb create mode 100644 lib/sisu/v3/defaults.rb create mode 100644 lib/sisu/v3/digests.rb create mode 100644 lib/sisu/v3/embedded.rb create mode 100644 lib/sisu/v3/epub.rb create mode 100644 lib/sisu/v3/epub_concordance.rb create mode 100644 lib/sisu/v3/epub_format.rb create mode 100644 lib/sisu/v3/epub_segments.rb create mode 100644 lib/sisu/v3/epub_tune.rb create mode 100644 lib/sisu/v3/errors.rb create mode 100644 lib/sisu/v3/git.rb create mode 100644 lib/sisu/v3/harvest.rb create mode 100644 lib/sisu/v3/harvest_authors.rb create mode 100644 
lib/sisu/v3/harvest_topics.rb create mode 100644 lib/sisu/v3/help.rb create mode 100644 lib/sisu/v3/html.rb create mode 100644 lib/sisu/v3/html_format.rb create mode 100644 lib/sisu/v3/html_minitoc.rb create mode 100644 lib/sisu/v3/html_promo.rb create mode 100644 lib/sisu/v3/html_scroll.rb create mode 100644 lib/sisu/v3/html_segments.rb create mode 100644 lib/sisu/v3/html_table.rb create mode 100644 lib/sisu/v3/html_tune.rb create mode 100644 lib/sisu/v3/hub.rb create mode 100644 lib/sisu/v3/i18n.rb create mode 100644 lib/sisu/v3/manifest.rb create mode 100644 lib/sisu/v3/manpage.rb create mode 100644 lib/sisu/v3/manpage_format.rb create mode 100644 lib/sisu/v3/objects.txt create mode 100644 lib/sisu/v3/odf.rb create mode 100644 lib/sisu/v3/odf_format.rb create mode 100644 lib/sisu/v3/options.rb create mode 100644 lib/sisu/v3/param.rb create mode 100644 lib/sisu/v3/param_identify_markup.rb create mode 100644 lib/sisu/v3/particulars.rb create mode 100644 lib/sisu/v3/plaintext.rb create mode 100644 lib/sisu/v3/plaintext_format.rb create mode 100644 lib/sisu/v3/po4a.rb create mode 100644 lib/sisu/v3/po4a_set.rb create mode 100644 lib/sisu/v3/relaxng.rb create mode 100644 lib/sisu/v3/remote.rb create mode 100644 lib/sisu/v3/response.rb create mode 100644 lib/sisu/v3/rexml.rb create mode 100644 lib/sisu/v3/screen_text_color.rb create mode 100644 lib/sisu/v3/share_src.rb create mode 100644 lib/sisu/v3/share_src_kdissert.rb create mode 100644 lib/sisu/v3/shared_html.rb create mode 100644 lib/sisu/v3/shared_html_lite.rb create mode 100644 lib/sisu/v3/shared_markup_alt.rb create mode 100644 lib/sisu/v3/shared_metadata.rb create mode 100644 lib/sisu/v3/shared_sem.rb create mode 100644 lib/sisu/v3/shared_txt.rb create mode 100644 lib/sisu/v3/shared_xhtml.rb create mode 100644 lib/sisu/v3/shared_xml.rb create mode 100644 lib/sisu/v3/sisupod_make.rb create mode 100644 lib/sisu/v3/sitemaps.rb create mode 100644 lib/sisu/v3/spell.rb create mode 100644 
lib/sisu/v3/sst_convert_markup.rb create mode 100644 lib/sisu/v3/sst_do_inline_footnotes.rb create mode 100644 lib/sisu/v3/sst_from_xml.rb create mode 100644 lib/sisu/v3/sst_identify_markup.rb create mode 100644 lib/sisu/v3/sst_to_s_xml_sax.rb create mode 100644 lib/sisu/v3/sysenv.rb create mode 100644 lib/sisu/v3/termsheet.rb create mode 100644 lib/sisu/v3/texinfo.rb create mode 100644 lib/sisu/v3/texinfo_format.rb create mode 100644 lib/sisu/v3/texpdf.rb create mode 100644 lib/sisu/v3/texpdf_format.rb create mode 100644 lib/sisu/v3/update.rb create mode 100644 lib/sisu/v3/urls.rb create mode 100644 lib/sisu/v3/webrick.rb create mode 100644 lib/sisu/v3/wikispeak.rb create mode 100644 lib/sisu/v3/xhtml.rb create mode 100644 lib/sisu/v3/xhtml_table.rb create mode 100644 lib/sisu/v3/xml.rb create mode 100644 lib/sisu/v3/xml_dom.rb create mode 100644 lib/sisu/v3/xml_fictionbook.rb create mode 100644 lib/sisu/v3/xml_format.rb create mode 100644 lib/sisu/v3/xml_md_oai_pmh_dc.rb create mode 100644 lib/sisu/v3/xml_scaffold.rb create mode 100644 lib/sisu/v3/xml_tables.rb create mode 100644 lib/sisu/v3/zap.rb (limited to 'lib') diff --git a/lib/sisu/v3/air.rb b/lib/sisu/v3/air.rb new file mode 100644 index 00000000..abe0ee6c --- /dev/null +++ b/lib/sisu/v3/air.rb @@ -0,0 +1,85 @@ +# coding:utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_Air + require "#{SiSU_lib}/particulars" # particulars.rb + #require "#{SiSU_lib}/defaults" # defaults.rb + #require "#{SiSU_lib}/sysenv" # sysenv.rb + #require "#{SiSU_lib}/param" # param.rb + class Source + @@dal_array=[] + @@fns=nil + def initialize(opt) + @opt=opt + @@fns||@opt.fns + @particulars=SiSU_Particulars::Combined.new(opt) + #@env=@particulars.env + #@md=@particulars.md + #@dal_array=@particulars.dal_array + end + def read + end + protected + def print + puts @particulars.md.inspect + puts @particulars.env.inspect + puts @particulars.dal_array + end + end +end +__END__ diff --git a/lib/sisu/v3/author_format.rb b/lib/sisu/v3/author_format.rb new file mode 100644 index 00000000..dee83eb6 --- /dev/null +++ b/lib/sisu/v3/author_format.rb @@ -0,0 +1,113 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module FORMAT + class Author + def initialize(author_param) + @author_param=author_param + end + def author_details + @authors,@author_array=[],[] + authors=@author_param.scan(/[^;]+/) + authors.each do |a| + a.strip! + if a =~/"(.+?)"/ + @authors << { :the => $1 } + @author_array << $1.upcase + else #if a =~/,/ + x=a.scan(/[^,]+/) + x[0].strip! + x[1].strip! 
if x[1] + if x.length==1 + @authors << { :the => x[0] } + @author_array << x[0].upcase + elsif x.length==2 + @authors << { :the=> x[0], :others => x[1] } + @author_array << "#{x[0].upcase}, #{x[1]}" + else #p x.length + end + end + end + l = @authors.length + authors_string='' + @authors.each_with_index do |a,i| + authors_string += if a[:others] + if (l - i) > 1 + "#{a[:others]} #{a[:the]}, " + else + "#{a[:others]} #{a[:the]}" + end + else + if (l - i) > 2 + "#{a[:the]}, " + else + "#{a[:the]}" + end + end + end + { + :last_first_a => authors, + :last_first_format_a => @author_array, + :authors_h => @authors, + :authors_s => authors_string, + :authors_param => @author_param + } + end + end +end +__END__ diff --git a/lib/sisu/v3/cgi.rb b/lib/sisu/v3/cgi.rb new file mode 100644 index 00000000..a375282f --- /dev/null +++ b/lib/sisu/v3/cgi.rb @@ -0,0 +1,86 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: generates naive cgi search form for search of sisu database + (pgsql sqlite) + +=end +module SiSU_CGI #% database building documents + require "#{SiSU_lib}/sysenv" # sysenv.rb + require "#{SiSU_lib}/cgi_pgsql" # cgi_pgsql.rb + include SiSU_CGI_pgsql + require "#{SiSU_lib}/cgi_sqlite" # cgi_sqlite.rb + class SiSU_search + def initialize(opt) + @opt=opt + @webserv=@opt.files[0].to_s.strip #verify @opt.files[0].class + end + def search_info + a=%{ + For help on sisu search, type: + sisu --help search + For help on setting up hyperestraier for sisu, type: + sisu --help hyperestraier + } + SiSU_Screen::Ansi.new(@opt.cmd,a).print_grey + end + def read + SiSU_CGI_sqlite::SiSU_search_sqlite.new(@opt,@webserv).sqlite + SiSU_CGI_pgsql::SiSU_search_pgsql.new(@opt,@webserv).pgsql + search_info unless @opt.cmd =~/q/ + end + end +end +__END__ diff --git a/lib/sisu/v3/cgi_pgsql.rb b/lib/sisu/v3/cgi_pgsql.rb new file mode 100644 index 00000000..7a948366 --- /dev/null +++ b/lib/sisu/v3/cgi_pgsql.rb @@ -0,0 +1,230 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: generates naive cgi search form for search of sisu database (pgsql sqlite) + +=end +module SiSU_CGI_pgsql #% database building documents + require "#{SiSU_lib}/sysenv" # sysenv.rb + require "#{SiSU_lib}/cgi_sql_common" # cgi_sql_common.rb + include SiSU_CGI_sql + class SiSU_search_pgsql < SiSU_CGI_common + def initialize(opt,webserv) + @opt,@webserv=opt,webserv + @env=SiSU_Env::Info_env.new('',opt) + @sys=SiSU_Env::System_call.new + @image_src="#{@env.url.webserv_cgi}/_sisu/image_sys" + @common=SiSU_CGI_sql::SiSU_CGI_common.new(@webserv,@opt.cmd,@image_src,@env) + @db=SiSU_Env::Info_db.new + @cgi_file_name="#{Db[:name_prefix_db]}pgsql.cgi" + end + def pgsql + serve=[] + if @sys.psql + available_db_table=`psql --list` # system call requires psql + available_db=available_db_table.scan(/(#{Db[:name_prefix]}\S+)/) if not available_db_table.nil? + if available_db \ + and available_db.class==Array + available_db.flatten.each do |x| + serve << x.gsub(/#{Db[:name_prefix]}(\S+)/,'\1') + end + else put "WARNING: no postgresql database available, (have you created one?)" + end + serve.sort! 
+ f1,f2,f3=[],[],[] + serve.each do |x| + f1 << %{ \n} unless x =~/apache|sisu\/image/ #check + end + f2 << %{ selected_db=case cgi['db']\n} + serve.each do |x| + f2 << %{ when /#{Db[:name_prefix]}#{x}/; ''\n} unless x =~/apache|sisu\/image/ #check + end + end + f2 << " end\n" + if FileTest.writable?('.') + output=File.open(@cgi_file_name,'w') + output << header0 << header1 << header_desc << header2 << f1 << buttons1 << buttons1_pgsql << buttons2 << search_request << search_statement << search_statement_common << search_query1 << @common.pages << search_query2 << @common.tail << @common.main1 << f2 << dbi_connect << @common.main2 + a=%{ generated sisu_pgsql.cgi, + BASED ON ALREADY EXISTING directories that could potentially be used to populate postgresql db, (-D) + } + SiSU_Screen::Ansi.new(@opt.cmd,a).print_grey + c=case @webserv + when /pwd/; '' + else "if necessary make the directory /usr/lib/cgi-bin : + sudo cp -vi #{Dir.pwd}/#{@cgi_file_name} /usr/lib/cgi-bin/. + sudo chmod -v 755 /usr/lib/cgi-bin/#{@cgi_file_name} + sudo ln -s /usr/lib/cgi-bin/#{@cgi_file_name} /usr/lib/cgi-bin/sisu_pgsql.cgi + (copy #{@cgi_file_name} to your cgi directory) set file permissions to 755, and make symbolic link to sisu_pgsql.cgi" + end + a=%{#{c} + #{@env.webserv_base_cgi}/cgi-bin/sisu_pgsql.cgi + } + SiSU_Screen::Ansi.new(@opt.cmd,a).warn + a="postgresql db used for present directory: #{@db.psql.db}" + b="\n\t(to create and populate postgresql database see 'man sisu' and in particular the -D flag)\n\t[the database to be used for this directory (#{@db.psql.db}) will have to be created manually if it does not exist,\n\tsee 'sisu --help sql'\n\tif you have permission to create databases:\n\t'sisu -d --createdb'\n\tor using postgresql tools directly:\n\t'createdb #{@db.psql.db}'\n\tfor a list of existing databases try 'psql --list']" + SiSU_Screen::Ansi.new(@opt.cmd,a,b).txt_cyan + else puts 'failed in attempt to write sisu_pgsql.cgi to present directory, is directory writable?' 
+ end + end + def header0 + <<-WOK_SQL +#!/usr/bin/env ruby +=begin +#{about} + * Description: generates naive cgi search form for search of sisu database (pgsql) +#{gpl} +=end + require 'cgi' + require 'fcgi' + require 'dbi' + @version='sisu_pgsql' + @image_src="#{@env.url.webserv_cgi}/_sisu/image_sys" + @hosturl_db="#{@env.url.webserv_base_cgi}" + @hosturl_files="#{@env.url.webserv_files_from_db}" + @port="#{@db.psql.port}" + @db_name_prefix="#{Db[:name_prefix]}" + user='#{@env.user}' # check user name for access to pg database: '#{@env.user}' + WOK_SQL + end + def search_statement + <<-'WOK_SQL' + class Dbi_search_string + def initialize(l,t,q,cse=false) + @l,@t,@q,@c=l,t,q,cse + end + def string + search={ :search => [],:flag => false } + if @t =~/\S+/ or @q =~/\S+/ + if @t =~/\S+/; unescaped_search=CGI.unescape(@t) + elsif @q =~/\S+/; unescaped_search=CGI.unescape(@q) + end + search_construct=[] + if @c + unescaped_search.gsub!(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~\( '") + unescaped_search.gsub!(/(.+)/,"#{@l}~\( '\\1' \)") + else + unescaped_search.gsub!(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~*\( '") + unescaped_search.gsub!(/(.+)/,"#{@l}~*\( '\\1' \)") + end + search_construct << unescaped_search + search_construct=search_construct.join(' ') + search[:search] << search_construct + search[:flag]=true + search + end + search + end + end + WOK_SQL + end + def search_query1 + <<-'WOK_SQL' + @search_text,@search_endnotes=[],[] + search[:text].each{|x| @search_text << "#{x} AND " } + @search_text=@search_text.join.gsub!(/AND\s+$/,'') #watch + @search_text.gsub!(/(doc_objects\.clean~[*]?\(\s*'[^']+'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean~[*]?\(\s*'[^']+'\s*\))+)/,'(\1)') + search[:endnotes].each{|x| @search_endnotes << "#{x} AND " } + @search_endnotes=@search_endnotes.join.gsub!(/AND\s+$/,'') #watch + @search_endnotes.gsub!(/(endnotes\.clean~\(\s*'[^']+'\s*\)\s+(?:(?:AND|OR)\s+endnotes\.clean~\(\s*'[^']+'\s*\))+)/,'(\1)') + end + WOK_SQL + end + def search_query2 + 
<<-'WOK_SQL' + def sql_select_body + limit ||=@@limit + offset ||=@@offset + @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE (#{@search_text}) AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} + @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} + select=@sql_statement[:body] + ' ' + @sql_statement[:range] + select + end + def sql_select_endnotes + limit ||=@@limit + offset ||=@@offset + @sql_statement[:endnotes]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata_and_text, endnotes WHERE (#{@search_endnotes}) AND metadata_and_text.tid = endnotes.metadata_tid ORDER BY metadata_and_text.title, metadata_and_text.src_filename, endnotes.nr} + @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} + select=@sql_statement[:endnotes] + ' ' + @sql_statement[:range] + select + end + def sql_select_body_format + %{#{sql_select_body}} + end + def sql_select_endnotes_format + %{#{sql_select_endnotes}} + end + def contents + @conn.select_all(sql_select_body) + end + def endnotes + @conn.select_all(sql_select_endnotes) + end + end + WOK_SQL + end + def buttons1_pgsql + <<-'WOK_SQL' + case sensitive + WOK_SQL + end + def dbi_connect + <<-'WOK_SQL' + dbi="dbi:Pg:database=#{@db};port=#{@port}" + @conn=DBI.connect(dbi,user) + WOK_SQL + end + end +end +__END__ diff --git a/lib/sisu/v3/cgi_sql_common.rb b/lib/sisu/v3/cgi_sql_common.rb new file mode 100644 index 00000000..1b043f00 --- /dev/null +++ b/lib/sisu/v3/cgi_sql_common.rb @@ -0,0 +1,992 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, 
publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: generates naive cgi search form for search of sisu database (pgsql sqlite) + +=end +module SiSU_CGI_sql + class SiSU_CGI_common + def initialize(webserv,cmd,image_src,dir) + @webserv,@cmd,@image_src,@env=webserv,cmd,image_src,dir + end + def about + <<-'WOK_SQL' + * Name: SiSU information Structuring Universe + * Author: Ralph Amissah + * http://www.jus.uio.no/sisu + * http://www.jus.uio.no/sisu/SiSU/download + WOK_SQL + end + def gpl + <<-'WOK_SQL' + * Name: SiSU generated sample cgi search form + + * Description: generated sample cgi search form for SiSU + (SiSU is a framework for document structuring, publishing and search) + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + WOK_SQL + end + def header1 + <<-'WOK_SQL' +#Common TOP + @@offset=0 + @base="#{@hosturl_db}/cgi-bin/#{@version}.cgi" + @@canned_search_url=@base + @color_heading='#DDFFAA' + @color_match='#ffff48' + class Form + def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') + search_note='' if checked_searched !~/\S/ + the_can='' if checked_url !~/\S/ + search_field='' if checked_echo !~/\S/ + 
@base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can + @tip=if checked_tip =~/\S/ + 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;
' + else '' + end + end + def submission_form + search_form=<<-WOK + WOK_SQL + end + def header_desc + <<-WOK_SQL + + + + + SiSU search form (sample): SiSU information Structuring Universe + + + + + + + + + + +
SiSU --> + + +
+ WOK_SQL + end + def header2 + <<-'WOK_SQL' +

+ + +
+ + + #{@tip} + #{@search_note} + #{@the_can} +
+ + + + + to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) +
+ + + index + text / grep + WOK_SQL + end + def buttons2 + <<-'WOK_SQL' +
+ match limit: + 1,000 + 2,500 +
+ echo query + result stats + search url + searched + available fields + sql statement +
+ checks: + default + selected + all + none +
+ + +
+ WOK + end + end + WOK_SQL + end + def search_request + <<-'WOK_SQL' + class Search_request #% search_for + attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename + def initialize(search_field='',q='') + @search_field,@q=search_field,q + @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' + if @search_field=~/\S/ + @text1=text_to_match('text:') + @fulltext=text_to_match('fulltxt:') + @topic_register=text_to_match('topic_register:') + @title=text_to_match('title:') # DublinCore 1 - title + @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author + @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject + @description=text_to_match('description:') # DublinCore 4 - description + @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher + @contributor=text_to_match('contributor:') # DublinCore 6 - contributor + @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy + @type=text_to_match('type:') # DublinCore 8 - type + @format=text_to_match('format:') # DublinCore 9 - format + @identifier=text_to_match('identifier:') # DublinCore 10 - identifier + @source=text_to_match('source:') # DublinCore 11 - source + @language=text_to_match('language:') # DublinCore 12 - language + @relation=text_to_match('relation:') # DublinCore 13 - relation + @coverage=text_to_match('coverage:') # DublinCore 14 - coverage + @rights=text_to_match('rights:') # DublinCore 15 - rights + @keywords=text_to_match('key(?:words?)?:') + @comment=text_to_match('comment:') + 
@abstract=text_to_match('abs(?:tract)?:') + @owner=text_to_match('owner:') + @date_created=text_to_match('date_created:') + @date_issued=text_to_match('date_issued:') + @date_modified=text_to_match('date_modified:') + @date_available=text_to_match('date_available:') + @date_valid=text_to_match('date_valid:') + @filename=text_to_match('filename:') + @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register + else + @text1=q['s1'] if q['s1']=~/\S/ + @fulltext=q['ft'] if q['ft']=~/\S/ + @keywords=q['key'] if q['key']=~/\S/ + @title=q['ti'] if q['ti']=~/\S/ + @author=q['au'] if q['au']=~/\S/ + @topic_register=q['tr'] if q['tr']=~/\S/ + @subject=q['sj'] if q['sj']=~/\S/ + @description=q['dsc'] if q['dsc']=~/\S/ + @publisher=q['pb'] if q['pb']=~/\S/ + @contributor=q['cntr'] if q['cntr']=~/\S/ + @date=q['dt'] if q['dt']=~/\S/ + @type=q['ty'] if q['ty']=~/\S/ + @identifier=q['id'] if q['id']=~/\S/ + @source=q['src'] if q['src']=~/\S/ + @language=q['lang'] if q['lang']=~/\S/ + @relation=q['rel'] if q['rel']=~/\S/ + @coverage=q['cov'] if q['cov']=~/\S/ + @rights=q['cr'] if q['cr']=~/\S/ + @comment=q['co'] if q['co']=~/\S/ + @abstract=q['ab'] if q['ab']=~/\S/ + @date_created=q['dtc'] if q['dtc']=~/\S/ + @date_issued=q['dti'] if q['dti']=~/\S/ + @date_modified=q['dtm'] if q['dtm']=~/\S/ + @date_available=q['dta'] if q['dta']=~/\S/ + @date_valid=q['dtv'] if q['dtv']=~/\S/ + @filename=if q['doc'] and q['search'] !~/search db/; q['doc'] + elsif q['fns']=~/\S/; q['fns'] + end + @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 + @@offset=q['off'] if q['off']=~/\d+/ # 0 + end + end + def text_to_match(identifier='') + m={ + :string => /#{identifier}\s*(.+?)/, + :string => /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, + :word => /#{identifier}[\s(]*(\S+)/ + } + search_string=if @search_field =~m[:word] + search_string=if @search_field =~m[:braces]; 
m[:braces].match(@search_field)[1] + elsif @search_field =~m[:string]; m[:string].match(@search_field)[1] + else + str=m[:word].match(@search_field)[1] + str.gsub!(/[()]/,'') + str + end + search_string.strip! + search_string.gsub!(/\s+/,'+') + search_string + #else + # "__" + end + end + end + WOK_SQL + end + def search_statement_common + <<-'WOK_SQL' + class Dbi_search_statement + attr_reader :text_search_flag,:sql_select_body_format,:sql_select_endnotes_format,:sql_offset,:sql_limit + def initialize(conn,search_for,q,c) + @conn=conn + @text_search_flag=false + @sql_statement={ :body=>'',:endnotes=>'',:range=>'' } + #@offset||=@@offset + #@offset+=@@limit + search={ :text => [],:endnotes => [] } + cse=(c =~/\S/) ? true : false + st=Dbi_search_string.new('doc_objects.clean',search_for.text1,q['s1'],cse).string + se=Dbi_search_string.new('endnotes.clean',search_for.text1,q['s1'],cse).string + @text_search_flag=st[:flag] + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << se[:search] + end + st=Dbi_search_string.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.title',search_for.title,q['ti'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + 
st=Dbi_search_string.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.classify_type',search_for.type,q['ty'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.classify_identifier',search_for.identifier,q['id'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.title_language',search_for.language,q['lang'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.classify_relation',search_for.relation,q['rel'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + 
st=Dbi_search_string.new('metadata_and_text.classify_coverage',search_for.coverage,q['cov'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + st=Dbi_search_string.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string + if st[:flag] + search[:text] << st[:search] + search[:endnotes] << st[:search] + end + @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 + @@offset=q['off'] if q['off']=~/\d+/ # 0 + WOK_SQL + end + def pages + <<-'WOK_SQL' + def sql_offset + @@offset + end + def sql_match_limit + @@limit + end + def sql_canned_search + @offset_next=sql_offset.to_i + sql_match_limit.to_i + @offset_previous=sql_offset.to_i - sql_match_limit.to_i + def current + @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + sql_offset.to_s + end + def next + @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s + end + def previous + @offset_previous >= 0 \ + ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) \ + : '' + end + def start + @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s + end + self + end + def pre_next(beyond_limit,img) + can=sql_canned_search + page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i + if beyond_limit + if page.to_s =~ /^1$/ + %{
+ pg. #{page.to_s} + +  >> + +
} + elsif page.to_s =~ /^2$/ + %{
+ + <<  + + pg. #{page.to_s} + +  >> + +
} + else + %{
+ + |<  + + + <<  + + pg. #{page.to_s} + +  >> + +
} + end + else + if page.to_s =~ /^1$/; '' + elsif page.to_s =~ /^2$/ + %{
+ + <<  + + pg. #{page.to_s} +
} + else + %{
+ + |<  + + + <<  + + pg. #{page.to_s} +
} + end + end + end + WOK_SQL + end + def tail + v=SiSU_Env::Info_version.instance.get_version + <<-WOK_SQL + def tail + <<-'WOK' +


+ + + + + +
+

+ + SiSU + +

+
+ +

+ Generated by + + #{v[:project]} #{v[:version]} #{v[:date]} (#{v[:date_stamp]}) +
+ + #{v[:project]} © Ralph Amissah + 1993, current 2010. + All Rights Reserved. +
+ + #{v[:project]} + is software for document structuring, publishing and search, +
+ + www.jus.uio.no/sisu + + and + + www.sisudoc.org + +
+ w3 since October 3 1993 + + ralph@amissah.com + +
+ mailing list subscription + + http://lists.sisudoc.org/listinfo/sisu + +
+ + sisu@lists.sisudoc.org + +

+
+

+ #{v[:project]} using: +
Standard SiSU markup syntax, +
Standard SiSU meta-markup syntax, and the +
Standard SiSU object citation numbering and system, (object/text positioning system) +
+ © Ralph Amissah 1997, current 2010. + All Rights Reserved. +

+
+

+ + GPLv3 + +

+
+

+ SiSU is released under + GPL v3 + or later, + + http://www.gnu.org/licenses/gpl.html + +

+
+

+ #{v[:project]}, developed using + + Ruby + + on + + Debian/Gnu/Linux + + software infrastructure, + with the usual GPL (or OSS) suspects. +
+ Better - "performance, reliability, scalability, security & total cost of ownership" + [not to mention flexibility & choice] use of and adherence to open standards (where practical and fair) and it is software libré. +
+ Get With the Future + + Way Better! + +

+
+ + + WOK + end + WOK_SQL + end + def main1 + <<-'WOK_SQL' + @tail=tail + @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 + @counters_txt,@counters_endn,@sql_select_body,@sql_select_endnotes='','','','' + FCGI.each_cgi do |cgi| + begin # all code goes in begin section + @search={ :text => [],:endnotes => [] } + q=CGI.new + @db=if cgi['db'] =~/\S+/; + @stub=/#{@db_name_prefix}(\S+)/.match(cgi['db'])[1] + cgi['db'] + else + @stub='sisu' + "#{@db_name_prefix}#{@stub}" + end + checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' + result_type=(cgi['view']=~/text/) \ + ? result_type={:index=>'',:text=>'checked'} \ + : result_type={:index=>'checked',:text=>''} + @@limit=if cgi['sql_match_limit'].to_s=~/2500/ + checked_sql_limit={:l1000=>'',:l2500=>'checked'} + '2500' + else + checked_sql_limit={:l1000=>'checked',:l2500=>''} + '1000' + end + checked_echo='checked' if cgi['echo'] =~/\S/ + checked_stats='checked' if cgi['stats'] =~/\S/ + checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 + checked_searched='checked' if cgi['searched'] =~/\S/ + checked_tip='checked' if cgi['tip'] =~/\S/ + checked_case='checked' if cgi['casesense'] =~/\S/ + checked_sql='checked' if cgi['sql'] =~/\S/ + if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 + checked_all='checked' + checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' + checked_none='' + elsif cgi['checks'] =~/check_none/ + checked_none='checked' + checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' + elsif cgi['checks'] =~/check_selected/ + checked_selected='checked' + elsif cgi['checks'] =~/check_default/ + checked_default='checked' + checked_echo=checked_stats=checked_url='checked' + checked_searched=checked_tip=checked_case=checked_sql='' + else + 
checked_selected='checked' + checked_echo=checked_stats=checked_url='checked' + checked_searched=checked_tip=checked_case=checked_sql='' + end + WOK_SQL + end + def main2 + <<-'WOK_SQL' + search_field=cgi['find'] if cgi['find'] # =~/\S+/ + @search_for=Search_request.new(search_field,q) #.analyze #% search_for + #% searches + #Canned_search.new(@base,@search_for.text1,cgi) + if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ + s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ + ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ + key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ + ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ + au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ + tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ + sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ + dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ + pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ + cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ + dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ + ty='&ty=' + CGI.escape(@search_for.type) if @search_for.type=~/\S/ + id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ + src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ + lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ + rel='&rel=' + CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ + cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ + cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ + co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ + 
ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ + dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ + dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ + dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ + dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ + dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ + fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ + @@canned_search_url=(checked_all =~/checked/) \ + ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" \ + : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" + mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 + @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" + if checked_case=~/\S/ + @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 + @search[:endnotes][1]=%{endnotes.clean~'#{@search_for.text1}'} #s1 + else + @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 + @search[:endnotes][1]=%{endnotes.clean~*'#{@search_for.text1}'} #s1 + end + canned_note='search url:' + else + @@canned_search_url="#{@base}?db=#{@db}&view=index" + canned_note='search url example:' + end + if search_field =~/\S+/ + analyze_format=search_field.gsub(/\s*\n/,'; ') + elsif checked_all =~/checked/ or checked_url =~/checked/ + canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) + af=canned_search.join('; ') + af.gsub!(/s1=/,'text: ') + af.gsub!(/ft=/,'fulltxt: ') + af.gsub!(/au=/,'author: ') + af.gsub!(/ti=/,'title: ') + af.gsub!(/fns=/,'filename: ') + af.gsub!(/tr=/,'topic_register: ') + af.gsub!(/%2B/,' ') + analyze_format=af + st=af.split(/\s*;\s*/) + search_field=st.join("\n") + end + green=%{} + canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) + the_can=%{#{canned_note} #{canned_search_url_txt}
} + p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' + p_filename=%{filename: #{green}#{@search_for.filename}

} if @search_for.filename =~/\S+/ + p_text=%{text: #{green}#{@search_for.text1}
} if @search_for.text1 =~/\S+/ + p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~/\S+/ + p_title=%{title: #{green}#{@search_for.title}
} if @search_for.title =~/\S+/ + p_author=%{author: #{green}#{@search_for.author}
} if @search_for.author =~/\S+/ + p_contributor=%{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~/\S+/ + p_date=%{date: #{green}#{@search_for.date}
} if @search_for.date =~/\S+/ + p_rights=%{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~/\S+/ + p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~/\S+/ + p_subject=%{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~/\S+/ + p_keywords=%{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~/\S+/ + p_identifier=%{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~/\S+/ + p_type=%{type: #{green}#{@search_for.type}
} if @search_for.type =~/\S+/ + p_format=%{format: #{green}#{@search_for.format}
} if @search_for.format =~/\S+/ + p_relation=%{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~/\S+/ + p_coverage=%{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~/\S+/ + p_description=%{description: #{green}#{@search_for.description}
} if @search_for.description =~/\S+/ + p_abstract=%{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~/\S+/ + p_comment=%{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~/\S+/ + p_publisher=%{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~/\S+/ + p_source=%{source: #{green}#{@search_for.source}
} if @search_for.source =~/\S+/ + p_language=%{language: #{green}#{@search_for.language}
} if @search_for.language =~/\S+/ + search_note=<<-WOK + + database: #{green}#{@db}; selected view: #{green}#{cgi['view']} + search string: "#{green}#{analyze_format}"
+ #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} + + WOK + #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} + #dbi_canning + @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form + unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ + print "Content-type: text/html\n\n" + puts (@header+@tail) + else #% searches + s1=(@search_for.text1 =~/\S/) \ + ? @search_for.text1 \ + : 'Unavailable' + if checked_case=~/\S/ + @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} + @search[:endnotes]<<%{endnotes.clean~'#{CGI.unescape(s1)}'} + else + @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} + @search[:endnotes]<<%{endnotes.clean~*'#{CGI.unescape(s1)}'} + end + #dbi_request + dbi_statement=Dbi_search_statement.new(@conn,@search_for,q,checked_case) + @text_search_flag=false + @text_search_flag=dbi_statement.text_search_flag + s_contents=dbi_statement.contents + s_endnotes=dbi_statement.endnotes + @body_main,@endnotes=[],[] + @search_regx=nil + oldtid=0 + if @text_search_flag + if checked_sql =~/\S/ + sql_select_body=dbi_statement.sql_select_body_format + sql_select_endnotes=dbi_statement.sql_select_endnotes_format + else sql_select_body,sql_select_endnotes='','' + end + @body_main << '



Main Text:
' << sql_select_body + @endnotes << '



Endnotes:
' << sql_select_endnotes + else + end + #text_objects_body + s_contents.each do |c| #% text body + location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] + file_suffix=c['src_filename'][/.+?\.(_?sst|ssm)$/,1] + lang=if location =~ /\S+?~(\S\S\S?)$/ + l=location[/\S+?~(\S\S\S?)$/,1] + location.gsub!(/(\S+?)~\S\S\S?/,'\1') + l=".#{l}" + else '' + end + #metadata_found_body + if c['tid'].to_i != oldtid.to_i + ti=c['title'] + can_txt_srch=(cgi['view']=~/index/) \ + ? %{search } \ + : %{search } + title=%{toc html #{ti} by #{c['creator_author']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ #hmm watch file_suffix + title=@text_search_flag \ + ? '

'+title \ + : '
'+title + @counter_txt_doc+=1 + oldtid=c['tid'].to_i + else title='' + end + if @text_search_flag + if cgi['view']=~/text/ \ + or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body + text=if c['suffix'] !~/1/ #seg + if @search_for.text1 =~/\S+/ \ + or q['s1'] =~/\S+/ #% only this branch is working !! + unescaped_search=if @search_for.text1 =~/\S+/ + CGI.unescape(@search_for.text1) + elsif q['s1'] =~/\S+/ + CGI.unescape(q['s1']) + else nil + end + @search_regx=if unescaped_search #check + search_regex=[] + build=unescaped_search.scan(/\S+/).each do |g| + (g.to_s =~/(AND|OR)/) \ + ? (search_regex << '|') \ + : (search_regex << %{#{g.to_s}}) + end + search_regex=search_regex.join(' ') + search_regex=search_regex.gsub(/\s*\|\s*/,'|') + Regexp.new(search_regex, Regexp::IGNORECASE) + else nil + end + else nil + end + matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ + ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) \ + : c['body'] + %{

ocn #{c['ocn']}:

#{matched_para}} + elsif c['suffix'] =~/1/ #doc + %{#{title}

ocn #{c['ocn']}:#{c['body']}} + end + @counter_txt_ocn+=1 + output=title+text + else #elsif cgi['view']=~/index/ #% idx body + if c['suffix'] !~/1/ #seg + index=%{#{c['ocn']}, } if @text_search_flag + elsif c['suffix'] =~/1/ #doc + index=%{#{c['ocn']}, } + end + if c['seg'] =~/\S+/ + if @text_search_flag + @counter_txt_ocn+=1 + output=title+index + end + else + @counter_txt_ocn+=1 + output=c['suffix'] !~/1/ \ + ? title+index \ + : %{#{title}#{c['ocn'].sort}, } + end + end + else output=title + end + @counters_txt=if @counter_txt_doc > 0 + if checked_stats =~/\S/ + @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false + start=(@@offset.to_i+1).to_s + range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s + %{


Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} + else '' + end + else '' + end + @body_main << output #+ details + end + #text_objects_endnote + oldtid = 0 + s_endnotes.each do |e| #% endnotes + location=e['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] + file_suffix=e['src_filename'][/.+?\.(_?sst|ssm)$/,1] + lang=if location =~ /\S+?~(\S\S\S?)$/ + l=location[/\S+?~(\S\S\S?)$/,1] + location.gsub!(/(\S+?)~\S\S\S?/,'\1') + l=".#{l}" + else '' + end + #metadata_found_endnotes + if @text_search_flag + if e['metadata_tid'].to_i != oldtid.to_i + ti=e['title'] + can_txt_srch=(cgi['view']=~/index/) \ + ? %{search } \ + : %{search } + title=%{

toc html #{ti} by #{e['creator_author']} #{can_txt_srch}toc html epub pdf portrait pdf landscape odf manifest
} if file_suffix=~/s/ + @counter_endn_doc+=1 + oldtid=e['metadata_tid'].to_i + else title = '' + end + if cgi['view']=~/text/ \ + or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt endnotes + @counter_endn_ocn+=1 + matched_endnote=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ + ? matched=e['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1}) \ + : e['body'] + output=%{#{title}
note #{e['nr']} referred to from ocn #{e['ocn']}: #{matched_endnote}} + else #elsif cgi['view']=~/index/ #% idx endnotes + @counter_endn_ocn+=1 + output=%{#{title}#{e['nr']}#{e['ocn']}], } + end + @counters_endn=if @counter_endn_doc > 0 + if checked_stats =~/\S/ + @@lt_e=@counter_endn_ocn==dbi_statement.sql_match_limit.to_i \ + ? true \ + : false + start=(@@offset.to_i+1).to_s + range=(@@offset.to_i+@counter_endn_ocn.to_i).to_s + %{Found #{@counter_endn_ocn} times in the endnotes of #{@counter_endn_doc} documents [ matches #{start} to #{range} ]
} + else '' + end + end + @endnotes << output #+ details + else @endnotes=[] #does not take out yet + end + end + offset=dbi_statement.sql_offset.to_s + limit=dbi_statement.sql_match_limit.to_s + @@lt_t ||=false; @@lt_e ||=false + canned=(@@lt_t or @@lt_e) \ + ? dbi_statement.pre_next(true,@image_src).to_s \ + : dbi_statement.pre_next(false,@image_src).to_s + limit=dbi_statement.sql_match_limit.to_s + cgi.out{@header + @counters_txt + @counters_endn + canned + @body_main.join + @endnotes.join + canned + @tail} #% print cgi_output_header+counters+body+endnotes + end + rescue Exception => e + s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
+          s << CGI::escapeHTML(e.message) + '
' + cgi.out{s} + next + ensure # eg. disconnect from server + @conn.disconnect if @conn + end + end + WOK_SQL + end + end +end +__END__ diff --git a/lib/sisu/v3/cgi_sqlite.rb b/lib/sisu/v3/cgi_sqlite.rb new file mode 100644 index 00000000..5c20b072 --- /dev/null +++ b/lib/sisu/v3/cgi_sqlite.rb @@ -0,0 +1,216 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: generates naive cgi search form for search of sisu database (pgsql sqlite) + +=end +module SiSU_CGI_sqlite #% database building documents + require "#{SiSU_lib}/sysenv" # sysenv.rb + require "#{SiSU_lib}/cgi_sql_common" # cgi_sql_common.rb + include SiSU_CGI_sql + class SiSU_search_sqlite < SiSU_CGI_common + def initialize(opt,webserv) + @opt,@webserv=opt,webserv + @env=SiSU_Env::Info_env.new('',opt) + @image_src="#{@env.url.webserv_cgi}/_sisu/image_sys" + @common=SiSU_CGI_sql::SiSU_CGI_common.new(@webserv,@opt.cmd,@image_src,@env) + end + def sqlite + serve=[] + Dir.foreach(@env.path.webserv) do |x| + if x !~/^\./ \ + and FileTest.directory?("#{@env.path.webserv}/#{x}") + if FileTest.file?("#{@env.path.webserv}/#{x}/sisu_sqlite.db") + serve << x unless x =~/^_\S+/ + end + end + end + serve.sort! 
+ f1,f2,f3=[],[],[] + serve.each do |x| + f1 << %{ \n} + end + f2 << %{ selected_db=case cgi['db']\n} + serve.each do |x| + f2 << %{ when /#{Db[:name_prefix]}#{x}/; ''\n} + end + f2 << " end\n" + #f3 + f3 << %{ db_name='sisu_sqlite.db'\n} + f3 << %{ db_sqlite=case cgi['db']\n} + serve.each do |x| + f3 << %{ when /#{Db[:name_prefix]}#{x}/; "#{@env.path.webserv}/#{x}/sisu_sqlite.db"\n} + end + f3 << %{ else '#{@env.path.webserv}/#{serve[0]}/sisu_sqlite.db'\n end\n} + if FileTest.writable?('.') + output=File.open('sisu_sqlite.cgi','w') + output << header0 << header1 << header_desc << header2 << f1 << buttons1 << buttons2 << search_request << search_statement << search_statement_common << search_query1 << @common.pages << search_query2 << @common.tail << @common.main1 << f2 << f3 << dbi_connect << @common.main2 + a=%{ generated sisu_sqlite.cgi, + BASED ON ALREADY CREATED sisu_sqlite.db OUTPUT, (-d) + } + SiSU_Screen::Ansi.new(@opt.cmd,a).print_grey + c=case @webserv + when /pwd/; '' + else "if necessary make the directory /usr/lib/cgi-bin : + sudo cp -vi #{Dir.pwd}/sisu_sqlite.cgi /usr/lib/cgi-bin/. + sudo chmod -v 755 /usr/lib/cgi-bin/sisu_sqlite.cgi + (copy sisu_sqlite.cgi to your cgi directory) and set file permissions to 755" + end + a=%{#{c} + #{@env.webserv_base_cgi}/cgi-bin/sisu_sqlite.cgi + } + b='(to create and populate sisu sqlite database see "man sisu" and in particular the -d flag)' + SiSU_Screen::Ansi.new(@opt.cmd,a,b).warn + else puts 'failed in attempt to write sisu_sqlite.cgi to present directory, is directory writable?' 
+ end + end + def header0 + <<-WOK_SQL +#!/usr/bin/env ruby +=begin +#{about} + * Description: generates naive cgi search form for search of sisu database (sqlite) +#{gpl} +=end + require 'cgi' + require 'fcgi' + require 'dbi' + @version='sisu_sqlite' + @image_src="#{@env.url.webserv_cgi}/_sisu/image_sys" + @hosturl_db="#{@env.url.webserv_base_cgi}" + @hosturl_files="#{@env.url.webserv_files_from_db}" + @db_name_prefix="#{Db[:name_prefix]}" + WOK_SQL + end + def search_statement + <<-'WOK_SQL' + class Dbi_search_string + def initialize(l,t,q,cse=false) + @l,@t,@q=l,t,q + end + def string + search={ :search => [],:flag => false } + if @t =~/\S+/ or @q =~/\S+/ + if @t =~/\S+/; unescaped_search=CGI.unescape(@t) + elsif @q =~/\S+/; unescaped_search=CGI.unescape(@q) + end + search_construct=[] + unescaped_search.gsub!(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%") + unescaped_search.gsub!(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") + search_construct << unescaped_search + search_construct=search_construct.join(' ') + search[:search] << search_construct + search[:flag]=true + search + end + search + end + end + WOK_SQL + end + def search_query1 + <<-'WOK_SQL' + @search_text,@search_endnotes=[],[] + search[:text].each{|x| @search_text << "#{x} AND " } + @search_text=@search_text.join.gsub!(/AND\s+$/,'') + @search_text.gsub!(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') + search[:endnotes].each{|x| @search_endnotes << "#{x} AND " } + @search_endnotes=@search_endnotes.join.gsub!(/AND\s+$/,'') + @search_text.gsub!(/(endnotes\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+endnotes\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') + end + WOK_SQL + end + def search_query2 + <<-'WOK_SQL' + def sql_select_body + limit ||=@@limit + offset ||=@@offset + @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.notes_suffix, 
doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} + @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} + select=@sql_statement[:body] + ' ' + @sql_statement[:range] + select + end + def sql_select_endnotes + limit ||=@@limit + offset ||=@@offset + @sql_statement[:endnotes]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, endnotes.body, endnotes.nr, endnotes.ocn, endnotes.metadata_tid FROM metadata_and_text, endnotes WHERE #{@search_endnotes} AND metadata_and_text.tid = endnotes.metadata_tid ORDER BY metadata_and_text.title, metadata_and_text.src_filename, endnotes.nr} + @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} + select=@sql_statement[:endnotes] + ' ' + @sql_statement[:range] + select + end + def sql_select_body_format + %{#{sql_select_body}} + end + def sql_select_endnotes_format + %{#{sql_select_endnotes}} + end + def contents + @conn.select_all(sql_select_body) + end + def endnotes + @conn.select_all(sql_select_endnotes) + end + end + WOK_SQL + end + def dbi_connect + <<-'WOK_SQL' + @dbi="DBI:SQLite3:#{db_sqlite}" #sqlite3 ? + @conn=DBI.connect(@dbi) + WOK_SQL + end + end +end +__END__ diff --git a/lib/sisu/v3/character_encoding.rb b/lib/sisu/v3/character_encoding.rb new file mode 100644 index 00000000..5d19a61f --- /dev/null +++ b/lib/sisu/v3/character_encoding.rb @@ -0,0 +1,379 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: modules related to locales, character encoding for different + output generators [requires kirbybase] + +=end +module SiSU_character_encode + require 'kirbybase' + require "#{SiSU_lib}/sysenv" # sysenv.rb + class Characters + attr_accessor(:character_encoding,:glyph,:dec,:hex,:rb,:html,:html_name,:tex,:tex_licr,:tex_alt,:name) + def initialize(&block) + instance_eval(&block) + end + end + class Create ', 62, '76', '\76', '>', '>', '', '', '', 'Greater than >' ], + ['?', 63, '77', '\77', '?', nil, '?', '?', '?', 'Punctuation Question mark ?' 
], + ['@', 64, '100', '\100', '@', nil, '', '', '', 'Commercial at sign @' ], + ['A', 65, '101', '\101', 'A', nil, 'A', 'A', 'A', 'Captial A' ], + ['B', 66, '102', '\102', 'B', nil, 'B', 'B', 'B', 'Captial B' ], + ['C', 67, '103', '\103', 'C', nil, 'C', 'C', 'C', 'Captial C' ], + ['D', 68, '104', '\104', 'D', nil, 'D', 'D', 'D', 'Captial D' ], + ['E', 69, '105', '\105', 'E', nil, 'E', 'E', 'E', 'Captial E' ], + ['F', 70, '106', '\106', 'F', nil, 'F', 'F', 'F', 'Captial F' ], + ['G', 71, '107', '\107', 'G', nil, 'G', 'G', 'G', 'Captial G' ], + ['H', 72, '110', '\110', 'H', nil, 'H', 'H', 'H', 'Captial H' ], + ['I', 73, '111', '\111', 'I', nil, 'I', 'I', 'I', 'Captial I' ], + ['J', 74, '112', '\112', 'J', nil, 'J', 'J', 'J', 'Captial J' ], + ['K', 75, '113', '\113', 'K', nil, 'K', 'K', 'K', 'Captial K' ], + ['L', 76, '114', '\114', 'L', nil, 'L', 'L', 'L', 'Captial L' ], + ['M', 77, '115', '\115', 'M', nil, 'M', 'M', 'M', 'Captial M' ], + ['N', 78, '116', '\116', 'N', nil, 'N', 'N', 'N', 'Captial N' ], + ['O', 79, '117', '\117', 'O', nil, 'O', 'O', 'O', 'Captial O' ], + ['P', 80, '120', '\120', 'P', nil, 'P', 'P', 'P', 'Captial P' ], + ['Q', 81, '121', '\121', 'Q', nil, 'Q', 'Q', 'Q', 'Captial Q' ], + ['R', 82, '122', '\122', 'R', nil, 'R', 'R', 'R', 'Captial R' ], + ['S', 83, '123', '\123', 'S', nil, 'S', 'S', 'S', 'Captial S' ], + ['T', 84, '124', '\124', 'T', nil, 'T', 'T', 'T', 'Captial T' ], + ['U', 85, '125', '\125', 'U', nil, 'U', 'U', 'U', 'Captial U' ], + ['V', 86, '126', '\126', 'V', nil, 'V', 'V', 'V', 'Captial V' ], + ['W', 87, '127', '\127', 'W', nil, 'W', 'W', 'W', 'Captial W' ], + ['X', 88, '130', '\130', 'X', nil, 'X', 'X', 'X', 'Captial X' ], + ['Y', 89, '131', '\131', 'Y', nil, 'Y', 'Y', 'Y', 'Captial Y' ], + ['Z', 90, '132', '\132', 'Z', nil, 'Z', 'Z', 'Z', 'Captial Z' ], + ['[', 91, '133', '\133', '[', nil, '', '', '', 'Left square bracket [' ], + ['\\', 92, '134', '\134', '\', nil, '', '\textbackslash', '', 'Backslash \\'], + [']', 93, '135', 
'\135', ']', nil, '', '', '', 'Right square bracket ]' ], + ['^', 94, '136', '\136', '^', nil, '', '', '', 'Caret ^' ], + ['_', 95, '137', '\137', '_', nil, '{\_}', '\textunderscore', '', 'Underscore _' ], + ['`', 96, '140', '\140', '`', nil, '', '', '', 'Grave accent `' ], + ['a', 97, '141', '\141', 'a', nil, 'a', 'a', 'a', 'Small a' ], + ['b', 98, '142', '\142', 'b', nil, 'b', 'b', 'b', 'Small b' ], + ['c', 99, '143', '\143', 'c', nil, 'c', 'c', 'c', 'Small c' ], + ['d', 100, '144', '\144', 'd', nil, 'd', 'd', 'd', 'Small d' ], + ['e', 101, '145', '\145', 'e', nil, 'e', 'e', 'e', 'Small e' ], + ['f', 102, '146', '\146', 'f', nil, 'f', 'f', 'f', 'Small f' ], + ['g', 103, '147', '\147', 'g', nil, 'g', 'g', 'g', 'Small g' ], + ['h', 104, '150', '\150', 'h', nil, 'h', 'h', 'h', 'Small h' ], + ['i', 105, '151', '\151', 'i', nil, 'i', 'i', 'i', 'Small i' ], + ['j', 106, '152', '\152', 'j', nil, 'j', 'j', 'j', 'Small j' ], + ['k', 107, '153', '\153', 'k', nil, 'k', 'k', 'k', 'Small k' ], + ['l', 108, '154', '\154', 'l', nil, 'l', 'l', 'l', 'Small l' ], + ['m', 109, '155', '\155', 'm', nil, 'm', 'm', 'm', 'Small m' ], + ['n', 110, '156', '\156', 'n', nil, 'n', 'n', 'n', 'Small n' ], + ['o', 111, '157', '\157', 'o', nil, 'o', 'o', 'o', 'Small o' ], + ['p', 112, '160', '\160', 'p', nil, 'p', 'p', 'p', 'Small p' ], + ['q', 113, '161', '\161', 'q', nil, 'q', 'q', 'q', 'Small q' ], + ['r', 114, '162', '\162', 'r', nil, 'r', 'r', 'r', 'Small r' ], + ['s', 115, '163', '\163', 's', nil, 's', 's', 's', 'Small s' ], + ['t', 116, '164', '\164', 't', nil, 't', 't', 't', 'Small t' ], + ['u', 117, '165', '\165', 'u', nil, 'u', 'u', 'u', 'Small u' ], + ['v', 118, '166', '\166', 'v', nil, 'v', 'v', 'v', 'Small v' ], + ['w', 119, '167', '\167', 'w', nil, 'w', 'w', 'w', 'Small w' ], + ['x', 120, '170', '\170', 'x', nil, 'x', 'x', 'x', 'Small x' ], + ['y', 121, '171', '\171', 'y', nil, 'y', 'y', 'y', 'Small y' ], + ['z', 122, '172', '\172', 'z', nil, 'z', 'z', 'z', 'Small z' ], + ['{', 
123, '173', '\173', '{', nil, '{\{}', '\{', '', 'Left curly brace {' ], + ['|', 124, '174', '\174', '|', nil, '', '', '', 'Vertical bar / pipe |' ], + ['}', 125, '175', '\175', '}', nil, '{\}}', '\}', '', 'Right curly brace }' ], + ['~', 126, '176', '\176', '~', nil, '', '', '', 'Tilde ~' ], + ['', 127, '177', '', '', nil, '', '', '', ' ' ], + ['', 128, '200', '', '€', nil, '', '', '', ' ' ], + ['', 129, '201', '', '', nil, '', '', '', ' ' ], + ['\'', 130, '202', '', '‚', nil, '', '', '', 'Low left single quote \''], + [' ', 131, '203', '', 'ƒ', nil, '', '', '', 'Florin ' ], + ['"', 132, '204', '', '„', nil, '', '', '', 'Low left double quote "' ], + ['…', 133, '205', '\342\200\246', '…', nil, '…', '\textellipsis', '', 'Ellipsis …' ], + ['†', 134, '206', '\342\200\240', '†', nil, '†', '\textdagger', '', 'Dagger †' ], + ['‡', 135, '207', '\342\200\241', '‡', nil, '‡', '\textdaggerbl', '', 'Double dagger ‡' ], + ['^', 136, '210', '', 'ˆ', nil, '', '', '', 'Circumflex ^' ], + ['', 137, '211', '', '‰', nil, '', '', '', 'Permil ' ], + ['', 138, '212', '', 'Š', nil, '', '', '', 'Capital S, caron ' ], + ['<', 139, '213', '', '‹', nil, '', '', '', 'Less than sign (see &060;) <' ], + ['', 140, '214', '', 'Œ', nil, '', '', '', 'Capital OE ligature ' ], + ['', 141, '215', '', '', nil, '', '', '', ' ' ], + ['', 142, '216', '', 'Ž', nil, '', '', '', 'Capital Z, caron ' ], + ['', 143, '217', '', '', nil, '', '', '', ' ' ], + ['', 144, '220', '', '', nil, '', '', '', ' ' ], + ['', 145, '221', '', '‘', nil, '', '', '', 'Left single quote ' ], + ['', 146, '222', '', '’', nil, '', '', '', 'Right single quote ' ], + ['', 147, '223', '', '“', nil, '', '', '', 'Left double quote ' ], + ['', 148, '224', '', '”', nil, '', '', '', 'Right double quote ' ], + ['', 149, '225', '', '•', nil, '', '', '', 'Bullet ' ], + ['-', 150, '226', '', '–', nil, '', '', '', 'En dash -' ], + ['', 151, '227', '', '—', nil, '', '', '', 'Em dash -' ], + ['~', 152, '230', '', '˜', nil, '', '', '', 'Tilde 
(see &126;) ~' ], + ['t', 153, '231', '', '™', nil, '', '', '', 'Trademark t' ], + ['', 154, '232', '', 'š', nil, '', '', '', 'small s, caron ' ], + ['', 155, '233', '', '›', nil, '', '', '', 'Greater than sign (see &062;) ' ], + ['', 156, '234', '', 'œ', nil, '', '', '', 'Small oe ligature ' ], + ['', 157, '235', '', '', nil, '', '', '', ' ' ], + ['', 158, '236', '', 'ž', nil, '', '', '', 'Small z, caron ' ], + ['', 159, '237', '', 'Ÿ', nil, '', '', '', 'Capital Y, umlaut ' ], + ['', 160, '240', '', ' ', ' ', '', '', '', 'Non-breaking space ' ], + ['¡', 161, '241', '\302\241', '¡', '¡', '¡', '', '', 'Inverted exclamation ' ], + ['¢', 162, '242', '\302\242', '¢', '¢', '¢', '', '', 'Cent sign ¢' ], + ['£', 163, '243', '\302\243', '£', '£', '£', '\textsterling', '', 'Pound sign £' ], + ['¤', 164, '244', '\302\244', '¤', '¤', '¤', '\textcurrency', '', 'General currency sign ' ], + ['¥', 165, '245', '\302\245', '¥', '¥', '¥', '', '', 'Yen sign ¥' ], + ['¦', 166, '246', '\302\246', '¦', '¦', '¦', '', '', 'Broken vertical bar ' ], + ['§', 167, '247', '\302\247', '§', '§', '§', '\textsection', '', 'Section sign §' ], + ['¨', 168, '250', '\302\250', '¨', '¨', '¨', '\"', '', 'Umlaut ' ], + ['©', 169, '251', '\302\251', '©', '©', '©', '\copyright', '\textcopyright', 'Copyright ©' ], + ['ª', 170, '252', '\302\252', 'ª', 'ª', 'ª', '', '', 'Feminine ordinal ª' ], + ['«', 171, '253', '\302\253', '«', '«', '«', '', '', 'Left angle quote «' ], + ['¬', 172, '254', '\302\254', '¬', '¬', '¬', '', '', 'Not sign ' ], + ['­', 173, '255', '\302\255', '­', '­', '­', '', '', 'Soft hyphen ' ], + ['®', 174, '256', '\302\256', '®', '®', '®', '', '', 'Registered trademark ®' ], + ['¯', 175, '257', '\302\257', '¯', '¯', '¯', '', '', 'Macron accent ' ], + ['°', 176, '260', '\302\260', '°', '°', '°', '', '', 'Degree sign °' ], + ['±', 177, '261', '\302\261', '±', '&plusmin;', '±', '', '', 'Plus or minus ±' ], + ['²', 178, '262', '\302\262', '²', '²', '²', '', '', 'Superscript 2 ²' ], + ['³', 
179, '263', '\302\263', '³', '³', '³', '', '', 'Superscript 3 ³' ], + ['', 180, '264', '\302\264', '´', '´', ''', '', '', 'Acute accent ' ], + ['µ', 181, '265', '\302\265', 'µ', 'µ', 'µ', '', '', 'Micro sign (Greek mu) µ' ], + ['¶', 182, '266', '\302\266', '¶', '¶', '¶', '\textparagraph', '', 'Paragraph sign ¶' ], + ['·', 183, '267', '\302\267', '·', '·', %q{·}, '', %q{}, 'Middle dot ' ], + ['¸', 184, '270', '\302\270', '¸', '¸', '¸', '', '', 'Cedilla ' ], + ['¹', 185, '271', '\302\271', '¹', '¹', '¹', '', '', 'Superscript 1 ¹' ], + ['º', 186, '272', '\302\272', 'º', 'º', 'º', '', '', 'Masculine ordinal º' ], + ['»', 187, '273', '\302\273', '»', '»', '»', '', '', 'Right angle quote ' ], + ['¼', 188, '274', '\302\274', '¼', '¼', '¼', '', '', 'Fraction one quarter ¼' ], + ['½', 189, '275', '\302\275', '½', '½', '½', '', '', 'Fraction on half ½' ], + ['¾', 190, '276', '\302\276', '¾', '¾', '¾', '', '', 'Fraction three quarters ¾' ], + ['¿', 191, '277', '\302\277', '¿', '¿', '¿', '', '', 'Inverted question mark ¿' ], + ['À', 192, '300', '\303\200', 'À', 'À', 'À', '\`{A}', '', 'Capital A, grave accent À' ], + ['Á', 193, '301', '\303\201', 'Á', 'Á', 'Á', %q{\'{A}}, '', 'Capital A, acute accent Á' ], + ['Â', 194, '302', '\303\202', 'Â', 'Â', 'Â', '^{A}', '', 'Capital A, circumflex accent Â' ], + ['Ã', 195, '303', '\303\203', 'Ã', 'Ã', 'Ã', '~{A}', '', 'Capital A, tilde Ã' ], + ['Ä', 196, '304', '\303\204', 'Ä', 'Ä', 'Ä', '"{A}', '', 'Capital A, umlaut Ä' ], + ['Å', 197, '305', '\303\205', 'Å', 'Å', 'Å', 'r{A}', '', 'Capital A, ring Å' ], + ['Æ', 198, '306', '\303\206', 'Æ', 'Æ', 'Æ', 'AE', '', 'Capital AE ligature Æ' ], + ['Ç', 199, '307', '\303\207', 'Ç', 'Ç', 'Ç', '', '', 'Capital C, cedilla Ç' ], + ['È', 200, '310', '\303\210', 'È', 'È', 'È', '`{E}', '', 'Capital E, grave accent È' ], + ['É', 201, '311', '\303\211', 'É', 'É', 'É', ''{E}', '', 'Capital E, acute accent É' ], + ['Ê', 202, '312', '\303\212', 'Ê', 'Ê', 'Ê', '^{E}', '', 'Capital E, circumflex accent Ê' ], + 
['Ë', 203, '313', '\303\213', 'Ë', 'Ë', 'Ë', '"{E}', '', 'Capital E, umlaut Ë' ], + ['Ì', 204, '314', '\303\214', 'Ì', 'Ì', 'Ì', '`{I}', '', 'Capital I, grave accent Ì' ], + ['Í', 205, '315', '\303\215', 'Í', 'Í', 'Í', ''{I}', '', 'Capital I, acute accent Í' ], + ['Î', 206, '316', '\303\216', 'Î', 'Î', 'Î', '^{I}', '', 'Capital I, circumflex accent Î' ], + ['Ï', 207, '317', '\303\217', 'Ï', 'Ï', 'Ï', '"{I}', '', 'Capital I, umlaut Ï' ], + ['Ð', 208, '320', '\303\220', 'Ð', 'Ð', 'Ð', '', '', 'Capital eth, Icelandic ' ], + ['Ñ', 209, '321', '\303\221', 'Ñ', 'Ñ', 'Ñ', '', '', 'Capital N, tilde Ñ' ], + ['Ò', 210, '322', '\303\222', 'Ò', 'Ò', 'Ò', '`{O}', '', 'Capital O, grave accent Ò' ], + ['Ó', 211, '323', '\303\223', 'Ó', 'Ó', 'Ó', ''{O}', '', 'Capital O, acute accent Ó' ], + ['Ô', 212, '324', '\303\224', 'Ô', 'Ô', 'Ô', '^{O}', '', 'Capital O, circumflex accent Ô' ], + ['Õ', 213, '325', '\303\225', 'Õ', 'Õ', 'Õ', '~{O}', '', 'Capital O, tilde Õ' ], + ['Ö', 214, '326', '\303\226', 'Ö', 'Ö', 'Ö', '"{O}', '', 'Capital O, umlaut Ö' ], + ['×', 215, '327', '\303\227', '×', '×', '×', '', '', 'Multiply sign ×' ], + ['Ø', 216, '330', '\303\230', 'Ø', 'Ø', 'Ø', 'O', '', 'Capital O, slash Ø' ], + ['Ù', 217, '331', '\303\231', 'Ù', 'Ù', 'Ù', '', '', 'Capital U, grave accent Ù' ], + ['Ú', 218, '332', '\303\232', 'Ú', 'Ú', 'Ú', '', '', 'Capital U, acute accent Ú' ], + ['Û', 219, '333', '\303\233', 'Û', 'Û', 'Û', '', '', 'Capital U, circumflex accent Û' ], + ['Ü', 220, '334', '\303\234', 'Ü', 'Ü', 'Ü', '', '', 'Capital U, umlaut Ü' ], + ['Ý', 221, '335', '\303\235', 'Ý', 'Ý', 'Ý', '', '', 'Capital Y, acute accent Ý' ], + ['Þ', 222, '336', '\303\236', 'Þ', 'Þ', 'Þ', '', '', 'Capital thorn, Icelandic Þ' ], + ['ß', 223, '337', '\303\237', 'ß', 'ß', 'ß', '', '', 'Small sz ligature, German ß' ], + ['à', 224, '340', '\303\240', 'à', 'à', 'à', '\`{a}', '', 'Small a, grave accent à' ], + ['á', 225, '341', '\303\241', 'á', 'á', 'á', %q{\'{a}}, '', 'Small a, acute accent á' ], + ['â', 226, 
'342', '\303\242', 'â', 'â', 'â', '\^{a}', '', 'Small a, circumflex accent â' ], + ['ã', 227, '343', '\303\243', 'ã', 'ã', 'ã', '\~{a}', '', 'Small a, tilde ã' ], + ['ä', 228, '344', '\303\244', 'ä', 'ä', 'ä', '\"{a}', '', 'Small a, umlaut ä' ], + ['å', 229, '345', '\303\245', 'å', 'å', 'å', '\r{a}', '', 'Small a, ring å' ], + ['æ', 230, '346', '\303\246', 'æ', 'æ', 'æ', '\ae', '', 'Small ae ligature æ' ], + ['ç', 231, '347', '\303\257', 'ç', 'ç', 'ç', '', '', 'Small c, cedilla ç' ], + ['è', 232, '350', '\303\250', 'è', 'è', 'è', '\`{e}', '', 'Small e, grave accent è' ], + ['é', 233, '351', '\303\251', 'é', 'é', 'é', %q{\'{e}}, '', 'Small e, acute accent é' ], + ['ê', 234, '352', '\303\252', 'ê', 'ê', 'ê', '\^{e}', '', 'Small e, circumflex accent ê' ], + ['ë', 235, '353', '\303\253', 'ë', 'ë', 'ë', '\"{e}', '', 'Small e, umlaut ë' ], + ['ì', 236, '354', '\303\254', 'ì', 'ì', 'ì', '\`{i}', '', 'Small i, grave accent ì' ], + ['í', 237, '355', '\303\255', 'í', 'í', 'í', '\'{i}', '', 'Small i, acute accent í' ], + ['î', 238, '356', '\303\256', 'î', 'î', 'î', '\^{i}', '', 'Small i, circumflex accent î' ], + ['ï', 239, '357', '\303\257', 'ï', 'ï', 'ï', '\"{i}', '', 'Small i, umlaut ï' ], + ['ð', 240, '360', '\303\260', 'ð', 'ð', 'ð', '', '', 'Small eth, Icelandic ð' ], + ['ñ', 241, '361', '\303\261', 'ñ', 'ñ', 'ñ', '', '', 'Small n, tilde ñ' ], + ['ò', 242, '362', '\303\262', 'ò', 'ò', 'ò', '\`{o}', '', 'Small o, grave accent ò' ], + ['ó', 243, '363', '\303\263', 'ó', 'ó', 'ó', %q{\'{o}}, '', 'Small o, acute accent ó' ], + ['ô', 244, '364', '\303\264', 'ô', 'ô', 'ô', '\^{o}', '', 'Small o, circumflex accent ô' ], + ['õ', 245, '365', '\303\265', 'õ', 'õ', 'õ', '\^{o}', '', 'Small o, tilde õ' ], + ['ö', 246, '366', '\303\266', 'ö', 'ö', 'ö', '\"{o}', '', 'Small o, umlaut ö' ], + ['÷', 247, '367', '\303\267', '÷', '÷', '÷', '', '', 'Divide sign ÷' ], + ['ø', 248, '370', '\303\270', 'ø', 'ø', 'ø', '', '', 'Small o, slash ø' ], + ['ù', 249, '371', '\303\271', 'ù', 'ù', 'ù', 
'\`{u}', '', 'Small u, grave accent ù' ], + ['ú', 250, '372', '\303\272', 'ú', 'ú', 'ú', %q{\'{u}}, '', 'Small u, acute accent ú' ], + ['û', 251, '373', '\303\273', 'û', 'û', 'û', '\^{u}', '', 'Small u, circumflex accent û' ], + ['ü', 252, '374', '\303\274', 'ü', 'ü', 'ü', '\"{u}', '', 'Small u, umlaut ü' ], + ['ý', 253, '375', '\303\275', 'ý', 'ý', 'ý', '', '', 'Small y, acute accent ý' ], + ['þ', 254, '376', '\303\276', 'þ', 'þ', 'þ', '', '', 'Small thorn, Icelandic þ' ], + ['ÿ', 255, '377', '\303\277', 'ÿ', 'ÿ', 'ÿ', '', '', 'Smally y, umlaut ÿ' ], + ['∝', , '', '', '∝', '∝', '∝', '', '', 'proportional to U+221D (8733) ∝' ], + ['∞', , '', '', '∞', '∞', '∞', '', '', 'infinity U+221E (8734) ∞' ], +] + end + end +end +__END__ diff --git a/lib/sisu/v3/composite.rb b/lib/sisu/v3/composite.rb new file mode 100644 index 00000000..1a801e58 --- /dev/null +++ b/lib/sisu/v3/composite.rb @@ -0,0 +1,275 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: composite documents, assemble/build documents from other + documents or parts of marked up text + +=end +module SiSU_Assemble + require 'fileutils' + require "#{SiSU_lib}/sysenv" # sysenv.rb + class Remote_image + include FileUtils + def initialize + @env=SiSU_Env::Info_env.new + end + def image(dir) + images=[] + images[0]=dir + images + end + def download_images(download_from,images_array) + path="#{@env.path.processing}/external_document/image" + mkdir_p(path) unless FileTest.directory?(path) + images_array.each do |i| + image="#{path}/#{i}" + imagefile=File.new(image,'w+') + open("#{download_from}/#{i}") do |g| + imagefile << g.read + end + imagefile.close + end + output_path="#{@env.path.webserv}/#{@env.path.stub_pwd}/_sisu/image_external" + mkdir_p(output_path) unless FileTest.directory?(output_path) + SiSU_Env::System_call.new("#{path}/*",output_path,'q').rsync + end + def download_doc_skin(doc_skin) #first element in array is source url + path="#{@env.path.processing}/external_document/skin/doc" + mkdir_p(path) unless FileTest.directory?(path) + download_from=doc_skin.shift + doc_skin.each do |i| + skin="#{path}/#{i}.rb" + unless FileTest.exists?(skin) + skinfile=File.new(skin,'w+') + open("#{download_from}/#{i}.rb") do |g| + skinfile << g.read + end + skinfile.close + end + end + end + end + class Composite + @@imager={} + def initialize(opt) + @opt=opt + @env=SiSU_Env::Info_env.new + end + def read + begin + @fns_array=IO.readlines(@opt.fns,'') + assembled=insertions? 
+ write(assembled) + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + end + end + def write(assembled) + assembled_file=File.new("#{@env.path.composite_file}/#{@opt.fnb}.ssm.sst",'w+') + assembled.each{|a| assembled_file << a } + assembled_file.close + end + def download_images(download_from,images_array) + path="#{@env.path.processing}/external_document/image" + mkdir_p(path) unless FileTest.directory?(path) + images_array.each do |i| + image="#{path}/#{i}" + unless FileTest.exists?(image) + imagefile=File.new(image,'w+') + open("#{download_from}/#{i}") do |g| + imagefile << g.read + end + imagefile.close + end + end + end + def download_doc_skin(doc_skin) #first element in array is source url + path="#{@env.path.processing}/external_document/skin/doc" + mkdir_p(path) unless FileTest.directory?(path) + download_from=doc_skin.shift + doc_skin.each do |i| + skin="#{path}/#{i}.rb" + unless FileTest.exists?(skin) + skinfile=File.new(skin,'w+') + open("#{download_from}/#{i}.rb") do |g| + skinfile << g.read + end + skinfile.close + end + end + end + def insertion(fni,insert_array) + file={ :prepared=>[],:images=>[] } + rgx_image=/(?:^|[^_\\])\{\s*(\S+?\.(?:png|jpg|gif))/ + file[:prepared] << "\n% |#{fni}|@|^|>>ok\n" + @code_flag=false + insert_array.each do |i| + @code_flag=if i =~/^code\{/; true + elsif i =~/^\}code/; false + else @code_flag + end + if not @code_flag \ + and i !~/^%+\s/ + i.gsub!(/^([123]|:?[ABC])~\? /,'% [conditional heading:] \1~ ') #off conditional heading (consider syntax) + if i =~/^@\S+?:/ + i.gsub!(/\n/m,"\n% ") + i.gsub!(/\n%\s+$/m,'') + i.gsub!(/^@\S+?:/m,"\n% [imported header:] ") #off imported headers + end + end + file[:prepared] << i + if i !~/^%+\s/ \ + and i =~rgx_image + file[:images] << i.scan(rgx_image).uniq + end + end + file[:prepared] << "\n% end import" << "\n\n" + if file[:images].length > 0 + file[:images].flatten! + file[:images].uniq! 
+ file[:images].delete_if {|x| x =~/https?:\/\// } + end + file + end + def insertions? + data=@fns_array + tuned_file,imagedir=[],[] + SiSU_Screen::Ansi.new(@opt.cmd,'Composite Document',@opt.fns).grey_title_hi unless @opt.cmd =~/q/ + para=data.each do |para| + if para =~/^<<\s+(\S+?\.ss[it])$/ \ + or para =~/^<<\{(\S+?\.ss[it])\}$/ \ + or para =~/^(?:<<\s*)\|(\S+?)\|@\|.+?\|(?:req(?:quire)?\b|\s*\})?/ \ + or para =~/^r\{(.+?)\}/ #depreciated + loadfile=$1.strip + SiSU_Screen::Ansi.new(@opt.cmd,'loading:',loadfile).txt_grey if @opt.cmd =~/[MVv]/ + tuned_file << if loadfile =~ /(?:https?|file):\/\/\S+?\.ss[ti]$/ # and NetTest + imagedir = /((?:https?|file):\/\/\S+?)\/[^\/]+?\.ss[ti]$/.match(loadfile).captures.join + '/_sisu/image' #watch + require 'uri' + image_uri=URI.parse(imagedir) + require 'open-uri' + require 'pp' + insert=open(loadfile) + insert_array=insert.dup + insert.close + file=insertion(loadfile,insert_array) + @@imager[image_uri] ||=[] + @@imager[image_uri] << file[:images] + file[:prepared] + elsif loadfile =~ /\.ss[ti]$/ \ + and FileTest.file?(loadfile) + insert_array=IO.readlines(loadfile,'') + file=insertion(loadfile,insert_array) + file[:prepared] + else + cX=SiSU_Screen::Ansi.new(@opt.cmd).cX + puts "\t #{cX.fuschia}ERROR#{cX.off} #{cX.brown}#{@opt.fns}#{cX.off} #{cX.fuschia}requires invalid or non-existent file:#{cX.off} #{cX.brown}#{loadfile}#{cX.off}" + para + end + else tuned_file << para + end + tuned_file.flatten! + tuned_file.compact! + end + if @@imager.length >0 + @@imager.each do |d,i| + i.flatten! + i.uniq! + download_images(d,i) + end + end + tuned_file + end + end + class Composite_file_list + @@imager={} + def initialize(opt) + @opt=opt + @env=SiSU_Env::Info_env.new + end + def read + begin + @opt.fns=@opt.fns.gsub(/\.ssm\.sst$/,'.ssm') #FIX earlier, hub + @fns_array=IO.readlines(@opt.fns,'') + files=insertions? + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + end + end + def insertions? 
+ data=@fns_array + tuned_file,imagedir=[],[] + SiSU_Screen::Ansi.new(@opt.cmd,'Composite Document',@opt.fns).grey_title_hi unless @opt.cmd =~/q/ + @ssm=[@opt.fns] + para=data.each do |para| + if para =~/^<<\s+(\S+?\.ss[it])$/ + loadfile=$1.strip + SiSU_Screen::Ansi.new(@opt.cmd,'loading:',loadfile).txt_grey if @opt.cmd =~/[MVv]/ + tuned_file << if loadfile =~ /(?:https?|file):\/\/\S+?\.ss[ti]$/ + @ssm << loadfile + elsif loadfile =~ /\.ss[ti]$/ \ + and FileTest.file?(loadfile) + @ssm << loadfile + else + cX=SiSU_Screen::Ansi.new(@opt.cmd).cX + puts "\t #{cX.fuschia}ERROR#{cX.off} #{cX.brown}#{@opt.fns}#{cX.off} #{cX.fuschia}requires invalid or non-existent file:#{cX.off} #{cX.brown}#{loadfile}#{cX.off}" + para + end + end + end + @ssm + end + end +end +__END__ diff --git a/lib/sisu/v3/concordance.rb b/lib/sisu/v3/concordance.rb new file mode 100644 index 00000000..9b62b441 --- /dev/null +++ b/lib/sisu/v3/concordance.rb @@ -0,0 +1,345 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: concordance file (html concordance, wordmap, linked index of + words in document) + +=end +module SiSU_Concordance + require "#{SiSU_lib}/particulars" # particulars.rb + include SiSU_Particulars + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Env + require "#{SiSU_lib}/defaults" # defaults.rb + include SiSU_Viz + require "#{SiSU_lib}/html_format" # html_format.rb + include SiSU_HTML_Format + require "#{SiSU_lib}/html_minitoc" # html_minitoc.rb + class Source + def initialize(opt) + @opt=opt + @particulars=SiSU_Particulars::Combined_singleton.instance.get_all(opt) + end + def read + begin + @env,@md=@particulars.env,@particulars.md + loc=@env.url.output_tell + unless @md.cmd =~/q/ + tool=(@md.cmd =~/[MVv]/) ? "#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:concordance]}" : @md.fns + @md.cmd=~/[MVvz]/ \ + ? SiSU_Screen::Ansi.new(@md.cmd,"Concordance",tool).grey_title_hi \ + : SiSU_Screen::Ansi.new(@md.cmd,'Concordance',tool).green_title_hi + end + wordmax=@env.concord_max + unless @md.wc_words.nil? 
+ if @md.wc_words < wordmax + SiSU_Concordance::Source::Words.new(@particulars).songsheet + else + SiSU_Screen::Ansi.new(@md.cmd,"concordance skipped, large document has over #{wordmax} words (#{@md.wc_words})").warn unless @md.cmd =~/q/ + end + else + SiSU_Screen::Ansi.new(@md.cmd,"wc (word count) is off, concordance will be processed for all files including those over the max set size of: #{wordmax} words").warn unless @md.cmd =~/q/ + SiSU_Concordance::Source::Words.new(@particulars).songsheet + end + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + ensure + end + end + private + class Doc_title + include SiSU_Viz + #revisit, both requires (html & shared_xml) needed for stand alone operation (sisu -w [filename]) + require "#{SiSU_lib}/shared_xml" # shared_xml.rb + require "#{SiSU_lib}/html" # html.rb + def initialize(particulars) + @particulars,@md=particulars,particulars.md + @data=SiSU_HTML::Source::Html_environment.new(particulars).tuned_file_instructions + @file=SiSU_Env::SiSU_file.new(@md) + @vz=SiSU_Env::Get_init.instance.skin + txt_path=%{#{@md.dir_out}} + SiSU_Env::Info_skin.new(@md).select + @fnb=@md.fnb + @lex_button=%{SiSU home -->} + @doc_details =< 

#{@md.title.full}

#{@md.author}

+WOK + end + def create + head_banner=SiSU_HTML_Format::Head_toc.new(@md) + minitoc=SiSU_HTML_minitoc::Toc_mini.new(@md,@data).songsheet.join("\n") + @css=SiSU_Env::CSS_stylesheet.new(@md) + toc='
' + minitoc + '
' + < + + + + SiSU created WordIndex for: #{@md.title.full} + + + + + + + + #{@css.html_seg} + #{@vz.js_head} + + + #{@vz.js_top} + #{head_banner.concordance_navigation_band('pdf')} + #{toc} +
+ #{@doc_details} +

Word index links are to html versions of the text the segmented version followed by the scroll (single document) version.
[For segmented text references [T1], [T2] or [T3] appearing without a link, indicates that the word appears in a title (or subtitle) of the text (that is identifiable by the appended object citation number).]

+

(The word listing/index is Case sensitive: Capitalized words appear before lower case)

+

+ word (number of occurences)
linked references to word within document
+ [if number of occurences exceed number of references - word occurs more than once in at least one reference. Footnote/endnotes are either assigned to the paragraph from which they are referenced or ignored, so it is relevant to check the footnotes referenced from within a paragraph as well.] +

+

+ (After the page is fully loaded) you can jump directly to a word by appending a hash (#) and the word to the url for this text, (do not forget that words are case sensitive, and may be listed twice (starting with and without an upper case letter)), #your_word # [ http://[web host]/#{@fnb}/concordance.html#your_word ] +

+WOK + end + end + class Word + @@word_previous='' + def initialize(word,freq) + @word,@freq=word,freq + end + def html + w=if @word.capitalize==@@word_previous + %{\n

#{@word}

(#{@freq})

\n\t

} + else n=@word.strip.gsub(/\s+/,'_') #also need to convert extended character set to html + %{\n

#{@word}

(#{@freq})

\n\t

} + end + @@word_previous=@word.capitalize + w + end + end + class Words + require "#{SiSU_lib}/defaults" # defaults.rb + include SiSU_Viz + require "#{SiSU_lib}/html_format" # html_format.rb + include SiSU_HTML_Format + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Screen + @@dp=nil + def initialize(particulars) + @particulars=particulars + begin + @vz=SiSU_Env::Get_init.instance.skin + @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array + @file=SiSU_Env::SiSU_file.new(@md) + @freq=Hash.new(0) + @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern + @rxp_lv1=/^#{Mx[:lv_o]}1:/ #fix @rxp_lv # Mx[:lv_o] + @rxp_lv2=/^#{Mx[:lv_o]}2:/ #fix @rxp_lv # Mx[:lv_o] + @rxp_lv3=/^#{Mx[:lv_o]}3:/ #fix @rxp_lv # Mx[:lv_o] + @rxp_title=Regexp.new("^#{Mx[:meta_o]}title#{Mx[:meta_c]}\s*(.+?)\s*$") + @rxp_t1=Regexp.new('^T1') + @rxp_t2=Regexp.new('^T2') + @rxp_t3=Regexp.new('^T3') + @rxp_excluded1=/#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/ + @rxp_excluded0=/^(?:#{Mx[:fa_bold_o]}|#{Mx[:fa_italics_o]})?(?:to\d+|\d+| |#{Mx[:br_endnotes]}|EOF|#{Mx[:br_eof]}|thumb_\S+|snap_\S+|_+|-+|[(]?(?:ii+|iv|vi+|ix|xi+|xiv|xv|xvi+|xix|xx)[).]?|\S+?_\S+|[\d_]+\w\S+|[\w\d]{1,2}|\d{1,3}\w?|#{@dp}|[0-9a-f]{16,64}|\d{2,3}x\d{2,3}|\S{0,2}sha\d|\S{0,3}\d{4}w\d\d|\b\w\d+|\d_all\b|e\.?g\.?)(?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})?$/mi #this regex causes and cures a stack dump in ruby 1.9 !!! 
+ @rgx_splitlist=%r{[—.,;:#{Mx[:nbsp]}-]}mi + @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|(?:https?|file)://\S+|<\S+?>|\w+|[a-zA-Z]+}mi + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + end + end + def songsheet + begin + mkdir_p(@file.output_path.html_concordance) unless FileTest.directory?(@file.output_path.html_concordance) + @file_concordance=File.open(@file.place_file.html_concordance,'w') + map_para + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + ensure + @file_concordance.close + end + end + protected + def location_scroll(wordlocation,show) + %{#{wordlocation}; } + end + def location_seg(wordlocation,show) ##fix + @word_location_seg=wordlocation.gsub(/(.+?)\#(\d+)/,"#{@md.fnl[:pre]}\\1#{@md.fnl[:mid]}#{Sfx[:html]}#{@md.fnl[:post]}#\\2") unless wordlocation.nil? + case wordlocation + when @rxp_t1 + %{[H]#{show}, } + when @rxp_t2 + %{[H]#{show}, } + when @rxp_t3 + %{[H]#{show}, } + else %{#{show}, } + end + end + def map_para + @seg,ocn=nil,nil + @word_map={} + @dal_array.each do |line| + if defined? line.ocn \ + and line.ocn.to_s =~/\d/ + if line.is =~/heading/ \ + and line.ln==4 + @seg=line.name + end + ocn=line.ocn.to_s + if ocn =~/\d+/ \ + and ocn !~/^0$/ + line.obj.gsub!(/#{@rxp_excluded1}/,' ') + line.obj=line.obj.split(@rgx_splitlist).join(' ') #%take in word or other match + for word in line.obj.scan(@rgx_scanlist) #%take in word or other match + word.gsub!(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}|#{Mx[:url_o]}|#{Mx[:url_c]}/,'') + word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,'') + word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') + word.gsub!(/#{Mx[:gl_o]}#[a-z]+#{Mx[:gl_c]}/,'') + word.gsub!(/#{Mx[:gl_o]}#[0-9]+#{Mx[:gl_c]}/,'') + word.gsub!(/[0-9a-f]{10,}/,' ') if word =~/[0-9]/ + word.gsub!(/#{Mx[:br_line]}/,' ') + word.gsub!(/^ +/,'') + word.gsub!(/^\S$/,'') + word=nil if word.empty? 
+ word=nil if word =~@rxp_excluded0 #watch + word=nil if word =~/^\S$/ + if word + word.gsub!(/#{Mx[:br_nl]}|#{Mx[:br_line]}/,' ') + word.gsub!(/#{Mx[:fa_o]}[a-z]{1,7}#{Mx[:fa_o_c]}|#{Mx[:fa_c_o]}[a-z]{1,7}#{Mx[:fa_c]}/,'') + word.gsub!(/#{Mx[:en_a_o]}(?:\d|[*+])*|#{Mx[:en_b_o]}(?:\d|[*+])*|#{Mx[:en_a_c]}|#{Mx[:en_b_c]}/mi,'') + word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,''); word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') + word.gsub!(/<\/?\S+?>/,'') + word.gsub!(/^\@+/,'') + word.strip! + word.gsub!(/#{Mx[:tc_p]}.+/,'') + word.gsub!(/[\.,;:"]$/,'') + word.gsub!(/["]/,'') + word.gsub!(/^\s*[\(]/,'') + word.gsub!(/[\(]\s*$/,'') + word.gsub!(/^(?:See|e\.?g\.?).+/,'') + word.gsub!(/^\s*[.,;:]\s*/,'') + word.strip! + word.gsub!(/^\(?[a-zA-Z]\)$/,'') + word.gsub!(/^\d+(st|nd|rd|th)$/,'') + word.gsub!(/^(\d+\.?)+$/, '') + word.gsub!(/#{Mx[:mk_o]}|#{Mx[:mk_c]}/,'') + word.gsub!(/:name#\S+/,'') + word.gsub!(/^\S$/,'') + word=nil if word =~/^\S$/ + word=nil if word =~/^\s*$/ #watch + if word + unless word =~/[A-Z][A-Z]/ \ + or word =~/\w+\s\w+/ + word.capitalize! + end + @freq[word] +=1 + @word_map[word] ||= [] + if line !~@rxp_lv1 \ + and line !~@rxp_lv2 \ + and line !~@rxp_lv3 #fix @rxp_lv # Mx[:lv_o] + @word_map[word] << location_seg("#{@seg}\##{ocn}",ocn) + else + @word_map[word] << case line + when @rxp_lv1; location_seg('T1',ocn) #fix @rxp_lv # Mx[:lv_o] + when @rxp_lv2; location_seg('T2',ocn) #fix @rxp_lv # Mx[:lv_o] + when @rxp_lv3; location_seg('T3',ocn) #fix @rxp_lv # Mx[:lv_o] + end + end + end + end + end + end + end + end + scr='Full Text scroll: doc#  ' + seg='' + head=SiSU_Concordance::Source::Doc_title.new(@particulars).create + head.gsub!(/#{Xx[:html_relative2]}/m,@file.path_rel_links.html_seg_2) + head.gsub!(/#{Xx[:html_relative1]}/m,@file.path_rel_links.html_seg_1) + @file_concordance << head + alph=%W[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @file_concordance << '

' + alph.each {|x| @file_concordance << %{#{x}, }} + @file_concordance << '

' + letter=alph.shift + @file_concordance << %{\n

A

} + for word in @freq.keys.sort! {|a,b| a.downcase<=>b.downcase} + f=/^(\S)/.match(word)[1] + if letter < f.upcase + while letter < f.upcase + if alph.length > 0 + letter=alph.shift + @file_concordance << %{\n

#{letter}

} + else break + end + end + end + keyword=SiSU_Concordance::Source::Word.new(word,@freq[word]).html + if keyword !~ @rxp_excluded0 + if @word_map[word][0] =~ /\d+/ + wm=[] + @file_concordance << %{#{keyword}#{seg}#{@word_map[word].uniq.compact.join}} + end + @file_concordance << '

' + end + # special cases endnotes and header levels 1 - 3 + end + credits=@vz.credits_sisu + @file_concordance << %{
\n} # footer + SiSU_Screen::Ansi.new(@md.cmd,@md.fns,"#{@env.path.output_tell}/#{@md.fn[:concordance]}").flow if @md.cmd =~/[MV]/ + end + end + end +end +__END__ diff --git a/lib/sisu/v3/conf.rb b/lib/sisu/v3/conf.rb new file mode 100644 index 00000000..f3f978db --- /dev/null +++ b/lib/sisu/v3/conf.rb @@ -0,0 +1,249 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: configuration + +=end +module SiSU_Initialize + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Env + include SiSU_Screen + require "#{SiSU_lib}/relaxng" # relaxng.rb + include SiSU_relaxng + require "#{SiSU_lib}/css" # css.rb + include SiSU_Style + class Source + def initialize(opt) + @opt=opt + end + def read + SiSU_Config.new(@opt).make_homepage + SiSU_Config.new(@opt).css + SiSU_Config.new(@opt).dtd + SiSU_Config.new(@opt).cp_local_images + SiSU_Config.new(@opt).cp_external_images + SiSU_Config.new(@opt).cp_webserver_images #if @opt.mod.inspect =~/--init(?:ialize)?=site/ + end + end + class SiSU_Config #config files such as css are not updated if they already exist unless forced using the --init=site modifier + require 'fileutils' + require "#{SiSU_lib}/sysenv" # sysenv.rb + include FileUtils #::Verbose + def initialize(opt) + @opt=opt + @env=SiSU_Env::Info_env.new(@opt.fns) + @suffix,@path={},{} + @suffix[:rnc]='rnc' + @suffix[:rng]='rng' + @suffix[:xsd]='xsd' + @path[:xml]= @env.path.output + '/_sisu/xml' + @path[:xsd]= @env.path.output + '/_sisu/xml/xsd' + @path[:rnc]= @env.path.output + '/_sisu/xml/rnc' + @path[:rng]= @env.path.output + '/_sisu/xml/rng' + @pwd,@home=Dir.pwd,@env.path.home + end + def make_homepage + SiSU_Screen::Ansi.new(@opt.cmd,'invert','Make homepage','').colorize unless @opt.cmd =~/q/ + SiSU_Env::Create_site.new(@opt.cmd).homepage + end + def cp_local_images + SiSU_Screen::Ansi.new(@opt.cmd,'invert','Copy images','').colorize unless @opt.cmd =~/q/ + SiSU_Env::Create_site.new(@opt.cmd).cp_local_images + SiSU_Env::Create_site.new(@opt.cmd).cp_webserver_images_local #this should not 
have been necessary + SiSU_Env::Create_site.new(@opt.cmd).cp_base_images #base images (nav etc.) used by all html + end + def cp_external_images + SiSU_Screen::Ansi.new(@opt.cmd,'invert','Copy external images','').colorize if @opt.cmd =~/V/ + SiSU_Env::Create_site.new(@opt.cmd).cp_external_images + end + def cp_webserver_images + SiSU_Screen::Ansi.new(@opt.cmd,'invert','Copy webserver/output file images','').colorize unless @opt.cmd =~/q/ + SiSU_Env::Create_site.new(@opt.cmd).cp_webserver_images + SiSU_Env::Create_system_link.new.images + end + def css + SiSU_Screen::Ansi.new(@opt.cmd,'invert','Configuring CSSs','').colorize unless @opt.cmd =~/q/ + SiSU_Env::Create_site.new(@opt.cmd).cp_css + end + def dtd + SiSU_Screen::Ansi.new(@opt.cmd,'invert','Configuring DTDs','').colorize unless @opt.cmd =~/q/ + @rxng=SiSU_relaxng::RelaxNG.new + @path.each { |d| mkdir_p(d[1]) unless FileTest.directory?(d[1]) } + #ugly code, sort later + if @rxng.methods.join =~/[^_]dtd_sax\b/ + if @rxng.dtd_sax.length > 200 + dtd=File.new("#{@env.path.output}/#{@env.path.style}/#{@rxng.rng_name.output_sax}",'w') + dtd << @rxng.dtd_sax + dtd.close + else trang_rnc_model_output_sax + end + else trang_rnc_model_output_sax + end + if @rxng.methods.join =~/[^_]dtd_dom\b/ + if @rxng.dtd_dom.length > 200 + dtd=File.new("#{@env.path.output}/#{@env.path.style}/#{@rxng.rng_name.output_dom}",'w') + dtd << @rxng.dtd_dom + dtd.close + else trang_rnc_model_output_dom + end + else trang_rnc_model_output_dom + end + if @rxng.methods.join =~/[^_]dtd_node\b/ + if @rxng.dtd_node.length > 200 + dtd=File.new("#{@env.path.output}/#{@env.path.style}/#{@rxng.rng_name.input_node}",'w') + dtd << @rxng.dtd_node + dtd.close + else trang_rnc_model_input_node + end + else trang_rnc_model_input_node + end + if @rxng.methods.join =~/[^_]dtd_xhtml\b/ + if @rxng.dtd_xhtml.length > 200 + dtd=File.new("#{@env.path.output}/#{@env.path.style}/#{@rxng.rng_name.output_xhtml}",'w') + dtd << @rxng.dtd_xhtml + dtd.close + else 
trang_rnc_model_output_xhtml + end + else trang_rnc_model_output_xhtml + end + end + def trang_rnc_model_output_sax + s=@suffix + rnc_src=@env.path.dal + '/sax.' + s[:rnc] + rnc_file=@path[:rnc] + '/' + @rxng.rnc_name.output_sax + rng_file=@path[:rng] + '/' + @rxng.rng_name.output_sax + xsd_file=@path[:xsd] + '/' + @rxng.xsd_name.output_sax + rnc=File.new(rnc_src,'w') + rnc << @rxng.rnc_model_output_sax + rnc.close + #xsd + schema=SiSU_Env::System_call.new(rnc_src,xsd_file) + schema.relaxng(@opt.cmd) + #rng + schema=SiSU_Env::System_call.new(rnc_src,rng_file) + schema.relaxng(@opt.cmd) + #rnc + cp(rnc_src,rnc_file) + chmod(0644,rnc_file) + end + def trang_rnc_model_output_dom + s=@suffix + rnc_src=@env.path.dal + '/dom.' + s[:rnc] + rnc_file=@path[:rnc] + '/' + @rxng.rnc_name.output_dom + rng_file=@path[:rng] + '/' + @rxng.rng_name.output_dom + xsd_file=@path[:xsd] + '/' + @rxng.xsd_name.output_dom + rnc=File.new(rnc_src,'w') + rnc << @rxng.rnc_model_output_dom + rnc.close + #xsd + schema=SiSU_Env::System_call.new(rnc_src,xsd_file) + schema.relaxng(@opt.cmd) + #rng + schema=SiSU_Env::System_call.new(rnc_src,rng_file) + schema.relaxng(@opt.cmd) + #rnc + cp(rnc_src,rnc_file) + chmod(0644,rnc_file) + end + def trang_rnc_model_output_xhtml + s=@suffix + rnc_src=@env.path.dal + '/xhtml.' 
+ s[:rnc] + rnc_file=@path[:rnc] + '/' + @rxng.rnc_name.output_xhtml + rng_file=@path[:rng] + '/' + @rxng.rng_name.output_xhtml + xsd_file=@path[:xsd] + '/' + @rxng.xsd_name.output_xhtml + rnc=File.new(rnc_src,'w') + rnc << @rxng.rnc_model_output_xhtml + rnc.close + #xsd + schema=SiSU_Env::System_call.new(rnc_src,xsd_file) + schema.relaxng(@opt.cmd) + #rng + schema=SiSU_Env::System_call.new(rnc_src,rng_file) + schema.relaxng(@opt.cmd) + #rnc + cp(rnc_src,rnc_file) + chmod(0644,rnc_file) + end + def trang_rnc_model_input_sax + rnc_file=@env.path.dal + '/sax.rnc' + dtd_file=@path[:xsd] + '/' + @rxng.rng_name.input_sax + rnc=File.new(rnc_file,'w') + rnc << @rxng.rnc_model_output_sax + rnc.close + schema=SiSU_Env::System_call.new(rnc_file,dtd_file) + schema.relaxng(@opt.cmd) + end + def trang_rnc_model_input_dom + rnc_file=@env.path.dal + '/dom.rnc' + dtd_file=@path[:xsd] + '/' + @rxng.rng_name.input_dom + rnc=File.new(rnc_file,'w') + rnc << @rxng.rnc_model_output_dom + rnc.close + schema=SiSU_Env::System_call.new(rnc_file,dtd_file) + schema.relaxng(@opt.cmd) + end + def trang_rnc_model_input_node + rnc_file=@env.path.dal + '/node.rnc' + rng_file=@env.path.dal + '/node.rng' + dtd_file=@path[:xsd] + '/' + @rxng.rng_name.input_node + rnc=File.new(rnc_file,'w') + rnc << @rxng.rnc_model_input_node + rnc.close + schema=SiSU_Env::System_call.new(rnc_file,dtd_file) + schema.relaxng(@opt.cmd) + end + end +end +__END__ diff --git a/lib/sisu/v3/constants.rb b/lib/sisu/v3/constants.rb new file mode 100644 index 00000000..c8cac1f6 --- /dev/null +++ b/lib/sisu/v3/constants.rb @@ -0,0 +1,595 @@ +# coding:utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + constants + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +Sfx={:txt=>'.txt',:html=>'.html',:xhtml=>'.xhtml',:xml=>'.xml',:epub=>'.epub',:epub_xhtml=>'.xhtml',:odt=>'.odt',:pdf=>'.pdf'} +Ax,Xx,Mx,Rx,Hx,Dx,Px,Db,Gt,Tex=Array.new(10){{}} +Ax[:tab]="\t" +Xx[:protect]='☞' +Xx[:segment]='Ф' +Xx[:html_relative2]='※※' #'※' '☼' +Xx[:html_relative1]='※' #'※' '☼' +Mx[:meta_o],Mx[:meta_c]='〔@','〕' +Mx[:lv_o_1],Mx[:lv_o_2],Mx[:lv_o_3],Mx[:lv_o_4],Mx[:lv_o_5],Mx[:lv_o_6],Mx[:lv_o_7],Mx[:lv_o_8],Mx[:lv_o_9]=1,2,3,4,5,6,7,8,9; +Mx[:lv_o],Mx[:lv_c]='〔','〕' +Mx[:en_a_o]='【'; Mx[:en_a_c]='】' #endnote Mx[:en_a_o]='~{'; Mx[:en_a_c]='}~' +Mx[:en_b_o]='〖'; Mx[:en_b_c]='〗' #endnote Mx[:en_b_o]='~['; Mx[:en_b_c]=']~' +Mx[:bl_o]='〔'; Mx[:bl_c]='〕' #block text mark +Mx[:gr_o]='〔'; Mx[:gr_c]='〕' #group text mark #REPLACE & RETIRE +Mx[:id_o]='〔'; Mx[:id_c]='〕' #object id mark +Mx[:tc_o]='『'; Mx[:tc_c]="』" 
#table row mark #Mx[:tc_c]="』\n" +Mx[:tc_p]='┆' #table col/misc mark +Mx[:pa_o]='〔'; Mx[:pa_c]='〕' #affects paragraph mark +Mx[:mk_o]='〔'; Mx[:mk_c]='〕' #generic mark +Mx[:gl_o]='〔'; Mx[:gl_c]='〕' #glyph +Mx[:fa_o]='〔'; Mx[:fa_o_c]='¤'; Mx[:fa_c_o]='¤'; Mx[:fa_c]='〕' +Mx[:fa_bold_o]= "#{Mx[:fa_o]}b#{Mx[:fa_o_c]}"; Mx[:fa_bold_c]= "#{Mx[:fa_c_o]}b#{Mx[:fa_c]}" +Mx[:fa_italics_o]= "#{Mx[:fa_o]}i#{Mx[:fa_o_c]}"; Mx[:fa_italics_c]= "#{Mx[:fa_c_o]}i#{Mx[:fa_c]}" +Mx[:fa_underscore_o]= "#{Mx[:fa_o]}u#{Mx[:fa_o_c]}"; Mx[:fa_underscore_c]= "#{Mx[:fa_c_o]}u#{Mx[:fa_c]}" +Mx[:fa_cite_o]= "#{Mx[:fa_o]}cite#{Mx[:fa_o_c]}"; Mx[:fa_cite_c]= "#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}" +Mx[:fa_insert_o]= "#{Mx[:fa_o]}ins#{Mx[:fa_o_c]}"; Mx[:fa_insert_c]= "#{Mx[:fa_c_o]}ins#{Mx[:fa_c]}" +Mx[:fa_strike_o]= "#{Mx[:fa_o]}del#{Mx[:fa_o_c]}"; Mx[:fa_strike_c]= "#{Mx[:fa_c_o]}del#{Mx[:fa_c]}" +Mx[:fa_superscript_o]="#{Mx[:fa_o]}sup#{Mx[:fa_o_c]}"; Mx[:fa_superscript_c]="#{Mx[:fa_c_o]}sup#{Mx[:fa_c]}" +Mx[:fa_subscript_o]= "#{Mx[:fa_o]}sub#{Mx[:fa_o_c]}"; Mx[:fa_subscript_c]= "#{Mx[:fa_c_o]}sub#{Mx[:fa_c]}" +Mx[:fa_hilite_o]= "#{Mx[:fa_o]}hi#{Mx[:fa_o_c]}"; Mx[:fa_hilite_c]= "#{Mx[:fa_c_o]}hi#{Mx[:fa_c]}" +Mx[:fa_monospace_o]= "#{Mx[:fa_o]}mono#{Mx[:fa_o_c]}"; Mx[:fa_monospace_c]= "#{Mx[:fa_c_o]}mono#{Mx[:fa_c]}" +Mx[:gl_bullet]= "#{Mx[:gl_o]}●#{Mx[:gl_c]}" + Mx[:pa_non_object_dummy_heading]="#{Mx[:pa_o]}-##{Mx[:pa_c]}" #unnumbered paragraph, delete when not required [used in dummy headings, eg. 
for segmented html] (place marker at end of paragraph) + Mx[:pa_non_object_no_heading]="#{Mx[:pa_o]}~##{Mx[:pa_c]}" #unnumbered paragraph (place marker at end of paragraph) +Mx[:idx_o]='▢ '; Mx[:idx_c]='▢ ' # +Mx[:nbsp]= '░' #'▭ ' +Mx[:br_line]= '╱' #lB ▌ 9612 ┘ ¶ +Mx[:br_nl]= '╲' #lB ▌ 』 ┘ +Mx[:br_paragraph]= '█' #FB █ 9608 # PP ∥ 8741 #▐ #'┘' #'¶' #FB █ 9608 lB ▌ 9612 RB ▐ 9616 +Mx[:br_obj]= 'break_obj'; Hx[:br_obj]= {:obj=>Mx[:br_obj]} # line sep +Mx[:br_page]= 'break_page'; Hx[:br_page]= {:obj=>Mx[:br_page]} # newpage +Mx[:br_page_new]= 'break_page_new'; Hx[:br_page_new]= {:obj=>Mx[:br_page_new]} # clearpage +Mx[:br_endnotes]= "#{Mx[:mk_o]}ENDNOTES#{Mx[:mk_c]}" +Mx[:br_eof]= "#{Mx[:mk_o]}EOF#{Mx[:mk_c]}" +Mx[:lnk_o]='⌠'; Mx[:lnk_c]='⌡' #'⌈' '⌋' '⌠' '⌡' #Mx[:lnk_o]='◁'; Mx[:lnk_c]='▷' #‹ › +Mx[:url_o]='「'; Mx[:url_c]='」' +Mx[:rel_o]='⌈'; Mx[:rel_c]='⌋' +Mx[:tag_o]='⌊'; Mx[:tag_c]='⌉' +Mx[:sm_set_o]='《'; Mx[:sm_set_c]='》' +Mx[:sm_subset_o]='《 '; Mx[:sm_subset_c]='》' +Mx[:vline]='┆' # ¦ | +#Mx[:sm_set_o]='∈ '; Mx[:sm_set_c]='∋ ' +#Mx[:sm_subset_o]='∈ '; Mx[:sm_subset_c]='∋ ' +Rx[:mx_fa_clean]= /#{Mx[:fa_o]}.+?#{Mx[:fa_c]}|#{Mx[:pa_o]}.+?#{Mx[:pa_c]}|#{Mx[:mk_o]}.+?#{Mx[:mk_c]}/ +Rx[:lv],Rx[:lv_1],Rx[:lv_2],Rx[:lv_3],Rx[:lv_4],Rx[:lv_5],Rx[:lv_6],Rx[:lv_7],Rx[:lv_8],Rx[:lv_9]= + /〔([1-9]):(\S*?)〕/,/#{Mx[:lv_o_1]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_2]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_3]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_4]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_5]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_6]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_7]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_8]}(\S*?)#{Mx[:lv_c]}/,/#{Mx[:lv_o_9]}(\S*?)#{Mx[:lv_c]}/ +Rx[:meta]=/#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}/ +Dx[:url_o]='‹'; Dx[:url_c]='›' +Dx[:url_o_xml]='<'; Dx[:url_c_xml]='>' +Dx[:rel_o]='‹'; Dx[:rel_c]='›' # Dx[:rel_o]='「'; Dx[:rel_c]='」' +Tex[:backslash]="\\\\" +Tex[:backslash]="\\\\" +Tex[:tilde]='\\\\\\~' +#Px[:emphasis_o]= '*'; Px[:emphasis_c]= '*' +#Px[:bold_o]= '!'; Px[:bold_c]= '!' 
+Px[:bold_o]= '*'; Px[:bold_c]= '*' +Px[:italics_o]= '/'; Px[:italics_c]= '/' +Px[:underscore_o]= '_'; Px[:underscore_c]= '_' +Px[:cite_o]= '"'; Px[:cite_c]= '"' +Px[:insert_o]= '+'; Px[:insert_c]= '+' +Px[:strike_o]= '-'; Px[:strike_c]= '-' +Px[:superscript_o]= '^'; Px[:superscript_c]= '^' +Px[:subscript_o]= '['; Px[:subscript_c]= ']' +Px[:hilite_o]= '*'; Px[:hilite_c]= '*' +Px[:monospace_o]= ''; Px[:monospace_c]= '' +Px[:po_bold_o]= '!{'; Px[:po_bold_c]= '}!' +Px[:po_italics_o]= '/{'; Px[:po_italics_c]= '}/' +Px[:po_underscore_o]= '_{'; Px[:po_underscore_c]= '}_' +Px[:po_cite_o]= '"{'; Px[:po_cite_c]= '}"' +Px[:po_insert_o]= '+{'; Px[:po_insert_c]= '}+' +Px[:po_strike_o]= '-{'; Px[:po_strike_c]= '}-' +Px[:po_superscript_o]='^{'; Px[:po_superscript_c]='}^' +Px[:po_subscript_o]= ',{'; Px[:po_subscript_c]= '},' +Px[:po_hilite_o]= '*{'; Px[:po_hilite_c]= '}*' +Px[:po_monospace_o]= '#{'; Px[:po_monospace_c]= '}#' +Px[:lng_lst]=%w[am bg bn br ca cs cy da de el en eo es et eu fi fr ga gl he hi hr hy ia is it la lo lt lv ml mr nl nn no oc pl pt pt_BR ro ru sa se sk sl sq sr sv ta te th tk tr uk ur us vi] +#Px[:lng_lst]=%w[sq am hy eu bn pt_BR br bg ca hr cs da nl en eo et gl de el he hi is ia ga it fi fr lo la lv lt ml mr no nn oc pl pt ro ru se sa sr sk sl es sv ta te th tr tk uk ur vi cy us] +Px[:lv1]= '*' +Px[:lv2]= '=' +Px[:lv3]= '=' +#Px[:lv2_3]= '=' +Px[:lv4]= '-' +Px[:lv5]= '.' +Px[:lv6]= '.' +#Px[:lv5_6]= '.' 
+Db[:name_prefix]="SiSU#{SiSU_version_dir}c_" +Db[:name_prefix_db]="sisu_#{SiSU_version_dir}c_" +Db[:col_title]=800 +Db[:col_title_part]=400 +Db[:col_title_edition]=10 +Db[:col_name]=600 +Db[:col_creator_misc_short]=100 +Db[:col_language]=100 +Db[:col_language_char]=3 +Db[:col_date_text]=10 +Db[:col_classify_txt_long]=600 +Db[:col_classify_txt_short]=600 +Db[:col_classify_short]=200 +Db[:col_classify_identify]=256 +Db[:col_classify_library]=30 +Db[:col_classify_small]=16 +Db[:col_filename]=256 +Db[:col_digest]=64 +Db[:col_filesize]=10 +Db[:col_info_note]=2500 +Gt[:grotto]='sisu:' +Gt[:src]='src' +Gt[:po]='po4a/po' +Gt[:pot]='po4a/pot' +Gt[:image]='mm/image' +Gt[:audio]='mm/audio' +Gt[:video]='mm/video' +Gt[:conf]='conf' +Gt[:skin]='conf/skin' #Gt[:skin]='conf/skin/doc' +__END__ +consider: + 〔comment〕 + 〔links?????〕 + import document? +check: + bold line + +┆┆⋮┇┊┋ +『』 +「」 +〔〕 +【】 + +· +¤ + #˝ " λ Ω β α π Ѫ Ж Я Ѳ ѳ Ф ㈣ + Ѳ ѳ Ф + ♩ ♭  ✠  ▭ ▬ ▪ +【】〖〗《》「」 + ‹ › ∗  +'〔lv1〕','〔lv2〕','〔lv3〕','〔lv4〕','〔lv5〕','〔lv6〕','〔lv7〕','〔lv8〕','〔lv9〕' +'〔 Ѳ1〕','〔 Ѳ2〕','〔 Ѳ3〕','〔 Ѳ4〕','〔 Ѳ5〕','〔Ѳ6〕','〔Ѳ7〕','〔Ѳ8〕','〔Ѳ9〕' +◁▷ +◀this is text or an image▶ http:// +p __FILE__ +':'+ __LINE__.to_s +p __FILE__ + ' ' + __LINE__.to_s + ' ' + html +puts "#{__FILE__} #{__LINE__} #{o.inspect}" +puts __FILE__ + ' ' + __LINE__.to_s + '--> ' + o.inspect +puts %{-\t#{__FILE__}::#{__LINE__}::#{caller}:\n"#{name}"} +p "\t" + txt.obj + " << #{__FILE__} #{__LINE__} >>" +p (__FILE__ + ' ' + __LINE__.to_s + '--> ' + dob.inspect) if dob.is=='heading' +data.each {|o| p (__FILE__ + ' ' + __LINE__.to_s + '--> ' + o.inspect) if o.is=='heading'} +puts "#{__FILE__} #{__LINE__} #{para}" if @opt.cmd =~/M/ +puts "#{__FILE__} #{__LINE__} #{t_o}" if @opt.cmd =~/M/ + dr ┌ 9484 dR ┍ 9485 Dr ┎ 9486 DR ┏ 9487 dl ┐ 9488 dL ┑ 9489 Dl ┒ 9490 LD ┓ 9491 ur └ 9492 uR ┕ 9493 Ur ┖ 9494 UR ┗ 9495 ul ┘ 9496 uL ┙ 9497 Ul ┚ 9498 UL ┛ 9499 vr ├ + dr ┌ 9484 dR ┍ 9485 Dr ┎ 9486 DR ┏ 9487 dl ┐ 9488 dL ┑ 9489 Dl ┒ 9490 LD ┓ 9491 ur └ 
9492 uR ┕ 9493 Ur ┖ 9494 UR ┗ 9495 ul ┘ 9496 uL ┙ 9497 Ul ┚ 9498 UL ┛ 9499 vr ├ + └ ┘ +Iu ⌠ 8992 Il ⌡ <7 ⌈ 8968 >7 ⌉ 8969 7< ⌊ 8970 7> ⌋ 8971 +<" 『 12302 >" 』 12303 +<' 「 12300 >' 」 12301 + +#% Language List po4a +http://www.debian.org/international/l10n/po/ +see polyglossia for subset +* CSB (Unknown language) +* KAB (Unknown language) +* TLH (Unknown language) +* aa (Afar) +* ab (Abkhazian) +* af (Afrikaans) +* af_ZA (Afrikaans, as spoken in South Africa) +* am (Amharic) +* an (Unknown language) +* ang (Unknown language) +* ar (Arabic) +* ar_AR (Arabic, as spoken in Argentina) +* ar_EG (Arabic, as spoken in Egypt) +* ar_OM (Arabic, as spoken in Oman) +* ar_PS (Arabic, as spoken in Palestinian Territory, Occupied) +* ar_SA (Arabic, as spoken in Saudi Arabia) +* ar_SY (Arabic, as spoken in Syrian Arab Republic) +* as (Assamese) +* ast (Unknown language) +* ay (Aymara) +* az (Azerbaijani) +* az_IR (Azerbaijani, as spoken in Iran) +* be (Belarusian) +* be@latin (Unknown language) +* be@tarask (Unknown language) +* bem (Unknown language) +* bg (Bulgarian) +* bg_BG (Bulgarian, as spoken in Bulgaria) +* bi (Bislama) +* bn (Bengali) +* bn_BD (Bengali, as spoken in Bangladesh) +* bn_IN (Bengali, as spoken in India) +* bo (Tibetan) +* br (Breton) +* bs (Bosnian) +* bs_BA (Bosnian, as spoken in Bosnia and Herzegovina) +* bs_BS (Bosnian, as spoken in Bahamas) +* byn (Unknown language) +* ca (Catalan) +* ca@valencia (Unknown language) +* ca_AD (Catalan, as spoken in Andorra) +* ca_ES (Catalan, as spoken in Spain) +* ca_ES@valencia (Unknown language) +* ca_FR (Catalan, as spoken in France) +* ca_IT (Catalan, as spoken in Italy) +* co (Corsican) +* crh (Unknown language) +* cs (Czech) +* cs_CZ (Czech, as spoken in Czech Republic) +* csb (Unknown language) +* cy (Welsh) +* cy_GB (Welsh, as spoken in Great Britain) +* cz (Unknown language) +* da (Danish) +* da_DK (Danish, as spoken in Denmark) +* de (German) +* de_AT (German, as spoken in Austria) +* de_CH (German, as spoken in 
Switzerland) +* de_DE (German, as spoken in Germany) +* dk (Unknown language) +* dz (Dzongkha) +* el (Greek) +* el_GR (Greek, as spoken in Greece) +* en (English) +* en@boldquot (Unknown language) +* en@quot (Unknown language) +* en@shaw (Unknown language) +* en_AU (English, as spoken in Australia) +* en_CA (English, as spoken in Canada) +* en_GB (English, as spoken in Great Britain) +* en_NZ (English, as spoken in New Zealand) +* en_US (English, as spoken in United States) +* en_US@piglatin (Unknown language) +* en_ZA (English, as spoken in South Africa) +* eo (Esperanto) +* es (Spanish) +* es_AR (Spanish, as spoken in Argentina) +* es_CL (Spanish, as spoken in Chile) +* es_CO (Spanish, as spoken in Colombia) +* es_CR (Spanish, as spoken in Costa Rica) +* es_DO (Spanish, as spoken in Dominican Republic) +* es_EC (Spanish, as spoken in Ecuador) +* es_ES (Spanish, as spoken in Spain) +* es_GA (Spanish, as spoken in Gabon) +* es_GT (Spanish, as spoken in Guatemala) +* es_HN (Spanish, as spoken in Honduras) +* es_LA (Spanish, as spoken in Lao People's Democratic Republic) +* es_MX (Spanish, as spoken in Mexico) +* es_NI (Spanish, as spoken in Nicaragua) +* es_PA (Spanish, as spoken in Panama) +* es_PE (Spanish, as spoken in Peru) +* es_PR (Spanish, as spoken in Puerto Rico) +* es_SV (Spanish, as spoken in El Salvador) +* es_UY (Spanish, as spoken in Uruguay) +* es_VE (Spanish, as spoken in Venezuela) +* et (Estonian) +* et_EE (Estonian, as spoken in Estonia) +* eu (Basque) +* eu_ES (Basque, as spoken in Spain) +* fa (Persian) +* fa_AF (Persian, as spoken in Afghanistan) +* fa_IR (Persian, as spoken in Iran) +* fi (Finnish) +* fi_FI (Finnish, as spoken in Finland) +* fil (Unknown language) +* fo (Faeroese) +* fo_FO (Faeroese, as spoken in Faroe Islands) +* fr (French) +* fr_BE (French, as spoken in Belgium) +* fr_CA (French, as spoken in Canada) +* fr_CH (French, as spoken in Switzerland) +* fr_FR (French, as spoken in France) +* fr_FX (French, as spoken in France, 
Metropolitan) +* fr_LU (French, as spoken in Luxembourg) +* frp (Unknown language) +* fur (Unknown language) +* fy (Frisian) +* fy_NL (Frisian, as spoken in Netherlands) +* ga (Irish) +* gd (Gaelic (Scots)) +* gez (Unknown language) +* gl (Galician) +* gl_ES (Galician, as spoken in Spain) +* gn (Guarani) +* gu (Gujarati) +* gv (Manx) +* ha (Hausa) +* he (Hebrew) +* he_IL (Hebrew, as spoken in Israel) +* hi (Hindi) +* hne (Unknown language) +* hr (Croatian) +* hr_HR (Croatian, as spoken in Croatia) +* ht (Unknown language) +* hu (Hungarian) +* hu_HU (Hungarian, as spoken in Hungary) +* hy (Armenian) +* ia (Interlingua) +* id (Indonesian) +* id_ID (Indonesian, as spoken in Indonesia) +* ig (Unknown language) +* io (Unknown language) +* is (Icelandic) +* is_IS (Icelandic, as spoken in Iceland) +* it (Italian) +* it_CH (Italian, as spoken in Switzerland) +* it_IT (Italian, as spoken in Italy) +* iu (Inuktitut) +* ja (Japanese) +* ja_JP (Japanese, as spoken in Japan) +* jv (Unknown language) +* jv_ID (Unknown language) +* ka (Georgian) +* kab (Unknown language) +* kk (Kazakh) +* kl (Kalaallisut) +* km (Khmer) +* km_KH (Khmer, as spoken in Cambodia) +* kn (Kannada) +* ko (Korean) +* ko_KR (Korean, as spoken in Korea) +* ks (Kashmiri) +* ku (Kurdish) +* kw (Cornish) +* ky (Kirghiz) +* la (Latin) +* lb (Letzeburgesch) +* lg (Unknown language) +* li (Unknown language) +* ln (Lingala) +* lo (Lao) +* lt (Lithuanian) +* lt_LT (Lithuanian, as spoken in Lithuania) +* lv (Latvian) +* lv_LV (Latvian, as spoken in Latvia) +* mai (Unknown language) +* mal (Unknown language) +* mg (Malagasy) +* mi (Maori) +* mk (Macedonian) +* mk_MK (Macedonian, as spoken in Macedonia, the Former Yugoslav Republic of) +* ml (Malayalam) +* ml_IN (Malayalam, as spoken in India) +* ml_ML (Malayalam, as spoken in Mali) +* mn (Mongolian) +* mr (Marathi) +* ms (Malay) +* ms_MY (Malay, as spoken in Malaysia) +* mt (Maltese) +* my (Burmese) +* my_MM (Burmese, as spoken in Myanmar) +* na (Nauru) +* nb 
(Norwegian Bokmål) +* nb_NO (Norwegian Bokmål, as spoken in Norway) +* nds (Unknown language) +* ne (Nepali) +* new (Unknown language) +* nl (Dutch) +* nl_BE (Dutch, as spoken in Belgium) +* nl_NL (Dutch, as spoken in Netherlands) +* nn (Norwegian Nynorsk) +* nn_NO (Norwegian Nynorsk, as spoken in Norway) +* no (Norwegian) +* no_NO (Norwegian, as spoken in Norway) +* nr (Ndebele, South) +* nso (Unknown language) +* oc (Occitan (post 1500)) +* oc_FR (Occitan (post 1500), as spoken in France) +* om (Oromo) +* or (Oriya) +* pa (Panjabi) +* pl (Polish) +* pl_PL (Polish, as spoken in Poland) +* pms (Unknown language) +* ps (Pushto) +* pt (Portuguese) +* pt_BR (Portuguese, as spoken in Brazil) +* pt_PT (Portuguese, as spoken in Portugal) +* qu (Quechua) +* rm (Rhaeto-Romance) +* ro (Romanian) +* ro_RO (Romanian, as spoken in Romania) +* ru (Russian) +* ru_RU (Russian, as spoken in Russia) +* rw (Kinyarwanda) +* sa (Sanskrit) +* sc (Sardinian) +* sd (Sindhi) +* se (Sami) +* se_NO (Sami, as spoken in Norway) +* si (Sinhalese) +* si_LK (Sinhalese, as spoken in Sri Lanka) +* si_SI (Sinhalese, as spoken in Slovenia) +* sk (Slovak) +* sk_SK (Slovak, as spoken in Slovakia) +* sl (Slovenian) +* sl_SI (Slovenian, as spoken in Slovenia) +* sl_SL (Slovenian, as spoken in Sierra Leone) +* so (Somali) +* sp (Unknown language) +* sq (Albanian) +* sq_AL (Albanian, as spoken in Albania) +* sr (Serbian) +* sr@Latn (Unknown language) +* sr@ije (Unknown language) +* sr@ijekavian (Unknown language) +* sr@ijekavianlatin (Unknown language) +* sr@latin (Unknown language) +* sr_SR (Serbian, as spoken in Suriname) +* sr_YU (Serbian, as spoken in Yugoslavia) +* st (Sotho) +* su (Sundanese) +* su_ID (Sundanese, as spoken in Indonesia) +* sv (Swedish) +* sv_SE (Swedish, as spoken in Sweden) +* sw (Swahili) +* ta (Tamil) +* ta_LK (Tamil, as spoken in Sri Lanka) +* te (Telugu) +* tg (Tajik) +* th (Thai) +* th_TH (Thai, as spoken in Thailand) +* ti (Tigrinya) +* tig (Unknown language) +* tk (Turkmen) 
+* tl (Tagalog) +* tlh (Unknown language) +* to (Tonga) +* tr (Turkish) +* tr_TR (Turkish, as spoken in Turkey) +* tt (Tatar) +* ug (Uighur) +* ug_CN (Uighur, as spoken in China) +* uk (Ukrainian) +* uk_UA (Ukrainian, as spoken in Ukraine) +* ur (Urdu) +* ur_PK (Urdu, as spoken in Pakistan) +* uz (Uzbek) +* uz@cyrillic (Unknown language) +* ve (Unknown language) +* vi (Vietnamese) +* vi_AR (Vietnamese, as spoken in Argentina) +* vi_DE (Vietnamese, as spoken in Germany) +* vi_PL (Vietnamese, as spoken in Poland) +* vi_TR (Vietnamese, as spoken in Turkey) +* vi_VN (Vietnamese, as spoken in Vietnam) +* wa (Unknown language) +* wal (Unknown language) +* wo (Wolof) +* xh (Xhosa) +* yi (Yiddish) +* yo (Yoruba) +* zh (Chinese) +* zh_CN (Chinese, as spoken in China) +* zh_HK (Chinese, as spoken in Hong Kong) +* zh_TW (Chinese, as spoken in Taiwan) +* zu (Zulu) + + 'sq'; 'albanian' + 'am'; 'amharic' +#'ar'; 'arabic' # see polyglossia + 'hy'; 'armenian' +#''; 'asturian' # polyglossia +#''; 'bahasai' # polyglossia +#''; 'bahasam' # polyglossia + 'eu'; 'basque' + 'bn'; 'bengali' + 'pt_BR'; 'brazilian' + 'br'; 'breton' + 'bg'; 'bulgarian' + 'ca'; 'catalan' # see polyglossia +#''; 'coptic' # polyglossia + 'hr'; 'croatian' + 'cs'; 'czech' + 'da'; 'danish' +#''; 'divehi' # polyglossia + 'nl'; 'dutch' # see polyglossia + 'en'; 'english' # see polyglossia + 'eo'; 'esperanto' # see polyglossia + 'et'; 'estonian' + 'gl'; 'galician' + 'de'; 'german' + 'el'; 'greek' #gl ? + 'he'; 'hebrew' + 'hi'; 'hindi' + 'is'; 'icelandic' + 'ia'; 'interlingua' + 'ga'; 'irish' + 'it'; 'italian' +#''; 'farsi' # polyglossia + 'fi'; 'finnish' + 'fr'; 'french' + 'lo'; 'lao' + 'la'; 'latin' + 'lv'; 'latvian' + 'lt'; 'lithuanian' +#''; 'lsorbian' # polyglossia +#''; 'magyar' # polyglossia + 'ml'; 'malayalam' + 'mr'; 'marathi' +#'hu'; 'magyar' + 'no'; 'norske' + 'nn'; 'nynorsk' + 'oc'; 'occitan' + 'pl'; 'polish' + 'pt'; 'portuges' + 'ro'; 'romanian' + 'ru'; 'russian' + 'se'; 'samin' #(check sami?) 
+ 'sa'; 'sanskrit' + 'sr'; 'serbian' +#''; 'scottish' # polyglossia (gd (Gaelic (Scots))) + 'sk'; 'slovak' + 'sl'; 'slovenian' + 'es'; 'spanish' + 'sv'; 'swedish' + 'ta'; 'tamil' + 'te'; 'telugu' + 'th'; 'thai' + 'tr'; 'turkish' + 'tk'; 'turkmen' + 'uk'; 'ukrainian' + 'ur'; 'urdu' +#''; 'usorbian' # polyglossia + 'vi'; 'vietnamese' + 'cy'; 'welsh' + 'us'; 'USenglish' # depreciated, see iso-639-2 diff --git a/lib/sisu/v3/css.rb b/lib/sisu/v3/css.rb new file mode 100644 index 00000000..d9727a83 --- /dev/null +++ b/lib/sisu/v3/css.rb @@ -0,0 +1,2085 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: css stylesheets + +=end +module SiSU_Style + require "#{SiSU_lib}/sysenv" # sysenv.rb + require "#{SiSU_lib}/defaults" # defaults.rb + class CSS + def initialize + @vz=SiSU_Env::Get_init.instance.skin + end + def fonts + @vz.font_fonts + end + def html_tables #stylesheet for css table_pages +<. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: preprocessing, (document abstraction), data abstraction used + in subsequent processing + +=end +module SiSU_DAL + require "#{SiSU_lib}/defaults" # defaults.rb + include SiSU_Viz + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Env + require "#{SiSU_lib}/param" # param.rb + include SiSU_Param + require "#{SiSU_lib}/dal_doc_objects" # dal_doc_objects.rb + require "#{SiSU_lib}/dal_syntax" # dal_syntax.rb + include SiSU_Syntax + require "#{SiSU_lib}/dal_doc_str" # dal_doc_str.rb + require "#{SiSU_lib}/dal_idx" # dal_idx.rb + require "#{SiSU_lib}/dal_numbering" # dal_numbering.rb + require "#{SiSU_lib}/dal_hash_digest" # dal_hash_digest.rb + require "#{SiSU_lib}/dal_endnotes" # dal_endnotes.rb + require "#{SiSU_lib}/dal_images" # dal_images.rb + require "#{SiSU_lib}/dal_metadata" # dal_metadata.rb + require "#{SiSU_lib}/dal_character_check" # dal_character_check.rb + require "#{SiSU_lib}/dal_substitutions_and_insertions" # dal_substitutions_and_insertions.rb + require 
"#{SiSU_lib}/dal_expand_insertions" # dal_expand_insertions.rb + require "#{SiSU_lib}/i18n" # i18n.rb + require "#{SiSU_lib}/shared_sem" # shared_sem.rb + class Instantiate < SiSU_Param::Parameters::Instructions + def initialize + @@flag_vocab=0 + @@line_mode='' + end + end + class Source [],:tex=>[],:html=>[],:xhtml=>[]} + @@map_arr={:nametags=>[],:ocn_htmlseg=>[]} + @@fns=nil + def initialize(opt,fnx=nil) + @opt,@fnx=opt,fnx + @@fns||@opt.fns + @make_fns=if @fnx and @fnx =~/\.ss[tmi]$/ + SiSU_Env::Info_file.new(@fnx) + else + SiSU_Env::Info_file.new(@opt.fns) + end + @fnm=@make_fns.marshal.dal_metadata + @fnc=@make_fns.marshal.dal_content + @idx_sst=@make_fns.marshal.dal_idx_sst_rel_html_seg + @idx_tex=@make_fns.marshal.dal_idx_sst_rel + @idx_html=@make_fns.marshal.dal_idx_html + @idx_xhtml=@make_fns.marshal.dal_idx_xhtml + @map_nametags=@make_fns.marshal.dal_map_nametags + @map_ocn_htmlseg=@make_fns.marshal.dal_map_ocn_htmlseg + SiSU_Env::Create_system_link.new.images + @env=SiSU_Env::Info_env.new + end + def read #creates dal + begin + dal=[] + @@dal_array=[] + @@fns=(@fnx && @fnx =~/\.ss[tmi]$/) \ + ? @fnx \ + : @opt.fns + create_dal + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@@fns).error + ensure + Instantiate.new + end + end + def get #reads dal, unless does not exist then creates first + begin + dal=[] + unless @@fns==@opt.fns \ + or @@fns==@fnx + @@fns=(@fnx && @fnx =~/\.ss[tmi]$/) \ + ? @fnx \ + : @opt.fns + @@dal_array=[] + end + dal=(@@dal_array.empty?) ? read_fnc : @@dal_array.dup + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + Instantiate.new + end + end + def get_idx_sst #reads dal idx.sst, #unless does not exist then creates first + begin + dal=[] + unless @@fns==@opt.fns \ + or @@fns==@fnx + @@fns=(@fnx && @fnx =~/\.ss[tmi]$/) \ + ? @fnx \ + : @opt.fns + @@idx_arr[:sst]=[] + end + dal=(@@idx_arr[:sst].empty?) ? 
read_idx_sst : @@idx_arr[:sst].dup #check + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + Instantiate.new + end + end + def get_idx_tex #reads dal idx.tex, #unless does not exist then creates first + begin + dal=[] + unless @@fns==@opt.fns \ + or @@fns==@fnx + @@fns=(@fnx && @fnx =~/\.ss[tmi]$/) \ + ? @fnx \ + : @opt.fns + @@idx_arr[:tex]=[] + end + dal=(@@idx_arr[:tex].empty?) ? read_idx_tex : @@idx_arr[:tex].dup #check + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + Instantiate.new + end + end + def get_idx_html #reads dal idx.html, #unless does not exist then creates first + begin + dal=[] + unless @@fns==@opt.fns \ + or @@fns==@fnx + @@fns=(@fnx && @fnx =~/\.ss[tmi]$/) \ + ? @fnx \ + : @opt.fns + @@idx_arr[:html]=[] + end + dal=(@@idx_arr[:html].empty?) ? read_idx_html : @@idx_arr[:html].dup + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + Instantiate.new + end + end + def get_idx_xhtml #reads dal idx.xhtml, #unless does not exist then creates first + begin + dal=[] + unless @@fns==@opt.fns \ + or @@fns==@fnx + @@fns=(@fnx && @fnx =~/\.ss[tmi]$/) \ + ? @fnx \ + : @opt.fns + @@idx_arr[:xthml]=[] + end + dal=(@@idx_arr[:xhtml].empty?) ? read_idx_xhtml : @@idx_arr[:xhtml].dup + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + Instantiate.new + end + end + def get_map_nametags #reads dal map.nametags, #unless does not exist then creates first + begin + dal=[] + unless @@fns==@opt.fns \ + or @@fns==@fnx + @@fns=(@fnx && @fnx =~/\.ss[tmi]$/) \ + ? @fnx \ + : @opt.fns + @@map_arr[:nametags]=[] + end + dal=(@@map_arr[:nametags].empty?) ? 
read_map_nametags : @@map_arr[:nametags].dup + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + Instantiate.new + end + end + def get_map_ocn_htmlseg #reads dal map.ocn_htmlseg, #unless does not exist then creates first + begin + dal=[] + unless @@fns==@opt.fns \ + or @@fns==@fnx + @@fns=(@fnx && @fnx =~/\.ss[tmi]$/) \ + ? @fnx \ + : @opt.fns + @@map_arr[:ocn_htmlseg]=[] + end + dal=(@@map_arr[:ocn_htmlseg].empty?) ? read_map_ocn_htmlseg : @@map_arr[:ocn_htmlseg].dup + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + Instantiate.new + end + end + protected + def create_dal + dal_array=[] + unless @opt.cmd =~/q/ + tell=(@opt.cmd=~/[vVM]/) \ + ? SiSU_Screen::Ansi.new(@opt.cmd,'Document Abstraction') \ + : SiSU_Screen::Ansi.new(@opt.cmd,'Document Abstraction',@opt.fns) + tell.green_title_hi + end + fn=(@fnx && @fnx =~/\.ss[tmi]$/) \ + ? @fnx \ + : @opt.fns + file_array=@env.read_source_file(fn) + file_array.each do |l| + if l =~/\r\n/; l.gsub!(/\r\n/,"\n") + end + end + meta=file_array.dup + meta=meta.join.split("\n\n") #check whether can be eliminated, some of these are large objects to have twice + @md=SiSU_Param::Parameters::Instructions.new(meta,@opt).extract + meta=nil + dal=SiSU_DAL::Make.new(fn,@md,file_array).song + if @opt.cmd =~/[vM]/ + cf=SiSU_Env::Create_file.new(fn) + SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"~meta/#{@opt.fns}.meta").output if @opt.cmd =~/v/i + SiSU_Screen::Ansi.new(@opt.cmd,"dal -> #{cf.meta}").txt_grey if @opt.cmd =~/M/ + end + dal.each{|s| dal_array << s} + dal_array + end + def read_fnm + dal=[] + dal=if FileTest.file?(@fnm) + (RUBY_VERSION < '1.9') \ + ? (File.open(@fnm){ |f| dal=Marshal.load(f)}) \ + : (File.open(@fnm,'r:utf-8'){ |f| dal=Marshal.load(f)}) + else SiSU_DAL::Source.new(@opt).create_dal + end + end + def read_fnc + dal=[] + dal=if FileTest.file?(@fnc) + (RUBY_VERSION < '1.9') \ + ? 
(File.open(@fnc){ |f| dal=Marshal.load(f)}) \ + : (File.open(@fnc,'r:utf-8'){ |f| dal=Marshal.load(f)}) + else SiSU_DAL::Source.new(@opt).create_dal + end + end + def read_idx_sst + m=[] + m=if FileTest.file?(@idx_sst) + (RUBY_VERSION < '1.9') \ + ? (File.open(@idx_sst){ |f| m=Marshal.load(f)}) \ + : (File.open(@idx_sst,'r:utf-8'){ |f| m=Marshal.load(f)}) + else nil + end + end + def read_idx_tex + m=[] + m=if FileTest.file?(@idx_tex) + (RUBY_VERSION < '1.9') \ + ? (File.open(@idx_tex){ |f| m=Marshal.load(f)}) \ + : (File.open(@idx_tex,'r:utf-8'){ |f| m=Marshal.load(f)}) + else nil + end + end + def read_idx_html + m=[] + m=if FileTest.file?(@idx_html) + (RUBY_VERSION < '1.9') \ + ? (File.open(@idx_html){ |f| m=Marshal.load(f)}) \ + : (File.open(@idx_html,'r:utf-8'){ |f| m=Marshal.load(f)}) + else nil + end + end + def read_idx_xhtml + m=[] + m=if FileTest.file?(@idx_xhtml) + (RUBY_VERSION < '1.9') \ + ? (File.open(@idx_xhtml){ |f| m=Marshal.load(f)}) \ + : (File.open(@idx_xhtml,'r:utf-8'){ |f| m=Marshal.load(f)}) + else nil + end + end + def read_map_nametags + m=[] + m=if FileTest.file?(@map_nametags) + (RUBY_VERSION < '1.9') \ + ? (File.open(@map_nametags){ |f| m=Marshal.load(f)}) \ + : (File.open(@map_nametags,'r:utf-8'){ |f| m=Marshal.load(f)}) + else nil + end + end + def read_map_ocn_htmlseg + m=[] + m=if FileTest.file?(@map_ocn_htmlseg) + (RUBY_VERSION < '1.9') \ + ? (File.open(@map_ocn_htmlseg){ |f| m=Marshal.load(f)}) \ + : (File.open(@map_ocn_htmlseg,'r:utf-8'){ |f| m=Marshal.load(f)}) + else nil + end + end + end + class Output + def initialize(fn,md,data) + @fn,@md,@data=fn,md,data + @cf=SiSU_Env::Create_file.new(@fn) + @make=SiSU_Env::Info_file.new(@fn) + @dir=SiSU_Env::Info_env.new(@fn) + end + def screen_dump(o) + if defined? o.of + print %{OF: #{o.of}; } + end + if defined? o.is + print %{IS: #{o.is}; } + end + if defined? o.ocn + print %{OCN: #{o.ocn}; } + end + if defined? o.node + print %{NODE: #{o.node}; } + end + if defined? 
o.parent + print %{Parent: #{o.parent}; } + end + if defined? o.obj and not o.obj.empty? + puts %{\n#{o.obj}; } + else "\n" + end + end + def screen_print(t_o) + if defined? t_o + print ' ' + t_o.to_s + end + end + def screen_output(data) + data.each do |o| + print o.class + screen_print(o.ocn) + screen_print(o.obj) + puts "\n" + end + end + def hard_output + if @md.cmd =~/M/ + filename_meta=@cf.metaverse.file_meta + @data.each {|o| filename_meta.puts o.inspect.sub(/:0x[0-9a-f]{8}\s/,': ')} #to make diffing easier + filename_txt=@cf.metaverse.file_txt + @data.each do |o| + if defined? o.ocn + filename_txt.puts case o.is + when 'heading' + "[#{o.is} #{o.lv}~#{o.name} [#{o.ocn}]] #{o.obj}" + else "[#{o.is} [#{o.ocn}]] #{o.obj}" + end + else + filename_txt.puts case o.is + when 'meta' + "[m~#{o.tag}] #{o.obj}" + else "[#{o.is}] #{o.obj}" + end + end + end + filename_debug=@cf.file_debug + @data.each do |o| + if defined? o.ocn + case o.is + when 'heading' + filename_debug.puts "#{o.is} #{o.lv}~#{o.name} odv=#{o.odv} osp=#{o.osp} [#{o.ocn}] -->\n\t#{o.obj}" + end + end + end + else + hard="#{@dir.path.dal}/#{@md.fns}.meta" + File.unlink(hard) if FileTest.file?(hard) + hard="#{@dir.path.dal}/#{@md.fns}.txt" + File.unlink(hard) if FileTest.file?(hard) + hard="#{@dir.path.dal}/#{@md.fns}.debug.txt" + File.unlink(hard) if FileTest.file?(hard) + end + end + def make_marshal_content + marshal_dal=@make.marshal.dal_content + File.open(marshal_dal,'w'){|f| Marshal.dump(@data,f)} if @data.class==Array + end + def make_marshal_metadata + marshal_dal=@make.marshal.dal_metadata + File.open(marshal_dal,'w'){|f| Marshal.dump(@data,f)} if @data.class==Array + end + def idx_html_hard_output + if @md.book_idx \ + and @md.cmd =~/M/ + filename_meta=@cf.file_meta_idx_html + unless @data.nil? 
#REMOVE earliest possible + @data.each {|s| p s.inspect + "\n" unless s.class==String} + @data.each {|s| filename_meta.puts s.strip + "\n" unless s.strip.empty?} + end + else + hard_idx_html="#{@dir.path.dal}/#{@md.fns}.idx.html" + File.unlink(hard_idx_html) if FileTest.file?(hard_idx_html) + end + end + def make_marshal_idx_sst_html_seg + marshal_dal=@make.marshal.dal_idx_sst_rel_html_seg + File.open(marshal_dal,'w'){|f| Marshal.dump(@data,f)} if @data.class==Array + end + def make_marshal_idx_sst_rel + marshal_dal=@make.marshal.dal_idx_sst_rel + File.open(marshal_dal,'w'){|f| Marshal.dump(@data,f)} if @data.class==Array + end + def make_marshal_idx_html + marshal_dal=@make.marshal.dal_idx_html + File.open(marshal_dal,'w'){|f| Marshal.dump(@data,f)} if @data.class==Array + end + def make_marshal_idx_xhtml + marshal_dal=@make.marshal.dal_idx_xhtml + File.open(marshal_dal,'w'){|f| Marshal.dump(@data,f)} if @data.class==Array + end + def make_marshal_map_nametags + marshal_dal=@make.marshal.dal_map_nametags + File.open(marshal_dal,'w'){|f| Marshal.dump(@data,f)} if @data.class==Hash + end + def make_marshal_map_name_ocn_htmlseg + marshal_dal=@make.marshal.dal_map_ocn_htmlseg + File.open(marshal_dal,'w'){|f| Marshal.dump(@data,f)} if @data.class==Hash + end + end + class Make + def initialize(fn,md,data) + @fn,@md,@data=fn,md,data + @env=SiSU_Env::Info_env.new(@md.fns) + end + def reset + @@flag_vocab=0 + @@line_mode='' + end + def song + reset + data=@data + data=data.join.split("\n\n") + data=SiSU_insertions::Insertions.new(@md,data).expand_insertions? # dal_expand_insertions.rb + data=SiSU_substitute_and_insert::SI.new(@md,data).substitutions_and_insertions? 
# dal_substitutions_and_insertions.rb + data,metadata=SiSU_document_structure_extract::Build.new(@md,data).identify_parts # dal_doc_str.rb + data=SiSU_Syntax::Markup.new(@md,data).songsheet # dal_syntax.rb + data,endnote_array=SiSU_character_check::Check.new(data).character_check_and_oldstyle_endnote_array # dal_character_check.rb + data=SiSU_images::Images.new(@md,data).images # dal_images.rb + data,tags_map,ocn_html_seg_map=SiSU_numbering::Numbering.new(@md,data).numbering_song # dal_numbering.rb + data,book_index_rel,book_index_rel_html_seg,html_idx,xhtml_idx=SiSU_book_index::Book_index.new(@md,data,@env).indexing_song if @md.book_idx # dal_idx.rb + data=SiSU_endnotes::Endnotes.new(@md,data,endnote_array).endnotes # dal_endnotes.rb + outputdata=data + if @md.cmd =~/[mM]/ + SiSU_DAL::Output.new(@fn,@md,outputdata).hard_output + SiSU_DAL::Output.new(@fn,@md,outputdata).make_marshal_content + SiSU_DAL::Output.new(@fn,@md,metadata).make_marshal_metadata + SiSU_DAL::Output.new(@fn,@md,html_idx).idx_html_hard_output + SiSU_DAL::Output.new(@fn,@md,book_index_rel_html_seg).make_marshal_idx_sst_html_seg + SiSU_DAL::Output.new(@fn,@md,book_index_rel).make_marshal_idx_sst_rel + SiSU_DAL::Output.new(@fn,@md,html_idx).make_marshal_idx_html + SiSU_DAL::Output.new(@fn,@md,xhtml_idx).make_marshal_idx_xhtml + SiSU_DAL::Output.new(@fn,@md,tags_map).make_marshal_map_nametags + SiSU_DAL::Output.new(@fn,@md,ocn_html_seg_map).make_marshal_map_name_ocn_htmlseg + end + reset + outputdata + end + protected + end +end +__END__ diff --git a/lib/sisu/v3/dal_character_check.rb b/lib/sisu/v3/dal_character_check.rb new file mode 100644 index 00000000..a843f202 --- /dev/null +++ b/lib/sisu/v3/dal_character_check.rb @@ -0,0 +1,104 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_character_check + class Check + def initialize(data) + @data=data + @comment='%' + @endnote_array=[] + end + def character_check_and_oldstyle_endnote_array + require 'iconv' + data=@data + @tuned_file,@endnote_array=[],[] + endnote_no=1 + data.each do |dob| + unless dob.is =='table' + dob.obj.strip! + dob.obj.gsub!(/^[{~}]\s*$/,'') + dob.obj.gsub!(/~#\s*/,"#{Mx[:pa_non_object_no_heading]}") + dob.obj.gsub!(/-#\s*/,"#{Mx[:pa_non_object_dummy_heading]}") + dob.obj.gsub!(/(#{Mx[:en_a_o]})\s*\s+/,'\1 '); dob.obj.gsub!(/(~\{\s*)\s+/,'\1 ') + dob.obj.gsub!(/ \/\//,"#{Mx[:br_line]}") + dob.obj.gsub!(/
/,"#{Mx[:br_line]}") #needed by xml, xhtml etc. + dob.obj.gsub!(/\t/,' ') + dob.obj.gsub!(/\342\200\231/u,"'") #if dob =~/’/ #Avoid #‘ ’ #“ ” + dob.obj.gsub!(/�/u,' ') #watch, replace with char code + dob.obj.gsub!(/·/u,'*') + dob.obj.gsub!(/\\copy(?:right)?\b/,'©') + dob.obj.gsub!(/\\trademark\b|\\tm\b/,'®') + dob.obj=dob.obj + "\n" + unless dob.is =~/^code/ + case dob.obj + when /\^~/ #% Note must do this first (earlier loop) and then enter gathered data into ~^\d+ + sub_dob=dob.obj.dup + @endnote_array << sub_dob.gsub!(/\n/,'').gsub!(/\^~\s+(.+)\s*/,%{#{Mx[:en_a_o]}#{endnote_no} \\1 #{Mx[:en_a_c]}}).strip + endnote_no+=1 + dob=nil if dob.obj =~/\^~ .+/ #watch, removes 'binary' endnote now in endnote array for later insertion + end + end + end + @tuned_file << dob unless dob.nil? + end + @tuned_file=@tuned_file.flatten.compact + [@tuned_file,@endnote_array] + end + end +end +__END__ diff --git a/lib/sisu/v3/dal_doc_objects.rb b/lib/sisu/v3/dal_doc_objects.rb new file mode 100644 index 00000000..feb5d2c1 --- /dev/null +++ b/lib/sisu/v3/dal_doc_objects.rb @@ -0,0 +1,444 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: document abstraction + +=end +module SiSU_document_structure + class Extract + def extract(h,o) + obj=h ? h : o + end + end + class Object_metadata + attr_accessor :is,:of,:tags,:obj,:digest + def initialize + @tags={} + @is=@tmp=@digest=nil + @of='meta' + end + def metadata(tags) + of= @of #String, classification - group + is= 'meta' #String, classification - specific type + tags= tags || ((defined? o.tags) ? o.tags : {}) #String, metadata type/tag + obj= nil + @of,@is,@tags,@obj=of,is,tags,obj + self + end + end + class Object_meta + attr_accessor :obj,:is,:of,:tag,:digest,:tmp + def initialize + @is=@obj=@tag=@digest=@digest=@tmp=nil + @of='meta' + end + def metadata(h,o=nil) + of= @of #String, classification - group + is= 'meta' #String, classification - specific type + tag= h[:tag] || ((defined? o.tag) ? o.tag : nil) #String, metadata type/tag + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + digest= h[:digest] || ((defined? o.digest) ? 
o.digest : nil) #hash digests, either sha256 or md5 + @of,@is,@tag,@obj,@digest,@tmp=of,is,tag,obj,digest,tmp + self + end + end + class Object_heading + attr_accessor :obj,:is,:tags,:of,:lv,:ln,:toc_,:name,:idx,:ocn,:odv,:osp,:node,:parent,:ocn_,:note_,:autonum_,:digest,:tmp + def initialize + @of='para' + @is=@obj=@lv=@ln=@toc_=@name=@idx=@size=@ocn=@odv=@osp=@node=@parent=@ocn_=@note_=@autonum_=@digest=@tmp=nil + @tags=[] + end + def heading_ln(lv) + ln=case lv + when /A/; 1 + when /B/; 2 + when /C/; 3 + when /1/; 4 + when /2/; 5 + when /3/; 6 + when /4/; 7 + when /5/; 8 + when /6/; 9 + end + end + def heading_lv(ln) + lv=case ln.to_s + when /1/; 'A' + when /2/; 'B' + when /3/; 'C' + when /4/; '1' + when /5/; '2' + when /6/; '3' + when /7/; '4' + when /8/; '5' + when /9/; '6' + end + end + def heading(h,o=nil) + if not h[:ln] and (h[:lv] and h[:lv]=~/[1-6A-C]/) + h[:ln]=heading_ln(h[:lv]) + elsif not h[:lv] and (h[:ln] and h[:ln].to_s=~/[1-9]/) + h[:lv]=heading_lv(h[:ln]) + end + of= @of #String, classification - group + is= 'heading' #String, classification - specific type + name= h[:name] || ((defined? o.name) ? o.name : nil) #String, named object? + tags= h[:tags] || ((defined? o.tags) ? o.tags : []) #Array, associated object tags, names if any + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + idx= h[:idx] || ((defined? o.idx) ? o.idx : nil) #String, book index provided? + ocn= h[:ocn] || ((defined? o.ocn) ? o.ocn : nil) #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + node= h[:node] || ((defined? o.node) ? o.node : nil) #[Node relationship doc structure info] + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + lv= h[:lv] || ((defined? o.lv) ? o.lv : nil) #Alpha-numeric, document structure as used in markup, A-C then 1-6 + ln= h[:ln] || ((defined? o.ln) ? 
o.ln : nil) #Integer, document structure level, for convenience in processing 1-9 + toc_= h[:toc_] || ((defined? o.toc_) ? o.toc_ : false) #Bool, do not include in toc, (relevant to headings) + ocn_=if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + autonum_= if h[:autonum_].nil?; ((defined? o.autonum_) ? o.autonum_ : true) #Bool? auto-numbering if requested default on, false suppresses + else h[:autonum_] + end + note_= h[:note_] || ((defined? o.note_) ? o.note_ : false) #Bool, endnotes/footnotes? (processing optimization) + digest= h[:digest] || ((defined? o.digest) ? o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@lv,@ln,@name,@tags,@obj,@idx,@ocn,@odv,@osp,@node,@parent,@toc_,@ocn_,@note_,@autonum_,@digest,@tmp=of,is,lv,ln,name,tags,obj,idx,ocn,odv,osp,node,parent,toc_,ocn_,note_,autonum_,digest,tmp + self + end + def heading_insert(h,o=nil) + heading(h,o=nil) + @is= 'heading_insert' #String, classification - specific type + self + end + end + class Object_para + attr_accessor :obj,:is,:tags,:of,:name,:idx,:bullet_,:indent,:ocn,:odv,:osp,:parent,:note_,:image_,:ocn_,:digest,:tmp + def initialize + @of='para' + @is=@obj=@name=@idx=@bullet_=@indent=@size=@ocn=@odv=@osp=@parent=@note_=@image_=@ocn_=@digest=@tmp=nil + @tags=[] + end + def paragraph(h,o=nil) + of= @of #String, classification - group + is= 'para' #String, classification - specific type + name= h[:name] || ((defined? o.name) ? o.name : nil) #String, named object? + tags= h[:tags] || ((defined? o.tags) ? o.tags : []) #Array, associated object tags, names if any + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + idx= h[:idx] || ((defined? o.idx) ? o.idx : nil) #String, book index provided? + ocn= h[:ocn] || ((defined? o.ocn) ? 
o.ocn : nil) #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + indent= h[:indent].to_s || ((defined? o.indent) ? o.indent.to_s : nil) #Integer, indent level + bullet_=h[:bullet_] || ((defined? o.bullet_) ? o.bullet_ : false) #Bool, bulleted? + note_= h[:note_] || ((defined? o.note_) ? o.note_ : false) #Bool, endnotes/footnotes? (processing optimization) + image_= h[:image_] || ((defined? o.image_) ? o.image_ : false) #Bool, images? (processing optimization) + ocn_=if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + digest= h[:digest] || ((defined? o.digest) ? o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@name,@tags,@obj,@indent,@bullet_,@idx,@ocn,@odv,@osp,@parent,@image_,@note_,@ocn_,@digest,@tmp=of,is,name,tags,obj,indent,bullet_,idx,ocn,odv,osp,parent,image_,note_,ocn_,digest,tmp + self + end + def docinfo(h,o=nil) + of= @of #String, classification - group + is= 'docinfo' #String, classification - specific type + name= h[:name] || ((defined? o.name) ? o.name : nil) #String, named object? + tags= h[:tags] || ((defined? o.tags) ? o.tags : nil) #Array, associated object tags, names if any + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + idx= nil #String, book index provided? + ocn= nil #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + indent= nil #Integer, indent level + bullet_=false #Bool, bulleted? + note_= false #Bool, endnotes/footnotes? 
(processing optimization) + image_= h[:image_] || ((defined? o.image_) ? o.image_ : false) #Bool, images? (processing optimization) + ocn_=if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + digest= h[:digest] || ((defined? o.digest) ? o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@name,@tags,@obj,@indent,@bullet_,@idx,@ocn,@odv,@osp,@parent,@image_,@note_,@ocn_,@digest,@tmp=of,is,name,tags,obj,indent,bullet_,idx,ocn,odv,osp,parent,image_,note_,ocn_,digest,tmp + self + end + end + class Object_block_txt + attr_accessor :obj,:is,:of,:tags,:idx,:ocn,:odv,:osp,:parent,:note_,:number_,:ocn_,:digest,:tmp + def initialize + @of='block' + @is=@obj=@idx=@ocn=@odv=@osp=@parent=@note_=@number_=@ocn_=@digest=@tmp=nil + @tags=[] + end + def code(h,o=nil) + of= @of #String, classification - group #alt 'code' + is= 'code' #String, classification - specific type + tags= h[:tags] || ((defined? o.tags) ? o.tags : []) #Array, associated object tags, names if any + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + idx= h[:idx] || ((defined? o.idx) ? o.idx : nil) #String, book index provided? + ocn= h[:ocn] || ((defined? o.ocn) ? o.ocn : nil) #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + number_= h[:number_] || ((defined? o.number_) ? o.number_ : false) #Bool, numbered or not? + note_= h[:note_] || ((defined? o.note_) ? o.note_ : false) #Bool, endnotes/footnotes? (processing optimization) + ocn_= if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? 
no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + digest= h[:digest] || ((defined? o.digest) ? o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@tags,@obj,@idx,@ocn,@odv,@osp,@parent,@number_,@note_,@ocn_,@digest,@tmp=of,is,tags,obj,idx,ocn,odv,osp,parent,number_,note_,ocn_,digest,tmp + self + end + def block(h,o=nil) + of= @of #String, classification - group + is= 'block' #String, classification - specific type + tags= h[:tags] || ((defined? o.tags) ? o.tags : []) #Array, associated object tags, names if any + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + idx= h[:idx] || ((defined? o.idx) ? o.idx : nil) #String, book index provided? + ocn= h[:ocn] || ((defined? o.ocn) ? o.ocn : nil) #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + note_= h[:note_] || ((defined? o.note_) ? o.note_ : false) #Bool, endnotes/footnotes? (processing optimization) + ocn_= if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + digest= h[:digest] || ((defined? o.digest) ? o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@tags,@obj,@idx,@ocn,@odv,@osp,@parent,@note_,@ocn_,@digest,@tmp=of,is,tags,obj,idx,ocn,odv,osp,parent,note_,ocn_,digest,tmp + self + end + def group(h,o=nil) + of= @of #String, classification - group + is= 'group' #String, classification - specific type + tags= h[:tags] || ((defined? o.tags) ? o.tags : []) #Array, associated object tags, names if any + obj= h[:obj] || ((defined? o.obj) ? 
o.obj : nil) #String, text content + idx= h[:idx] || ((defined? o.idx) ? o.idx : nil) #String, book index provided? + ocn= h[:ocn] || ((defined? o.ocn) ? o.ocn : nil) #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + note_= h[:note_] || ((defined? o.note_) ? o.note_ : false) #Bool, endnotes/footnotes? (processing optimization) + ocn_= if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + digest= h[:digest] || ((defined? o.digest) ? o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@tags,@obj,@idx,@ocn,@odv,@osp,@parent,@note_,@ocn_,@digest,@tmp=of,is,tags,obj,idx,ocn,odv,osp,parent,note_,ocn_,digest,tmp + self + end + def alt(h,o=nil) #see block + of= @of #String, classification - group + is= 'alt' #String, classification - specific type + tags= h[:tags] || ((defined? o.tags) ? o.tags : []) #Array, associated object tags, names if any + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + idx= h[:idx] || ((defined? o.idx) ? o.idx : nil) #String, book index provided? + ocn= h[:ocn] || ((defined? o.ocn) ? o.ocn : nil) #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + note_= h[:note_] || ((defined? o.note_) ? o.note_ : false) #Bool, endnotes/footnotes? (processing optimization) + ocn_= if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + digest= h[:digest] || ((defined? o.digest) ? 
o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@tags,@obj,@idx,@ocn,@odv,@osp,@parent,@note_,@ocn_,@digest,@tmp=of,is,tags,obj,idx,ocn,odv,osp,parent,note_,ocn_,digest,tmp + self + end + def verse(h,o=nil) #part of poem decide how you deal with this + of= @of #String, classification - group + is= 'verse' #String, classification - specific type + tags= h[:tags] || ((defined? o.tags) ? o.tags : []) #Array, associated object tags, names if any + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + idx= h[:idx] || ((defined? o.idx) ? o.idx : nil) #String, book index provided? + ocn= h[:ocn] || ((defined? o.ocn) ? o.ocn : nil) #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + ocn_= if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + digest= h[:digest] || ((defined? o.digest) ? o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@tags,@obj,@idx,@ocn,@odv,@osp,@parent,@note_,@ocn_,@digest,@tmp=of,is,tags,obj,idx,ocn,odv,osp,parent,note_,ocn_,digest,tmp + @h=nil + self + end + end + class Object_table + attr_accessor :obj,:is,:of,:lv,:tags,:name,:idx,:indent,:size,:ocn,:number,:head_,:cols,:widths,:odv,:osp,:parent,:note_,:ocn_,:digest,:tmp + def initialize + @of='block' + @is=@obj=@lv=@name=@idx=@indent=@size=@ocn,@number,@head_,@cols,@widths=@odv=@osp=@parent=@note_=@ocn_=@digest=@tmp=nil + @tags=[] + end + def table(h,o=nil) + of= @of #String, classification - group + is= 'table' #String, classification - specific type + tags= h[:tags] || ((defined? o.tags) ? 
o.tags : []) #Array, associated object tags, names if any + cols= h[:cols] || ((defined? o.cols) ? o.cols : nil) + widths= h[:widths] || ((defined? o.widths) ? o.widths : nil) + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + idx= h[:idx] || ((defined? o.idx) ? o.idx : nil) #String, book index provided? + ocn= h[:ocn] || ((defined? o.ocn) ? o.ocn : nil) #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + head_= h[:head_] || ((defined? o.head_) ? o.head_ : false) + note_= h[:note_] || ((defined? o.note_) ? o.note_ : false) #Bool, endnotes/footnotes? (processing optimization) + ocn_=if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + digest= h[:digest] || ((defined? o.digest) ? o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@tags,@cols,@widths,@obj,@idx,@ocn,@odv,@osp,@parent,@head_,@note_,@ocn_,@digest,@tmp=of,is,tags,cols,widths,obj,idx,ocn,odv,osp,parent,head_,note_,ocn_,digest,tmp + self + end + end + class Object_image + attr_accessor :obj,:is,:of,:lv,:idx,:size,:ocn,:parent,:note_,:ocn_,:digest,:tmp + def initialize + @of='image' + @is=@obj=@lv=@idx=@size=@ocn=@parent=@note_=@ocn_=@tmp=@digest=nil + @tags=[] + end + def image(h,o=nil) #not yet used, and what of a paragraph containing several images, consider + of= @of #String, classification - group + is= 'image' #String, classification - specific type + tags= h[:tags] || ((defined? o.tags) ? o.tags : []) #Array, associated object tags, names if any + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + size= h[:size] || ((defined? o.size) ? o.size : nil) + idx= h[:idx] || ((defined? 
o.idx) ? o.idx : nil) #String, book index provided? + ocn= h[:ocn] || ((defined? o.ocn) ? o.ocn : nil) #Integer, sequential on substantive-content objects + odv= h[:odv] || ((defined? o.odv) ? o.odv : nil) + osp= h[:osp] || ((defined? o.osp) ? o.osp : nil) + parent= h[:parent] || ((defined? o.parent) ? o.parent : nil) #[Node parent] + note_= h[:note_] || ((defined? o.note_) ? o.note_ : false) #Bool, endnotes/footnotes? (processing optimization) + ocn_=if h[:ocn_].nil?; ((defined? o.ocn_) ? o.ocn_ : true) #Bool? no ocn, non-substantive content, do not include in toc #consider + else h[:ocn_] + end + digest= h[:digest] || ((defined? o.digest) ? o.digest : nil) #hash digests, either sha256 or md5 + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@tags,@obj,@size,@idx,@ocn,@odv,@osp,@parent,@note_,@ocn_,@digest,@tmp=of,is,tags,obj,size,idx,ocn,odv,osp,parent,note_,ocn_,digest,tmp + self + end + end + class Object_structure + attr_accessor :obj,:tag,:node,:lv,:ln,:status,:is,:of,:tmp + def initialize + @of='structure' + @is=@obj=@node=@lv=@ln=@status=@tmp=nil + end + def xml_dom(h,o=nil) + of= @of #String, classification - group + is= 'xml_dom' #String, classification - specific type + obj= h[:obj] || ((defined? o.obj) ? o.obj : '') #String, text content + lv= h[:lv] || ((defined? o.lv) ? o.lv : nil) #Alpha-numeric, document structure as used in markup, A-C then 1-6 + ln= h[:ln] || ((defined? o.ln) ? o.ln : nil) #Integer, document structure level, for convenience in processing 1-9 + node= h[:node] || ((defined? o.node) ? o.node : nil) #[Node relationship doc structure info] + status= h[:status] || ((defined? o.status) ? o.status : nil) #tag status open or close + tmp= h[:tmp] || ((defined? o.tmp) ? 
o.tmp : nil) #available for processing, empty after use + @of,@is,@obj,@status,@node,@lv,@ln,@tmp=of,is,obj,status,node,lv,ln,tmp + self + end + end + class Object_comment + attr_accessor :obj,:is,:of,:tmp + def initialize + @of='comment' + @is=@obj=@tmp=nil + end + def comment(h,o=nil) + of= @of #String, classification - group + is= 'comment' #String, classification - specific type + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@obj,@tmp=of,is,obj,tmp + self + end + end + class Object_layout + attr_accessor :obj,:is,:of,:tmp + def initialize + @of='layout' + @is=@obj=@tmp=nil + end + def break(h,o=nil) #decide how to deal with, perhaps no obj? + of= @of #String, classification - group + is= 'break' #String, classification - specific type + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@obj,@tmp=of,is,obj,tmp + self + end + def insert(h,o=nil) #decide how to deal with, could mimic paragraph? + of= @of #String, classification - group + is= 'insert' #String, classification - specific type + obj= h[:obj] || ((defined? o.obj) ? o.obj : nil) #String, text content + tmp= h[:tmp] || ((defined? o.tmp) ? o.tmp : nil) #available for processing, empty after use + @of,@is,@obj,@tmp=of,is,obj,tmp + self + end + end +end +__END__ +# ~# |-# no paragraph number # -# not included in toc diff --git a/lib/sisu/v3/dal_doc_str.rb b/lib/sisu/v3/dal_doc_str.rb new file mode 100644 index 00000000..209fc1dd --- /dev/null +++ b/lib/sisu/v3/dal_doc_str.rb @@ -0,0 +1,1195 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: document abstraction + +=end +module SiSU_document_structure_extract + class Instantiate < SiSU_Param::Parameters::Instructions + @@flag={} #Beware!! + def initialize + @@flag['table_to']=false + @@counter=@@column=@@columns=0 + @@line_mode='' + end + end + class Build + @@flag={} #Beware!! + def initialize(md,data) + @md,@data=md,data + Instantiate.new + @pb=SiSU_document_structure::Object_layout.new.break(Hx[:br_page]) + @pbn=SiSU_document_structure::Object_layout.new.break(Hx[:br_page_new]) + end + def ln_get(lv) + ln=case lv + when /A/; 1 + when /B/; 2 + when /C/; 3 + when /1/; 4 + when /2/; 5 + when /3/; 6 + when /4/; 7 + when /5/; 8 + when /6/; 9 + end + end + def image_test(str) + boolean=(str=~/\{\s*\S+?\.png.+?\}https?:\/\/\S+/ ? true : false) + end + def bullet_test(str) + bool=((str=~/\*/) ? true : false) + end + def indent_test(str) + num=((str=~/^_([1-9])/) ? 
$1 : 0) + end + def endnote_test?(str) + bool=((str=~/~\{.+?\}~|~\[.+?\]~/) ? true : false) + end + def extract_tags(str,nametag=nil) + tags=[] + if str.nil? + else + if str =~/(?:^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/ + str.gsub!(/(^|[ ])\*~([a-z0-9._-]+)(?=[ #{Mx[:br_nl]}]|$)/i, + "\\1#{Mx[:tag_o]}\\2#{Mx[:tag_c]}") + str.gsub!(/ [ ]+/i,' ') + tags=str.scan(/#{Mx[:tag_o]}(\S+?)#{Mx[:tag_c]}/).flatten + str.gsub!(/[ ]?#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}[ ]?/,' ') #may be issues with spaces would leave one, but "code" blocks? + end + tags=nametag ? (tags << nametag) : tags + end + [str,tags] + end + def identify_parts + data=@data + tuned_file=[] + @tuned_block,@tuned_code=[],[] + @@counter,@verse_count=0,0 + @metadata={} + @data.each do |t_o| + t_o.gsub!(/(?:\n\s*\n)+/m,"\n") unless @@flag['code'] + if t_o !~/^(?:code|poem|alt|group|block)\{|^\}(?:code|poem|alt|group|block)|^(?:table\{|\{table)[ ~]/ \ + and not @@flag['code'] \ + and not @@flag['poem'] \ + and not @@flag['group'] \ + and not @@flag['block'] \ + and not @@flag['alt'] \ + and not @@flag['table'] + unless t_o =~/^(?:@\S+?:|%+)\s/ # extract book index for paragraph if any + idx=if t_o=~/^=\{(.+)\}\s*$\Z/m; m=$1 + t_o.gsub!(/\n=\{.+\}\s*$\Z/m,'') + m + else nil + end + end + t_o=case t_o + when /^#{Mx[:meta_o]}\S+?#{Mx[:meta_c]}/ #metadata, header + if t_o=~/^#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}\s*(.+)/m + tag,obj=$1,$2 + @metadata[tag]=obj + end + t_o=nil + when /^%+\s/ #comment + t_o=if t_o=~/^%+\s+(.+)/ + h={:obj=>$1} + SiSU_document_structure::Object_comment.new.comment(h) + else nil + end + when /^:?([A-C1-6])\~/ #heading / lv + lv=$1 + ln=ln_get(lv) + t_o=if t_o=~/^:?[A-C1-6]\~\s+(.+)/m + obj=$1 + note=endnote_test?(obj) + obj,tags=extract_tags(obj) + h={:lv=>lv,:ln=>ln,:obj=>obj,:idx=>idx,:tags=>tags} + SiSU_document_structure::Object_heading.new.heading(h) + elsif t_o=~/^:?[A-C1-6]\~(\S+?)-\s+(.+)/m + name,obj=$1,$2 + note=endnote_test?(obj) + obj,tags=extract_tags(obj) + 
h={:lv=>lv,:name=>name,:obj=>obj,:idx=>idx,:autonum_=>false,:tags=>tags} + SiSU_document_structure::Object_heading.new.heading(h) + elsif t_o=~/^:?[A-C1-6]\~(\S+)\s+(.+)/m + name,obj=$1,$2 + note=endnote_test?(obj) + obj,tags=extract_tags(obj,name) + h={:lv=>lv,:name=>name,:obj=>obj,:idx=>idx,:tags=>tags} + SiSU_document_structure::Object_heading.new.heading(h) + else nil + end + when /^(?:_[1-9]|_[1-9]?\*)\s+/ #indented and/or bullet paragraph + t_o=if t_o=~/^(_(?:[1-9]?\*|[1-9])\s+)(.+)/m + tst,obj=$1,$2 + indent=indent_test(tst) + bullet=bullet_test(tst) + image=image_test(obj) + note=endnote_test?(obj) + obj,tags=extract_tags(obj) + h={:bullet_=>bullet,:indent=>indent,:obj=>obj,:idx=>idx,:note_=>note,:image_=>image,:tags=>tags} + SiSU_document_structure::Object_para.new.paragraph(h) + else nil + end + when /^[<\[](?:br)?:(?:pa?r|o(?:bj|---)?)[>\]]\s*$/ #[br:par] #[br:obj] + SiSU_document_structure::Object_layout.new.break(Hx[:br_obj]) + when /^(?:[<\[](?:br)?:pg[>\]]|?)\s*$/ #[br:pg] + SiSU_document_structure::Object_layout.new.break(Hx[:br_page]) + when /^[<\[](?:br)?:pg?n[>\]]\s*$/ #[br:pgn] + SiSU_document_structure::Object_layout.new.break(Hx[:br_page_new]) + else #paragraph + image=image_test(t_o) + note=endnote_test?(t_o) + obj,tags=extract_tags(t_o) + h={:bullet_=>false,:indent=>0,:obj=>obj,:idx=>idx,:note_=>note,:image_=>image,:tags=>tags} + SiSU_document_structure::Object_para.new.paragraph(h) + end + elsif not @@flag['code'] + if t_o =~/^code\{/ + @@flag['code']=true + @@counter=1 + @codeblock_numbered=(t_o =~/^code\{#/) ? 
true : false + h={:obj=>'code block start'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + elsif t_o =~/^poem\{/ + @@flag['poem']=true + h={:obj=>'poem start'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + tuned_file << t_o + elsif t_o =~/^group\{/ + @@flag['group']=true + h={:obj=>'group text start'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + tuned_file << t_o + elsif t_o =~/^block\{/ + @@flag['block']=true + h={:obj=>'block text start'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + tuned_file << t_o + elsif t_o =~/^alt\{/ + @@flag['alt']=true + h={:obj=>'alt text start'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + tuned_file << t_o + elsif t_o =~/^(?:table\{|\{table)[ ~]/ + h={:obj=>'table start'} #introduce a counter + ins=SiSU_document_structure::Object_comment.new.comment(h) #ins=SiSU_document_structure::Object_layout.new.insert(h) + tuned_file << ins + if t_o=~/^table\{(?:~h)?\s+/ + @@flag['table']=true + @rows='' + case t_o + when /table\{~h\s+c(\d+);\s+(.+)/ + cols=$1 + col=$2.scan(/\d+/) + heading=true + when /table\{\s+c(\d+);\s+(.+)/ + cols=$1 + col=$2.scan(/\d+/) + heading=false + end + @h={:head_=>heading,:cols=>cols,:widths=>col,:idx=>idx} + elsif t_o=~/^\{table(?:~h)?(?:\s+\d+;?)?\}\n.+\Z/m + m1,m2,hd=nil,nil,nil + tbl=/^\{table(?:~h)?(?:\s+\d+;?)?\}\n(.+)\Z/m.match(t_o)[1] #two table representations should be consolidated as one + hd=((t_o =~/^\{table~h/) ? true : false) + tbl,tags=extract_tags(tbl) + rws=tbl.split(/\n/) + rows='' + cols=nil + rws.each do |r| + cols=(cols ? 
cols : (r.scan('|').length) +1) + r.gsub!(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") + rows += r + Mx[:tc_c] + end + col=[] + if t_o =~/^\{table(?:~h)?\s+(\d+);?\}/ #width of col 1 given as %, usually when wider than rest that are even + c1=$1.to_i + width=(100 - c1)/(cols - 1) + col=[ c1 ] + (cols - 1).times { col << width } + else #all columns of equal width + width=100.00/cols + cols.times { col << width } + end + h={:head_=>hd,:cols=>cols,:widths=>col,:obj=>rows,:idx=>idx,:tags=>tags} + t_o=SiSU_document_structure::Object_table.new.table(h) unless h.nil? + tuned_file << t_o + h={:obj=>'table end'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) + t_o + elsif t_o=~/^\{table(?:~h)?\s+/ + m1,m2,hd=nil,nil,nil + h=case t_o + when /\{table~h\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one + m1,tbl,hd=$1,$2,true + when /\{table\s+(.+?)\}\n(.+)\Z/m #two table representations should be consolidated as one + m1,tbl,hd=$1,$2,false + else nil + end + tbl,tags=extract_tags(tbl) + col=m1.scan(/\d+/) + rws=tbl.split(/\n/) + rows='' + rws.each do |r| + r.gsub!(/\s*\|\s*/m,"#{Mx[:tc_p]}") #r.gsub!(/\|/m,"#{Mx[:tc_p]}") + rows += r + Mx[:tc_c] + end + h={:head_=>hd,:cols=>col.length,:widths=>col,:obj=>rows,:idx=>idx,:tags=>tags} + t_o=SiSU_document_structure::Object_table.new.table(h) unless h.nil? 
+ tuned_file << t_o + h={:obj=>'table end'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) + t_o + end + end + t_o + end + if @@flag['table'] + if @@flag['table'] \ + and t_o =~/^\}table/ #two table representations should be consolidated as one + @@flag['table']=false + headings,columns,widths,idx=@h[:head_],@h[:cols],@h[:widths],@h[:idx] + @h={:head_=>headings,:cols=>columns,:widths=>widths,:idx=>idx,:obj=>@rows} + t_o=SiSU_document_structure::Object_table.new.table(@h) + tuned_file << t_o + @h,@rows=nil,'' + t_o + h={:obj=>'table end'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + t_o + else + if t_o !~/^table\{/ and not t_o.nil? + t_o.gsub!(/\n/m,"#{Mx[:tc_p]}") + @rows += t_o + Mx[:tc_c] + end + t_o=nil + end + end + if @@flag['code'] + if t_o =~/^\}code/ + @@flag['code']=false + obj,tags=extract_tags(@tuned_code.join("\n")) + h={:obj=>obj,:tags=>tags,:number_=>@codeblock_numbered} + t_o=SiSU_document_structure::Object_block_txt.new.code(h) + @tuned_code=[] + tuned_file << t_o + h={:obj=>'code block end'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + end + if @@flag['code'] \ + and t_o.class==String \ + and not t_o.nil? 
#you may need to introduce t_o.class==String test more widely + sub_array=t_o.dup + "#{Mx[:br_nl]}" + @line_mode=sub_array.scan(/.+/) + @line_mode=[] + sub_array.scan(/.+/) {|w| @line_mode << w if w =~/[\Ss]+/} + t_o=SiSU_document_structure_extract::Build.new(@md,@line_mode).build_lines('code').join + @tuned_code << t_o + t_o=nil + end + elsif @@flag['poem'] \ + or @@flag['group'] \ + or @@flag['block'] \ + or @@flag['alt'] + if @@flag['poem'] \ + and t_o =~/^\}poem/ + @@flag['poem']=false + h={:obj=>'poem end'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + elsif ( @@flag['group'] \ + and t_o =~/^\}group/ ) + @@flag['group']=false + obj,tags=extract_tags(@tuned_block.join("\n")) + h={:obj=>obj,:tags=>tags} + @tuned_block=[] + t_o=SiSU_document_structure::Object_block_txt.new.group(h) + tuned_file << t_o + h={:obj=>'group text end'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + elsif ( @@flag['block'] \ + and t_o =~/^\}block/ ) + @@flag['block']=false + obj,tags=extract_tags(@tuned_block.join("\n")) + h={:obj=>obj,:tags=>tags} + @tuned_block=[] + t_o=SiSU_document_structure::Object_block_txt.new.block(h) + tuned_file << t_o + h={:obj=>'block text end'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + elsif ( @@flag['alt'] \ + and t_o =~/^\}alt/ ) + @@flag['alt']=false + obj,tags=extract_tags(@tuned_block.join("\n")) + h={:obj=>obj,:tags=>tags} + t_o=SiSU_document_structure::Object_block_txt.new.alt(h) + @tuned_block=[] + tuned_file << t_o + h={:obj=>'alt text end'} #introduce a counter + t_o=SiSU_document_structure::Object_comment.new.comment(h) #t_o=SiSU_document_structure::Object_layout.new.insert(h) + end + if @@flag['poem'] \ + or @@flag['group'] \ + or @@flag['alt'] \ + and t_o 
=~/\S/ \ + and t_o !~/^(?:\}(?:verse|code|alt|group|block)|(?:verse|code|alt|group|block)\{)/ # fix logic + sub_array=t_o.dup + @line_mode=sub_array.scan(/.+/) + type=if @@flag['poem']; 'poem' + t_o=SiSU_document_structure_extract::Build.new(@md,@line_mode).build_lines(type).join + poem=t_o.split(/\n\n/) + poem.each do |v| + v.gsub!(/\n/m,"#{Mx[:br_nl]}\n") + obj,tags=extract_tags(v) + h={:obj=>obj,:tags=>tags} + t_o=SiSU_document_structure::Object_block_txt.new.verse(h) + tuned_file << t_o + end + else 'group' + end + @verse_count+=1 if @@flag['poem'] + end + end + if not @@flag['code'] + if @@flag['poem'] \ + or @@flag['group'] \ + or @@flag['alt'] + if t_o.class==String + t_o.gsub!(/\n/m,"#{Mx[:br_nl]}") + t_o.gsub!(/[ ][ ]/m,"#{Mx[:nbsp]*2}") + t_o.gsub!(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") + t_o=t_o + Mx[:br_nl] if t_o =~/\S+/ + elsif t_o.is=='group' \ + or t_o.is=='block' \ + or t_o.is=='alt' \ + or t_o.is=='verse' + t_o.obj.gsub!(/\n/m,"#{Mx[:br_nl]}") + t_o.obj.gsub!(/[ ][ ]/m,"#{Mx[:nbsp]*2}") + t_o.obj.gsub!(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") + end + @tuned_block << t_o if t_o =~/\S+/ + else tuned_file << t_o + end + else tuned_file << t_o + end + end + if @md.flag_endnotes + tuned_file << @pb + h={:ln=>2,:obj=>'Endnotes',:autonum_=>false} + tuned_file << SiSU_document_structure::Object_heading.new.heading_insert(h) + h={:ln=>4,:obj=>'Endnotes',:name=>'endnotes',:autonum_=>false} + tuned_file << SiSU_document_structure::Object_heading.new.heading_insert(h) + h={:obj=>'Endnotes'} + end + if @md.book_idx + tuned_file << @pb + h={:ln=>2,:obj=>'Index',:autonum_=>false} + tuned_file << SiSU_document_structure::Object_heading.new.heading_insert(h) + h={:ln=>4,:obj=>'Index',:name=>'book_index',:autonum_=>false} + tuned_file << SiSU_document_structure::Object_heading.new.heading_insert(h) + h={:obj=>'Index'} + end + tuned_file << @pb + h={:ln=>2,:obj=>'Metadata',:autonum_=>false,:ocn_=>false} + tuned_file << 
SiSU_document_structure::Object_heading.new.heading_insert(h) + h={:ln=>4,:obj=>'SiSU Metadata, document information',:name=>'metadata',:autonum_=>false,:ocn_=>false} + tuned_file << SiSU_document_structure::Object_heading.new.heading_insert(h) + tuned_file << @pb + h={:ln=>2,:obj=>'Manifest',:autonum_=>false,:ocn_=>false} + tuned_file << SiSU_document_structure::Object_heading.new.heading_insert(h) + h={:ln=>4,:obj=>'SiSU Manifest, alternative outputs etc.',:name=>'sisu_manifest',:autonum_=>false,:ocn_=>false} + tuned_file << SiSU_document_structure::Object_heading.new.heading_insert(h) + tuned_file + h={:obj=>'eof'} + meta=SiSU_document_structure::Object_metadata.new.metadata(@metadata) + [tuned_file,meta] + end + def table_rows_and_columns_array(table_str) + table=[] + table_str.split(/#{Mx[:tc_c]}/).each do |table_row| + table_row_with_columns=table_row.split(/#{Mx[:tc_p]}/) + table << table_row_with_columns + end + table + end + def meta_heading(h) + h={:lv=>h[:lv],:ln=>h[:ln],:name=>h[:name],:obj=>h[:obj],:ocn=>'0'} + SiSU_document_structure::Object_heading.new.heading(h) + end + def meta_para(str) + h={:obj=>str,:ocn_=>false} + SiSU_document_structure::Object_para.new.paragraph(h) + end + def metadata + meta=[] + dir=SiSU_Env::Info_env.new(@md.fns) + base_html="#{dir.url.root}/#{@md.fnb}" + l=SiSU_Env::Standardise_language.new.file_to_language(@md.fns) + language=l[:l] + tr=SiSU_Translate::Source.new(@md,language) + meta << @pb + h={:ln=>2,:obj=>'Metadata',:ocn_=>false} + meta << SiSU_document_structure::Object_heading.new.heading(h) + h={:ln=>4,:name=>'metadata',:obj=>'Metadata',:autonum_=>false,:ocn_=>false} + meta << SiSU_document_structure::Object_heading.new.heading(h) #add ocnm + s="Document Manifest @\n #{base_html}/#{@md.fn[:manifest]}" + meta << meta_para(s) + s="#{Mx[:fa_bold_o]}Dublin Core#{Mx[:fa_bold_c]} (DC)" #add ocnm + meta << meta_para(s) + s="#{Mx[:fa_italics_o]}DC tags included with this document are provided here.#{Mx[:fa_italics_c]}" 
#add ocnm + meta << meta_para(s) + if defined? @md.title.full \ + and @md.title.full=~/\S+/ + s="#{tr.full_title}: #{Mx[:fa_underscore_o]}#{@md.title.full}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.creator.author \ + and @md.creator.author=~/\S+/ + s="\n#{tr.author}: #{Mx[:fa_underscore_o]}#{@md.creator.author}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.creator.translator \ + and @md.creator.translator=~/\S+/ + s="#{tr.translator}: #{Mx[:fa_underscore_o]}#{@md.creator.translator}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.creator.illustrator \ + and @md.creator.illustrator=~/\S+/ + s="#{tr.illustrator}: #{Mx[:fa_underscore_o]}#{@md.creator.illustrator}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.creator.prepared_by \ + and @md.creator.prepared_by=~/\S+/ + s="\n#{tr.prepared_by}: #{Mx[:fa_underscore_o]}#{@md.creator.prepared_by}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.creator.digitized_by \ + and @md.creator.digitized_by=~/\S+/ + s="#{tr.digitized_by}: #{Mx[:fa_underscore_o]}#{@md.creator.digitized_by}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.rights.all \ + and @md.rights.all=~/\S+/ + s="\n#{tr.rights}: #{Mx[:fa_underscore_o]}#{@md.rights.all}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.notes.description \ + and @md.notes.description=~/\S+/ + s="#{tr.description}: #{Mx[:fa_underscore_o]}#{@md.notes.description}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.classify.subject \ + and @md.classify.subject=~/\S+/ + s="#{tr.subject}: #{Mx[:fa_underscore_o]}#{@md.classify.subject}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.publisher \ + and @md.publisher=~/\S+/ + s="\n#{tr.publisher}: #{Mx[:fa_underscore_o]}#{@md.publisher}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? 
@md.creator.contributor \ + and @md.creator.contributor=~/\S+/ + s="\n#{tr.contributor}: #{Mx[:fa_underscore_o]}#{@md.creator.contributor}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.notes.abstract \ + and @md.notes.abstract=~/\S+/ + s="\n#{tr.abstract}: #{Mx[:fa_underscore_o]}#{@md.notes.abstract}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.date.created \ + and @md.date.created=~/\S+/ + s="\n#{tr.date_created}: #{Mx[:fa_underscore_o]}#{@md.date.created}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.date.issued \ + and @md.date.issued=~/\S+/ + s="\n#{tr.date_issued}: #{Mx[:fa_underscore_o]}#{@md.date.issued}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.date.available \ + and @md.date.available=~/\S+/ + s="\n#{tr.date_available}: #{Mx[:fa_underscore_o]}#{@md.date.available}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.date.modified \ + and @md.date.modified=~/\S+/ + s="\n#{tr.date_modified}: #{Mx[:fa_underscore_o]}#{@md.date.modified}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.date.valid \ + and @md.date.valid=~/\S+/ + s="\n#{tr.date_valid}: #{Mx[:fa_underscore_o]}#{@md.date.valid}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.date.published \ + and @md.date.published=~/\S+/ + s="\n#{tr.date}: #{Mx[:fa_underscore_o]}#{@md.date.published}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.classify.loc \ + and @md.classify.loc=~/\S+/ + s="\n#{tr.cls_loc}: #{Mx[:fa_underscore_o]}#{@md.classify.loc}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.classify.dewey \ + and @md.classify.dewey=~/\S+/ + s="\n#{@cls_dewey}: #{Mx[:fa_underscore_o]}#{@md.classify.dewey}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? 
@md.classify.pg \ + and @md.classify.pg=~/\S+/ + s="\n#{tr.cls_gutenberg}: #{Mx[:fa_underscore_o]}#{@md.classify.pg}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.classify.isbn \ + and @md.classify.isbn=~/\S+/ + s="\n#{tr.cls_isbn}: #{Mx[:fa_underscore_o]}#{@md.classify.isbn}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.notes.comment \ + and @md.notes.comment=~/\S+/ + s="\n#{tr.comments}: #{Mx[:fa_underscore_o]}#{@md.notes.comment}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.notes.prefix_a \ + and @md.notes.prefix_a=~/\S+/ + s="\n#{tr.prefix_a}: #{Mx[:fa_underscore_o]}#{@md.notes.prefix_a}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.notes.prefix_b \ + and @md.notes.prefix_b=~/\S+/ + s="\n#{tr.prefix_b}: #{Mx[:fa_underscore_o]}#{@md.notes.prefix_b}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.classify.identifier \ + and @md.classify.identifier=~/\S+/ + s="\n#{tr.identifier}: #{Mx[:fa_underscore_o]}#{@md.classify.identifier}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.original.source \ + and @md.original.source=~/\S+/ + s="\n#{tr.source}: #{Mx[:fa_underscore_o]}#{@md.original.source}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.title.language \ + and @md.title.language=~/\S+/ + s="\n#{tr.language}: #{Mx[:fa_underscore_o]}#{@md.title.language}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.original.language \ + and @md.original.language=~/\S+/ + s="\n#{tr.language_original}: #{Mx[:fa_underscore_o]}#{@md.original.language}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.classify.format \ + and @md.classify.format=~/\S+/ + s="\n#{tr.format}: #{Mx[:fa_underscore_o]}#{@md.classify.format}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? 
@md.classify.relation \ + and @md.classify.relation=~/\S+/ + s="\n#{tr.relation}: #{Mx[:fa_underscore_o]}#{@md.classify.relation}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.classify.coverage \ + and @md.classify.coverage=~/\S+/ + s="\n#{tr.coverage}: #{Mx[:fa_underscore_o]}#{@md.classify.coverage}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.classify.keywords \ + and @md.classify.keywords=~/\S+/ + s="\n#{tr.keywords}: #{Mx[:fa_underscore_o]}#{@md.classify.keywords}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + s="#{Mx[:fa_bold_o]}Version Information#{Mx[:fa_bold_c]}" + meta << meta_para(s) + if defined? @md.fns \ + and @md.fns=~/\S+/ + s="#{tr.sourcefile}: #{Mx[:fa_underscore_o]}#{@md.fns}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.file_encoding \ + and @md.file_encoding=~/\S+/ + s="Filetype: #{Mx[:fa_underscore_o]}#{@md.file_encoding}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? @md.dgst \ + and @md.dgst.class==Array + s="Source Digest: #{@md.dgst[0]} #{Mx[:fa_underscore_o]}#{@md.dgst[1]}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + if defined? 
@md.dgst_skin \ + and @md.dgst_skin.class==Array + s="Skin Digest: #{@md.dgst_skin[0]} #{Mx[:fa_underscore_o]}#{@md.dgst_skin[1]}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + end + s="#{Mx[:fa_bold_o]}Generated#{Mx[:fa_bold_c]}" + meta << meta_para(s) + s="#{tr.last_generated}: #{Mx[:fa_underscore_o]}#{Time.now}#{Mx[:fa_underscore_c]}" + meta << meta_para(s) + s="#{tr.sisu_version}: #{Mx[:fa_underscore_o]}#{@md.sisu_version[:project]}#{Mx[:fa_underscore_c]} #{Mx[:fa_underscore_o]}#{@md.sisu_version[:version]}#{Mx[:fa_underscore_c]} of #{@md.sisu_version[:date_stamp]} (#{@md.sisu_version[:date]})" + meta << meta_para(s) + meta + end + def build_lines(type='') + data=@data + data.each do |line| + if line =~/\S/ \ + and line !~/^code\{|^\}code/ \ + and line.class != Hash + line.gsub!(/\s\s/,"#{Mx[:nbsp]*2}") + line.gsub!(/#{Mx[:nbsp]}\s/,"#{Mx[:nbsp]*2}") + line.gsub!(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type=='code' # REMOVE try sort for texpdf special case + if line =~/(?:https?|file|ftp):\/\/\S+$/ + line.gsub!(/\s*$/," #{Mx[:br_nl]}") + else line.gsub!(/\s*$/,"#{Mx[:br_nl]}") #unless type=='code' + end + if @@flag['code']; @@counter+=1 + else + end + elsif line =~/^\s*$/ + line.gsub!(/\s*$/,"#{Mx[:br_nl]}") + end + end + data + end + end + class Structure # this must happen early + def initialize(md,dob) + @md,@dob=md,dob + end + def structure + structure_markup_normalize + structure_markup + @dob + end + def structure_markup #build structure where structure provided only in meta header + @dob=if @dob.is =~/para/ \ + and @dob.indent !~/[1-9]/ \ + and not @dob.bullet_ + @dob=case @dob.obj + when /^#{@md.lv1}/ + h={:lv=>'A',:ln=>1} + SiSU_document_structure::Object_heading.new.heading(h,@dob) + when /^#{@md.lv2}/ + h={:lv=>'B',:ln=>2} + SiSU_document_structure::Object_heading.new.heading(h,@dob) + when /^#{@md.lv3}/ + h={:lv=>'C',:ln=>3} + SiSU_document_structure::Object_heading.new.heading(h,@dob) + when /^#{@md.lv4}/ + h={:lv=>'1',:ln=>4} + 
SiSU_document_structure::Object_heading.new.heading(h,@dob) + when /^#{@md.lv5}/ + h={:lv=>'2',:ln=>5} + SiSU_document_structure::Object_heading.new.heading(h,@dob) + when /^#{@md.lv6}/ + h={:lv=>'3',:ln=>6} + SiSU_document_structure::Object_heading.new.heading(h,@dob) + else @dob + end + else @dob + end + @dob + end + def structure_markup_normalize #needs a bit of thinking + dob=if @md.markup_version.determined < 0.38 #%convert internal representation, consider making 0.38 structure default ([A-C1-6] instead of [1-9]), requires downstream changes + @dob.gsub!(/^[456]~/,'!_') + @dob.gsub!(/^3~(\S+)/,"#{Mx[:lv_o]}6:\\1#{Mx[:lv_c]}") + @dob.gsub!(/^3~\s+/,"#{Mx[:lv_o]}6:#{Mx[:lv_c]}") + @dob.gsub!(/^2~(\S+)/,"#{Mx[:lv_o]}5:\\1#{Mx[:lv_c]}") + @dob.gsub!(/^2~\s+/,"#{Mx[:lv_o]}5:#{Mx[:lv_c]}") + @dob.gsub!(/^1~(\S+)/,"#{Mx[:lv_o]}4:\\1#{Mx[:lv_c]}") + @dob.gsub!(/^1~\s+/,"#{Mx[:lv_o]}4:#{Mx[:lv_c]}") + @dob.gsub!(/^:?C~(\S+)/,"#{Mx[:lv_o]}3:\\1#{Mx[:lv_c]}") + @dob.gsub!(/^:?C~\s+/,"#{Mx[:lv_o]}3:#{Mx[:lv_c]}") + @dob.gsub!(/^:?B~(\S+)/,"#{Mx[:lv_o]}2:\\1#{Mx[:lv_c]}") + @dob.gsub!(/^:?B~\s+/,"#{Mx[:lv_o]}2:#{Mx[:lv_c]}") + @dob.gsub!(/^:?A~(\S+)/,"#{Mx[:lv_o]}1:\\1#{Mx[:lv_c]}") + @dob.gsub!(/^:?A~\s+/,"#{Mx[:lv_o]}1:#{Mx[:lv_c]}") + @dob=if @dob =~/^@(?:level|markup):\s/ + @dob.gsub!(/3/,'6') + @dob.gsub!(/2/,'5') + @dob.gsub!(/1/,'4') + @dob.gsub!(/:?C/,'3') + @dob.gsub!(/:?B/,'2') + @dob.gsub!(/:?A/,'1') + @dob + else @dob + end + else @dob + end + end + def structure_marks + t_o=if @md.markup_version.determined < 0.38 + @t_o.gsub!(/^1~(\S+)/,"#{Mx[:lv_o]}1:\\1#{Mx[:lv_c]}") + @t_o.gsub!(/^1~\s+/,"#{Mx[:lv_o]}1:#{Mx[:lv_c]}") + @t_o.gsub!(/^2~(\S+)/,"#{Mx[:lv_o]}2:\\1#{Mx[:lv_c]}") + @t_o.gsub!(/^2~\s+/,"#{Mx[:lv_o]}2:#{Mx[:lv_c]}") + @t_o.gsub!(/^3~(\S+)/,"#{Mx[:lv_o]}3:\\1#{Mx[:lv_c]}") + @t_o.gsub!(/^3~\s+/,"#{Mx[:lv_o]}3:#{Mx[:lv_c]}") + @t_o.gsub!(/^4~(\S+)/,"#{Mx[:lv_o]}4:\\1#{Mx[:lv_c]}") + @t_o.gsub!(/^4~\s+/,"#{Mx[:lv_o]}4:#{Mx[:lv_c]}") + 
@t_o.gsub!(/^5~(\S+)/,"#{Mx[:lv_o]}5:\\1#{Mx[:lv_c]}") + @t_o.gsub!(/^5~\s+/,"#{Mx[:lv_o]}5:#{Mx[:lv_c]}") + @t_o.gsub!(/^6~(\S+)/,"#{Mx[:lv_o]}6:\\1#{Mx[:lv_c]}") + @t_o.gsub!(/^6~\s+/,"#{Mx[:lv_o]}6:#{Mx[:lv_c]}") + @t_o.gsub!(/^[789]~/,'!_') + @t_o + else @t_o + end + end + end + class OCN + def initialize(md,data) + @md,@data=md,data + end + def ocn #and auto segment numbering increment + data=@data + @o_array=[] + node=ocn=ocn_dv=ocn_sp=ocnh=ocnh1=ocnh2=ocnh3=ocnh4=ocnh5=ocnh6=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnm=ocnu=ocnk=nm=0 # h heading, o other, t table, g group, i image + node_count_flag=false + regex_exclude_ocn_and_node = /#{Rx[:meta]}|^@\S+?:\s|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^\^~ |<:e[:_]\d+?>|^<:\#|<:- |<[:!]!4|
+ if dob.is=='heading' + ln=case dob.lv + when 'A'; 1 + when 'B'; 2 + when 'C'; 3 + when '1'; 4 + when '2'; 5 + when '3'; 6 + when '4'; 7 + when '5'; 8 + when '6'; 9 + end + end + if not dob.obj =~/<:#>|~#|-#/ \ + or not dob.toc_ # fix this no longer in dob.obj + ocn+=1 + if dob.is=='heading' \ + and (ln.to_s =~/^[1-9]/ \ + or ln.to_s =~@md.lv1 \ + or ln.to_s =~@md.lv2 \ + or ln.to_s =~@md.lv3 \ + or ln.to_s =~@md.lv4 \ + or ln.to_s =~@md.lv5 \ + or ln.to_s =~@md.lv6) + ocnh+=1 + if ln==1 \ + or ln=~@md.lv1; ocnh1+=1 #heading + node1="1:#{ocnh1};#{ocn}" + node,ocn_sp,parent=node1,"h#{ocnh}",0 #FIX + elsif ln==2 \ + or ln=~@md.lv2; ocnh2+=1 + node2="2:#{ocnh2};#{ocn}" + node,ocn_sp,parent=node2,"h#{ocnh}",node1 + elsif ln==3 \ + or ln=~@md.lv3; ocnh3+=1 + node3="3:#{ocnh3};#{ocn}" + node,ocn_sp,parent=node3,"h#{ocnh}",node2 + elsif ln==4 \ + or ln=~@md.lv4; ocnh4+=1 + node4="4:#{ocnh4};#{ocn}" + node,ocn_sp,parent=node4,"h#{ocnh}",node3 + elsif ln==5 \ + or ln=~@md.lv5; ocnh5+=1 + node5="5:#{ocnh5};#{ocn}" + node,ocn_sp,parent=node5,"h#{ocnh}",node4 + elsif ln==6 \ + or ln=~@md.lv6; ocnh6+=1 + node6="6:#{ocnh6};#{ocn}" + node,ocn_sp,parent=node6,"h#{ocnh}",node5 + end + else + ocno+=1 + if dob.is=='table' + ocnt+=1 + ocn_sp,parent="t#{ocnt}",node + elsif dob.is=='code' + ocnc+=1 + ocn_sp,parent="c#{ocnc}",node + elsif dob.is=~/^(?:group|block|alt|verse)/ + ocng+=1 #group, poem + ocn_sp,parent="g#{ocng}",node + elsif dob.is=~/image|#{Mx[:lnk_o]}\S+?\.(?:png|jpg|gif)\s+/m + ocni+=1 + ocn_sp,parent="i#{ocni}",node + else ocnp+=1 #paragraph + ocn_sp,parent="p#{ocnp}",node + end + end + if dob.is=='heading' + dob.ln,dob.node,dob.ocn,dob.odv,dob.osp,dob.parent=ln,node,ocn,ocn_dv,ocn_sp,parent + else + unless dob.of=~/meta|comment|layout/ + dob.ocn,dob.odv,dob.osp,dob.parent=ocn,ocn_dv,ocn_sp,parent + end + end + else ocnu+=1 + dob.obj.gsub!(/#{Mx[:fa_o]}~##{Mx[:fa_c]}/,'') if dob.obj + ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}" + dob.ocn,dob.odv,dob.osp=ocn,ocn_dv,ocn_sp + end + 
h + elsif dob.obj=~/#{Mx[:pa_non_object_no_heading]}/ + dob.obj.gsub!(/#{Mx[:pa_non_object_no_heading]}/,'') + if dob.is=='para' + h={:obj=>dob.obj,:ocn_=>false,:ocn=>nil} + dob=SiSU_document_structure::Object_para.new.paragraph(h,dob) + elsif dob.is=='heading' + h={:obj=>dob.obj,:ocn_=>false,:ocn=>nil,:toc_=>true} + dob=SiSU_document_structure::Object_heading.new.heading(h,dob) + end + elsif dob.obj=~/#{Mx[:pa_non_object_dummy_heading]}/ + dob.obj.gsub!(/#{Mx[:pa_non_object_dummy_heading]}/,'') + if dob.is=='para' + h={:obj=>dob.obj,:ocn_=>false,:ocn=>nil} + dob=SiSU_document_structure::Object_para.new.paragraph(h,dob) + elsif dob.is=='heading' + h={:obj=>dob.obj,:ocn_=>false,:ocn=>nil,:toc_=>false} + dob=SiSU_document_structure::Object_heading.new.heading(h,dob) + end + else dob + end + dob.obj.gsub!(/\n\n/,"\n") if dob.is =~/(?:code|verse|alt|group|block)/ #newlines taken out + @o_array << dob + end + @o_array + end + end + class XML + def initialize(md,data) + @data,@md=data,md + end + def dom + @s=['0', + 'A', + 'B', + 'C', + '1', + '2', + '3' + ] + @sp=' ' + tuned_file=structure_build + tuned_file + end + def structure_build + data=@data + tuned_file=[] + hs=[0,false,false,false] + t={:lv =>@s[0],:status =>'open'} + tuned_file << tags(t) + if @md.cmd =~/V/ + puts "\nXML sisu structure outline --->\n" + puts "<#{@s[0]}>" + end + data.each_with_index do |o,i| + if o.is =~/^heading/ + case o.ln + when 1 + tuned_file << tag_close(o.ln,hs) + tuned_file << tag_open(o,@s) + if @md.cmd =~/V/ + puts_tag_close(o.ln,hs) + puts_tag_open(o,@s) + end + hs=[1,true,false,false] + when 2 + tuned_file << tag_close(o.ln,hs) + tuned_file << tag_open(o,@s) + if @md.cmd =~/V/ + puts_tag_close(o.ln,hs) + puts_tag_open(o,@s) + end + hs=[2,true,true,false] + when 3 + tuned_file << tag_close(o.ln,hs) + tuned_file << tag_open(o,@s) + if @md.cmd =~/V/ + puts_tag_close(o.ln,hs) + puts_tag_open(o,@s) + end + hs=[3,true,true,true] + when 4 + tuned_file << tag_close(o.ln,hs) + tuned_file << 
tag_open(o,@s) + if @md.cmd =~/V/ + puts_tag_close(o.ln,hs) + puts_tag_open(o,@s) + end + hs[0]=4 + when 5 + tuned_file << tag_close(o.ln,hs) + tuned_file << tag_open(o,@s) + if @md.cmd =~/V/ + puts_tag_close(o.ln,hs) + puts_tag_open(o,@s) + end + hs[0]=5 + when 6 + tuned_file << tag_close(o.ln,hs) + tuned_file << tag_open(o,@s) + if @md.cmd =~/V/ + puts_tag_close(o.ln,hs) + puts_tag_open(o,@s) + end + hs[0]=6 + end + end + tuned_file << o + end + puts_tag_close(0,hs) if @md.cmd =~/V/ + tuned_file << tag_close(0,hs) + tuned_file.flatten! + tuned_file + end + def tags(o) + tag=if o[:status]=='open' + %{<#{o[:lv]} id="#{o[:node]}">} + else "" + end + ln=case o[:lv] + when 'A'; 1 + when 'B'; 2 + when 'C'; 3 + when '1'; 4 + when '2'; 5 + when '3'; 6 + when '4'; 7 + when '5'; 8 + when '6'; 9 + end + h={:tag=>tag,:node=>o[:node],:lv =>o[:lv],:ln =>ln,:status =>o[:status]} + SiSU_document_structure::Object_structure.new.xml_dom(h) #downstream code utilise else ignore like comments + end + def tag_open(o,tag) + t={:lv =>tag[o.ln],:node =>o.node,:status =>'open'} + t_o=tags(t) + t_o + end + def tag_close(lev,hs) + ary=[] + case hs[0] + when 1 + if (lev <= 1) and hs[1] + t={:lv =>@s[1],:status =>'close'} + ary << tags(t) + end + if (lev==0) + t={:lv =>@s[0],:status =>'close'} + ary << tags(t) + end + when 2 + if (lev <= 2) and hs[2] + t={:lv =>@s[2],:status =>'close'} + ary << tags(t) + end + if (lev <= 1) and hs[1] + t={:lv =>@s[1],:status =>'close'} + ary << tags(t) + end + if (lev==0) + t={:lv =>@s[0],:status =>'close'} + ary << tags(t) + end + when 3 + if (lev <= 3) and hs[3] + t={:lv =>@s[3],:status =>'close'} + ary << tags(t) + end + if (lev <= 2) and hs[2] + t={:lv =>@s[2],:status =>'close'} + ary << tags(t) + end + if (lev <= 1) and hs[1] + t={:lv =>@s[1],:status =>'close'} + ary << tags(t) + end + if (lev==0) + t={:lv =>@s[0],:status =>'close'} + ary << tags(t) + end + when 4 + if (lev <= 4) + t={:lv =>@s[4],:status =>'close'} + ary << tags(t) + end + if (lev <= 3) 
and hs[3] + t={:lv =>@s[3],:status =>'close'} + ary << tags(t) + end + if (lev <= 2) and hs[2] + t={:lv =>@s[2],:status =>'close'} + ary << tags(t) + end + if (lev <= 1) and hs[1] + t={:lv =>@s[1],:status =>'close'} + ary << tags(t) + end + if (lev==0) + t={:lv =>@s[0],:status =>'close'} + ary << tags(t) + end + when 5 + if (lev <= 5) + t={:lv =>@s[5],:status =>'close'} + ary << tags(t) + end + if (lev <= 4) + t={:lv =>@s[4],:status =>'close'} + ary << tags(t) + end + if (lev <= 3) and hs[3] + t={:lv =>@s[3],:status =>'close'} + ary << tags(t) + end + if (lev <= 2) and hs[2] + t={:lv =>@s[2],:status =>'close'} + ary << tags(t) + end + if (lev <= 1) and hs[1] + t={:lv =>@s[1],:status =>'close'} + ary << tags(t) + end + if (lev==0) + t={:lv =>@s[0],:status =>'close'} + ary << tags(t) + end + when 6 + if (lev <= 6) + t={:lv =>@s[6],:status =>'close'} + ary << tags(t) + end + if (lev <= 5) + t={:lv =>@s[5],:status =>'close'} + ary << tags(t) + end + if (lev <= 4) + t={:lv =>@s[4],:status =>'close'} + ary << tags(t) + end + if (lev <= 3) and hs[3] + t={:lv =>@s[3],:status =>'close'} + ary << tags(t) + end + if (lev <= 2) and hs[2] + t={:lv =>@s[2],:status =>'close'} + ary << tags(t) + end + if (lev <= 1) and hs[1] + t={:lv =>@s[1],:status =>'close'} + ary << tags(t) + end + if (lev==0) + t={:lv =>@s[0],:status =>'close'} + ary << tags(t) + end + end + ary + end + def puts_tag_open(o,tag) + puts %{#{@sp*o.ln}<#{tag[o.ln]} id="#{o.node}">} + end + def puts_tag_close(lev,hs) + case hs[0] + when 1 + puts "#{@sp*1}" if (lev <= 1) and hs[1] + puts "" if (lev==0) + when 2 + puts "#{@sp*2}" if (lev <= 2) and hs[2] + puts "#{@sp*1}" if (lev <= 1) and hs[1] + puts "" if (lev==0) + when 3 + puts "#{@sp*3}" if (lev <= 3) and hs[3] + puts "#{@sp*2}" if (lev <= 2) and hs[2] + puts "#{@sp*1}" if (lev <= 1) and hs[1] + puts "" if (lev==0) + when 4 + puts "#{@sp*4}" if (lev <= 4) + puts "#{@sp*3}" if (lev <= 3) and hs[3] + puts "#{@sp*2}" if (lev <= 2) and hs[2] + puts "#{@sp*1}" if 
(lev <= 1) and hs[1] + puts "" if (lev==0) + when 5 + puts "#{@sp*5}" if (lev <= 5) + puts "#{@sp*4}" if (lev <= 4) + puts "#{@sp*3}" if (lev <= 3) and hs[3] + puts "#{@sp*2}" if (lev <= 2) and hs[2] + puts "#{@sp*1}" if (lev <= 1) and hs[1] + puts "" if (lev==0) + when 6 + puts "#{@sp*6}" if (lev <= 6) + puts "#{@sp*5}" if (lev <= 5) + puts "#{@sp*4}" if (lev <= 4) + puts "#{@sp*3}" if (lev <= 3) and hs[3] + puts "#{@sp*2}" if (lev <= 2) and hs[2] + puts "#{@sp*1}" if (lev <= 1) and hs[1] + puts "" if (lev==0) + end + end + end +end +__END__ diff --git a/lib/sisu/v3/dal_endnotes.rb b/lib/sisu/v3/dal_endnotes.rb new file mode 100644 index 00000000..95176516 --- /dev/null +++ b/lib/sisu/v3/dal_endnotes.rb @@ -0,0 +1,125 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_endnotes + class Endnotes + def initialize(md,data,endnote_array=nil) + @md,@data,@endnote_array=md,data,endnote_array + @endnote_counter,@endnote_counter_asterisk,@endnote_counter_dag=1,1,1 + end + def endnotes + data=@data + @tuned_file=[] + endnote_no,endnote_ref=1,1 + data.each do |dob| + # manually numbered endnotes --> + if @md.mod.inspect =~/--no-asterisk|--no-annotate/ + dob.obj.gsub!(/#{Mx[:en_b_o]}\s.+?#{Mx[:en_b_c]}/,'') + end + if @md.mod.inspect =~/--no-dagger|--no-annotate/ + dob.obj.gsub!(/#{Mx[:en_b_o]}[+]\s.+?#{Mx[:en_b_c]}/,'') + end + if defined? dob.obj \ + and defined? 
dob.is \ + and dob.is !~/^code/ + case dob.obj # auto-numbered endnotes --> + when /#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}[*+]\s+.+?#{Mx[:en_b_c]}/ + dob.obj.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/,' \1') # required 2003w31 + word_mode=dob.obj.scan(/\S+/m) + word_mode=endnote_call_number(word_mode) + dob.obj=word_mode.join(' ') + endnote_ref+=1 + when /~\^(?:\s|$)|<:e>/ #%note inserts endnotes previously gathered from /^(|[-~]\{{3})/ (in earlier loop) + word_mode=dob.obj.scan(/\S+/m) + word_mode=endnote_call_number(word_mode) + dob.obj=word_mode.join(' ') + endnote_ref+=1 + end + end + @tuned_file << dob + end + @endnote_counter,@endnote_counter_asterisk,@endnote_counter_dag=1,1,1 + @tuned_file=@tuned_file.flatten + end + def endnote_call_number(words) + words.each do |word| + case word + when /#{Mx[:en_a_o]}/ + unless word =~/#{Mx[:en_a_o]}[*+]+/ + word.gsub!(/#{Mx[:en_a_o]}/,"#{Mx[:en_a_o]}#{@endnote_counter} ") + @endnote_counter+=1 + end + when /#{Mx[:en_b_o]}/ + if word =~/#{Mx[:en_b_o]}[+]/ + word.gsub!(/#{Mx[:en_b_o]}[+]/,"#{Mx[:en_b_o]}\+#{@endnote_counter_dag} ") + @endnote_counter_dag+=1 + else + word.gsub!(/#{Mx[:en_b_o]}[*]?/,"#{Mx[:en_b_o]}\*#{@endnote_counter_asterisk} ") + @endnote_counter_asterisk+=1 + end + when /~\^|<:e>/ + if @endnote_array + word.gsub!(/~\^|<:e>/,"#{@endnote_array[@endnote_counter-1]}") + @endnote_counter+=1 + end + end + end + end + end +end +__END__ diff --git a/lib/sisu/v3/dal_expand_insertions.rb b/lib/sisu/v3/dal_expand_insertions.rb new file mode 100644 index 00000000..f7adb76e --- /dev/null +++ b/lib/sisu/v3/dal_expand_insertions.rb @@ -0,0 +1,198 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_insertions + class Insertions + def initialize(md,data) + @md,@data=md,data + end + def output_filetypes_in_cmd(cmd_shortcut,source=nil) #make list of file types in shortcut command (as configured), e.g. 
when sisu -3 is used + cf_defaults=SiSU_Env::Info_processing_flag.new + cmd_list=case cmd_shortcut.inspect + when /0/; cf_defaults.cf_0 + when /1/; cf_defaults.cf_1 + when /2/; cf_defaults.cf_2 + when /3/; cf_defaults.cf_3 + when /4/; cf_defaults.cf_4 + when /5/; cf_defaults.cf_5 + end + file_type_names={} + file_type_names[:gen],file_type_names[:src]=[],[] + file_type_names[:gen] <<= if cmd_list =~ /y/; 'sisu_manifest.html' + end + file_type_names[:gen] <<= if cmd_list =~ /h/; ['toc.html', 'doc.html'] + end + file_type_names[:gen] <<= if cmd_list =~ /e/; ['.epub'] + end + file_type_names[:gen] <<= if cmd_list =~ /p/; ['landscape.pdf', 'portrait.pdf'] + end + file_type_names[:gen] <<= if cmd_list =~ /o/; 'opendocument.odt' + end + file_type_names[:gen] <<= if cmd_list =~ /b/; 'scroll.xhtml' + end + file_type_names[:gen] <<= if cmd_list =~ /x/; 'sax.xml' + end + file_type_names[:gen] <<= if cmd_list =~ /X/; 'dom.xml' + end + file_type_names[:gen] <<= if cmd_list =~ /a/; 'plain.txt' + end + file_type_names[:gen] <<= if cmd_list =~ /g/; 'wiki.txt' + end + file_type_names[:gen] <<= if cmd_list =~ /w/; 'concordance.html' + end + file_type_names[:gen] <<= if cmd_list =~ /N/; 'digest.txt' + end + file_type_names[:src] <<= if source and cmd_shortcut =~ /s/; source + end + file_type_names[:src] <<= if cmd_shortcut =~ /S/; "#{source}.zip" + end + file_type_names[:gen]=file_type_names[:gen].flatten + file_type_names[:src]=file_type_names[:src].flatten + file_type_names + end + def expand_insertions? + data=@data + tuned_file,tuned_file_tmp=[],[] + data.each do |para| + if para !~/^%+\s/ \ + and para =~/\{(?:~\^\s+)?(.+?)\s\[(?:\d(?:[sS]*))\]\}(?:\.\.\/\S+?\/|\S+?\.ss[tm]\b)/ + txt,cmd,source,url_dir,note,manifest=nil,nil,nil,nil,nil,nil + @u=SiSU_Env::Info_env.new.url + if defined? 
@u.remote + if para =~/(.+?)\{(.+?)\s\[(\d[sS]*)\]\}((\S+?)\.ss[tm]\b)(.*)/m + pre,txt,cmd,source,url_dir,note="#{$1.strip} ",$2,$3,$4,$5,$6 + elsif para =~/\{(.+?)\s\[(\d[sS]*)\]\}((\S+?)\.ss[tm]\b)(.*)/ + pre,txt,cmd,source,url_dir,note='',$1,$2,$3,$4,$5 + end + manifest="#{pre}{#{txt} }#{@u.remote}/#{url_dir}/toc.html#{note}\n\n" + else + puts "error, does currently support relative paths (reltive paths were removed, as had problems for citation, and was not suited to all output types should possibly reconsider) #{__FILE__} #{__LINE__}" + if para =~/\{(?:~\^\s+)?(.+?)\s\[(\d[sS]*)\]\}\.\.\/(\S+?)\/(\s+#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]})?/ + txt,cmd,url_dir,note=$1,$2,$3,$4 + manifest="{ #{txt} }../#{url_dir}/toc.html#{note}\n\n" + end + end + tuned_file_tmp << manifest + output_filetypes=output_filetypes_in_cmd(cmd,source) + output_filetypes[:gen].each do |o_f| + describe = case o_f + when /sisu_manifest.html/; '~^ document manifest' + when /toc.html/; ' html, segmented text' + when /doc.html/; ' html, scroll, document in one' + when /\.epub/; ' epub' + when /landscape.pdf/; ' pdf, landscape' + when /portrait.pdf/; ' pdf, portrait' + when /opendocument.odt/; ' odf:odt, open document text' + when /scroll.xhtml/; ' xhtml scroll' + when /sax.xml/; ' xml, sax' + when /dom.xml/; ' xml, dom' + when /plain.txt/; ' plain text utf-8' + #when /manpage.1/; ' man, 1' + when /wiki.txt/; ' wiki text' + when /concordance.html/; ' concordance' + when /digest.txt/; ' dcc, document content certificate (digests)' + else nil + end + if describe + tuned_file_tmp << if @u.remote #to double space <:br> at beginning of entry + if describe =~/epub/ + "#{Mx[:nbsp]*4} { #{describe} }#{@u.remote}/epub/#{url_dir}#{o_f} " + elsif describe =~/^~\^ / + "#{Mx[:nbsp]*4} {#{describe} }#{@u.remote}/#{url_dir}/#{o_f} " + else "#{Mx[:nbsp]*4} { #{describe} }#{@u.remote}/#{url_dir}/#{o_f} " + end + else + if describe =~/epub/ + "#{Mx[:nbsp]*4} { #{describe} }../epub/#{url_dir}#{o_f} " + elsif describe 
=~/^~\^ / + "#{Mx[:nbsp]*4} {#{describe} }../#{url_dir}/#{o_f} " + else "#{Mx[:nbsp]*4} { #{describe} }../#{url_dir}/#{o_f} " + end + end + end + end + output_filetypes[:src].each do |o_f| + describe=case o_f + when /#{source}\.zip/; ' markup source (zipped) pod' + when /#{source}/; ' markup source text' + else nil + end + if describe + tuned_file_tmp << if @u.remote + x=if describe =~/zip/ + "#{Mx[:nbsp]*4} {#{describe} }#{@u.src_pod}/#{o_f} " + else "#{Mx[:nbsp]*4} {#{describe} }#{@u.src_txt}/#{o_f} " + end + else + x=if describe =~/zip/ + "#{Mx[:nbsp]*4} { #{describe} }../pod/#{o_f} " + else "#{Mx[:nbsp]*4} { #{describe} }../zip/#{o_f} " + end + end + end + end + tuned_file << 'group{' << tuned_file_tmp.join("\n") << '}group' + tuned_file_tmp=[] + else tuned_file << para + end + end + tuned_file + end + end +end +__END__ diff --git a/lib/sisu/v3/dal_hash_digest.rb b/lib/sisu/v3/dal_hash_digest.rb new file mode 100644 index 00000000..a78c54f1 --- /dev/null +++ b/lib/sisu/v3/dal_hash_digest.rb @@ -0,0 +1,155 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_hash + require "#{SiSU_lib}/shared_markup_alt.rb" #shared_markup_alt.rb + class Object_digest + def initialize(md,data,env=nil) + @md,@data,@env=md,data,env + @env ||=SiSU_Env::Info_env.new(@md.fns) + end + def object_digest + # 1. clean/stripped text without any markup, paragraph, headings etc. without endnotes + # 2. endnotes clean/stripped text digest only (there may be several endnotes within a paragraph) + # 3. whole object, text with markup and any endnotes, (question: with or without the endnote digests??? presumption better without, [however may be easier to check with?]) + # [digests should not include other digests] + data=@data + @tuned_file=[] + data.compact! + sha_ =(@env.digest.type=='sha256' ? true : false) + sha_ ? (require 'digest/sha2') : (require 'digest/md5') + data.each do |t_o| + unless t_o.obj.class==Array + t_o.obj.strip! + end + if t_o.of !~/structure|comment|layout/ \ + and t_o.ocn.class==Fixnum + if sha_ + for hash_class in [ Digest::SHA256 ] + @tuned_file << stamped(t_o,hash_class) + end + else + for hash_class in [ Digest::MD5 ] + @tuned_file << stamped(t_o,hash_class) + end + end + else @tuned_file << t_o unless t_o.nil? 
+ end + end + @tuned_file=@tuned_file.flatten + #use md5 or to create hash of each dal object including ocn, & add into to each dal object + end + def endnote_digest(data) + t_o_bit=[] + data.each do |en_plus| + t_o_bit <<= case en_plus + when /#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/ + if en_plus =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/ + t_o_txt,en_open,en_txt,en_close=/(.*?)(#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(.+?)(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m.match(en_plus)[1..4] + stripped_en=SiSU_text_representation::Alter.new(en_txt).strip_clean_of_markup + digest_en_strip=if @env.digest.type =~/sha256/ + Digest::SHA256.hexdigest(stripped_en) + else + Digest::MD5.hexdigest(stripped_en) + end + t_o_txt + en_open + en_txt + Mx[:id_o] + digest_en_strip + Mx[:id_c] + en_close + else puts "Error Exception - problem encountered with:\n#{en_plus}" #arbitrary exception, tidy up + end + else en_plus + end + end + t_o_bit.join + end + def stamped(t_o,hash_class) #decide what hash information is most useful, is compromise necessary? 
+ t_o.obj=SiSU_text_representation::Alter.new(t_o).strip_clean_of_extra_spaces + t_obj=t_o.inspect.sub(/:0x[0-9a-f]{8}\s/,': ') + stripped=SiSU_text_representation::Alter.new(t_o).strip_clean_of_markup + markup=SiSU_text_representation::Alter.new(t_o).semi_revert_markup + digests=SiSU_text_representation::Modified_text_plus_Hash_digest.new(@md,t_o).composite.dgst + unless t_o.is=='code' + case t_o.obj + when /#{Mx[:en_a_o]}[\d*+]+\s+.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}[*+]\d+\s+.+?#{Mx[:en_b_c]}/m + en_and_t_o,en_and_t_o_digest=[],[] + t_o.obj.gsub!(/\s*(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,' \1') #watch + t_o_plus_en=t_o.obj.scan(/.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/m) + t_o_tail=if t_o.obj =~/(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+([\s\S]+)/m + /(?:.*?#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|.*?#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})+.*/m.match(t_o.obj)[1] + else '' + end + t_o_plus_en << t_o_tail + en_and_t_o_digest << endnote_digest(t_o_plus_en) + t_o_new=en_and_t_o_digest.join(' ') + #@tuned << t_o_new + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless t_o.nil? + else #@tuned << t_o + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless t_o.nil? + end + else #@tuned << t_o + Mx[:id_o] + digest_strip + ':' + digest_all + Mx[:id_c] unless t_o.nil? 
+ end + t_o #KEEP intact + end + def strip_clean_extra_spaces(s) # dal output tuned + s=s.dup + s=s.gsub(/[ ]+([,.;:?](?:$|\s))/,'\1') unless s =~/#{Mx[:en_a_o]}|#{Mx[:en_b_o]}/ + s=s.gsub(/ [ ]+/,' ') + s=s.gsub(/^ [ ]+/,'') + s=s.gsub(/ [ ]+$/,'') + s=s.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2') + s=s.gsub(/((?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})')[ ]+(s )/,'\1\2') + end + end +end +__END__ diff --git a/lib/sisu/v3/dal_idx.rb b/lib/sisu/v3/dal_idx.rb new file mode 100644 index 00000000..7c00be3c --- /dev/null +++ b/lib/sisu/v3/dal_idx.rb @@ -0,0 +1,357 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_book_index + class Book_index + def initialize(md,data,env=nil) + @md,@data,@env=md,data,env + @rgx_idx=/#{Mx[:idx_o]}(?:.+?)#{Mx[:idx_c]}\s*/ + @rgx_idx_ocn_seg=/(.+?)~(\d+)~(\S+)/ + @rgx_idx_ocn=/(.+?)~(\d+)/ + @env ||=SiSU_Env::Info_env.new(@md.fns) + end + def indexing_song + data=@data + data,sisu_markup_idx_rel,sisu_markup_idx_rel_html_seg,html_idx,xhtml_idx=extract_book_index(data) + data=clean_and_insert_index(data,sisu_markup_idx_rel_html_seg) + [data,sisu_markup_idx_rel,sisu_markup_idx_rel_html_seg,html_idx,xhtml_idx] + end + def extract_book_index(data) + tuned_file=[] + idx_array=[] + data.each do |dob| + if dob.is =~/heading/ \ + and dob.ln==4 + @seg=dob.name + end + idx_array << "#{dob.idx}~#{dob.ocn}~#{@seg}" if defined? dob.idx and not (dob.idx.nil? or dob.idx.empty?) + tuned_file << dob if dob + end + idx_array=construct_idx_array(idx_array) if idx_array.length > 0 + if idx_array.length > 0 + the_idx=construct_book_index(idx_array) + sisu_markup_idx_rel,sisu_markup_idx_rel_html_seg,html_idx,xhtml_idx=nil,nil,nil,nil + if @md.book_idx + idx=index(the_idx) + sisu_markup_idx_rel,sisu_markup_idx_rel_html_seg,html_idx,xhtml_idx=idx[:sst_rel],idx[:sst_rel_html_seg],idx[:html],idx[:xhtml] + end + end + [tuned_file,sisu_markup_idx_rel,sisu_markup_idx_rel_html_seg,html_idx,xhtml_idx] + end + def construct_idx_array(idx_array) + idx_lst=[] + idx_array.each do |idx| + idx_list,ocn,seg=@rgx_idx_ocn_seg.match(idx)[1..3] + idx_lst <<=if idx_list =~/;/ + g=idx_list.scan(/[^;]+/) + idxl=[] + g.each do |i| + i.strip! 
+ idxl << { :rough_idx => i, :ocn => ocn, :seg => seg } + end + idxl + else { :rough_idx => idx_list, :ocn => ocn, :seg => seg } + end + end + idx_lst.flatten! + idx_lst + end + def construct_book_index(idx_array) + the_idx={} + idx_array.each do |idx| + idx_lst=idx[:rough_idx].scan(/[^|:]+/) + idx_lst[0].strip! + if idx_lst[0] =~/.+?\+\d+/ + use,plus=/(.+?)\+(\d+)/.match(idx_lst[0])[1,2] + else use=idx_lst[0] + end + use=use[0].chr.capitalize + use[1,use.length] + the_idx[use]={} unless the_idx[use] and defined? the_idx[use] + idx_lst.each do |i| + i.strip! + i,r=/(.+?)\+(\d+)/.match(i)[1,2] if i =~/.+?\+\d+/ + x=if idx_lst.length==1 or idx_lst[0].gsub(/\+\d+/,'')==i + the_idx[use]['term_node_lev1']=[] unless the_idx[use]['term_node_lev1'] and defined? the_idx[use]['term_node_lev1'] + x=if r + the_idx[use]['term_node_lev1'] << { :ocn => idx[:ocn], :range => "#{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}", :seg => idx[:seg] } + "#{i} #{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" + else + the_idx[use]['term_node_lev1'] << { :ocn => idx[:ocn], :seg => idx[:seg] } + "#{i} #{idx[:ocn]}" + end + else + the_idx[use]['term_node_lev2']={} unless the_idx[use]['term_node_lev2'] and defined? the_idx[use]['term_node_lev2'] + the_idx[use]['term_node_lev2'][i]=[] unless the_idx[use]['term_node_lev2'][i] and defined? 
the_idx[use]['term_node_lev2'][i] + x=if r + the_idx[use]['term_node_lev2'][i] << { :ocn => idx[:ocn], :range => "#{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}", :seg => idx[:seg] } + "#{idx_lst[0]}:#{i} #{idx[:ocn]}-#{idx[:ocn].to_i+r.to_i}" + else + the_idx[use]['term_node_lev2'][i] << { :ocn => idx[:ocn], :seg => idx[:seg] } + "#{idx_lst[0]}:#{i} #{idx[:ocn]}" + end + end + end + end + the_idx=the_idx.sort + the_idx + end + def clean_xml(str) + str.gsub!(/&/,'&') + str.gsub!(/\(/,'(') + str.gsub!(/\)/,')') + str.gsub!(/\*/,'*') + str.gsub!(/\+/,'+') + str.gsub!(/,/,',') + str + end + def index(the_idx) + @x=1 + idx={} + idx[:sst_rel_html_seg],idx[:sst_rel],idx[:html],idx[:xhtml]=[],[],[],[] + h={:obj=>Mx[:br_page]} + o=SiSU_document_structure::Object_layout.new.break(h) + idx[:sst_rel_html_seg] << o + idx[:sst_rel] << o + h={:lv=>'2',:name=>'index',:obj=>"Index"} + o=SiSU_document_structure::Object_heading.new.heading(h) + idx[:sst_rel_html_seg] << o + idx[:sst_rel] << o + h={:lv=>'4',:name=>'idx',:obj=>" [Index] #{Mx[:pa_non_object_dummy_heading]}"} + o=SiSU_document_structure::Object_heading.new.heading(h) + idx[:sst_rel_html_seg] << o + idx[:sst_rel] << o + alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + idx[:html] << '

' + idx[:xhtml] << '

' + alph.each do |x| + if x =~/[0-9]/ + idx[:html] << '' + idx[:xhtml] << '' + else + idx[:html] << %{#{x}, } + idx[:xhtml] << %{#{x}, } + end + end + idx[:html] << '

' + idx[:xhtml] << '

' + letter=alph.shift + idx[:html] << %{\n

} + idx[:xhtml] << %{\n

0 - 9

} + the_idx.each do |i| + i.each do |x| + if x.class==String + f=/^(\S)/.match(x)[1] + if letter < f + while letter < f + if alph.length > 0 + letter=alph.shift + idx[:html] << %{\n

#{letter}

} + idx[:xhtml] << %{\n

#{letter}

} + else break + end + end + end + idx[:sst_rel_html_seg] << %{\n\n#{Mx[:fa_bold_o]}#{x},#{Mx[:fa_bold_c]} } + idx[:sst_rel] << %{\n\n#{Mx[:fa_bold_o]}#{x},#{Mx[:fa_bold_c]} } + aname=x.gsub(/\s+/,'_') + idx[:html] << %{\n

#{x}, } + c=clean_xml(x.dup) + idx[:xhtml] << %{\n

#{c}, } + @o=idx[:sst_rel_html_seg].index(idx[:sst_rel_html_seg].last) + @t=idx[:sst_rel].index(idx[:sst_rel].last) + @q=idx[:html].index(idx[:html].last) + @r=idx[:xhtml].index(idx[:xhtml].last) + print "\n" + x + ', ' if @md.cmd =~/V/ + elsif x.class==Array + p 'array error? -->' + print x + elsif x.class==Hash + if x['term_node_lev1'].class==Array + x['term_node_lev1'].each do |a| + if a[:range] + idx[:sst_rel_html_seg][@o]=idx[:sst_rel_html_seg][@o] + %{#{Mx[:lnk_o]}#{a[:range]}#{Mx[:lnk_c]}#{Mx[:rel_o]}/#{a[:seg]}.html##{a[:ocn]}#{Mx[:rel_c]}, } + idx[:sst_rel][@t]=idx[:sst_rel][@t] + %{#{Mx[:lnk_o]}#{a[:range]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{a[:ocn]}#{Mx[:rel_c]}, } + idx[:html][@q]=idx[:html][@q] + %{#{a[:range]}, } + idx[:xhtml][@q]=idx[:xhtml][@q] + %{#{a[:range]}, } + print a[:range] + ', ' if @md.cmd =~/V/ + elsif a[:ocn] + idx[:sst_rel_html_seg][@o]=idx[:sst_rel_html_seg][@o] + %{#{Mx[:lnk_o]}#{a[:ocn]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{a[:seg]}.html##{a[:ocn]}#{Mx[:rel_c]}, } + idx[:sst_rel][@t]=idx[:sst_rel][@t] + %{#{Mx[:lnk_o]}#{a[:ocn]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{a[:ocn]}#{Mx[:rel_c]}, } + idx[:html][@q]=idx[:html][@q] + %{#{a[:ocn]}, } + idx[:xhtml][@q]=idx[:xhtml][@q] + %{#{a[:ocn]}, } + print a[:ocn] + ', ' if @md.cmd =~/V/ + else p 'error' + end + end + idx[:html][@q]=idx[:html][@q] + '

' + idx[:xhtml][@r]=idx[:xhtml][@r] + '

' + end + if x['term_node_lev2'] + m=x['term_node_lev2'] + m=m.sort + m.each do |k,y| + if k !~/term_node_lev1/ + idx[:sst_rel_html_seg][@o]=idx[:sst_rel_html_seg][@o] + %{#{k}, } + idx[:sst_rel][@t]=idx[:sst_rel][@t] + %{#{k}, } + idx[:html][@q]=idx[:html][@q] + %{\n

#{k}, } + c=clean_xml(k.dup) + idx[:xhtml][@r]=idx[:xhtml][@r] + %{\n

#{c}, } + print "\n\t" + k + ', ' if @md.cmd =~/V/ + y.each do |z| + if z[:range] + idx[:sst_rel_html_seg][@o]=idx[:sst_rel_html_seg][@o] + %{#{Mx[:lnk_o]}#{z[:range]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{z[:seg]}.html##{z[:ocn]}#{Mx[:rel_c]}, } + idx[:sst_rel][@t]=idx[:sst_rel][@t] + %{#{Mx[:lnk_o]}#{z[:range]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{z[:ocn]}#{Mx[:rel_c]}, } + idx[:html][@q]=idx[:html][@q] + %{#{z[:range]}, } + idx[:xhtml][@q]=idx[:xhtml][@q] + %{#{z[:range]}, } + print z[:range] + ', ' if @md.cmd =~/V/ + elsif z[:ocn] + idx[:sst_rel_html_seg][@o]=idx[:sst_rel_html_seg][@o] + %{#{Mx[:lnk_o]}#{z[:ocn]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{z[:seg]}.html##{z[:ocn]}#{Mx[:rel_c]}, } + idx[:sst_rel][@t]=idx[:sst_rel][@t] + %{#{Mx[:lnk_o]}#{z[:ocn]}#{Mx[:lnk_c]}#{Mx[:rel_o]}#{z[:ocn]}#{Mx[:rel_c]}, } + idx[:html][@q]=idx[:html][@q] + %{#{z[:ocn]}, } + idx[:xhtml][@q]=idx[:xhtml][@q] + %{#{z[:ocn]}, } + print z[:ocn] + ', ' if @md.cmd =~/V/ + else p 'error' + end + end + idx[:html][@q]=idx[:html][@q] + '

' + idx[:xhtml][@r]=idx[:xhtml][@r] + '

' + end + end + end + idx + @x +=1 + end + end + end + print "\n" if @md.cmd =~/V/ + idx + end + def screen_print(the_idx) + the_idx.each do |i| + i.each do |x| + if x.class==String + print "\n" + x + ', ' + elsif x.class==Array + p 'array error? -->' + print x + elsif x.class==Hash + if x['term_node_lev1'].class==Array + x['term_node_lev1'].each do |a| + if a[:range] + print a[:range] + ', ' + elsif a[:ocn] + print a[:ocn] + ', ' + else p 'error' + end + end + end + if x['term_node_lev2'] + m=x['term_node_lev2'] + m=m.sort + m.each do |k,y| + if k !~/term_node_lev1/ + print "\n\t" + k + ', ' + y.each do |z| + if z[:range] + print z[:range] + ', ' + elsif z[:ocn] + print z[:ocn] + ', ' + else p 'error' + end + end + end + end + end + end + end + end + end + def output_idx(idx) + if @md.book_idx + path="#{@env.path.output}/#{@md.fnb}" + Dir.mkdir(path) unless FileTest.directory?(path) + puts "#{path}/#{@md.fn[:book_idx_html]} #{__FILE__}::#{__LINE__}" + html_index_file=File.new("#{path}/#{@md.fn[:book_idx_html]}",'w') + idx[:html].each {|x| html_index_file << x } + html_index_file.close + end + end + def clean_and_insert_index(data,sisu_markup_idx) + tuned_file=[] + data.each do |dob| + tuned_file << dob + if dob.obj =~/#{Mx[:br_endnotes]}/ \ + and sisu_markup_idx + sisu_markup_idx.each do |idx| + tuned_file << idx + end + end + end + tuned_file + end + def clean_index(data) #check on use of dob + tuned_file=[] + data.each do |para| + para.gsub!(/\n*#{@rgx_idx}/m,'') + tuned_file << para + end + tuned_file + end + end +end +__END__ diff --git a/lib/sisu/v3/dal_images.rb b/lib/sisu/v3/dal_images.rb new file mode 100644 index 00000000..76a94dab --- /dev/null +++ b/lib/sisu/v3/dal_images.rb @@ -0,0 +1,155 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_images + class Images + #require 'RMagick' + #include Magick + def initialize(md,data) + @md,@data=md,data + end + def images + data=@data + tuned_file=[] + @rmgk=false + imagemagick_=SiSU_Env::Info_settings.new.program?('rmagick') + if imagemagick_ + begin + @rmgk=SiSU_Env::Load.new('RMagick').prog + rescue + @rmgk=false + end + else + if @md.cmd =~/[vVM]/ + SiSU_Screen::Ansi.new(@md.cmd,'use of RMagick is not enabled in sisurc.yml').warn + end + end + data.each do |dob| + unless dob.is =~/^table/ + dob.obj.strip! 
+ if dob.obj =~/#{Mx[:lnk_o]}\s*\S+\.(?:png|jpg|gif)(?:\s*|\s+.+)?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/ + if dob.obj !~/#{Mx[:lnk_o]}\s*\S+\.(?:png|jpg|gif)\s+\d+x\d+/ + m=/#{Mx[:lnk_o]}\s*(\S+\.(?:png|jpg|gif))/ + if imagemagick_ + imgs=dob.obj.scan(m).flatten + img_col=img_row=nil + images=imgs.each do |image| + dir=SiSU_Env::Info_env.new(@md.fns) + path_image=[dir.path.image_source_include_local,dir.path.image_source_include_remote,dir.path.image_source_include] + image_path=nil + path_image.each do |image_path| + break if FileTest.exist?("#{image_path}/#{image}") + end + if FileTest.exist?("#{image_path}/#{image}") + if @rmgk + img=Magick::ImageList.new("#{image_path}/#{image}") + img_col,img_row=img.columns,img.rows + else + if @md.cmd =~/[vVM]/ + SiSU_Screen::Ansi.new(@md.cmd,'RMagick not present, will attempt to use imagemagick (identify) directly').warn + end + imgk=SiSU_Env::System_call.new.imagemagick + gmgk=SiSU_Env::System_call.new.graphicksmagick + if imgk or gmgk + if imgk + imgsys=`identify #{image_path}/#{image}`.strip #system call + elsif gmgk + imgsys=`gm identify #{image_path}/#{image}`.strip #system call + end + img_col,img_row=/(\d+)x(\d+)/m.match(imgsys)[1,2] + img_col,img_row=img_col.to_i,img_row.to_i + end + end + row=((img && defined? img.rows) ? img.rows : img_row) + col=((img && defined? img.columns) ? 
img.columns : img_col) + if img_col > img_row #landscape + if img_col> 640 + img_col=640 + img_row=((1.00*img_col/col)*row).round + end + else #portrait + if img_col> 640 + img_col=640 + img_row=((1.00*img_col/col)*row).round + end + if img_row > 640 + img_row=640 + img_col=((1.00*img_row/row)*col).round + end + end + dob.obj.gsub!(/(#{image})/,"#{image} #{img_col}x#{img_row}") + else dob.obj.gsub!(/#{Mx[:lnk_o]}\s*(\S+)\.(png|jpg|gif).+?#{Mx[:lnk_c]}(#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,'[ \1 (\2 missing) ]') + end + end + else + images=dob.obj.scan(m) do |image| + SiSU_Screen::Ansi.new(@md.cmd,'where image dimensions have not been provided RMagick or imagemagick is required',image).warn #unless @opt.cmd =~/q/ + end + end + end + end + dob.obj.gsub!(/(#{Mx[:lnk_o]})\s*(\S+\.(?:png|jpg|gif))\s+/i,'\1\2 ') if dob.obj =~/#{Mx[:lnk_o]}\s*\S+\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/ + end + tuned_file << dob unless dob.nil? + end + tuned_file + end + end +end +__END__ +imgsys=`identify #{image_path}/#{image}`.strip diff --git a/lib/sisu/v3/dal_metadata.rb b/lib/sisu/v3/dal_metadata.rb new file mode 100644 index 00000000..77b58f86 --- /dev/null +++ b/lib/sisu/v3/dal_metadata.rb @@ -0,0 +1,79 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_metadata + class Metadata + def initialize(md,metad) + @md,@metadata=md,metad + l=SiSU_Env::Standardise_language.new.file_to_language(md.fns) + language=l[:l] + @tr=SiSU_Translate::Source.new(md,language) + end + def make_para(obj,ocn) + h={:obj=>obj,:ocn=>0} + SiSU_document_structure::Object_para.new.paragraph(h) + end + def make_heading(obj,ocn,name,lv,ln) + h={:lv=>lv,:ln=>ln,:name=>name,:obj=>obj,:ocn=>0} + SiSU_document_structure::Object_heading.new.heading(h) + end + def metadata + end + end +end +__END__ diff --git a/lib/sisu/v3/dal_numbering.rb b/lib/sisu/v3/dal_numbering.rb new file mode 100644 index 00000000..4bfb7da3 --- /dev/null +++ b/lib/sisu/v3/dal_numbering.rb @@ -0,0 +1,465 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
module SiSU_numbering
  # Numbering pass over the array of document objects ("dob"s): joins
  # plain-text paragraph lines, auto-numbers headings, delegates ocn and xml
  # processing, applies minor ("# " / "_# ") list numbering and names segments.
  class Numbering
    attr_accessor :obj,:osp,:ocn,:lv,:name,:index,:comment
    # md::   document metadata/settings object (read and updated, e.g. seg_names)
    # data:: array of document objects to process
    def initialize(md,data)
      @md,@data=md,data
      @obj=@type=@ocn=@lv=@name=@index=@comment=nil
    end
    # Runs every numbering step in order.
    # Returns [data, tags_map, ocn_html_seg_map].
    def numbering_song
      data=@data
      data=number_plaintext_para(data)
      data=auto_number_heading_ie_title(data.compact) #tr issue
      data=ocn(data.compact) #watch
      data=xml(data.compact)
      data=minor_numbering(data.compact)
      data,tags_map,ocn_html_seg_map=name_para_seg_filename(data)
      data=set_heading_top(data) unless @md.set_heading_top
      [data,tags_map,ocn_html_seg_map]
    end
    # Joins the lines of numbered (ocn_) objects that are not
    # block/comment/layout, then strips leading whitespace and turns one
    # trailing whitespace character into a newline (string objects only).
    def number_plaintext_para(data)
      @tuned_file=[]
      data.each do |dob|
        if dob.of !~/(?:block|comment|layout)/ and dob.ocn_ #and dob.obj !~ /#{Mx[:gr_o]}Th|#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}/ #FIX
          dob.obj.gsub!(/(.+)\n/,'\1 ') #messy, but idea is that tables should retain breaks
        end
        unless dob.obj.class==Array
          dob.obj.gsub!(/^\s+/,'')
          dob.obj.gsub!(/\s$/,"\n")
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # Prefixes a sub-heading with +title_no+ unless its text already looks
    # numbered; adds the '!_ ' bold-line marker when +num+ exceeds
    # @md.toc_lev_limit. Mutates and returns +dob+.
    def number_sub_heading(dob,num,title_no)
      unless dob.obj =~/\d+\.|(?:chapter|article|section|clause)\s+\d+/i #name selection arbitrary, fix
        case dob.name
        when /-/; dob.obj.gsub!(/^/,"#{title_no} ")
        when /^#/; dob.obj.gsub!(/^/,"#{title_no} ")
        when /^[a-z_\.]+/
          dob.obj.gsub!(/^/,"#{title_no} ")
        else
          dob.name=title_no if dob.name=~/^$/ #where title contains title number
          dob.obj.gsub!(/^/,"#{title_no} ") if title_no =~/\d+/ #main, where title number is to be provided #watch changed placement
        end
        if @md.toc_lev_limit \
        and @md.toc_lev_limit < num
          dob.obj.gsub!(/^/,'!_ ') #bold line, watch
        end
      end
      dob
    end
    # Turns heading text into a tag: spaces become '_', quotes are dropped,
    # '/' becomes '-', and font-attribute and bullet markers are removed.
    # Mutates +heading_tag+ in place and returns it.
    def heading_tag_clean(heading_tag)
      heading_tag.gsub!(/[ ]+/,'_')
      heading_tag.gsub!(/["']/,'')
      heading_tag.gsub!(/[\/]/,'-')
      heading_tag.gsub!(/#{Mx[:fa_bold_o]}|#{Mx[:fa_bold_c]}/,'')
      heading_tag.gsub!(/#{Mx[:fa_italics_o]}|#{Mx[:fa_italics_c]}/,'')
      heading_tag.gsub!(/#{Mx[:fa_underscore_o]}|#{Mx[:fa_underscore_c]}/,'')
      heading_tag.gsub!(/#{Mx[:fa_cite_o]}|#{Mx[:fa_cite_c]}/,'')
      heading_tag.gsub!(/#{Mx[:fa_insert_o]}|#{Mx[:fa_insert_c]}/,'')
      heading_tag.gsub!(/#{Mx[:fa_strike_o]}|#{Mx[:fa_strike_c]}/,'')
      heading_tag.gsub!(/#{Mx[:fa_superscript_o]}|#{Mx[:fa_superscript_c]}/,'')
      heading_tag.gsub!(/#{Mx[:fa_subscript_o]}|#{Mx[:fa_subscript_c]}/,'')
      heading_tag.gsub!(/#{Mx[:fa_hilite_o]}|#{Mx[:fa_hilite_c]}/,'')
      heading_tag.gsub!(/#{Mx[:gl_bullet]}/,'')
      heading_tag
    end
    # Auto-numbers headings from level @md.make.num_top downwards (three
    # levels: n, n.m, n.m.k), substitutes '#' chapter counters in level-1
    # headings, and collects heading tags / segment names.
    # Returns the processed, flattened array of objects.
    def auto_number_heading_ie_title(data) #also does some segment naming
      @tuned_file=[]
      if defined? @md.make.num_top \
      and @md.make.num_top \
      and @md.make.num_top !~/^$/
        input||=@md.make.num_top
      end
      num_top=(input ? input.to_i : nil)
      t_no1=t_no2=t_no3=t_no4=0
      if num_top
        no1=num_top; no2=(num_top + 1); no3=(num_top + 2); no4=(num_top + 3)
      end
      t_not=0
      data.compact!
      chapter_number_counter=0
      data.each do |dob| #@md.seg_names << [additions to segment names]
        title_no=nil
        dob=SiSU_document_structure_extract::Structure.new(@md,dob).structure_markup #must happen earlier, node info etc. require
        if dob.is =='heading' \
        and dob.autonum_ \
        and defined? @md.make.num_top \
        and @md.make.num_top !~/^$/
          if dob.lv=='1' \
          and dob.obj =~/^#\s|\s#(?:\s|$)/
            chapter_number_counter +=1
            dob.obj.gsub!(/^#\s/,"#{chapter_number_counter} ")
            dob.obj.gsub!(/#([:,]?\s|[.]?$)/,"#{chapter_number_counter}\\1")
          end
          if dob.ln==no1
            @subnumber=0 # original assigned 1 then immediately 0; net effect kept
          end
          if dob.ln.to_s =~/^[1-6]/ \
          and not dob.toc_ \
          and dob.obj !~/#{Mx[:fa_o]}(?:~#|-#)#{Mx[:fa_c]}/ # <-- fix
            if dob.ln==no1
              t_no1+=1; t_no2=0; t_no3=0
              title_no="#{t_no1}"
              if not @md.seg_names.nil? \
              and not @md.seg_names.include?(title_no)
                if dob.ln==no1
                  dob.name="#{title_no}" if not dob.name
                  dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
                  tag=dob.obj.gsub(/(Article|Clause|Section|Chapter)\s+/,"\\1_#{title_no}").downcase
                  tag=heading_tag_clean(tag)
                  dob.tags=[tag,dob.tags].flatten if tag !~/^\d+$/ #check whether will work across file types with stop signs
                  (dob.obj =~/(Article|Clause|Section)\s+/) \
                  ? (dob.obj.gsub!(/(Article|Clause|Section)\s+/,"\\1 #{title_no} ")) \
                  : (dob.obj.gsub!(/^/,"#{title_no}. ")) #fix stop later
                end
                if dob.ln !=no1 \
                and dob.obj =~/^[\d.]+\s/ #fix -> if the title starts with a numbering scheme, do not auto-number, review
                  dob.name ="#{title_no}" if not dob.name
                  dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
                  dob.obj.gsub!(/^/,"#{title_no}. ")
                end
                @md.seg_names << title_no
              end
              if dob.ln!=no1 \
              and dob.name!~/^[a-z_\.]+$/ \
              and dob.obj !~/[A-Z]\.?\s/ #bug -> tmp fix, excludes A. B. C. lettering, but not roman numerals, is arbitrary, review required # not fixed, work on
                dob.tags=[title_no,dob.tags].flatten if title_no !~/^\d+$/ #check whether will work across file types with stop signs
                dob.obj.gsub!(/^/i,"#{title_no}. ")
              end
            end
            if dob.ln==no1 #watch because here you change dob.name
              dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
            end
            if dob.ln==no2 #watch because here you change dob.name
              t_no2+=1; t_no3=0
              title_no="#{t_no1}.#{t_no2}"
              dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
              dob=number_sub_heading(dob,no2,title_no)
            end
            if dob.ln==no3 #watch because here you change dob.name
              t_no3+=1
              title_no="#{t_no1}.#{t_no2}.#{t_no3}"
              dob.tags=["h#{title_no}",dob.tags].flatten #check whether will work across file types with stop signs
              dob=number_sub_heading(dob,no3,title_no)
            end
          elsif dob.ln.to_s =~/^[1-6]/ \
          and dob.name =~ /^[\w-]+-/ # endnotes, watch2005# endnotes, watch2005
            dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
            # NOTE(review): non-mutating gsub whose result is discarded, so this
            # line has no effect; kept unchanged pending a decision on intent.
            dob.name.gsub(/^([a-z_\.]+)-$/,'\1')
          end
        elsif dob.is =='heading' \
        and dob.autonum_ \
        and @md.markup =~/num_extract/ #AS DANGEROUS force enable with document, note already does this type of numbering for cisg, locate and coordinate logic, is currently misplaced in code, chengwei inspired 2004w23/4
          #here lies a bug, as is nil when run from -Dv --update, FIX
          if (dob.name.nil? or dob.name.empty?) \
          and dob.ln.to_s =~/^[1-9]/ \
          and dob.obj =~ /^([\d\.]+)/ #risky (must be unique) consider output to 4~~\d instead of 4~\d
            dob.name=$1
            dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
          end
          if @md.toc_lev_limit
          end
        elsif defined? dob.name \
        and dob.name
          dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
        end
        dob.tags=dob.tags.uniq if defined? dob.tags
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # Delegates object citation numbering to the structure-extract OCN class.
    # (This definition replaces the zero-argument reader created by
    # attr_accessor :ocn above.)
    def ocn(data) #and auto segment numbering increment
      @tuned_file=SiSU_document_structure_extract::OCN.new(@md,data).ocn
      @tuned_file
    end
    # Delegates to the structure-extract XML class (dom).
    def xml(data)
      @tuned_file=SiSU_document_structure_extract::XML.new(@md,data).dom
      @tuned_file
    end
    # Replaces a leading "# " / "#1" in paragraphs with a running number and
    # a leading "_# " with a running letter (setting indent to '1'); both
    # counters are reset by any heading whose ln starts with 1-9.
    def minor_numbering(data) #and auto segment numbering increment
      @tuned_file=[]
      number_small,letter_small=0,0
      letter=%w( a b c d e f g h i j k l m n o p q r s t u v w x y z )
      data.each do |dob|
        if dob.of =~/heading|para|block/
          if dob.is =='heading' \
          and dob.ln.to_s=~/^[1-9]/ #% sub-number system, (baby numbering) reset with any change of major number (more obviously should be placed in number titles, but that is conditionally executed, check and move later)
            number_small,letter_small=0,0
          elsif dob.is =~/para/
            if dob.obj =~/^#[ 1]/ \
            and dob.obj !~/^#\s+(?:~#)?$/
              letter_small=0
              number_small=0 if dob.obj =~ /^#1/
              number_small+=1
              dob.obj.gsub!(/^#[ 1]/,"#{number_small}. ") #change 2004
            end
            if dob.obj =~/^_# /
              dob.obj.gsub!(/^_# /,"#{letter[letter_small]}. ") #change 2004
              dob.indent='1'
              letter_small+=1
            end
          end
        end
        @tuned_file << dob
      end
      @tuned_file=@tuned_file.flatten
    end
    # Names level-4 segments and builds the lookup maps.
    # Returns [objects, tags, ocn_html_seg] where
    #   tags         maps "ocn" => {:segname} and tag => {:ocn,:segname}
    #   ocn_html_seg is an array indexed by ocn of {:seg,:level}
    def name_para_seg_filename(data) #segment naming, remaining
      # paragraph name/numbering rules
      # manual naming overrides, manual naming may be
      #   alpha-numeric characters mixed,
      #   numeric only (a number), if
      #     all segments have been named,
      #     the numbers used are over 1000 or
      #     it is not minded that auto-numbering uses a funny scheme for naming segments (not yet implemented)
      #     [for now a warning is printed for such documents on use of maintenance or very-verbose flag]
      # auto-naming takes the form of giving numbers to segments
      # the rules for which are as follows
      #   if the title/heading text starts with a numeric, then that is used (1 3.1 3rd etc.)
      #   otherwise the level 4 segment number from the embedded document structure info is used
      #   if there is none a sequential number is designated, preceded by an underscore
      @tuned_file,@unique_auto_name=[],[]
      tags={}
      art_filename_auto=1
      @counter=1
      if not @md.seg_autoname_safe and @md.cmd =~/[MV]/
        puts 'manual segment names, numbers used as names, risk warning (segmented html)'
      end
      ocn_html_seg=[]
      data.each do |dob|
        if dob.is=='heading' \
        and dob.ln \
        and dob.ln.to_s =~/^[456]/
          if dob.ln==4 \
          and not dob.name \
          and not @md.set_heading_seg
            @md.set_heading_seg=true
          end
          if dob.name !~/^\S+/ \
          and dob.obj =~/^\s*(?:\S+\s+)?([\d.,:-]+)/m #heading starts with a recognised numeric or word followed by a recognised numerical construct, use that as name
            possible_seg_name=$1
            possible_seg_name.gsub!(/(?:[:,-]|\W)/,'.')
            possible_seg_name.gsub!(/\.$/,'')
            if not @md.seg_names.nil? \
            and not @md.seg_names.include?(possible_seg_name)
              dob.name=possible_seg_name
              dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
              @md.seg_names << possible_seg_name
            else puts 'warn, there may be a conflicting numbering scheme' if @md.cmd =~/[VM]/
            end
          end
          if dob.ln==4 \
          and dob.name #extract segment name from embedded document structure info
            if not @md.seg_names.nil? \
            and not @md.seg_names.include?(dob.name)
              dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/
              @md.seg_names << dob.name
            end
          end
          if dob.ln==4 \
          and not dob.name #if still no segment name, provide a numerical one
            pf='_' #pg='' #may use e.g. '' or '~' or '_'
            segn_auto="#{pf}#{art_filename_auto.to_s}"
            if not @md.seg_names.nil? \
            and not @md.seg_names.include?(segn_auto)
              dob.name=segn_auto
              dob.tags=[dob.name,dob.tags].flatten if dob.name !~/^\d+$/ #check whether will work across file types with stop signs
              @md.seg_names << segn_auto
            else puts 'segment name (numbering) error'
            end
            art_filename_auto+=1
          end
          if dob.ln==4 \
          and not dob.name #should not occur
            puts "e r r o r -\t#{__FILE__}::#{__LINE__}\n#{dob.inspect}"
          end
        end
        if dob.is =~/heading/ \
        and dob.ln==4
          @seg=dob.name
        end
        # a heading whose level is listed in pagenew/pagebreak is preceded by
        # the corresponding layout break object
        @tuned_file << if dob.is=='heading' \
        and (@md.pagenew or @md.pagebreak)
          m=dob.ln.to_s
          dob_tmp=[]
          if @md.pagenew.inspect =~/#{m}/
            dob_tmp << SiSU_document_structure::Object_layout.new.break(Hx[:br_page_new]) << dob
          elsif @md.pagebreak.inspect =~/#{m}/
            dob_tmp << SiSU_document_structure::Object_layout.new.break(Hx[:br_page]) << dob
          end
          dob_tmp.empty? ? dob : dob_tmp
        else dob
        end
        if defined? dob.ocn \
        and dob.ocn
          @segname=((dob.is=='heading'|| dob.is=='heading_insert') && dob.ln==4 && (defined? dob.name)) \
          ? (dob.name) \
          : @segname
          tags["#{dob.ocn}"]={:segname=>@segname}
          ocn_html_seg[dob.ocn]=if dob.is =~/heading/
            # dob.ln is compared as an Integer elsewhere (==4); it has to be
            # converted before a regex match or this branch is never taken
            # (and Integer =~ raises NoMethodError on Ruby >= 3.2)
            if dob.ln.to_s =~/[1-3]/
              {:seg=>nil,:level=> dob.ln}
            else #elsif dob.ln =~/[4-6]/
              {:seg=>@seg,:level=> dob.ln}
            end
          else
            {:seg=>@seg,:level=>nil}
          end
        end
        dob.tags=dob.tags.uniq if defined? dob.tags
        if defined? dob.tags \
        and dob.tags.length > 0
          dob.tags.each do |x|
            tags[x]={:ocn=>dob.ocn.to_s,:segname=>@segname} #@tags[x.to_s]=[dob.ocn.to_s,@segname.to_s]
          end
        end
        dob
      end
      # level 1-3 headings take the segment of the nearest level-4 heading
      # found within the next four ocn slots (stop at the first hit, so a
      # later segment cannot overwrite the nearer one)
      ocn_html_seg.each_with_index do |ocn,i|
        if ocn \
        and ocn[:level].to_s=~/[1-3]/
          (1..4).each do |x|
            if ocn_html_seg[i+x] and ocn_html_seg[i+x][:level]==4
              ocn[:seg]=ocn_html_seg[i+x][:seg]
              break
            end
          end
        end
      end
      if @md.seg_names.length > 0
        @md.set_heading_seg=true
      end
      tuned_file=@tuned_file.flatten
      [tuned_file,tags,ocn_html_seg]
    end
    # When @md.set_heading_top is unset: inserts a manufactured top heading
    # (['1', title]) before the first element that is neither a meta line nor
    # blank, and marks @md.set_heading_top. Returns the flattened array, or
    # nil when @md.set_heading_top was already set.
    # NOTE(review): elements are matched with !~ as if they were strings —
    # confirm what callers pass here.
    def set_heading_top(data) #% make sure no false positives
      unless @md.set_heading_top
        puts "\tdocument contains no top level heading, (will have to manufacture one)" if @md.cmd =~/[MV]/
        @tuned_file=[]
        data.each do |t_o|
          unless @md.set_heading_top
            if t_o !~/^(?:#{Rx[:meta]}|@\S+:)\s/m \
            and t_o !~/\A\s*\Z/m
              @md.set_heading_top=true
              if defined? @md.title \
              and @md.title \
              and defined? @md.title.full \
              and defined? @md.creator \
              and @md.creator
                head=@md.title.main ? ([@lv='1',@obj=@md.title.main]) : ([@lv='1',@obj='[no title provided]'])
                @tuned_file << head
              end
            end
          end
          @tuned_file << t_o
        end
        @tuned_file=@tuned_file.flatten
      end
    end
    # When @md.set_heading_seg is unset: turns the first non-blank,
    # non-layout object whose ln does not start with 1-3 into a level-4
    # segment named 'seg'. Returns the flattened array, or nil when
    # @md.set_heading_seg was already set.
    # NOTE(review): the assigned [ln,name,obj] values are also pushed into the
    # output ahead of the object itself — confirm this is intended.
    def set_heading_seg(data) #% make sure no false positives
      unless @md.set_heading_seg
        puts "\tdocument contains no segment level, (will have to manufacture one)" if @md.cmd =~/[MV]/
        @tuned_file=[]
        data.each do |dob|
          unless @md.set_heading_seg
            if defined? dob.ln and dob.ln.to_s !~/^[123]/m \
            and dob.obj !~/\A\s*\Z/m \
            and dob.is !='layout'
              @md.set_heading_seg=true
              head=if @md.title.main ; dob.ln,dob.name,dob.obj=4,'seg',@md.title.main
              else dob.ln,dob.name,dob.obj=4,'seg','[segment]'
              end
              @tuned_file << head
            end
          end
          @tuned_file << dob
        end
        @tuned_file=@tuned_file.flatten
      end
    end
    # When @md.set_header_title is unset: inserts a title meta line built
    # from @md.heading_seg_first before the first element that is neither a
    # '%' comment nor blank, and records it as @md.title.main. Returns the
    # flattened array, or nil when @md.set_header_title was already set.
    def set_header_title(data) #% make sure no false positives
      unless @md.set_header_title
        puts "\t no document title provided, (will have to manufacture one)" if @md.cmd =~/[MV]/
        @tuned_file=[]
        data.each do |t_o|
          unless @md.set_header_title
            if t_o !~/^%{1,2}\s/m \
            and t_o !~/\A\s*\Z/m
              @tuned_file << "#{Mx[:meta_o]}title#{Mx[:meta_c]} #{@md.heading_seg_first}"
              @md.title.main=@md.heading_seg_first
              @md.set_header_title=true
            end
          end
          @tuned_file << t_o
        end
        @tuned_file=@tuned_file.flatten
      end
    end
  end
end
module SiSU_substitute_and_insert
  # Pre-processes the raw source paragraphs: comments out the interpreter
  # bang / SiSU identifier on the first line, expands <:insertN> markers from
  # the selected skin, normalises structure markup and converts "@name:"
  # header lines to the internal meta markers.
  class SI
    # md::   document metadata/settings object
    # data:: array of source paragraphs (strings, edited in place)
    def initialize(md,data)
      @md,@data=md,data
      @skin=SiSU_Env::Info_skin.new(@md)
    end
    # Returns the expanded array of paragraphs.
    def substitutions_and_insertions?
      data=@data
      data_expand=[]
      if data[0] =~ /^#!\s*(?:\/usr\/bin\/env sisu|\/usr\/bin\/sisu)/ # remove bang from top #! (however file is stripped, so will be removed provided no content precedes it)
        data[0].gsub!(/^#!\s*\/usr\/bin\/sisu/,'')
        data[0].gsub!(/^#!\s*\/usr\/bin\/env sisu/,'')
      end
      if data[0] =~ /^(SiSU\s+[\d.]*|sisu-[\d.]+)$/ # SiSU identifier
        data[0].gsub!(/^(SiSU\s*[\d.]*)$/,'% \1')
        data[0].gsub!(/^(sisu-[\d.]+)$/,'% \1')
      end
      data.each do |para|
        if para =~/<:(insert\d+)!?>/ \
        and para !~/^%\s+/
          @skin.select
          ins=SiSU_Viz::Inserts.new
          # expand_insert returns an Array on success and the untouched String
          # otherwise; Array() keeps the String case from raising on #each
          data_expand.concat(Array(expand_insert(para,ins)))
        else data_expand << para
        end
      end
      # nothing reads data_expand while it is being filled, so tidying once
      # after the loop is equivalent to doing it on every iteration
      data_expand.flatten!
      data_expand.compact!
      data_expand.each do |para|
        para=if @md.markup_version.determined >= 0.38
          SiSU_document_structure_extract::Structure.new(@md,para).structure_markup_normalize
        else
          SiSU_document_structure_extract::Structure.new(@md,para).structure_marks
        end
        para.gsub!(/^(:?A~)\s*$/,'\1~ @title @author') #conditional header
        para.gsub!(/^((?:[1-9]|:?[A-C])~\S*)\s*$/,'\1~ [Note: heading marker::required title missing]~#') #conditional header for incorporated document 2004w12
        if para =~/^@\S+?:/
          para.gsub!(/^@(\S+?):\s+/,"#{Mx[:meta_o]}\\1#{Mx[:meta_c]}")
          para.gsub!(/^@(\S+?):([+-])\s+/,"#{Mx[:meta_o]}\\1\\2#{Mx[:meta_c]}")
        end
      end
    end
    # Expands a paragraph consisting of a single <:insert1> .. <:insert7>
    # marker into the skin's insert text split on blank lines (inserts 3-7
    # additionally get a "\n" element after each chunk, as before).
    # Returns an Array of chunks, or +para+ unchanged when no marker line
    # matches or the skin does not provide that insert (a notice is printed).
    def expand_insert(para,ins)
      (1..7).each do |n|
        insert_name="insert#{n}"
        next unless para =~/^\s*<:#{insert_name}>\s*$/
        unless ins.respond_to?(insert_name)
          p "skin #{insert_name} not found in #{@skin.select}"
          return para
        end
        chunks=[]
        ins.public_send(insert_name).split(/\n\n/).each do |x|
          chunks << x
          chunks << "\n" if n >= 3
        end
        return chunks
      end
      para
    end
    private :expand_insert
  end
end
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: Syntax for markup, input markup syntaxes, determined here + +=end +module SiSU_Syntax + class Words + def initialize(line,md,mkp) + @line,@md,@mkp=line,md,mkp + end + end + class Markup + def initialize(md='',data='') + @data,@md=data,md + @vz=SiSU_Env::Get_init.instance.skin + @data_new=[] + url_and_stub=SiSU_Env::Info_env.new.url + @output_url="#{url_and_stub.remote}" + @env=SiSU_Env::Info_env.new + emph_set=if defined? @md.make.emphasis \ + and not @md.make.emphasis.nil? 
+ @md.make.emphasis + else @env.markup_emphasis + end + @emph=case emph_set + when /bold/ + emph_italics=false + {:o =>Mx[:fa_bold_o], :c =>Mx[:fa_bold_c] } + when /italics/ + emph_italics=true + {:o =>Mx[:fa_italics_o], :c =>Mx[:fa_italics_c] } + when /underscore/ + emph_italics=false + {:o =>Mx[:fa_underscore_o], :c =>Mx[:fa_underscore_c] } + else p __LINE__.to_s + '::' + __FILE__ + end + @http_m=%r{\{.+?\}https?://\S+|https?:\S+|:\S+|\.\.\/\S+|#\S+|\S+?\.png\b|[*]~\S+|^#{Mx[:meta_o]}.+|#{Mx[:gr_o]}(?:code|block|group|alt|verse)(?:-end)?#{Mx[:gr_c]}|#{Mx[:fa_o]}:br#{Mx[:fa_c]}} + @manmkp_ital=emph_italics \ + ? '[i/*]\\{.+?\\}[i/*]' \ + : '[i/]\\{.+?\\}[i/]' + tail_m_ital=%q{(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$)} + tail_m_bold=%{(?:(?:#{Mx[:fa_italics_c]})?(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$))?} + bold_line=%{^!_\s.+?(?:#{Mx[:br_line]}|\n|$)} + ital_line=%{^/_\s.+?(?:#{Mx[:br_line]}|\n|$)} + @line_scan_ital=if defined? @md.make.italics[:str] \ + and defined? @vz.markup_make_italic[:str] + /#{@http_m}|#{bold_line}|#{@manmkp_ital}#{tail_m_ital}|(?:#{@md.make.italics[:str]}|#{@vz.markup_make_italic[:str]})#{tail_m_ital}|\S+|\n/ + elsif defined? @md.make.italics[:str] + /#{@http_m}|#{bold_line}|#{@manmkp_ital}#{tail_m_ital}|#{@md.make.italics[:str]}#{tail_m_ital}|\S+|\n/ + elsif defined? @vz.markup_make_italic[:str] + /#{@http_m}|#{bold_line}|#{@manmkp_ital}#{tail_m_ital}|#{@vz.markup_make_italic[:str]}#{tail_m_ital}|\S+|\n/ + end + @manmkp_bold=emph_italics \ + ? '^!_\s.+?(?:\n|$)|[!b]\\{.+?\\}[*!b]|[*!][a-zA-Z0-9\-_]+[!]' \ + : '^!_\s.+?(?:\n|$)|[*!b]\\{.+?\\}[*!b]|[*!][a-zA-Z0-9\-_]+[*!]' + @line_scan_bold=if (defined? @md.make.bold[:str] \ + and @md.make.bold[:str]) \ + and (defined? @vz.markup_make_bold[:str] \ + and @vz.markup_make_bold[:str]) + /#{@http_m}|#{bold_line}|(?:#{@manmkp_bold}|#{@md.make.bold[:str]}|#{@vz.markup_make_bold[:str]})#{tail_m_bold}|\S+|\n/ + elsif defined? 
@md.make.bold[:str] \ + and @md.make.bold[:str] + /#{@http_m}|#{bold_line}|(?:#{@manmkp_bold}|#{@md.make.bold[:str]})#{tail_m_bold}|\S+|\n/ + elsif defined? @vz.markup_make_bold[:str] \ + and @vz.markup_make_bold[:str] + /#{@http_m}|#{bold_line}|(?:#{@manmkp_bold}|#{@vz.markup_make_bold[:str]})#{tail_m_bold}|\S+|\n/ + end + end + def songsheet + @data.compact! + @data.each do |dob| + dob=if @md.sem_tag then sem(dob) else dob end #revisit + dob=wordlist_italics(dob) + dob=wordlist_bold(dob) + dob=bodymarkup(dob) + @data_new << dob unless dob.nil? + end + @data_new + end + def sem(dob) #revisit + dob=SiSU_sem::Tags.new(dob,@md).rm.all + end + def wordlist_italics(dob) + dob=dob.dup + if (defined? @md.make.italics[:str] \ + and @md.make.italics[:str]) \ + or (defined? @vz.markup_make_italic[:str] \ + and @vz.markup_make_italic[:str]) + dob.obj=if dob.is !~/^(?:meta|heading|code|comment)/ + word=dob.obj.scan(@line_scan_ital) + word.flatten! + word.compact! #reinstated + line_array=[] + word.each do |w| + unless /#{@manmkp_ital}|#{@http_m}/.match(w) + if defined? @md.make.italics[:regx] \ + and @md.make.italics[:regx] + w.gsub!(@md.make.italics[:regx], + "#{Mx[:fa_italics_o]}\\1#{Mx[:fa_italics_c]}") + elsif defined? @vz.markup_make_italic \ + and @vz.markup_make_italic + w.gsub!(@vz.markup_make_italic, + "#{Mx[:fa_italics_o]}\\1#{Mx[:fa_italics_c]}") + else w + end + end + line_array << w + end + line_array.join(' ') + else dob.obj + end + end + dob + end + def embolden(given) + given.gsub!(/^!_\s+((?:\{|#{Mx[:lnk_o]})(?:~^ )?.+?(?:\}|#{Mx[:lnk_o]})https?:\/\/\S+.*?)(
|[#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}])/, + "#{Mx[:fa_bold_o]} \\1 #{Mx[:fa_bold_c]}\\2") + given.gsub!(/^!_\s+((?:\{|#{Mx[:lnk_o]})(?:~^ )?.+?(?:\}|#{Mx[:lnk_o]})https?:\/\/\S+.*)/, + "#{Mx[:fa_bold_o]} \\1 #{Mx[:fa_bold_c]}") + given.gsub!(/(?:^!_|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]})\s*(.+?)(
|[#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}])/, + "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2") + given.gsub!(/(?:^!_|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]})\s*(.+?)\s+((?:[*]~\S+\s*)+)/, + "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2") + given.gsub!(/(?:^!_|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]})\s*(.+?)\s*([~-]#)$/, + "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}\\2") + given.gsub!(/(?:^!_\s+|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]}\s*)(.*)?\s*$/, + "#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}") + end + def italicise(given) + given.gsub!(/^\/_\s*(.+?)(
|[#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}])/, + "#{Mx[:fa_italics_o]}\\1#{Mx[:fa_italics_c]}\\2") + given.gsub!(/^\/_\s*(.+?)\s+((?:[*]~\S+\s*)+)/, + "#{Mx[:fa_italics_o]}\\1#{Mx[:fa_italics_c]}\\2") + given.gsub!(/^\/_\s*(.+?)\s*([~-]#)$/, + "#{Mx[:fa_italics_o]}\\1#{Mx[:fa_italics_c]}\\2") + given.gsub!(/^\/_\s+(.*)?\s*$/, + "#{Mx[:fa_italics_o]}\\1#{Mx[:fa_italics_c]}") + end + def wordlist_bold(dob) + dob=dob.dup + if (defined? @md.make.bold[:str] \ + and @md.make.bold[:str]) \ + or (defined? @vz.markup_make_bold[:str] \ + and @vz.markup_make_bold[:str]) + dob.obj=if dob.is !~/^(?:meta|heading|code|comment|table)/ + line_array=[] + word=dob.obj.scan(@line_scan_bold) + word.flatten! + word.compact! + word.each do |w| + unless /#{@manmkp_bold}|#{@http_m}/.match(w) + if defined? @md.make.bold[:regx] \ + and @md.make.bold[:regx] #document header: @bold: [bold word list] + w.gsub!(@md.make.bold[:regx],"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}") + elsif defined? @vz.markup_make_bold \ + and @vz.markup_make_bold #defaults and skin adjusted bold word list + w.gsub!(@vz.markup_make_bold,"#{Mx[:fa_bold_o]}\\1#{Mx[:fa_bold_c]}") + end + else + if w =~ /(?:^!_|^#{Mx[:lv_o]}[7-9]:\S*?#{Mx[:lv_c]})\s+/; embolden(w) #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost! 
+ elsif w =~/^\/_\s+/; italicise(w) + else w + end + end + line_array << w + end + line_array.join(' ') + else dob.obj + end + else + if dob.is !~/^(?:heading|comment|meta)/ \ + and dob.obj =~ /^!_\s+/ + embolden(dob.obj) + elsif dob.is=='heading' \ + and dob.ln.to_s =~/[7-9]/ + embolden(dob.obj) + end + if dob.obj =~ /\/_\s+/ + italicise(dob.obj) + end + end + dob + end + def fontface(dob) + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|#{Mx[:br_line]}|#{Mx[:br_paragraph]}|[\(\[\{]|\>)\*\{(.+?)\}\*/m, + "\\1#{@emph[:o]}\\2#{@emph[:c]}") #emphasis + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|#{Mx[:br_line]}|#{Mx[:br_paragraph]}|[\(\[\{]|\>)!\{(.+?)\}!/m, + "\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|#{Mx[:br_line]}|#{Mx[:br_paragraph]}|[\(\[\{]|\(|\>)\/\{(.+?)\}\//m, + "\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|#{Mx[:br_line]}|#{Mx[:br_paragraph]}|[\(\[\{]|\>)_\{(.+?)\}_/m, + "\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|#{Mx[:br_line]}|#{Mx[:br_paragraph]}|[\(\[\{]|\>)#\{(.+?)\}#/m, + "\\1#{Mx[:fa_monospace_o]}\\2#{Mx[:fa_monospace_c]}") #monospace + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\"\{(.+?)\}\"/m, + "\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite /blockquote? 
+ dob.obj.gsub!(/(^|[^\\])\^\{(.+?)\}\^/m, + "\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript + dob.obj.gsub!(/(^|[^\\]),\{(.+?)\},/m, + "\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") #subscript + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\+\{(.+?)\}\+/m, + "\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") #inserted text + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)-\{(.+?)\}-/m, + "\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") #strikethrough - deleted text + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>|\d+)\^(\S+?)\^/, + "\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript single word, watch digit added + dob + end + def bodymarkup(dob) + # << http://www.jus.uio.no/sisu/sisu_markup_table/markup >> + # See: data/sisu/sample/document_samples_sisu_markup/ + ## fontface + # *{emphasis}* e{emphasis}e emphasis + # !{bold text}! b{bold}b bold text + # _{underline}_ u{underline}u underline + # /{italics}/ i{italics}i italics + # "{citation}" c{citation}c citation #blockquote? + # ^{superscript}^ superscript + # ,{subscript}, subscript + # +{inserted text}+ inserted text + # -{deleted text}- deleted text + # #{monospace text}# + # + # {url address}:url + # {image.png}imageurl + # {image.png}png + # ~{endnote}~ + # !_ #bold/emphasise paragraph + # _" #blockquote paragraph + # _1 <:i1> #indent paragraph 1 step + # _2 <:i2> #indent paragraph 2 steps + # _3 <:i3> #indent paragraph 3 steps + # _4 <:i4> #indent paragraph 4 steps + # _* #bullet (list) ● + # _1* #bullet (list) indented + # _1* #bullet (list) indented + # # #numbered (list) level 1 + # _# #numbered (list) level 2 + dob=dob.dup + if dob.is !~/^(?:meta|comment|code|table)/ + line_array=[] + word=dob.obj.scan(/\S+|\n/) #unless line =~/^(?:#{Mx[:meta_o]}|%+\s)/ #visit + if word + word.each do |w| # _ - / # | : ! 
^ ~ + unless w =~/~\{|\}~|~\[|\]~|^\^~|~\^|\*~\S+|~#|\{t?~|\{table|https?:\/\/\S+/ # do something earlier about table!! + w.gsub!(/\\?~/,"#{Mx[:gl_o]}#126#{Mx[:gl_c]}") #escaped special character + end + w.gsub!(/^\<$/,"#{Mx[:gl_o]}#lt#{Mx[:gl_c]}") #escaped special character + w.gsub!(/^\>$/,"#{Mx[:gl_o]}#gt#{Mx[:gl_c]}") #escaped special character + line_array << w + end + dob.obj=line_array.join(' ') + dob.obj=dob.obj.strip + end + dob.obj.gsub!(/^([*#-.]{1,12})$/,'\1 ~#') #ocn off for these paragraph separators + dob.obj.gsub!(/~\{(.+?)\}~/m,Mx[:en_a_o] + '\1' + Mx[:en_a_c]) + dob.obj.gsub!(/~\[([^*+].+?)\]~/m,Mx[:en_b_o] + '* \1' + Mx[:en_b_c]) #default if markup does not specify + dob.obj.gsub!(/~\[(.+?)\]~/m,Mx[:en_b_o] + '\1' + Mx[:en_b_c]) + if dob.is =='heading' \ + and dob.ln ==1 + dob.obj.gsub!(/\s*@title\b/," #{@md.title.full}") + if defined? @md.creator.author \ + and @md.creator.author + dob.obj.gsub!(/\s+(?:@creator|@author)/,",
#{@md.creator.author}") + else dob.obj.gsub!(/\s+(?:@creator|@author)/,'') + end + end + if defined? @md.title \ + and @md.title \ + and defined? @md.title.full \ + and defined? @md.creator \ + and @md.creator + if dob.is =='heading' + dob.obj.gsub!(/^\s*@title\s*$/,@md.title.full) if dob.lv =~/1/ # + if dob.lv =~/[23]/ \ + and defined? @md.creator.author \ + and @md.creator.author + dob.obj.gsub!(/^\s*(?:(by\s+)?(?:@creator|@author))\s*$/,"\\1#{@md.creator.author}") + else dob.obj.gsub!(/^\s*(?:(by\s+)?(?:@creator|@author))\s*$/,'\1') + end + end + end + dob.obj.gsub!(/<(https?:\/\/\S+?)>/,'< \1 >') #catch problem markup + dob.obj.gsub!(/<:=(\S+?)>/,'{ c_\1.png 14x14 }http://www.jus.uio.no/sisu') #adjustment 2005w30 + dob.obj.gsub!(//,'<:\1>') #escaped special character + dob.obj.gsub!(/ /,"#{Mx[:nbsp]}") #escaped special character + dob.obj.gsub!(/\\~/,"#{Mx[:gl_o]}#126#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\{/,"#{Mx[:gl_o]}#123#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\}/,"#{Mx[:gl_o]}#125#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\<>/,"#{Mx[:gl_o]}#gt#{Mx[:gl_c]}#{Mx[:gl_o]}#gt#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\/,"#{Mx[:gl_o]}#gt#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\_/,"#{Mx[:gl_o]}#095#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\-/,"#{Mx[:gl_o]}#045#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\+/,"#{Mx[:gl_o]}#043#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\//,"#{Mx[:gl_o]}#047#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\#/,"#{Mx[:gl_o]}#035#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\&/,"#{Mx[:gl_o]}#038#{Mx[:gl_c]}") #& #escaped special character + dob.obj.gsub!(/\\\|/,"#{Mx[:gl_o]}#124#{Mx[:gl_c]}") #not really a sisu special character but made available as possibility + dob.obj.gsub!(/\\\:/,"#{Mx[:gl_o]}#058#{Mx[:gl_c]}") #not really a sisu special character but 
made available as possibility + dob.obj.gsub!(/\\\!/,"#{Mx[:gl_o]}#033#{Mx[:gl_c]}") #not really a sisu special character but made available as possibility + dob.obj.gsub!(/\\\^/,"#{Mx[:gl_o]}#094#{Mx[:gl_c]}") #not really a sisu special character but made available as possibility + dob.obj.gsub!(/\\\,/,"#{Mx[:gl_o]}#044#{Mx[:gl_c]}") #not really a sisu special character but made available as possibility + dob.obj.gsub!(/\\\\/,"#{Mx[:gl_o]}#092#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\*/,"#{Mx[:gl_o]}#042#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/\\\!/,"#{Mx[:gl_o]}#033#{Mx[:gl_c]}") #escaped special character + dob.obj.gsub!(/(?:<:?br>|
)/,"#{Mx[:br_line]}") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') + if dob.obj=~/(?:https?:|ftp:|\{([^{}]+?)\}(?:#|:|[.]{1,2}\/))\S+/m + if dob.obj=~/(?:^|[#{Mx[:gl_c]}#{Mx[:nbsp]} ])\{~\^ (?:.+?)\s*\}(?:(?:https?:|ftp:|:|[.]{1,2}\/)\S+?)\s*#{Mx[:en_a_o]}(.+?)#{Mx[:en_a_c]}/m + dob.obj.gsub!(/(^|[#{Mx[:gl_c]}#{Mx[:nbsp]} ])\{~\^ ([^}]+?)\s*\}((?:https?:|ftp:|:|[.]{1,2}\/)\S+?)\s*#{Mx[:en_a_o]}(.+?)#{Mx[:en_a_c]}/m, + "\\1#{Mx[:lnk_o]}\\2#{Mx[:lnk_c]}\\3 #{Mx[:en_a_o]}\\3 \\4#{Mx[:en_a_c]}") # watch + end + if dob.obj=~/(?:^|[#{Mx[:gl_c]}#{Mx[:nbsp]} ])\{~\^ (?:.+?)\s*\}(?:(?:https?:|ftp:|:|[.]{1,2}\/)\S+?)([;,.]?)(?=\s|[#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m + dob.obj.gsub!(/(^|[#{Mx[:gl_c]}#{Mx[:nbsp]} ])\{~\^ (.+?)\s*\}((?:https?:|ftp:|:|[.]{1,2}\/)\S+?)([;,.]?)(?=\s|[#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, + "\\1#{Mx[:lnk_o]}\\2#{Mx[:lnk_c]}\\3\\4 #{Mx[:en_a_o]}\\3#{Mx[:en_a_c]} ") + end + dob.obj.gsub!(/(^|[^#])\{\s*([^{}]+?)\s*\}((?:https?:|:|[.]{2}\/|#)\S+?)(?=\s|[#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}#{Mx[:en_a_o]}#{Mx[:en_b_o]}]|$)/, + "\\1#{Mx[:lnk_o]}\\2#{Mx[:lnk_c]}\\3") #linked (text or image, however text cannot include modified face, e.g. 
bold, ital, underline) + dob.obj.gsub!(/(^|[#{Mx[:gl_c]}#{Mx[:lnk_c]}#{Mx[:en_a_o]}#{Mx[:en_b_o]}(\s])((?:https?|ftp):\/\/\S+?\.[^>< ]+?)([,.;'"]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, + %{\\1#{Mx[:url_o]}\\2#{Mx[:url_c]}\\3}) + dob.obj.gsub!(/#{Mx[:lnk_c]}#(\S+?[^>< ]+?)([()\[\]]*[,.;:!?'"]{0,2})(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, + %{#{Mx[:lnk_c]}#{Mx[:rel_o]}\\1#{Mx[:rel_c]}\\2}) + dob.obj.gsub!(/#{Mx[:lnk_c]}:(\S+?[^>< ]+?)([()\[\]]*[,.;:!?'"]{0,2})(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, + %{#{Mx[:lnk_c]}#{Mx[:rel_o]}:\\1#{Mx[:rel_c]}\\2}) + dob.obj.gsub!(/#{Mx[:lnk_c]}[.]{2}\/(\S+?[^>< ]+?)([()\[\]]*[,.;:!?'"]{0,2})(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, + %{#{Mx[:lnk_c]}#{Mx[:rel_o]}:\\1#{Mx[:rel_c]}\\2}) + end + if dob.obj=~/_(?:https?|ftp):\S+/m # _http://url #CHECK + dob.obj.gsub!(/(^|[#{Mx[:gl_c]}#{Mx[:lnk_c]}#{Mx[:en_a_o]}#{Mx[:en_b_o]}(\s])(_(?:https?|ftp):\/\/\S+?\.[^>< ]+?)([,.;'"]?)(?=[\s#{Mx[:en_a_c]}#{Mx[:en_b_c]}#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}]|$)/m, + %{\\1#{Mx[:url_o]}\\2#{Mx[:url_c]}\\3}) + end + dob=fontface(dob) + dob.obj.gsub!(/<[:e]\s+(.+?)!?>/, + "#{Mx[:en_a_o]}\\1#{Mx[:en_a_c]}") #not tested + dob.obj.gsub!(/^\s*_\*\s*/, + "#{Mx[:gl_bullet]}") #bullets, shortcut + dob.obj.gsub!(/=\{(.+?)\}/, + "#{Mx[:idx_o]}\\1#{Mx[:idx_c]}") + dob.obj.gsub!(/^\s*_([1-9])\*\s*/, + "#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}#{Mx[:gl_bullet]}") #bullets, shortcut + dob.obj.gsub!(/^\s*_([1-9])\s+/, + "#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}") #indent + dob.obj.gsub!(/(?:<:?br>|
)/, + "#{Mx[:br_line]}") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') + dob.obj.gsub!(/<:hi>/,"#{Mx[:fa_hilite_o]}") #'') # bright yellow rgb(255,255,0) pale yellow rgb(255,255,200) + dob.obj.gsub!(/<:\/hi>/,"#{Mx[:fa_hilite_c]}") #'') + dob.obj.gsub!(/(#{Mx[:gr_o]}verse#{Mx[:gr_c]}.+)/m,"\\1\n") + dob.obj.gsub!(/[ ]+($)/,'\1') + dob.obj.gsub!(/\{\s*(.+?)\s*\}(https?:\S+?)([;,.]?)(?=\s|[#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}#{Mx[:en_a_o]}#{Mx[:en_b_o]}]|$)/, + "#{Mx[:lnk_o]}\\1#{Mx[:lnk_c]}#{Mx[:url_o]}\\2#{Mx[:url_c]}\\3") #any remaining linked text or image + dob.obj.gsub!(/\{\s*(.+?)\s*\}(#{Mx[:url_o]}\S+?#{Mx[:url_c]})/, + "#{Mx[:lnk_o]}\\1#{Mx[:lnk_c]}\\2") #any remaining linked text or image + dob.obj.gsub!(/(^|[ ])\{\s*(.+?)\s*\}(\S+?)([;,.]?)(?=\s|[#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}#{Mx[:en_a_o]}#{Mx[:en_b_o]}]|$)/, + "\\1#{Mx[:lnk_o]}\\2#{Mx[:lnk_c]}\\3\\4") #any remaining linked text or image + dob.obj.gsub!(/\{\s*(.+?)\s*\}#(\S+?)([;,.]?)(?=\s|[#{Mx[:br_line]}#{Mx[:br_paragraph]}#{Mx[:br_nl]}#{Mx[:en_a_o]}#{Mx[:en_b_o]}]|$)/, + "#{Mx[:lnk_o]}\\1#{Mx[:lnk_c]}#{Mx[:rel_o]}\\2#{Mx[:rel_c]}\\3") #any remaining linked text or image, check need + dob.obj.gsub!(/\{\s*(.+?)\s*\}(#{Mx[:rel_o]}\S+?#{Mx[:rel_c]})/, + "#{Mx[:lnk_o]}\\1#{Mx[:lnk_c]}\\2") #any remaining linked text or image, check need + dob.obj.gsub!(/\{\s*(.+?)\s*\}(image)/, + "#{Mx[:lnk_o]}\\1#{Mx[:lnk_c]}\\2") #linked image + elsif dob.is=='table' + dob=fontface(dob) + elsif dob.is =='code' +#p dob.obj + dob.obj.gsub!(/#{Mx[:meta_o]}(\S+?)#{Mx[:meta_c]}\s*/,'@\1: ') + dob.obj.gsub!(/([<>])/,'_\1') + dob.obj.gsub!(/_<:(\S+?)_>/,'<:\1>') #convert <:\S+> back, clumsy + dob.obj.gsub!(/_<(br(?: \/)?)_>/,'<\1>') #convert

back, clumsy + dob.obj.gsub!(/(^|#{Mx[:gl_c]}|\s)<(br(?: \/)?)>([\s,.]|$)/,'\1<\2>\3') #convert

back, clumsy + if dob.number_ + codeline=[] + ln=1 + dob.obj.split(/#{Mx[:gr_o]}codeline#{Mx[:gr_c]}||\n/).each_with_index do |cl,i| + unless i == 0 + cl.gsub!(Mx[:br_nl],'') + w=3-ln.to_s.length + cl = "#{ln}#{Mx[:nbsp]*w}#{Mx[:vline]}#{Mx[:nbsp]}#{Mx[:nbsp]}#{cl}#{Mx[:br_nl]}" + ln +=1 + end + codeline << cl + end + codeline= codeline.join("") + dob.obj=codeline + else + dob.obj.gsub!(/#{Mx[:gr_o]}codeline#{Mx[:gr_c]}/, + "\n#{Mx[:nbsp]}#{Mx[:nbsp]}") #temporary fix, prefer: #line.gsub!(/<:codeline>/,"\n") + end + dob + else # @\S+?: + end + dob + end + def tech #script markup planned to be more strict for technical documents + # *{emphasis}* e{emphasis}e emphasis + # !{bold text}! b{bold}b bold text + # _{underline}_ u{underline}u underline + # /{italics}/ i{italics}i italics + # "{citation}" c{citation}c citation + # ^{superscript}^ superscript + # ,{subscript}, subscript + # +{inserted text}+ inserted text + # -{deleted text}- deleted text + # #{monospace text}# + # {url address}:url + # {image.png}imageurl + # {image.png}png + # ~{endnote}~ + # +1 + # +2 + puts 'tech' + @data.each do |line| + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)e\{(.+?)\}e/, + "\\1#{@emph[:o]}\\2#{@emph[:c]}") #emphasis + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)b\{(.+?)\}b/, + "\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)u\{(.+?)\}u/, + "\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)c\{(.+?)\}c/, + "\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)i\{(.+?)\}i/, + "\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)!\{(.+?)\}!/, + "\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold + 
line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)\*\{(.+?)\}\*/, + "\\1#{@emph[:o]}\\2#{@emph[:c]}") #emphasis + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\>)_\{(.+?)\}_/, + "\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|[\(\[]|\(|\>)\/\{(.+?)\}\//, + "\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\"\{(.+?)\}\"/, + "\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\^\{(.+?)\}\^/, + "\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)9\{(.+?)\}9/, + "\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>),\{(.+?)\},/, + "\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)6\{(.+?)\}6/, + "\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\+\{(.+?)\}\+/, + "\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)v\{(.+?)\}v/, + "\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)-\{(.+?)\}-/, + "\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)x\{(.+?)\}x/, + "\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\*(\S+?)\*/, + "\\1#{@emph[:o]}\\2#{@emph[:c]}") #emphasise single word, watch + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\!(\S+?)\!/, + "\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([ ,.;:'"~$]|[^a-zA-Z0-9])/, + 
"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}\\3") #italics single word, watch + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)_(\S+?)_/, + "\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore single word, watch + line.gsub!(/(^|\s+|['"]|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\^(\S+?)\^/, + "\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #check #superscript single word, watch digit added + line.gsub!(/^\s*_\([1-9]\)\(\*\+\)\s*/, + "#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}#{Mx[:fa_o]}\\2#{Mx[:fa_c_o]}") #bullets, shortcut + line.gsub!(/^\s*_\([1-9]\)\s+/, + "#{Mx[:pa_o]}:i\\1#{Mx[:pa_c]}") + line.gsub!(/(?:<:?br>|
)\s*_[12]\s+/, + "#{Mx[:br_line]} ") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') + end + @data + end + end +end +__END__ diff --git a/lib/sisu/v3/db_columns.rb b/lib/sisu/v3/db_columns.rb new file mode 100644 index 00000000..c1fb6109 --- /dev/null +++ b/lib/sisu/v3/db_columns.rb @@ -0,0 +1,2079 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: modules shared by the different db types, dbi, postgresql, + sqlite + +=end +module SiSU_DB_columns + require "#{SiSU_lib}/sysenv" # sysenv.rb + require "#{SiSU_lib}/db_sqltxt" # db_sqltxt.rb + class Columns < SiSU_DB_text::Prepare + def initialize(md=nil) + @md=md + @db=SiSU_Env::Info_db.new #watch + if defined? 
md.mod \ + and md.mod.inspect=~/import|update/ \ + and FileTest.exist?(md.fns) + txt_arr=IO.readlines(md.fns,'') + src=txt_arr.join("\n") + if @db.share_source? + @sisutxt=special_character_escape(src) + else @sisutxt='' + end + @fulltext=clean_searchable_text(txt_arr) + else @sisutxt,@fulltext='','' + end + end +#% structures + #def column_define + # def varchar(name,size) + # "#{name} VARCHAR(#{size}) NULL," + # end + #end +=begin +#% title +@title: + :subtitle: + :short: + :edition: + :language: + :note: +=end + def column + def title # DublinCore 1 - title + def name + 'title' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata full document title [DC1]';} + end + def tuple + t=if defined? @md.title.full \ + and @md.title.full=~/\S+/ + txt=@md.title.full + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_main + def name + 'title_main' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata main document title';} + end + def tuple + t=if defined? @md.title.main \ + and @md.title.main=~/\S+/ + txt=@md.title.main + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_sub + def name + 'title_sub' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document subtitle';} + end + def tuple + t=if defined? 
@md.title.sub \ + and @md.title.sub=~/\S+/ + txt=@md.title.sub + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_short + def name + 'title_short' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_part]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document short title if any';} + end + def tuple + t=if defined? @md.title.short \ + and @md.title.short=~/\S+/ + txt=@md.title.short + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_edition + def name + 'title_edition' + end + def create_column + "#{name} VARCHAR(#{Db[:col_title_edition]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document edition (version)';} + end + def tuple + t=if defined? @md.title.edition \ + and @md.title.edition=~/\S+/ + txt=@md.title.edition + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_note + def name + 'title_note' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes associated with title';} + end + def tuple + t=if defined? @md.title.note \ + and @md.title.note=~/\S+/ + txt=@md.title.note + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_language + def name + 'title_language' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language [DC12]';} + end + def tuple + t=if defined? 
@md.title.language \ + and @md.title.language=~/\S+/ + txt=@md.title.language + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def title_language_char # consider + def name + 'title_language_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language iso code';} + end + def tuple + t=if defined? @md.title.language_char \ + and @md.title.language_char=~/\S+/ + txt=@md.title.language_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% creator +@creator: + :author: + :contributor: + :illustrator: + :photographer: + :translator: + :prepared_by: + :digitized_by: + :audio: + :video: +=end + def creator_author # DublinCore 2 - creator/author (author) + def name + 'creator_author' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document author (creator) [DC2]';} + end + def tuple + t=if defined? @md.creator.author_detail \ + and @md.creator.author_detail.class==Array \ + and @md.creator.author_detail.length > 0 + txt='' + @md.creator.author_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_author_honorific # consider + def name + 'creator_author_hon' + end + def create_column + "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document author honorific (title e.g, Ms. Dr. Prof.)';} + end + def tuple + t=if defined? 
@md.creator.author_hon \ + and @md.creator.author_hon=~/\S+/ + txt=@md.creator.author_hon + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_author_nationality # consider + def name + 'creator_author_nationality' + end + def create_column + "#{name} VARCHAR(#{Db[:col_creator_misc_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata nationality of document author (creator)';} + end + def tuple + t=if defined? @md.creator.author_nationality_detail \ + and @md.creator.author_nationality=~/\S+/ + txt=@md.creator.author_nationality_detail + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_contributor # DublinCore 6 - contributor + def name + 'creator_contributor' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document contributor name(s) [DC6]';} + end + def tuple + t=if defined? @md.creator.contributor_detail \ + and @md.creator.contributor_detail.class==Array \ + and @md.creator.contributor_detail.length > 0 + txt=@md.creator.contributor_detail #dc + txt='' + @md.creator.contributor_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_illustrator + def name + 'creator_illustrator' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document illustrator name(s)';} + end + def tuple + t=if defined? 
@md.creator.illustrator_detail \ + and @md.creator.illustrator_detail.class==Array \ + and @md.creator.illustrator_detail.length > 0 + txt=@md.creator.illustrator_detail + txt='' + @md.creator.illustrator_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_photographer + def name + 'creator_photographer' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document photographer name(s)';} + end + def tuple + t=if defined? @md.creator.photographer_detail \ + and @md.creator.photographer_detail.class==Array \ + and @md.creator.photographer_detail.length > 0 + txt=@md.creator.photographer_detail + txt='' + @md.creator.photographer_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_translator + def name + 'creator_translator' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document translator name(s)';} + end + def tuple + t=if defined? 
@md.creator.translator_detail \ + and @md.creator.translator_detail.class==Array \ + and @md.creator.translator_detail.length > 0 + txt='' + @md.creator.translator_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_prepared_by + def name + 'creator_prepared_by' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document prepared by name(s)';} + end + def tuple + t=if defined? @md.creator.prepared_by_detail \ + and @md.creator.prepared_by_detail.class==Array \ + and @md.creator.prepared_by_detail.length > 0 + txt=@md.creator.prepared_by_detail + txt='' + @md.creator.prepared_by_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_digitized_by + def name + 'creator_digitized_by' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document digitized by name(s)';} + end + def tuple + t=if defined? 
@md.creator.digitized_by_detail \ + and @md.creator.digitized_by_detail.class==Array \ + and @md.creator.digitized_by_detail.length > 0 + txt=@md.creator.digitized_by_detail + txt='' + @md.creator.digitized_by_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_audio + def name + 'creator_audio' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document audio by name(s)';} + end + def tuple + t=if defined? @md.creator.audio_detail \ + and @md.creator.audio_detail.class==Array \ + and @md.creator.audio_detail.length > 0 + txt=@md.creator.audio_detail + txt='' + @md.creator.audio_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def creator_video + def name + 'creator_video' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document video by name(s)';} + end + def tuple + t=if defined? 
@md.creator.video_detail \ + and @md.creator.video_detail.class==Array \ + and @md.creator.video_detail.length > 0 + txt='' + @md.creator.video_detail.each do |h| + txt=txt + %{#{h[:the]}, #{h[:others]}; } + end + txt.gsub!(/[;, ]+\s*$/,'') + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% language +#taken from other fields +@title: + :language: +@original: + :language: +#not available --> +#@language: +# :document: +# :original: +=end + def language_document + def name + 'language_document' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + t=if defined? @md.language.document \ + and @md.language.document=~/\S+/ + txt=@md.language.document + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_document_char + def name + 'language_document_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + t=if defined? @md.language.document_char \ + and @md.language.document_char=~/\S+/ + txt=@md.language.document_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_original + def name + 'language_original' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata original document/text language';} + end + def tuple + t=if defined? 
@md.language.original \ + and @md.language.original=~/\S+/ + txt=@md.language.original + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def language_original_char + def name + 'language_original_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document language';} + end + def tuple + t=if defined? @md.language.original_char \ + and @md.language.original_char=~/\S+/ + txt=@md.language.original_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% date +@date: + :added_to_site: + :available: + :created: + :issued: + :modified: + :published: + :valid: + :translated: + :original_publication: +=end + def date_added_to_site + def name + 'date_added_to_site' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + #"#{name} DATE," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date added to site';} + end + def tuple + t=if defined? @md.date.added_to_site \ + and @md.date.added_to_site=~/\S+/ + txt=@md.date.added_to_site + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_available + def name + 'date_available' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date added to site [DC]';} + end + def tuple + t=if defined? 
@md.date.available \ + and @md.date.available=~/\S+/ + txt=@md.date.available + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_created + def name + 'date_created' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date created [DC]';} + end + def tuple + t=if defined? @md.date.created \ + and @md.date.created=~/\S+/ + txt=@md.date.created + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_issued + def name + 'date_issued' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date issued [DC]';} + end + def tuple + t=if defined? @md.date.issued \ + and @md.date.issued=~/\S+/ + txt=@md.date.issued + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_modified + def name + 'date_modified' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date modified [DC]';} + end + def tuple + t=if defined? @md.date.modified \ + and @md.date.modified=~/\S+/ + txt=@md.date.modified + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_published + def name + 'date_published' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date published [DC7]';} + end + def tuple + t=if defined? 
@md.date.published \ + and @md.date.published=~/\S+/ + txt=@md.date.published + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_valid + def name + 'date_valid' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date valid [DC]';} + end + def tuple + t=if defined? @md.date.valid \ + and @md.date.valid=~/\S+/ + txt=@md.date.valid + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_translated + def name + 'date_translated' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date translated';} + end + def tuple + t=if defined? @md.date.translated \ + and @md.date.translated=~/\S+/ + txt=@md.date.translated + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_original_publication + def name + 'date_original_publication' + end + def create_column + "#{name} VARCHAR(#{Db[:col_date_text]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date of original publication';} + end + def tuple + t=if defined? @md.date.original_publication \ + and @md.date.original_publication=~/\S+/ + txt=@md.date.original_publication + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def date_generated + def name + 'date_generated' + end + def create_column #choose other representation of time + "#{name} VARCHAR(30) NULL," + #"#{name} VARCHAR(10) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata date of sisu generation of document, automatically populated';} + end + def tuple #choose other representation of time + t=if defined? 
@md.generated \ + and @md.generated.to_s=~/\S+/ + txt=@md.generated.to_s + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% publisher +@publisher: +=end + def publisher + def name + 'publisher' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document publisher [DC5]';} + end + def tuple + t=if defined? @md.publisher \ + and @md.publisher=~/\S+/ + txt=@md.publisher + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +##% current +# def current_publisher +# def name +# 'current_publisher' +# end +# def size +# 10 +# end +# def create_column +# "#{name} VARCHAR(#{current_publisher.size}) NULL," +# end +# def tuple +# t=if defined? @md.current.publisher \ +# and @md.current.publisher=~/\S+/ +# txt=@md.current.publisher +# special_character_escape(txt) +# "'#{txt}', " +# end +# end +# self +# end +=begin +#% original +@original: + :publisher: + #:date: #repeated under date + :language: + :institution: + :nationality: + :source: +=end + def original_publisher + def name + 'original_publisher' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original publisher [DC5]';} + end + def tuple + t=if defined? @md.original.publisher \ + and @md.original.publisher=~/\S+/ + txt=@md.original.publisher + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_language + def name + 'original_language' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original language';} + end + def tuple + t=if defined? 
@md.original.language \ + and @md.original.language=~/\S+/ + txt=@md.original.language + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_language_char # consider + def name + 'original_language_char' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language_char]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original language iso character';} + end + def tuple + t=if defined? @md.original.language_char \ + and @md.original.language_char=~/\S+/ + txt=@md.original.language_char + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_source + def name + 'original_source' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original source [DC11]';} + end + def tuple + t=if defined? @md.original.source \ + and @md.original.source=~/\S+/ + txt=@md.original.source + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_institution + def name + 'original_institution' + end + def create_column + "#{name} VARCHAR(#{Db[:col_name]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original institution';} + end + def tuple + t=if defined? @md.original.institution \ + and @md.original.institution=~/\S+/ + txt=@md.original.institution + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def original_nationality + def name + 'original_nationality' + end + def create_column + "#{name} VARCHAR(#{Db[:col_language]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document original nationality';} + end + def tuple + t=if defined? 
@md.original.nationality \ + and @md.original.nationality=~/\S+/ + txt=@md.original.nationality + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% rights +@rights: + #:copyright: #mapped to :text: used where no other copyrights and included in :all: + :text: + :translation: + :illustrations: + :photographs: + :preparation: + :digitization: + :audio: + :video: + :license: + :all: +=end + def rights_all + def name + 'rights' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata rights associated with document (composite) [DC15]';} + end + def tuple + t=if defined? @md.rights.all \ + and @md.rights.all=~/\S+/ + txt=@md.rights.all + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_text + def name + 'rights_copyright_text' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text';} + end + def tuple + t=if defined? @md.rights.copyright_text \ + and @md.rights.copyright_text=~/\S+/ + txt=@md.rights.copyright_text + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_translation + def name + 'rights_copyright_translation' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text translation (if any)';} + end + def tuple + t=if defined? 
@md.rights.copyright_translation \ + and @md.rights.copyright_translation=~/\S+/ + txt=@md.rights.copyright_translation + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_illustrations + def name + 'rights_copyright_illustrations' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text illustrations (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_illustrations \ + and @md.rights.copyright_illustrations=~/\S+/ + txt=@md.rights.copyright_illustrations + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_photographs + def name + 'rights_copyright_photographs' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text photographs (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_photographs \ + and @md.rights.copyright_photographs=~/\S+/ + txt=@md.rights.copyright_photographs + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_preparation + def name + 'rights_copyright_preparation' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text preparation (if any)';} + end + def tuple + t=if defined? 
@md.rights.copyright_preparation \ + and @md.rights.copyright_preparation=~/\S+/ + txt=@md.rights.copyright_preparation + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_digitization + def name + 'rights_copyright_digitization' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text digitization (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_digitization \ + and @md.rights.copyright_digitization=~/\S+/ + txt=@md.rights.copyright_digitization + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_audio + def name + 'rights_copyright_audio' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text audio (if any)';} + end + def tuple + t=if defined? @md.rights.copyright_audio \ + and @md.rights.copyright_audio=~/\S+/ + txt=@md.rights.copyright_audio + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_copyright_video + def name + 'rights_copyright_video' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata copyright associated for document text video (if any)';} + end + def tuple + t=if defined? 
@md.rights.copyright_video \ + and @md.rights.copyright_video=~/\S+/ + txt=@md.rights.copyright_video + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def rights_license + def name + 'rights_license' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata license granted for use of document if any)';} + end + def tuple + t=if defined? @md.rights.license \ + and @md.rights.license=~/\S+/ + txt=@md.rights.license + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% classify +@classify: + :topic_register: + :coverage: + :format: + :identifier: + :keywords: + :relation: + :subject: + :type: + :loc: + :dewey: + :pg: + :isbn: +=end + def classify_topic_register + def name + 'classify_topic_register' + end + def create_column + "#{name} VARCHAR(#{Db[:col_info_note]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document topic register (semi-structured document subject information)';} + end + def tuple + t=if defined? @md.classify.topic_register \ + and @md.classify.topic_register=~/\S+/ + txt=@md.classify.topic_register + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_subject + def name + 'classify_subject' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document subject matter [DC3]';} + end + def tuple + t=if defined? 
@md.classify.subject \ + and @md.classify.subject=~/\S+/ + txt=@md.classify.subject + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_type #check + def name + 'classify_type' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document type [DC8]';} + end + def tuple + t=if defined? @md.classify.type \ + and @md.classify.type=~/\S+/ + txt=@md.classify.type + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_loc + def name + 'classify_loc' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document Library of Congress';} + end + def tuple + t=if defined? @md.classify.loc \ + and @md.classify.loc=~/\S+/ + txt=@md.classify.loc + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_dewey + def name + 'classify_dewey' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document Dewey';} + end + def tuple + t=if defined? @md.classify.dewey \ + and @md.classify.dewey=~/\S+/ + txt=@md.classify.dewey + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_oclc + def name + 'classify_oclc' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_library]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document Online Computer Library Center number';} + end + def tuple + t=if defined? 
@md.classify.oclc \ + and @md.classify.oclc=~/\S+/ + txt=@md.classify.oclc + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_pg + def name + 'classify_pg' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_small]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document project gutenberg (if any)';} + end + def tuple + t=if defined? @md.classify.pg \ + and @md.classify.pg=~/\S+/ + txt=@md.classify.pg + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_isbn + def name + 'classify_isbn' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_small]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document isbn (if any)';} + end + def tuple + t=if defined? @md.classify.isbn \ + and @md.classify.isbn=~/\S+/ + txt=@md.classify.isbn + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_format + def name + 'classify_format' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document format [DC9]';} + end + def tuple + t=if defined? @md.classify.format \ + and @md.classify.format=~/\S+/ + txt=@md.classify.format + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_identifier + def name + 'classify_identifier' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_identify]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document identifier [DC10]';} + end + def tuple + t=if defined? 
@md.classify.identifier \ + and @md.classify.identifier=~/\S+/ + txt=@md.classify.identifier + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_relation + def name + 'classify_relation' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document relation [DC13]';} + end + def tuple + t=if defined? @md.classify.relation \ + and @md.classify.relation=~/\S+/ + txt=@md.classify.relation + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_coverage + def name + 'classify_coverage' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document coverage [DC14]';} + end + def tuple + t=if defined? @md.classify.coverage \ + and @md.classify.coverage=~/\S+/ + txt=@md.classify.coverage + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def classify_keywords + def name + 'classify_keywords' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata classify document keywords';} + end + def tuple + t=if defined? 
@md.classify.keywords \ + and @md.classify.keywords=~/\S+/ + txt=@md.classify.keywords + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% notes +@notes: + :abstract: + :comment: + :description: + :history: + :prefix: + :prefix_a: + :prefix_b: + :suffix: +=end + def notes_abstract + def name + 'notes_abstract' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes abstract';} + end + def tuple + t=if defined? @md.notes.abstract \ + and @md.notes.abstract=~/\S+/ + txt=@md.notes.abstract + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_comment + def name + 'notes_comment' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes comment';} + end + def tuple + t=if defined? @md.notes.comment \ + and @md.notes.comment=~/\S+/ + txt=@md.notes.comment + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_description + def name + 'notes_description' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes description [DC4]';} + end + def tuple + t=if defined? @md.notes.description \ + and @md.notes.description=~/\S+/ + txt=@md.notes.description + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_history #check, consider removal + def name + 'notes_history' + end + def create_column + "#{name} VARCHAR(#{Db[:col_classify_txt_short]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes history';} + end + def tuple + t=if defined? 
@md.notes.history \ + and @md.notes.history=~/\S+/ + txt=@md.notes.history + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix + def name + 'notes_prefix' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix';} + end + def tuple + t=if defined? @md.notes.prefix \ + and @md.notes.prefix=~/\S+/ + txt=@md.notes.prefix + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix_a + def name + 'notes_prefix_a' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix_a';} + end + def tuple + t=if defined? @md.notes.prefix_a \ + and @md.notes.prefix_a=~/\S+/ + txt=@md.notes.prefix_a + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_prefix_b + def name + 'notes_prefix_b' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes prefix_b';} + end + def tuple + t=if defined? @md.notes.prefix_b \ + and @md.notes.prefix_b=~/\S+/ + txt=@md.notes.prefix_b + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def notes_suffix + def name + 'notes_suffix' + end + def create_column # keep text + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document notes suffix';} + end + def tuple + t=if defined? 
@md.notes.suffix \ + and @md.notes.suffix=~/\S+/ + txt=@md.notes.suffix + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end +=begin +#% src +=end + def src_filename + def name + 'src_filename' + end + def create_column + "#{name} VARCHAR(#{Db[:col_filename]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text filename';} + end + def tuple + t=if defined? @md.fns \ + and @md.fns=~/\S+/ + txt=@md.fns + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def src_fingerprint + def name + 'src_fingerprint' #hash/digest, sha256 or md5 + end + def create_column + "#{name} VARCHAR(#{Db[:col_digest]}) NULL," + #"#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text fingerprint, hash digest sha256 (or md5)';} + end + def tuple + t=if defined? @md.dgst \ + and @md.dgst.class==Array \ + and @md.dgst[1]=~/\S+/ + txt=@md.dgst[1] + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def src_filesize + def name + 'src_filesize' + end + def create_column + "#{name} VARCHAR(#{Db[:col_filesize]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text file size';} + end + def tuple + t=if defined? @md.filesize \ + and @md.filesize=~/\S+/ + txt=@md.filesize + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def src_word_count + def name + 'src_word_count' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text word count';} + end + def tuple + t=if defined? 
@md.wc_words \ + and @md.wc_words=~/\S+/ + txt=@md.wc_words + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def src_txt # consider naming sisusrc + def name + 'src_text' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'sisu markup source text (if shared)';} + end + def tuple + t=if @md.mod.inspect=~/import|update/ \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@sisutxt}', "] + else ['',''] + end + end + self + end +=begin +#% misc +@make: + :skin: +@links: +=end + def fulltext + def name + 'fulltext' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'document full text clean, searchable';} + end + def tuple + t=if @md.mod.inspect=~/import|update/ \ + and FileTest.exist?(@md.fns) + ["#{name}, ","'#{@fulltext}', "] + else ['',''] + end + end + self + end + def skin_name #check + def name + 'skin_name' + end + def create_column + "#{name} VARCHAR(#{Db[:col_filename]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'source text skin name';} + end + def tuple + t=if defined? @md.skin_name \ + and @md.skin_name=~/\S+/ + txt=@md.skin_name + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def skin_fingerprint #check + def name + 'skin_fingerprint' + end + def create_column + "#{name} VARCHAR(#{Db[:col_digest]}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'source text skin fingerprint';} + end + def tuple + t=if defined? 
@md.dgst_skin \ + and @md.dgst_skin.class==Array \ + and @md.dgst_skin[1]=~/\S+/ + txt=@md.dgst_skin[1] + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def skin # you likely want a separate table for skins + def name + 'skin' + end + def create_column + "#{name} TEXT NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'source text skin';} + end + def tuple + t=if defined? @md.skin \ + and @md.skin=~/\S+/ + txt=@md.skin + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + def links + def name + 'links' + end + def create_column + "#{name} TEXT NULL," + #"#{name} VARCHAR(#{links.size}) NULL," + end + def column_comment + %{COMMENT ON COLUMN metadata_and_text.#{name} + IS 'metadata document links';} + end + def tuple + t=if defined? @md.links \ + and @md.links=~/\S+/ + txt=@md.links + special_character_escape(txt) + ["#{name}, ","'#{txt}', "] + else ['',''] + end + end + self + end + self + end + end + class Column_size + def document_clean # restriction not necessary + 60000 + end + def document_body + 16000 + end + def document_seg + 120 + end + def document_seg_full + 120 + end + def endnote_clean # restriction not necessary + 60000 + end + def endnote_body + 16000 + end + end +end +__END__ diff --git a/lib/sisu/v3/db_create.rb b/lib/sisu/v3/db_create.rb new file mode 100644 index 00000000..2676542e --- /dev/null +++ b/lib/sisu/v3/db_create.rb @@ -0,0 +1,612 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: modules shared by the different db types, dbi, postgresql, + sqlite + +=end +module SiSU_DB_create + require "#{SiSU_lib}/db_columns" # db_columns.rb + class Create < SiSU_DB_columns::Columns + require "#{SiSU_lib}/sysenv" # sysenv.rb + @@dl=nil + def initialize(opt,conn,file,sql_type='pg') + @opt,@conn,@file,@sql_type=opt,conn,file,sql_type + @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX + @comment=(@sql_type=='pg') \ + ? (Comment.new(@conn,@sql_type)) \ + : nil + @@dl ||=SiSU_Env::Info_env.new.digest.length + end + def available + DBI.available_drivers.each do |driver| + puts "Driver: #{driver}" + DBI.data_sources(driver).each do |dsn| + puts "\tDatasource: #{dsn}" + end + end + end + def create_db + @env=SiSU_Env::Info_env.new(@opt.fns) + tell=(@sql_type=='sqlite') \ + ? 
SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create Sqlite db tables in:',%{"#{@env.path.output}/sisu_sqlite.db"}) \ + : SiSU_Screen::Ansi.new(@opt.cmd,'invert','Create PG db tables in:',%{"#{Db[:name_prefix]}#{@env.path.stub_pwd}"}) + tell.colorize if @opt.cmd =~/vVM/ + SiSU_Env::System_call.new.create_pg_db(@env.path.stub_pwd) if @sql_type=='pg' #watch use of path.stub_pwd instead of stub + end + def output_dir? + dir=SiSU_Env::Info_env.new('') + if @opt.cmd =~/d/ + dir.path.webserv_stub_ensure + end + end + def create_table + def metadata_and_text + print %{ + currently using sisu dbi module + to be populated from document files + create tables metadata_and_text + data import through ruby transfer + } unless @opt.cmd =~/q/ + @conn.execute(%{ + CREATE TABLE metadata_and_text ( + tid BIGINT PRIMARY KEY, + /* title */ + #{column.title.create_column} + #{column.title_main.create_column} + #{column.title_sub.create_column} + #{column.title_short.create_column} + #{column.title_edition.create_column} + #{column.title_note.create_column} + #{column.title_language.create_column} + #{column.title_language_char.create_column} + /* creator */ + #{column.creator_author.create_column} + #{column.creator_author_honorific.create_column} + #{column.creator_author_nationality.create_column} + #{column.creator_contributor.create_column} + #{column.creator_illustrator.create_column} + #{column.creator_photographer.create_column} + #{column.creator_translator.create_column} + #{column.creator_prepared_by.create_column} + #{column.creator_digitized_by.create_column} + #{column.creator_audio.create_column} + #{column.creator_video.create_column} + /* language */ + #{column.language_document.create_column} + #{column.language_document_char.create_column} + #{column.language_original.create_column} + #{column.language_original_char.create_column} + /* date */ + #{column.date_added_to_site.create_column} + #{column.date_available.create_column} + #{column.date_created.create_column} + 
#{column.date_issued.create_column} + #{column.date_modified.create_column} + #{column.date_published.create_column} + #{column.date_valid.create_column} + #{column.date_translated.create_column} + #{column.date_original_publication.create_column} + #{column.date_generated.create_column} + /* publisher */ + #{column.publisher.create_column} + /* original */ + #{column.original_publisher.create_column} + #{column.original_language.create_column} + #{column.original_language_char.create_column} + #{column.original_source.create_column} + #{column.original_institution.create_column} + #{column.original_nationality.create_column} + /* rights */ + #{column.rights_all.create_column} + #{column.rights_copyright_text.create_column} + #{column.rights_copyright_translation.create_column} + #{column.rights_copyright_illustrations.create_column} + #{column.rights_copyright_photographs.create_column} + #{column.rights_copyright_preparation.create_column} + #{column.rights_copyright_digitization.create_column} + #{column.rights_copyright_audio.create_column} + #{column.rights_copyright_video.create_column} + #{column.rights_license.create_column} + /* classify */ + #{column.classify_topic_register.create_column} + #{column.classify_subject.create_column} + #{column.classify_type.create_column} + #{column.classify_loc.create_column} + #{column.classify_dewey.create_column} + #{column.classify_oclc.create_column} + #{column.classify_pg.create_column} + #{column.classify_isbn.create_column} + #{column.classify_format.create_column} + #{column.classify_identifier.create_column} + #{column.classify_relation.create_column} + #{column.classify_coverage.create_column} + #{column.classify_keywords.create_column} + /* notes */ + #{column.notes_abstract.create_column} + #{column.notes_comment.create_column} + #{column.notes_description.create_column} + #{column.notes_history.create_column} + #{column.notes_prefix.create_column} + #{column.notes_prefix_a.create_column} + 
#{column.notes_prefix_b.create_column} + #{column.notes_suffix.create_column} + /* src */ + #{column.src_filename.create_column} + #{column.src_fingerprint.create_column} + #{column.src_filesize.create_column} + #{column.src_word_count.create_column} + #{column.src_txt.create_column} + /* misc */ + #{column.fulltext.create_column} + #{column.skin_name.create_column} + #{column.skin_fingerprint.create_column} + #{column.skin.create_column} + #{column.links.create_column.gsub(/,$/,'')} +/* subj VARCHAR(64) NULL, */ +/* contact VARCHAR(100) NULL, */ +/* information VARCHAR(100) NULL, */ +/* types CHAR(1) NULL, */ +/* writing_focus_nationality VARCHAR(100) NULL, */ + ); + }) + @comment.psql.metadata_and_text if @comment + end + def doc_objects # create doc_objects base + print %{ + to be populated from documents files + create tables doc_objects + data import through ruby transfer + } unless @opt.cmd =~/q/ + @conn.execute(%{ + CREATE TABLE doc_objects ( + lid BIGINT PRIMARY KEY, + metadata_tid BIGINT REFERENCES metadata_and_text, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + clean TEXT NULL, + body TEXT NULL, + seg VARCHAR(256) NULL, + lev_an VARCHAR(1), + lev SMALLINT NULL, + lev1 SMALLINT, + lev2 SMALLINT, + lev3 SMALLINT, + lev4 SMALLINT, + lev5 SMALLINT, + lev6 SMALLINT, + en_a SMALLINT NULL, + en_z SMALLINT NULL, + en_a_asterisk SMALLINT NULL, + en_z_asterisk SMALLINT NULL, + en_a_plus SMALLINT NULL, + en_z_plus SMALLINT NULL, + t_of VARCHAR(16), + t_is VARCHAR(16), + node VARCHAR(16) NULL, + parent VARCHAR(16) NULL, + digest_clean CHAR(#{@@dl}), + digest_all CHAR(#{@@dl}), + types CHAR(1) NULL + ); + }) + @comment.psql.doc_objects if @comment + end + def endnotes + print %{ + to be populated from document files + create tables endnotes + data import through ruby transfer + } unless @opt.cmd =~/q/ + @conn.execute(%{ + CREATE TABLE endnotes ( + nid BIGINT PRIMARY KEY, + document_lid BIGINT REFERENCES doc_objects, + nr SMALLINT, + clean TEXT NULL, + body 
TEXT NULL, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + digest_clean CHAR(#{@@dl}), + metadata_tid BIGINT REFERENCES metadata_and_text + ); + }) + @comment.psql.endnotes if @comment + end + def endnotes_asterisk + print %{ + to be populated from document files + create tables endnotes_asterisk + data import through ruby transfer + } unless @opt.cmd =~/q/ + @conn.execute(%{ + CREATE TABLE endnotes_asterisk ( + nid BIGINT PRIMARY KEY, + document_lid BIGINT REFERENCES doc_objects, + nr SMALLINT, + clean TEXT NULL, + body TEXT NULL, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + digest_clean CHAR(#{@@dl}), + metadata_tid BIGINT REFERENCES metadata_and_text + ); + }) + @comment.psql.endnotes_asterisk if @comment + end + def endnotes_plus + print %{ + to be populated from document files + create tables endnotes_plus + data import through ruby transfer + } unless @opt.cmd =~/q/ + @conn.execute(%{ + CREATE TABLE endnotes_plus ( + nid BIGINT PRIMARY KEY, + document_lid BIGINT REFERENCES doc_objects, + nr SMALLINT, + clean TEXT NULL, + body TEXT NULL, + ocn SMALLINT, + ocnd VARCHAR(6), + ocns VARCHAR(6), + digest_clean CHAR(#{@@dl}), + metadata_tid BIGINT REFERENCES metadata_and_text + ); + }) + @comment.psql.endnotes_plus if @comment + end + def urls # create doc_objects file links mapping + print %{ + currently using sisu dbi module + to be populated from doc_objects files + create tables urls + data import through ruby transfer + } unless @opt.cmd =~/q/ + @conn.execute(%{ + CREATE TABLE urls ( + metadata_tid BIGINT REFERENCES metadata_and_text, + plaintext varchar(512), + html_toc varchar(512), + html_doc varchar(512), + xhtml varchar(512), + xml_sax varchar(512), + xml_dom varchar(512), + odf varchar(512), + pdf_p varchar(512), + pdf_l varchar(512), + concordance varchar(512), + latex_p varchar(512), + latex_l varchar(512), + digest varchar(512), + manifest varchar(512), + markup varchar(512), + sisupod varchar(512) + ); + }) + @comment.psql.urls if 
@comment + end + self + end + end + class Comment < SiSU_DB_columns::Columns + def initialize(conn,sql_type='pg') + @conn=conn + if sql_type =~ /pg/; psql + end + end + def psql + def conn_execute_array(sql_arr) + @conn.transaction do |conn| + sql_arr.each do |sql| + conn.execute(sql) + end + end + end + def metadata_and_text + sql_arr=[ + %{COMMENT ON Table metadata_and_text + IS 'contains SiSU metadata and fulltext for search (including source .sst if shared)';}, + %{COMMENT ON COLUMN metadata_and_text.tid + IS 'unique';}, + %{#{column.title.column_comment}}, + %{#{column.title_main.column_comment}}, + %{#{column.title_sub.column_comment}}, + %{#{column.title_short.column_comment}}, + %{#{column.title_edition.column_comment}}, + %{#{column.title_note.column_comment}}, + %{#{column.title_language.column_comment}}, + %{#{column.title_language_char.column_comment}}, + %{#{column.creator_author.column_comment}}, + %{#{column.creator_author_honorific.column_comment}}, + %{#{column.creator_author_nationality.column_comment}}, + %{#{column.creator_contributor.column_comment}}, + %{#{column.creator_illustrator.column_comment}}, + %{#{column.creator_photographer.column_comment}}, + %{#{column.creator_translator.column_comment}}, + %{#{column.creator_prepared_by.column_comment}}, + %{#{column.creator_digitized_by.column_comment}}, + %{#{column.creator_audio.column_comment}}, + %{#{column.creator_video.column_comment}}, + %{#{column.language_document.column_comment}}, + %{#{column.language_document_char.column_comment}}, + %{#{column.language_original.column_comment}}, + %{#{column.language_original_char.column_comment}}, + %{#{column.date_added_to_site.column_comment}}, + %{#{column.date_available.column_comment}}, + %{#{column.date_created.column_comment}}, + %{#{column.date_issued.column_comment}}, + %{#{column.date_modified.column_comment}}, + %{#{column.date_published.column_comment}}, + %{#{column.date_valid.column_comment}}, + 
%{#{column.date_translated.column_comment}}, + %{#{column.date_original_publication.column_comment}}, + %{#{column.date_generated.column_comment}}, + %{#{column.publisher.column_comment}}, + %{#{column.original_publisher.column_comment}}, + %{#{column.original_language.column_comment}}, + %{#{column.original_language_char.column_comment}}, + %{#{column.original_source.column_comment}}, + %{#{column.original_institution.column_comment}}, + %{#{column.original_nationality.column_comment}}, + %{#{column.rights_all.column_comment}}, + %{#{column.rights_copyright_text.column_comment}}, + %{#{column.rights_copyright_translation.column_comment}}, + %{#{column.rights_copyright_illustrations.column_comment}}, + %{#{column.rights_copyright_photographs.column_comment}}, + %{#{column.rights_copyright_preparation.column_comment}}, + %{#{column.rights_copyright_digitization.column_comment}}, + %{#{column.rights_copyright_audio.column_comment}}, + %{#{column.rights_copyright_video.column_comment}}, + %{#{column.rights_license.column_comment}}, + %{#{column.classify_topic_register.column_comment}}, + %{#{column.classify_subject.column_comment}}, + %{#{column.classify_type.column_comment}}, + %{#{column.classify_loc.column_comment}}, + %{#{column.classify_dewey.column_comment}}, + %{#{column.classify_oclc.column_comment}}, + %{#{column.classify_pg.column_comment}}, + %{#{column.classify_isbn.column_comment}}, + %{#{column.classify_format.column_comment}}, + %{#{column.classify_identifier.column_comment}}, + %{#{column.classify_relation.column_comment}}, + %{#{column.classify_coverage.column_comment}}, + %{#{column.classify_keywords.column_comment}}, + %{#{column.notes_abstract.column_comment}}, + %{#{column.notes_comment.column_comment}}, + %{#{column.notes_description.column_comment}}, + %{#{column.notes_history.column_comment}}, + %{#{column.notes_prefix.column_comment}}, + %{#{column.notes_prefix_a.column_comment}}, + %{#{column.notes_prefix_b.column_comment}}, + 
%{#{column.notes_suffix.column_comment}}, + %{#{column.src_filename.column_comment}}, + %{#{column.src_fingerprint.column_comment}}, + %{#{column.src_filesize.column_comment}}, + %{#{column.src_word_count.column_comment}}, + %{#{column.src_txt.column_comment}}, + %{#{column.fulltext.column_comment}}, + %{#{column.skin_name.column_comment}}, + %{#{column.skin_fingerprint.column_comment}}, + %{#{column.skin.column_comment}}, + %{#{column.links.column_comment}}, + ] + conn_execute_array(sql_arr) + end + def doc_objects + sql_arr=[ + %{COMMENT ON Table doc_objects + IS 'contains searchable text of SiSU document objects';}, + %{COMMENT ON COLUMN doc_objects.lid + IS 'unique';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text';}, + %{COMMENT ON COLUMN doc_objects.lev_an + IS 'doc level A-C 1-6';}, + %{COMMENT ON COLUMN doc_objects.lev + IS 'doc level 1-6 \d\~';}, + %{COMMENT ON COLUMN doc_objects.seg + IS 'segment name from level number 4 (lv 1)';}, + %{COMMENT ON COLUMN doc_objects.ocn + IS 'object citation number';}, + %{COMMENT ON COLUMN doc_objects.en_a + IS 'first endnote number in text object (eg. NULL or 34) (used with en_z to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_z + IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_a_asterisk + IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_asterisk to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_z_asterisk + IS 'last endnote number within text object (eg. NULL, 34 or say 47) (used with en_a_asterisk to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_a_plus + IS 'first endnote number in text object (eg. NULL or 34) (used with en_z_plus to create range)';}, + %{COMMENT ON COLUMN doc_objects.en_z_plus + IS 'last endnote number within text object (eg. 
NULL, 34 or say 47) (used with en_a_plus to create range)';}, + %{COMMENT ON COLUMN doc_objects.types + IS 'document types seg scroll';}, + %{COMMENT ON COLUMN doc_objects.clean + IS 'text object - substantive text: clean, stripped of markup';}, + %{COMMENT ON COLUMN doc_objects.body + IS 'text object - substantive text: light html markup';}, + %{COMMENT ON COLUMN doc_objects.lev1 + IS 'document structure, level number 1';}, + %{COMMENT ON COLUMN doc_objects.lev2 + IS 'document structure, level number 2';}, + %{COMMENT ON COLUMN doc_objects.lev3 + IS 'document structure, level number 3';}, + %{COMMENT ON COLUMN doc_objects.lev4 + IS 'document structure, level number 4';}, + %{COMMENT ON COLUMN doc_objects.lev5 + IS 'document structure, level number 5';}, + %{COMMENT ON COLUMN doc_objects.lev6 + IS 'document structure, level number 6';}, + %{COMMENT ON COLUMN doc_objects.t_of + IS 'document structure, type of object (object is of)';}, + %{COMMENT ON COLUMN doc_objects.t_is + IS 'document structure, object is';}, + %{COMMENT ON COLUMN doc_objects.node + IS 'document structure, object node if heading';}, + %{COMMENT ON COLUMN doc_objects.parent + IS 'document structure, object parent (is a heading)';} + ] + conn_execute_array(sql_arr) + end + def endnotes + sql_arr=[ + %{COMMENT ON Table endnotes + IS 'contains searchable text of SiSU documents endnotes';}, + %{COMMENT ON COLUMN endnotes.nid + IS 'unique';}, + %{COMMENT ON COLUMN endnotes.document_lid + IS 'ties to text block from which referenced';}, + %{COMMENT ON COLUMN endnotes.nr + IS 'endnote number ';}, + %{COMMENT ON COLUMN endnotes.clean + IS 'endnote substantive content, stripped of markup';}, + %{COMMENT ON COLUMN endnotes.body + IS 'endnote substantive content';}, + %{COMMENT ON COLUMN endnotes.ocn + IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';} + ] + 
conn_execute_array(sql_arr) + end + def endnotes_asterisk + sql_arr=[ + %{COMMENT ON Table endnotes_asterisk + IS 'contains searchable text of SiSU documents endnotes marked with asterisk';}, + %{COMMENT ON COLUMN endnotes_asterisk.nid + IS 'unique';}, + %{COMMENT ON COLUMN endnotes_asterisk.document_lid + IS 'ties to text block from which referenced';}, + %{COMMENT ON COLUMN endnotes_asterisk.nr + IS 'endnote number ';}, + %{COMMENT ON COLUMN endnotes_asterisk.clean + IS 'endnote substantive content, stripped of markup';}, + %{COMMENT ON COLUMN endnotes_asterisk.body + IS 'endnote substantive content';}, + %{COMMENT ON COLUMN endnotes_asterisk.ocn + IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';} + ] + conn_execute_array(sql_arr) + end + def endnotes_plus + sql_arr=[ + %{COMMENT ON Table endnotes_plus + IS 'contains searchable text of SiSU documents endnotes marked with plus';}, + %{COMMENT ON COLUMN endnotes_plus.nid + IS 'unique';}, + %{COMMENT ON COLUMN endnotes_plus.document_lid + IS 'ties to text block from which referenced';}, + %{COMMENT ON COLUMN endnotes_plus.nr + IS 'endnote number ';}, + %{COMMENT ON COLUMN endnotes_plus.clean + IS 'endnote substantive content, stripped of markup';}, + %{COMMENT ON COLUMN endnotes_plus.body + IS 'endnote substantive content';}, + %{COMMENT ON COLUMN endnotes_plus.ocn + IS 'object citation no# <\~(\d+)> from which endnote is referenced';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document';}, + ] + conn_execute_array(sql_arr) + end + def urls + sql_arr=[ + %{COMMENT ON Table urls + IS 'contains base url links to different SiSU output';}, + %{COMMENT ON COLUMN doc_objects.metadata_tid + IS 'tie to title in metadata_and_text - unique for each document, the mapping of rows is one to one';}, + %{COMMENT ON COLUMN 
urls.plaintext + IS 'plaintext utf-8';}, + %{COMMENT ON COLUMN urls.html_toc + IS 'table of contents for segmented html document';}, + %{COMMENT ON COLUMN urls.html_doc + IS 'html document (scroll)';}, + %{COMMENT ON COLUMN urls.xhtml + IS 'xhtml document (scroll)';}, + %{COMMENT ON COLUMN urls.xml_sax + IS 'xml sax oriented document (scroll)';}, + %{COMMENT ON COLUMN urls.xml_dom + IS 'xml dom oriented document (scroll)';}, + %{COMMENT ON COLUMN urls.odf + IS 'opendocument format text';}, + %{COMMENT ON COLUMN urls.pdf_p + IS 'pdf portrait';}, + %{COMMENT ON COLUMN urls.pdf_l + IS 'pdf landscape';}, + %{COMMENT ON COLUMN urls.concordance + IS 'rudimentary document index linked to html';}, + %{COMMENT ON COLUMN urls.latex_p + IS 'latex portrait';}, + %{COMMENT ON COLUMN urls.latex_l + IS 'latex_landscape';}, + %{COMMENT ON COLUMN urls.markup + IS 'markup';}, + %{COMMENT ON COLUMN urls.sisupod + IS 'SiSU document format .tgz (all SiSU information on document)';}, + ] + conn_execute_array(sql_arr) + end + self + end + end +end +__END__ diff --git a/lib/sisu/v3/db_dbi.rb b/lib/sisu/v3/db_dbi.rb new file mode 100644 index 00000000..1a3825b9 --- /dev/null +++ b/lib/sisu/v3/db_dbi.rb @@ -0,0 +1,93 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: modules shared by the different db types, dbi, postgresql, + sqlite + +=end +module SiSU_DB_DBI + require "#{SiSU_lib}/db_columns" # db_columns.rb + require "#{SiSU_lib}/db_tests" # db_tests.rb + require "#{SiSU_lib}/db_create" # db_create.rb + require "#{SiSU_lib}/db_select" # db_select.rb + require "#{SiSU_lib}/db_indexes" # db_indexes.rb + require "#{SiSU_lib}/db_drop" # db_drop.rb + require "#{SiSU_lib}/db_remove" # db_remove.rb + require "#{SiSU_lib}/db_load_tuple" # db_load_tuple.rb + require "#{SiSU_lib}/db_import" # db_import.rb + class Column_size < SiSU_DB_columns::Column_size # db_columns.rb + end + class Test < SiSU_DB_tests::Test # db_tests.rb + end + class Create . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: modules shared by the different db types, dbi, postgresql, + sqlite + +=end +module SiSU_DB_drop + class Drop + require "#{SiSU_lib}/response" # response.rb + def initialize(opt,conn,db_info,sql_type='') + @opt,@conn,@db_info,@sql_type=opt,conn,db_info,sql_type + @ans=SiSU_Response::Response.new + case @sql_type + when /sqlite/ + cascade='' + else + cascade='CASCADE' + end + @drop_table=[ + "DROP TABLE metadata_and_text #{cascade};", + "DROP TABLE doc_objects #{cascade};", + "DROP TABLE urls #{cascade};", + "DROP TABLE endnotes #{cascade};", + "DROP TABLE endnotes_asterisk #{cascade};", + "DROP TABLE endnotes_plus #{cascade};", + ] + end + def drop + def tables #% drop all tables + begin + msg_sqlite="as not all disk space is recovered after dropping the database << #{@db_info.sqlite.db} >>, you may be better off deleting the file, and recreating it as necessary" + case @sql_type + when /sqlite/ + puts msg_sqlite + ans=@ans.response?('remove sql database?') + if ans \ + and File.exist?(@db_info.sqlite.db) + @conn.close + File.unlink(@db_info.sqlite.db) + db=SiSU_Env::Info_db.new + conn=db.sqlite.conn_sqlite3 + sdb=SiSU_DB_DBI::Create.new(@opt,conn,@db_info,@sql_type) + sdb_index=SiSU_DB_DBI::Index.new(@opt,conn,@db_info,@sql_type) + sdb.output_dir? + begin + sdb.create_db + sdb.create_table.metadata_and_text + sdb.create_table.doc_objects + sdb.create_table.endnotes + sdb.create_table.endnotes_asterisk + sdb.create_table.endnotes_plus + sdb.create_table.urls + sdb_index.create_indexes + rescue; SiSU_Errors::Info_error.new($!,$@,'-D').error; @sdb.output_dir? 
+ end + exit + else + @conn.transaction + @drop_table.each do |d| + @conn.execute(d) + end + @conn.commit + end + else + @drop_table.each do |d| + @conn.execute(d) + end + end + rescue + case @sql_type + when /sqlite/ + ans=@ans.response?('remove sql database?') + if ans and File.exist?(@db_info.sqlite.db); File.unlink(@db_info.sqlite.db) + end + else + @drop_table.each do |d| + @conn.execute(d) + end + end + ensure + end + end + def indexes + def conn_execute_array(sql_arr) + @conn.transaction do |conn| + sql_arr.each do |sql| + conn.execute(sql) + end + end + end + def base #% drop base indexes + print "\n drop documents common indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_title;}, + %{DROP INDEX idx_author;}, + %{DROP INDEX idx_filename;}, + %{DROP INDEX idx_topics;}, + %{DROP INDEX idx_ocn;}, + %{DROP INDEX idx_digest_clean;}, + %{DROP INDEX idx_digest_all;}, + %{DROP INDEX idx_lev1;}, + %{DROP INDEX idx_lev2;}, + %{DROP INDEX idx_lev3;}, + %{DROP INDEX idx_lev4;}, + %{DROP INDEX idx_lev5;}, + %{DROP INDEX idx_lev6;}, + %{DROP INDEX idx_endnote_nr;}, + %{DROP INDEX idx_digest_en;}, + %{DROP INDEX idx_endnote_nr_asterisk;}, + %{DROP INDEX idx_endnote_asterisk;}, + %{DROP INDEX idx_digest_en_asterisk;}, + %{DROP INDEX idx_endnote_nr_plus;}, + %{DROP INDEX idx_endnote_plus;}, + %{DROP INDEX idx_digest_en_plus}, + ] + conn_execute_array(sql_arr) + end + def text #% drop TEXT indexes, sqlite + print "\n drop documents TEXT indexes\n" unless @opt.cmd =~/q/ + sql_arr=[ + %{DROP INDEX idx_clean;}, + %{DROP INDEX idx_endnote}, + ] + conn_execute_array(sql_arr) + end + self + end + indexes.base + @opt.cmd=~/D/ || ((@opt.mod=~/psql/) ? 
'' : indexes.text) + self + end + end +end +__END__ diff --git a/lib/sisu/v3/db_import.rb b/lib/sisu/v3/db_import.rb new file mode 100644 index 00000000..41515747 --- /dev/null +++ b/lib/sisu/v3/db_import.rb @@ -0,0 +1,649 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: modules shared by the different db types, dbi, postgresql, + sqlite + +=end +module SiSU_DB_import + require "#{SiSU_lib}/db_columns" # db_columns.rb + require "#{SiSU_lib}/db_load_tuple" # db_load_tuple.rb + require "#{SiSU_lib}/db_sqltxt" # db_sqltxt.rb + require "#{SiSU_lib}/shared_html_lite" # shared_html_lite.rb + require 'sqlite3' + class Import < SiSU_DB_text::Prepare + include SiSU_Param + include SiSU_Screen + @@dl=nil + @@hname=nil + attr_accessor :tp + def initialize(opt,conn,file,sql_type='pg') + @opt,@conn,@file,@sql_type=opt,conn,file,sql_type + @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX + @env=SiSU_Env::Info_env.new(@opt.fns) + @dal="#{@env.path.dal}" + if @opt.fns.empty? or @opt.cmd.empty?; @fnb='' + else + @md=SiSU_Param::Parameters.new(@opt).get + @fnb=@md.fnb + end + @suffix=@opt.fns[/(?:.+?)(?:\.ssm\.sst|\.-?sst)/,1] + @fnc="#{@dal}/#{@opt.fns}.content.rbm" + @@seg,@@seg_full='','' #create? consider placing field just before clean text as opposed to seg which contains seg(.html) name info seg_full would contain seg info for levels 5 & 6 where available eg seg_full may be 7.3 (level 5) and 7.3.1 (level 6) where seg is 7 + @col=Hash.new('') + @col[:ocn]='' + @counter={} + @db=SiSU_Env::Info_db.new + if @sql_type=='sqlite' + @driver_sqlite3=(@conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]) \ + ? true \ + : false + end + sql='SELECT MAX(lid) FROM doc_objects' + begin + @col[:lid] ||=0 + @col[:lid]=@driver_sqlite3 \ + ? 
@conn.execute( sql ).join.to_i \ + : @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } + rescue + puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/ + end + @col[:lid]=0 if @col[:lid].nil? or @col[:lid].to_s.empty? + sql='SELECT MAX(nid) FROM endnotes' + begin + @id_n ||=0 + @id_n=@driver_sqlite3 \ + ? @conn.execute( sql ).join.to_i \ + : @id_n=@conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } + rescue + puts "#{__FILE__}:#{__LINE__}" if @opt.cmd =~/M/ + end + @id_n =0 if @col[:lid].nil? or @col[:lid].to_s.empty? + @col[:lv1]=@col[:lv2]=@col[:lv3]=@col[:lv4]=@col[:lv5]=@col[:lv6]=0 + @db=SiSU_Env::Info_db.new + @@dl ||=SiSU_Env::Info_env.new.digest.length + end + def marshal_load + require "#{SiSU_lib}/dal" # dal.rb + @dal_array=SiSU_DAL::Source.new(@opt).get # dal file drawn here + SiSU_Screen::Ansi.new(@opt.cmd,"#{@db.psql.db}::#{@opt.fns}").puts_blue if @opt.cmd =~/vVM/ + SiSU_Screen::Ansi.new(@opt.cmd,'Marshal Load',@fnc).puts_grey if @opt.cmd =~/v/ + select_first_match=%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{@opt.fns}'; } + file_exist=@sql_type=~/sqlite/ \ + ? @conn.get_first_value(select_first_match) \ + : @conn.select_one(select_first_match) + if not file_exist + t_d=[] # transaction_data + t_d << db_import_metadata + t_d << db_import_documents(@dal_array) + t_d << db_import_urls(@dal_array,@fnc) #import OID on/off + t_d=t_d.flatten + if @opt.cmd =~/[MV]/ + puts @conn.class if defined? @conn.class + puts @conn.driver_name if defined? @conn.driver_name + puts @conn.driver if defined? 
@conn.driver + end + begin + sql='' + if @sql_type=~/sqlite/ + @conn.transaction do |conn| + t_d.each do |sql| + conn.execute(sql) + end + end + #also 'execute' works for sqlite + #@conn.execute("BEGIN") + # t_d.each do |sql| + # @conn.execute(sql) + # end + #@conn.execute("COMMIT") + else + #'do' works for postgresql + @conn.do("BEGIN") + t_d.each do |sql| + @conn.do(sql) + end + @conn.do("COMMIT") + end + rescue DBI::DatabaseError => e + puts "Error code: #{e.err}" + puts "Error message: #{e.errstr}" + puts "Error SQLSTATE: #{e.state}" + SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + sqlfn="#{@env.path.sql}/#{@md.fnb}.sql" + sql=File.new(sqlfn,'w') + t_d.each {|i| sql.puts i} + p sqlfn + if @opt.cmd =~/M/ + puts sql + p @conn.methods.sort + puts "#{__FILE__}:#{__LINE__}" + end + rescue + SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + sqlfn="#{@env.path.sql}/#{@md.fnb}.sql" + sql=File.new(sqlfn,'w') + t_d.each {|i| sql.puts i} + p sqlfn + if @opt.cmd =~/M/ + puts sql + p @conn.methods.sort + puts "#{__FILE__}:#{__LINE__}" + end + ensure + end + else + if file_exist + @db=SiSU_Env::Info_db.new + puts "\n#{@cX.grey}file #{@cX.off} #{@cX.blue}#{@opt.fns}#{@cX.off} #{@cX.grey}already exists in database#{@cX.off} #{@cX.blue}#{@db.psql.db}#{@cX.off} #{@cX.brown}update instead?#{@cX.off}" + end + end + end + def pf_db_import_transaction_open + end + def pf_db_import_transaction_close + end + def db_import_metadata #% import documents - populate database + print %{ #{@cX.grey}import documents dbi_unit #{@cX.off} } if @opt.cmd =~/vVM/ + @tp={} + @md=SiSU_Param::Parameters.new(@opt).get +#% sisutxt & fulltxt + if FileTest.exist?(@md.fns) + txt_arr=IO.readlines(@md.fns,'') + src=txt_arr.join("\n") + src=special_character_escape(src) + @tp[:sisutxt_f],@tp[:sisutxt_i]='sisutxt, ',"'#{src}', " + txt=clean_searchable_text(txt_arr) + #special_character_escape(txt) + @tp[:fulltxt_f],@tp[:fulltxt_i]='fulltxt, ',"'#{txt}', " + end +#% title + if defined? 
@md.title.full \ + and @md.title.full=~/\S+/ # DublinCore 1 - title + #@tp[:title]=@md.title.full + #special_character_escape(@tp[:title]) + #@tp[:title_f],@tp[:title_i]='title, ',"'#{@tp[:title]}', " + sql='SELECT MAX(tid) FROM metadata_and_text' + begin + @@id_t ||=0 + id_t=if @driver_sqlite3 + @conn.execute( sql ).join.to_i # { |x| id_t=x.join.to_i } + else @conn.execute( sql ) { |x| x.fetch_all.to_s.to_i } + end + @@id_t=id_t if id_t + rescue + puts "#{__FILE__} #{__LINE__}" if @opt.cmd =~/M/ + end + @@id_t =0 if @col[:lid].nil? or @col[:lid].to_s.empty? + @@id_t+=1 #bug related, needs to be performed once at start of file, but consider moving, as, placed here it means program will fail if document header lacks @title: + puts %{\n#{@cX.grey}Processing file number#{@cX.off}: #{@cX.green}#{@@id_t}#{@@cX.off}} if @opt.cmd =~/vVM/ + end + ################ CLEAR ############## + SiSU_DB_DBI::Test.new(self,@opt).verify #% import title names, filenames (tuple) + t=SiSU_DB_tuple::Load_metadata.new(@conn,@@id_t,@md,@file) + tuple=t.tuple + tuple + end + def db_import_documents(dal_array) #% import documents - populate main database table, import into substantive database tables (tuple) + begin + @col[:tid]=@@id_t + @en,@en_ast,@en_pls,@tuple_array=[],[],[],[] + @col[:en_a],@col[:en_z]=nil,nil + dal_array.each do |data| + data.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + data.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + data.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + data.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') + data.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') + data.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') + data.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') + data.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + data.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1') + 
data.obj.gsub!(/#{Mx[:gl_o]}(●)#{Mx[:gl_c]}\s*/,'\1 ') + data.obj.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'') #check + @col[:seg]=@@seg + if data.of =~/para|heading|block|group/ # regular text what of code-blocks grouped text etc. + notedata=data.obj.dup + if data.is=='heading' \ + and data.ln.inspect=~/[123]/ + @col[:lev],txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.ln,data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'','' + @col[:lid]+=1 + txt=endnotes(txt).extract_any + @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_minus + special_character_escape(@col[:body]) + @col[:plaintext]=@col[:body].dup + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + if @en[0]; @en_a,@en_z=@en[0].first,@en[0].last + end + if @en_ast[0]; @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last + end + if @en_pls[0]; @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last + end + t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) + @tuple_array << t.tuple + case @col[:lev] + when /1/; @col[:lv1]+=1 + when /2/; @col[:lv2]+=1 + when /3/; @col[:lv3]+=1 + end + @col[:lev]=@col[:plaintext]=@col[:body]='' + elsif data.is=='heading' \ + and data.ln==4 + @@seg,txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.name,data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'','' + @col[:seg]=@@seg + @col[:lv4]+=1 + @col[:lid]+=1 + @col[:lev]=4 + @hname=if @col[:seg] \ + and not @col[:seg].to_s.empty? 
+ @@hname=@col[:seg].to_s + else @@hname + end + @env=SiSU_Env::Info_env.new(@md.fns) + @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" + txt=endnotes(txt).extract_any + @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus + special_character_escape(@col[:body]) + @col[:plaintext]=@col[:body].dup + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + @en_a,@en_z=@en[0].first,@en[0].last if @en[0] + @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] + @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] + t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) + @tuple_array << t.tuple + @col[:lev]=@col[:plaintext]=@col[:body]='' + elsif data.is=='heading' and data.ln==5 + txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'','' + @@seg_full=data.name if data.is=='heading' and data.ln==5 and data.name #check data.name + @@seg ||='' #nil # watch + @col[:seg]=@@seg + @col[:lv5]+=1 + @col[:lid]+=1 + @col[:lev]=5 + @hname=if @col[:seg] \ + and not @col[:seg].to_s.empty? 
+ @@hname=@col[:seg].to_s + else @@hname + end + @env=SiSU_Env::Info_env.new(@md.fns) + @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" + txt=endnotes(txt).extract_any + @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus + special_character_escape(@col[:body]) + @col[:plaintext]=@col[:body].dup + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + @en_a,@en_z=@en[0].first,@en[0].last if @en[0] + @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] + @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] + t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) + @tuple_array << t.tuple + @col[:lev]=@col[:plaintext]=@col[:body]='' + elsif data.is=='heading' and data.ln==6 + txt,@col[:ocn],@col[:lev_an],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.obj,data.ocn,data.lv,data.odv,data.osp,data.of,data.is,data.node,data.parent,'','' + @@seg_full=data.name if data.is=='heading' and data.ln==6 and data.name #check data.name + @@seg ||='' #nil # watch + @col[:seg]=@@seg + @col[:lv6]+=1 + @col[:lid]+=1 + @col[:lev]=6 + @hname=if @col[:seg] \ + and not @col[:seg].to_s.empty? 
+ @@hname=@col[:seg].to_s + else @@hname + end + @env=SiSU_Env::Info_env.new(@md.fns) + @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" + txt=endnotes(txt).extract_any + @col[:body]=SiSU_Format_Shared::CSS_Format.new(@md,data).lev4_plus + special_character_escape(@col[:body]) + @col[:plaintext]=@col[:body].dup + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + @en_a,@en_z=@en[0].first,@en[0].last if @en[0] + @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] + @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] + t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) + @tuple_array << t.tuple + @col[:lev]=@col[:plaintext]=@col[:body]='' + else #% regular text + @col[:lid]+=1 + txt='' + txt,@col[:ocn],@col[:ocnd],@col[:ocns],@col[:t_of],@col[:t_is],@col[:node],@col[:parent],@col[:digest_clean],@col[:digest_all]=data.obj,data.ocn,data.odv,data.osp,data.of,data.is,'',data.parent,'','' + @hname=if @col[:seg] \ + and not @col[:seg].to_s.empty? 
+ @@hname=@col[:seg].to_s + else @@hname + end + @env=SiSU_Env::Info_env.new(@md.fns) + @base_url="#{@env.url.root}/#{@md.fnb}/#{@hname}.html" + txt=endnotes(txt).extract_any + if @sql_type=~/pg/ \ + and txt.size > (SiSU_DB_columns::Column_size.new.document_clean - 1) #% examine pg build & remove limitation + puts "\n\nTOO LARGE (TXT - see error log)\n\n" + open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| + error.puts("\n#{@opt.fns}\nTEXT BODY\n#{@col[:body].size} object #{@col[:ocn]} -> #{@col[:body].slice(0..500)}") + end + txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} + end + @en_a,@en_z=@en[0].first,@en[0].last if @en[0] + @en_a_asterisk,@en_z_asterisk=@en_ast[0].first,@en_ast[0].last if @en_ast[0] + @en_a_plus,@en_z_plus=@en_pls[0].first,@en_pls[0].last if @en_pls[0] + @col[:body]=if data.is=='table' + SiSU_Format_Shared::CSS_Format.new(@md,data).html_table + elsif data.is=='code' + SiSU_Format_Shared::CSS_Format.new(@md,data).code + elsif defined? data.indent and data.indent =~/[1-9]/ + SiSU_Format_Shared::CSS_Format.new(@md,data).indent(data.indent) + else + SiSU_Format_Shared::CSS_Format.new(@md,data).norm + end + special_character_escape(@col[:body]) + @col[:plaintext]=@col[:body].dup + @col[:plaintext]=strip_markup(@col[:plaintext]) + @col[:plaintext]=clean_searchable_text(@col[:plaintext]) + t=SiSU_DB_tuple::Load_documents.new(@conn,@col,@opt,@file) + @tuple_array << t.tuple + @en,@en_ast,@en_pls=[],[],[] + @col[:en_a]=@col[:en_z]=nil + @col[:lev]=@col[:plaintext]=@col[:body]=@col[:words]='' + end + if notedata =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ #% import into database endnotes tables + endnote_array=notedata.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) + endnote_array.each do |inf| + if inf[/#{Mx[:en_a_o]}\d+.+?#{Mx[:en_a_c]}/] + if inf[/#{Mx[:en_a_o]}(\d+)(.+?)#{Mx[:en_a_c]}/] + nr,txt,digest_clean=$1,$2.strip,0 + end + @id_n+=1 + special_character_escape(txt) + body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) + strip_markup(txt) + if 
txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) + puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" + open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| + error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") + end + txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} + end + if txt + en={ :type => 'endnotes', + :id => @id_n, + :lid => @col[:lid], + :nr => nr, + :txt => txt, + :body => body, + :ocn => @col[:ocn], + :ocnd => @col[:ocnd], + :ocns => @col[:ocns], + :id_t => @@id_t, + :hash => digest_clean + } + t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) + @tuple_array << t.tuple + end + end + end + word_mode=notedata.scan(/\S+/) + end + if notedata =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables + endnote_array=notedata.scan(/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/) + endnote_array.each do |inf| + if inf[/#{Mx[:en_b_o]}\*\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_b_o]}[*](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + nr,txt,digest_clean=$1,$2.strip,0 + end + @id_n+=1 + special_character_escape(txt) + body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) + strip_markup(txt) + if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) + puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" + open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| + error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") + end + txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} + end + if txt + en={ :type => 'endnotes_asterisk', + :id => @id_n, + :lid => @col[:lid], + :nr => nr, + :txt => txt, + :body => body, + :ocn => @col[:ocn], + :ocnd => @col[:ocnd], + :ocns => @col[:ocns], + :id_t => @@id_t, + :hash => digest_clean + } + t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) + @tuple_array << t.tuple + end + end + end + 
word_mode=notedata.scan(/\S+/) + end + if notedata =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ #% import into database endnotes tables + endnote_array=notedata.scan(/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/) + endnote_array.each do |inf| + if inf[/#{Mx[:en_b_o]}\+\d+.+?#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + if inf[/#{Mx[:en_b_o]}[+](\d+)(.+?)#{Mx[:en_b_c]}/] # dal new endnotes 2003w31/1 + nr,txt,digest_clean=$1,$2.strip,0 + end + @id_n+=1 + special_character_escape(txt) + body=SiSU_Format_Shared::CSS_Format.new(@md,data).endnote(nr,txt) + strip_markup(txt) + if txt.size > (SiSU_DB_columns::Column_size.new.endnote_clean - 1) + puts "\n\nTOO LARGE (ENDNOTE - see error log)\n\n" + open("#{Dir.pwd}/pg_documents_error_log",'a') do |error| + error.puts("\n#{@opt.fns}\nENDNOTE\n#{txt.size} object #{@col[:ocn]},#{@col[:ocnd]},#{@col[:ocns]} -> #{txt.slice(0..500)}") + end + txt=%{\n\nLARGE TEXT BLOCK OMITTED\n\n} + end + if txt + en={ :type => 'endnotes_plus', + :id => @id_n, + :lid => @col[:lid], + :nr => nr, + :txt => txt, + :body => body, + :ocn => @col[:ocn], + :ocnd => @col[:ocnd], + :ocns => @col[:ocns], + :id_t => @@id_t, + :hash => digest_clean + } + t=SiSU_DB_tuple::Load_endnotes.new(@conn,en,@opt,@file) + @tuple_array << t.tuple + end + end + end + word_mode=notedata.scan(/\S+/) + end + end + end + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + end + @tuple_array + end + def endnotes(txt) + @txt=txt + def extract_any + if @txt =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[*+]?(\d+)\s+.+?(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/ + endnotes(@txt).range + @en << endnotes(@txt).standard if @txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/ + @en_ast << endnotes(@txt).asterisk if @txt =~/#{Mx[:en_b_o]}\*.+?#{Mx[:en_b_c]}/ + @en_pls << endnotes(@txt).plus if @txt =~/#{Mx[:en_b_o]}\+.+?#{Mx[:en_b_c]}/ + @txt=endnotes(@txt).clean_text + end + @txt + end + def standard + x=(@txt =~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/) \ + ? 
# Builds the `urls` table INSERT for the current document: one column per
# output format whose generated file actually exists under the output tree.
#
# dbi_unit - unused here; kept so existing callers keep working.
# content  - stored in @fnc.
#
# Returns the SQL string produced by SiSU_DB_tuple::Load_urls#tuple, or nil
# when an error was raised (the error is reported through SiSU_Errors).
def db_import_urls(dbi_unit,content) #% import documents OID - populate database
  tuple=nil
  begin
    @fnc=content
    @env=SiSU_Env::Info_env.new(@opt.fns)
    base=@env.url.root
    out=@env.path.output
    # nil must be tested first: calling empty? on nil raises NoMethodError
    p 'file output path error' if @fnb.nil? || @fnb.empty? #remove
    # [hash key, urls column, file name below "#{out}/#{@fnb}"]
    outputs=[
      [:txt,         'plaintext',   @md.fn[:plain]],
      [:html_toc,    'html_toc',    @md.fn[:toc]],
      [:html_doc,    'html_doc',    @md.fn[:doc]],
      [:xhtml,       'xhtml',       @md.fn[:xhtml]],
      [:xml_sax,     'xml_sax',     @md.fn[:sax]],
      [:xml_dom,     'xml_dom',     @md.fn[:dom]],
      [:odf,         'odf',         @md.fn[:odf]],
      [:pdf_p,       'pdf_p',       @md.fn[:pdf_p]],
      [:pdf_l,       'pdf_l',       @md.fn[:pdf_l]],
      [:concordance, 'concordance', @md.fn[:concordance]],
      [:latex_p,     'latex_p',     "#{@opt.fns}.tex"],
      # the url was previously built as "#{@opt}.fns}.landscape.tex" (typo)
      [:latex_l,     'latex_l',     "#{@opt.fns}.landscape.tex"],
      [:digest,      'digest',      @md.fn[:digest]],
      [:manifest,    'manifest',    @md.fn[:manifest]], #revisit, was to be text, this is html
      [:markup,      'markup',      "#{@opt.fns}.meta"],
      [:sisupod,     'sisupod',     "#{@opt.fns}.tgz"],
    ]
    f,u={},{}
    outputs.each do |key,column,filename|
      next unless FileTest.file?("#{out}/#{@fnb}/#{filename}")
      # trailing commas are part of the value: Load_urls joins the fragments
      # with spaces and appends metadata_tid / the id as the last item
      f[key]="#{column},"
      u[key]="'#{base}/#{@fnb}/#{filename}',"
    end
    tuple=SiSU_DB_tuple::Load_urls.new(@conn,f,u,@@id_t,@opt,@file).tuple
  rescue
    SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error
  end
  tuple
end
module SiSU_DB_index
  # Creates the indexes on the SiSU document tables.
  class Index # create documents Indexes
    # opt      - options object; #cmd and #mod are read here
    # conn     - database connection; must provide #transaction yielding an
    #            object that responds to #execute
    # file     - stored, not used in this class
    # sql_type - stored, not used in this class
    def initialize(opt,conn,file,sql_type='')
      @opt,@conn,@file,@sql_type=opt,conn,file,sql_type
    end
    # Creates the common indexes, then the full-text column indexes unless
    # the command string contains D or the modifiers mention psql.
    def create_indexes # check added from pg not tested
      base
      # NOTE(review): sibling code iterates @opt.mod as an Array and matches it
      # via #inspect; `@opt.mod =~ /psql/` never matches an Array (and raises
      # on Ruby >= 3.2). #inspect behaves the same for a String or nil.
      text unless @opt.cmd=~/D/ || @opt.mod.inspect=~/psql/
    end
    # Runs every statement of sql_arr inside a single transaction.
    def conn_execute_array(sql_arr)
      @conn.transaction do |conn|
        sql_arr.each do |sql|
          conn.execute(sql)
        end
      end
    end
    # Indexes shared by all database back-ends.
    def base
      print "\n          create documents common indexes\n" unless @opt.cmd =~/q/
      sql_arr=[
        %{CREATE INDEX idx_ocn ON doc_objects(ocn);},
        %{CREATE INDEX idx_digest_clean ON doc_objects(digest_clean);},
        %{CREATE INDEX idx_digest_all ON doc_objects(digest_all);},
        %{CREATE INDEX idx_lev1 ON doc_objects(lev1);},
        %{CREATE INDEX idx_lev2 ON doc_objects(lev2);},
        %{CREATE INDEX idx_lev3 ON doc_objects(lev3);},
        %{CREATE INDEX idx_lev4 ON doc_objects(lev4);},
        %{CREATE INDEX idx_lev5 ON doc_objects(lev5);},
        %{CREATE INDEX idx_lev6 ON doc_objects(lev6);},
        %{CREATE INDEX idx_endnote_nr ON endnotes(nr);},
        %{CREATE INDEX idx_digest_en ON endnotes(digest_clean);},
        %{CREATE INDEX idx_endnote_nr_asterisk ON endnotes_asterisk(nr);},
        %{CREATE INDEX idx_endnote_asterisk ON endnotes_asterisk(clean);},
        %{CREATE INDEX idx_digest_en_asterisk ON endnotes_asterisk(digest_clean);},
        %{CREATE INDEX idx_endnote_nr_plus ON endnotes_plus(nr);},
        %{CREATE INDEX idx_endnote_plus ON endnotes_plus(clean);},
        %{CREATE INDEX idx_digest_en_plus ON endnotes_plus(digest_clean);},
        %{CREATE INDEX idx_title ON metadata_and_text(title);},
        %{CREATE INDEX idx_author ON metadata_and_text(creator_author);},
        %{CREATE INDEX idx_filename ON metadata_and_text(src_filename);},
        %{CREATE INDEX idx_topics ON metadata_and_text(classify_topic_register);},
      ]
      conn_execute_array(sql_arr)
    end
    # Indexes on the large plain-text columns.
    def text
      print "\n          create documents TEXT indexes\n" unless @opt.cmd =~/q/
      sql_arr=[
        %{CREATE INDEX idx_clean ON doc_objects(clean);},
        %{CREATE INDEX idx_endnote ON endnotes(clean);}
      ]
      conn_execute_array(sql_arr)
    end
  end
end
__END__
module SiSU_DB_tuple
  require "#{SiSU_lib}/db_columns"                         # db_columns.rb
  # Builds the INSERT statement for one document object (a doc_objects row).
  class Load_documents
    require "#{SiSU_lib}/param"                            # param.rb
    include SiSU_Param
    # conn - stored, not used in this class
    # col  - Hash of column values for the object; :lev and :ocn are
    #        normalised in place
    # opt  - options object; #cmd selects logging (M, V, v)
    # file - IO that receives the SQL in maintenance/verbose mode
    def initialize(conn,col,opt,file)
      @conn,@col,@opt,@file=conn,col,opt,file
      @col[:lev]=@col[:lev].to_i
      # After to_i the level is an Integer, so the former regexp test
      # (Integer =~ Regexp) could never match and raises on Ruby >= 3.2;
      # the numeric range check alone expresses the intended rule.
      @col[:lev]=0 unless (1..6).include?(@col[:lev])
      @col[:ocn]=0 unless @col[:ocn].inspect=~/\d+/
      @cX=SiSU_Screen::Ansi.new(@opt.cmd).cX
    end
    # Returns the INSERT statement; echoes and/or writes it to @file
    # according to the M / V / v command flags.
    def tuple #% import line
      # NOTE(review): values are interpolated into SQL as-is; this relies on
      # the caller having escaped quotes beforehand - confirm against callers.
      columns='lid, metadata_tid, lev, lev_an, clean, body, ocn, ocnd, ocns, seg, lev1, lev2, lev3, lev4, lev5, lev6, '
      keys=[:lev_an,:plaintext,:body,:ocn,:ocnd,:ocns,:seg,:lv1,:lv2,:lv3,:lv4,:lv5,:lv6]
      if @col[:en_a] # endnote range columns only when a range was recorded
        columns+='en_a, en_z, '
        keys+=[:en_a,:en_z]
      end
      columns+='t_of, t_is, node, parent, digest_clean, digest_all'
      keys+=[:t_of,:t_is,:node,:parent,:digest_clean,:digest_all]
      quoted=keys.map { |k| "'#{@col[k]}'" }.join(', ')
      sql_entry="INSERT INTO doc_objects (#{columns}) " +
        "VALUES (#{@col[:lid]}, #{@col[:tid]}, #{@col[:lev]}, #{quoted});"
      if @opt.cmd =~/M/
        if @opt.cmd =~/V/
          puts @file.inspect
          puts sql_entry
        end
        @file.puts sql_entry
      else
        if @opt.cmd =~/V/
          puts sql_entry
          @file.puts sql_entry
        end
      end
      if @opt.cmd =~/v/
        levels="#{@col[:lv1]}\t#{@col[:lv2]}\t#{@col[:lv3]}\t#{@col[:lv4]}\t#{@col[:lv5]}\t#{@col[:lv6]}\t#{@col[:ocn]}\t#{@col[:node]}\t#{@col[:ocns]}"
        case @col[:lev]
        when 4 # segment level: highlighted, and the segment name is appended
          puts %{ #{@cX.green}1>#{@cX.off}\t#{levels}\t#{@col[:seg]}}
        when 1,2,3,5,6
          lev={ 1 => ':A', 2 => ':B', 3 => ':C', 5 => ' 2', 6 => ' 3' }[@col[:lev]]
          puts %{#{lev}>\t#{levels}}
        end
      end
      sql_entry
    end
  end
  # Builds the INSERT statement for a document's metadata_and_text row.
  class Load_metadata #< SiSU_DB_columns::Columns
    # Column accessors of SiSU_DB_columns::Columns#column, in INSERT order.
    # Each accessor's #tuple is read as [name fragment, value fragment].
    COLUMNS=%w[
      title title_main title_sub title_short title_edition title_note
      title_language title_language_char
      creator_author creator_author_honorific creator_author_nationality
      creator_contributor creator_illustrator creator_photographer
      creator_translator creator_prepared_by creator_digitized_by
      creator_audio creator_video
      language_document language_document_char
      language_original language_original_char
      date_added_to_site date_available date_created date_issued
      date_modified date_published date_valid date_translated
      date_original_publication date_generated
      publisher
      original_publisher original_language original_language_char
      original_source original_institution original_nationality
      rights_all rights_copyright_text rights_copyright_translation
      rights_copyright_illustrations rights_copyright_photographs
      rights_copyright_preparation rights_copyright_digitization
      rights_copyright_audio rights_copyright_video rights_license
      classify_topic_register classify_subject classify_type classify_loc
      classify_dewey classify_oclc classify_pg classify_isbn classify_format
      classify_identifier classify_relation classify_coverage classify_keywords
      notes_abstract notes_comment notes_description notes_history
      notes_prefix notes_prefix_a notes_prefix_b notes_suffix
      src_filename src_fingerprint src_filesize src_word_count src_txt
      fulltext
      skin_name skin_fingerprint skin
      links
    ].freeze
    # md is stored as @opt (its #cmd is read below) and also feeds the
    # column builder; id becomes the row's tid.
    def initialize(conn,id,md,file)
      @conn,@id,@opt,@file=conn,id,md,file
      @tp=SiSU_DB_columns::Columns.new(md)
    end
    # Returns the INSERT statement; writes it to @file in maintenance (M)
    # or very-verbose (V) mode.
    def tuple
      # one shared list keeps the name and value lists in the same order
      names=COLUMNS.map { |c| @tp.column.public_send(c).tuple[0] }
      values=COLUMNS.map { |c| @tp.column.public_send(c).tuple[1] }
      sql_entry="INSERT INTO metadata_and_text (\n#{names.join("\n")}\ntid)\n" +
        "VALUES (\n#{values.join("\n")}\n#{@id}\n);"
      if @opt.cmd =~/M/
        puts "maintenance mode on: creating sql transaction file (for last transaction set (document) only):\n\t#{@file.inspect}"
        @file.puts sql_entry
      else
        @file.puts sql_entry if @opt.cmd =~/V/
      end
      sql_entry
    end
  end
  # Builds the INSERT statement for a document's urls row.
  class Load_urls
    # Keys of the f (column fragments) and u (value fragments) hashes, in
    # INSERT order; absent keys contribute an empty fragment.
    OUTPUTS=[:txt,:html_toc,:html_doc,:xhtml,:xml_sax,:xml_dom,:odf,:pdf_p,:pdf_l,
      :concordance,:latex_p,:latex_l,:manifest,:digest,:markup,:sisupod].freeze
    def initialize(conn,f,u,id,opt,file)
      @conn,@f,@u,@id,@opt,@file=conn,f,u,id,opt,file
    end
    # Returns the INSERT statement; writes it to @file in M or V mode.
    def tuple
      names=OUTPUTS.map { |k| @f[k] }.join(' ')
      values=OUTPUTS.map { |k| @u[k] }.join(' ')
      sql_entry="INSERT INTO urls (#{names} metadata_tid) " +
        "VALUES (#{values} #{@id});"
      if @opt.cmd =~/M/
        @file.puts sql_entry
      else
        @file.puts sql_entry if @opt.cmd =~/V/
      end
      sql_entry
    end
  end
  # Builds the INSERT statement for one endnote; en[:type] names the table.
  class Load_endnotes
    def initialize(conn,en,opt,file)
      @conn,@en,@opt,@file=conn,en,opt,file
    end
    # Returns the INSERT statement; writes it to @file in M or V mode.
    def tuple
      sql_entry="INSERT INTO #{@en[:type]} (nid, document_lid, nr, clean, body, ocn, ocnd, ocns, metadata_tid, digest_clean) " +
        "VALUES ('#{@en[:id]}', '#{@en[:lid]}', '#{@en[:nr]}', '#{@en[:txt]}', '#{@en[:body]}', '#{@en[:ocn]}', '#{@en[:ocnd]}', '#{@en[:ocns]}', '#{@en[:id_t]}', '#{@en[:hash]}');"
      if @opt.cmd =~/M/
        @file.puts sql_entry
      else
        @file.puts sql_entry if @opt.cmd =~/V/
      end
      sql_entry
    end
  end
end
__END__
module SiSU_DB_remove
  # Removes every row belonging to one source document from the database.
  class Remove
    # opt      - options object; #fns (source file name) and #cmd are read
    # conn     - database connection (sqlite3 driver or a DBI-style handle)
    # file     - IO that receives the executed SQL in maintenance (M) mode
    # sql_type - 'sqlite' enables the sqlite3-driver detection in #remove
    def initialize(opt,conn,file,sql_type)
      @opt,@conn,@file,@sql_type=opt,conn,file,sql_type
      @md=SiSU_Param::Parameters.new(@opt).get
      @fnb=@md.fnb
      @db=SiSU_Env::Info_db.new
    end
    # Looks up the document's tid by source file name and deletes its rows
    # from all six tables. Reports "no such file" in v/V/M modes when the
    # document is not in the database.
    def remove
      # the driver is recognised by comparing the first ten characters of the
      # two connections' #inspect output
      driver_sqlite3=if @sql_type=='sqlite'
        @conn.inspect.match(/^(.{10})/)[1]==@db.sqlite.conn_sqlite3.inspect.match(/^(.{10})/)[1]
      end
      # double any single quote so the file name cannot end the SQL literal
      fns=@opt.fns.to_s.gsub("'","''")
      del_id=if driver_sqlite3
        found=@conn.get_first_value(%{ SELECT tid FROM metadata_and_text WHERE src_filename = '#{fns}'; })
        # nil.to_i is 0, which used to be treated as an existing document
        found ? found.to_i : nil
      else
        x=@conn.select_one(%{ SELECT metadata_and_text.tid FROM metadata_and_text WHERE metadata_and_text.src_filename = '#{fns}'; })
        x ? (x.join.to_i) : nil
      end
      if del_id
        sql_entry=[
          "DELETE FROM endnotes WHERE metadata_tid = '#{del_id}';",
          "DELETE FROM endnotes_asterisk WHERE metadata_tid = '#{del_id}';",
          "DELETE FROM endnotes_plus WHERE metadata_tid = '#{del_id}';",
          "DELETE FROM doc_objects WHERE metadata_tid = '#{del_id}';",
          "DELETE FROM urls WHERE metadata_tid = '#{del_id}';",
          "DELETE FROM metadata_and_text WHERE metadata_and_text.tid = '#{del_id}';",
        ]
        if driver_sqlite3
          # block form commits on success and rolls back if a statement raises
          @conn.transaction do |conn|
            sql_entry.each { |s| conn.execute(s) }
          end
        else
          sql_entry.each { |s| @conn.execute(s) }
        end
        @file.puts sql_entry if @opt.cmd =~/M/
      elsif @opt.cmd =~/[vVM]/ # was /vVM/, which only matched the literal "vVM"
        db_name=(@sql_type=='sqlite') ? @db.sqlite.db : @db.psql.db
        SiSU_Screen::Ansi.new(@opt.cmd,"no such file in database #{db_name}::#{@opt.fns}").puts_grey
      end
    end
  end
end
__END__
module SiSU_DB_select
  # Dispatches the database command-line modifiers (--createdb, --import,
  # --update, --remove, --drop, ...) to the create/index/drop/import/remove
  # helpers.
  class Case
    # opt      - options object; #mod (list of modifiers), #cmd, #fns are read
    # conn     - database connection, nil when none could be established
    # sql_type - 'pg' or 'sqlite'
    def initialize(opt,conn='',sql_type='pg')
      @opt,@conn,@sql_type=opt,conn,sql_type
      @db=SiSU_Env::Info_db.new
      @file=sql_maintenance_file
      @sdb=SiSU_DB_DBI::Create.new(@opt,@conn,@file,@sql_type)      # db_dbi.rb
      @sdb_index=SiSU_DB_DBI::Index.new(@opt,@conn,@file,@sql_type) # db_dbi.rb
      @sdb_no=SiSU_DB_DBI::Drop.new(@opt,@conn,@db,@sql_type)       # db_dbi.rb
      if @opt.mod.inspect =~/update|import/
        @sdb_import=SiSU_DB_DBI::Import.new(@opt,@conn,@file,@sql_type)
        @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file,@sql_type)
      elsif @opt.mod.inspect =~/remove/
        @sdb_remove_doc=SiSU_DB_DBI::Remove.new(@opt,@conn,@file,@sql_type)
      end
    end
    # Exits the process with a hint when the sqlite file is missing or empty,
    # or when there is no connection at all.
    def db_exist?
      if @sql_type=='sqlite' \
      && (!FileTest.file?(@db.sqlite.db) || FileTest.zero?(@db.sqlite.db))
        puts %{no connection with #{@sql_type} database established, createdb "#{@db.sqlite.db}"?}
        exit
      end
      if @conn.nil?
        db=(@sql_type=='sqlite') ? @db.sqlite.db : @db.psql.db
        puts %{no connection with #{@sql_type} database established, createdb "#{db}"?}
        exit
      end
    end
    # Opens (truncating) "<sqlite path>/<fns>.sql" for the SQL transcript and
    # records the matching sqlite3 replay command in @job. Returns the File,
    # or nil when there is no file name to derive it from.
    def sql_maintenance_file
      # NOTE(review): the guard matches an "M" anywhere in @opt.inspect, not
      # only in @opt.cmd. Left as is: the tuple loaders also write to this
      # file in V mode without M, so narrowing it could leave them without a
      # file - confirm before tightening.
      return nil unless @opt.inspect =~/M/
      return nil unless @opt.fns && !@opt.fns.empty?
      @env=SiSU_Env::Info_env.new(@opt.fns)
      puts "\n#{@env.path.sqlite}/#{@opt.fns}.sql" if @sql_type =~/sqlite/ && @opt.cmd =~/M/
      @db=SiSU_Env::Info_db.new
      @job="sqlite3 #{@db.sqlite.db} < #{@env.path.sqlite}/#{@opt.fns}.sql"
      File.new("#{@env.path.sqlite}/#{@opt.fns}.sql",'w+')
    end
    # Runs the action for every modifier in @opt.mod; unknown modifiers
    # print the help summary.
    def cases
      @opt.mod.each do |mod|
        case mod
        when /^--createdb$/
          @sdb.output_dir?
          begin
            @sdb.create_db
          rescue; @sdb.output_dir?
          end
        when /^--(?:init(?:ialize)?|create(?:all)?)$/
          @sdb.output_dir?
          begin
            create_tables_and_indexes
          rescue
            SiSU_Errors::Info_error.new($!,$@,'-D').error
            @sdb.output_dir?
          end
        when /^--createtables?$/
          @sdb.output_dir?
          begin
            create_tables_and_indexes
          rescue; @sdb.output_dir?
          end
        when /^--recreate$/
          @sdb.output_dir?
          begin
            @sdb_no.drop.tables
            create_tables_and_indexes
          rescue; @sdb.output_dir?
          end
        when /^--cr(eate)?lex$/
          @sdb.output_dir?
          begin
            @sdb.create_table.doc_objects
          rescue; @sdb.output_dir?
          end
        when /^--cr(eate)?metadata$/
          @sdb.output_dir?
          begin
            @sdb.create_table.metadata_and_text
          rescue; @sdb.output_dir?
          end
        when /^--import$/
          db_exist?
          @sdb_import.marshal_load
          report_database
        when /^--update$/
          db_exist?
          @sdb_remove_doc.remove
          @sdb_import.marshal_load
          report_database
        when /^--remove$/
          db_exist?
          @sdb_remove_doc.remove
        when /^--index$/
          db_exist?
          @sdb_index.create_indexes
        when /^(?:--)?droptables?$/ # "--" form added; the bare legacy form still matches
          db_exist?
          @sdb_no.drop.tables
        when /^--dropindex(es)?$/
          db_exist?
          @sdb_no.drop.indexes
        when /^--(?:dropall|drop)$/
          db_exist?
          @sdb_no.drop.tables
        when /^--(?:db=)?(?:(?:sq)?lite|pg(?:sql)?|my(?:sql)?)$/
          # database selector only, nothing to do here
        else
          help=SiSU_Help::Help.new
          help.summary
          help.commands
        end
        if @opt.cmd =~/M/ \
        && @opt.cmd =~/d/
          puts @job
        end
      end
    end
    private
    # Creates the six document tables, then their indexes.
    def create_tables_and_indexes
      @sdb.create_table.metadata_and_text
      @sdb.create_table.doc_objects
      @sdb.create_table.endnotes
      @sdb.create_table.endnotes_asterisk
      @sdb.create_table.endnotes_plus
      @sdb.create_table.urls
      @sdb_index.create_indexes
    end
    # In verbose (v) mode, tells the user which database was written to.
    def report_database
      return unless @opt.cmd =~/v/
      case @sql_type
      when /sqlite/; SiSU_Screen::Ansi.new(@opt.cmd,"sqlite #{@db.sqlite.db} database?").puts_grey
      when /pg/;     SiSU_Screen::Ansi.new(@opt.cmd,"pgaccess or psql #{@db.psql.db} database?").puts_grey
      else puts '???' # a plain String has no puts_grey
      end
    end
  end
end
__END__
module SiSU_DB_text
  # Text clean-up helpers used to prepare document text for the database.
  # NOTE(review): the copy of this file under review had three literals
  # damaged by markup stripping ("<br>" twice, "&nbsp" once); they are
  # restored below and marked - verify against the upstream file.
  class Prepare
    # Escapes str in place for use inside a single-quoted SQL literal and
    # removes SiSU link/tag markers. Returns str.
    def special_character_escape(str)
      str.gsub!(/'/,"''")                                                          #string.gsub!(/'/,"\047") #string.gsub!(/'/,"\\'")
      str.gsub!(/(\\)/m,'\1\1') #ok but with warnings, double backslash on sqlite #str.gsub!(/[\\]/m,'\\x5C') #ok but with warnings, but not for sqlite #str.gsub!(/(\\)/m,'\1') #ok for sqlite not for pgsql
      str.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,"<br>\n")                          # restored literal, see class note
      str.gsub!(/#{Mx[:tag_o]}\S+?#{Mx[:tag_c]}/,'')  #check
      str.gsub!(/#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg))(?:\s+\d+x\d+)?(.+?)#{Mx[:lnk_c]}\S+/,'[image: \1] \2')
      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+?([.,!?]?(?:\s|$))/,'\1\2')
      str.gsub!(/#{Mx[:lnk_o]}\s*(.+?)\s*#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,'\1')
      str
    end
    # Produces clean, searchable plain text from document source.
    #
    # arr - a String (split on newlines) or an Array of Strings; the strings
    #       are modified in place.
    #
    # Returns one String: the cleaned lines followed by the text of the
    # inline endnotes (~{ ... }~) extracted from them, joined by newlines and
    # passed through special_character_escape.
    def clean_searchable_text(arr) #produce clean, searchable, plaintext from document source
      txt_arr,en=[],[]
      arr=arr.class==String ? arr.split(/\n+/m) : arr
      arr.each do |s|
        s.gsub!(/([*\/_-])\{(.+?)\}\1/m,'\2')          # *{..}* /{..}/ _{..}_ -{..}- font markup
        s.gsub!(/^(?:block|group|poem|code)\{/m,'')
        s.gsub!(/^\}(?:block|group|poem|code)/m,'')
        s.gsub!(/\A(?:@\S+:\s+.+)\Z/m,'')              # header lines
        if s =~/^:A~/
          # NOTE(review): @md is not assigned anywhere in this class; it is
          # presumably provided by a subclass - confirm.
          if defined?(@md.creator) \
          && defined?(@md.creator.author) \
          && !@md.creator.author.empty?
            s.gsub!(/@author/,@md.creator.author)
          else
            SiSU_Screen::Ansi.new('v','WARNING Document Author information missing; provide @creator: :author:',@md.fnb).warn unless @md.cmd.inspect =~/q/
          end
          if defined?(@md.title) \
          && defined?(@md.title.full) \
          && !@md.title.full.empty?
            s.gsub!(/@title/,@md.title.full)
          else
            SiSU_Screen::Ansi.new('v','WARNING Document Title missing; provide @title:',@md.fnb).warn unless @md.cmd.inspect =~/q/
          end
        end
        s.gsub!(/^(?:_[1-9]\*?|_\*)\s+/m,'')
        s.gsub!(/^(?:[1-9]\~(\S+)?)\s+/m,'')
        s.gsub!(/^(?::?[A-C]\~(\S+)?)\s+/m,'')
        s.gsub!(/^%{1,3} .+/m,'')                      #removed even if contained in code block
        s.gsub!(/<br>/m,' ')                           # restored literal, see class note
        en << s.scan(/~\{\s*(.+?)\s*\}~/m)
        s.gsub!(/~\{.+?\}~/m,'')
        s.gsub!(/ \s+/m,' ')
      end
      txt_arr << arr << en
      txt=txt_arr.flatten.join("\n")
      special_character_escape(txt)
    end
    # Strips presentation markup from str in place and returns it.
    def strip_markup(str) #define rules, make same as in dal clean
      str.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]')
      str.gsub!(/(?:&nbsp\\;|#{Mx[:nbsp]})+/,' ')      # restored literal, see class note
      str.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1')  #tables
      str.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ')                   #tables
      str.gsub!(/#{Mx[:tc_p]}/u,' ')                   #tables tidy later
      str.gsub!(/<.+?>/,'')
      # scheme separator is "://" (was tested as "//:", which never matched)
      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:file|ftp):\/\/\S+ /,' [image] ') # else image names found in search
      str.gsub!(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,' [image]') # else image names found in search
      str.gsub!(/\s\s+/,' ')
      str.strip!
      str
    end
    # Returns the distinct words of str (two or more characters drawn from
    # letters, digits, backslash, slash, underscore, hyphen), sorted and
    # joined by single spaces.
    def unique_words(str)
      a=str.scan(/[a-zA-Z0-9\\\/_-]{2,}/) #a=str.scan(/\S+{2,}/)
      str=a.uniq.sort.join(' ')
      str
    end
  end
end
__END__
=begin

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:

 * Download:

 * Ralph Amissah

 ** Description: modules shared by the different db types, dbi, postgresql,
    sqlite

=end
module SiSU_DB_tests
  # Reports metadata values that are longer than the limit declared for them.
  # +info+ must respond to #tp (a Hash of field => String) and to one lt_*
  # reader per limit; +opt+ must respond to #cmd. Nothing is printed when
  # opt.cmd contains "q".
  class Test
    # Metadata field => name of the reader on the info object that gives the
    # maximum length for that field, in the order they are checked.
    LENGTH_LIMITS=[
      [:fns,                :lt_filename],
      [:title,              :lt_title],
      [:subtitle,           :lt_subtitle],
      [:creator,            :lt_creator],
      [:author_title,       :lt_author_title],
      [:illustrator,        :lt_illustrator],
      [:translator,         :lt_translator],
      [:prepared_by,        :lt_prepared_by],
      [:digitized_by,       :lt_digitized_by],
      [:subject,            :lt_subject],
      [:description,        :lt_description],
      [:publisher,          :lt_publisher],
      [:contributor,        :lt_contributor],
      [:date,               :lt_date],
      [:date_created,       :lt_date],
      [:date_issued,        :lt_date],
      [:date_valid,         :lt_date],
      [:date_available,     :lt_date],
      [:date_modified,      :lt_date],
      [:date_translated,    :lt_date],
      [:date_added_to_site, :lt_date],
      [:type,               :lt_type],
      [:format,             :lt_format],
      [:identifier,         :lt_identifier],
      [:source,             :lt_source],
      [:language,           :lt_language],
      [:language_original,  :lt_language_original],
      [:relation,           :lt_relation],
      [:coverage,           :lt_coverage],
      [:rights,             :lt_rights],
      [:copyright,          :lt_copyright],
      [:owner,              :lt_owner],
      [:keywords,           :lt_keywords],
      [:abstract,           :lt_abstract],
      [:comment,            :lt_comment],
      [:loc,                :lt_loc],
      [:dewey,              :lt_dewey],
      [:isbn,               :lt_isbn],
      [:pg,                 :lt_pg],
      [:topic_register,     :lt_topic_register],
    ].freeze
    # Stores the collaborators and, unless quiet, prints the file name, title
    # and creator when they are present and non-empty.
    def initialize(info,opt)
      @ck,@opt=info,opt
      return if @opt.cmd =~/q/
      [:fns,:title,:creator].each do |field|
        value=@ck.tp[field]
        puts value if value && !value.empty?
      end
    end
    # Prints "<length> checklength <value>" for every field whose value is
    # longer than its limit, then prints the date when it is non-empty and does
    # not contain a dd-dd-dd digit group. Returns nil.
    def verify
      return if @opt.cmd =~/q/
      LENGTH_LIMITS.each do |field,limit|
        value=@ck.tp[field]
        next unless value
        # the limit is read only for fields that are present, as before
        puts "#{value.length} checklength #{value}" if value.length > @ck.send(limit)
      end
      date=@ck.tp[:date]
      puts date if date && !date.empty? && date !~/\d\d-\d\d-\d\d/
    end
  end
end
__END__
diff --git a/lib/sisu/v3/dbi.rb b/lib/sisu/v3/dbi.rb
new file mode 100644
index 00000000..191d5099
--- /dev/null
+++ b/lib/sisu/v3/dbi.rb
@@ -0,0 +1,143 @@
+# coding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+   SiSU, a framework for document structuring, publishing and search
+
+   Copyright (C) Ralph Amissah
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the Free
+   Software Foundation, either version 3 of the License, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+   more details.
+
+   You should have received a copy of the GNU General Public License along with
+   this program. If not, see <http://www.gnu.org/licenses/>.
=begin

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:

 * Download:

 * Ralph Amissah

 ** Description: postgresql module, dbi import frame

=end
module SiSU_DBI #% database building
  require "#{SiSU_lib}/help"                              # help.rb
  require "#{SiSU_lib}/sysenv"                            # sysenv.rb
  include SiSU_Env; include SiSU_Screen
  require "#{SiSU_lib}/param"                             # param.rb
  include SiSU_Param
  require "#{SiSU_lib}/db_dbi"                            # db_dbi.rb
  include SiSU_DB_DBI
  require "#{SiSU_lib}/shared_html_lite"                  # shared_html_lite.rb
  include SiSU_Format_Shared
  # Selects the database back end from the command options, opens a
  # connection to it and hands over to SiSU_DB_DBI::Case.
  class SiSU_SQL
    # +opt+ must respond to #cmd, #mod and #fns.
    # @sql_type is set to 'pg' or 'sqlite' only when a database action was
    # requested (cmd contains d/D, or mod contains --pg/--pgsql/--lite/--sqlite);
    # otherwise it is left unset.
    def initialize(opt)
      SiSU_Env::Load.new('dbi',true).prog
      @opt=opt
      @db=SiSU_Env::Info_db.new
      mod=@opt.mod.inspect
      if @opt.cmd =~/d/i || mod =~/--(pg(?:sql)?|(?:sq)?lite)/
        maintenance_check(@opt,__FILE__,__LINE__) if @opt.cmd.inspect =~/M/
        # every remaining case selects sqlite: the former fourth elsif and the
        # else could not be reached because "cmd contains d" was already
        # covered by the branch before them
        @sql_type=if @opt.cmd =~/D/ || mod =~/--pg(?:sql)?/
          'pg'
        elsif @opt.cmd =~/d/ && mod =~/--(?:db[=-])?pg(?:sql)?/
          'pg'
        else
          'sqlite'
        end
      end
    end
    # Debugging aid: prints the option modifiers, the command string and the
    # calling location.
    def maintenance_check(opt,file,line)
      p opt.mod
      p opt.cmd
      p "at #{file} #{line}"
    end
    # Opens the postgresql connection. If that raises, prints a hint when a
    # create action was requested, runs the requested cases, then connects
    # through DBI directly.
    def read_psql
      @conn=@db.psql.conn_dbi
    rescue
      if @opt.mod.inspect =~/--(createall|create)/
        puts %{manually create the database: "#{@db.db}" if it does not yet exist}
        #sudo su -p postgres;  createdb #{@db.db}; #[createuser?]
      end
      SiSU_DB_DBI::Case.new(@opt,@conn,@sql_type).cases
      @conn=DBI.connect(@db.dbi,@db.user,@db.db)
    end
    # Opens the sqlite connection; a failure is ignored and leaves @conn as it
    # was (best effort, as before).
    def read_sqlite
      @conn=@db.sqlite.conn_sqlite3
    rescue
      nil
    end
    # Connects to the selected back end, prints the title line unless quiet,
    # and runs the requested cases; an error raised there is passed to
    # SiSU_Errors::Info_error. Returns nil.
    def connect
      case @sql_type
      when /pg/;     read_psql
      when /sqlite/; read_sqlite
      end
      SiSU_Screen::Ansi.new(@opt.cmd,"DBI (#{@sql_type}) #{@opt.mod}",@opt.fns).dbi_title unless @opt.cmd =~/q/
      begin
        SiSU_DB_DBI::Case.new(@opt,@conn,@sql_type).cases
      rescue
        SiSU_Errors::Info_error.new($!,$@,@cf,@opt.fns).error
      end
      nil
    end
  end
end
__END__
diff --git a/lib/sisu/v3/defaults.rb b/lib/sisu/v3/defaults.rb
new file mode 100644
index 00000000..19941d48
--- /dev/null
+++ b/lib/sisu/v3/defaults.rb
@@ -0,0 +1,2342 @@
+# coding: utf-8
+=begin
+
+ * Name: SiSU
+
+ * Description: a framework for document structuring, publishing and search
+
+ * Author: Ralph Amissah
+
+ * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved.
+
+ * License: GPL 3 or later:
+
+   SiSU, a framework for document structuring, publishing and search
+
+   Copyright (C) Ralph Amissah
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the Free
+   Software Foundation, either version 3 of the License, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+   more details.
+
+   You should have received a copy of the GNU General Public License along with
+   this program. If not, see <http://www.gnu.org/licenses/>.
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: Default values (reset by skins) + +=end +$latex_run=nil +module SiSU_Viz + require 'uri' + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Env + require "#{SiSU_lib}/css" # css.rb + include SiSU_Style + class Skin + def initialize + @fonts='verdana, arial, georgia, tahoma, sans-serif, helvetica, times, roman' # 'verdana, arial, georgia, tahoma, sans-serif, helvetica, "times new roman", times, roman' + @dir=SiSU_Env::Info_env.new + @date=SiSU_Env::Info_date.new #{@date.year} + @v=SiSU_Env::Info_version.instance.get_version + end + #% glyph + def glyph_bullet # • + '• ' # [• flagged] + end + #% html + def html_hardspace + ' ' + end + #% php + def php_persist + end + #% javascript #kxjs knxjs + def js_home + end + def js_infobox + end + def js_knxjs + end + def js_head + end + def js_top + end + def js_sisu + end + def js_home + end + def js_sponsor + end + def js_books + end + def js_journals + end + def js_conferences + end + def js_services + end + def js_catalogue + end + def js_doc + end + def js_toc + end + def js_seg + end + def js_mail + end + def js_manifest + end + def js_status + end + def js_next + end + def js_prev + end + def js_plaintext + end + def js_portrait + end + def js_landscape + end + def js_pdf + end + def js_epub + end + def js_odf + end + def js_concordance + end + def js_instruments + end + def js_external + end + def js_gopher + end + def js_ftp + end + def js_law + end + def js_disclaimer + end + def semantic_tags + def default + { + :pub => 'publication', + :conv => 'convention', + :vol => 'volume', + :pg => 'page', + :cty => 'city', + :org => 'organization', + :uni => 'university', 
+ :dept => 'department', + :fac => 'faculty', + :inst => 'institute', + :co => 'company', + :com => 'company', + :conv => 'convention', + :dt => 'date', + :y => 'year', + :m => 'month', + :d => 'day', + :ti => 'title', + :au => 'author', + :ed => 'editor', #editor? + :v => 'version', #edition + :n => 'name', + :fn => 'firstname', + :mn => 'middlename', + :ln => 'lastname', + :in => 'initials', + :qt => 'quote', + :ct => 'cite', + :ref => 'reference', + :ab => 'abreviation', + :def => 'define', + :desc => 'description', + :trans => 'translate', + } + end + self + end + #% decorate + def decorate_italics + 'title|article|book|journal' + end + def decorate_bold + end + def decorate_uppercase + 'surname' + end + #% semantic + def sem_title #dc 1 + 'title' + end + def sem_article + 'article' + end + def sem_book + 'book' + end + def sem_journal + 'journal' + end + def sem_fullname # (contains: firstname, surname) #issues arise as contains surname etc. + 'fullname' + end + def sem_first + 'first' + end + def sem_surname + 'surname' + end + def sem_middle + 'middle' + end + def sem_creator #dc 2 #==fullname (contains: firstname, surname) + 'creator' + end + def sem_author #==fullname (contains: firstname, surname) + 'author' + end + def sem_editor #==fullname (contains: firstname, surname) + 'editor' + end + def sem_illustrator #==fullname (contains: firstname, surname) + 'illustrator' + end + def sem_translator #==fullname (contains: firstname, surname) + 'translator' + end + def sem_isbn # 10 or 13 + 'isbn' + end + def sem_isbn_10 + 'isbn10' + end + def sem_isbn_13 + 'isbn13' + end + def sem_loc # library of congress + 'loc' + end + def sem_dewey + 'dewey' + end + def sem_pg # project gutenberg number + 'pg' + end + def sem_subject #dc 3 + 'subject' + end + def sem_date #dc 7 + 'date' + end + def sem_date_created + 'date_created' + end + def sem_date_issued + 'date_issued' + end + def sem_date_available + 'date_available' + end + def sem_date_valid + 'date_valid' + end 
+ def sem_date_modified + 'date_modified' + end + def sem_type #dc 8 + 'type' + end + def sem_description #dc 4 + 'description' + end + def sem_publisher #dc 5 + 'publisher' + end + def sem_contributor #dc 6 + 'contributor' + end + def sem_format #dc 9 + 'format' + end + def sem_identifier #dc 10 + 'identifier' + end + def sem_source #dc 11 + 'source' + end + def sem_language #dc 12 + 'language' + end + def sem_relation #dc 13 + 'source' + end + def sem_coverage #dc 14 + 'coverage' + end + def sem_rights #dc 15 + 'rights' + end + def sem_copyright + 'copyright' + end + def sem_license + 'license' + end + def sem_prepared_by + 'prepared_by' + end + def sem_digitized_by + 'digitized_by' + end + def sem_keywords + 'keywords' + end + def sem_comments + 'comments' + end + def sem_abstract + 'abstract' + end + #% path + def path_stylesheet_home + %{ } + end + #% text #changed from txt to avoid naming conflicts #FOLLOW + def txt_generator + %{ + } + end + def txt_generator_comment + %{ } + end + def txt_hp + ' SiSU' + end + def txt_hp_alias + 'SiSU' + end + def txt_home + 'SiSU' + end + def txt_signature # used in latex/pdf footer + 'SiSU' + end + #% url + def url_urify(uri) + URI.parse(uri) + end + def url_sisu + 'http://www.sisudoc.org/' + end + def url_sisudoc + 'http://www.sisudoc.org' + end + def url_footer_signature + 'http://www.sisudoc.org/' + end + def url_root + '/sisu' #watch + end + def url_root_http + 'http://www.sisudoc.org/' #watch + end + def url_home + 'http://www.sisudoc.org/' # used in pdf header + end + def url_site #used as stub... 
where there are subdirectories and is different from home + url_home + #'http://www.sisudoc.org/' # used in pdf header + end + def url_txt + 'www.sisudoc.org/' + end + def url_path_image_base #used for html image display + "#{Xx[:html_relative2]}_sisu/image" + end + def url_path_image #used for html image display + "#{Xx[:html_relative2]}_sisu/image" + end + def url_path_image_sys #used for html image display + "#{Xx[:html_relative2]}_sisu/image_sys" + end + def url_path_image_epub + './image' + end + def url_path_ebook_dir + './ebook' + end + def url_path_ebook_images + '.' + end + def url_promo + '' + end + def url_promo_home + '' + end + def url_decoration + def tex_open #'{\UseTextSymbol{OML}{<}}' + Dx[:url_o] + end + def tex_close #'{\UseTextSymbol{OML}{>}}' + Dx[:url_c] + end + def xml_open #'<' + Dx[:url_o] + end + def xml_close #'>' + Dx[:url_c] + end + def txt_open + '<' + end + def txt_close + '>' + end + self + end + def rel_decoration + def tex_open #'{\UseTextSymbol{OML}{<}}' + Dx[:rel_o] + end + def tex_close #'{\UseTextSymbol{OML}{>}}' + Dx[:rel_c] + end + def xml_open #'<' + Dx[:rel_o] + end + def xml_close #'>' + Dx[:rel_c] + end + def txt_open + '<' + end + def txt_close + '>' + end + self + end + #% color + def color_shadow + '"4"' + end + def color_body + %{} + end + def color_white + '"#ffffff"' + end + def color_black + '#000000' + end + def color_shadow #hmmm + '"4"' + end + def color_blue_dark + '#000099' + end + def color_blue + 'blue' + end + def color_blue_base + '#b9d4dd' + end + def color_blue_ink + '#003399' + end + def color_blue_tinge + '#e3ecef' + end + def color_blue_grey + '#8faebf' + end + def color_blue_murky + '#437389' + end + def color_beige + '#f1e8de' + end + def color_subtleglow + '#dddccc' + end + def color_glow + '#fff0c3' + end + def color_rose + '#ffdec9' + end + def color_turquoise + '#1c869b' + end + def color_grey_pale + '#eeeeee' + end + def color_grey_medium + '#cccccc' + end + def color_grey + '#999999' + end + 
def color_yellow_light + '#fff3b6' + end + def color_yellow + '#ffde14' + end + def color_yellow_dark + '#ffcc00' + end + def color_green_light + '#b7d398' # #e2efd5 #b7d398 #b1c999 # '#aed19e' + end + def color_green + '#0a8400' + end + def color_green_dark + '#086800' + end + def color_ruby + '#a00000' + end + def color_maroon + '#800000' + end + def color_paper + %{"#{color_white}"} + end + def color_band1 + %{"#{color_white}"} + end + def color_band2 + %{"#{color_white}"} + end + def color_body + %{\n\n} + end + def color_font_face #was font WATCH + "#{color_black}" + end + def color_surround + %{"#{color_white}"} + end + def color_band + %{"#{color_white}"} + end + def color_table1 + 'ffffcc' + end + def color_table2 + 'c0d0f0' + end + def color_band1 + '"#ffffff"' + end + def color_band2 + '"#ffffff"' + end + #% icon + def icon_ico + 'rb7.ico' + end + def icon_sisu + 'sisu.png' + end + def icon_manifest + 'b_info.png' + end + def icon_doc + 'b_doc.png' + end + def icon_toc + 'b_toc.png' + end + def icon_wmp + 'b_wmp.png' + end + def icon_odf + 'b_odf.png' + end + def icon_epub + 'b_epub.png' + end + def icon_pdf + 'b_pdf.png' + end + def icon_pdf_portrait + 'b_pdf.png' + end + def icon_pdf_landscape + 'b_pdf.png' + end + def icon_status + 'b_status.png' + end + def icon_external + 'b_ext.png' + end + def icon_external_toc + 'b_ext_toc.png' + end + def icon_seg_toc + 'b_bluebell.png' + end + def icon_crosslink_toc + 'b_amber.png' + end + def icon_mail + 'b_mail.png' + end + def icon_para + 'b_para.png' + end + def icon_pdf + 'b_pdf.png' + end + def icon_ftp + 'b_ftp.png' + end + def icon_gopher + 'b_gopher.png' + end + def icon_choice + 'b_choice.png' + end + def icon_new + 'b_new.png' + end + def icon_book + 'b_amber.png' + # b_book.png + end + def icon_dot_clear + 'dot_clear.png' + end + def icon_dot_white + 'dot_white.png' + end + def icon_dot + icon_dot_white + end + def icon_amber + 'b_amber.png' + end + def icon_rose + 'b_rose.png' + end + def 
icon_bluebell + 'b_bluebell.png' + end + def icon_home_button + 'sisu.png' + end + def icon_home_banner + 'sisu.png' + end + def icon_site + 'b_home.png' + end + def icon_bluedot + 'blueband.png' + end + def icon_next + 'arrow_next_red.png' + end + def icon_previous + 'arrow_prev_red.png' + end + def icon_up + 'arrow_up_red.png' + end + #% font + def font_fonts + @fonts + end + def font_face + %{face="#{font_fonts}"} + end + def font_lmtoc_face + %{face="#{font_fonts}"} + end + def font_ebook_face + %{face="#{font_fonts}"} + end + def font_face_lmtoc + %{face="#{font_fonts}"} + end + def font_color + 'color="#000000"' + end + def font_size + 'size="4"' + end + def font_size_txt + 'size="4"' + end + def font_size_txt_00 + 'size="3"' + end + def font_size_endnote + 'size="3"' + end + def font_small + 'size="3"' + end + def font_tiny + 'size="2"' + end + #% markup + def markup_italics_list #regular expression of words to be italised + end + def markup_bold_list #regular expression of words to be made bold + 'SiSU' + end + def markup_make_italic + if defined? italics_list \ + and italics_list + make={} + if italics_list + r=italics_list.dup + x=case r + when /\/i$/; 'i' + else '' + end + r.gsub!(/^\/(.+?)\/i?/,'\1') + r.gsub!(/\(/,'(?:') # avoid need to escape use of brackets within regex provided + m='\b(' + r + ')\b' + make[:str] + make[:regx]=if x =~/i/; /#{m}/i + else /#{m}/ + end + else nil + end + end + end + def markup_make_bold + if defined? bold_list \ + and not bold_list.empty? + make={} + if bold_list + r=bold_list.dup + x=case r + when /\/i$/; 'i' + else '' + end + r.gsub!(/^\/(.+?)\/i?/,'\1') + r.gsub!(/\(/,'(?:') # avoid need to escape use of brackets within regex provided + m='\b(' + r + ')\b' + make[:str] + make[:regx]=if x =~/i/; /#{m}/i + else /#{m}/ + end + else nil + end + make + end + end + #% paragraph + def paragraph_txt + %{

} + end + def paragraph_txt_00 + %{

} + end + def paragraph_font_citation + %{} + end + def paragraph_endnote + %{

} + end + def paragraph_table + %{

} + end + def paragraph_table_xml + end + def paragraph_tiny + %{

} + end + def paragraph_small + %{

} # keep but not used? + end + def paragraph_font_tiny + %{} + end + def paragraph_font_small + %{} + end + def paragraph_heading_1 + %{

} + end + def paragraph_heading_1_center + %{

} + end + #% table + def table_close + ' +' + end + def table_close_centered_table + end + def table_align_A + end + def table_align_B + end + def table_align_C + end + def table_width_1 + '"100%"' + end + def table_width_2 + '"99%"' + end + def table_width_3 + '"94%"' + end + def table_width_4 + '"90%"' + end + def table_width_txt + '"94%"' + end + def table_width_txt_avgo + '"100%"' + end + def table_width_txt_r + '"96%"' + end + def table_cellpad_small_paper_margins + '"6"' + end + def table_cellpad_paper_margins + '"36"' + end + def table_cellpad_A + '"0"' + end + def table_cellpad_B + '"20"' + end + def table_cellpad_shadow + '"4"' + end + def table_cellpad_band + '"16"' + end + def table_cellpad_box + '"20"' + end + def table_table_align_A + '
' + end + def table_table_align_B + '
' + end + def table_table_align_C + ' ' + end + #% table_do + def table_do_table_paper + %{#{table_align_C}
\n

\n} + end + def table_do_table_surround + %{ + +
\n} + end + #% indent + def indent_level_0 + '"1%"' + end + def indent_level_1 + '"4%"' + end + def indent_level_2 + '"6%"' + end + def indent_level_3 + '"8%"' + end + def indent_level_4 + '"10%"' + end + #% margin + def margin_num + '

' + end + def margin_numless + '' + end + def margin_num_css + ' ' + end + def margin_num_header + '' + end + def margin_txt_00_1 + %{ + +
+} + end + def margin_txt_w1 + %{ + +
 } + end + def margin_txt_w2 + %{ + +
 } + end + def margin_txt_0 + %{ +
+} + end + def margin_txt_1 + %{ +
} + end + def margin_txt_2 + %{ + +
+} + end + def margin_txt_3 + %{ + +
+} + end + def margin_css #unused, check + ' +
' + end + #% png + def png_ico + %{ } + end + def png_sisu #check url path + %{ + SiSU + } + end + def png_hp + dir=SiSU_Env::Info_env.new #(@fns) + %{ + #{txt_home} + } + end + def png_site + %{@} + end + def png_homepage + png_site + end + def png_nav + %{Contents} + end + def png_manifest + %{Document Manifest} + end + def png_doc + %{Full Text} + end + def png_toc + %{TOC linked} + end + def png_odf + %{ODF/ODT} + end + def png_epub + %{EPUB} + end + def png_pdf + %{PDF} + end + def png_pdf_portrait + %{PDF portrait} + end + def png_pdf_landscape + %{PDF landscape} + end + def png_wmp + %{Concordance} + end + def png_para + %{Segment} + end + def png_status + %{Membership status} + end + def png_mark + %{*} + end + def png_doc_tiny + %{Doc} + end + def png_toc_tiny + %{TOC} + end + def png_status_tiny + %{Status, Member States} + end + def png_ftp + %{FTP} + end + def png_gopher + %{Gopher} + end + def png_crosslink + %{lateral hop} + end + def png_crosslink_ext + %{lateral hop} + end + def png_home + dir=SiSU_Env::Info_env.new #(@fns) + %{#{txt_home} -->} + end + def png_home_button + #dir=SiSU_Env::Info_env.new #(@fns) + %{#{txt_home} -->} + end + def png_book + %{Cameron May Books} + end + #% png_nav + def png_nav_home + end + def png_nav_toc + %{TOC} + end + def png_nav_doc + end + def png_nav_previous + %{<< previous} + end + def png_nav_next + %{next >>} + end + def png_nav_pre + png_nav_previous + end + def png_nav_nxt + png_nav_next + end + def epub_png_nav_previous + %{<< previous} + end + def epub_png_nav_next + %{next >>} + end + def epub_png_nav_up + %{^up ^} + end + def epub_png_nav_pre + epub_png_nav_previous + end + def epub_png_nav_nxt + epub_png_nav_next + end + def epub_png_nav_toc + epub_png_nav_up + end + def png_nav_pdf + %{PDF} + end + def png_nav_pdf_portrait + %{pdf portrait} + end + def png_nav_pdf_landscape + %{pdf landscape} + end + def png_nav_dot_toc + %{^} + end + def png_nav_dot_previous + %{<} + end + def png_nav_dot_next + %{>} 
+ end + def png_nav_dot_pre + png_nav_dot_previous + end + def png_nav_dot_nxt + png_nav_dot_next + end + #% nav_txt + def nav_txt_home + %{ + #{png_site}  + } + end + def nav_txt_home_button + %{ + #{png_home_button}  + } + end + def nav_txt_homepage + %{ +  home  + } + end + def nav_txt_toc_link + %{ +   toc  + } + end + def nav_txt_toc_link_verbose + %{ + #{png_toc}  segments' toc  + } + end + def nav_txt_doc_link + %{ +  scroll  + } + end + def nav_txt_manifest + #{png_manifest} document manifest + %{ + [ document manifest ] + } + end + def nav_txt_concordance + %{ +   A-Z  + } + end + def nav_txt_previous + ' +   << Previous   + + Full Text ' + end + def nav_txt_next + %{ +   Next     >>   + } + end + def nav_txt_plaintext + %{ +   txt  + } + end + def nav_txt_odf + %{ +   odt  + } + end + def nav_txt_pdfs + %{ + pdfs   + } + end + def nav_txt_epub + %{ +  epub  + } + end + def nav_txt_pdf_portrait + %{ +  pdf  + } + end + def nav_txt_pdf_landscape + %{ +  pdf  + } + end + #% banner + def banner_home + %{
+ + #{png_site} + +
+ + an +
+ ( international | transnational ) +
+ commercial law & e-commerce +
+ infrastructure monitor
} + end + def banner_home_guide + %{
+ + #{png_doc} LM toc + + + #{png_doc} LM 20** + +
} + end + def banner_home_button_only + %{ + #{png_home_button} + } + end + def banner_home_button #yellow_dark now white + %{ + +
+ + #{png_home_button} + + +#{table_close}} + end + def banner_home_and_index_buttons #yellow_dark now white + %{ + + +
+ + +
+ + #{png_home} + +
+
+ + +
+ + +  This text's sub-  +
+  Table of Contents  +
+
+
+
+   +#{table_close}} + end + def banner_url_txt_sisu + %{SiSU} + end + def banner_band #yellow_dark now white + %{ + +
+ + #{png_home} + + +#{table_close}} + end + def banner_credit_band + %{ +
+ +
+ +
} + end + def banner_instrument_cover_band_scr + ' +
' + end + def banner_instrument_cover_band_seg + ' +
' + end + #% widget + def widget_promo # Array used to build promo from list.yml and promo.yml + # ['sisu_icon','sisu','sisu_search_libre','open_society','fsf','ruby'] + end + def widget_browsers +< + +
+

+ + If you have problems viewing pages on this site please update your browser: + +

+#{table_close} + + + + + + + + + +
+

+ + +  Epiphany + + ® | + +

+
+

+ + +  Galeon + + ® | + +

+
+

+ + + I-Explorer + + ® | + +

+
+

+ + + Kazehakase + ® |

+ +

+ + + Konqueror + ® |

+

+ + + Mozilla + + + Firefox + + ® | + +

+
+

+ + + Netscape + + ® | + +

+
+

+ + + Opera + + ® | + +

+
+

+ + + Safari + + ® + +

+#{table_close} + + + + + + + + + + +WOK + end + def widgets_open +< +WOK + end + def widget_pdfviewers +< + + + + +WOK + end + def widget_sisu_text +< + Output generated by + + #{@v[:project]} + + #{@v[:version]} #{@v[:date]} (#{@v[:date_stamp]}) +

+WOK + end + def widget_sisu +< +
+WOK + end + def widget_sisu_verbose +< + +WOK + end + def widget_way_better + < + +WOK + end + #% credits + def credits_itl_cover_band + %{
+

+ + for console/text viewing: + +

+
+

+ + + elinks +  | + +

+
+

+ + + links2 +  | + +

+
+

+ + + w3m + + +

+
+

+ + for lightweight gui (X) viewing try: + +

+
+

+ + + Dillo + +  | + +

+
+

+ + +  Epiphany + + ® | + +

+
+

+ + +  Galeon +  | + +

+
+

+ + + links2 -g + + +

+#{table_close} +
+ +
+

+ + & for + + pdf + + viewings of this site we recommend stand alone viewers +
+ (rather than web browser plugins): +
+

+
+

+ + + Acrobat Reader + + ® +  | + +

+
+

+ + + Evince + + ® + +

+
+

+ +  GhostView® , + + GV® + + & + + GSview® + +  | + +

+
+

+ + +  Xpdf + + ® + +

+
+ +#{widget_sisu_text} +
+

+ + SiSU + +

+
+ +

+ Output generated by + + #{@v[:project]} + + #{@v[:version]} #{@v[:date]} (#{@v[:date_stamp]}) +
+ + #{@v[:project]} + + Copyright © Ralph Amissah + 1997, current #{@date.year_static}. + All Rights Reserved. +
+ + #{@v[:project]} + + is software for document structuring, publishing and search, +
+ + www.sisudoc.org/ + + and + + www.sisudoc.org + +
+ w3 since October 3 1993 + + ralph@amissah.com + +

+
+

+ #{@v[:project]} using: +
Standard SiSU markup syntax, +
Standard SiSU meta-markup syntax, and the +
Standard SiSU object citation numbering and system, (object/text positioning system) +
+ Copyright © Ralph Amissah 1997, current #{@date.year_static}. + All Rights Reserved. +

+
+

+ + GPLv3 + +

+
+

+ + #{@v[:project]} + + is released under + GPLv3 + or later, + #{url_decoration.xml_open}http://www.gnu.org/licenses/gpl.html#{url_decoration.xml_close} +

+
+

+ #{@v[:project]}, developed using + + Ruby + + on + + Debian/Gnu/Linux + + software infrastructure, + with the usual GPL (or OSS) suspects. +
+ Better - "performance, reliability, scalability, security & total cost of ownership" + [not to mention flexibility & choice] use of and adherence to open standards (where practical and fair) and it is software libré. +
+ Get With the Future + + Way Better! + +

+
+
+ +
+ +
+ + +
+ +
+ \@ +
+ #{txt_home} +
+ #{banner_url_txt_sisu} +
+
+#{table_close*4}} + end + def credits_sisu_manifest + widget_sisu_text + end + def credits_sisu + x=%{
+ +
+ #{widget_sisu} + #{widget_way_better} +
} + '' + end + def credits_splash + end + def credits_sisu_epub + x=%{
+

EPUB generated by #{@v[:project]} v#{@v[:version]}, GPL3

+
} + '' + end + #% bottom + def bottom_surround + %{
#{table_close} +
+ +
+#{table_close} + + #{banner_band} +#{table_close} + + +} + end + def search + env=SiSU_Env::Info_env.new(@md.fns,@md) + env.widget.search_form('sisusearch',nil,nil,true) + end + def manifest + %{} + end + end + class XML + end + class Head_toc < Head_information + def initialize(md) + super(md) + @md=md + @tocband_segtoc=make_seg + end + def concordance_navigation_band(type='') + if type=~/pdf/ + @tocband_concordance=make_concordance + end + %{

+ + #{@vz.epub_png_nav_toc} + +

+} + end + def seg_head_navigation_band + firstseg=%{ + #{@vz.epub_png_nav_nxt} + } if @md.firstseg =~/\S+/ + %{

#{firstseg}

} + end + def seg_head_navigation_band_bottom + firstseg=%{ + #{@vz.epub_png_nav_nxt} + } if @md.firstseg =~/\S+/ + %{

#{firstseg}

} + end + def manifest_link(text) #watch fix removed font size 2 + %{ #{text}} + end + def concordance_link(text) #watch fix removed font size 2 + if @md.concord_make + %{ + #{text} + } + else '' + end + end + def make_concordance + manifest=scroll=seg='' + wgt=Widget.new(@md) + %{
+ + +
+ #{table_close} + + #{credits_splash} +#{table_close} + +} + end + end + class Home < Skin + def initialize + @v=SiSU_Env::Info_version.instance.get_version + @dir=SiSU_Env::Info_env.new + @date=SiSU_Env::Info_date.new #{@date.year} + end + def redirect + < +SiSU + + + +SiSU informtion provided at www.sisudoc.org/sisu/SiSU

+If your browser supports redirection, you will be escorted there shortly. + + +WOK + end + def homepage + < + + +SiSU information Structuring Universe - Structured +information, Serialized Units - software for electronic texts, +documents, books, digital libraries in plaintext, html, XHTML, XML, +ODF (OpenDocument), EPUB, LaTeX, PDF, SQL (PostgreSQL and SQLite), and +for search + + + + + + + + + + + + + +

+

+ + SiSU >> + +

+

+ SiSU information Structuring Universe +

+

+ Structured information, Serialized Units +

+

+software for electronic texts, document collections, books, digital libraries, and search, +

+

+ with "atomic search" and text positioning system (shared text citation numbering: "ocn") +

+

+outputs include: plaintext, html, XHTML, XML, ODF (OpenDocument), EPUB, LaTeX, PDF, SQL (PostgreSQL and SQLite) +

+
+ +
+

+ + SiSU + +

+

+ --- +

+

+ + SiSU Manual + +

+

+ --- +

+

+ + What does SiSU do? Summary + +

+

+ --- +

+

+ + Book Samples and Markup Examples + +

+

+ + sorted by Author (sisu metadata) + +

+

+ + sorted by Topic (sisu metadata) + +

+

+ --- +

+

+ + Object Citation Numbering - ocn + +

+

(a text positioning system)

+

+ --- +

+

+ + Search - "granular" + +

+ Of interest is the ease of streaming documents to a relational database, at an object (roughly paragraph) level and the potential for increased precision in the presentation of matches that results thereby. The ability to serialise html, LaTeX, XML, SQL, (whatever) is also inherent in / incidental to the design. For a description see the + + abandoned U.S. provisional patent application + +

+

+ --- +

+

+ + Download + +

+

+ --- +

+

+ + Changelog + +

+

+ --- +

+

+ + License + +

+

+ Gnu / Linux / Unix +

+

+ ============= +

+

+ + sisu markup + +

+

+ + sisu commands + +

+

+ + sisu manual + +

+

+ + sisu man pages + +

+

+ --- +

+

+ document preparation can be on any platform, in any editor: + (syntax highlight support currently for: vim, kate, write, gedit, diakonos) +

+

+ + Syntax highlighting + +

+

+ ============= +

+

+ + * Composite document + +

+

+ the composite document is a superset of the following documents: +

+

+ + SiSU description + +

+

+ + SiSU examples + +

+

+ + SiSU chronology + +

+

+ + SiSU technical + +

+

+ + SiSU FAQ + +

+

+ + SiSU download + +

+

+ + SiSU changelog + +

+

+ + SiSU license + +

+

+ + SiSU standard + +

+

+ + SiSU abandoned provisional patent + +

+

+ Note: the placement of SiSU documents on the Net predates the release of SiSU. +

+
+
+

+SiSU Short Description +

+

+SiSU is a comprehensive future-proofing electronic document management system. Built-in search capabilities allow you to search across multiple documents and highlight matches in an easy-to-follow format. Paragraph numbering system allows you to cite your electronic documents in a consistent manner across multiple file formats. Multiple format outputs allow you to display your documents in plain text, PDF (portrait and horizontal), OpenDocument format, HTML, or e-book reading format (EPUB). Word mapping allows you to easily create word indexes for your documents. Future-proofing flexibility allows you to quickly adapt your documents to newer output formats as needed. All these and many other features are achieved with little or no additional work on your documents - by marking up the documents with a super simplistic markup language, leaving the SiSU engine to handle the heavy-lifting processing. +

+

+Potential users of SiSU include individual authors who want to publish their books or articles electronically to reach a broad audience, web publishers who want to provide multiple channels of access to their electronic documents, or any organizations which centrally manage a medium or large set of electronic documents, especially governmental organizations which may prefer to keep their documents in easily accessible yet non-proprietary formats. +

+

+SiSU is an Open Source project initiated and led by Ralph Amissah (ralph.amissah@gmail.com) and can be contacted via mailing list http://lists.sisudoc.org/listinfo/sisu at sisu@lists.sisudoc.org. SiSU is licensed under the GNU General Public License. +

+

+ For less markup than the most elementary HTML you can have more. +

+

SiSU - Structured information, Serialized Units for electronic documents, is an information structuring, transforming, publishing and search framework with the following features:

+

+(i) markup syntax: +(a) +simpler than html, +(b) +mnemonic, influenced by mail/messaging/wiki markup practices, +(c) human readable, and easily writable,

+

(ii) +(a) +minimal markup requirement, +(b) +single file marked up for multiple outputs,

+

+notes +

+

+* +documents are prepared in a single UTF-8 file using a minimalistic mnemonic syntax. Typical literature, documents like "War and Peace" require almost no markup, and most of the headers are optional. +

+

+* +markup is easily readable/parsed by the human eye, (basic markup is simpler and more sparse than the most basic html), [this may also be converted to XML representations of the same input/source document]. +

+

+* +markup defines document structure (this may be done once in a header pattern-match description, or for heading levels individually); basic text attributes (bold, italics, underscore, strike-through etc.) as required; and semantic information related to the document (header information, extended beyond the Dublin core and easily further extended as required); the headers may also contain processing instructions. +

+

(iii) +(a) +multiple outputs primarily industry established and institutionally accepted open standard formats, include amongst others: plaintext (UTF-8); html; (structured) XML; ODF (Open Document text); EPUB; LaTeX; PDF (via LaTeX); SQL type databases (currently PostgreSQL and SQLite). Also produces: concordance files; document content certificates (md5 or sha256 digests of headings, paragraphs, images etc.) and html manifests (and sitemaps of content). + +(b) + +takes advantage of the strengths implicit in these very different output types, (e.g. PDFs produced using typesetting of LaTeX, databases populated with documents at an individual object/paragraph level, making possible granular search (and related possibilities))

+

(iv) +outputs share a common numbering system (dubbed "object citation numbering" (ocn)) that is meaningful (to man and machine) across various digital outputs whether paper, screen, or database oriented, (PDF, html, XML, EPUB, sqlite, postgresql), this numbering system can be used to reference content.

+

+(v) +SQL databases are populated at an object level (roughly headings, paragraphs, verse, tables) and become searchable with that degree of granularity, the output information provides the object/paragraph numbers which are relevant across all generated outputs; it is also possible to look at just the matching paragraphs of the documents in the database; [output indexing also works well with search indexing tools like hyperestraier].

+

+(vi) + use of semantic meta-tags in headers permit the addition of semantic information on documents, (the available fields are easily extended)

+

+(vii) +creates organised directory/file structure for (file-system) output, easily mapped with its clearly defined structure, with all text objects numbered, you know in advance where in each document output type, a bit of text will be found (e.g. from an SQL search, you know where to go to find the prepared html output or PDF etc.)... there is more; easy directory management and document associations, the document preparation (sub-)directory may be used to determine output (sub-)directory, the skin used, and the SQL database used,

+

+(viii) +"Concordance file" wordmap, consisting of all the words in a document and their (text/ object) locations within the text, (and the possibility of adding vocabularies),

+

+(ix) +document content certification and comparison considerations: +(a) +the document and each object within it stamped with an md5 hash making it possible to easily check or guarantee that the substantive content of a document is unchanged, +(b) +version control, documents integrated with time based source control system, default RCS or CVS with use of $Id$ tag, which SiSU checks +

+(x) +SiSU's minimalist markup makes for meaningful "diffing" of the substantive content of markup-files,

+

+(xi) +easily skinnable, document appearance on a project/site wide, directory wide, or document instance level easily controlled/changed,

+

+(xii) +in many cases a regular expression may be used (once in the document header) to define all or part of a document's structure obviating or reducing the need to provide structural markup within the document,

+

+(xiii) +prepared files may be batch processed, documents produced are static files so this needs to be done only once but may be repeated for various reasons as desired (updated content, addition of new output formats, updated technology document presentations/representations)

+

+(xiv) +possible to pre-process, which permits: the easy creation of standard form documents, and templates/term-sheets, or; building of composite documents (master documents) from other sisu marked up documents, or marked up parts, i.e. import documents or parts of text into a main document should this be desired

+

+there is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added. +

+

+(xv) +there is a considerable degree of future-proofing, output representations are "upgradeable", and new document formats may be added: +(a) +modular, (thanks in no small part to Ruby) another output format required, write another module.... +(b) easy to update output formats (eg html, XHTML, LaTeX/PDF produced can be updated in program and run against whole document set), +(c) easy to add, modify, or have alternative syntax rules for input, should you need to,

+

+(xvi) +scalability, dependent on your file-system (ext3, Reiserfs, XFS, whatever) and on the relational database used (currently Postgresql and SQLite), and your hardware,

+

+(xvii) +only marked up files need be backed up, to secure the larger document set produced,

+

+(xviii) +document management,

+

+(xix) +Syntax highlighting for SiSU markup is available for a number of text editors.

+

(xx) remote operations: +(a) +run SiSU on a remote server, (having prepared sisu markup documents locally or on that server, i.e. this solution where sisu is installed on the remote server, would work whatever type of machine you chose to prepare your markup documents on), +(b) +generated document outputs may be posted by sisu to remote sites (using rsync/scp) +(c) +document source (plaintext utf-8) if shared on the net may be identified by its url and processed locally to produce the different document outputs.

+

+(xxi) +document source may be bundled together (automatically) with associated documents (multiple language versions or master document with inclusions) and images and sent as a zip file called a sisupod, if shared on the net these too may be processed locally to produce the desired document outputs, these may be downloaded, shared as email attachments, or processed by running sisu against them, either using a url or the filename. +

+

+(xxii) +for basic document generation, the only software dependency is Ruby, and a few standard Unix tools (this covers plaintext, html, XML, ODF, EPUB, LaTeX). To use a database you of course need that, and to convert the LaTeX generated to PDF, a LaTeX processor like tetex or texlive. +

+

+as a developer's tool it is flexible and extensible +

+
+

+More information on SiSU provided at www.sisudoc.org/sisu/SiSU

+
+
+

+ +

+ +
+ + + + idx + txt +search.sisudoc.org +
+ +

+

+SiSU ("SiSU information Structuring Universe" or "Structured information, Serialized Units"),1 is a Unix command line oriented framework for document structuring, publishing and search. Featuring minimalistic markup, multiple standard outputs, a common citation system, and granular search. +

+

+ Using markup applied to a document, SiSU can produce plain text, HTML, XHTML, XML, OpenDocument, LaTeX or PDF files, and populate an SQL database with objects2 (equating generally to paragraph-sized chunks) so searches may be performed and matches returned with that degree of granularity (e.g. your search criteria are met by these documents and at these locations within each document). Document output formats share a common object numbering system for locating content. This is particularly suitable for "published" works (finalized texts as opposed to works that are frequently changed or updated) for which it provides a fixed means of reference of content. +

+ +

+How it works +

+

+SiSU markup is fairly minimalistic, it consists of: a (largely optional) document header, made up of information about the document (such as when it was published, who authored it, and granting what rights) and any processing instructions; and markup within text which is related to document structure and typeface. SiSU must be able to discern the structure of a document, (text headings and their levels in relation to each other), either from information provided in the instruction header or from markup within the text (or from a combination of both). Processing is done against an abstraction of the document comprising information on the document's structure and its objects,2 which the program serializes (providing the object numbers) and which are assigned hash sum values based on their content. This abstraction of information about document structure, objects, (and hash sums), provides considerable flexibility in representing documents in different ways and for different purposes (e.g. search, document layout, publishing, content certification, concordance etc.), and makes it possible to take advantage of some of the strengths of established ways of representing documents, (or indeed to create new ones).

+

+1. also chosen for the meaning of the Finnish term "sisu". +

+

+2 objects include: headings, paragraphs, verse, tables, images, but not footnotes/endnotes which are numbered separately and tied to the object from which they are referenced.

+

+ More information on SiSU provided at: + + www.sisudoc.org/sisu/SiSU + +

+SiSU was developed in relation to legal documents, and is strong across a wide variety of texts (law, literature...(humanities, law and part of the social sciences)). SiSU handles images but is not suitable for formulae/ statistics, or for technical writing at this time.

+

+SiSU has been developed and has been in use for several years. Requirements to cover a wide range of documents within its use domain have been explored.

+

+ +ralph@amissah.com + +

+

+ +ralph.amissah@gmail.com + +

+

+ +sisu@lists.sisudoc.org. + +

+

+ +http://lists.sisudoc.org/listinfo/sisu + +

+

+#{@date.year_static} +

+

+w3 since October 3 1993 +

+
+ + +WOK + end + def home_toc + ' ' + end + end + class Inserts + end + class TeX < Skin + def initialize(papersize='') + @papersize=papersize + end + def a4 + def portrait + def w + 160 + end + def h + 228 + end + def img_px + 450 + end + self + end + def landscape + def w + 238 + end + def h + 160 + end + def img_px + 300 + end + self + end + self + end + def letter + def portrait + def w + 166 + end + def h + 212 + end + def img_px + 468 + end + self + end + def landscape + def w + 226 + end + def h + 166 + end + def img_px + 290 + end + self + end + self + end + def legal + def portrait + def w + 168 + end + def h + 286 + end + def img_px + 474 + end + self + end + def landscape + def w + 296 + end + def h + 166 + end + def img_px + 420 + end + self + end + self + end + def b5 + def portrait + def w + 140 + end + def h + 204 + end + def img_px + 356 + end + self + end + def landscape + def w + 200 + end + def h + 130 + end + def img_px + 260 + end + self + end + self + end + def a5 + def portrait + def w + 112 + end + def h + 162 + end + def img_px + 280 + end + self + end + def landscape + def w + 152 + end + def h + 100 + end + def img_px + 190 + end + self + end + self + end + def dimensions + d=case @papersize + when /a4/; a4 + when /letter/; letter + when /legal/; legal + when /b5/; b5 + when /a5/; a5 + else a4 + end + end + end +end +__END__ diff --git a/lib/sisu/v3/digests.rb b/lib/sisu/v3/digests.rb new file mode 100644 index 00000000..77c593ac --- /dev/null +++ b/lib/sisu/v3/digests.rb @@ -0,0 +1,388 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: document digests (md5|sha256) and structure processing + +=end +module SiSU_Digest_view + require "#{SiSU_lib}/particulars" # particulars.rb + require "#{SiSU_lib}/i18n" # i18n.rb + require "#{SiSU_lib}/shared_markup_alt.rb" # shared_markup_alt.rb + pwd=Dir.pwd + class Source + @@dg=nil + def initialize(opt) + @opt=opt + @fnb=@opt.fnb + @@endnotes_para=[] + @@dg=nil + @dg=@@dg ||=SiSU_Env::Info_env.new.digest.type + @particulars=SiSU_Particulars::Combined_singleton.instance.get_all(opt) + end + def read + begin + @env,@md,@dal_array=@particulars.env,@particulars.md,@particulars.dal_array + unless @opt.cmd =~/q/ + tool=(@opt.cmd =~/[MVv]/) \ + ? "#{@env.program.text_editor} #{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:digest]}" \ + : @opt.fns + @opt.cmd=~/[MVvz]/ \ + ? 
SiSU_Screen::Ansi.new(@opt.cmd,"Document #{@dg} Digests",tool).green_hi_blue \ + : SiSU_Screen::Ansi.new(@opt.cmd,"Document #{@dg} Digests",tool).green_title_hi + SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@md.fnb}/#{@md.fn[:digest]}").flow if @opt.cmd =~/[MV]/ + end + SiSU_Digest_view::Source::Scroll.new(@particulars).songsheet + SiSU_Env::Info_skin.new(@md).select + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + end + end + private + class Scroll \1') + para.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + if para=~/#{Mx[:en_a_o]}[\d*+]+.+?#{Mx[:id_o]}[0-9a-f]{#{@dl}}#{Mx[:id_c]}#{Mx[:en_a_c]}/ + para_endnotes << para.scan(/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})([\d*+]+).+?#{Mx[:id_o]}([0-9a-f]{#{@dl}})#{Mx[:id_c]}(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/) + end + ima=[] + if para !~/^%+\s/ \ + and para =~/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+\.(png|jpg|gif))\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/ + images=para.scan(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+\.(?:png|jpg|gif))\s.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/).flatten + else image=nil + end + x=case para + when /^#{Mx[:meta_o]}title#{Mx[:meta_c]}/ + "\n" + ' '*0 +'@' + ' '*9 + when /^#{Mx[:meta_o]}subtitle#{Mx[:meta_c]}/ + "\n" + ' '*1 +'@' + ' '*8 + when /^#{Mx[:lv_o]}1:/ #fix Mx[:lv_o] + "\n" + ' '*2 +':A ' + ' '*6 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + when /^#{Mx[:lv_o]}2:/ #fix Mx[:lv_o] + "\n" + ' '*3 +':B ' + ' '*5 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + when /^#{Mx[:lv_o]}3:/ #fix Mx[:lv_o] + "\n" + ' '*4 +':C ' + ' '*4 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + when /^#{Mx[:lv_o]}4:/ #fix Mx[:lv_o] + "\n" + ' '*5 +'1' + ' '*4 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + when /^#{Mx[:lv_o]}5:/ #fix Mx[:lv_o] + "\n" + ' '*6 +'2' + ' '*3 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + 
' ' + d_all + when /^#{Mx[:lv_o]}6:/ #fix Mx[:lv_o] + "\n" + ' '*7 +'3' + ' '*2 +'- ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + else + if para =~/MD5\(\S+?\.sst\)=\s*([0-9a-f]{#{@dl}})<\/u>/ #watch + @n,@s=/MD5\((\S+?\.sst)\)=\s*([0-9a-f]{#{@dl}})<\/u>/.match(para)[1,2] + end + x=unless ocn =~ /^0$/ + if images \ + and images.length > 0 # then get path of image & produce digest + @image_name,@image_dgst,@img=[],[],[] + images.each do |i| + image_source=if FileTest.file?("#{@env.path.image_source_include_local}/#{i}") + @env.path.image_source_include_local + elsif FileTest.file?("#{@env.path.image_source_include_remote}/#{i}") + @env.path.image_source_include_remote + elsif FileTest.file?("#{@env.path.image_source_include}/#{i}") + @env.path.image_source_include + else + SiSU_Screen::Ansi.new(@md.cmd,"ERROR - image:", %{"#{i}" missing}, "search locations: #{@env.path.image_source_include_local}, #{@env.path.image_source_include_remote} and #{@env.path.image_source_include}").error2 unless @md.cmd =~/q/ + nil + end + @img << /\S+\.(png|jpg|gif)/.match(i)[1] + not_found_msg='image not found' + if image_source + para_image = image_source + '/' + i + @image_name << i + @image_dgst << if @dg =~/^sha(?:2|256)$/; sys.sha256(para_image) + else sys.md5(para_image) + end + else + @image_name << ' '*16 + i + ' [image missing]' + @image_dgst << '' + @image_dgst[1]=not_found_msg + ' '*(32-not_found_msg.length) + end + end + line= "\n" + ' '*9 + ' - ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + "\n" + line_image=[] + c=0 + @image_name.each do |ok| + line_image << %{ #{@img[c]} #{@image_dgst[c][1]} #{@image_name[c]}} + c +=1 + end + line=line + line_image.join("\n") + else "\n" + ' '*9 + ' - ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + end + else + prefix='' + 
metad=[@tr.full_title,@tr.author,@tr.translator,@tr.illustrator,@tr.prepared_by,@tr.digitized_by,@tr.description,@tr.subject,@tr.abstract,@tr.publisher,@tr.contributor,@tr.date_created,@tr.date_issued,@tr.date_available,@tr.date_modified,@tr.date_valid,@tr.date,@tr.type,@tr.format,@tr.rights,@tr.identifier,@tr.source,@tr.language,@tr.language_original,@tr.relation,@tr.coverage,@tr.keywords,@tr.comments,@tr.cls_loc,@tr.cls_dewey,@tr.cls_gutenberg,@tr.cls_isbn,@tr.prefix_a,@tr.prefix_b,@tr.sourcefile,@tr.sourcefile_digest,@tr.last_generated,@tr.sisu_version,@tr.ruby_version,@tr.sc_number,@tr.sc_date,'Skin_Digest: ','Generated by: ','Ruby version: '] + metad.each do |n| + m=rgx_txt(n) + if m=~/\S+/ \ + and para=~/^#{m}:/ + x,o=0,18 + while x < 2; o = o + 2 + x=o - n.length + end + space=' '*x + prefix="#{n.downcase}#{space}" + break + else prefix=' '*9 + end + end + m_title=rgx_txt(@tr.full_title) + m_author=rgx_txt(@tr.author) + m_sourcefile_digest=rgx_txt(@tr.sourcefile_digest) + m_sisu_version=rgx_txt(@tr.sisu_version) + m_last_generated=rgx_txt(@tr.last_generated) + m_ruby_version=rgx_txt(@tr.ruby_version) + case para + when /#{m_title}: / + @t=/#{m_title}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip + when /#{m_author}: / + @c=/#{m_author}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip + when /#{m_sourcefile_digest}.+?/ #watch + dgst_extra="\n" + ' '*21 +'source' +' '*4 + @md.dgst[1] + ' '*34 + @md.fns + when /Skin_Digest: / + dgst_extra="\n" + ' '*21 + 'skin' +' '*6 + @md.dgst_skin[1] + ' '*34 + /(skin_\S+?\.rb)/.match(@md.dgst_skin[0])[1] + when /#{m_sisu_version}: / + @v=/#{m_sisu_version}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip + when 
/#{m_last_generated}: / + @g=/#{m_last_generated}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip + when /#{m_ruby_version}: / + @r=/#{m_ruby_version}: (.+?)#{Mx[:id_o]}~\d;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}[0-9a-f]{#{@dl}}:[0-9a-f]{#{@dl}}#{Mx[:id_c]}/.match(para)[1].gsub(/<\/?u>/,'').strip + end + dgst_extra ||='' + "\n" + prefix +' - ' + ocn + ' '*(10-ocn.length) + d_clean + ' ' + d_all + dgst_extra + "\n" + end + end + para_endnotes[0].each { |e| y << "\n" + ' '*(28-e[0].length) + "[#{e[0].to_s}] #{e[1].to_s}" } if para_endnotes[0] + if y; digests(x,y) + else digests(x) + end + end + end + manifest="#{@env.url.root}/#{@md.fnb}/sisu_manifest.html" + a=%{ocn digest clean (no markup/notes),#{@sp*33}digest all (includes markup & endnotes)\n} + description("#{@md.title.full}\n") + description("#{@md.author}\n") + description("#{@md.fns}\n") + description("----------------------------------------------\n") + description("SiSU Document Content Certificate (Digest/DCC)\n") + description("----------------------------------------------\n") + description(" #{@dg} digests\n") + description("------------\n") + description("Sourcefile digest: #{@md.dgst[1]}\n") + description(" source filename: #{@md.fns}\n") + description("available outputs: #{manifest}\n") + #description(" time generated: #{@g}\n") + #description(" SiSU version used: #{@v}\n") + #description(" Ruby version used: #{@r}\n") + description("------------\n") + description("Document Digests\n") + description(a) + end + def dal_structure + data=@data + endnotes=nil + data.each do |t_o| + dgst=SiSU_text_representation::Modified_text_plus_Hash_digest.new(@md,t_o).composite.dgst + if dgst + if t_o.is=='heading' + digests("#{@sp*0}#{dgst[:ocn]}#{@sp*(8-dgst[:ocn].to_s.length)}#{dgst[:dgst_stripped_txt]} #{dgst[:dgst_markedup_txt]} #{dgst[:is]} #{t_o.lv}") + elsif t_o.is=='heading_insert' + 
digests("#{@sp*0}[#{dgst[:ocn]}]#{@sp*(6-dgst[:ocn].to_s.length)}#{dgst[:dgst_stripped_txt]} #{dgst[:dgst_markedup_txt]} #{dgst[:is]} #{t_o.lv}") + else + digests("#{@sp*0}#{dgst[:ocn]}#{@sp*(8-dgst[:ocn].to_s.length)}#{dgst[:dgst_stripped_txt]} #{dgst[:dgst_markedup_txt]} #{dgst[:is]}") + if dgst[:images] + dgst[:images].each do |img| + digests("#{@sp*8}#{img[:img_dgst]}#{@sp*66}#{img[:img_type]} #{img[:img_name]}") + end + end + end + if dgst[:endnotes] + dgst[:endnotes].each do |en| + digests("#{@sp*8}#{en[:note_dgst]}#{@sp*66}note [#{en[:note_number]}]") + endnotes=en[:note_number] + end + end + end + end + l=Hash.new(0) + ocn=nil + dal_structure_tree("------------\n") + dal_structure_tree("document structure[*]\n") + data.each do |t_o| + if t_o.is=='heading' + x=case t_o.ln + when 1; l[1] +=1 #fix Mx[:lv_o] + ' '*0 +':A' + when 2; l[2] +=1 #fix Mx[:lv_o] + ' '*1 +':B' + when 3; l[3] +=1 #fix Mx[:lv_o] + ' '*2 +':C' + when 4; l[4] +=1 #fix Mx[:lv_o] + ' '*3 +'1' + when 5; l[5] +=1 #fix Mx[:lv_o] + ' '*4 +'2' + when 6; l[6] +=1 #fix Mx[:lv_o] + ' '*5 +'3' + else nil + end + end + ocn=t_o.ocn if defined? t_o.ocn and t_o.is !='heading_insert' + dal_structure_tree("#{x}\n") if x and not x.empty? + end + dal_structure_tree(" [*] heading levels\n") + dal_structure_summary("------------\n") + dal_structure_summary("document structure[*]\n") + [0,1,2,3,4,5,6].each do |y| + v=case y + when 1; ':A' + when 2; ':B' + when 3; ':C' + when 4; '1 ' + when 5; '2 ' + when 6; '3 ' + end + dal_structure_summary("#{v} = #{l[y]}\n") if l[y] > 0 + end + dal_structure_summary("objects (ocn) = #{ocn}\n") + dal_structure_summary("endnotes = #{endnotes}\n") + dal_structure_summary(" [*] number of headers (@) and of each heading level (:A to :C and 1 to 3)\n") + end + def supplementary + if defined? 
@md.sc_number \ + and @md.sc_number + rcinfo("------------\n") + rcinfo("source control information\n") + rcinfo(" (the following information while not important for document content certification\n may help the publisher in locating the version referred to)\n") + rcinfo(" rcs version number: #{@md.sc_number}\n") + if defined? @md.sc_date \ + and @md.sc_date + rcinfo(" rcs date: #{@md.sc_date}\n") + end + if defined? @md.sc_time \ + and @md.sc_time + rcinfo(" rcs time: #{@md.sc_time}\n") + end + end + rcinfo("------------\n") + rcinfo("Note: the time generated related fields (text and digests) will vary between otherwise identical document outputs\n") + end + end + end +end +__END__ diff --git a/lib/sisu/v3/embedded.rb b/lib/sisu/v3/embedded.rb new file mode 100644 index 00000000..afdfd5a7 --- /dev/null +++ b/lib/sisu/v3/embedded.rb @@ -0,0 +1,139 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: sitemap created from parameters extracted from input file(s) + +=end +module SiSU_Embedded + require "#{SiSU_lib}/param" # param.rb + include SiSU_Param + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Env + class Source + require 'fileutils' + include FileUtils + def initialize(opt) + @opt=opt + @md=SiSU_Param::Parameters.new(@opt).get + @env=SiSU_Env::Info_env.new(@md.fns) + @rhost=SiSU_Env::Info_remote.new(@opt).remote_host_base + end + def read + songsheet + end + def songsheet + images + audio + multimedia + begin + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + end + end + def images + src="#{Dir.pwd}/_sisu/image" + ldest="#{@env.path.webserv}/#{@env.path.stub_pwd}/_sisu/image" + @rhost.each do |remote_conn| + rdest="#{remote_conn[:name]}/#{@env.path.stub_pwd}/_sisu/image" + if @md.cmd.inspect =~/[vVMR]/ \ + and FileTest.directory?(src) + mkdir_p(ldest) unless FileTest.directory?(ldest) + src_ec="#{src}/" + @md.ec[:image].join(" #{src}/") + unless @opt.fns =~/\.-sst$/ + SiSU_Env::System_call.new(src_ec,"#{ldest}/.",'q').rsync + if @md.cmd.inspect =~/R/ #rsync to remote image directory + SiSU_Env::System_call.new(src_ec,"#{rdest}/.",'q').rsync + end + end + end + end + end + def audio + #p @md.ec[:audio] + src="#{Dir.pwd}/_sisu/mm/audio" + ldest="#{@env.path.webserv}/#{@env.path.stub_pwd}/_sisu/mm/audio" + @rhost.each do |remote_conn| + rdest="#{remote_conn[:name]}/#{@env.path.stub_pwd}/_sisu/mm/audio" + if @md.cmd.inspect =~/[vVMR]/ \ + and FileTest.directory?(src) + mkdir_p(ldest) unless FileTest.directory?(ldest) + src_ec="#{src}/" + @md.ec[:audio].join(" #{src}/") + 
SiSU_Env::System_call.new(src_ec,"#{ldest}/.",'q').rsync + if @md.cmd.inspect =~/R/ #rsync to remote audio directory + SiSU_Env::System_call.new(src_ec,"#{rdest}/.",'q').rsync + end + end + end + end + def multimedia + #p @md.ec[:multimedia] + src="#{Dir.pwd}/_sisu/mm/video" + ldest="#{@env.path.webserv}/#{@env.path.stub_pwd}/_sisu/mm/video" + @rhost.each do |remote_conn| + rdest="#{remote_conn[:name]}/#{@env.path.stub_pwd}/_sisu/mm/video" + if @md.cmd.inspect =~/[vVMR]/ \ + and FileTest.directory?(src) + mkdir_p(ldest) unless FileTest.directory?(ldest) + src_ec="#{src}/" + @md.ec[:multimedia].join(" #{src}/") + SiSU_Env::System_call.new(src_ec,"#{ldest}/.",'q').rsync + if @md.cmd.inspect =~/R/ #rsync to remote video directory + SiSU_Env::System_call.new(src_ec,"#{rdest}/.",'q').rsync + end + end + end + end + end +end +__END__ diff --git a/lib/sisu/v3/epub.rb b/lib/sisu/v3/epub.rb new file mode 100644 index 00000000..aa4ede99 --- /dev/null +++ b/lib/sisu/v3/epub.rb @@ -0,0 +1,673 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: epub generation, processing + +=end +module SiSU_EPUB + require 'pstore' + require "#{SiSU_lib}/particulars" # particulars.rb + include SiSU_Particulars + require "#{SiSU_lib}/defaults" # defaults.rb + include SiSU_Viz + require "#{SiSU_lib}/xhtml_table" # xhtml_table.rb + require "#{SiSU_lib}/epub_format" # epub_format.rb + include SiSU_EPUB_Format + require "#{SiSU_lib}/epub_segments" # epub_segments.rb + include SiSU_EPUB_seg + require "#{SiSU_lib}/epub_tune" # epub_tune.rb + include SiSU_EPUB_Tune + require "#{SiSU_lib}/epub_concordance" # epub_concordance.rb + class Source + def initialize(opt) + @opt=opt + @particulars=SiSU_Particulars::Combined_singleton.instance.get_all(opt) + end + def read + songsheet + end + def songsheet + begin + @md=@particulars.md + @fnb=@md.fnb + @env=@particulars.env + loc=@env.path.url.output_tell + unless @opt.cmd =~/q/ + tool=if @opt.cmd =~/z/; "#{@env.program.epub_viewer} #{loc}/epub/#{@fnb}.epub" + elsif @opt.cmd =~/[MVv]/; "#{@env.program.epub_viewer} #{loc}/epub/#{@fnb}.epub" + else @opt.fns + end + @opt.cmd=~/[MVvz]/ \ + ? 
SiSU_Screen::Ansi.new(@opt.cmd,'EPUB',tool).green_hi_blue \ + : SiSU_Screen::Ansi.new(@opt.cmd,'EPUB',tool).green_title_hi + SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{loc}/epub/#{@fnb}.epub").flow if @opt.cmd =~/[MV]/ + end + @env.path.epub_bld #(@md) + @env.path.epub_cp_images(@md) + dir_epub=@env.path.epub + SiSU_Env::Info_skin.new(@md).select + data=nil + SiSU_Env::SiSU_file.new(@md).mkdir.output.epub + @tuned_file_array=SiSU_EPUB::Source::XHtml_environment.new(@particulars).tuned_file_instructions + data=@tuned_file_array + scr_endnotes=SiSU_EPUB::Source::Endnotes.new(@md,data).scroll + toc=SiSU_EPUB::Source::Toc.new(@md,data).songsheet + data=@tuned_file_array + scr_toc=SiSU_EPUB::Source::Scroll_head_and_segtoc.new(@md,toc).in_common #watch + SiSU_EPUB::Source::Seg.new(@md,data).songsheet + SiSU_EPUB::Source::Epub_output.new(@md).songsheet + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + unless @opt.cmd =~/[MV]/ #check maintenance flag + texfiles=Dir["#{@env.path.tune}/#{@opt.fns}*"] + texfiles.each do |f| + if FileTest.file?(f) + File.unlink(f) + end + end + end + SiSU_Env::Clear.new(@opt.cmd,@opt.fns).param_instantiate + @@flag,@@scr,@@seg,@@seg_endnotes,@@seg_subtoc,@@seg_ad={},{},{},{},{},{} + @@seg_total,@@tracker,@@loop_count,@@tablehead,@@number_of_cols=0,0,0,0,0 + @@seg_name,@@seg_name_html,@@seg_subtoc_array,@@seg_endnotes_array,@@segtocband,@@tablefoot=Array.new(7){[]} + @@filename_seg,@@seg_url,@@fn,@@to_lev4,@@get_hash_to,@@get_hash_fn='','','','','','','' + @@is4=@@is3=@@is2=@@is1=@@heading1=@@heading2=@@heading3=@@heading4=0 + end + end + private + class XHtml_environment + def initialize(particulars) + @particulars=particulars + @md,@env=particulars.md,particulars.env + @vz=SiSU_Env::Get_init.instance.skin + @env,@css,@symlnk=particulars.env,SiSU_Style::CSS.new,SiSU_Env::Create_system_link.new #home + end + def link_images + @symlnk.images + end + def directories + title=File.basename(@md.fns,'.rb') + 
SiSU_Env::SiSU_file.new(@md).mkdir.output.epub + end + def tuned_file_instructions + @tell=SiSU_Screen::Ansi.new(@md.cmd) + @md.cmd=@md.cmd.gsub(/H/,'h') + @md.file_type='html' if @md.cmd =~/[hon]/ + directories + dal_array=@particulars.dal_array # dal file drawn here + @tuned_file_array=SiSU_EPUB_Tune::Tune.new(dal_array,@md).songsheet + @tuned_file_array + end + end + class Endnotes + include SiSU_EPUB_Format + def initialize(md,data) + @md,@data=md,data + end + def scroll + @scr_endnotes=[] + format_head_scroll=SiSU_EPUB_Format::Head_scroll.new(@md) + @data.each do |dob| + pg=dob.dup + unless pg.is =~/^code/ + if pg.obj =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[\d*+]+ / + endnote_array=[] + if pg.obj=~/#{Mx[:en_a_o]}[\d*+].+?#{Mx[:en_a_c]}/m + endnote_array = pg.obj.scan(/#{Mx[:en_a_o]}[\d*+]+(.+?)#{Mx[:en_a_c]}/m) + end + if pg.obj=~/#{Mx[:en_b_o]}[\d*]+\s.+?#{Mx[:en_b_c]}/m + endnote_array = pg.obj.scan(/#{Mx[:en_b_o]}[\d*]+(.+?)#{Mx[:en_b_c]}/m) + end + if pg.obj=~/#{Mx[:en_b_o]}[\d+]+\s.+?#{Mx[:en_b_c]}/m + endnote_array = pg.obj.scan(/#{Mx[:en_b_o]}[\d+]+(.+?)#{Mx[:en_b_c]}/m) + end + endnote_array.flatten.each do |note| + txt_obj={:txt =>note} + format_scroll=SiSU_EPUB_Format::Format_scroll.new(@md,txt_obj) + @scr_endnotes << format_scroll.endnote_body + end + end + end + end + @scr_endnotes + end + end + class Toc + @@toc={ :seg=>[],:seg_mini=>[],:scr=>[],:ncx=>[],:opf=>[] } + @@seg_url='' + @@firstseg=nil + def initialize(md=nil,data='') + @md,@data=md,data + @vz=SiSU_Env::Get_init.instance.skin + @epub=SiSU_EPUB_Format::Head_information.new(@md) + @tell=SiSU_Screen::Ansi.new(@md.cmd) if @md + end + def songsheet #extracts toc for scroll & seg + SiSU_Screen::Ansi.new(@md.cmd,'Toc').txt_grey if @md.cmd =~/[MVv]/ + toc=nil + @@firstseg=nil + @@toc={ :seg=>[],:seg_mini=>[],:scr=>[],:ncx=>[],:opf=>[] } + md_opf_a_content,md_opf_a_spine=[],[] + @nav_no=1 + @@toc[:ncx] << @epub.toc_ncx.open #epub ncx navmap + @@toc[:ncx] << @epub.toc_ncx.head_open << 
@epub.toc_ncx.head << @epub.toc_ncx.head_close + @@toc[:ncx] << @epub.toc_ncx.doc_title << @epub.toc_ncx.doc_author + @@toc[:ncx] << @epub.toc_ncx.navmap_open + @@toc[:opf] << @epub.metadata_opf.package_open + @@toc[:opf] << @epub.metadata_opf.metadata + @@toc[:opf] << @epub.metadata_opf.manifest_open + @@toc[:ncx] << @epub.toc_ncx.navmap_sisu_toc(@nav_no) #epub ncx navmap, toc + @@toc[:seg] << %{
\n
} + @@toc[:scr] << %{
\n
} + md_opf_a_content << @epub.metadata_opf.manifest_content_sisu_toc + md_opf_a_spine << @epub.metadata_opf.spine_sisu_toc + @ncxo=[nil,false,false,false,false,false,false] + @dob_toc2,@dob_toc3=nil,nil + @ncx_cls=[] + @data.each do |dob| + if dob.is=='heading' \ + or dob.is=='heading_insert' + dob_toc=dob.dup + toc=case dob_toc.ln + when 1 + Toc.new(@md,dob_toc).level_1 + when 2 + @nav_no+=1 + @nav_no2=@nav_no + @ncx_cls << @epub.toc_ncx.navpoint_close if @ncxo[4] + @ncx_cls << @epub.toc_ncx.navpoint_close if @ncxo[3] + @ncx_cls << @epub.toc_ncx.navpoint_close if @ncxo[2] + @ncxo[2],@ncxo[3],@ncxo[4]=false,false,false + @dob_toc2=dob_toc + @ncxo[2]=true + Toc.new(@md,dob_toc).level_2 + when 3 + @nav_no+=1 + @nav_no3=@nav_no + @ncx_cls << @epub.toc_ncx.navpoint_close if @ncxo[4] + @ncx_cls << @epub.toc_ncx.navpoint_close if @ncxo[3] + @ncxo[3],@ncxo[4]=false,false + @dob_toc3=dob_toc + @ncxo[3]=true + Toc.new(@md,dob_toc).level_3 + when 4 + @@toc[:ncx] << @ncx_cls if @ncx_cls.length > 0 + @ncx_cls=[] + @@toc[:ncx] << @epub.toc_ncx.navpoint_top3(@dob_toc2,@nav_no2,dob_toc.name) if @dob_toc2 #epub ncx navmap + @@toc[:ncx] << @epub.toc_ncx.navpoint_top3(@dob_toc3,@nav_no3,dob_toc.name) if @dob_toc3 #epub ncx navmap + @dob_toc2,@dob_toc3=nil,nil + @nav_no+=1 + @@toc[:ncx] << @epub.toc_ncx.navpoint_close if @ncxo[4] + @ncxo[4]=false + @@toc[:ncx] << @epub.toc_ncx.navpoint(dob_toc,@nav_no) if dob_toc #epub ncx navmap + @ncxo[4]=true + md_opf_a_content << @epub.metadata_opf.manifest_content(dob_toc) + md_opf_a_spine << @epub.metadata_opf.spine(dob_toc) + Toc.new(@md,dob_toc).level_4 + when 5; Toc.new(@md,dob_toc).level_5 + when 6; Toc.new(@md,dob_toc).level_6 + else nil + end + toc.each do |k,d| + d.gsub!(/(?:#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})\s*/m,' ') + end if toc + if @@firstseg.nil? 
\ + and dob.ln==4 \ + and dob.name =~/\S+/ + @@firstseg=dob.name + end + if toc + begin + @@toc[:seg] << toc[:seg] + @@toc[:scr] << toc[:seg] + @@toc[:seg_mini] << toc[:seg_mini] if toc[:seg_mini] + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + end + end + end + end + @@toc[:ncx] << @epub.toc_ncx.navpoint_close if @ncxo[4] + @@toc[:ncx] << @epub.toc_ncx.navpoint_close if @ncxo[3] + @@toc[:ncx] << @epub.toc_ncx.navpoint_close if @ncxo[2] + @ncxo[1],@ncxo[2],@ncxo[3],@ncxo[4]=false,false,false,false + md_opf_a_content << @epub.metadata_opf.manifest_images(@md.ec[:image]) + @@toc[:seg] << "
\n
" + @@toc[:scr] << "
\n
" + @@toc[:ncx] << @epub.toc_ncx.navmap_close + @@toc[:ncx] << @epub.toc_ncx.close + @@toc[:opf] << md_opf_a_content << @epub.metadata_opf.manifest_close + @@toc[:opf] << @epub.metadata_opf.spine_open << md_opf_a_spine << @epub.metadata_opf.spine_close + @@toc[:opf] << @epub.metadata_opf.package_close + @@toc[:opf]=@@toc[:opf].flatten + Epub_output.new(@md,@@toc[:opf]).epub_metadata_opf + Epub_output.new(@md,@@toc[:ncx]).epub_toc_ncx + @md.firstseg=@@firstseg + @@toc + end + def minitoc + minitoc=@@toc[:seg_mini].join("\n") + '
' + minitoc + '
' + end + protected + def level_1 + dob=@data + linkname,link=dob.obj.strip,dob.ocn + if link \ + and link !~/#/ #% keep eye on link + p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,link) + end + title=if dob.obj !~/Document Information/; linkname + else + link='metadata' + %{#{linkname}} + end + toc={} + txt_obj={:txt =>title} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc[:seg]=if dob.name =~/^meta/ \ + and dob.obj =~/Document Information/ #check + format_toc.lev0 + else format_toc.lev1 + end + toc[:seg_mini]=if dob.name =~/^meta/ \ + and dob.obj =~/Document Information/ #check + x=if @md.concord_make + format_toc.mini_concord_tail + else format_toc.mini_tail + end + else format_toc.mini_lev1 + end + title=if dob.ocn ==0 + if dob.name =~/^meta/ \ + and dob.obj =~/Document Information/ + %{#{linkname}} + else linkname + end + else + @@toc[:scr] << '
' + link=if dob.ln; dob.ln + else '' + end + %{#{linkname}} + end + txt_obj={:txt =>title} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=if dob.name =~/^meta/ \ + and dob.obj =~/Document Information/ + format_toc.lev0 + else format_toc.lev1 + end + toc + end + def level_2 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + if ocn \ + and ocn !~/#/ + p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) + end + txt_obj={:txt =>linkname} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc={} + toc[:seg]=format_toc.lev2 + toc[:seg_mini]=format_toc.mini_lev2 + if p_num + title=%{#{p_num.goto}#{linkname}} + txt_obj={:txt =>title} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=format_toc.lev2 + end + toc + end + def level_3 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + if ocn \ + and ocn !~/#/ + p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) + end + txt_obj={:txt =>linkname} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc={} + toc[:seg]=format_toc.lev3 + toc[:seg_mini]=format_toc.mini_lev3 + if p_num + title=%{#{p_num.goto}#{linkname}} + txt_obj={:txt =>title} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=format_toc.lev3 + end + toc + end + def level_4 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) if ocn + if dob.ln==4 + seg_link=%{ + #{dob.obj} + } + @@seg_url=dob.name + elsif dob.obj =~/\d+.\d+.\d+.\d+|\d+.\d+.\d+|\d+.\d+|\d+/ + seg_link=dob.obj.gsub(/^(\d+.\d+.\d+.\d+|\d+.\d+.\d+|\d+.\d+|\d+)(.*)/, + %{} + + %{\\1 \\2 }) + end + p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) if ocn + txt_obj={:txt =>seg_link} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc={} + toc[:seg]=format_toc.lev4 + toc[:seg_mini]=format_toc.mini_lev4 + title=%{#{p_num.goto}#{linkname}} if p_num + txt_obj={:txt =>title} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + 
toc[:scr]=format_toc.lev4 + toc + end + def level_5 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + toc={} + if ocn \ + and ocn !~/#/ + p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) + lnk_n_txt=%{ + #{linkname} + } + txt_obj={:txt =>lnk_n_txt} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc[:seg]=format_toc.lev5 + toc[:seg_mini]=format_toc.mini_lev5 + title=%{#{p_num.goto}#{linkname}} + txt_obj={:txt =>title} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=format_toc.lev5 + end + toc + end + def level_6 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + toc={} + if ocn \ + and ocn !~/#/ + p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) + lnk_n_txt=%{ + #{linkname} +} + txt_obj={:txt =>lnk_n_txt} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc[:seg]=format_toc.lev6 + toc[:seg_mini]=format_toc.mini_lev6 + title=%{#{p_num.goto}#{linkname}} + txt_obj={:txt =>title} + format_toc=SiSU_EPUB_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=format_toc.lev6 + end + toc + end + def level_crosslink + dob=@data + if dob !~/^4~!/ + dob.gsub!(/^4~!\s+(\S+)\s+(.+)/, + %{ +
+ #{@png.crosslink_ext} +   \\2 + <\/a> +
+}) + else + dob.gsub!(/^4~!\s+(\S+)\s+(.+)/, + %{
+ + + #{@png.crosslink} +   \\2 + <\/a> +
+}) + end + end + end + class Scroll_head_and_segtoc < Toc + def initialize(md='',toc='',links_guide_toc='') + @md,@toc,@links_guide_toc=md,toc,links_guide_toc + @vz=SiSU_Env::Get_init.instance.skin + end + def in_common + toc_shared=[] + segtoc=[] + SiSU_Screen::Ansi.new(@md.cmd,'Scroll & Segtoc').txt_grey if @md.cmd =~/[MVv]/ + format_head_toc=SiSU_EPUB_Format::Head_toc.new(@md) + dochead=format_head_toc.head + dochead.gsub!(/toc\.(html)/,'doc.\1') #kludge + toc_shared << dochead #<< ads.div.major + segtoc << format_head_toc.head #<< ads.div.major + toc_shared << format_head_toc.toc_head_escript if SiSU_EPUB_Format::Head_toc.method_defined? :toc_head_escript + segtoc << format_head_toc.toc_head_escript if SiSU_EPUB_Format::Head_toc.method_defined? :toc_head_escript + if defined? @md.rights.all \ + and @md.rights.all + rights=format_head_toc.rights.all + rights=SiSU_EPUB_Tune::Clean_xhtml.new(rights).clean + end + if defined? @md.notes.prefix_b \ + and @md.notes.prefix_b + prefix_b=format_head_toc.prefix_b + prefix_b=SiSU_EPUB_Tune::Clean_xhtml.new(prefix_b).clean + end + seg_toc_band=format_head_toc.seg_head_navigation_band + seg_toc_band_bottom=format_head_toc.seg_head_navigation_band_bottom + tmp_head=nil + doc_title_endnote=@md.title.full.gsub(/(\*+)/,'\1') + tmp_head=doc_title_endnote + "\n" + txt_obj={:txt =>tmp_head} + format_txt_obj=SiSU_EPUB_Format::Format_text_object.new(@md,txt_obj) + toc_shared << format_txt_obj.center_bold + segtoc << format_txt_obj.center_bold + if defined? @md.creator.author \ + and @md.creator.author + creator_endnote=@md.creator.author.gsub(/(\*+)/,%{ \\1}) + tmp_head=creator_endnote + "\n" + txt_obj={:txt =>tmp_head} + format_txt_obj=SiSU_EPUB_Format::Format_text_object.new(@md,txt_obj) + toc_shared << format_txt_obj.center_bold + segtoc << format_txt_obj.center_bold + end + segtoc << seg_toc_band + tmp_head=nil + if defined? 
@md.prefix_a \ + and @md.prefix_a + tmp_head ||= %{#{@md.prefix_a}\n} + toc_shared << tmp_head.dup + segtoc << tmp_head.dup + end + tmp_head=nil + toc_shared << @links_guide_toc + if defined? @md.rights.all \ + and @md.rights.all + toc_shared << rights + end + if defined? @md.prefix_b \ + and @md.prefix_b + toc_shared << prefix_b + end + #Table of Contents added/appended here + toc_shared << @toc[:scr] + segtoc << @links_guide_toc + segtoc << @toc[:seg] + if defined? @md.rights.all \ + and @md.rights.all + segtoc << rights + end + if defined? @md.prefix_b \ + and @md.prefix_b + segtoc << prefix_b + end + #Segtoc tail added here + segtoc << seg_toc_band_bottom + segtoc << format_head_toc.xhtml_close + segtoc.flatten!.compact! + Epub_output.new(@md,segtoc).segtoc + segtoc=[] + @toc[:scr],@toc[:seg]=[],[] + toc_shared + end + end + class Table < SiSU_XHTML_table::Table_xhtml + end + class Seg < SiSU_EPUB_seg::Seg + end + class Epub_output + require 'fileutils' + include FileUtils #::Verbose + def initialize(md,output='') + @md,@output=md,output + @epub_doc="#{@md.fnb}.epub" + @epub_header=SiSU_EPUB_Format::Head_information.new(@md) + @make=SiSU_Env::Create_file.new(@md.fns) + end + def songsheet + mimetype + metainf_container + css + images if @md.ec[:image] + #concordance #uncomment to enable inclusion of concordance file + output_zip + end + def mimetype + out=@make.epub.mimetype + out<<@epub_header.mimetype + out.close + end + def metainf_container #container.xml file in META-INF directory + out=@make.epub.metainf_cont + out<<@epub_header.metainf_container + out.close + end + def css + out=@make.epub.xhtml_css + out << SiSU_EPUB_Format::Css.new.css_epub_xhtml + out.close + end + def epub_toc_ncx + begin + out=@make.epub.toc_ncx + @output.each do |para| + unless para =~/\A\s*\Z/ + out.puts para + end + end + out.close + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + end + end + def epub_metadata_opf + begin + out=@make.epub.metadata + 
@output.each do |para| + unless para =~/\A\s*\Z/ + out.puts para + end + end + out.close + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + end + end + def images + img_pth=@md.env.path.image_source_include + @md.ec[:image].each do |x| + if FileTest.directory?("#{@md.env.path.epub}/OPS/image") \ + and FileTest.file?("#{img_pth}/#{x}") + cp("#{img_pth}/#{x}","#{@md.env.path.epub}/OPS/image") + end + end + end + def concordance + SiSU_EPUB_Concordance::Source.new(@md.opt).read + end + def output_zip + mkdir_p(@md.file.output_path.epub) unless FileTest.directory?(@md.file.output_path.epub) + system(" + cd #{@md.env.path.epub} + zip -qXr9D #{@epub_doc} * + mv #{@epub_doc} #{@md.file.place_file.epub} + cd #{Dir.pwd} + ") + unless @md.cmd.inspect =~/M/ + system("rm -r #{@md.env.path.epub}") + end + end + def segtoc + begin + filename_html_segtoc=@make.epub.xhtml_segtoc + filename_html_index=@make.epub.xhtml_index + @output.each do |para| + para.strip! + unless para =~/\A\s*\Z/ + filename_html_segtoc.puts para,"\n" + filename_html_index.puts para,"\n" + end + end + filename_html_segtoc.close + filename_html_index.close + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + end + end + end + end +end +__END__ diff --git a/lib/sisu/v3/epub_concordance.rb b/lib/sisu/v3/epub_concordance.rb new file mode 100644 index 00000000..301d34a2 --- /dev/null +++ b/lib/sisu/v3/epub_concordance.rb @@ -0,0 +1,312 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: epub concordance file (html concordance, wordmap, linked index + of words in document) + +=end +module SiSU_EPUB_Concordance + require "#{SiSU_lib}/particulars" # particulars.rb + include SiSU_Particulars + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Env + require "#{SiSU_lib}/defaults" # defaults.rb + include SiSU_Viz + require "#{SiSU_lib}/epub_format" # epub_format.rb + include SiSU_EPUB_Format + class Source + def initialize(opt) + @opt=opt + @particulars=SiSU_Particulars::Combined_singleton.instance.get_all(opt) + end + def read + begin + @env,@md=@particulars.env,@particulars.md + loc=@env.url.output_tell + tool=((@md.cmd =~/[MVv]/) ? "#{@env.program.web_browser} #{loc}/#{@md.fnb}/#{@md.fn[:concordance]}" : '') + SiSU_Screen::Ansi.new(@md.cmd,"Concordance",tool).grey_title_hi unless @md.cmd =~/q/ + wordmax=@env.concord_max + unless @md.wc_words.nil? 
+ if @md.wc_words < wordmax + SiSU_EPUB_Concordance::Source::Words.new(@particulars).songsheet + else + SiSU_Screen::Ansi.new(@md.cmd,"concordance skipped, large document has over #{wordmax} words (#{@md.wc_words})").warn unless @md.cmd =~/q/ + end + else + SiSU_Screen::Ansi.new(@md.cmd,"wc (word count) is off, concordance will be processed for all files including those over the max set size of: #{wordmax} words").warn unless @md.cmd =~/q/ + SiSU_EPUB_Concordance::Source::Words.new(@particulars).songsheet + end + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + ensure + end + end + private + class Doc_title + include SiSU_Viz + #revisit, both requires (html & shared_xml) needed for stand alone operation (sisu -w [filename]) + require "#{SiSU_lib}/epub" # epub.rb + def initialize(particulars) + @particulars,@md=particulars,particulars.md + @data=SiSU_EPUB::Source::XHtml_environment.new(particulars).tuned_file_instructions + @vz=SiSU_Env::Get_init.instance.skin + txt_path=%{#{@md.dir_out}} + SiSU_Env::Info_skin.new(@md).select + @fnb=@md.fnb + @lex_button=%{SiSU home -->} + @doc_details =<
 

#{@md.title.full}

#{@md.creator.author}

+WOK + end + def create + @css=SiSU_Env::CSS_stylesheet.new(@particulars.md) + format_head_toc=SiSU_EPUB_Format::Head_toc.new(@md) + dochead=format_head_toc.head + < + #{@doc_details} +

Word index links are to html versions of the text the segmented version followed by the scroll (single document) version.
[For segmented text references [T1], [T2] or [T3] appearing without a link, indicates that the word appears in a title (or subtitle) of the text (that is identifiable by the appended object citation number).]

+

(The word listing/index is Case sensitive: Capitalized words appear before lower case)

+

+ word (number of occurences)
linked references to word within document
+ [if number of occurences exceed number of references - word occurs more than once in at least one reference. Footnote/endnotes are either assigned to the paragraph from which they are referenced or ignored, so it is relevant to check the footnotes referenced from within a paragraph as well.] +

+

+ (After the page is fully loaded) you can jump directly to a word by appending a hash (#) and the word to the url for this text, (do not forget that words are case sensitive, and may be listed twice (starting with and without an upper case letter)), #your_word # [ http://[web host]/#{@fnb}/concordance.html#your_word ] +

+WOK + end + end + class Word + @@word_previous='' + def initialize(word,freq) + @word,@freq=word,freq + end + def html + w=if @word.capitalize==@@word_previous + %{\n

#{@word}

(#{@freq})

\n\t

} + else n=@word.strip.gsub(/\s+/,'_') #also need to convert extended character set to html + %{\n

#{@word}

(#{@freq})

\n\t

} + end + @@word_previous=@word.capitalize + w + end + end + class Words + require "#{SiSU_lib}/defaults" # defaults.rb + include SiSU_Viz + require "#{SiSU_lib}/epub_format" # epub_format.rb + include SiSU_EPUB_Format + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Screen + def initialize(particulars) + @particulars=particulars + begin + @vz=SiSU_Env::Get_init.instance.skin + @env,@md,@dal_array=particulars.env,particulars.md,particulars.dal_array + @path="#{@env.path.epub}" + @freq=Hash.new(0) + @rxp_lv1=/^#{Mx[:lv_o]}1:/ #fix Mx[:lv_o] + @rxp_lv2=/^#{Mx[:lv_o]}2:/ #fix Mx[:lv_o] + @rxp_lv3=/^#{Mx[:lv_o]}3:/ #fix Mx[:lv_o] + @rxp_seg=/^#{Mx[:lv_o]}4:(\S+?)#{Mx[:lv_c]}/ + @rxp_title=Regexp.new("^#{Mx[:meta_o]}title#{Mx[:meta_c]}\s*(.+?)\s*$") + @rxp_t1=Regexp.new('^T1') + @rxp_t2=Regexp.new('^T2') + @rxp_t3=Regexp.new('^T3') + @rxp_excluded1=/(?:https?|file|ftp):\/\/\S+/ + @rxp_excluded0=/^(?:#{Mx[:fa_bold_o]}|#{Mx[:fa_italics_o]})?(?:to\d+|\d+| |#{Mx[:br_endnotes]}|EOF|#{Mx[:br_eof]}|thumb_\S+|snap_\S+|_+|-+|[(]?(?:ii+|iv|vi+|ix|xi+|xiv|xv|xvi+|xix|xx)[).]?|\S+?_\S+|[\d_]+\w\S+|[\w\d]{1,2}|\d{1,3}\w?|[0-9a-f]{16,64}|\d{2,3}x\d{2,3}|\S{0,2}sha\d|\S{0,3}\d{4}w\d\d|\b\w\d+|\d_all\b|e\.?g\.?)(?:#{Mx[:fa_bold_c]}|#{Mx[:fa_italics_c]})?$/mi #this regex causes and cures a stack dump in ruby 1.9 !!! 
+ @rgx_splitlist=%r{[—.,;:-]+|#{Mx[:nbsp]}+}mi + @rgx_scanlist=%r{#{Mx[:fa_italics_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_italics_c]}|#{Mx[:fa_bold_o]}[a-zA-Z0-9"\s]{2,12}#{Mx[:fa_bold_c]}|#{Mx[:url_o]}https?://\S+?#{Mx[:url_c]}|file://\S+|<\S+?>|\w+|[a-zA-Z]+}mi + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + end + end + def songsheet + begin + @file_concordance=File.open("#{@path}/content/#{@md.fn[:epub_concord]}",'w') + map_para + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + ensure + @file_concordance.close + end + end + protected + def location_scroll(wordlocation,show) + @wordlocation=wordlocation + %{#{@wordlocation}; } + end + def location_seg(wordlocation,show) + @wordlocation,@show=wordlocation,show + @word_location_seg=wordlocation.gsub(/(.+?)\#(\d+)/,"#{@md.fnl[:pre]}\\1#{@md.fnl[:mid]}#{Sfx[:epub_xhtml]}#{@md.fnl[:post]}#o\\2") unless wordlocation.nil? + case @wordlocation + when @rxp_t1 + %{[H]#{@show}, } + when @rxp_t2 + %{[H]#{@show}, } + when @rxp_t3 + %{[H]#{@show}, } + else %{#{@show}, } + end + end + def map_para + @seg,toy=nil,nil + @word_map={} + @dal_array.each do |line| + if defined? line.ocn + if line.is =~/heading/ and line.ln==4; @seg=line.name + end + if line.ocn.to_s =~/\d+/; toy=line.ocn.to_s + end + if toy =~/\d+/ \ + and toy !~/^0$/ + line.obj=line.obj.split(@rgx_splitlist).join(' ') #%take in word or other match + for word in line.obj.scan(@rgx_scanlist) #%take in word or other match + word.gsub!(/#{Mx[:lnk_o]}|#{Mx[:lnk_c]}|#{Mx[:url_o]}|#{Mx[:url_c]}/,'') + word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,'') + word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') + word.gsub!(/#{Mx[:gl_o]}#[a-z]+#{Mx[:gl_c]}/,'') + word.gsub!(/#{Mx[:gl_o]}#[0-9]+#{Mx[:gl_c]}/,'') + word.gsub!(/^\S$/,'') + word=nil if word.empty? 
+ word=nil if word =~@rxp_excluded0 #watch + word=nil if word =~@rxp_excluded1 #watch + word=nil if word =~/^\S$/ + if word + word.gsub!(/#{Mx[:br_nl]}|#{Mx[:br_line]}/,' ') + word.gsub!(/#{Mx[:fa_o]}[a-z]{1,7}#{Mx[:fa_o_c]}|#{Mx[:fa_c_o]}[a-z]{1,7}#{Mx[:fa_c]}/,'') + word.gsub!(/#{Mx[:mk_o]}(?:[0-9a-f]{32}:[0-9a-f]{32}|[0-9a-f]{64}:[0-9a-f]{64})#{Mx[:mk_c]}/,'') + word.gsub!(/#{Mx[:mk_o]}(?:[0-9a-f]{32}|[0-9a-f]{64})#{Mx[:mk_c]}/,'') + word.gsub!(/#{Mx[:en_a_o]}(?:\d|[*+])*|#{Mx[:en_b_o]}(?:\d|[*+])*|#{Mx[:en_a_c]}|#{Mx[:en_b_c]}/mi,'') + word.gsub!(/#{Mx[:fa_o]}\S+?#{Mx[:fa_o_c]}/,''); word.gsub!(/#{Mx[:fa_c_o]}\S+?#{Mx[:fa_c]}/,'') + word.gsub!(/<\/?\S+?>/,'') + word.gsub!(/^\@+/,'') + word.strip! + word.gsub!(/#{Mx[:tc_p]}.+/,'') + word.gsub!(/[\.,;:"]$/,'') + word.gsub!(/["]/,'') + word.gsub!(/^\s*[\(]/,'') + word.gsub!(/[\(]\s*$/,'') + word.gsub!(/^(?:See|e\.?g\.?).+/,'') + word.gsub!(/^\s*[.,;:]\s*/,'') + word.strip! + word.gsub!(/^\(?[a-zA-Z]\)$/,'') + word.gsub!(/^\d+(st|nd|rd|th)$/,'') + word.gsub!(/^(\d+\.?)+$/, '') + word.gsub!(/#{Mx[:mk_o]}|#{Mx[:mk_c]}/,'') + word.gsub!(/:name#\S+/,'') + word.gsub!(/^\S$/,'') + word=nil if word =~/^\S$/ + word=nil if word =~/^\s*$/ #watch + if word + unless word =~/[A-Z][A-Z]/ \ + or word =~/\w+\s\w+/ + word.capitalize! + end + @freq[word] +=1 + @word_map[word] ||= [] + if line !~@rxp_lv1 \ + and line !~@rxp_lv2 \ + and line !~@rxp_lv3 + @word_map[word] << location_seg("#{@seg}\##{toy}",toy) + else + @word_map[word] << case line + when @rxp_lv1; location_seg('T1',toy) + when @rxp_lv2; location_seg('T2',toy) + when @rxp_lv3; location_seg('T3',toy) + end + end + end + end + end + end + end + end + scr='Full Text scroll: doc#  ' + seg='' + @file_concordance << SiSU_EPUB_Concordance::Source::Doc_title.new(@particulars).create + alph=%W[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @file_concordance << '

' + alph.each {|x| @file_concordance << %{#{x}, }} + @file_concordance << '

' + letter=alph.shift + @file_concordance << %{\n

A

} + for word in @freq.keys.sort! {|a,b| a.downcase<=>b.downcase} + f=/^(\S)/.match(word)[1] + if letter < f.upcase + while letter < f.upcase + if alph.length > 0 + letter=alph.shift + @file_concordance << %{\n

#{letter}

} + else break + end + end + end + keyword=SiSU_EPUB_Concordance::Source::Word.new(word,@freq[word]).html + if keyword !~ @rxp_excluded0 + if @word_map[word][0] =~ /\d+/ + wm=[] + @file_concordance << %{#{keyword}#{seg}#{@word_map[word].uniq.compact.join}} + end + @file_concordance << '

' + end + # special cases endnotes and header levels 1 - 3 + end + credits=@vz.credits_sisu_epub + @file_concordance << %{>#{credits}\n} # footer + end + end + end +end +__END__ diff --git a/lib/sisu/v3/epub_format.rb b/lib/sisu/v3/epub_format.rb new file mode 100644 index 00000000..c189eb3a --- /dev/null +++ b/lib/sisu/v3/epub_format.rb @@ -0,0 +1,2030 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: epub formating, css template + +=end +module SiSU_EPUB_Format + include SiSU_Viz + class Paragraph_number + def initialize(md,ocn) + @md,@ocn=md,ocn.to_s + @ocn ||='' + vz=SiSU_Env::Get_init.instance.skin + @skin_no_ocn=if defined? 
vz.ocn_display_off \ + and vz.ocn_display_off==true + true + else false + end + end + def ocn_display + if @md.markup.inspect =~/no_ocn/ \ + or @md.mod.inspect =~/--no-ocn/ \ + or @skin_no_ocn + ocn_class='ocn_off' + @ocn.gsub(/^(\d+|)$/, + %{}) + elsif @ocn.to_i==0 + @ocn.gsub(/^(\d+|)$/, + %{}) + else + ocn_class='ocn' + @ocn.gsub(/^(\d+|)$/, + %{}) + end + end + def name + %{} + end + def id #w3c? "tidy" complains about numbers as identifiers ! annoying + %{id="o#{@ocn}"} + end + def goto + %{} + end + end + class Css + def css_epub_xhtml +< + + +WOK + end +=begin +~/epub + |-- META-INF + | `-- container.xml #✓ simple, make sure full-path of rootfile points to metadata.opf + |-- content + | |-- 1.xhtml + | |-- 2.xhtml + | |-- 3.xhtml + | |-- ... .xhtml + | |-- concordance.xhtml + | |-- css + | | `-- xhtml.css + | |-- endnotes.xhtml + | |-- image + | | |-- arrow_next_red.png + | | |-- arrow_prev_red.png + | | |-- arrow_up_red.png + | | `-- bullet_09.png + | |-- index.xhtml + | |-- meta.xhtml + | |-- metadata.xhtml + | `-- toc.xhtml + |-- metadata.opf #(i) metadata dc; (ii) manifest (contents); (iii) spine (mimetypes) + |-- mimetype #✓ application/epub+zip + `-- toc.ncx #✓ (i) head (ii) doc title (iii) navmap, list of navigation points (like chapters) +=end + def doc_type + doc_type_xhtml + end + def mimetype + < + + + + + +WOK + end + def toc_ncx #list of navigation points (like chapters), table of contents, listing each navigation point (chapters and such) under the navigation map + def structure + open + head_open + head + head_close + doc_title + doc_author + navmap_open + #navmap ... 
+ navmap_close + close + end + def open + < +WOK + end + def close + < +WOK + end + def head_open + < +WOK + end + def head + depth=@md.lvs[2] + @md.lvs[3] + @md.lvs[4] + < + + + + + +WOK + end + def head_close + < +WOK + end + def doc_title + < + #{@md.title.full} + +WOK + end + def doc_author + < + #{@md.author} + +WOK + end + def navmap_open + < +WOK + end + def navmap_sisu_toc(no) + < + + Table of Contents + + + +WOK + end + def navpoint(dob,no) + < + + #{dob.obj} + + +WOK + end + def navpoint_top3(dob,no,name) + < + + #{dob.obj} + + +WOK + end + def navpoint_close + < +WOK + end + def navmap_close + < +WOK + end + self + end + def metadata_opf #(i) metadata dc; (ii) manifest (contents); (iii) spine (mimetypes) + def structure + package_open + metadata_open + metadata_close + manifest_open + manifest_close + spine_open + spine_close + package_close + end + def package_open + < + +WOK + end + def package_close + < +WOK + end + def metadata #metadata dc + author=if defined? @md.creator.author \ + and @md.creator.author =~/\S+/ + x=@md.creator.author.gsub!(//,'>') + @md.creator.author.gsub!(/<br(?: \/)?>/,'
') + %{\n #{x}} + else '' + end + illustrator=if defined? @md.creator.illustrator \ + and @md.creator.illustrator =~/\S+/ + x=@md.creator.illustrator.gsub!(//,'>') + @md.creator.illustrator.gsub!(/<br(?: \/)?>/,'
') + %{\n #{x}} + else '' + end + rights=if defined? @md.rights.all \ + and @md.rights.all =~/\S+/ + rights=@md.rights.all.gsub(/
/,'
') + %{\n #{rights}} + else '' + end + < + #{@md.title.full} + #{author}#{illustrator} + en-US + ... + #{rights} + urn:uuid:#{@md.dgst[1]} + +WOK + end + def manifest_open + < + + + + + +WOK + end + def manifest_content_sisu_toc + < +WOK + end + def manifest_content(dob) + < +WOK + end + def manifest_images(imgs) + imgs=imgs + ['arrow_next_red.png','arrow_prev_red.png','arrow_up_red.png','bullet_09.png'] + images=[" \n"] + imgs.each do |i| + image,type=/(\S+?)\.(png|jpg|gif)/.match(i)[1,2] + images<<< +WOK + end + images=images.join('') + images + end + def manifest_close + < +WOK + end + def spine_open + #spine: reading order of HTML files from manifest, idref attribute refers back to id in manifest (exclude images, CSS etc.). + < +WOK + end + def spine_sisu_toc + < +WOK + end + def spine(dob) + < +WOK + end + def spine_close + < +WOK + end + self + end + def toc_head_escript + end + def seg_head_escript + end + def table_close + %{ +#{@vz.table_close}} + end + def buttons_home + %{ + #{@vz.banner_home_and_index_buttons} +} + end + def copyat + %{

copy @ + + #{@vz.txt_home} +

} + end + def xhtml_close + %{#{@vz.credits_sisu_epub} + +} + end + end + class Widget < Head_information + def initialize(md) + super(md) + @md=md + @cf_defaults=SiSU_Env::Info_processing_flag.new + end + def home + %{
+ + #{@vz.png_homepage} + + + #{@vz.nav_txt_manifest} + +
+ + #{wgt.seg(@vz.nav_txt_toc_link)}#{wgt.scroll(@vz.nav_txt_doc_link)}#{wgt.pdf}#{wgt.odf} +
+ #{wgt.concordance(@vz.nav_txt_concordance)} + #{wgt.manifest} + #{wgt.search} + #{wgt.home} +
} + end + def head + %{#{doc_type} + + +#{@css.xhtml_epub} + +#{@vz.color_body} +#{@vz.js_top}} + end + def concordance + if @md.concord_make + %{#{@vz.margin_css} +

+ + Concordance + +

+#{@vz.table_close}} + else + %{#{@vz.margin_css} +#{@vz.table_close}} + end + end + def links_guide_open(type='horizontal') + if type=='vertical'; links_guide_vertical_open + else links_guide_horizontal_open + end + end + def prefix_a + end + def rights + def all + rghts=@md.rights.all.gsub(/
/,'
') + rghts=rghts.gsub(/^\s*Copyright\s+\(C\)/,'Copyright © ') + %{

Rights: #{rghts}

} + end + self + end + def prefix_b + %{

Prefix: #{@md.prefix_b}} + end + def make_seg + concord=concordance_link(@vz.nav_txt_concordance) + %{ + +
+ #{@vz.nav_txt_toc_link} + + + #{concord} +#{@vz.table_close}} + end + def manifest #check structure + manifest=manifest_link(@vz.nav_txt_manifest) + %{#{@vz.margin_txt_3} + #{@vz.paragraph_font_small} + #{manifest} + +#{@vz.table_close}} + end + def concordance #check structure + concord=concordance_link(@vz.nav_txt_concordance) + %{#{@vz.margin_txt_3} + #{@vz.paragraph_font_small} + #{concord} + +#{@vz.table_close}} + end + def metadata + %{#{@vz.margin_css} +

+ + MetaData + +

+#{@vz.table_close}} + end + def seg_metadata + @metalink=%{./#{@md.fn[:metadata]}} + metadata + end + end + class Head_seg < Head_information + def initialize(md) + super(md) + end + def head + %{#{doc_type} + + + #{@seg_name_xhtml[@seg_name_xhtml_tracker]} - + #{@md.html_title} + + +#{@vz.font_css_table_file} + +#{@vz.color_body} +#{@vz.js_top}} + end + def title_banner(title,subtitle,creator) + end + def dot_control_pre_next + %{ + + +
+ + #{@vz.png_nav_dot_pre} + + + + #{@vz.png_nav_dot_toc} + + + + #{@vz.png_nav_dot_nxt} + +#{@vz.table_close}} + end + def dot_control_pre + %{ + + +
+ + #{@vz.png_nav_dot_pre} + + + + #{@vz.png_nav_dot_toc} + + + + #{@vz.png_nav_dot_nxt} + +#{@vz.table_close}} + end + def toc_nav(f_pre=false,f_nxt=false,use=1) + pre=nxt='' + toc=%{ + #{@vz.epub_png_nav_toc} + +} + pre=%{ + #{@vz.epub_png_nav_pre} + +} if f_pre==true + nxt=%{ + #{@vz.epub_png_nav_nxt} + +} if f_nxt==true + %{

+#{pre} +#{toc} +#{nxt} +

+} + end + def toc_next2 + pre,nxt=false,true + toc_nav(false,true).dup + end + def toc_pre_next2 + toc_nav(true,true).dup + end + def toc_pre2 + toc_nav(true,false).dup + end + def manifest_link(text) + %{ + #{text} + } + end + def concordance_link(text) + if @md.concord_make + %{ + #{text} + } + else '' + end + end + def doc_types #used in seg_nav_band ### + scroll=seg='' + wgt=Widget.new(@md) + x=if @md.concord_make + %{ + + + #{wgt.seg(@vz.nav_txt_toc_link)} + #{wgt.scroll(@vz.nav_txt_doc_link)} + #{wgt.pdf}#{wgt.odf} +
+ #{wgt.concordance(@vz.nav_txt_concordance)} + #{wgt.manifest} + #{wgt.search} + #{wgt.home} +
} + else + %{ + + + #{wgt.seg(@vz.nav_txt_toc_link)} + #{wgt.scroll(@vz.nav_txt_doc_link)} + #{wgt.pdf}#{wgt.odf} +
+ #{wgt.manifest} + #{wgt.search} + #{wgt.home} +
} + end + end + def navigation_table + %{ +
+ + + } + end + def navigation_table1 + %{ +
+ } + end + def navigation_table2 + %{ +
+ } + end + def heading_advert_local_1 + dir=SiSU_Env::Info_env.new(@fns) + %{

+ +
+ + #{@md.ad_alt} + +#{@vz.table_close} +

} + end + def heading_advert_local_2 + dir=SiSU_Env::Info_env.new(@fns) + %{

+ +
+ + #{@md.ad_alt} + +#{@vz.table_close} +

} + end + def heading_advert_external + dir=SiSU_Env::Info_env.new(@fns) + %{

+ +
+ + #{@md.ad_alt} + +#{@vz.table_close} +

} + end + def navigation_band(segtocband) #change name to navigaion_band_banner + %{#{segtocband} +} + end + def navigation_band_bottom(segtocband,seg_table_top_control) #change name to navigaion_band_bannerless + %{ +
+ + + +
+ #{doc_types} + + #{segtocband} +
+ #{seg_table_top_control} +
+} + end + def endnote_mark +%{ +
+} + end + def endnote_section_open +%{ +
+} + end + def endnote_section_close +%{ +
+} #revisit + end + def head + %{#{doc_type} + + + #{@seg_name_xhtml[@seg_name_xhtml_tracker]} - + #{@md.html_title} + + +#{@css.xhtml_epub} + +#{@vz.color_body} +#{@vz.js_top}} + end + def toc_metadata + @metalink=%{./#{@md.fn[:metadata]}} + toc_metadata + end + def title_banner(title,subtitle,creator) + end + end + class Head_scroll < Head_toc + def initialize(md) + super(md) + end + def toc_owner_details + %{#{@vz.margin_txt_3} +#{@vz.paragraph_font_small} + + Owner Details + +     + + + +#{@vz.table_close}} + end + end + class Format_text_object + @vz=SiSU_Env::Get_init.instance.skin + attr_accessor :md,:t_o,:txt,:ocn,:format,:table,:link,:linkname,:paranum,:p_num,:headname,:banner,:url + def initialize(md,t_o) + @md,@t_o=md,t_o + if t_o.class==Hash + @txt =t_o[:txt] || nil + @ocn =t_o[:ocn] || nil + @ocn_display =t_o[:ocn_display] || nil + @headname =t_o[:headname] || nil + @trailer =t_o[:trailer] || nil + @endnote_part_a =t_o[:endnote_part_a] || nil + @endnote_part_b =t_o[:endnote_part_b] || nil + @lnk_url =t_o[:lnk_url] || nil + @lnk_txt =t_o[:lnk_txt] || nil + @format =t_o[:format] || nil + @target =t_o[:target] || nil #occasionally passed but not used + if @format and not @format.empty? + if @format=~/^\d:(\S+)/ #need more reliable marker #if @format =~ /#{Rx[:lv]}/ + headname=$1 #format[/\d~(\S+)/m,1] + @headname=if headname =~/^[a-zA-Z]/; %{} #consider: h_#{headname} + else %{} + end + @headname=if headname =~/^[a-zA-Z]/; %{} #consider: h_#{headname} + else %{} + end + end + end + elsif t_o.class.inspect =~/Object/ + @dob=t_o if defined? t_o.is + @named=nametags_seg(@dob) + @txt=((defined? t_o.obj) ? t_o.obj : nil) + @ocn=((defined? t_o.ocn) ? t_o.ocn.to_s : nil) + @headname=((t_o.is=='heading' and defined? t_o.name) ? t_o.name : nil) + else + if @md.cmd =~/M/ + p __FILE__ +':'+ __LINE__.to_s + p t_o.class + p caller + end + end + if @txt and not @txt.empty? 
+ @txt.gsub!(/#{Mx[:mk_o]}[-~]##{Mx[:mk_c]}/,'') + end + @p_num=Paragraph_number.new(@md,@ocn) + @vz=SiSU_Env::Get_init.instance.skin + end + def nametags_seg(dob) #FIX + tags='' + if defined? dob.tags \ + and dob.tags.length > 0 # insert tags "hypertargets" + dob.tags.each do |t| + tags=tags +%{} + end + end + tags + end + def endnote_body + %{ +

+ #{@txt} +

+} + end + def endnote_body_indent + %{ +

+ #{@txt} +

+} + end + def no_paranum + %{ +
+ +

+ #{@txt} +

+
+} + end + def para_form_css(tag,attrib) # regular paragraphs shaped here + ul=ulc='' + ul,ulc="
    \n ","\n
" if @tag =~/li/ + %{ +
+ #{@p_num.ocn_display} + #{ul}<#{tag} class="#{attrib}" #{@p_num.id}> + #{@named}#{@txt} + #{ulc} +
+} + end + def para + para_form_css('p','norm') + end + def group + para_form_css('p','group') + end + def block + para_form_css('p','block') + end + def alt + para_form_css('p','alt') + end + def verse + para_form_css('p','verse') + end + def code + para_form_css('p','code') + end + def center + para_form_css('p','center') + end + def bold + para_form_css('p','bold') + end + def bullet + para_form_css('li','bullet') + end + def table + @txt=if @t_o.obj !~/^
') + @txt.gsub!(/#{Mx[:br_obj]}/,'

') + para_form_css('p','norm') + end + def format(tag,attrib) + para_form_css(tag,attrib) + end + def title_heading(tag,attrib) + %{ +
+<#{tag} class="#{attrib}"> + #{@named}#{@txt} + +
+} + end + def title_heading1 + title_heading('h1','tiny') + end + def title_heading2 + title_heading('h2','tiny') + end + def title_heading3 + title_heading('h3','tiny') + end + def title_heading4 + '' + end + def seg_heading_sub(tag,attrib) + @txt.gsub!(/(?:#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})\s*/m,' ') + %{ +
+ #{@p_num.ocn_display} + <#{tag} class="#{attrib}" #{@p_num.id}>#{@p_num.name} + #{@named}#{@txt} + +
+} + end + def seg_heading4 + %{ +
+ #{@p_num.ocn_display} +

+ #{@txt} +

+
+} + end + def seg_heading5 + seg_heading_sub('p','bold') + end + def seg_heading6 + seg_heading_sub('p','bold') + end + def dl #check :trailer + "
#{@txt} #{@trailer}
" + end + def table_css_end + '
+

+ ' + end + def gsub_body + case @txt + when /^(?:#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]}\s*)?\((i+|iv|v|vi+|ix|x|xi+)\)/ + @txt.gsub!(/^\((i+|iv|v|vi+|ix|x|xi+)\)/,'(\1)') + @txt.gsub!(/^(#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]})\s*\((i+|iv|v|vi+|ix|x|xi+)\)/,'\1(\2)') + when /^(?:#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]}\s*)?\(?(\d|[a-z])+\)/ + @txt.gsub!(/^\((\d+|[a-z])+\)/,'(\1)') + @txt.gsub!(/^(#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]})\s*\((\d+|[a-z])+\)/,'\1(\2)') + when /^\s*\d{1,3}\.\s/ + @txt.gsub!(/^\s*(\d+\.)/,'\1') + when /^\s*[A-Z]\.\s/ + @txt.gsub!(/^\s*([A-Z]\.)/,'\1') + end + end + def bold_para + %{#{@vz.margin_txt_0} +

+ #{@txt} +

+#{@vz.margin_num_css} +     +#{@vz.table_close}} + end + def bold_heading + @txt.gsub!(/[1-9]~\S+/,'') + @txt.gsub!(/[1-9]~/,'') + %{

+ #{@txt} +

+#{@vz.margin_num_css} +     +#{@vz.table_close}} + end + def toc_head_copy_at + %{

#{@txt}

\n} + end + def center + %{

#{@txt}

\n} + end + def bold + %{

#{@txt}

\n} + end + def center_bold + %{

#{@txt}

\n} + end + end + class Format_scroll < Format_text_object + def initialize(md,txt) + super(md,txt) + @vz=SiSU_Env::Get_init.instance.skin + end + end + class Format_seg < Format_text_object + def initialize(md,txt) + super(md,txt) + end + def navigation_toc_lev1_advert + %{#{@banner.home_button}\n +

+#{@txt} +#{@two} +

} + end + def navigation_toc_lev1 + %{#{@banner.nav_toc}} + end + def navigation_toc_lev2 #change bold use css + %{ + +
+ + + #{@txt} + +

+#{@vz.table_close}} + end + def navigation_toc_lev3 #change bold use css + %{ + +
+ + + #{@txt} + +

+#{@vz.table_close}} + end + def navigation_toc_lev4 + %{ + +
+ +

+ #{@txt} +

+#{@vz.table_close}} + end + def navigation_toc_lev5 + end + def navigation_toc_lev6 + end + def endnote_seg_body(fn='') #FIX #url construction keep within single line... BUG WATCH 200408 + fn='doc' if fn.to_s.empty? #you may wish to reconsider, sends to 'doc' where no segment info + %{ +

+ #{@endnote_part_a}#{@md.fnl[:pre]}#{fn}#{@md.fnl[:mid]}#{Sfx[:epub_xhtml]}#{@md.fnl[:post]}#{@endnote_part_b} +

+} + end + def clean(txt) + txt.gsub!(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/,'') + txt.gsub!(/#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]}/,'') + txt + end + def subtoc_lev(tag,attrib) + @txt=clean(@txt) + txt=if @txt \ + and @txt =~/<\/?i>|/mi + @txt.gsub(/<\/?i>|/mi,'') #removes name markers from subtoc, go directly to substantive text + else @txt + end + note='' + if txt =~/(#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})/m # had \s* at end + note=$1 + note.gsub!(/[\n\s]+/m,' ') + txt.gsub!(/(?:#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})\s*/m,' ') + txt.gsub!(/ \d+<\/sup> /m,'') + txt.gsub!(/#{Mx[:nbsp]}\d+<\/sup>#{Mx[:nbsp]}/m,'') #remove + end + %{<#{tag} class="#{attrib}"> + #{txt} #{note} + } + end + def subtoc_lev5 + subtoc_lev('h5','subtoc') if @txt + end + def subtoc_lev6 + subtoc_lev('h6','subtoc') if @txt + end + def heading_sub(tag,attrib) + @txt.gsub!(/(?:#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})\s*/m,' ') + %{ +
+ #{@p_num.ocn_display} + <#{tag} class="#{attrib}" #{@p_num.id}> #{@headname} + #{@txt} + +
+} + end + def heading4 + %{ +
+ #{@p_num.ocn_display} +

+ #{@t_o[:format]} + #{@txt} +

+
+} + end + def heading5 + heading_sub('p','bold') + end + def heading6 + heading_sub('p','bold') + end + def navigation_heading4 + %{ + +} + end + def scroll(text) + if @md.fns =~ /\.(?:-|ssm\.)?sst$/ + scroll=%{ +} + end + end + def seg(text) + %{ +} + end + def search + env=SiSU_Env::Info_env.new(@md.fns,@md) + env.widget.search_form('sisusearch',nil,nil,true) + end + def manifest + manifest_lnk=if @file.by_language? \ + or @file.by_filetype? + "#{Xx[:html_relative1]}manifest/#{@file.base_filename.manifest}" + else @file.base_filename.manifest + end + %{} + end + def pdf #retired 2.7.9 + pdf=if @md.programs[:pdf] \ + and @cf_defaults.cf_0 =~/p/ + %{ + + +} + else '' + end + end + def txt #retired 2.7.9 + txt=if @cf_defaults.cf_0 =~/[at]/ + %{ +} + else '' + end + end + def epub #retired 2.7.9 + epub=if @cf_defaults.cf_0 =~/e/ + %{ +} + else '' + end + end + def odf #retired 2.7.9 + odf=if @cf_defaults.cf_0 =~/o/ + %{ +} + else '' + end + end + def concordance(text) #retired 2.7.9 + if @md.concord_make \ + and @md.wc_words < 300000 #max word count for display of concordance here as would now be a large file + %{} + else '' + end + end + end + class XML + end + class Head_toc < Head_information + def initialize(md) + super(md) + @md=md + end + def scroll_head_navigation_band + pdf=if @md.programs[:pdf] + < + #{make_seg_scroll_pdf} + +WOK + else '' + end + %{
+

+ #{@txt} +

+#{@vz.table_close}} + end + def navigation_heading5 + %{

+ #{@txt} +

} + end + def navigation_heading6 + %{

+ #{@txt} +

} + end + def navigation_center + %{

#{@txt}

} + end + end + class Format_toc < Format_text_object + def initialize(md,txt) + super(md,txt) + end + def links_guide + %{
  • + + #{@lnk_txt} + +
  • +} + end + def lev(tag,attrib) + if @txt + %{<#{tag} class="#{attrib}"> + #{@txt} + +} + else '' + end + end + def lev1 + lev('h1','toc') + end + def lev2 + lev('h2','toc') + end + def lev3 + lev('h3','toc') + end + def lev4 + lev('h4','toc') + end + def lev5 + lev('h5','toc') + end + def lev6 + lev('h6','toc') + end + def lev0 #docinfo + lev('h0','toc') + end + def mini_lev1 + lev('h1','minitoc') + end + def mini_lev2 + lev('h2','minitoc') + end + def mini_lev3 + lev('h3','minitoc') + end + def mini_lev4 + lev('h4','minitoc') + end + def mini_lev5 + lev('h5','minitoc') + end + def mini_lev6 + lev('h6','minitoc') + end + def mini_lev0 #docinfo + lev('h0','minitoc') + end + def mini_tail + %{ +

    + Manifest (alternative outputs) +

    +} + end + def mini_concord_tail + %{ +

    + Concordance (wordlist) +

    +

    + Manifest (alternative outputs) +

    +} + end + end +end +__END__ diff --git a/lib/sisu/v3/epub_segments.rb b/lib/sisu/v3/epub_segments.rb new file mode 100644 index 00000000..bd13b08b --- /dev/null +++ b/lib/sisu/v3/epub_segments.rb @@ -0,0 +1,525 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: epub segment generation, processing + +=end +module SiSU_EPUB_seg + require "#{SiSU_lib}/shared_xhtml" # shared_xhtml.rb + require "#{SiSU_lib}/epub" # epub.rb + require "#{SiSU_lib}/shared_metadata" # shared_metadata.rb + class Seg_output + def initialize(md,outputfile,seg,minitoc,type='') + @md,@output_epub_cont_seg,@seg,@minitoc,@type=md,outputfile,seg,minitoc,type + end + def output #CONSIDER + if @seg[:title] =~/\S/ + filename_seg=[] + filename_seg << @seg[:title] << @seg[:nav] + if @type=='endnotes' + @seg[:headings]=[] + format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) + @seg[:headings] << format_head_seg.title_banner(@md.title.main,@md.title.sub,@author) + txt_obj={:txt =>'Endnotes',:ocn_display =>''} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + @seg[:headings] << format_seg.title_heading1 + filename_seg << @seg[:heading_endnotes] << @seg[:headings] << %{\n
    \n} << @seg[:endnote_all] << '
    ' + elsif @type=='idx' + @seg[:headings]=[] + format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) + @seg[:headings] << format_head_seg.title_banner(@md.title.main,@md.title.sub,@author) + txt_obj={:txt =>'Index',:ocn_display =>''} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + @seg[:headings] << format_seg.title_heading1 + filename_seg << @seg[:heading_idx] << @seg[:headings] << %{\n
    \n} << @seg[:idx] << '
    ' + elsif @type=='metadata' + metadata=Metadata::Summary.new(@md).xhtml_display.metadata + @seg[:headings]=[] + format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) + @seg[:headings] << format_head_seg.title_banner(@md.title.main,@md.title.sub,@author) + txt_obj={:txt =>'Metadata',:ocn_display =>''} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + @seg[:headings] << format_seg.title_heading1 + filename_seg << @seg[:heading_idx] << @seg[:headings] << %{\n
    \n} << metadata << '
    ' + elsif @type=='sisu_manifest' + env=SiSU_Env::Info_env.new(@md.fns) + path_and_name,url_and_name="#{env.path.output}/#{@md.fnb}/sisu_manifest.html","#{env.url.root}/#{@md.fnb}/sisu_manifest.html" + manifest=if FileTest.file?("#{path_and_name}")==true + <A list of available output types may be available at the following url:

    +

    #{url_and_name}

    +WOK + else '' + end + @seg[:headings]=[] + format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) + @seg[:headings] << format_head_seg.title_banner(@md.title.main,@md.title.sub,@author) + txt_obj={:txt =>'Manifest',:ocn_display =>''} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + @seg[:headings] << format_seg.title_heading1 + filename_seg << @seg[:heading_idx] << @seg[:headings] << %{\n
    \n} << manifest << '
    ' + else + filename_seg << @seg[:headings] << @seg[:main] << "\n\n" + end + filename_seg << @seg[:tail] << @seg[:nav] << @seg[:close] + filename_seg.flatten!.compact! + filename_seg.each do |str| + unless str =~/\A\s*\Z/ + str.strip! + @output_epub_cont_seg << str + end + end + @output_epub_cont_seg.close + end + end + end + class Seg + @@seg,@@seg_subtoc,@@seg_endnotes,@@seg_ad={},{},{},{} + @@seg_name,@@seg_name_xhtml=[],[] + @@seg_url=@@fn=@@get_hash_to=@@get_hash_fn='' + @@loop_count=@@seg_total=@@tracker=0 + @@is4=@@is3=@@is2=@@is1=0 + @@heading1=@@heading2=@@heading3=@@heading4=0 + @@seg[:nav],@@seg[:title],@@seg[:headings],@@seg[:main],@@seg[:idx],@@seg[:tail],@@seg_subtoc_array,@@seg_endnotes_array,@@seg[:endnote_all]=Array.new(10){[]} + @@seg[:heading_endnotes]='' + @@tablehead,@@number_of_cols=0,0 + @@segtocband=nil + @@fns_previous='' + attr_reader :seg_name_xhtml,:seg_name_xhtml_tracker + def initialize(md='',data='') + @md,@data=md,data + @vz=SiSU_Env::Get_init.instance.skin + @seg_name_xhtml=@@seg_name_xhtml || nil + @seg_name_xhtml_tracker=@@tracker || nil + end + def songsheet + begin + @minitoc=SiSU_EPUB::Source::Toc.new(@md,@data).minitoc + data=get_subtoc_endnotes(@data) + data=articles(data) + Seg.new.cleanup # (((( added )))) + #### (((( END )))) #### + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + ensure + @@seg_name=[] + end + end + protected + def articles(data) + track,tracking,newfile=0,0,0 + @@is4=@@is3=@@is2=@@is1=0 + printed_endnote_seg='n' + idx_xhtml=nil + if @md.book_idx + idx_xhtml=SiSU_Particulars::Combined_singleton.instance.get_idx_xhtml(@md).xhtml_idx + idx_xhtml.each {|x| @@seg[:idx] << x } + @@seg[:heading_idx]='' + end + data.each do |dob| + if (dob.is=='heading' or dob.is=='heading_insert') \ + and dob.ln==4 + @@seg_name << dob.name + seg_name=dob.name + end + end + @@seg_name_xhtml=@@seg_name + @@seg_total=@@seg_name.length + testforartnum=@@seg_name_xhtml + 
SiSU_Screen::Ansi.new(@md.cmd,@@seg_name.length) if @md.cmd =~/[MVv]/ + map_nametags=SiSU_Particulars::Combined_singleton.instance.get_map_nametags(@md).nametags_map #p map_nametags + data.each do |dob| + #if defined? dob.obj \ + #and dob.obj =~/href="#{Xx[:segment]}#+\S+?"/ + # ##Consider: remove, reinstate earlier? + # #while dob.obj =~/href="#{Xx[:segment]}#+(\S+?)"/ + # # m=$1 + # # if map_nametags[m][:segname] + # # dob.obj.sub!(/href="#{Xx[:segment]}#+(\S+?)"/,%{href="#{map_nametags[m][:segname]}#{Sfx[:html]}#\\1"}) + # # else + # # p "NOT FOUND name_tags: #{m}" + # # dob.obj.sub!(/href="#{Xx[:segment]}#+(\S+?)"/,%{href="#\\1"}) # not satisfactory + # # end + # #end + #end + if (dob.is=='heading' or dob.is=='heading_insert') \ + and dob.ln==4 + if dob.ocn==0 + @@heading4=dob.obj + else @@heading4=dob.obj + end + @@is4=newfile=1 + end + if (dob.is=='heading' or dob.is=='heading_insert') \ + and dob.ln==3 + @@heading3=dob.obj + @@is4,@@is3=0,1 + end + if (dob.is=='heading' or dob.is=='heading_insert') \ + and dob.ln==2 + @@heading2=dob.obj + @@is4,@@is3,@@is2=0,0,1 + end + if (dob.is=='heading' or dob.is=='heading_insert') \ + and dob.ln==1 + @@heading1=dob.obj + @@is4,@@is3,@@is2,@@is1=0,0,0,1 + end + if (@@is1 && !@@is2 && !@@is3 && !@@is4) + if not (dob.is=='heading' or dob.is=='heading_insert') \ + and dob.ln==1 + head1=$_ #; check + end + end + if @@is4==1 + dir_epub_cont="#{@md.env.path.epub}/OPS" + if newfile==1 \ + or dob.obj =~/^#{Mx[:br_endnotes]}|^#{Mx[:br_eof]}/ + newfile=0 + if (dob.is=='heading' or dob.is=='heading_insert') \ + and dob.ln==4 + if tracking != 0 + Seg.new(@md).tail + segfilename="#{dir_epub_cont}/#{@md.fnl[:pre]}#{@@seg_name_xhtml[tracking-1]}#{@md.fnl[:mid]}#{Sfx[:epub_xhtml]}#{@md.fnl[:post]}" + output_epub_cont_seg=File.new(segfilename,'w') if @@seg_name_xhtml[tracking-1] + if dob.is=='heading' \ + or @@seg_name_xhtml[tracking-1] !~/endnotes|book_index|metadata/ + Seg_output.new(@md,output_epub_cont_seg,@@seg,@minitoc).output + 
elsif dob.is=='heading_insert' + if @@seg_name_xhtml[tracking-1]=='endnotes' + Seg_output.new(@md,output_epub_cont_seg,@@seg,@minitoc,'endnotes').output + elsif @@seg_name_xhtml[tracking-1]=='book_index' + Seg_output.new(@md,output_epub_cont_seg,@@seg,@minitoc,'idx').output + @@seg[:idx]=[] + elsif @@seg_name_xhtml[tracking-1]=='metadata' # navigation bug FIX + Seg_output.new(@md,output_epub_cont_seg,@@seg,@minitoc,'metadata').output + else puts "#{__FILE__}::#{__LINE__}" + end + else puts "#{__FILE__}::#{__LINE__}" + end + Seg.new.reinitialise + heading_art(dob) + head(dob) + if @@seg_name_xhtml[tracking]=='sisu_manifest' # this is for manifest, includes navigation bug + segfilename="#{dir_epub_cont}/#{@md.fnl[:pre]}#{@@seg_name_xhtml[tracking]}#{@md.fnl[:mid]}#{Sfx[:epub_xhtml]}#{@md.fnl[:post]}" + output_epub_cont_seg=File.new(segfilename,'w') + Seg_output.new(@md,output_epub_cont_seg,@@seg,@minitoc,'sisu_manifest').output + Seg.new.reinitialise #BUG navigation bug with items following metadata, and occurring before manifest, this becomes a bug ... 
work area for book index, FIX + end + #@output_epub_cont_seg.close #%(((( EOF )))) --> + end + if tracking==0 + heading_art(dob) + head(dob) + end + end + tracking=tracking+1 + end + @@get_hash_to=dob.name if (dob.is=='heading' or dob.is=='heading_insert') and dob.ln==4 and dob.name + @@get_hash_fn=dob.name if (dob.is=='heading' or dob.is=='heading_insert') and dob.ln==4 and dob.name + if dob.obj.class==String + markup(dob) + elsif dob.obj.class==Array + dob.obj.each do |pg| + markup(pg) + end + end + if testforartnum[tracking-1] =~/endnote/ + if printed_endnote_seg=='n' + printed_endnote_seg='y' + end + end + end + end + data + end + def heading_art(dob) + format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) + if dob.is=='heading' \ + and dob.ln =~/^[1-6]/ + if @@tracker < @@seg_total-1; @@seg[:dot_nav]=format_head_seg.dot_control_pre_next + else @@seg[:dot_nav]=format_head_seg.dot_control_pre + end + end + @@seg[:title]=format_head_seg.head + end + def head(dob) + clean=/|<:.*?>$/ + format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) + if @@tracker < @@seg_total-1 + if @@tracker==0; @@segtocband=format_head_seg.toc_next2 #if format_head_seg.toc_next2 + else @@segtocband=format_head_seg.toc_pre_next2 #if format_head_seg.toc_pre_next2 + end + else @@segtocband=format_head_seg.toc_pre2 #if format_head_seg.toc_pre2 + end + @p_num ||= '' + if @@is1==1 + if defined? 
@md.creator.author \ + and @md.creator.author + @author=%{#{@md.creator.author}\n} + end + @@seg[:nav] << format_head_seg.navigation_band(@@segtocband) + ocn=if @@heading1[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#{@dp}:#{@dp}#{Mx[:id_c]}$/]; $1 #fix + else '' + end + @p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) + txt_obj={:txt =>@@heading1,:ocn_display =>@p_num.ocn_display} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + @@seg[:headings] << format_seg.title_heading1.gsub(clean,'') + @@heading1.gsub!(/  [\d*+]+<\/sup> <\/a>/,'') + end + if @@is2==1 + heading2=@@heading2 + ocn=if heading2[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#{@dp}:#{@dp}#{Mx[:id_c]}$/]; $1 #fix + else '' + end + @p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) + txt_obj={:txt =>heading2,:ocn_display =>@p_num.ocn_display} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + @@seg[:headings] << format_seg.title_heading2.gsub(clean,'') + @@heading2.gsub!(/  [\d*+]+<\/sup> <\/a>/,'') + end + if @@is3==1 + heading3=@@heading3 + ocn=if heading3[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#{@dp}:#{@dp}#{Mx[:id_c]}$/]; $1 #fix + else '' + end + @p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) + txt_obj={:txt =>heading3,:ocn_display =>@p_num.ocn_display} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + @@seg[:headings] << format_seg.title_heading3.gsub(clean,'') + @@heading3.gsub!(/  [\d*+]+<\/sup> <\/a>/,'') + end + if @@is4==1 + heading4=@@heading4 + ocn=if heading4[/.+?#{Mx[:id_o]}~(\d+);(?:[oh]|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#{@dp}:#{@dp}#{Mx[:id_c]}$/]; $1 #fix + else '' + end + @p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,ocn) + txt_obj={:txt =>heading4,:ocn_display =>@p_num.ocn_display} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + @@seg[:headings] << format_seg.title_heading4.gsub(clean,'') + end + @@tracker=@@tracker+1 + end + 
def markup(dob) + @debug=[] + format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) + if dob.is =~/(?:heading|para)/ #extend as necessary FIX + @p_num=SiSU_EPUB_Format::Paragraph_number.new(@md,dob.ocn) + end + sto=SiSU_EPUB_Format::Format_text_object.new(@md,dob) + dob_xhtml=if dob.is=='heading' or dob.is=='heading_insert' or dob.is=='para' + dob_xhtml=if dob.is=='heading' or dob.is=='heading_insert' + if dob.ln==4 + sto.seg_heading4 # work on see Split_text_object + elsif dob.ln==5 + sto.seg_heading5 + elsif dob.ln==6; sto.seg_heading6 + end + elsif dob.is=='para' + if dob.indent and dob.indent =~/[1-9]/ + if dob.bullet_ + sto.format('li',"i#{dob.indent}") + else sto.format('p',"i#{dob.indent}") + end + else + if dob.bullet_ + sto.format('li','bullet') + else sto.para + end + end + end + elsif dob.is =~/^(?:block|group|alt)$/ + sto.para #fix this should be block type specific #FIX + elsif dob.is=='verse' + sto.verse + elsif dob.is=='code' + sto.code + elsif dob.is=='table' + sto.table + elsif dob.is=='break' + sto.break + end + if @md.flag_separate_endnotes # may need to revisit, check + dob.obj.gsub!(/"\s+href="#note_ref(\d+)">/,%{" href=\"endnotes#{Sfx[:epub_xhtml]}#note_ref\\1">}) #endnote- twice #removed file type + end + if dob.is =~/heading|para/ and (not dob.ocn or dob.ocn.to_s.empty?) + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,dob) + end + if (dob.is=='heading' or dob.is=='heading_insert' or dob.is=='para') \ + and dob.note_ #dob.obj =~/ \n} + @@seg[:main] << dob_xhtml + @@seg[:main] << @@seg_subtoc[@@get_hash_fn] #% insertion of sub-toc + else + @@seg[:main] << dob_xhtml + end + end + def tail + format_head_seg=SiSU_EPUB_Format::Head_seg.new(@md) + if @md.flag_auto_endnotes and @@seg_endnotes[@@get_hash_fn] + @@seg[:tail] << %{\n
    \n
    \n} + if @@seg_endnotes[@@get_hash_fn].flatten.length > 0 + @@seg[:tail] << format_head_seg.endnote_mark + @@seg[:tail] << @@seg_endnotes[@@get_hash_fn].flatten #endnotes deposited at end of individual segments ||@|EXTRACTION OF ENDNOTES| + end + @@seg[:tail] << '
    ' + @@seg[:tail] << '
    ' #this div closes div class content + end + @@seg[:close]=[] + @@seg[:close] << format_head_seg.xhtml_close + end + def reinitialise + @@seg[:title],@@seg[:dot_nav],@@seg[:nav],@@seg[:headings],@@seg[:main],@@seg[:tail],@@seg[:credits]=Array.new(8){[]} + @@segtocband=nil + end + def cleanup + reinitialise + @@seg_total,@@tracker=0,0 + @@seg_endnotes,@@seg_subtoc={},{} + @@seg_endnotes_array,@@seg_subtoc_array=[],[] + @@seg[:endnote_all]=[] + end + def get_subtoc_endnotes(data) #get endnotes & sub-table of contents subtoc + data.each do |dob| + dob.obj.gsub!(/
    (.+?)<\/a>/mi,'\1') + if @md.flag_auto_endnotes + if (dob.is=='heading' or dob.is=='heading_insert') \ + and dob.ln.to_s =~/^[1234]/ \ + and not @@fn.to_s.empty? + @@seg_endnotes[@@fn]=[] + @@seg_endnotes[@@fn] << @@seg_endnotes_array + @@seg_endnotes_array=[] if dob.ln==4 + @@fns_previous=@md.fns if dob.ln==4 and dob.name =~/^meta/ + end + if (dob.is=='heading' or dob.is=='heading_insert') \ + and dob.ln==4 #% EXTRACTION OF SUB-TOCs & SEGMENT NAME, after EXTRACTION OF ENDNOTES & SUB-TOCs + @@seg_subtoc[@@fn]=@@seg_subtoc_array + @@seg_subtoc_array=[] + if dob.name \ + and dob.obj + @@fn=dob.name + else + if dob.name =~/\S+/ + @@fn=dob.name + else @@fn='' + end + end + end + end + if dob.is=='heading' \ + and dob.ln.to_s =~/^[56]/ + case dob.ln + when 5 + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,dob) + subtoc=format_seg.subtoc_lev5 #keep and make available, this is the subtoc + when 6 + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,dob) + subtoc=format_seg.subtoc_lev6 #keep and make available, this is the subtoc + end + @@seg_subtoc_array << subtoc + end + if @md.flag_auto_endnotes + ast,pls='*','+' + if dob.obj =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(?:\d|#{ast}|#{pls})+ / \ + and dob.is !~/^code/ # endnote- + endnote_array=[] + if dob.obj=~/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/m + endnote_array << dob.obj.scan(/#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}/m) + end + if dob.obj=~/#{Mx[:en_b_o]}#{ast}\d+\s.+?#{Mx[:en_b_c]}/m + endnote_array << dob.obj.scan(/#{Mx[:en_b_o]}#{ast}\d+\s.+?#{Mx[:en_b_c]}/m) + end + if dob.obj=~/#{Mx[:en_b_o]}#{pls}\d+\s.+?#{Mx[:en_b_c]}/m + endnote_array << dob.obj.scan(/#{Mx[:en_b_o]}#{pls}\d+\s.+?#{Mx[:en_b_c]}/m) + end + endnote_array.flatten! #.compact! #check compacting + endnote_array.each do |note| + note_match=note.dup + note_match_seg=note.dup + e_n=note_match_seg[/(?:#{Mx[:en_a_o]}(?:\d|#{ast}|#{pls})+|#{Mx[:en_b_o]}(?:#{ast}|#{pls})\d+)\s+(.+?)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/m,1] + try=e_n.split(/
    /) + try.each do |e| + txt_obj={:txt =>e} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + note_match=if e =~/#{Mx[:pa_o]}i[1-9]#{Mx[:pa_c]}/ + format_seg.endnote_body_indent + else format_seg.endnote_body + end + @@seg_endnotes_array << note_match + end + try.join('
    ') + #% creation of separate end segment/page of all endnotes referenced back to reference segment + m=/(?:#{Mx[:en_a_o]}(?:\d|#{ast}|#{pls})+|#{Mx[:en_b_o]}(?:#{ast}|#{pls})\d+)\s+(.+?href=")(#note_ref(?:\d|_a|_b)+".+)(?:#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/mi + endnote_part_a=note_match_seg[m,1] + endnote_part_b=note_match_seg[m,2] + txt_obj={:endnote_part_a =>endnote_part_a,:endnote_part_b =>endnote_part_b} + format_seg=SiSU_EPUB_Format::Format_seg.new(@md,txt_obj) + note_match_all_seg=format_seg.endnote_seg_body(@@fn) #BUG WATCH 200408 + @@seg[:endnote_all] << note_match_all_seg + end + dob.obj.gsub!(/(?:#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})\s*/m,' ') + end + end + end + end + end +end +__END__ diff --git a/lib/sisu/v3/epub_tune.rb b/lib/sisu/v3/epub_tune.rb new file mode 100644 index 00000000..23b0acf0 --- /dev/null +++ b/lib/sisu/v3/epub_tune.rb @@ -0,0 +1,417 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: epub generation, epub pre-processing + +=end +require "#{SiSU_lib}/param" +module SiSU_EPUB_Tune + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Env; include SiSU_Screen + require "#{SiSU_lib}/epub_format" # epub_format.rb #watch + @@line_mode='' + @@endnote_array=[] + @@endnote_call_counter=1 + @@table_align=' + + +
    + \;' + @@table_align_close=' + +   
    ' + @@counter,@@column,@columns=0,0,0 + class Output + def initialize(data,md) + @data,@md=data,md + @file=SiSU_Env::SiSU_file.new(@md) + @cX=SiSU_Screen::Ansi.new(@md.cmd).cX + end + def hard_output + @filename_tune=@file.write_file_processing.html_tune + data=[] + @data.each{|x| x.obj.strip; data << x if not x.obj.empty?} #1.9 array? + data.each do |dob| + @filename_tune.puts dob, "\n" #check + end + end + def marshal + File.open(@file.marshal.html_tune,'w') {|f| Marshal.dump(@data.to_a,f)} + end + end + class Clean_xhtml + def initialize(html='') + @html=html + end + def clean + html=@html + str=if html.class==String + html + else html.obj + end + str.gsub!(/#{Mx[:gl_o]}(#[0-9]{3})#{Mx[:gl_c]}/u,'&\1;') + str.gsub!(/#{Mx[:gl_o]}#([a-z]{2,4})#{Mx[:gl_c]}/u,'&\1;') + str.gsub!(/\*/u,'*') # * + str.gsub!(/\+/u,'+') # + + str.gsub!(/¢/u,'¢') # ¢ + str.gsub!(/£/u,'£') # £ + str.gsub!(/¥/u,'¥') # ¥ + str.gsub!(/§/u,'§') # § + str.gsub!(/©/u,'©') # © + str.gsub!(/ª/u,'ª') # ª + str.gsub!(/«/u,'«') # « + str.gsub!(/®/u,'®') # ® + str.gsub!(/°/u,'°') # ° + str.gsub!(/±/u,'±') # ± + str.gsub!(/²/u,'²') # ² + str.gsub!(/³/u,'³') # ³ + str.gsub!(/µ/u,'µ') # µ + str.gsub!(/¶/u,'¶') # ¶ + str.gsub!(/¹/u,'¹') # ¹ + str.gsub!(/º/u,'º') # º + str.gsub!(/»/u,'»') # » + str.gsub!(/¼/u,'¼') # ¼ + str.gsub!(/½/u,'½') # ½ + str.gsub!(/¾/u,'¾') # ¾ + str.gsub!(/×/u,'×') # × + str.gsub!(/÷/u,'÷') # ÷ + str.gsub!(/¿/u,'¿') # ¿ + str.gsub!(/À/u,'À') # À + str.gsub!(/Á/u,'Á') # Á + str.gsub!(/Â/u,'Â') # Â + str.gsub!(/Ã/u,'Ã') # Ã + str.gsub!(/Ä/u,'Ä') # Ä + str.gsub!(/Å/u,'Å') # Å + str.gsub!(/Æ/u,'Æ') # Æ + str.gsub!(/Ç/u,'Ç') # Ç + str.gsub!(/È/u,'È') # È + str.gsub!(/É/u,'É') # É + str.gsub!(/Ê/u,'Ê') # Ê + str.gsub!(/Ë/u,'Ë') # Ë + str.gsub!(/Ì/u,'Ì') # Ì + str.gsub!(/Í/u,'Í') # Í + str.gsub!(/Î/u,'Î') # Î + str.gsub!(/Ï/u,'Ï') # Ï + str.gsub!(/Ð/u,'Ð') # Ð + str.gsub!(/Ñ/u,'Ñ') # Ñ + str.gsub!(/Ò/u,'Ò') # Ò + str.gsub!(/Ó/u,'Ó') # Ó + str.gsub!(/Ô/u,'Ô') # Ô + 
str.gsub!(/Õ/u,'Õ') # Õ + str.gsub!(/Ö/u,'Ö') # Ö + str.gsub!(/Ø/u,'Ø') # Ø + str.gsub!(/Ù/u,'Ù') # Ù + str.gsub!(/Ú/u,'Ú') # Ú + str.gsub!(/Û/u,'Û') # Û + str.gsub!(/Ü/u,'Ü') # Ü + str.gsub!(/Ý/u,'Ý') # Ý + str.gsub!(/Þ/u,'Þ') # Þ + str.gsub!(/ß/u,'ß') # ß + str.gsub!(/à/u,'à') # à + str.gsub!(/á/u,'á') # á + str.gsub!(/â/u,'â') # â + str.gsub!(/ã/u,'ã') # ã + str.gsub!(/ä/u,'ä') # ä + str.gsub!(/å/u,'å') # å + str.gsub!(/æ/u,'æ') # æ + str.gsub!(/ç/u,'ç') # ç + str.gsub!(/è/u,'è') # è + str.gsub!(/é/u,'é') # é + str.gsub!(/ê/u,'ê') # ê + str.gsub!(/ë/u,'ë') # ë + str.gsub!(/ì/u,'ì') # ì + str.gsub!(/í/u,'í') # í + str.gsub!(/î/u,'î') # î + str.gsub!(/ï/u,'ï') # ï + str.gsub!(/ð/u,'ð') # ð + str.gsub!(/ñ/u,'ñ') # ñ + str.gsub!(/ò/u,'ò') # ò + str.gsub!(/ó/u,'ó') # ó + str.gsub!(/ô/u,'ô') # ô + str.gsub!(/õ/u,'õ') # õ + str.gsub!(/ö/u,'ö') # ö + str.gsub!(/ø/u,'ø') # ø + str.gsub!(/ù/u,'ù') # ú + str.gsub!(/ú/u,'ú') # û + str.gsub!(/û/u,'û') # ü + str.gsub!(/ü/u,'ü') # ý + str.gsub!(/þ/u,'þ') # þ + str.gsub!(/ÿ/u,'ÿ') # ÿ + str.gsub!(/ý/u,'ý') + str.gsub!(/
    /u,'
    ') + str.gsub(/#{Mx[:nbsp]}/u,' ') + str + end + end + class Tune + def initialize(data,md) + @data,@md=data,md + @vz=SiSU_Env::Get_init.instance.skin + @env=SiSU_Env::Info_env.new(@md.fns) + @sys=SiSU_Env::System_call.new + @env=SiSU_Env::Info_env.new(@md.fns) + @brace_url=SiSU_Viz::Skin.new.url_decoration + end + def songsheet + begin + @cX=SiSU_Screen::Ansi.new(@md.cmd).cX + SiSU_Screen::Ansi.new(@md.cmd,'Tune').txt_grey if @md.cmd =~/[MVv]/ + data=Tune.new(@data,@md).amp_html + data=Tune.new(data,@md).endnotes_html + data=Tune.new(data,@md).url_markup + data=Tune.new(data,@md).markup + if @md.cmd =~/M/ #Hard Output Tune Optional on/off here + data=Output.new(data,@md).hard_output + Output.new(data,@md).marshal + end + tuned=Tune.new(@data,@md).output + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + ensure + end + end + def para_numbers + data=@data + @tuned_file=[] + data.each do |dob| + dob.gsub!(/#{Mx[:lv_o]}\d:(\S?)#{Mx[:lv_c]}/,'\0#\1. ') #fix Mx[:lv_o] + @tuned_file << dob + end + end + def markup + @tuned_file=[] + @data.each do |dob| + dob.obj.gsub!(/#{Mx[:mk_o]}#([a-zA-Z]+)#{Mx[:mk_c]}/,'&\1;') + dob.obj.gsub!(/#{Mx[:mk_o]}(#[0-9]+)#{Mx[:mk_c]}/,'&\1;') + dob.obj.gsub!(/#{Mx[:br_line]}|#{Mx[:br_nl]}/,'
    ') unless dob.is=='table' + dob.obj.gsub!(/#{Mx[:fa_bold_o]}(.+?)#{Mx[:fa_bold_c]}/,'\1') + dob.obj.gsub!(/#{Mx[:fa_italics_o]}(.+?)#{Mx[:fa_italics_c]}/,'\1') + dob.obj.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') + dob.obj.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') + dob.obj.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') + dob.obj.gsub!(/#{Mx[:fa_insert_o]}(.+?)#{Mx[:fa_insert_c]}/,'\1') + dob.obj.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') + dob.obj.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') + dob.obj.gsub!(/#{Mx[:fa_monospace_o]}(.+?)#{Mx[:fa_monospace_c]}/,'\1') # tt, kbd + dob.obj.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') + dob.obj.gsub!(/^#{Mx[:gl_bullet]}/m,'●  ') + dob.obj.gsub!(/#{Mx[:nbsp]}/,' ') + dob.obj.gsub!(/<(p|br)>/,'<\1 />') + dob=SiSU_EPUB_Tune::Clean_xhtml.new(dob).clean + @tuned_file << dob + end + end + def urls(data) + @words=[] + map_nametags=SiSU_Particulars::Combined_singleton.instance.get_map_nametags(@md).nametags_map #p map_nametags + data.each do |word| + @words << if word=~/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)/ + http_=true + if word =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/ + m,u=/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/.match(word).captures + elsif word =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/ + http_=false + m,u=/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}#{Mx[:rel_o]}(\S+?)#{Mx[:rel_c]}/.match(word).captures + elsif word =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}image/ + m,u=/#{Mx[:lnk_o]}(.+?)#{Mx[:lnk_c]}(image)/.match(word).captures + end + case m + when /\.png|\.jpg|\.gif|c=|\s\d+x\d+/ + w,h=/\s(\d+)x(\d+)/.match(m).captures if m =~/\s\d+x\d+/ + w=%{width="#{w}"} if w + h=%{height="#{h}"} if h + c=m[/"(.+?)"/m,1] + caption=%{

    #{c}

    } if c + png=m.scan(/\S+/)[0] + image_path='./image' + ins=if u \ + and u.strip !~/^image$/ + %{
    #{caption}} + else %{#{caption}} + end + word.gsub!(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|image)/,ins) + else + link=m[/(.+)/m] + png=m.scan(/\S+/)[0].strip + link=link.strip + u.sub!(/^#*/,'') #make neater + if map_nametags[u] \ + and map_nametags[u][:segname] + if u=~/^\d+$/ + u.gsub!(/^(\d+)$/,"#{map_nametags[u][:segname]}#{Sfx[:xhtml]}#o\\1") if u !~/\// unless http_ + else + u.gsub!(/(\S+)/,"#{map_nametags[u][:segname]}#{Sfx[:xhtml]}#\\1") if u !~/\// unless http_ + end + elsif u =~/^:/ + u.gsub!(/^:/,"#{@env.url.root}/") + elsif u =~/^\.\.\// + u.gsub!(/^\.\.\//,"#{@env.url.root}/") + elsif u =~/https?:\/\// + else p "NOT FOUND name_tags: #{u}" + end + ins=%{#{link}} + word.gsub!(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}#{Mx[:url_o]}\S+?#{Mx[:url_c]}/,ins) + word.gsub!(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}/,ins) + end + word + else word + end + word + end + @words=@words.join(' ') + end + def url_markup + data=@data + @tuned_file=[] + data.each do |dob| + unless dob.is=='code' + if dob.obj =~/<::\s+/ #watch + dob.obj.gsub!(/<::\s+(\S+?)\s+!>/, + %{\\1}) + end + if dob.obj =~/<:image\s+/ + dob.obj.gsub!(/<:image\s+(http\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+>/, + %{}) + dob.obj.gsub!(/<:image\s+(http\S+)\s+(\S+)\s+>/, + %{}) + dob.obj.gsub!(/<:image\s+(\S+)\s+(\S+)\s+(\S+)\s+>/, + %{}) + dob.obj.gsub!(/<:image\s+(\S+)\s+>/, + %{}) + end + if dob.obj =~/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)/ + @word_mode=dob.obj.scan(/#{Mx[:lnk_o]}.+?#{Mx[:lnk_c]}(?:#{Mx[:url_o]}\S+?#{Mx[:url_c]}|#{Mx[:rel_o]}\S+?#{Mx[:rel_c]}|image)[()\[\]]*[,.;:!?'"]{0,2}|(?:#{Mx[:gl_o]}\S+?#{Mx[:gl_c]})+|[^#{Mx[:lnk_o]}#{Mx[:lnk_c]}]+/mu) + words=urls(@word_mode) + dob.obj.gsub!(/.+/m,words) + end + dob.obj.gsub!(/\\copyright/i,%{©}) + if (dob.obj !~/\<:ad\s+\.\.\//) + dob.obj.gsub!(/\<:ad\s+(\S+)?\s+(\S+\.png)\s+(.+)?\;\s+(.+)?\;\s*!\>/, + %{\n
    \\3
    \n}) + else + dob.obj.gsub!(/\<:ad\s+(\S+)?\s+(\S+\.png)\s+(.+)?\;\s+(.+)?\;\s*\>/, + %{\n
    \\3
    \n}) + end + dob.obj.gsub!(/!pick/,%{stellar}) + dob.obj.gsub!(/!new/,%{ new}) + dob.obj.gsub!(/<:h(.{1,7}?)>/,'\1') + dob.obj.gsub!(/<:to(\d{1,7}?)>/,'to { \1 } ') + if dob.obj =~/\b\S+\@\S+?\.\S+/ \ + and dob.obj !~/(\"\S+\@\S+?\.\S+\"|>\S+\@\S+?\.\S+?<)/ + dob.obj.gsub!(/([a-zA-Z0-9._-]+\@\S+?\.[a-zA-Z0-9._-]+)/,'<\1>') + end + dob.obj.gsub!(/#{Mx[:url_o]}_(\S+?)#{Mx[:url_c]}/,'\1') #http ftp matches escaped, no decoration + dob.obj.gsub!(/#{Mx[:url_o]}(\S+?)#{Mx[:url_c]}/,%{#{@brace_url.xml_open}\\1#{@brace_url.xml_close}}) #http ftp matches with decoration + if dob.obj =~/..\/\S+/ \ + and dob.obj !~/(\"..\/\S+?\"|>\s*..\/\S+<)/ + dob.obj.gsub!(/(\.\.\/\S+)/,'\1') + end + dob.obj.gsub!(//m,'\1>') #code-block: angle brackets special characters + dob.obj.gsub!(/(^|[^}])_/m,'\1>') + end + @tuned_file << dob + end + end + def amp_html + data=@data + data.each do |dob| + dob.obj.gsub!(/&/u,'&') + end + data + end + def endnotes_html + data=@data + @tuned_file=[] + a,s='_a','_s' + ast,pls='*','+' + data.each do |dob| + unless dob.is =~/^code/ + dob.obj.gsub!(/(#{Mx[:en_a_o]}|#{Mx[:en_b_o]})(\d+)\s+(.+?)(#{Mx[:en_a_c]}|#{Mx[:en_b_c]})/, + %{#{Mx[:nbsp]}#{Mx[:nbsp]}\\2#{Mx[:nbsp]} } + #note- endnote- + %{\\1\\2 #{Mx[:nbsp]}\\2. \\3 \\4}) #endnote- note- (careful may have switched) + dob.obj.gsub!(/(#{Mx[:en_b_o]})[*](\d+)\s+(.+?)(#{Mx[:en_b_c]})/, + %{#{Mx[:nbsp]}#{Mx[:nbsp]}#{ast}\\2#{Mx[:nbsp]} } + #note- endnote- + %{\\1#{ast}\\2 #{Mx[:nbsp]}#{ast}\\2. \\3 \\4}) #endnote- note- (careful may have switched) + dob.obj.gsub!(/(#{Mx[:en_b_o]})[+](\d+)\s+(.+?)(#{Mx[:en_b_c]})/, + %{#{Mx[:nbsp]}#{Mx[:nbsp]}#{pls}\\2#{Mx[:nbsp]} } + #note- endnote- + %{\\1#{pls}\\2 #{Mx[:nbsp]}#{pls}\\2. 
\\3 \\4}) #endnote- note- (careful may have switched) # double-check there may here be a bug + if dob.obj =~/#{Mx[:en_a_o]}([*+]+)\s+.+?#{Mx[:en_a_c]}/ + m=$1.length.to_i + dob.obj.gsub!(/(#{Mx[:en_a_o]})[*]+\s+(.+?)(#{Mx[:en_a_c]})/, + %{#{Mx[:nbsp]}#{Mx[:nbsp]}#{ast*m}#{Mx[:nbsp]} } + #note- endnote- + %{\\1#{ast*m} #{Mx[:nbsp]}#{ast*m} \\2 \\3}) #endnote- note- (careful may have switched) + dob.obj.gsub!(/(#{Mx[:en_a_o]})([+]+)\s+(.+?)(#{Mx[:en_a_c]})/, + %{#{Mx[:nbsp]}#{Mx[:nbsp]}#{pls*m}#{Mx[:nbsp]} } + #note- endnote- + %{\\1#{pls*m} #{Mx[:nbsp]}#{pls*m} \\2 \\3}) #endnote- note- (careful may have switched) + end + end + @tuned_file << dob + end + end + def output + data=@data + @tuned_file=[] + data.each do |dob| + dob.obj.strip! + dob.obj.chomp! + @tuned_file << dob + end + @tuned_file << "\n" if (@md.fns =~/\.sst0/) #remove + @tuned_file + end + end +end +__END__ diff --git a/lib/sisu/v3/errors.rb b/lib/sisu/v3/errors.rb new file mode 100644 index 00000000..d761e1db --- /dev/null +++ b/lib/sisu/v3/errors.rb @@ -0,0 +1,81 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. 
If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, error screen reporting + +=end +module SiSU_Errors + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Env; include SiSU_Screen + class Info_error . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +module SiSU_Git + require "#{SiSU_lib}/param" # param.rb + require "#{SiSU_lib}/sysenv" # sysenv.rb + require "#{SiSU_lib}/dal" # dal.rb + class Source + include FileUtils #::Verbose + def initialize(opt) + @opt=opt + @env=SiSU_Env::Info_env.new + @md=SiSU_Param::Parameters.new(@opt).get + @file=SiSU_Env::SiSU_file.new(@md) + l=SiSU_Env::Standardise_language.new.file_to_language(@opt.fns) + unless @md.i18n[0]==l[:c] + p "using: #{@md.i18n[0]} (@make: :language:); filename #{@md.fns} filename language: #{l[:c]}, mismatch" + end + if @env.multilingual? + m=/((.+?)(?:\~\w{2,3})?)\.((?:-|ssm\.)?sst|ssm)$/ #watch added match for sss + fnn,fnb,fnt=@opt.fns[m,1],@opt.fns[m,2],@opt.fns[m,3] + else m=/(.+?)\.((?:-|ssm\.)?sst|ssm)$/ + fnb=@fnn=@opt.fns[m,1] + fnt=@opt.fns[m,2] + end + git_path_fnb=@env.path.processing_path_git + '/' + fnb + #unless @opt.cmd =~/q/ + # @opt.cmd=~/[MVvz]/ \ + # ? 
SiSU_Screen::Ansi.new(@opt.cmd,'Git path',@git_path[:fnb]).green_hi_blue \ + # : SiSU_Screen::Ansi.new(@opt.cmd,'Git path',@git_path[:fnb]).green_title_hi + # SiSU_Screen::Ansi.new(@opt.cmd,"Git path","#{@opt.fns} -> #{@git_path[:fnb]}").warn if @opt.cmd =~/[MVv]/ + #end + lng=(@md.opt.f_pth[:lng]) ? (@md.opt.f_pth[:lng]) : (@md.i18n[0]) + @git_path={ + :fnb=> git_path_fnb, + :src=> git_path_fnb + '/' + Gt[:src] + '/' + lng, + :po=> git_path_fnb + '/' + Gt[:po] + '/' + lng, + :pot=> git_path_fnb + '/' + Gt[:pot], + :conf=> git_path_fnb + '/' + Gt[:conf], + :skin=> git_path_fnb + '/' + Gt[:skin], + :image=>git_path_fnb + '/' + Gt[:image], + :audio=>git_path_fnb + '/' + Gt[:audio], + :video=>git_path_fnb + '/' + Gt[:video], + :conf=> git_path_fnb + '/' + Gt[:conf] + } + SiSU_DAL::Source.new(@opt).read # -m + end + def create_file_structure_git + make_dir_fnb + if program_found? + git_init + end + end + def read + create_file_structure_git + populate.sisusrc_files + if program_found? + git_commit + end + unless @opt.cmd =~/q/ + @opt.cmd=~/[MVvz]/ \ + ? SiSU_Screen::Ansi.new(@opt.cmd,'Git path',@git_path[:fnb]).green_hi_blue \ + : SiSU_Screen::Ansi.new(@opt.cmd,'Git path',@git_path[:fnb]).green_title_hi + SiSU_Screen::Ansi.new(@opt.cmd,"Git path","#{@opt.fns} -> #{@git_path[:fnb]}").warn if @opt.cmd =~/[MVv]/ + end + end + def program_found? + found=`whereis git` + (found =~/bin\/git\b/) ? 
true : false + end + def make_dir_fnb + mkdir_p(@git_path[:fnb]) unless FileTest.directory?(@git_path[:fnb]) + mkdir_p(@git_path[:src]) unless FileTest.directory?(@git_path[:src]) + mkdir_p(@git_path[:po]) unless FileTest.directory?(@git_path[:po]) + mkdir_p(@git_path[:pot]) unless FileTest.directory?(@git_path[:pot]) + mkdir_p(@git_path[:conf]) unless FileTest.directory?(@git_path[:conf]) + mkdir_p(@git_path[:skin]) unless FileTest.directory?(@git_path[:skin]) + mkdir_p(@git_path[:image]) unless FileTest.directory?(@git_path[:image]) + #mkdir_p(@git_path[:audio]) unless FileTest.directory?(@git_path[:audio]) + #mkdir_p(@git_path[:video]) unless FileTest.directory?(@git_path[:video]) + end + def git_init + unless FileTest.directory?("#{@git_path[:fnb]}/.git") + system("cd #{@git_path[:fnb]}\ + && git init + ") + end + end + def git_commit + system("cd #{@git_path[:fnb]} \ + && git add . \ + && git commit -a + ") + end + def populate + def identify_language_versions + print __FILE__ + ':' + p __LINE__ + end + def copy_src_head + if @opt.f_pth[:lng] + cp_r("#{@env.path.pwd}/#{@opt.f_pth[:lng]}/#{@opt.fns}",@git_path[:src]) + elsif @opt.fns =~/\.ssm\.sst/ + ssm=@opt.fns.gsub(/\.ssm\.sst/,'.ssm') + cp_r("#{@env.path.pwd}/#{ssm}",@git_path[:src]) + else + cp_r("#{@env.path.pwd}/#{@opt.fns}",@git_path[:src]) + end + end + def copy_related_sst_ssi + doc_import=[] + @rgx_doc_import=/^<<\s(\S+?\.ss[ti])/ + file_array=IO.readlines(@opt.fns,'') + file_array.each do |f| + if f =~@rgx_doc_import + doc_import = doc_import + f.scan(@rgx_doc_import).uniq.flatten + end + end + doc_import.each do |f| + if @opt.f_pth[:lng] + cp_r("#{@env.path.pwd}/#{@opt.f_pth[:lng]}/#{f}",@git_path[:src]) + else + cp_r("#{@env.path.pwd}/#{f}",@git_path[:src]) + end + end + end + def locate_parse_file + composite_src=@opt.fns=~/\.ssm$/ ? 
true : false + parse_file=if composite_src \ + and @opt.cmd.inspect !~/m/ + ##SiSU_Assemble::Composite.new(@opt).read + #SiSU_DAL::Source.new(@opt).read # -m + "#{@env.path.composite_file}/#{@opt.fnb}.ssm.sst" + elsif composite_src + "#{@env.path.composite_file}/#{@opt.fnb}.ssm.sst" + else "#{@env.path.pwd}/#{@opt.fns}" + end + end + def locate_skin + SiSU_Env::Info_skin.new(@md).select + end + def read_composite + #print __FILE__ + ':' + #p __LINE__ + end + def extract_skin + #print __FILE__ + ':' + #p __LINE__ + end + def extract_skin_and_images #(parse_file) + parse_file_name=locate_parse_file + parse_file=IO.readlines(parse_file_name,'') + rgx_image=/(?:^|[^_\\])\{\s*(\S+?\.(?:png|jpg|gif))/ + #rgx_rb_image=/["'](\S+?\.(?:png|jpg|gif))["']/ + #rgx_rb_image=/[^\/]?([a-z]\S+?\.(?:png|jpg|gif))/ + rgx_rb_image=/([a-z][^ \/]+?\.(?:png|jpg|gif))/ + rgx_skin=/^\s+:skin:\s+(\S+)/ + skin_get=nil + images=[] + skin_get + parse_file.each do |f| #% work area + if f !~/^%+\s/ + skin_get ||= f.scan(rgx_skin).uniq.flatten if f =~rgx_skin + if f =~rgx_image + images << f.scan(rgx_image).uniq + end + end + end + skin=skin_get[0] if skin_get + skin=locate_skin + parse_skin=IO.readlines(skin,"\n") + parse_skin.each do |f| #% work area + if f !~/^#/ \ + and f =~rgx_rb_image + images << f.scan(rgx_rb_image).uniq + end + end + image_path="#{@env.path.pwd}/_sisu/image" + images.flatten.each do |i| + if FileTest.file?("#{image_path}/#{i}") + cp_r("#{image_path}/#{i}",@git_path[:image]) + end + end + if FileTest.file?(skin) + cp_r(skin,"#{@git_path[:conf]}/skin") + end + {:skin =>skin, :images =>images} + end + def sisuyaml_rc + sisurc=@env.path.sisurc_path + if FileTest.file?(sisurc) + cp_r(sisurc,@git_path[:conf]) + end + end + def read_src + print __FILE__ + ':' + p __LINE__ + end + def composite_src? + @opt.fns=~/\.ssm$/ ? true : false + end + def sisusrc_files + populate.copy_src_head + if composite_src? 
+ populate.copy_related_sst_ssi + end + populate.extract_skin + #parse_file_name=locate_parse_file + #parse_file=IO.readlines(parse_file_name,'') + populate.sisuyaml_rc #(parse_file) + populate.extract_skin_and_images #(parse_file) + #populate.extract_composite_source + #populate.read_composite # or read_each_composite + populate.identify_language_versions + end + self + end + end +end +__END__ +@file.output_path.sisugit diff --git a/lib/sisu/v3/harvest.rb b/lib/sisu/v3/harvest.rb new file mode 100644 index 00000000..2a01910e --- /dev/null +++ b/lib/sisu/v3/harvest.rb @@ -0,0 +1,101 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + harvest metadata from document corpus (suitable for medium sized sites) + (concept example, [to remove size constraint: implement SQL equivalent]) + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: system environment, resource control and configuration details + +=end +def help + puts <. + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: simple xml representation (sax style) + +=end +module HARVEST_authors + require "#{SiSU_lib}/author_format" # author_format.rb + @@the_idx_authors=[] + class Songsheet + def initialize(opt) + @opt=opt + @file_list=opt.files + @env=SiSU_Env::Info_env.new + end + def songsheet + files,idx_array=[],[] + @file_list.each do |f| + (f =~/.+?\.ss[tm]$/) \ + ? (files << f[/(.+?\.ss[tm])$/,1]) \ + : (print "not .sst or .ssm ? 
<< #{f} >> ") + end + files.each do |filename| + file_array=[] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|date):(?:\s|$)/m + file_array << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|%+ )/ + else break + end + end + end + idx_array=HARVEST_authors::Harvest.new(file_array,filename,idx_array).extract_harvest + end + the_idx=HARVEST_authors::Index.new(idx_array,@@the_idx_authors).construct_book_author_index + HARVEST_authors::Output_index.new(@opt,the_idx).html_print.html_songsheet + puts "file://#{@env.path.output_md_harvest}/harvest_authors.html" + puts "file://#{@env.path.pwd}/harvest_authors.html" if @opt.cmd.inspect =~/M/ + end + end + class Harvest + def initialize(data,filename,idx_array) + @data,@filename,@idx_array=data,filename,idx_array + end + def extract_harvest + data,filename,idx_array=@data,@filename,@idx_array + @title,@subtitle,@fulltitle,@author,@author_format,@date=nil,nil,nil,nil,nil,nil + @authors=[] + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:date]=/^@date:(?:[ ]+|.+?:published:[ ]+)(\d{4})/m + data.each do |para| + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + if para=~ rgx[:date] + @date=rgx[:date].match(para)[1] + end + break if @title and @subtitle and @author and @date + end + @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title + if @title and @author_format + creator=FORMAT::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if filename=~/~[a-z]{2,3}\.ss[mt]$/ + lang='.' 
+ /~([a-z]{2,3})\.ss[mt]$/.match(filename)[1] + filename.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + lang='' + filename.sub(/\.ss[mt]$/,'') + end + page="sisu_manifest#{lang}.html" + idx_array <<= { :filename => filename, :file => file, :date => @date, :title => @fulltitle, :author => creator, :page => page } + else + #p "missing author field: #{@filename} title: #{@title}; author: #{@author_format}" + end + idx_array.flatten! + idx_array + end + end + class Index + def initialize(idx_array,the_idx) + @idx_array,@the_idx=idx_array,the_idx + @@the_idx_authors=@the_idx + end + def capital(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def construct_book_author_index + idx_array=@idx_array + idx_array.each do |idx| + idx[:author][:last_first_format_a].each do |author| + author.strip! + if @@the_idx_authors[author].class==NilClass + @@the_idx_authors[author]={:md => []} + end + @@the_idx_authors[author][:md] << { :filename => idx[:filename], :file => idx[:file], :author => idx[:author], :title => idx[:title], :date => idx[:date], :page => idx[:page] } + end + end + @the_idx=@@the_idx_authors + end + end + class Output_index + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::Info_env.new + @rc=Get_init.instance.sisu_yaml.rc + @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @letter=@alph.shift + @vz=SiSU_Env::Get_init.instance.skin + end + def html_file_open + @output={} + @output[:html]=File.new("#{@env.path.output_md_harvest}/harvest_authors.html",'w') + @output[:html_mnt]=(@opt.cmd.inspect =~/M/) \ + ? File.new("#{@env.path.pwd}/harvest_authors.html",'w') \ + : nil + end + def html_file_close + @output[:html].close + @output[:html_mnt].close if @output[:html_mnt].class==File + end + def html_print + def html_songsheet + html_file_open + html_head + html_alph + html_body + html_tail + html_file_close + end + def html_head_adjust(type='') + css_path=(type !~/maintenance/) \ + ? 
'../_sisu/css/harvest.css' \ + : 'harvest.css' + sv=SiSU_Env::Info_version.instance.get_version + < + + +SiSU Metadata Harvest - Authors + + + + + + + + + + + + +

    SiSU Metadata Harvest - Authors

    +

    [ HOME ] also see SiSU Metadata Harvest - Topics

    +

    #{@env.widget_static.search_form}

    +
    +WOK + end + def html_head + @output[:html_mnt] << html_head_adjust('maintenance') if @opt.cmd.inspect =~/M/ + @output[:html] << html_head_adjust + end + def html_alph + a=[] + a << '

    ' + @alph.each do |x| + a << ((x =~/[0-9]/) ? '' : %{#{x}, }) + end + @output[:html_mnt] << a.join if @output[:html_mnt].class==File + @output[:html] << a.join + end + def html_tail + a=[] + a <<< + + + + + + +#{@vz.credits_sisu} + + +WOK + @output[:html_mnt] << a if @output[:html_mnt].class==File + @output[:html] << a + end + def do_html(html) + @output[:html_mnt] << html if @output[:html_mnt].class==File + @output[:html] << html + end + def do_string(attrib,string) + html=%{

    #{string}

    } + do_html(html) + end + def do_string_name(attrib,string) + f=/^(\S)/.match(string[0])[1] + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[:html_mnt].class==File + @output[:html_mnt] << %{\n

    #{@letter}

    } + end + @output[:html] << %{\n

    #{@letter}

    } + else break + end + end + end + end + def html_body + the_idx=@the_idx + the_idx.sort.each do |a| + do_string_name('',a) + name=a[0].sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + x = %{

    #{a[0]}

    } + if @output[:html_mnt].class==File + @output[:html_mnt] << x + end + @output[:html] << x + works=[] + a[1][:md].each do |x| + work=[ "#{x[:date]} #{x[:title]}", %{

    #{x[:date]} #{x[:title]}, #{x[:author][:authors_s]}

    } ] + works<<=(@output[:html_mnt].class==File) \ + ? (work.concat([%{

    [src]  #{x[:date]} #{x[:title]}, #{x[:author][:authors_s]} -- [#{x[:file]}.sst]

    }])) \ + : work + end + works.sort_by {|x| x[0]}.each do |x| + @output[:html] << x[1] + @output[:html_mnt] << x[2] if @output[:html_mnt].class==File + end + end + end + self + end + def screen_print + def cycle + the_idx=@the_idx + the_idx.sort.each do |a| + puts a[0] + a[1][:md].each do |x| + puts "\t" + x[:file] + end + end + end + self + end + end +end +__END__ diff --git a/lib/sisu/v3/harvest_topics.rb b/lib/sisu/v3/harvest_topics.rb new file mode 100644 index 00000000..948965dd --- /dev/null +++ b/lib/sisu/v3/harvest_topics.rb @@ -0,0 +1,559 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + metadata harvest, extract topics and associated writings from document set + (topics use topic_register header) + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: simple xml representation (sax style) + +=end +module HARVEST_topics + require "#{SiSU_lib}/author_format" # author_format.rb + class Songsheet + def initialize(opt) + @opt=opt + @file_list=opt.files + @env=SiSU_Env::Info_env.new + end + def songsheet + files,idx_array=[],[] + @file_list.each do |f| + (f =~/.+?\.ss[tm]$/) \ + ? (files << f[/(.+?\.ss[tm])$/,1]) \ + : (print "not .sst or .ssm ? << #{f} >> ") + end + files.each do |filename| + file_array=[] + File.open(filename,'r') do |file| + file.each_line("\n\n") do |line| + if line =~/^@(?:title|creator|classify):(?:\s|$)/m + file_array << line + elsif line =~/^@\S+?:(?:\s|$)/m \ + or line =~/^(?:\s*\n|%+ )/ + else break + end + end + end + idx_array=HARVEST_topics::Harvest.new(@opt,file_array,filename,idx_array).extract_harvest + end + the_idx=HARVEST_topics::Index.new(idx_array,@@the_idx_topics).construct_book_topic_index + #HARVEST_topics::Output_index.new('',the_idx).screen_print.cycle if @opt.cmd.inspect =~/[VM]/ + HARVEST_topics::Output_index.new(@opt,the_idx).html_print.html_songsheet + puts "file://#{@env.path.output_md_harvest}/harvest_topics.html" + puts "file://#{@env.path.pwd}/harvest_topics.html" if @opt.cmd.inspect =~/M/ + end + end + class Harvest + def initialize(opt,data,filename,idx_array) + @opt,@data,@filename,@idx_array=opt,data,filename,idx_array + end + def extract_harvest + data,filename,idx_array=@data,@filename,@idx_array + @idx_lst,@title,@subtitle,@fulltitle,@author,@author_format=nil,nil,nil,nil,nil,nil + rgx={} + rgx[:author]=/^@creator:(?:[ ]+|.+?:author:[ ]+)(.+?)(?:\||\n)/m + rgx[:title]=/^@title:[ ]+(.+)/ + 
rgx[:subtitle]=/^@title:.+?:subtitle:[ ]+(.+?)\n/m + rgx[:idx]=/^@classify:.+?:topic_register:[ ]+(.+?)\n/m + data.each do |para| + if para=~ rgx[:idx] + @idx_list=rgx[:idx].match(para)[1] + end + if para=~ rgx[:title] + @title=rgx[:title].match(para)[1] + end + if para=~ rgx[:subtitle] + @subtitle=rgx[:subtitle].match(para)[1] + end + if para=~ rgx[:author] + @author_format=rgx[:author].match(para)[1] + end + break if @title and @subtitle and @author and @idx_lst + end + @fulltitle=@subtitle ? (@title + ' - ' + @subtitle) : @title + if @title \ + and @author_format \ + and @idx_list + creator=FORMAT::Author.new(@author_format.strip).author_details + @authors,@authorship=creator[:authors],creator[:authorship] + file=if filename=~/~[a-z]{2,3}\.ss[mt]$/ + lang='.' + /~([a-z]{2,3})\.ss[mt]$/.match(filename)[1] + filename.sub(/~[a-z]{2,3}\.ss[mt]$/,'') + else + lang='' + filename.sub(/\.ss[mt]$/,'') + end + page="sisu_manifest#{lang}.html" + idx_array <<=if @idx_list =~/;/ + g=@idx_list.scan(/[^;]+/) + idxl=[] + g.each do |i| + i.strip! + idxl << { :filename =>filename,:file =>file,:rough_idx =>i,:title =>@fulltitle,:author =>creator,:page =>page} + end + idxl + else { :filename =>filename,:file =>file,:rough_idx =>@idx_list,:title =>@fulltitle,:author =>creator,:page =>page} + end + else + p "missing required field in #{@filename} - [title]: <<#{@title}>>; [author]: <<#{@author_format}>>; [idx]: <<#{@idx_list}>>" if @opt.cmd.inspect =~/[VM]/ + end + idx_array.flatten! 
+ idx_array + end + end + class Index + def initialize(idx_array,the_idx) + @idx_array,@the_idx=idx_array,the_idx + @@the_idx_topics=@the_idx + end + def capital(txt) + txt[0].chr.capitalize + txt[1,txt.length] + end + def contents(hash,idx) + names='' + idx[:author][:last_first_format_a].each do |n| + s=n.sub(/(.+?)(?:,.+|$)/,'\1').gsub(/\s+/,'_') + names += %{#{n}, } + end + hash << { :filename =>idx[:filename],:file =>idx[:file],:author =>names,:title =>idx[:title],:page =>idx[:page]} + end + def construct_book_topic_index + idx_array=@idx_array + idx_array.each do |idx| + @lv0,@lv1,@lv2,@lv3,@lv4={},{},{},{},{} + if idx[:rough_idx] + idx_lst=idx[:rough_idx].scan(/[^:]+/) + else + puts "no topic register in: << #{idx[:filename]} >>" + next + end + idx_lst_alt=[] + idx_lst.each {|lev| idx_lst_alt << lev.scan(/[^|]+/)} + depth = idx_lst_alt.length - 1 + range = 0..depth + range.each do |t| + if idx_lst_alt[t] + case t + when 0 + lev0=idx_lst_alt[t] + lev0.each do |lv0| + lv0=capital(lv0) + if @@the_idx_topics[lv0].class==NilClass + @@the_idx_topics[lv0]={:md => []} + end + @lv0=lv0 if lev0.length==1 + j=@@the_idx_topics[lv0][:md] + contents(j,idx) if idx_lst_alt.length - 1 == t + end + when 1 + lev1=idx_lst_alt[t] + lev1.each do |lv1| + lv1=capital(lv1) + if @@the_idx_topics[@lv0][lv1].class==NilClass + @@the_idx_topics[@lv0][lv1]={:md => []} + end + @lv1=lv1 if lev1.length==1 + j=@@the_idx_topics[@lv0][lv1][:md] + contents(j,idx) if idx_lst_alt.length - 1 == t + end + when 2 + lev2=idx_lst_alt[t] + lev2.each do |lv2| + lv2=capital(lv2) + if @@the_idx_topics[@lv0][@lv1][lv2].class==NilClass + @@the_idx_topics[@lv0][@lv1][lv2]={:md => []} + end + @lv2=lv2 if lev2.length==1 + j=@@the_idx_topics[@lv0][@lv1][lv2][:md] + contents(j,idx) if idx_lst_alt.length - 1 == t + end + when 3 + lev3=idx_lst_alt[t] + lev3.each do |lv3| + lv3=capital(lv3) + if @@the_idx_topics[@lv0][@lv1][@lv2][lv3].class==NilClass + @@the_idx_topics[@lv0][@lv1][@lv2][lv3]={:md => []} + end + 
@lv3=lv3 if lev3.length==1 + j=@@the_idx_topics[@lv0][@lv1][@lv2][lv3][:md] + contents(j,idx) if idx_lst_alt.length - 1 == t + end + when 4 + lev4=idx_lst_alt[t] + lev4.each do |lv4| + lv4=capital(lv4) + if @@the_idx_topics[@lv0][@lv1][@lv2][@lv3][lv4].class==NilClass + @@the_idx_topics[@lv0][@lv1][@lv2][@lv3][lv4]={:md => []} + end + @lv4=lv4 if lev4.length==1 + j=@@the_idx_topics[@lv0][@lv1][@lv2][@lv3][lv4][:md] + contents(j,idx) if idx_lst_alt.length - 1 == t + end + end + end + end + end + @the_idx + end + end + class Output_index + def initialize(opt,the_idx) + @opt,@the_idx=opt,the_idx + @env=SiSU_Env::Info_env.new + @rc=Get_init.instance.sisu_yaml.rc + @alph=%W[9 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z] + @letter=@alph.shift + @vz=SiSU_Env::Get_init.instance.skin + end + def html_file_open + @output={} + @output[:html]=File.new("#{@env.path.output_md_harvest}/harvest_topics.html",'w') + if @opt.cmd.inspect =~/-M/ + @output[:html_mnt]=File.new("#{@env.path.pwd}/harvest_topics.html",'w') + end + end + def html_file_close + @output[:html].close + @output[:html_mnt].close if @output[:html_mnt].class==File + end + def html_print + def html_songsheet + html_file_open + html_head + html_alph + html_body + html_tail + html_file_close + end + def html_head_adjust(type='') + css_path=(type !~/maintenance/) \ + ? '../_sisu/css/harvest.css' \ + : 'harvest.css' + sv=SiSU_Env::Info_version.instance.get_version + < + + +SiSU Metadata Harvest - Topics + + + + + + + + + + + + +

    SiSU Metadata Harvest - Topics

    +

    [ HOME ] also see SiSU Metadata Harvest - Authors

    +

    #{@env.widget_static.search_form}

    +
    +WOK + end + def html_head + @output[:html_mnt] << html_head_adjust('maintenance') if @opt.cmd.inspect =~/M/ + @output[:html] << html_head_adjust + end + def html_alph + a=[] + a << '

    ' + @alph.each do |x| + a << (x =~/[0-9]/) \ + ? '' \ + : %{#{x}, } + end + @output[:html_mnt] << a if @opt.cmd.inspect =~/M/ + @output[:html] << a.join + end + def html_tail + a=[] + a <<< + + + + + + +#{@vz.credits_sisu} + + +WOK + @output[:html_mnt] << a if @output[:html_mnt].class==File + @output[:html] << a + end + def do_html(html) + @output[:html] << html + end + def do_html_maintenance(html) + @output[:html_mnt] << html if @output[:html_mnt].class==File + end + def do_string(attrib,string) + html=%{

    #{string}

    } + do_html(html) + do_html_maintenance(html) if @output[:html_mnt].class==File + end + def do_string_default(attrib,string) + html=%{

    #{string}

    } + do_html(html) + end + def do_string_maintenance(attrib,string) + html=%{

    #{string}

    } + do_html_maintenance(html) if @output[:html_mnt].class==File + end + def do_string_name(attrib,string) + f=/^(\S)/.match(string)[1] + if @letter < f + while @letter < f + if @alph.length > 0 + @letter=@alph.shift + if @output[:html_mnt].class==File + @output[:html_mnt] << %{\n

    #{@letter}

    } + end + @output[:html] << %{\n

    #{@letter}

    } + else break + end + end + end + name=string.strip.gsub(/\s+/,'_') + html=%{

    #{string}

    } + do_html(html) + do_html_maintenance(html) if @output[:html_mnt].class==File + end + def do_array(lv,array) + lv+=1 + array.each do |b| + do_case(lv,b) + end + end + def do_hash_md(attrib,hash) + html=%{#{hash[:title]} - #{hash[:author]}} + do_string_default(attrib,html) + end + def do_hash_md_maintenance(attrib,hash) + if @output[:html_mnt].class==File #should not be run for presentation output + html=%{[src]  #{hash[:title]} - #{hash[:author]}} + do_string_maintenance(attrib,html) + end + end + def do_hash(lv,hash) + lv+=1 + key=[] + hash.each_key do |m| + if m == :md + do_case(lv,hash[m]) + elsif m != :title and m != :author and m != :filename and m != :file and m != :rough_idx and m != :page + key << m + elsif m == :title + do_hash_md('work',hash) + do_hash_md_maintenance('work',hash) + end + end + if key.length > 0 + key.sort.each do |m| + attrib="lev#{lv}" + lv==0 ? do_string_name(attrib,m) : do_string(attrib,m) + do_case(lv,hash[m]) + end + end + end + def do_case(lv,a) + y = a.class + case + when y==String + attrib="lev#{lv}" + lv==0 ? 
do_string_name(attrib,a) : do_string(attrib,a) + when y==Array + do_array(lv,a) + when y==Hash + do_hash(lv,a) + end + end + def html_body + the_idx=@the_idx + the_idx.sort.each do |a| + do_case(-1,a) + end + end + self + end + def screen_print + def do_string(lv,string) + s=' '*4 + puts s*lv + string + end + def do_array(lv,array) + lv+=1 + array.each do |b| + do_case(lv,b) + end + end + def do_hash_md(lv,hash) + string=hash[:title] + ' - ' + hash[:author] + do_string(lv,string) + end + def do_hash(lv,hash) + lv+=1 + key=[] + hash.each_key do |m| + if m == :md + do_case(lv,hash[m]) + elsif m != :title and m != :author and m != :filename and m != :file and m != :rough_idx and m != :page + key << m + elsif m == :title + do_hash_md(lv,hash) + end + end + if key.length > 0 + key.sort.each do |m| + do_string(lv,m) + do_case(lv,hash[m]) + end + end + end + def do_case(lv,a) + s=' '*4 + y = a.class + case + when y==String + do_string(lv,a) + when y==Array + do_array(lv,a) + when y==Hash + do_hash(lv,a) + end + end + def cycle + the_idx=@the_idx + the_idx.each do |a| + do_case(-1,a) + end + end + self + end + def screen_print_unsorted + def do_string(lv,string) + s=' '*4 + puts s*lv + string + end + def do_array(lv,array) + lv+=1 + array.each do |b| + do_case(lv,b) + end + end + def do_hash_md(lv,hash) + string=hash[:title] + ' - ' + hash[:author] + do_string(lv,string) + end + def do_hash(lv,hash) + lv+=1 + hash.each_key do |m| + if m == :md + do_case(lv,hash[m]) + else + if m != :title and m != :author and m != :filename and m != :file and m != :rough_idx and m != :page + do_string(lv,m) + do_case(lv,hash[m]) + elsif m == :title + do_hash_md(lv,hash) + else + end + end + end + end + def do_case(lv,a) + s=' '*4 + y = a.class + case + when y==String + do_string(lv,a) + when y==Array + do_array(lv,a) + when y==Hash + do_hash(lv,a) + end + end + def cycle + the_idx=@the_idx + the_idx.each do |a| + do_case(-1,a) + end + end + self + end + end +end +__END__ +terms -|_ t{tl1} 
-|_ {fa}[fa]{filenames and other details} + | |_ {tl2} -|_ {fa}[fa]{filenames and other details} + | | |_{tl3} -|_ {fa}[fa]{filenames and other details} + | | | |_{tl4} - {fa}[fa]{filenames and other details} + | | | | + | | | |_{tl4a} - {fa}[fa]{filenames and other details} + | | | | + | | | |_{tl4b} - {fa}[fa]{filenames and other details} + | | | | + | | | |_ ... + | | | + | | |_{tl3a} - {fa}[fa]{filenames and other details} + | | + | |_{tl2a} - {fa}[fa]{filenames and other details} + | + |_ t{tl1a} -|_ {fa}[fa]{filenames and other details} + |_ ... diff --git a/lib/sisu/v3/help.rb b/lib/sisu/v3/help.rb new file mode 100644 index 00000000..b23d8066 --- /dev/null +++ b/lib/sisu/v3/help.rb @@ -0,0 +1,1924 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: interactive infomation/help + +=end +module SiSU_Help + require "#{SiSU_lib}/sysenv" # sysenv.rb + include SiSU_Screen + require "#{SiSU_lib}/param" # param.rb + class Help + def initialize(request='',color='') + @request,@color=request,color + if color =~/color_off/; @cX=SiSU_Screen::Ansi.new('k').cX + else @cX=SiSU_Screen::Ansi.new('yes').cX + end + fns='help_example_dummy_file_name.sst' + @env=SiSU_Env::Info_env.new(fns) + @db=SiSU_Env::Info_db.new + m=/.+\/(?:src\/)?(\S+)/im # m=/.+?\/(?:src\/)?([^\/]+)$/im # m=/.+\/(\S+)/m + @output_stub=Dir.pwd[m,1] + end + def help_request + begin + gotten=nil + regx=/^(list|com(?:mands)?|mod(?:ifiers)|markup|syntax|example(?:_v1|_v2)?|head(?:ers?)?|(?:heading|title|level|structure)s?|endnotes|footnotes|tables?|customise|skin|dir(?:ectories)?|paths?|lang(?:uage)?|modules|setup|conf(?:ig(?:ure)?)?|standards?|li[cs]en[sc]e|scratch|install|termsheet|dublin(?:core)?|dc|customise|styles?|appearance|theme|env(ironment)?|dir(?:ector(?:y|ies))?|metaverse|abstract|features|summary|(?:short)?cuts?|sisu|about|ext(?:ernal)?(?:_?prog(?:rams)?)?)|utf-?8|plaintext|html|xml|xhtml|epub|odf|odt|opendocument|css|pdf|latex|tex|(?:tex)?info|search|(?:hyper)?est(?:raier)?|searchform|cgi|sql|db|pg|postgresql|pg?sql|sqlite|convert|php|webrick|sitemaps?|ya?ml|ansi|colors|-[AabcDdEeFHhIMmNnopqrRSstUuVvwXxyZz0-9]|-[Ddcv]|-[CcFLSVvW]/ + help_info=%{#{@cX.blue_hi}SiSU help#{@cX.off} #{@cX.ruby}~#{@cX.off} #{@request}} + help_list=%{#{@cX.blue}sisu --help#{@cX.off} #{@cX.cyan}type keyword else "enter" to exit help:\n\tkeywords include:#{@cX.off} #{@cX.brown}list, (com)mands, short(cuts), (mod)ifiers, 
(env)ironment, markup, syntax, headers, headings, endnotes, tables, example, customise, skin, (dir)ectories, path, (lang)uage, db, install, setup, (conf)igure, convert, termsheet, search, sql, features, license#{@cX.off} \n} + help_prompt=%{#{@cX.fuschia}exit, [or carriage return to exit help] #{@cX.off}\n#{@cX.blue_hi}SiSU help#{@cX.off} #{@cX.ruby}~#{@cX.off} } + until gotten =~/exit|quit|bye|q|^\s*$/ \ + and ( @request.nil? or @request.empty? ) + @help=Help.new(@request,@color) + if @request + puts help_info + gotten=@request + @request=nil + end + case gotten + when /h((?:elp)| )|~/i + @help.summary + help_@request + when /list/; @help.summary + when /com(mands)?/; @help.commands + when /mod(ifiers)?/; @help.modifiers + when /markup|syntax/; @help.markup + when /example\b/; @help.example_v2 + when /example_v1/; @help.example_v1 + when /example(_v2)?/; @help.example_v2 + when /(?:heading|title|level)s?|structure/; @help.headings + when /head(ers?)?/; @help.headers + when /dublin(core)?|dc/; @help.dublin_core + when /(?:foot|end)notes/; @help.endnotes + when /tables?/; @help.tables + when /customise|skin/; @help.customise + when /modules/; @help.modules + when /env(ironment)?/; @help.environment + when /dir(ector(y|ies))?/; @help.directories + when /paths?/; @help.path + when /setup/; @help.setup + when /conf(?:ig(?:ure)?)?/; @help.configure + when /standards?/; @help.standards + when /lang(?:uage)?/; @help.languages + when /li[cs]en[sc]e/; @help.license + when /scratch/; @help.scratch + when /install/; @help.install + when /(?:--)?termsheet/; @help.termsheet + when /customise|styles?|appearance|theme/; @help.customise + when /metaverse/; @help.dal + when /(?:--)?plaintext|(?:--)?te?xt|-[aAeE]/; @help.plaintext + when /utf-?8/i; @help.utf8 + when /(?:--)?html|-[hH]/; @help.html + when /css/; @help.css + when /(?:--)?xhtml|-b/; @help.xhtml + when /(?:--)?xml|-[xX]/; @help.xml + when /(?:--)?odf|(?:--)?odt|opendocument|-o/; @help.odf + when /(?:--)?epub|-e/; 
@help.epub + when /php/; @help.php + when /(?:--)?pdf|-p/; @help.pdf + when /latex|tex/; @help.latex + when /(tex)?info/; @help.texinfo + when /lout/; @help.lout + when /concordance|index|-w/; @help.concordance + when /search\b/; @help.help_search + when /(?:hyper)?est(?:raier)?/; @help.hyperestraier + when /db|database|sql|postgresql|(?:--)?sqlite|(?:--)?pg|pg?sql|-[dD]/; @help.sql + when /searchform|cgi/; @help.cgi + when /convert/; @help.convert + when /(?:--)?webrick|-W/; @help.webrick + when /abstract|features|summary|about|sisu/; @help.abstract + when /ext(?:ernal)?(?:_?prog(?:rams)?)?/; @help.external_programs + when /ya?ml/; @help.yaml + when /sitemaps?/; @help.sitemap + when /(?:short)?cuts?/; @help.shortcuts + when /ansi|colors?/; SiSU_Screen::Ansi.new('c').colors + else @help.summary + end + print help_list + print help_prompt + gotten=nil + gotten=gets + end + rescue + #STDERR.puts Ansi.new($!, $@).rescue + # dies silently... for now, silence of use in connection with "sisu ~ commands" etc. + ensure + end + end + def summary + print < + ------------------------------------------ + #{@cX.green}~##{@cX.off} unnumbered paragraph (place marker at end of paragraph) + #{@cX.green}-##{@cX.off} unnumbered paragraph, delete when not required (place marker at end of paragraph) [used in dummy headings, eg. for segmented html] + ------------------------------------------ + manual page breaks (LaTeX/pdf) + #{@cX.green}<:pb>#{@cX.off} page break, which breaks a page, starting a new page in single column text and a new column in double column text + #{@cX.green}<:pn>#{@cX.off} page new, which starts a new page, in both single and double column text (leaving an empty column in double column text if necessary). 
+ Note: page breaks are usually introduced to pdfs either as header instructions, indicating that pages should break at given levels + ------------------------------------------ + #{@cX.cyan}Composite documents#{@cX.off} + It is possible to build a document by creating a master document that requires other documents. The documents required may complete documents that could be generated independently, or they could be markup snippets, prepared so as to be easily available to be placed within another text. If the calling document is a master document (built mainly from other documents), it should be named with the suffix #{@cX.blue}.ssm#{@cX.off} Within this document you would provide information on the other documents that should be included within the text. These may be other documents that would be processed in a regular way, or markup bits prepared only for inclusion within a master document #{@cX.blue}.sst#{@cX.off} regular markup file, or #{@cX.blue}.ssi#{@cX.off} (insert/information) A secondary file of the composite document is built prior to processing with the same prefix and the suffix #{@cX.blue}.ssm.sst#{@cX.off} + + #{@cX.cyan}#basic sisu markup alternatives#{@cX.off} + #{@cX.green}{#{@cX.off}filename.ssi#{@cX.green}}require#{@cX.off} + #{@cX.green}<< {#{@cX.off}filename.ssi#{@cX.green}}#{@cX.off} + + #{@cX.cyan}#using textlink alternatives#{@cX.off} + #{@cX.green}|#{@cX.off}filename.ssi#{@cX.green}|@|^|require#{@cX.off} + #{@cX.green}<< |#{@cX.off}filename.ssi#{@cX.green}|@|^|#{@cX.off} + + #{@cX.cyan}#using thlnk alternatives#{@cX.off} + #{@cX.green}require#{@cX.off} + #{@cX.green}<< #{@cX.off} + + #{@cX.cyan}Composite documents - remote parts#{@cX.off} + Composite documents may be built from remote parts, by using the composite document syntax with a url. 
This makes sense using either sisu regular syntax (which is just a convenient way of marking up), or thlnk syntax, which also recognises remote urls, and permits hyperlinking ascii to the url location. + + #{@cX.cyan}Remote documents#{@cX.off} + SiSU will download and process remote locations if a url is provided instead of a filename. [this at present works only for sisu markup files without images] + + ------------------------------------------ + #{@cX.green}%#{@cX.off}#{@cX.off} add a comment to text, that will be removed prior to processing (place marker at beginning of line) + #{@cX.green}\\#{@cX.off}#{@cX.off} escape a sepcial character, whether general: { } < > or contextual special characters, (in combination with other characters) ~ - _ / % ^ and occasionally ! # + , + #{@cX.green}%%#{@cX.off}#{@cX.off} same as above but recognised by vim folds for placing fold in document text, in addition to headers and headings + ------------------------------------------ + + #{@cX.ruby}More HELP on Markup#{@cX.off} markup help is available on: + document wide instructions: headers (document structure) + general text markup: headings; endnotes; tables (which also includes a note on preformatted text) + configuration and customisation + document or site wide customisation: customise; skin +WOK + help_markup +# {../_sisu/image/tux.png http://www.jus.uio.no/sisu/ w=64 c=\"a better way\" }:image depreciated image eg +# old form + end + def example + help_markup + end + def example_v1 + print <. 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + <#{@cX.blue}http://www.fsf.org/licenses/gpl.html#{@cX.off}> + <#{@cX.blue}http://www.gnu.org/licenses/gpl.html#{@cX.off}> + <#{@cX.blue}http://www.jus.uio.no/sisu/gpl.fsf#{@cX.off}> + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + <#{@cX.blue}http://www.jus.uio.no/sisu#{@cX.off}> + <#{@cX.blue}http://www.sisudoc.org#{@cX.off}> + + * Download: + <#{@cX.blue}http://www.jus.uio.no/sisu/SiSU/download.html#{@cX.off}> + + Ralph Amissah + <#{@cX.blue}ralph@amissah.com#{@cX.off}> + <#{@cX.blue}ralph.amissah@gmail.com#{@cX.off}> + +WOK + end + def standards + print <to bold to indent for superscript for subscript text + +_1 at the beginning of a line indents the paragraph + +_2 at the beginning of a line double indents the paragraph + +Others include + +Other things to note: + +By default paragraphs are automatically numbered... and is the same across all output formats +This makes citation a lot easier... regardless of the form of output that is being looked at +It also permits the building of various addons, like the concordance feature which identifies each word and the paragraphs in which the word appears with links to the paragraph... + +Urls are automatically turned to live links in the html and pdf files created... +WOK + end + end +end +__END__ + diff --git a/lib/sisu/v3/html.rb b/lib/sisu/v3/html.rb new file mode 100644 index 00000000..ebfd15bf --- /dev/null +++ b/lib/sisu/v3/html.rb @@ -0,0 +1,654 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: html generation, processing + +=end +module SiSU_HTML + require 'pstore' + require "#{SiSU_lib}/particulars" # particulars.rb + include SiSU_Particulars + require "#{SiSU_lib}/defaults" # defaults.rb + include SiSU_Viz + require "#{SiSU_lib}/html_table" # html_table.rb + require "#{SiSU_lib}/html_format" # html_format.rb + include SiSU_HTML_Format + require "#{SiSU_lib}/html_segments" # html_segments.rb + include SiSU_HTML_seg + require "#{SiSU_lib}/html_scroll" # html_scroll.rb + require "#{SiSU_lib}/html_promo" # html_promo.rb + include SiSU_HTML_promo + require "#{SiSU_lib}/html_tune" # html_tune.rb + include SiSU_HTML_Tune + class Source + def initialize(opt) + @opt=opt + @particulars=SiSU_Particulars::Combined_singleton.instance.get_all(opt) + end + def read + songsheet + end + def songsheet + begin + @md=@particulars.md + @fnb=@md.fnb + @env=@particulars.env + loc=@env.url.output_tell + 
unless @opt.cmd =~/q/ + tool=if @opt.cmd =~/z/; "#{@env.program.web_browser} #{loc}/#{@fnb}/#{@md.fn[:index]}" + elsif @opt.cmd =~/[MVv]/; "#{@env.program.web_browser} #{loc}/#{@fnb}/#{@md.fn[:index]}" + else @opt.fns + end + @opt.cmd=~/[MVvz]/ \ + ? SiSU_Screen::Ansi.new(@opt.cmd,'HTML',tool).green_hi_blue \ + : SiSU_Screen::Ansi.new(@opt.cmd,'HTML',tool).green_title_hi + SiSU_Screen::Ansi.new(@opt.cmd,@opt.fns,"#{@env.path.output_tell}/#{@fnb}/#{@md.fn[:index]}").flow if @opt.cmd =~/[MV]/ + end + SiSU_Env::Info_skin.new(@md).select + data=nil + @tuned_file_array=SiSU_HTML::Source::Html_environment.new(@particulars).tuned_file_instructions + data=@tuned_file_array + scr_endnotes=SiSU_HTML::Source::Endnotes.new(data,@md).scroll + toc=SiSU_HTML::Source::Toc.new(@md,data).songsheet + links_guide=SiSU_HTML::Source::Links_guide.new(data,@md).toc + data=@tuned_file_array + scr_toc=SiSU_HTML::Source::Scroll_head_and_segtoc.new(@md,toc,links_guide).in_common #watch + SiSU_HTML::Source::Seg.new(@md,data).songsheet + data=@tuned_file_array + scr=SiSU_HTML::Source::Scroll.new(@md,data,scr_endnotes).songsheet + scroll=SiSU_HTML::Source::Scroll_output.new(scr_toc,scr[:body],scr[:metadata],scr[:owner_details],scr[:tails],@md).publish + SiSU_HTML::Source::Output.new(scroll,@md).scroll + rescue; SiSU_Errors::Info_error.new($!,$@,@opt.cmd,@opt.fns).error + ensure + unless @opt.cmd =~/[MV]/ #check maintenance flag + texfiles=Dir["#{@env.path.tune}/#{@opt.fns}*"] + texfiles.each do |f| + if FileTest.file?(f) + File.unlink(f) + end + end + end + SiSU_Env::Clear.new(@opt.cmd,@opt.fns,@md).param_instantiate + @@flag,@@scr,@@seg,@@seg_endnotes,@@seg_subtoc,@@seg_ad={},{},{},{},{},{} + @@seg_total,@@tracker,@@loop_count,@@tablehead,@@number_of_cols=0,0,0,0,0 + @@seg_name,@@seg_name_html,@@seg_subtoc_array,@@seg_endnotes_array,@@segtocband,@@tablefoot=Array.new(7){[]} + @@filename_seg,@@seg_url,@@fn,@@to_lev4,@@get_hash_to,@@get_hash_fn='','','','','','','' + 
@@is4=@@is3=@@is2=@@is1=@@heading1=@@heading2=@@heading3=@@heading4=0 + end + end + private + class Html_environment + def initialize(particulars) + @particulars=particulars + @md,@env=particulars.md,particulars.env + @vz=SiSU_Env::Get_init.instance.skin + @env,@css,@symlnk=particulars.env,SiSU_Style::CSS.new,SiSU_Env::Create_system_link.new #home + end + def link_images + @symlnk.images + end + def directories + title=File.basename(@md.fns,'.rb') + end + def tuned_file_instructions + @tell=SiSU_Screen::Ansi.new(@md.cmd) + @md.cmd=@md.cmd.gsub(/H/,'h') + @md.file_type='html' if @md.cmd =~/[hon]/ + directories + newfilename=%{#{@env.path.output}/#{@md.fnb}/#{@md.fn[:index]}} if @md.file_type =~/html/ + dal_array=@particulars.dal_array # dal file drawn here + @tuned_file_array=SiSU_HTML_Tune::Tune.new(dal_array,@md).songsheet + @tuned_file_array + end + end + class Links_guide + @links_guide_toc=[] + def initialize(data,md) + @data,@md=data,md + @links_guide_=SiSU_Env::Create_site.new(@md.cmd).html_quick_ref? + end + def toc + @links_guide_toc=[] + if @links_guide_ + format_head_toc=SiSU_HTML_Format::Head_toc.new(@md) + guide_type='horzontal' #values: horizontal or vertical + @links_guide_toc << format_head_toc.links_guide_open(guide_type) + if defined? @md.lnk \ + and @md.lnk + @md.lnk.each do |l| + if defined? 
l[:say] + target=if l[:url] !~/^\.(\.)?\//; 'external' + else '_top' + end + s_lnk_url,s_lnk_lnk=l[:url],l[:say] + txt_obj={:lnk_url =>s_lnk_url,:lnk_txt =>s_lnk_lnk,:target =>target} + lev_dob_ocn=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + @links_guide_toc << lev_dob_ocn.links_guide if s_lnk_lnk + end + end + end + format_head_toc=SiSU_HTML_Format::Head_toc.new(@md) + @links_guide_toc << format_head_toc.links_guide_close #(guide_type) + @links_guide_toc + else '' + end + end + end + class Endnotes + include SiSU_HTML_Format + def initialize(data,md) + @data,@md=data,md + end + def scroll + @scr_endnotes=[] + format_head_scroll=SiSU_HTML_Format::Head_scroll.new(@md) + @data.each do |dob| + pg=dob.dup + unless pg.is =~/^code/ + if pg.obj =~/(?:#{Mx[:en_a_o]}|#{Mx[:en_b_o]})[\d*+]+ / + endnote_array=[] + if pg.obj=~/#{Mx[:en_a_o]}[\d*+].+?#{Mx[:en_a_c]}/m + endnote_array = pg.obj.scan(/#{Mx[:en_a_o]}[\d*+]+(.+?)#{Mx[:en_a_c]}/m) + end + if pg.obj=~/#{Mx[:en_b_o]}[\d*]+\s.+?#{Mx[:en_b_c]}/m + endnote_array = pg.obj.scan(/#{Mx[:en_b_o]}[\d*]+(.+?)#{Mx[:en_b_c]}/m) + end + if pg.obj=~/#{Mx[:en_b_o]}[\d+]+\s.+?#{Mx[:en_b_c]}/m + endnote_array = pg.obj.scan(/#{Mx[:en_b_o]}[\d+]+(.+?)#{Mx[:en_b_c]}/m) + end + endnote_array.flatten.each do |note| + txt_obj={:txt =>note} + format_scroll=SiSU_HTML_Format::Format_scroll.new(@md,txt_obj) + @scr_endnotes << format_scroll.endnote_body + end + end + end + end + @scr_endnotes + end + end + class Toc [],:seg_mini=>[],:scr=>[] } + @@seg_url='' + @@firstseg=nil + def initialize(md=nil,data='') + @data,@md=data,md + @vz=SiSU_Env::Get_init.instance.skin + @tell=SiSU_Screen::Ansi.new(@md.cmd) if @md + end + def songsheet #extracts toc for scroll & seg + SiSU_Screen::Ansi.new(@md.cmd,'Toc').txt_grey if @md.cmd =~/[MVv]/ + toc=nil + @@firstseg=nil + @@toc={ :seg=>[],:seg_mini=>[],:scr=>[] } + @data.each do |dob| + if dob.is=='heading' \ + or dob.is=='heading_insert' + dob_toc=dob.dup + toc=if dob_toc.is =='heading' \ + or 
dob.is=='heading_insert' + toc=case dob_toc.ln + when 1; Toc.new(@md,dob_toc).level_1 + when 2; Toc.new(@md,dob_toc).level_2 + when 3; Toc.new(@md,dob_toc).level_3 + when 4; Toc.new(@md,dob_toc).level_4 + when 5; Toc.new(@md,dob_toc).level_5 + when 6; Toc.new(@md,dob_toc).level_6 + else nil + end + end + toc.each do |k,d| + d.gsub!(/(?:#{Mx[:en_a_o]}.+?#{Mx[:en_a_c]}|#{Mx[:en_b_o]}.+?#{Mx[:en_b_c]})\s*/m,' ') + end if toc + if @@firstseg.nil? \ + and dob.ln==4 \ + and dob.name =~/\S+/ + @@firstseg=dob.name + end + if toc + begin + @@toc[:seg] << toc[:seg] if toc[:seg] + @@toc[:seg_mini] << toc[:seg_mini] if toc[:seg_mini] + @@toc[:scr] << toc[:scr] if toc[:scr] + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + end + end + end + end + @md.firstseg=@@firstseg + @@toc + end + def minitoc + minitoc=@@toc[:seg_mini].join("\n") + '
    ' + minitoc + '
    ' + end + protected + def rss #sort all wrong, disabled but kept + @@toc[:seg] <<< +
    +

    +(relatively static) RSS feeds for DOCUMENTS:
    +RSS feed http://www.jus.uio.no/lm/rssfeed/documents.xml
    +RSS feed http://www.jus.uio.no/lm/rssfeed/tradelaw.xml
    +RSS feed http://www.jus.uio.no/lm/rssfeed/environmental.xml
    +

    info@address.com
    +

    +
    +WOK + end +#not used --> + def level_endnotes + if @md.flag_endnotes + format_head_scroll=SiSU_HTML_Format::Head_scroll.new(@md) + @@toc[:scr] << format_head_scroll.toc_endnote + end + end + def level_concordance + format_head_toc=SiSU_HTML_Format::Head_toc.new(@md) + @@toc[:seg_mini] << format_head_toc.mini_seg_concordance + end + def level_metadata + format_head_toc=SiSU_HTML_Format::Head_toc.new(@md) + @@toc[:scr] << format_head_toc.metadata + @@toc[:seg] << format_head_toc.seg_metadata + @@toc[:seg_mini] << format_head_toc.mini_seg_metadata + end + def level_word_index + format_head_toc=SiSU_HTML_Format::Head_toc.new(@d0c) + @@toc[:scr] << format_head_toc.concordance + @@toc[:seg] << format_head_toc.concordance + @@toc[:seg_mini] << format_head_toc.mini_concordance + end +# <-- not used + def level_1 + dob=@data + linkname,link=dob.obj.strip,dob.ocn + if link \ + and link !~/#/ #% keep eye on link + p_num=SiSU_HTML_Format::Paragraph_number.new(@md,link) + end + title=if dob.obj !~/Metadata/; linkname + else + link='metadata' + %{#{linkname}} + end + toc={} + txt_obj={:txt =>title} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc[:seg]=if dob.name =~/^meta/ \ + and dob.obj =~/Document Information/ + format_toc.lev0 + else format_toc.lev1 + end + toc[:seg_mini]=if dob.name =~/^meta/ \ + and dob.obj =~/Document Information/ + x=if @md.concord_make + format_toc.mini_concord_tail + else format_toc.mini_tail + end + else format_toc.mini_lev1 + end + title=if dob.ocn ==0 + if dob.name =~/^meta/ \ + and dob.obj =~/Document Information/ + %{#{linkname}} + else linkname + end + else + @@toc[:scr] << '
    ' + link=if dob.ln; dob.ln + else '' + end + %{#{linkname}} + end + txt_obj={:txt =>title} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=if dob.name =~/^meta/ \ + and dob.obj =~/Document Information/ + format_toc.lev0 + else format_toc.lev1 + end + toc + end + def level_2 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + p_num=if ocn \ + and ocn !~/#/ + SiSU_HTML_Format::Paragraph_number.new(@md,ocn) + else nil + end + txt_obj={:txt =>linkname} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc={} + toc[:seg]=format_toc.lev2 + toc[:seg_mini]=format_toc.mini_lev2 + if p_num + title=%{#{p_num.goto}#{linkname}} + txt_obj={:txt =>title} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=format_toc.lev2 + end + toc + end + def level_3 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + p_num=if ocn \ + and ocn !~/#/ + SiSU_HTML_Format::Paragraph_number.new(@md,ocn) + else nil + end + txt_obj={:txt =>linkname} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc={} + toc[:seg]=format_toc.lev3 + toc[:seg_mini]=format_toc.mini_lev3 + if p_num + title=%{#{p_num.goto}#{linkname}} + txt_obj={:txt =>title} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=format_toc.lev3 + end + toc + end + def level_4 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + p_num=SiSU_HTML_Format::Paragraph_number.new(@md,ocn) if ocn + if dob.ln ==4 + seg_link=%{ + #{dob.obj} + } + @@seg_url=dob.name + elsif dob.obj =~/\d+.\d+.\d+.\d+|\d+.\d+.\d+|\d+.\d+|\d+/ + seg_link=dob.obj.gsub(/^(\d+.\d+.\d+.\d+|\d+.\d+.\d+|\d+.\d+|\d+)(.*)/, + %{\\1 \\2 }) + end + p_num=SiSU_HTML_Format::Paragraph_number.new(@md,ocn) if ocn + @file=SiSU_Env::SiSU_file.new(@md) if @md + txt_obj=if seg_link=~/sisu_manifest\.html/ + man_link=if @file.by_language? \ + or @file.by_filetype? 
+ seg_link.gsub(/sisu_manifest\.html/,"../../manifest/#{@file.base_filename.manifest}") + else seg_link + end + {:txt =>man_link} + else {:txt =>seg_link} + end + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc={} + toc[:seg]=format_toc.lev4 + toc[:seg_mini]=format_toc.mini_lev4 + title=%{#{p_num.goto}#{linkname}} if p_num + txt_obj=if title=~/sisu_manifest.html/ + man_link=title.gsub(/sisu_manifest.html/,"../manifest/#{@file.base_filename.manifest}") + {:txt =>man_link} + else {:txt =>title} + end + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=format_toc.lev4 + toc + #end + end + def level_5 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + toc={} + if ocn \ + and ocn !~/#/ + p_num=SiSU_HTML_Format::Paragraph_number.new(@md,ocn) + lnk_n_txt=%{ + #{linkname} + } + txt_obj={:txt =>lnk_n_txt} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc[:seg]=format_toc.lev5 + toc[:seg_mini]=format_toc.mini_lev5 + title=%{#{p_num.goto}#{linkname}} + txt_obj={:txt =>title} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=format_toc.lev5 + end + toc + end + def level_6 + dob=@data + linkname,ocn=dob.obj.strip,dob.ocn + toc={} + if ocn \ + and ocn !~/#/ + p_num=SiSU_HTML_Format::Paragraph_number.new(@md,ocn) + lnk_n_txt=%{ + #{linkname} +} + txt_obj={:txt =>lnk_n_txt} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc[:seg]=format_toc.lev6 + toc[:seg_mini]=format_toc.mini_lev6 + title=%{#{p_num.goto}#{linkname}} + txt_obj={:txt =>title} + format_toc=SiSU_HTML_Format::Format_toc.new(@md,txt_obj) + toc[:scr]=format_toc.lev6 + end + toc + end + def level_crosslink + dob=@data + if dob !~/^4~!/ + dob.gsub!(/^4~!\s+(\S+)\s+(.+)/, + %{ +
    + #{@png.crosslink_ext} +   \\2 + <\/a> +
    +}) + else + dob.gsub!(/^4~!\s+(\S+)\s+(.+)/, + %{
    + + + #{@png.crosslink} +   \\2 + <\/a> +
    +}) + end + end + end + class Scroll_head_and_segtoc < Toc + def initialize(md='',toc='',links_guide_toc='') + @md,@toc,@links_guide_toc=md,toc,links_guide_toc + @vz=SiSU_Env::Get_init.instance.skin + end + def in_common + toc_shared=[] + @segtoc=[] + SiSU_Screen::Ansi.new(@md.cmd,'Scroll & Segtoc').txt_grey if @md.cmd =~/[MVv]/ + format_head_toc=SiSU_HTML_Format::Head_toc.new(@md) + dochead=format_head_toc.head + dochead.gsub!(/toc\.(html)/,'doc.\1') #kludge + ads=SiSU_HTML_promo::Ad.new(@md) + toc_shared << dochead << ads.div.major + @segtoc << format_head_toc.head << ads.div.major + toc_shared << format_head_toc.toc_head_escript if SiSU_HTML_Format::Head_toc.method_defined? :toc_head_escript + @segtoc << format_head_toc.toc_head_escript if SiSU_HTML_Format::Head_toc.method_defined? :toc_head_escript + toc_shared << format_head_toc.scroll_head_navigation_band + if defined? @md.rights.all + rights=format_head_toc.rights.all + rights=SiSU_HTML_Tune::Clean_html.new(rights).clean + end + if @md.prefix_b + prefix_b=format_head_toc.notes.prefix_b + prefix_b=SiSU_HTML_Tune::Clean_html.new(prefix_b).clean + end + @segtoc << format_head_toc.seg_head_navigation_band + toc_shared << format_head_toc.scroll_head_title_banner_open + @segtoc << format_head_toc.seg_head_title_banner_open + tmp_head=nil + doc_title_endnote=@md.title.full.gsub(/(\*+)/,'\1') + tmp_head=doc_title_endnote + "\n" + txt_obj={:txt =>tmp_head} + format_txt_obj=SiSU_HTML_Format::Format_text_object.new(@md,txt_obj) + toc_shared << format_txt_obj.center_bold + @segtoc << format_txt_obj.center_bold + if defined? 
@md.creator.author + creator_endnote=@md.creator.author.gsub(/(\*+)/,%{ \\1}) + tmp_head=creator_endnote + "\n" + txt_obj={:txt =>tmp_head} + format_txt_obj=SiSU_HTML_Format::Format_text_object.new(@md,txt_obj) + toc_shared << format_txt_obj.center_bold + @segtoc << format_txt_obj.center_bold + end + toc_shared << "#{@vz.table_close*1}\n" + @segtoc << "#{@vz.table_close*1}\n" + tmp_head=nil + if @md.prefix_a + tmp_head ||= %{

    #{@md.prefix_a}\n} + toc_shared << tmp_head.dup + @segtoc << tmp_head.dup + end + tmp_head=nil + toc_shared << @links_guide_toc + if defined? @md.rights.all #and ? @md.rights.all + toc_shared << rights + end + if defined? @md.prefix_b + toc_shared << prefix_b + end + #Table of Contents added/appended here + toc_shared << @toc[:scr] + @segtoc << @links_guide_toc + @segtoc << @toc[:seg] + if defined? @md.rights.all \ + and not @md.rights.all.empty? + @segtoc << rights + end + @segtoc << prefix_b if @md.prefix_b + #Segtoc tail added here + @segtoc << "

    \n" #bugfix sort later DEBUGNOW + @segtoc << @seg_toc_band_bottom + ads=SiSU_HTML_promo::Ad.new(@md) + @segtoc << format_head_toc.seg_navigation_tail << ads.div.close << ads.display << format_head_toc.html_close + @segtoc.flatten!.compact! + Output.new(@segtoc,@md).segtoc + @segtoc=[] + @toc[:scr],@toc[:seg]=[],[] + toc_shared + end + end + class Table < SiSU_HTML_table::Table_html + end + class Scroll < SiSU_HTML_scroll::Scroll + end + class Scroll_output + def initialize(scr_toc,scr_body,scr_metadata,scr_owner_details,scr_tails,md) + @scr_toc,@scr_body,@scr_metadata,@scr_owner_details,@scr_tails,@md=scr_toc,scr_body,scr_metadata,scr_owner_details,scr_tails,md + end + def publish + scroll=[] + hr='

    ' + scroll << @scr_toc << '
    ' << hr << @scr_body << @scr_endnotes << hr << @scr_owner_details << '
    ' << @scr_tails + scroll.flatten!.compact! + end + end + class Seg < SiSU_HTML_seg::Seg + end + class Output + def initialize(data='',md='') + @data,@md=data,md + @file=SiSU_Env::SiSU_file.new(@md) + end + def scroll + begin + @filename_html_scroll=@file.write_file.html_scroll + @data.each do |para| + para.strip! + para.gsub!(/<:.+?>/,'') + para.gsub!(Xx[:html_relative2],@file.path_rel_links.html_scroll_2) + para.gsub!(Xx[:html_relative1],@file.path_rel_links.html_scroll_1) + #para.gsub!(/#{Xx[:html_relative]}/,@file.path_rel_links.html_scroll) + para.gsub!(/#{Rx[:mx_fa_clean]}/,'') + unless para =~/\A\s*\Z/ + @filename_html_scroll.puts para,"\n" + end + end + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + end + end + def segtoc + begin + @filename_html_segtoc=@file.write_file.html_segtoc + @filename_html_index=@file.write_file.html_seg_index + @data.each do |para| + para.strip! + para.gsub!(//,'') + para.gsub!(Xx[:html_relative2],@file.path_rel_links.html_seg_2) + para.gsub!(Xx[:html_relative1],@file.path_rel_links.html_seg_1) + unless para =~/\A\s*\Z/ + @filename_html_segtoc.puts para,"\n" + @filename_html_index.puts para,"\n" + end + end + rescue; SiSU_Errors::Info_error.new($!,$@,@md.cmd,@md.fns).error + end + end + end + end +end +__END__ diff --git a/lib/sisu/v3/html_format.rb b/lib/sisu/v3/html_format.rb new file mode 100644 index 00000000..0a5e6e45 --- /dev/null +++ b/lib/sisu/v3/html_format.rb @@ -0,0 +1,1480 @@ +# coding: utf-8 +=begin + + * Name: SiSU + + * Description: a framework for document structuring, publishing and search + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2010, Ralph Amissah, All Rights Reserved. 
+ + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . + + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Download: + + + * Ralph Amissah + + + + ** Description: html formating, css template + +=end +module SiSU_HTML_Format + include SiSU_Viz + class Paragraph_number + def initialize(md,ocn) + @md,@ocn=md,ocn.to_s + @ocn ||='' + vz=SiSU_Env::Get_init.instance.skin + @skin_no_ocn=if defined? vz.ocn_display_off \ + and vz.ocn_display_off==true + true + else false + end + end + def ocn_display + if @md.markup.inspect =~/no_ocn/ \ + or @md.mod.inspect =~/--no-ocn/ \ + or @skin_no_ocn + ocn_class='ocn_off' + @ocn.gsub(/^(\d+|)$/, + %{}) + elsif @ocn.to_i==0 + @ocn.gsub(/^(\d+|)$/, + %{}) + else + ocn_class='ocn' + @ocn.gsub(/^(\d+|)$/, + %{}) + end + end + def name + %{} + end + def id #w3c? "tidy" complains about numbers as identifiers ! 
annoying + %{id="o#{@ocn}"} + end + def goto + %{} + end + end + class Head_information + require "#{SiSU_lib}/shared_xml" # shared_xml.rb + include SiSU_Viz + attr_reader :md,:rdf,:vz + def initialize(md) + @md=md + # DublinCore 1 - title + @vz=SiSU_Env::Get_init.instance.skin + @css=SiSU_Env::CSS_stylesheet.new(md) + @seg_name_html=(SiSU_HTML::Source::Seg.new.seg_name_html || []) + @seg_name_html_tracker=(SiSU_HTML::Source::Seg.new.seg_name_html_tracker || []) + @toc,@index='toc','index' + @metalink='#metadata' + @tocband_scroll,@tocband_segtoc=nil,nil + end + def doc_types #used in toc & seg_nav_band + scroll=seg='' + wgt=Widget.new(@md) + %{ + + +
    + #{wgt.manifest} + #{wgt.search} +
    } + end + def rdf + SiSU_XML_tags::RDF.new(md) + end + def doc_type + %{ +\n} + end + def table_close + %{ +#{@vz.table_close}} + end + def buttons_home + %{ + #{@vz.banner_home_and_index_buttons} +} + end + def copyat + %{#{@vz.paragraph_font_tiny}copy @ +
    + #{@vz.txt_home} + } + end + def html_close #moved + %{ +} + end + end + class Widget < Head_information + def initialize(md) + super(md) + @md=md + @cf_defaults=SiSU_Env::Info_processing_flag.new + @env=SiSU_Env::Info_env.new(@md.fns) + @file=SiSU_Env::SiSU_file.new(@md) + end + def home + %{
    + + #{@vz.nav_txt_homepage} + + + #{text} + + + + #{text} + + + + #{@vz.nav_txt_manifest} + + + + #{@vz.nav_txt_pdf_portrait} + + + + #{@vz.nav_txt_pdf_landscape} + + + + #{@vz.nav_txt_plaintext} + + + + #{@vz.nav_txt_epub} + + + + #{@vz.nav_txt_odf} + + + + #{text} + +
    +#{pdf} +
    + #{@vz.banner_band} + +   +#{@vz.table_close} +

    } + end + def concordance_navigation_band(type='') + %{ + +
    + #{@vz.banner_band} + +   + #{@vz.png_nav_toc} +   +#{@vz.table_close} +

    } + end + def seg_head_navigation_band(type='') + firstseg=%{ + #{@vz.png_nav_nxt} + } if @md.firstseg =~/\S+/ + %{ + + + +} + %{
    + #{@vz.banner_band} + + #{doc_types} + +  #{firstseg}  +#{@vz.table_close} +

    } + end + def seg_head_navigation_band_bottom(type='') #retired 2.7.9 + if type=~/pdf/ + @tocband_segtoc=make_scroll_seg_pdf + end + firstseg=%{ + #{@vz.png_nav_nxt} + } if @md.firstseg =~/\S+/ + %{ + + + +} + %{
    +   + + #{@tocband_segtoc} + +  #{firstseg}  +#{@vz.table_close} +

    } + end + def manifest_link(text) +# @file=SiSU_Env::SiSU_file.new(@md) if @md + %{ + #{text} + } + end + def concordance_link(text) + if @md.concord_make + %{ + + #{text} + + } + else '' + end + end + def make_seg_scroll_pdf + wgt=Widget.new(@md) + scroll=%{

    + #{@vz.nav_txt_doc_link} +
    + + #{wgt.manifest} + #{wgt.search} +
    } + end + def make_scroll_seg_pdf + manifest=scroll=seg='' + wgt=Widget.new(@md) + seg=%{

    + #{@vz.nav_txt_toc_link} +
    + +
    + #{wgt.manifest} + #{wgt.search} +
    } + end + def make_concordance + manifest=scroll=seg='' + wgt=Widget.new(@md) + %{ + +
    + #{wgt.manifest} + #{wgt.search} +
    } + end + def head + rdf=SiSU_XML_tags::RDF.new(@md) + %{#{doc_type} + + + #{@md.html_title} + + +#{rdf.metatag_html} +#{@css.html}#{@css.html_seg} + +#{@vz.color_body} + + + +#{@vz.js_top}} + end + def concordance + if @md.concord_make + %{#{@vz.margin_css} +

    + + Concordance + +

    +#{@vz.table_close}} + else + %{#{@vz.margin_css} +#{@vz.table_close}} + end + end + def links_guide_vertical_open +# @file=SiSU_Env::SiSU_file.new(@md) if @md + url=((defined? @vz.url_hp) && @vz.url_hp =~/^https?:\/\/\S+$/ ? @vz.url_hp : @vz.url_home) + %{ +