From 3dcd083585b3f486ece3cfaa0780a6e2ec5b43fe Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 11 Apr 2020 20:51:27 -0400 Subject: help & manpages, start work --- .gitignore | 6 + COPYRIGHT | 14 +- README | 304 ++ doc/man/man1/spine.1 | 4088 ++++++++++++++++++ misc/util/d/cgi/search/README | 11 + misc/util/d/cgi/search/dub.sdl | 14 + .../d/cgi/search/src/spine_cgi_sqlite_search.d | 913 ++++ misc/util/rb/cgi/spine.search.cgi | 958 +++++ misc/util/rb/tex/dr_tex.rb | 70 + org/COPYRIGHT | 12 +- org/out_cgi_search_sqlite.org | 2 +- org/out_latex.org | 2 +- org/spine_build_scaffold.org | 8 + org/spine_doc.org | 4548 ++++++++++++++++++++ org/spine_info.org | 61 +- org/util_cgi_d_sqlite_search.org | 8 +- org/util_cgi_rb_fcgi_sqlite_search.org | 32 +- src/COPYRIGHT | 14 +- src/doc_reform/COPYRIGHT | 14 +- util/d/cgi/search/README | 11 - util/d/cgi/search/dub.sdl | 14 - util/d/cgi/search/dub.selections.json | 7 - .../cgi/search/localhostsqlitespine.search.sql.db | 0 util/d/cgi/search/src/spine_cgi_sqlite_search.d | 913 ---- util/rb/cgi/search.cgi | 937 ---- util/rb/cgi/search_ref.cgi | 937 ---- util/rb/cgi/sisu_7a_sqlite.cgi | 937 ---- util/rb/cgi/sisu_lng.cgi | 935 ---- util/rb/cgi/sisu_search_pg.cgi | 935 ---- util/rb/cgi/sisu_search_sqlite.cgi | 937 ---- util/rb/cgi/spine.search.cgi | 958 ----- util/rb/tex/dr_tex.rb | 70 - 32 files changed, 10995 insertions(+), 7675 deletions(-) create mode 100644 doc/man/man1/spine.1 create mode 100644 misc/util/d/cgi/search/README create mode 100644 misc/util/d/cgi/search/dub.sdl create mode 100644 misc/util/d/cgi/search/src/spine_cgi_sqlite_search.d create mode 100755 misc/util/rb/cgi/spine.search.cgi create mode 100755 misc/util/rb/tex/dr_tex.rb create mode 100644 org/spine_doc.org delete mode 100644 util/d/cgi/search/README delete mode 100644 util/d/cgi/search/dub.sdl delete mode 100644 util/d/cgi/search/dub.selections.json delete mode 100644 util/d/cgi/search/localhostsqlitespine.search.sql.db delete mode 100644 
util/d/cgi/search/src/spine_cgi_sqlite_search.d delete mode 100755 util/rb/cgi/search.cgi delete mode 100755 util/rb/cgi/search_ref.cgi delete mode 100755 util/rb/cgi/sisu_7a_sqlite.cgi delete mode 100755 util/rb/cgi/sisu_lng.cgi delete mode 100755 util/rb/cgi/sisu_search_pg.cgi delete mode 100755 util/rb/cgi/sisu_search_sqlite.cgi delete mode 100755 util/rb/cgi/spine.search.cgi delete mode 100755 util/rb/tex/dr_tex.rb diff --git a/.gitignore b/.gitignore index 39e781a..d37450c 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,13 @@ !*.d !*.rb !conf.sdl +!doc +!doc/** +!man +!man/** !org +!misc +!misc/** !util !util/** !ext_lib diff --git a/COPYRIGHT b/COPYRIGHT index 8cba1e7..05e171f 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,4 +1,4 @@ -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -7,7 +7,7 @@ - Copyright: (C) 2015 - 2020 Ralph Amissah - - code under src/ + - code under src/ & org/ - License: AGPL 3 or later: Spine, Doc Reform (SiSU), a framework for document structuring, publishing and @@ -34,19 +34,15 @@ [http://www.gnu.org/licenses/agpl.html] - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on SiSU markup) - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering - Hompages: - [http://www.doc_reform.org] [http://www.sisudoc.org] - - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] diff --git a/README b/README index d8ea96f..203ee70 100644 
--- a/README +++ b/README @@ -19,3 +19,307 @@ project_name: Spine, Doc Reform "http://www.doc_reform.org", "http://www.sisudoc.org" ] + +# Installation, Compilation + +SiSU spine is written in the programming language D for which there are 3 compilers: + +- dmd +- ldc +- gdc + +D projects tend to use dub as project manager +https://code.dlang.org/packages/dub +https://code.dlang.org/packages/dub +https://github.com/dlang/dub/blob/master/source/dub/commandline.d + + dub --compiler=ldc2 -color --config=ldc -b release + + dub --compiler=dmd -color --config=dmd + + dub --compiler=gdc-10 -color --config=gdc -b release + + make ldc + + make dmd + +there has been some coalescence around the Meson build system +https://mesonbuild.com/ + + meson + + ninja -C build + + meson setup --wipe build && ninja -v -C build + + make meson + +dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. + +# Configuration + +Configuration files are yaml files + +The following paths are searched: + + ~/.dr/config_local_site + ~/path_to_pod_root/.dr/config_local_site + +e.g. 
processing + + ~spineMarkupSamples/pod/* + +will search: + + ~spineMarkupSamples/pod/.dr/config_local_site + + ~/.dr/config_local_site + +to specify an alternative configuration file to use on the command line (in this +example named "my_config"): + + spine -v --html --config=~spineMarkupSamples/pod/.dr/my_config + +here is a sample configuration file: + +flag: + act0: "--html" + act1: "--html --epub" +output: + path: "/var/www/html" +default: + language: "en" + papersize: "a4" + text_wrap: "80" + digest: "sha256" +webserv: + http: "http" + domain: "localhost" + data_http: "http" + data_domain: "localhost" + data_root_url: "http://localhost" + data_root_path: "/var/www/html" + data_root_part: "" + images_root_part: "image" + cgi_title: "≅ SiSU Spine search" + cgi_http: "http" + cgi_domain: "localhost" + cgi_bin_url: "http://localhost/cgi-bin" + cgi_bin_part: "cgi-bin" + cgi_bin_path: "/usr/lib/cgi-bin" + cgi_search_script: "spine-search" + cgi_search_script_raw_fn_d: "spine_search.d" + cgi_port: "" + cgi_user: "" + cgi_action: "http://localhost/cgi-bin/spine-search" + db_sqlite: "spine.search.db" + db_pg_table: "" + db_pg_user: "" + +# Commands + +for a list of commands from the program type: + + spine -h + +at the time of writing this provides the following output: + + --abstraction document abstraction + --assert set optional assertions on + --cgi-search-form-codegen generates (pre-compiled) d code for search of specified db + --cgi-sqlite-search-filename =[filename] + --concordance file for document + --config =/path/to/config/file/including/filename + --dark alternative dark theme + --debug debug + --digest hash digest for each object + --epub process epub output + --harvest extract info on authors & topics from document header metadata + --harvest-authors extract info on authors from document header metadata + --harvest-topics extract info on topics from document header metadata + --hide-ocn object cite numbers + --html process html output + 
--html-link-harvest place links back to harvest in segmented html + --html-link-search html embedded search submission + --html-seg process html output + --html-scroll process html output + --lang =[lang code e.g. =en or =en,es] + --latex output for pdfs + --latex-color-links mono or color links for pdfs + --light default light theme + --manifest process manifest output + --ocn-off object cite numbers + --odf open document format text (--odt) + --odt open document format text + --output =/path/to/output/dir specify where to place output + --parallel parallelisation + --parallel-subprocesses nested parallelisation + --pdf latex output for pdfs + --pdf-color-links mono or color links for pdfs + --pod spine (doc reform) pod source content bundled +-q --quiet output to terminal + --section-backmatter document backmatter (default) + --section-biblio document biblio (default) + --section-blurb document blurb (default) + --section-body document body (default) + --section-bookindex document bookindex (default) + --section-endnotes document endnotes (default) + --section-glossary document glossary (default) + --section-toc table of contents (default) + --serial serial processing + --skip-output skip output + --show-config show config + --show-make show make + --show-metadata show metadata + --show-summary show summary + --source document markup source + --sqlite-discrete process discrete sqlite output + --sqlite-db-create create db, create tables + --sqlite-db-drop drop tables & db + --sqlite-db-recreate create db, create tables + --sqlite-delete sqlite output + --sqlite-db-filename =[filename].sql.db + --sqlite-insert sqlite output + --sqlite-update sqlite output + --text text output + --theme-dark alternative dark theme + --theme-light default light theme + --txt text output +-v --verbose output to terminal + --very-verbose output to terminal + --workon (reserved for some matters under development & testing) + --xhtml xhtml output +-h --help This help information. 
+ +# Examples + +if configuartion has been set specify just +- the desired output and +- the markup document/pod(s) to process + + spine -v --html ~spineMarkupSamples/markup/pod/sisu-manual + +if configuartion has not been set or to overide the set configration specify +- the output path as well as +- the desired output and +- the markup document/pod(s) to process + +note: ~webDocRoot should be the path to web doc root, provide a suitable output path. + + spine -v --html --html-link-search --html-link-harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --epub --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --epub --latex --odt --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +## harvest + +if you have a document collection with documents that have metadata headers a +summary of the collection can be made using the harvest command + + spine -v --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --harvest ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --harvest ~spineMarkupSamples/pod/* + +## sqlite + +### create db + +if there is no sqlite db you first need to create one, to do so +- the name of the db and +- the root path for document output +must be specified: + + spine -v \ + --sqlite-db-create --sqlite-db-filename="spine.search.db" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + + spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` + +if you have a configration file providing this information that is to be used +for a document collection you can point to the document collection: + + spine -v --sqlite-db-create ~spineMarkupSamples/pod + +### populate db + +must specify: +- the name of the db and +- the root path for 
document output + + spine -v --sqlite-update \ + --sqlite-db-filename="spine.search.db" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + + spine -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +if you have a configration file providing this information that is to be used +for a document collection you can point to the document collection: + + spine -v --sqlite-update ~spineMarkupSamples/pod/* + +### generate a cgi search form in d + + spine -v --cgi-search-form-codegen \ + --output=/var/www/html \ + ~spineMarkupSamples/pod + + spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod + + spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod/.dr/config_local_site + + spine --cgi-search-form-codegen --output=`echo ~webDocRoot` ~spineMarkupSamples/pod + + spine --cgi-search-form-codegen --cgi-sqlite-search-filename="spine_search" --output=`echo ~webDocRoot` + + spine -v --cgi-search-form-codegen \ + --sqlite-db-filename="spine.search.db" \ + --cgi-sqlite-search-filename="spine-search" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod + +#### compile the cgi search form + + cd /var/www/html/cgi # /var/www/html (default document root) + + cd ~webDocRoot/cgi + +the directory ~webDocRoot/cgi/src should contain two files +- spine_search.d (or whatever you named it) +- cgi.d (by Adam Rupee) + + dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. 
+ +should compile spine-search in ~webDocRoot/cgi/cgi-bin and copy it to the +cgi-bin directory + + spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --cgi-sqlite-search-filename="spine-search" --output=`echo ~webDocRoot` + + spine -v --sqlite-db-create ~spineMarkupSamples/pod + + spine -v --html --html-link-search --cgi-sqlite-search-filename="spine-search" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --cgi-sqlite-search-filename="spine-search" --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +### create db & search form + + spine -v \ + --sqlite-db-create --sqlite-db-filename="spine.search.db" \ + --cgi-search-form-codegen --cgi-sqlite-search-filename="spine-search" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + +### html with links to search form + + spine -v --html \ + --html-link-search \ + --output=`echo ~webDocRoot` \ + ~spineMarkupSamples/pod/* diff --git a/doc/man/man1/spine.1 b/doc/man/man1/spine.1 new file mode 100644 index 0000000..255119a --- /dev/null +++ b/doc/man/man1/spine.1 @@ -0,0 +1,4088 @@ +.TH "spine" "1" "2020-04-05" "0.10.0" "Spine" +.br +.SH NAME +.br +sisu - documents: markup, structuring, publishing in multiple standard formats, and search +.br +.SH SYNOPSIS +.br +sisu [--options] [filename/wildcard] + +.br +sisu --txt --html --epub --odt --pdf --wordmap --sqlite --manpage --texinfo --sisupod --source --qrcode [filename/wildcard] + +.br +sisu --pg (--createdb|update [filename/wildcard]|--dropall) + +.SH SISU - MANUAL, +RALPH AMISSAH + +.SH WHAT IS SISU? + +.SH INTRODUCTION - WHAT IS SISU? + +.BR + +.B SiSU +is a lightweight markup based document creation and publishing framework that +is controlled from the command line. 
Prepare documents for +.B SiSU +using your text editor of choice, then use +.B SiSU +to generate various output document formats. + +.BR +From a single lightly prepared document (plain-text +.I UTF-8 +) sisu custom builds several standard output formats which share a common (text +object) numbering system for citation of content within a document (that also +has implications for search). The sisu engine works with an abstraction of the +document's structure and content from which it is possible to generate +different forms of representation of the document. +.B SiSU +produces: plain-text, +.I HTML, +.I XHTML, +.I XML, +.I EPUB, +.I ODF: +.I ODT +(Opendocument), +.I LaTeX, +.I PDF, +and populates an +.I SQL +database ( +.I PostgreSQL +or +.I SQLite +) with text objects, roughly, paragraph sized chunks so that document searches +are done at this level of granularity. + +.BR +Outputs share a common citation numbering system, associated with text objects +and any semantic meta-data provided about the document. + +.BR + +.B SiSU +also provides concordance files, document content certificates and manifests of +generated output. Book indexes may be made. + +.BR +Some document markup samples are provided in the package sisu -markup-samples. +Homepages: + +- + +- + +.SH COMMANDS SUMMARY + +.SH DESCRIPTION + +.BR + +.B SiSU +is a document publishing system, that from a simple single marked-up document, +produces multiple output formats including: +.I plaintext, +.I HTML, +.I XHTML, +.I XML, +.I EPUB, +.I ODT +( +.I OpenDocument +( +.I ODF +) text), +.I LaTeX, +.I PDF, +info, and +.I SQL +( +.I PostgreSQL +and +.I SQLite +) , which share text object numbers ("object citation numbering") and the same +document structure information. 
For more see: or + +.SH DOCUMENT PROCESSING COMMAND FLAGS + +.TP +.B --abstraction [path + filename] +run document abstraction +.TP +.B --act[s0-9] [path + filename] +--act0 to --act9 configurable shortcuts for multiple flags, -0 to -9 synonyms, +configure in sisurc.yml; sisu default action on a specified file where no flag +is provided is --act0; --act or --acts for information on current actions +ascribed to --act0 to --act9 +.TP +.B --asciidoc [path + filename] +asciidoc, smart text (not available) +.TP +.B --cgi-search-form-codegen + generate d code search form to search db specfied needs --output=[path] and +--sqlite-db-filename=[cgi search form name] or path to configuration file +--config=[full path to config file] +.TP +.B --cgi-sqlite-search-filename=[filename] +name to give cgi-search form, (it generates a [filename].d file that requires +subsequent compilation) also required is the name of the sqlite db to be +searched by the form. +.TP +.B --concordance [path + filename] +(not implemented) +.TP +.B --config=[path to config file + filename] +.TP +.B --dark + alternative theme for html and epub output, a light (default) theme is + also provided +.TP +.B --digest (not implemented) +.TP +.B --delete [path + filename] +see --zap +.TP +.B --digests [path + filename] +not implemented +.TP +.B --epub [path + filename] +produces an epub document +.TP +.B --harvest [path to files] +extract and present info on authors & topics from document header metadata. +makes two lists of sisu output based on the sisu markup documents in a +directory: list of author and authors works (year and titles), and; list by +topic with titles and author. Makes use of header metadata fields (author, +title, date, topic_register). 
+.TP +.B --harvest-authors [path to files] +extract and present info on authors from metadata in document headers +.TP +.B --harvest-topics [path to files] +extract and present info on topics from metadata in document headers +.TP +.B --hide-ocn +turn visibility of object numbers off +.TP +.B --html [path + filename] +produces html output in two forms (i) segmented text with table of contents +(toc.html and index.html) and (ii) the document in a single file (scroll.html). +.TP +.B --html-link-harvest +within html output creates link to the document set metadata harvest output +part of --html output instruction and assumes that --harvest has been or will + be run +.TP +.B --html-link-search +within html output creates a search form for submission, requires information +on the name of the search form --search part of --html output instruction it +assumes there is a cgi search form and related document database +.TP +.B --html-scroll [path + filename] +produces html output, the document in a single file (scroll.html) only. Compare +--html-seg and --html +.TP +.B --html-seg [path + filename] +produces html output, segmented text with table of contents (toc.html and +index.html). Compare --html-scroll and --html +.TP +.B --lang=[language code, e.g. 
=en or =en,es] +provide language code of document +.TP +.B --latex [path + filename] +.I LaTeX +output for different document sizes (a4, a5, b4, letter) and orientations +(portrait, landscape) for downstream (processing and) conversion to pdf, (used +with xetex no direct link between programs provided as this is a much slower +process) +.TP +.B --latex-color-links +monochrome or color links within pdf, toggle (mono better for printing), +the default is mono for portrait and color for landscape documents +.TP +.B --light theme +for html and epub output, default, a dark alternative is provided +.TP +.B --manifest [path + filename] +produces an html summary of output generated (hyperlinked to content) and +document specific metadata (sisu_manifest.html). This step is assumed for most +processing flags. +.TP +.B --markdown [path + filename] +markdown smart text (not available) +.TP +.B --no-* +negate a toggle +.TP +.B --ocn-off +object numbers off (the c in ocn is for citation). See --hide-ocn +.TP +.B --odf [path + filename] +see --odt +.TP +.B --odt [path + filename] +produce open document output +.TP +.B --output=[path to output directories] +where to place document output +.TP +.B --parallel +parallelization on (the default except for sqlite) +.TP +.B --parallel-subprocesses +nested parallelization on (the default except for sqlite) +.TP +.B --papersize-(a4|a5|b5|letter|legal) +in conjunction with --pdf set pdf papersize, overriding any configuration +settings, to set more than one papersize repeat the option --pdf --papersize-a4 +--papersize-letter. See also --papersize=* (NOT implemented) +.BR +.B --papersize=a4,a5,b5,letter,legal +in conjunction with --pdf set pdf papersize, overriding any configuration +settings, to set more than one papersize list after the equal sign with a comma +separator --papersize=a4,letter. 
See also --papersize-* (NOT implemented) +.TP +.B --pdf [path + filename] +produces +.I LaTeX +see --latex +.TP +.B --pdf-color-links +monochrome or color links within latex for pdf. See --latex-color-links +.TP +.B --pod +markup source bundled in a zip file. +Produces a zipped file of the prepared document specified along with associated +images This provides a quick way of gathering the relevant +parts of a sisu document which can then for example be emailed. A sisupod +includes sisu markup source file, (along with associated documents if a master +file, or available in multilingual versions), together with related images. +(it should be possible in future to run spine commands directly against a pod). +.TP +.B --qrcode [path + filename] +generate QR code image of metadata (used in manifest). (not implemented) +.TP +.B --quiet +quiet less output to terminal. +.TP +.B --section-* +provides finer grain control over which parts of the document are processed +to produce output, toc, body, endnotes, glossary, biblio, bookindex and blurb +.TP +.B --section-biblio +produce document bibliography output, toggle +.TP +.B --section-blurb +produce document blurb output, toggle +.TP +.B --section-body +produce document body output, toggle +.TP +.B --section-bookindex +produce document bookindex output, toggle +.TP +.B --section-endnotes +produce document endnotes output, toggle +.TP +.B --section-endnotes +produce document glossary output, toggle +.TP +.B --serial +serial processing --no-parallel +.TP +.B --show-config +show site and document configuration instructions. Requires path to +configuration file or path to documents to be processed. 
+.TP +.B --show-make +show document make instructions +.TP +.B --show-metadata +show document metadata +.TP +.B --show-summary +show document summary +.TP +.B --source [path + filename] +document markup source +.TP +.B --sha256 +set hash digest where used to sha256 (not implemented) +.TP +.B --sha512 +set hash digest where used to sha512 (not implemented) +.TP +.B --sqlite-discrete [path + filename] +create a per document sqlite db +.TP +.B --sqlite-db-create --sqlite-db-filename="[db filename]" --output="[output path]" +create a shared db and its tables. Requires a db filename, which may be set in the configuration file or on the command line as shown +.TP +.B --sqlite-db-drop [path + db filename] +drop (remove) db and its tables +.TP +.B --sqlite-db-recreate [path + filename] +drop and re-create a shared db and its tables. Requires a db filename, which may be set in the configuration file or on the command line with --sqlite-db-filename="[db name]" +.TP +.B --sqlite-db-filename="[db name]" +provide name of sqlite db, to be created, dropped, populated or for which a search form is to be made. This information may also be set in the configuration file. +.TP +.B --sqlite-delete [path + filename] +process sqlite output, remove file +.TP +.B --sqlite-insert [path + filename] +process sqlite output, insert file. See --sqlite-update +.TP +.B --sqlite-update [path + filename] +process sqlite output, update file +.TP +.B --source [filename/wildcard] +copies sisu markup file to output directory. 
Alias -s +.TP +.B --text [filename/wildcard] +produces +.I plaintext +output +(not implemented) +.TP +.B --theme-dark +See --dark +.TP +.B --theme-light +See --light +.TP +.B --txt [filename/wildcard] +produces +.I plaintext +output +(not implemented) +.TP +.B --txt-asciidoc [filename/wildcard] +see --asciidoc +(not implemented) +.TP +.B --txt-markdown [filename/wildcard] +see --markdown +(not implemented) +.TP +.B --txt-rst [filename/wildcard] +see --rst +(not implemented) +.TP +.B --txt-textile [filename/wildcard] +see --textile +(not implemented) +.TP +.B -v +on its own, provides +.B SiSU +version information +.TP +.B -v [filename/wildcard] +see --verbose +.TP +.B --verbose [filename/wildcard] +provides verbose output of what is being generated, where output is placed (and +error messages if any). Alias -v +.TP +.B --very-verbose [filename/wildcard] +provides more verbose output of what is being generated. See --verbose. Alias +-V +.TP +.B --version +spine version +(not implemented) +.TP +.B --xhtml +xhtml output +(not implemented) + +.SH COMMAND LINE MODIFIERS + +.TP +.B --no-ocn +[with --html --pdf or --epub] switches off +.I object citation numbering. +Produce output without identifying numbers in margins of html or +.I LaTeX +/pdf output. +.SH DATABASE COMMANDS + +.BR + +.B dbi - database interface + +.BR + +.B --pg or --pgsql +set for +.I PostgreSQL +.B --sqlite +default set for +.I SQLite +-d is modifiable with --db=[database type (PgSQL or +.I SQLite +) ] +.TP +.B --pg -v --createall +initial step, creates required relations (tables, indexes) in existing +.I PostgreSQL +database (a database should be created manually and given the same name as +working directory, as requested) (rb.dbi) [ -dv --createall +.I SQLite +equivalent] it may be necessary to run sisu -Dv --createdb initially NOTE: at +the present time for +.I PostgreSQL +it may be necessary to manually create the database. 
The command would be +'createdb [database name]' where database name would be SiSU_[present working +directory name (without path)]. Please use only alphanumerics and underscores. +.TP +.B --pg -v --import +[filename/wildcard] imports data specified to +.I PostgreSQL +db (rb.dbi) [ -dv --import +.I SQLite +equivalent] +.TP +.B --pg -v --update +[filename/wildcard] updates/imports specified data to +.I PostgreSQL +db (rb.dbi) [ -dv --update +.I SQLite +equivalent] +.TP +.B --pg --remove +[filename/wildcard] removes specified data to +.I PostgreSQL +db (rb.dbi) [ -d --remove +.I SQLite +equivalent] +.TP +.B --pg --dropall +kills data" and drops ( +.I PostgreSQL +or +.I SQLite +) db, tables & indexes [ -d --dropall +.I SQLite +equivalent] + +.BR +The -v is for verbose output. +.SH CONFIGURATION + +.BR + +default location: +.TP +~/.dr/config_local_site +.TP +.nf +flag: + act0: "--html" + act1: "--html --epub" +output: + path: "/var/www/html" +default: + language: "en" + papersize: "a4" + text_wrap: "80" + digest: "sha256" +webserv: + http: "http" + domain: "localhost" + data_http: "http" + data_domain: "localhost" + data_root_url: "http://localhost" + data_root_path: "/var/www/html" + data_root_part: "" + images_root_part: "image" + cgi_title: "≅ SiSU Spine search" + cgi_http: "http" + cgi_domain: "localhost" + cgi_bin_url: "http://localhost/cgi-bin" + cgi_bin_part: "cgi-bin" + cgi_bin_path: "/usr/lib/cgi-bin" + cgi_search_script: "spine-search" + cgi_search_script_raw_fn_d: "spine_search.d" + cgi_port: "" + cgi_user: "" + cgi_action: "http://localhost/cgi-bin/spine-search" + db_sqlite: "spine.search.db" + db_pg_table: "" + db_pg_user: "" +.fi + +.BR +.SH SAMPLE POD DIRECTORY STRUCTURE +.BR +.TP +.nf + +pod (directory may contain multiple documents) + └── the_wealth_of_networks.yochai_benkler + ├── conf + │   └── sisu_document_make + ├── media + │   ├── image + │   │   ├── won_benkler_2_1.png + │   │   ├── won_benkler_6_1.png + │   │   ├── won_benkler_7_1.png + │   │   
├── won_benkler_7_2.png + │   │   ├── won_benkler_7_3a.png + │   │   ├── won_benkler_7_3b.png + │   │   ├── won_benkler_7_4.png + │   │   ├── won_benkler_7_5.png + │   │   ├── won_benkler_7_6.png + │   │   └── won_benkler_9_1.png + │   └── text + │   └── en + │   └── the_wealth_of_networks.yochai_benkler.sst + └── pod.manifest + +.fi +.SH COMMAND LINE EXAMPLES + +.TP +note: ~webDocRoot should be the path to web doc root, provide a suitable output path. +.TP +spine -v --html --html-link-search --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --html --html-link-search --html-link-harvest --epub --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod +.TP +spine -v --sqlite-db-create ~spineMarkupSamples/pod +.TP +spine -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --sqlite-update ~spineMarkupSamples/pod/* +.TP +spine -v --show-config +.TP +spine -v --show-config --config= ~spineMarkupSamples/pod/.dr/config_local_site_test +.TP +spine -v --show-config --config=~spineMarkupSamples/pod/.dr +.TP +spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod/.dr/config_local +.TP +cd ~webDocRoot/cgi +.TP +dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. +.TP + +.BR +Running sisu (alone without any flags, filenames or wildcards) brings up the +interactive help, as does any sisu command that is not recognised. Enter to +escape. 
+.SH HELP + +.SH SISU MANUAL + + +.BR +The most up to date information on sisu should be contained in the sisu_manual, +available at: + +.BR + + +.BR +The manual can be generated from source, found respectively, either within the +.B SiSU +tarball or installed locally at: + +.BR + ./data/doc/sisu/markup-samples/sisu_manual + +.BR + /usr/share/doc/sisu/markup-samples/sisu_manual + +.BR +move to the respective directory and type e.g.: + +.BR + sisu sisu_manual.ssm +.SH SISU MAN PAGES + + +.BR +If +.B SiSU +is installed on your system usual man commands should be available, try: + +.BR + man sisu + +.BR +Most +.B SiSU +man pages are generated directly from sisu documents that are used to prepare +the sisu manual, the sources files for which are located within the +.B SiSU +tarball at: + +.BR + ./data/doc/sisu/markup-samples/sisu_manual + +.BR +Once installed, directory equivalent to: + +.BR + /usr/share/doc/sisu/markup-samples/sisu_manual + +.BR +Available man pages are converted back to html using man2html: + +.BR + /usr/share/doc/sisu/html/ + +.BR + ./data/doc/sisu/html + +.BR +An online version of the sisu man page is available here: + +.BR + +- various sisu man pages [^1] + +.BR +- sisu.1 [^2] +.SH SISU BUILT-IN INTERACTIVE HELP, [DISCONTINUED] + + +.BR +This fell out of date and has been discontinued. +.SH INTRODUCTION TO SISU MARKUP[^3] + +.SH SUMMARY + +.BR + +.B SiSU +source documents are +.I plaintext +( +.I UTF-8 +)[^4] files + +.BR +All paragraphs are separated by an empty line. + +.BR +Markup is comprised of: + +.BR +- at the top of a document, the document header made up of semantic meta-data +about the document and if desired additional processing instructions (such an +instruction to automatically number headings from a particular level down) + +.BR +- followed by the prepared substantive text of which the most important single +characteristic is the markup of different heading levels, which define the +primary outline of the document structure. 
Markup of substantive text includes: + +.BR + * heading levels defines document structure + +.BR + * text basic attributes, italics, bold etc. + +.BR + * grouped text (objects), which are to be treated differently, such as code + blocks or poems. + +.BR + * footnotes/endnotes + +.BR + * linked text and images + +.BR + * paragraph actions, such as indent, bulleted, numbered-lists, etc. +.SH MARKUP RULES, DOCUMENT STRUCTURE AND METADATA REQUIREMENTS + + +.BR +minimal content/structure requirement: + +.BR +[metadata] +.nf +A~ (level A [title]) + +1~ (at least one level 1 [segment/(chapter)]) +.fi + + +.BR +structure rules (document heirarchy, heading levels): + +.BR +there are two sets of heading levels ABCD (title & parts if any) and 123 +(segment & subsegments if any) + +.BR +sisu has the fllowing levels: +.nf +A~ [title] . + required (== 1) followed by B~ or 1~ +B~ [part] * + followed by C~ or 1~ +C~ [subpart] * + followed by D~ or 1~ +D~ [subsubpart] * + followed by 1~ +1~ [segment (chapter)] + + required (>= 1) followed by text or 2~ +text * + followed by more text or 1~, 2~ + or relevant part *() +2~ [subsegment] * + followed by text or 3~ +text * + followed by more text or 1~, 2~ or 3~ + or relevant part, see *() +3~ [subsubsegment] * + followed by text +text * + followed by more text or 1~, 2~ or 3~ or relevant part, see *() + +*(B~ if none other used; + if C~ is last used: C~ or B~; + if D~ is used: D~, C~ or B~) +.fi + +.nf +- level A~ is the tile and is mandatory +- there can only be one level A~ + +- heading levels BCD, are optional and there may be several of each + (where all three are used corresponding to e.g. 
Book Part Section) + * sublevels that are used must follow each other sequentially + (alphabetically), +- heading levels A~ B~ C~ D~ are followed by other heading levels rather + than substantive text + which may be the subsequent sequential (alphabetic) heading part level + or a heading (segment) level 1~ +- there must be at least one heading (segment) level 1~ + (the level on which the text is segmented, in a book would correspond + to the Chapter level) +- additional heading levels 1~ 2~ 3~ are optional and there may be several + of each +- heading levels 1~ 2~ 3~ are followed by text (which may be followed by + the same heading level) + and/or the next lower numeric heading level (followed by text) + or indeed return to the relevant part level + (as a corollary to the rules above substantive text/ content + must be preceded by a level 1~ (2~ or 3~) heading) +.fi + +.SH MARKUP EXAMPLES + +.SH ONLINE + + +.BR +Online markup examples are available together with the respective outputs +produced from or from + + +.BR +There is of course this document, which provides a cursory overview of sisu +markup and the respective output produced: + + +.BR +an alternative presentation of markup syntax: +/usr/share/doc/sisu/on_markup.txt.gz +.SH INSTALLED + + +.BR +With +.B SiSU +installed sample skins may be found in: /usr/share/doc/sisu/markup-samples (or +equivalent directory) and if sisu -markup-samples is installed also under: +/usr/share/doc/sisu/markup-samples-non-free + +.SH MARKUP OF HEADERS + +.BR +Headers contain either: semantic meta-data about a document, which can be used +by any output module of the program, or; processing instructions. + +.BR +Note: the first line of a document may include information on the markup +version used in the form of a comment. 
Comments are a percentage mark at the +start of a paragraph (and as the first character in a line of text) followed by +a space and the comment: +.nf +% this would be a comment +.fi + +.SH SAMPLE HEADER + + +.BR +This current document is loaded by a master document that has a header similar +to this one: +.nf +% SiSU master 4.0 + +title: SiSU + subtitle: Manual + +creator: + author: Amissah, Ralph + +publisher: [publisher name] + +rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +classify: + topic_register: SiSU:manual;electronic documents:SiSU:manual + subject: ebook, epublishing, electronic book, electronic publishing, + electronic document, electronic citation, data structure, + citation systems, search + +% used_by: manual + +date: + published: 2008-05-22 + created: 2002-08-28 + issued: 2002-08-28 + available: 2002-08-28 + modified: 2010-03-03 + +make: + num_top: 1 + breaks: new=C; break=1 + bold: /Gnu|Debian|Ruby|SiSU/ + home_button_text: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + footer: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + manpage: name=sisu - documents: markup, structuring, publishing in multiple standard formats, and search; + synopsis=sisu [-abcDdeFhIiMmNnopqRrSsTtUuVvwXxYyZz0-9] [filename/wildcard ] + . sisu [-Ddcv] [instruction] + . 
sisu [-CcFLSVvW]
+
+@links:
+ { SiSU Homepage }http://www.sisudoc.org/
+ { SiSU Manual }http://www.sisudoc.org/sisu/sisu_manual/
+ { Book Samples & Markup Examples }http://www.jus.uio.no/sisu/SiSU/examples.html
+ { SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html
+ { SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html
+ { SiSU Git repo }http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary
+ { SiSU List Archives }http://lists.sisudoc.org/pipermail/sisu/
+ { SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html
+ { SiSU Project @ Debian }http://qa.debian.org/developer.php?login=sisu@lists.sisudoc.org
+ { SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU
+.fi
+
+.SH AVAILABLE HEADERS
+
+
+.BR
+Header tags appear at the beginning of a document and provide meta information
+on the document (such as the
+.I Dublin Core
+), or information as to how the document as a whole is to be processed. All
+header instructions take the form @headername: or on the next line and indented
+by one space :subheadername: All
+.I Dublin Core
+meta tags are available
+
+.BR
+
+.B @identifier:
+information or instructions
+
+.BR
+where the "identifier" is a tag recognised by the program, and the
+"information" or "instructions" belong to the tag/identifier specified
+
+.BR
+Note: a header where used should only be used once; all headers apart from
+@title: are optional; the @structure: header is used to describe document
+structure, and can be useful to know.
+ +.BR +This is a sample header +.nf +% SiSU 2.0 [declared file-type identifier with markup version] +.fi + +.nf +@title: [title text] [this header is the only one that is mandatory] + subtitle: [subtitle if any] + language: English +.fi + +.nf +creator: + author: [Lastname, First names] + illustrator: [Lastname, First names] + translator: [Lastname, First names] + prepared_by: [Lastname, First names] +.fi + +.nf +date: + published: [year or yyyy-mm-dd] + created: [year or yyyy-mm-dd] + issued: [year or yyyy-mm-dd] + available: [year or yyyy-mm-dd] + modified: [year or yyyy-mm-dd] + valid: [year or yyyy-mm-dd] + added_to_site: [year or yyyy-mm-dd] + translated: [year or yyyy-mm-dd] +.fi + +.nf +rights: + copyright: Copyright (C) [Year and Holder] + license: [Use License granted] + text: [Year and Holder] + translation: [Name, Year] + illustrations: [Name, Year] +.fi + +.nf +classify: + topic_register: SiSU:markup sample:book;book:novel:fantasy + type: + subject: + description: + keywords: + abstract: + loc: [Library of Congress classification] + dewey: [Dewey classification +.fi + +.nf +identify: + :isbn: [ISBN] + :oclc: +.fi + +.nf +links: { SiSU }http://www.sisudoc.org + { FSF }http://www.fsf.org +.fi + +.nf +make: + num_top: 1 + headings: [text to match for each level + (e.g. PART; Chapter; Section; Article; or another: none; BOOK|FIRST|SECOND; none; CHAPTER;) + breaks: new=:C; break=1 + promo: sisu, ruby, sisu_search_libre, open_society + bold: [regular expression of words/phrases to be made bold] + italics: [regular expression of words/phrases to italicise] + home_button_text: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + footer: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org +.fi + +.nf +original: + language: [language] +.fi + +.nf +notes: + comment: + prefix: [prefix is placed just after table of contents] +.fi + +.SH MARKUP OF SUBSTANTIVE TEXT + +.SH HEADING LEVELS + + +.BR +Heading levels are :A~ ,:B~ ,:C~ ,1~ ,2~ ,3~ ... 
:A - :C being part / section +headings, followed by other heading levels, and 1 -6 being headings followed by +substantive text or sub-headings. :A~ usually the title :A~? conditional level +1 heading (used where a stand-alone document may be imported into another) + +.BR + +.B :A~ [heading text] +Top level heading [this usually has similar content to the title @title: ] +NOTE: the heading levels described here are in 0.38 notation, see heading + +.BR + +.B :B~ [heading text] +Second level heading [this is a heading level divider] + +.BR + +.B :C~ [heading text] +Third level heading [this is a heading level divider] + +.BR + +.B 1~ [heading text] +Top level heading preceding substantive text of document or sub-heading 2, the +heading level that would normally be marked 1. or 2. or 3. etc. in a document, +and the level on which sisu by default would break html output into named +segments, names are provided automatically if none are given (a number), +otherwise takes the form 1~my_filename_for_this_segment + +.BR + +.B 2~ [heading text] +Second level heading preceding substantive text of document or sub-heading 3 , +the heading level that would normally be marked 1.1 or 1.2 or 1.3 or 2.1 etc. +in a document. + +.BR + +.B 3~ [heading text] +Third level heading preceding substantive text of document, that would normally +be marked 1.1.1 or 1.1.2 or 1.2.1 or 2.1.1 etc. in a document +.nf +1~filename level 1 heading, + +% the primary division such as Chapter that is followed by substantive text, and may be further subdivided (this is the level on which by default html segments are made) +.fi + +.SH FONT ATTRIBUTES + +.BR + +.B markup example: +.nf +normal text, *{emphasis}*, !{bold text}!, /{italics}/, _{underscore}_, "{citation}", +^{superscript}^, ,{subscript},, +{inserted text}+, -{strikethrough}-, #{monospace}# + +normal text + +*{emphasis}* [note: can be configured to be represented by bold, italics or underscore] + +!{bold text}! 
+ +/{italics}/ + +_{underscore}_ + +"{citation}" + +^{superscript}^ + +,{subscript}, + ++{inserted text}+ + +-{strikethrough}- + +#{monospace}# +.fi + + +.BR + +.B resulting output: + +.BR +normal text, +.B emphasis, +.B bold text +, +.I italics, +.I underscore, +"citation", ^superscript^, [subscript], ++inserted text++, --strikethrough--, +monospace + +.BR +normal text + +.BR + +.B emphasis +[note: can be configured to be represented by bold, italics or underscore] + +.BR + +.B bold text + +.BR + +.I italics + +.BR +.I underscore + +.BR +"citation" + +.BR +^superscript^ + +.BR +[subscript] + +.BR +++inserted text++ + +.BR +--strikethrough-- + +.BR +monospace +.SH INDENTATION AND BULLETS + + +.BR + +.B markup example: +.nf +ordinary paragraph + +_1 indent paragraph one step + +_2 indent paragraph two steps + +_9 indent paragraph nine steps +.fi + + +.BR + +.B resulting output: + +.BR +ordinary paragraph + +.BR + indent paragraph one step + +.BR + indent paragraph two steps + +.BR + indent paragraph nine steps + +.BR + +.B markup example: +.nf +_* bullet text + +_1* bullet text, first indent + +_2* bullet text, two step indent +.fi + + +.BR + +.B resulting output: + +.BR +- bullet text + +.BR + * bullet text, first indent + +.BR + * bullet text, two step indent + +.BR +Numbered List (not to be confused with headings/titles, (document structure)) + +.BR + +.B markup example: +.nf +# numbered list numbered list 1., 2., 3, etc. + +_# numbered list numbered list indented a., b., c., d., etc. 
+.fi + +.SH HANGING INDENTS + + +.BR + +.B markup example: +.nf +_0_1 first line no indent, +rest of paragraph indented one step + +_1_0 first line indented, +rest of paragraph no indent + +in each case level may be 0-9 +.fi + + +.BR + +.B resulting output: + +.BR +first line no indent, rest of paragraph indented one step; first line no + indent, rest of paragraph indented one step; first line no indent, rest of + paragraph indented one step; first line no indent, rest of paragraph indented + one step; first line no indent, rest of paragraph indented one step; first + line no indent, rest of paragraph indented one step; first line no indent, + rest of paragraph indented one step; first line no indent, rest of paragraph + indented one step; first line no indent, rest of paragraph indented one step; + +.BR +A regular paragraph. + +.BR +first line indented, rest of paragraph no indent first line indented, rest of +paragraph no indent first line indented, rest of paragraph no indent first line +indented, rest of paragraph no indent first line indented, rest of paragraph no +indent first line indented, rest of paragraph no indent first line indented, +rest of paragraph no indent first line indented, rest of paragraph no indent +first line indented, rest of paragraph no indent first line indented, rest of +paragraph no indent first line indented, rest of paragraph no indent + +.BR +in each case level may be 0-9 + +.BR + +.B live-build + A collection of scripts used to build customized +.B Debian + Livesystems. + .I live-build + was formerly known as live-helper, and even earlier known as live-package. + +.BR + +.B live-build + + A collection of scripts used to build customized +.B Debian + Livesystems. +.I live-build + was formerly known as live-helper, and even earlier known as live-package. +.SH FOOTNOTES / ENDNOTES + + +.BR +Footnotes and endnotes are marked up at the location where they would be +indicated within a text. They are automatically numbered. 
The output type +determines whether footnotes or endnotes will be produced + +.BR + +.B markup example: +.nf +~{ a footnote or endnote }~ +.fi + + +.BR + +.B resulting output: + +.BR +[^5] + +.BR + +.B markup example: +.nf +normal text~{ self contained endnote marker & endnote in one }~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text[^6] continues + +.BR + +.B markup example: +.nf +normal text ~{* unnumbered asterisk footnote/endnote, insert multiple asterisks if required }~ continues + +normal text ~{** another unnumbered asterisk footnote/endnote }~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text [^*] continues + +.BR +normal text [^**] continues + +.BR + +.B markup example: +.nf +normal text ~[* editors notes, numbered asterisk footnote/endnote series ]~ continues + +normal text ~[+ editors notes, numbered plus symbol footnote/endnote series ]~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text [^*3] continues + +.BR +normal text [^+2] continues + +.BR + +.B Alternative endnote pair notation for footnotes/endnotes: +.nf +% note the endnote marker "~^" + +normal text~^ continues + +^~ endnote text following the paragraph in which the marker occurs +.fi + + +.BR +the standard and pair notation cannot be mixed in the same document +.SH LINKS + +.SH NAKED URLS WITHIN TEXT, DEALING WITH URLS + + +.BR +urls found within text are marked up automatically. A url within text is +automatically hyperlinked to itself and by default decorated with angled +braces, unless they are contained within a code block (in which case they are +passed as normal text), or escaped by a preceding underscore (in which case the +decoration is omitted). 
+ +.BR + +.B markup example: +.nf +normal text http://www.sisudoc.org/ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text continues + +.BR +An escaped url without decoration + +.BR + +.B markup example: +.nf +normal text _http://www.sisudoc.org/ continues + +deb _http://www.jus.uio.no/sisu/archive unstable main non-free +.fi + + +.BR + +.B resulting output: + +.BR +normal text <_http://www.sisudoc.org/> continues + +.BR +deb <_http://www.jus.uio.no/sisu/archive> unstable main non-free + +.BR +where a code block is used there is neither decoration nor hyperlinking, code +blocks are discussed later in this document + +.BR + +.B resulting output: +.nf +deb http://www.jus.uio.no/sisu/archive unstable main non-free +deb-src http://www.jus.uio.no/sisu/archive unstable main non-free +.fi + +.SH LINKING TEXT + + +.BR +To link text or an image to a url the markup is as follows + +.BR + +.B markup example: +.nf +about { SiSU }http://url.org markup +.fi + + +.BR + +.B resulting output: + +.BR +aboutSiSU markup + +.BR +A shortcut notation is available so the url link may also be provided +automatically as a footnote + +.BR + +.B markup example: +.nf +about {~^ SiSU }http://url.org markup +.fi + + +.BR + +.B resulting output: + +.BR +aboutSiSU [^7] markup + +.BR +Internal document links to a tagged location, including an ocn + +.BR + +.B markup example: +.nf +about { text links }#link_text +.fi + + +.BR + +.B resulting output: + +.BR +about ⌠text links⌡⌈link_text⌋ + +.BR +Shared document collection link + +.BR + +.B markup example: +.nf +about { SiSU book markup examples }:SiSU/examples.html +.fi + + +.BR + +.B resulting output: + +.BR +about ⌠ +.B SiSU +book markup examples⌡⌈:SiSU/examples.html⌋ +.SH LINKING IMAGES + + +.BR + +.B markup example: +.nf +{ tux.png 64x80 }image + +% various url linked images + +{tux.png 64x80 "a better way" }http://www.sisudoc.org/ + +{GnuDebianLinuxRubyBetterWay.png 100x101 "Way Better - with Gnu/Linux, Debian and Ruby" 
}http://www.sisudoc.org/ + +{~^ ruby_logo.png "Ruby" }http://www.ruby-lang.org/en/ +.fi + + +.BR + +.B resulting output: + +.BR +[ tux.png ] + +.BR +tux.png 64x80 "Gnu/Linux - a better way" + +.BR +GnuDebianLinuxRubyBetterWay.png 100x101 "Way Better - with Gnu/Linux, Debian +and Ruby" + +.BR +ruby_logo.png 70x90 "Ruby" [^8] + +.BR + +.B linked url footnote shortcut +.nf +{~^ [text to link] }http://url.org + +% maps to: { [text to link] }http://url.org ~{ http://url.org }~ + +% which produces hyper-linked text within a document/paragraph, with an endnote providing the url for the text location used in the hyperlink +.fi + +.nf +text marker *~name +.fi + + +.BR +note at a heading level the same is automatically achieved by providing names +to headings 1, 2 and 3 i.e. 2~[name] and 3~[name] or in the case of +auto-heading numbering, without further intervention. +.SH LINK SHORTCUT FOR MULTIPLE VERSIONS OF A SISU DOCUMENT IN THE SAME DIRECTORY +TREE + + +.BR + +.B markup example: +.nf +!_ /{"Viral Spiral"}/, David Bollier + +{ "Viral Spiral", David Bollier [3sS]}viral_spiral.david_bollier.sst +.fi + + +.BR + +.B +.I "Viral Spiral", +David Bollier +"Viral Spiral", David Bollier + document manifest + ⌠html, segmented text⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠html, scroll, document in one⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠epub⌡「http://corundum/sisu_manual/en/epub/viral_spiral.david_bollier.epub」 + ⌠pdf, landscape⌡「http://corundum/sisu_manual/en/pdf/viral_spiral.david_bollier.pdf」 + ⌠pdf, portrait⌡「http://corundum/sisu_manual/en/pdf/viral_spiral.david_bollier.pdf」 + ⌠odf: odt, open document text⌡「http://corundum/sisu_manual/en/odt/viral_spiral.david_bollier.odt」 + ⌠xhtml scroll⌡「http://corundum/sisu_manual/en/xhtml/viral_spiral.david_bollier.xhtml」 + ⌠xml, sax⌡「http://corundum/sisu_manual/en/xml/viral_spiral.david_bollier.xml」 + ⌠xml, 
dom⌡「http://corundum/sisu_manual/en/xml/viral_spiral.david_bollier.xml」
+ ⌠concordance⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」
+ ⌠dcc, document content certificate (digests)⌡「http://corundum/sisu_manual/en/digest/viral_spiral.david_bollier.txt」
+ ⌠markup source text⌡「http://corundum/sisu_manual/en/src/viral_spiral.david_bollier.sst」
+ ⌠markup source (zipped) pod⌡「http://corundum/sisu_manual/en/pod/viral_spiral.david_bollier.sst.zip」
+
+.SH GROUPED TEXT / BLOCKED TEXT
+
+
+.BR
+There are two markup syntaxes for blocked text, using curly braces or using
+tics
+.SH BLOCKED TEXT CURLY BRACE SYNTAX
+
+
+.BR
+at the start of a line on its own use the name of the block type with an
+opening curly brace, follow with the content of the block, and close with a
+closing curly brace and the name of the block type, e.g.
+.nf
+code{
+
+this is a code block
+
+}code
+.fi
+
+.nf
+
+poem{
+
+this here is a poem
+
+}poem
+.fi
+
+.SH BLOCKED TEXT TIC SYNTAX
+
+.nf
+``` code
+this is a code block
+
+```
+
+``` poem
+this here is a poem
+
+```
+.fi
+
+
+.BR
+start a line with three backticks, a space followed by the name of the
+block type, follow with the content of the block, and close with three
+backticks on a line of their own, e.g.
+.SH TABLES
+
+
+.BR
+Tables may be prepared in either of two forms
+
+.BR
+
+.B markup example:
+.nf
+table{ c3; 40; 30; 30;
+
+This is a table
+this would become column two of row one
+column three of row one is here
+
+And here begins another row
+column two of row two
+column three of row two, and so on
+
+}table
+.fi
+
+
+.BR
+
+.B resulting output:
+This is a table|this would become column two of row one|column three of row one is here』And here begins another row|column two of row two|column three of row two, and so on』
+
+
+.BR
+a second form may be easier to work with in cases where there is not much
+information in each column
+
+.BR
+
+.B markup example:
+[^9]
+.nf
+!_ Table 3.1: Contributors to Wikipedia, January 2001 - June 2005
+
+{table~h 24; 12; 12; 12; 12; 12; 12;}
+ |Jan. 2001|Jan. 2002|Jan. 2003|Jan. 2004|July 2004|June 2006
+Contributors* | 10| 472| 2,188| 9,653| 25,011| 48,721
+Active contributors** | 9| 212| 846| 3,228| 8,442| 16,945
+Very active contributors*** | 0| 31| 190| 692| 1,639| 3,016
+No. of English language articles| 25| 16,000| 101,000| 190,000| 320,000| 630,000
+No. of articles, all languages | 25| 19,000| 138,000| 490,000| 862,000|1,600,000
+
+* Contributed at least ten times; ** at least 5 times in last month; *** more than 100 times in last month.
+.fi
+
+
+.BR
+
+.B resulting output:
+
+.BR
+
+.B Table 3.1: Contributors to Wikipedia, January 2001 - June 2005
+|Jan. 2001|Jan. 2002|Jan. 2003|Jan. 2004|July 2004|June 2006』Contributors*|10|472|2,188|9,653|25,011|48,721』Active contributors**|9|212|846|3,228|8,442|16,945』Very active contributors***|0|31|190|692|1,639|3,016』No. of English language articles|25|16,000|101,000|190,000|320,000|630,000』No. of articles, all languages|25|19,000|138,000|490,000|862,000|1,600,000』
+
+
+.BR
+* Contributed at least ten times; ** at least 5 times in last month; *** more
+than 100 times in last month.
+.SH POEM + + +.BR + +.B basic markup: +.nf +poem{ + + Your poem here + +}poem + +Each verse in a poem is given an object number. +.fi + + +.BR + +.B markup example: +.nf +poem{ + + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + +}poem +.fi + + +.BR + +.B resulting output: + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + + +.SH GROUP + + +.BR + +.B basic markup: +.nf +group{ + + Your grouped text here + +}group + +A group is treated as an object and given a single object number. +.fi + + +.BR + +.B markup example: +.nf +group{ + + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." 
+ "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + +}group +.fi + + +.BR + +.B resulting output: + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + + +.SH CODE + + +.BR +Code tags code{ ... }code (used as with other group tags described above) are +used to escape regular sisu markup, and have been used extensively within this +document to provide examples of +.B SiSU +markup. You cannot however use code tags to escape code tags. They are however +used in the same way as group or poem tags. + +.BR +A code-block is treated as an object and given a single object number. [an +option to number each line of code may be considered at some later time] + +.BR + +.B use of code tags instead of poem compared, resulting output: +.nf + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." 
+ "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' +.fi + + +.BR +From +.B SiSU +2.7.7 on you can number codeblocks by placing a hash after the opening code tag +code{# as demonstrated here: +.nf +1 | `Fury said to a +2 | mouse, That he +3 | met in the +4 | house, +5 | "Let us +6 | both go to +7 | law: I will +8 | prosecute +9 | YOU. --Come, +10 | I'll take no +11 | denial; We +12 | must have a +13 | trial: For +14 | really this +15 | morning I've +16 | nothing +17 | to do." +18 | Said the +19 | mouse to the +20 | cur, "Such +21 | a trial, +22 | dear Sir, +23 | With +24 | no jury +25 | or judge, +26 | would be +27 | wasting +28 | our +29 | breath." +30 | "I'll be +31 | judge, I'll +32 | be jury," +33 | Said +34 | cunning +35 | old Fury: +36 | "I'll +37 | try the +38 | whole +39 | cause, +40 | and +41 | condemn +42 | you +43 | to +44 | death."' +.fi + +.SH ADDITIONAL BREAKS - LINEBREAKS WITHIN OBJECTS, COLUMN AND PAGE-BREAKS + +.SH LINE-BREAKS + + +.BR +To break a line within a "paragraph object", two backslashes \e\e +with a space before and a space or newline after them +may be used. +.nf +To break a line within a "paragraph object", +two backslashes \e\e with a space before +and a space or newline after them \e\e +may be used. +.fi + + +.BR +The html break br enclosed in angle brackets (though undocumented) is available +in versions prior to 3.0.13 and 2.9.7 (it remains available for the time being, +but is depreciated). + +.BR +To draw a dividing line dividing paragraphs, see the section on page breaks. +.SH PAGE BREAKS + + +.BR +Page breaks are only relevant and honored in some output formats. A page break +or a new page may be inserted manually using the following markup on a line on +its own: + +.BR +page new =\e= breaks the page, starts a new page. + +.BR +page break -\- breaks a column, starts a new column, if using columns, else +breaks the page, starts a new page. 
+
+.BR
+page break line across page -..- draws a dividing line, dividing paragraphs
+
+.BR
+page break:
+.nf
+-\e\e-
+.fi
+
+
+.BR
+page (break) new:
+.nf
+=\e\e=
+.fi
+
+
+.BR
+page (break) line across page (dividing paragraphs):
+.nf
+-..-
+.fi
+
+.SH BIBLIOGRAPHY / REFERENCES
+
+
+.BR
+There are three ways to prepare a bibliography using sisu (which are mutually
+exclusive): (i) manually preparing and marking up as regular text in sisu a
+list of references, this is treated as a regular document segment (and placed
+before endnotes if any); (ii) preparing a bibliography, marking a heading level
+1~!biblio (note the exclamation mark) and preparing a bibliography using
+various metadata tags including for author: title: year: a list of which is
+provided below, or; (iii) as an assistance in preparing a bibliography, marking
+a heading level 1~!biblio and tagging citations within footnotes for inclusion,
+identifying citations and having a parser attempt to extract them and build a
+bibliography of the citations provided.
+
+.BR
+For the heading/section sequence: endnotes, bibliography then book index to
+occur, the name biblio or bibliography must be given to the bibliography
+section, like so:
+.nf
+1~!biblio~ [Note: heading marker::required title missing]
+.fi
+
+.SH A MARKUP TAGGED METADATA BIBLIOGRAPHY SECTION
+
+
+.BR
+Here instead of writing your full citations directly in footnotes, each time
+you have new material to cite, you add it to your bibliography section (if it
+has not been added yet) providing the information you need against an available
+list of tags (provided below).
+
+.BR
+The required tags are au: ti: and year: [^10] a short quick example might be
+as follows:
+.nf
+1~!biblio~ [Note: heading marker::required title missing]
+
+au: von Hippel, E.
+ti: Perspective: User Toolkits for Innovation +lng: (language) +jo: Journal of Product Innovation Management +vo: 18 +ed: (editor) +yr: 2001 +note: +sn: Hippel, /{User Toolkits}/ (2001) +id: vHippel_2001 +% form: + +au: Benkler, Yochai +ti: The Wealth of Networks +st: How Social Production Transforms Markets and Freedom +lng: (language) +pb: Harvard University Press +edn: (edition) +yr: 2006 +pl: U.S. +url: http://cyber.law.harvard.edu/wealth_of_networks/Main_Page +note: +sn: Benkler, /{Wealth of Networks}/ (2006) +id: Benkler2006 + +au: Quixote, Don; Panza, Sancho +ti: Taming Windmills, Keeping True +jo: Imaginary Journal +yr: 1605 +url: https://en.wikipedia.org/wiki/Don_Quixote +note: made up to provide an example of author markup for an article with two authors +sn: Quixote & Panza, /{Taming Windmills}/ (1605) +id: quixote1605 +.fi + + +.BR +Note that the section name !biblio (or !bibliography) is required for the +bibliography to be treated specially as such, and placed after the +auto-generated endnote section. + +.BR +Using this method, work goes into preparing the bibliography, the tags author +or editor, year and title are required and will be used to sort the +bibliography that is placed under the Bibliography section + +.BR +The metadata tags may include shortname (sn:) and id, if provided, which are +used for substitution within text. Every time the given id is found within the +text it will be replaced by the given short title of the work (it is for this +reason the short title has sisu markup to italicize the title), it should work +with any page numbers to be added, the short title should be one that can +easily be used to look up the full description in the bibliography. +.nf +The following footnote~{ quixote1605, pp 1000 - 1001, also Benkler2006 p 1. 
}~ +.fi + + +.BR +would be presented as: + +.BR +Quixote and Panza, +.I Taming Windmills +(1605), pp 1000 - 1001 also, Benkler, +.I Wealth of Networks, +(2006) p 1 or rather[^11] +.nf +au: author Surname, FirstNames (if multiple semi-colon separator) + (required unless editor to be used instead) +ti: title (required) +st: subtitle +jo: journal +vo: volume +ed: editor (required if author not provided) +tr: translator +src: source (generic field where others are not appropriate) +in: in (like src) +pl: place/location (state, country) +pb: publisher +edn: edition +yr: year (yyyy or yyyy-mm or yyyy-mm-dd) (required) +pg: pages +url: http://url +note: note +id: create_short_identifier e.g. authorSurnameYear + (used in substitutions: when found within text will be + replaced by the short name provided) +sn: short name e.g. Author, /{short title}/, Year + (used in substitutions: when an id is found within text + the short name will be used to replace it) +.fi + +.SH TAGGING CITATIONS FOR INCLUSION IN THE BIBLIOGRAPHY + + +.BR +Here whenever you make a citation that you wish be included in the +bibliography, you tag the citation as such using special delimiters (which are +subsequently removed from the final text produced by sisu) + +.BR +Here you would write something like the following, either in regular text or a +footnote +.nf +See .: Quixote, Don; Panza, Sancho /{Taming Windmills, Keeping True}/ (1605) :. +.fi + + +.BR + +.B SiSU +will parse for a number of patterns within the delimiters to try make out the +authors, title, date etc. and from that create a Bibliography. This is more +limited than the previously described method of preparing a tagged +bibliography, and using an id within text to identify the work, which also +lends itself to greater consistency. 
+.SH GLOSSARY + + +.BR +Using the section name 1~!glossary results in the Glossary being treated +specially as such, and placed after the auto-generated endnote section (before +the bibliography/list of references if there is one). + +.BR +The Glossary is ordinary text marked up in a manner deemed suitable for that +purpose. e.g. with the term in bold, possibly with a hanging indent. +.nf +1~!glossary~ [Note: heading marker::required title missing] + +_0_1 *{GPL}* An abbreviation that stands for "General Purpose License." ... + +_0_1 [provide your list of terms and definitions] +.fi + + +.BR +In the given example the first line is not indented subsequent lines are by one +level, and the term to be defined is in bold text. +.SH BOOK INDEX + + +.BR +To make an index append to paragraph the book index term relates to it, using +an equal sign and curly braces. + +.BR +Currently two levels are provided, a main term and if needed a sub-term. +Sub-terms are separated from the main term by a colon. +.nf + Paragraph containing main term and sub-term. + ={Main term:sub-term} +.fi + + +.BR +The index syntax starts on a new line, but there should not be an empty line +between paragraph and index markup. + +.BR +The structure of the resulting index would be: +.nf + Main term, 1 + sub-term, 1 +.fi + + +.BR +Several terms may relate to a paragraph, they are separated by a semicolon. If +the term refers to more than one paragraph, indicate the number of paragraphs. +.nf + Paragraph containing main term, second term and sub-term. + ={first term; second term: sub-term} +.fi + + +.BR +The structure of the resulting index would be: +.nf + First term, 1, + Second term, 1, + sub-term, 1 +.fi + + +.BR +If multiple sub-terms appear under one paragraph, they are separated under the +main term heading from each other by a pipe symbol. +.nf + Paragraph containing main term, second term and sub-term. 
+ ={Main term: + sub-term+2|second sub-term; + Another term + } + + A paragraph that continues discussion of the first sub-term +.fi + + +.BR +The plus one in the example provided indicates the first sub-term spans one +additional paragraph. The logical structure of the resulting index would be: +.nf + Main term, 1, + sub-term, 1-3, + second sub-term, 1, + Another term, 1 +.fi + +.SH COMPOSITE DOCUMENTS MARKUP + + +.BR +It is possible to build a document by creating a master document that requires +other documents. The documents required may be complete documents that could be +generated independently, or they could be markup snippets, prepared so as to be +easily available to be placed within another text. If the calling document is a +master document (built from other documents), it should be named with the +suffix +.B .ssm +Within this document you would provide information on the other documents that +should be included within the text. These may be other documents that would be +processed in a regular way, or markup bits prepared only for inclusion within a +master document +.B .sst +regular markup file, or +.B .ssi +(insert/information) A secondary file of the composite document is built prior +to processing with the same prefix and the suffix +.B ._sst + +.BR +basic markup for importing a document into a master document +.nf +<< filename1.sst + +<< filename2.ssi +.fi + + +.BR +The form described above should be relied on. Within the +.I Vim +editor it results in the text thus linked becoming hyperlinked to the document +it is calling in which is convenient for editing. 
+.SH SUBSTITUTIONS + + +.BR + +.B markup example: +.nf +The current Debian is ${debian_stable} the next debian will be ${debian_testing} + +Configure substitution in _sisu/sisu_document_make + +make: + substitute: /${debian_stable}/,'*{Wheezy}*' /${debian_testing}/,'*{Jessie}*' +.fi + + +.BR + +.B resulting output: + +.BR +The current +.B Debian +is +.B Jessie +the next debian will be +.B Stretch + +.BR +Configure substitution in _sisu/sisu_document_make +.SH SISU FILETYPES + + +.BR + +.B SiSU +has +.I plaintext +and binary filetypes, and can process either type of document. +.SH .SST .SSM .SSI MARKED UP PLAIN TEXT + +.TP +.B SiSU +documents are prepared as plain-text (utf-8) files with +.B SiSU +markup. They may make reference to and contain images (for example), which are +stored in the directory beneath them _sisu/image. 〔b¤SiSU +.I plaintext +markup files are of three types that may be distinguished by the file extension +used: regular text .sst; master documents, composite documents that incorporate +other text, which can be any regular text or text insert; and inserts the +contents of which are like regular text except these are marked .ssi and are +not processed. + +.BR + +.B SiSU +processing can be done directly against a sisu documents; which may be located +locally or on a remote server for which a url is provided. + +.BR + +.B SiSU +source markup can be shared with the command: + +.BR + sisu -s [filename] +.SH SISU TEXT - REGULAR FILES (.SST) + + +.BR +The most common form of document in +.B SiSU, +see the section on +.B SiSU +markup. +.SH SISU MASTER FILES (.SSM) + + +.BR +Composite documents which incorporate other +.B SiSU +documents which may be either regular +.B SiSU +text .sst which may be generated independently, or inserts prepared solely for +the purpose of being incorporated into one or more master documents. 
+ +.BR +The mechanism by which master files incorporate other documents is described as +one of the headings under under +.B SiSU +markup in the +.B SiSU +manual. + +.BR +Note: Master documents may be prepared in a similar way to regular documents, +and processing will occur normally if a .sst file is renamed .ssm without +requiring any other documents; the .ssm marker flags that the document may +contain other documents. + +.BR +Note: a secondary file of the composite document is built prior to processing +with the same prefix and the suffix ._sst +.SH SISU INSERT FILES (.SSI) + + +.BR +Inserts are documents prepared solely for the purpose of being incorporated +into one or more master documents. They resemble regular +.B SiSU +text files (.sst). Since sisu -5.5.0 (6.1.0) .ssi files can like .ssm files +include other .sst or .ssm files. .ssi files cannot be called by the sisu +processor directly and can only be incorporated in other documents. Making a +file a .ssi file is a quick and convenient way of breaking up a document that +is to be included in a master document, and flagging that the file to be +incorporated .ssi is not intended that the file should be processed on its own. +.SH SISUPOD, ZIPPED BINARY CONTAINER (SISUPOD.ZIP, .SSP) + + +.BR +A sisupod is a zipped +.B SiSU +text file or set of +.B SiSU +text files and any associated images that they contain (this will be extended +to include sound and multimedia-files) +.TP +.B SiSU +.I plaintext +files rely on a recognised directory structure to find contents such as images +associated with documents, but all images for example for all documents +contained in a directory are located in the sub-directory _sisu/image. Without +the ability to create a sisupod it can be inconvenient to manually identify all +other files associated with a document. A sisupod automatically bundles all +associated files with the document that is turned into a pod. 
+ +.BR +The structure of the sisupod is such that it may for example contain a single +document and its associated images; a master document and its associated +documents and anything else; or the zipped contents of a whole directory of +prepared +.B SiSU +documents. + +.BR +The command to create a sisupod is: + +.BR + sisu -S [filename] + +.BR +Alternatively, make a pod of the contents of a whole directory: + +.BR + sisu -S + +.BR + +.B SiSU +processing can be done directly against a sisupod; which may be located locally +or on a remote server for which a url is provided. + +.BR + + +.BR + +.SH CONFIGURATION + +.SH CONFIGURATION FILES + +.SH CONFIG.YML + + +.BR + +.B SiSU +configration parameters are adjusted in the configuration file, which can be +used to override the defaults set. This includes such things as which directory +interim processing should be done in and where the generated output should be +placed. + +.BR +The +.B SiSU +configuration file is a yaml file, which means indentation is significant. + +.BR + +.B SiSU +resource configuration is determined by looking at the following files if they +exist: + +.BR + ./_sisu/v7/sisurc.yml + +.BR + ./_sisu/sisurc.yml + +.BR + ~/.sisu/v7/sisurc.yml + +.BR + ~/.sisu/sisurc.yml + +.BR + /etc/sisu/v7/sisurc.yml + +.BR + /etc/sisu/sisurc.yml + +.BR +The search is in the order listed, and the first one found is used. + +.BR +In the absence of instructions in any of these it falls back to the internal +program defaults. + +.BR +Configuration determines the output and processing directories and the database +access details. + +.BR +If +.B SiSU +is installed a sample sisurc.yml may be found in /etc/sisu/sisurc.yml +.SH SISU_DOCUMENT_MAKE + + +.BR +Most sisu document headers relate to metadata, the exception is the @make: +header which provides processing related information. The default contents of +the @make header may be set by placing them in a file sisu_document_make. 
+ +.BR +The search order is as for resource configuration: + +.BR + ./_sisu/v7/sisu_document_make + +.BR + ./_sisu/sisu_document_make + +.BR + ~/.sisu/v7/sisu_document_make + +.BR + ~/.sisu/sisu_document_make + +.BR + /etc/sisu/v7/sisu_document_make + +.BR + /etc/sisu/sisu_document_make + +.BR +A sample sisu_document_make can be found in the _sisu/ directory under along +with the provided sisu markup samples. +.SH CSS - CASCADING STYLE SHEETS (FOR HTML, XHTML AND XML) + + +.BR +CSS files to modify the appearance of +.B SiSU +html, +.I XHTML +or +.I XML +may be placed in the configuration directory: ./_sisu/css ; ~/.sisu/css or; +/etc/sisu/css and these will be copied to the output directories with the +command sisu -CC. + +.BR +The basic CSS file for html output is html. css, placing a file of that name in +directory _sisu/css or equivalent will result in the default file of that name +being overwritten. + +.BR + +.I HTML: +html. css + +.BR + +.I XML +DOM: dom.css + +.BR + +.I XML +SAX: sax.css + +.BR + +.I XHTML: +xhtml. css + +.BR +The default homepage may use homepage.css or html. css + +.BR +Under consideration is to permit the placement of a CSS file with a different +name in directory _sisu/css directory or equivalent.[^12] +.SH ORGANISING CONTENT - DIRECTORY STRUCTURE AND MAPPING + + +.BR + +.B SiSU +v3 has new options for the source directory tree, and output directory +structures of which there are 3 alternatives. +.SH DOCUMENT SOURCE DIRECTORY + + +.BR +The document source directory is the directory in which sisu processing +commands are given. It contains the sisu source files (.sst .ssm .ssi), or (for +sisu v3 may contain) subdirectories with language codes which contain the sisu +source files, so all English files would go in subdirectory en/, French in fr/, +Spanish in es/ and so on. ISO 639-1 codes are used (as varied by po4a). 
A list
+of available languages (and possible sub-directory names) can be obtained with
+the command "sisu --help lang". The list of languages is limited to languages
+supported by XeTeX polyglossia.
The mapping means it is also possible to place +in the same output directory documents that are for organisational purposes +kept separately, for example documents on a given subject of two different +institutions may be kept in two different directories of the same name, under a +directory named after each institution, and these would be output to the same +output directory. Skins could be associated with each institution on a +directory basis and resulting documents will take on the appropriate different +appearance. +.SH ALTERNATIVE OUTPUT STRUCTURES + + +.BR +There are 3 possibile output structures described as being, by language, by +filetype or by filename, the selection is made in sisurc.yml +.nf +#% output_dir_structure_by: language; filetype; or filename +output_dir_structure_by: language #(language & filetype, preferred?) +#output_dir_structure_by: filetype +#output_dir_structure_by: filename #(default, closest to original v1 & v2) +.fi + +.SH BY LANGUAGE + + +.BR +The by language directory structure places output files + +.BR +The by language directory structure separates output files by language code +(all files of a given language), and within the language directory by filetype. + +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: language +.nf + |-- en + |-- epub + |-- hashes + |-- html + | |-- viral_spiral.david_bollier + | |-- manifest + | |-- qrcode + | |-- odt + | |-- pdf + | |-- sitemaps + | |-- txt + | |-- xhtml + | `-- xml + |-- po4a + | `-- live-manual + | |-- po + | |-- fr + | `-- pot + `-- _sisu + |-- css + |-- image + |-- image_sys -> ../../_sisu/image_sys + `-- xml + |-- rnc + |-- rng + `-- xsd +.fi + + +.BR +#by: language subject_dir/en/manifest/filename.html +.SH BY FILETYPE + + +.BR +The by filetype directory structure separates output files by filetype, all +html files in one directory pdfs in another and so on. Filenames are given a +language extension. 
+ +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: filetype +.nf + |-- epub + |-- hashes + |-- html + |-- viral_spiral.david_bollier + |-- manifest + |-- qrcode + |-- odt + |-- pdf + |-- po4a + |-- live-manual + | |-- po + | |-- fr + | `-- pot + |-- _sisu + | |-- css + | |-- image + | |-- image_sys -> ../../_sisu/image_sys + | `-- xml + | |-- rnc + | |-- rng + | `-- xsd + |-- sitemaps + |-- txt + |-- xhtml + `-- xml +.fi + + +.BR +#by: filetype subject_dir/html/filename/manifest.en.html +.SH BY FILENAME + + +.BR +The by filename directory structure places most output of a particular file +(the different filetypes) in a common directory. + +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: filename +.nf + |-- epub + |-- po4a + |-- live-manual + | |-- po + | |-- fr + | `-- pot + |-- _sisu + | |-- css + | |-- image + | |-- image_sys -> ../../_sisu/image_sys + | `-- xml + | |-- rnc + | |-- rng + | `-- xsd + |-- sitemaps + |-- src + |-- pod + `-- viral_spiral.david_bollier +.fi + + +.BR +#by: filename subject_dir/filename/manifest.en.html +.SH REMOTE DIRECTORIES + +.nf + ./subject_name/ + +% containing sub_directories named after the generated files from which they are made + + ./subject_name/src + +% contains shared source files text and binary e.g. sisu_manual.sst and sisu_manual.sst.zip + + ./subject_name/_sisu + +% configuration file e.g. sisurc.yml + + ./subject_name/_sisu/skin + +% skins in various skin directories doc, dir, site, yml + + ./subject_name/_sisu/css + + ./subject_name/_sisu/image + +% images for documents contained in this directory + + ./subject_name/_sisu/mm +.fi + +.SH SISUPOD + +.nf + ./sisupod/ + +% files stored at this level e.g. sisu_manual.sst + + ./sisupod/_sisu + +% configuration file e.g. 
sisurc.yml + + ./sisupod/_sisu/skin + +% skins in various skin directories doc, dir, site, yml + + ./sisupod/_sisu/css + + ./sisupod/_sisu/image + +% images for documents contained in this directory + + ./sisupod/_sisu/mm +.fi + +.SH HOMEPAGES + + +.BR + +.B SiSU +is about the ability to auto-generate documents. Home pages are regarded as +custom built items, and are not created by +.B SiSU. +More accurately, +.B SiSU +has a default home page, which will not be appropriate for use with other +sites, and the means to provide your own home page instead in one of two ways +as part of a site's configuration, these being: + +.BR +1. through placing your home page and other custom built documents in the +subdirectory _sisu/home/ (this probably being the easier and more convenient +option) + +.BR +2. through providing what you want as the home page in a skin, + +.BR +Document sets are contained in directories, usually organised by site or +subject. Each directory can/should have its own homepage. See the section on +directory structure and organisation of content. +.SH HOME PAGE AND OTHER CUSTOM BUILT PAGES IN A SUB-DIRECTORY + + +.BR +Custom built pages, including the home page index.html may be placed within the +configuration directory _sisu/home/ in any of the locations that is searched +for the configuration directory, namely ./_sisu ; ~/_sisu ; /etc/sisu From +there they are copied to the root of the output directory with the command: + +.BR + sisu -CC +.SH MARKUP AND OUTPUT EXAMPLES + +.SH MARKUP EXAMPLES + + +.BR +Current markup examples and document output samples are provided off + or and in the sisu +-markup-sample package available off + +.BR +For some documents hardly any markup at all is required at all, other than a +header, and an indication that the levels to be taken into account by the +program in generating its output are. 
+.SH SISU MARKUP SAMPLES + + +.BR +A few additional sample books prepared as sisu markup samples, output formats +to be generated using +.B SiSU +are contained in a separate package sisu -markup-samples. sisu -markup-samples +contains books (prepared using sisu markup), that were released by their +authors various licenses mostly different Creative Commons licences that do not +permit inclusion in the +.B Debian +Project as they have requirements that do not meet the +.B Debian +Free Software Guidelines for various reasons, most commonly that they require +that the original substantive text remain unchanged, and sometimes that the +works be used only non-commercially. + +.BR + +.I Accelerando, +Charles Stross (2005) +accelerando.charles_stross.sst + +.BR + +.I Alice's Adventures in Wonderland, +Lewis Carroll (1865) +alices_adventures_in_wonderland.lewis_carroll.sst + +.BR + +.I CONTENT, +Cory Doctorow (2008) +content.cory_doctorow.sst + +.BR + +.I Democratizing Innovation, +Eric von Hippel (2005) +democratizing_innovation.eric_von_hippel.sst + +.BR + +.I Down and Out in the Magic Kingdom, +Cory Doctorow (2003) +down_and_out_in_the_magic_kingdom.cory_doctorow.sst + +.BR + +.I For the Win, +Cory Doctorow (2010) +for_the_win.cory_doctorow.sst + +.BR + +.I Free as in Freedom - Richard Stallman's Crusade for Free Software, +Sam Williams (2002) +free_as_in_freedom.richard_stallman_crusade_for_free_software.sam_williams.sst + +.BR + +.I Free as in Freedom 2.0 - Richard Stallman and the Free Software Revolution, +Sam Williams (2002), Richard M. 
Stallman (2010) +free_as_in_freedom_2.richard_stallman_and_the_free_software_revolution.sam_williams.richard_stallman.sst + +.BR + +.I Free Culture - How Big Media Uses Technology and the Law to Lock Down +Culture and Control Creativity, +Lawrence Lessig (2004) +free_culture.lawrence_lessig.sst + +.BR + +.I Free For All - How Linux and the Free Software Movement Undercut the High +Tech Titans, +Peter Wayner (2002) +free_for_all.peter_wayner.sst + +.BR + +.I GNU GENERAL PUBLIC LICENSE v2, +Free Software Foundation (1991) +gpl2.fsf.sst + +.BR + +.I GNU GENERAL PUBLIC LICENSE v3, +Free Software Foundation (2007) +gpl3.fsf.sst + +.BR + +.I Gulliver's Travels, +Jonathan Swift (1726 / 1735) +gullivers_travels.jonathan_swift.sst + +.BR + +.I Little Brother, +Cory Doctorow (2008) +little_brother.cory_doctorow.sst + +.BR + +.I The Cathederal and the Bazaar, +Eric Raymond (2000) +the_cathedral_and_the_bazaar.eric_s_raymond.sst + +.BR + +.I The Public Domain - Enclosing the Commons of the Mind, +James Boyle (2008) +the_public_domain.james_boyle.sst + +.BR + +.I The Wealth of Networks - How Social Production Transforms Markets and +Freedom, +Yochai Benkler (2006) +the_wealth_of_networks.yochai_benkler.sst + +.BR + +.I Through the Looking Glass, +Lewis Carroll (1871) +through_the_looking_glass.lewis_carroll.sst + +.BR + +.I Two Bits - The Cultural Significance of Free Software, +Christopher Kelty (2008) +two_bits.christopher_kelty.sst + +.BR + +.I UN Contracts for International Sale of Goods, +UN (1980) +un_contracts_international_sale_of_goods_convention_1980.sst + +.BR + +.I Viral Spiral, +David Bollier (2008) +viral_spiral.david_bollier.sst +.SH SISU SEARCH - INTRODUCTION + + +.BR +Because the document structure of sites created is clearly defined, and the +text +.I object citation system +is available hypothetically at least, for all forms of output, it is possible +to search the sql database, and either read results from that database, or map +the results to the html or 
other output, which has richer text markup. + +.BR + +.B SiSU +can populate a relational sql type database with documents at an object level, +including objects numbers that are shared across different output types. Making +a document corpus searchable with that degree of granularity. Basically, your +match criteria is met by these documents and at these locations within each +document, which can be viewed within the database directly or in various output +formats. + +.BR + +.B SiSU +can populate an sql database (sqlite3 or postgresql) with documents made up of +their objects. It also can generate a cgi search form that can be used to query +the database. + +.BR +In order to use the built in search functionality you would take the following +steps. + +.BR +- use sisu to populate an sql database with with a sisu markup content + +.BR + * sqlite3 should work out of the box + +.BR + * postgresql may require some initial database configuration + +.BR +- provide a way to query the database, which sisu can assist with by + +.BR + * generating a sample ruby cgi search form, required (sisu configuration + recommended) + +.BR + * adding a query field for this search form to be added to all html files + (sisu configuration required) +.SH SQL + +.SH POPULATE THE DATABASE + + +.BR +TO populate the sql database, run sisu against a sisu markup file with one of +the following sets of flags +.nf +sisu --sqlite filename.sst +.fi + + +.BR +creates an sqlite3 database containing searchable content of just the sisu +markup document selected +.nf +sisu --sqlite --update filename.sst +.fi + + +.BR +creates an sqlite3 database containing searchable content of marked up +document(s) selected by the user from a common directory +.nf +sisu --pg --update filename.sst +.fi + + +.BR +fills a postgresql database with searchable content of marked up document(s) +selected by the user from a common directory + +.BR +For postgresql the first time the command is run in a given directory the user 
+will be prompted to create the requisite database, at the time of writing the +prompt sisu provides is as follows: +.nf +no connection with pg database established, you may need to run: + createdb "SiSU.7a.current" + after that don't forget to run: + sisu --pg --createall + before attempting to populate the database +.fi + + +.BR +The named database that sisu expects to find must exist and if necessary be +created using postgresql tools. If the database exist but the database tables +do not, sisu will attempt to create the tables it needs, the equivalent of the +requested sisu --pg --createall command. + +.BR +Once this is done, the sql database is populated and ready to be queried. +.SH SQL TYPE DATABASES + + +.BR + +.B SiSU +feeds sisu markup documents into sql type databases +.I PostgreSQL +[^13] and/or +.I SQLite +[^14] database together with information related to document structure. + +.BR +This is one of the more interesting output forms, as all the structural data of +the documents are retained (though can be ignored by the user of the database +should they so choose). All site texts/documents are (currently) streamed to +four tables: + +.BR + * one containing semantic (and other) headers, including, title, author, + subject, (the + .I Dublin Core. + ..); + +.BR + * another the substantive texts by individual "paragraph" (or object) - along + with structural information, each paragraph being identifiable by its + paragraph number (if it has one which almost all of them do), and the + substantive text of each paragraph quite naturally being searchable (both in + formatted and clean text versions for searching); and + +.BR + * a third containing endnotes cross-referenced back to the paragraph from + which they are referenced (both in formatted and clean text versions for + searching). + +.BR + * a fourth table with a one to one relation with the headers table contains + full text versions of output, eg. pdf, html, xml, and + .I ascii. 
+ +.BR +There is of course the possibility to add further structures. + +.BR +At this level +.B SiSU +loads a relational database with documents chunked into objects, their smallest +logical structurally constituent parts, as text objects, with their object +citation number and all other structural information needed to construct the +document. Text is stored (at this text object level) with and without +elementary markup tagging, the stripped version being so as to facilitate ease +of searching. + +.BR +Being able to search a relational database at an object level with the +.B SiSU +citation system is an effective way of locating content generated by +.B SiSU. +As individual text objects of a document stored (and indexed) together with +object numbers, and all versions of the document have the same numbering, +complex searches can be tailored to return just the locations of the search +results relevant for all available output formats, with live links to the +precise locations in the database or in html/xml documents; or, the structural +information provided makes it possible to search the full contents of the +database and have headings in which search content appears, or to search only +headings etc. (as the +.I Dublin Core +is incorporated it is easy to make use of that as well). +.SH POSTGRESQL + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system, +postgresql dependency package +.SH DESCRIPTION + + +.BR +Information related to using postgresql with sisu (and related to the +sisu_postgresql dependency package, which is a dummy package to install +dependencies needed for +.B SiSU +to populate a postgresql database, this being part of +.B SiSU +- man sisu) . 
+.SH SYNOPSIS + + +.BR + sisu -D [instruction] [filename/wildcard if required] + +.BR + sisu -D --pg --[instruction] [filename/wildcard if required] +.SH COMMANDS + + +.BR +Mappings to two databases are provided by default, postgresql and sqlite, the +same commands are used within sisu to construct and populate databases however +-d (lowercase) denotes sqlite and -D (uppercase) denotes postgresql, +alternatively --sqlite or --pgsql may be used + +.BR + +.B -D or --pgsql +may be used interchangeably. +.SH CREATE AND DESTROY DATABASE + +.TP +.B --pgsql --createall +initial step, creates required relations (tables, indexes) in existing +(postgresql) database (a database should be created manually and given the same +name as working directory, as requested) (rb.dbi) +.TP +.B sisu -D --createdb +creates database where no database existed before +.TP +.B sisu -D --create +creates database tables where no database tables existed before +.TP +.B sisu -D --Dropall +destroys database (including all its content)! kills data and drops tables, +indexes and database associated with a given directory (and directories of the +same name). +.TP +.B sisu -D --recreate +destroys existing database and builds a new empty database structure +.SH IMPORT AND REMOVE DOCUMENTS + +.TP +.B sisu -D --import -v [filename/wildcard] +populates database with the contents of the file. Imports documents(s) +specified to a postgresql database (at an object level). +.TP +.B sisu -D --update -v [filename/wildcard] +updates file contents in database +.TP +.B sisu -D --remove -v [filename/wildcard] +removes specified document from postgresql database. +.SH SQLITE + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system. 
+.SH DESCRIPTION + + +.BR +Information related to using sqlite with sisu (and related to the sisu_sqlite +dependency package, which is a dummy package to install dependencies needed for +.B SiSU +to populate an sqlite database, this being part of +.B SiSU +- man sisu) . +.SH SYNOPSIS + + +.BR + sisu -d [instruction] [filename/wildcard if required] + +.BR + sisu -d --(sqlite|pg) --[instruction] [filename/wildcard if required] +.SH COMMANDS + + +.BR +Mappings to two databases are provided by default, postgresql and sqlite, the +same commands are used within sisu to construct and populate databases however +-d (lowercase) denotes sqlite and -D (uppercase) denotes postgresql, +alternatively --sqlite or --pgsql may be used + +.SH CREATE AND DESTROY DATABASE + +.TP +.B --sqlite --createall +initial step, creates required relations (tables, indexes) in existing (sqlite) +database (a database should be created manually and given the same name as +working directory, as requested) (rb.dbi) +.TP +.B sisu -d --createdb +creates database where no database existed before +.TP +.B sisu -d --create +creates database tables where no database tables existed before +.TP +.B sisu -d --dropall +destroys database (including all its content)! kills data and drops tables, +indexes and database associated with a given directory (and directories of the +same name). +.TP +.B sisu -d --recreate +destroys existing database and builds a new empty database structure +.SH IMPORT AND REMOVE DOCUMENTS + +.TP +.B sisu -d --import -v [filename/wildcard] +populates database with the contents of the file. Imports documents(s) +specified to an sqlite database (at an object level). +.TP +.B sisu -d --update -v [filename/wildcard] +updates file contents in database +.TP +.B sisu -d --remove -v [filename/wildcard] +removes specified document from sqlite database. 
+.SH CGI SEARCH FORM + + +.BR +For the search form, which is a single search page + +.BR +- configure the search form + +.BR +- generate the sample search form with the sisu command, (this will be based on +the configuration settings and existing found sisu databases) + +.BR +For postgresql web content you may need to edit the search cgi script. Two +things to look out for are that the user is set as needed, and that the any +different databases that you wish to be able to query are listed. + +.BR +correctly, you may want www-data rather than your username. +.nf +@user='www-data' +.fi + + +.BR +- check the search form, copy it to the appropriate cgi directory and set the +correct permissions + +.BR +For a search form to appear on each html page, you need to: + +.BR +- rely on the above mentioned configuration of the search form + +.BR +- configure the html search form to be on + +.BR +- run the html command +.SH SETUP SEARCH FORM + + +.BR +You will need a web server, httpd with cgi enabled, and a postgresql database +to which you are able to create databases. 
+ +.BR +Setup postgresql, make sure you are able to create and write to the database, +e.g.: +.nf +sudo su postgres + createuser -d -a ralph +.fi + + +.BR +You then need to create the database that sisu will use, for sisu manual in the +directory manual/en for example, (when you try to populate a database that does +not exist sisu prompts as to whether it exists): +.nf +createdb SiSU.7a.manual +.fi + + +.BR + +.B SiSU +is then able to create the required tables that allow you to populate the +database with documents in the directory for which it has been created: +.nf +sisu --pg --createall -v +.fi + + +.BR +You can then start to populate the database, in this example with a single +document: +.nf +sisu --pg --update -v en/sisu_manual.ssm +.fi + + +.BR +To create a sample search form, from within the same directory run: +.nf +sisu --sample-search-form --db-pg +.fi + + +.BR +and copy the resulting cgi form to your cgi-bin directory + +.BR +A sample setup for nginx is provided that assumes data will be stored under +/srv/www and cgi scripts under /srv/cgi +.SH SEARCH - DATABASE FRONTEND SAMPLE, UTILISING DATABASE AND SISU FEATURES, +INCLUDING OBJECT CITATION NUMBERING (BACKEND CURRENTLY POSTGRESQL) + + +.BR +Sample search frontend [^15] A small database and +sample query front-end (search from) that makes use of the citation system, .I +object citation numbering +to demonstrates functionality.[^16] + +.BR + +.B SiSU +can provide information on which documents are matched and at what locations +within each document the matches are found. These results are relevant across +all outputs using +.I object citation numbering, +which includes html, +.I XML, +.I EPUB, +.I LaTeX, +.I PDF +and indeed the +.I SQL +database. You can then refer to one of the other outputs or in the +.I SQL +database expand the text within the matched objects (paragraphs) in the +documents matched. 
+ +.BR +Note you may set results either for documents matched and object number +locations within each matched document meeting the search criteria; or display +the names of the documents matched along with the objects (paragraphs) that +meet the search criteria.[^17] +.TP +.B sisu -F --webserv-webrick +builds a cgi web search frontend for the database created + +.BR +The following is feedback on the setup on a machine provided by the help +command: + +.BR + sisu --help sql +.nf +Postgresql + user: ralph + current db set: SiSU_sisu + port: 5432 + dbi connect: DBI:Pg:database=SiSU_sisu;port=5432 + +sqlite + current db set: /home/ralph/sisu_www/sisu/sisu_sqlite.db + dbi connect DBI:SQLite:/home/ralph/sisu_www/sisu/sisu_sqlite.db +.fi + +.BR +Note on databases built + +.BR +By default, [unless otherwise specified] databases are built on a directory +basis, from collections of documents within that directory. The name of the +directory you choose to work from is used as the database name, i.e. if you are +working in a directory called /home/ralph/ebook the database SiSU_ebook is +used. 
[otherwise a manual mapping for the collection is necessary] + +.SH SEARCH FORM + +.TP +.B sisu -F +generates a sample search form, which must be copied to the web-server cgi +directory +.TP +.B sisu -F --webserv-webrick +generates a sample search form for use with the webrick server, which must be +copied to the web-server cgi directory +.TP +.B sisu -W +starts the webrick server which should be available wherever sisu is properly +installed + +.BR +The generated search form must be copied manually to the webserver directory as +instructed +.SH SISU_WEBRICK + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system +.SH SYNOPSIS + + +.BR +sisu_webrick [port] + +.BR +or + +.BR +sisu -W [port] +.SH DESCRIPTION + + +.BR +sisu_webrick is part of +.B SiSU +(man sisu) sisu_webrick starts +.B Ruby +' s Webrick web-server and points it to the directories to which +.B SiSU +output is written, providing a list of these directories (assuming +.B SiSU +is in use and they exist). + +.BR +The default port for sisu_webrick is set to 8081, this may be modified in the +yaml file: ~/.sisu/sisurc.yml a sample of which is provided as +/etc/sisu/sisurc.yml (or in the equivalent directory on your system). +.SH SUMMARY OF MAN PAGE + + +.BR +sisu_webrick, may be started on it's own with the command: sisu_webrick [port] +or using the sisu command with the -W flag: sisu -W [port] + +.BR +where no port is given and settings are unchanged the default port is 8081 +.SH DOCUMENT PROCESSING COMMAND FLAGS + + +.BR +sisu -W [port] starts +.B Ruby +Webrick web-server, serving +.B SiSU +output directories, on the port provided, or if no port is provided and the +defaults have not been changed in ~/.sisu/sisurc.yaml then on port 8081 +.SH SUMMARY OF FEATURES + + +.BR +- sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a +single +.I UTF-8 +file using a minimalistic mnemonic syntax. 
Typical literature, documents like +"War and Peace" require almost no markup, and most of the headers are optional. + +.BR +- markup is easily readable/parsable by the human eye, (basic markup is simpler +and more sparse than the most basic +.I HTML +) , [this may also be converted to +.I XML +representations of the same input/source document]. + +.BR +- markup defines document structure (this may be done once in a header +pattern-match description, or for heading levels individually); basic text +attributes (bold, italics, underscore, strike-through etc.) as required; and +semantic information related to the document (header information, extended +beyond the Dublin core and easily further extended as required); the headers +may also contain processing instructions. +.B SiSU +markup is primarily an abstraction of document structure and document metadata +to permit taking advantage of the basic strengths of existing alternative +practical standard ways of representing documents [be that browser viewing, +paper publication, sql search etc.] 
(html, epub, xml, odf, latex, pdf, sql) + +.BR +- for output produces reasonably elegant output of established industry and +institutionally accepted open standard formats.[3] takes advantage of the +different strengths of various standard formats for representing documents, +amongst the output formats currently supported are: + +.BR +* +.I HTML +- both as a single scrollable text and a segmented document + +.BR +* +.I XHTML + +.BR +* +.I EPUB + +.BR +* +.I XML +- both in sax and dom style xml structures for further development as required + +.BR +* +.I ODT +- Open Document Format text, the iso standard for document storage + +.BR +* +.I LaTeX +- used to generate pdf + +.BR +* +.I PDF +(via +.I LaTeX +) + +.BR +* +.I SQL +- population of an sql database ( +.I PostgreSQL +or +.I SQLite +) , (at the same object level that is used to cite text within a document) + +.BR +Also produces: concordance files; document content certificates (md5 or sha256 +digests of headings, paragraphs, images etc.) and html manifests (and sitemaps +of content). (b) takes advantage of the strengths implicit in these very +different output types, (e.g. PDFs produced using typesetting of +.I LaTeX, +databases populated with documents at an individual object/paragraph level, +making possible +.I granular search +(and related possibilities)) + +.BR +- ensuring content can be cited in a meaningful way regardless of selected +output format. Online publishing (and publishing in multiple document formats) +lacks a useful way of citing text internally within documents (important to +academics generally and to lawyers) as page numbers are meaningless across +browsers and formats. sisu seeks to provide a common way of pinpoint the text +within a document, (which can be utilized for citation and by search engines). 
+The outputs share a common numbering system that is meaningful (to man and
+machine) across all digital outputs whether paper, screen, or database
+oriented, (pdf,
+.I HTML,
+.I EPUB,
+xml, sqlite, postgresql) , this numbering system can be used to reference
+content.
+
+.BR
+- Granular search within documents.
+.I SQL
+databases are populated at an object level (roughly headings, paragraphs,
+verse, tables) and become searchable with that degree of granularity, the
+output information provides the object/paragraph numbers which are relevant
+across all generated outputs; it is also possible to look at just the matching
+paragraphs of the documents in the database; [output indexing also works well
+with search indexing tools like hyperestraier].
+
+.BR
+- long term maintainability of document collections in a world of changing
+formats, having a very sparsely marked-up source document base. there is a
+considerable degree of future-proofing, output representations are
+"upgradeable", and new document formats may be added. e.g. addition of odf
+(open document text) module in 2006, epub in 2009, and html5 output
+sometime in the future, without modification of existing prepared texts
+
+.BR
+*
+.I SQL
+search aside, documents are generated as required and static once generated. 
+
+.BR
+- documents produced are static files, and may be batch processed, this needs
+to be done only once but may be repeated for various reasons as desired
+(updated content, addition of new output formats, updated technology document
+presentations/representations)
+
+.BR
+- document source (
+.I plaintext
+utf-8) if shared on the net may be used as input and processed locally to
+produce the different document outputs
+
+.BR
+- document source may be bundled together (automatically) with associated
+documents (multiple language versions or master document with inclusions) and
+images and sent as a zip file called a sisupod, if shared on the net these too
+may be processed locally to produce the desired document outputs
+
+.BR
+- generated document outputs may automatically be posted to remote sites.
+
+.BR
+- for basic document generation, the only software dependency is
+.B Ruby,
+and a few standard Unix tools (this covers
+.I plaintext,
+.I HTML,
+.I EPUB,
+.I XML,
+.I ODF,
+.I LaTeX
+) . To use a database you of course need that, and to convert the
+.I LaTeX
+generated to pdf, a latex processor like tetex or texlive.
+
+.BR
+- as a developer's tool it is flexible and extensible
+
+.BR
+Syntax highlighting for
+.B SiSU
+markup is available for a number of text editors.
+
+.BR
+
+.B SiSU
+is less about document layout than about finding a way with little markup to be
+able to construct an abstract representation of a document that makes it
+possible to produce multiple representations of it which may be rather
+different from each other and used for different purposes, whether layout and
+publishing, or search of content
+
+.BR
+i.e. 
to be able to take advantage from this minimal preparation starting point +of some of the strengths of rather different established ways of representing +documents for different purposes, whether for search (relational database, or +indexed flat files generated for that purpose whether of complete documents, or +say of files made up of objects), online viewing (e.g. html, xml, pdf) , or +paper publication (e.g. pdf) ... + +.BR +the solution arrived at is by extracting structural information about the +document (about headings within the document) and by tracking objects (which +are serialized and also given hash values) in the manner described. It makes +possible representations that are quite different from those offered at +present. For example objects could be saved individually and identified by +their hashes, with an index of how the objects relate to each other to form a +document. +.TP +.BI *1. +square brackets + +.BR +.TP +.BI *2. +square brackets + +.BR +.TP +.BI +1. +square brackets + +.BR +.TP +.BI 1. + + +.BR +.TP +.BI 2. + + +.BR +.TP +.BI 3. +From sometime after SiSU 0.58 it should be possible to describe SiSU markup +using SiSU, which though not an original design goal is useful. + +.BR +.TP +.BI 4. +files should be prepared using UTF-8 character encoding + +.BR +.TP +.BI 5. +a footnote or endnote + +.BR +.TP +.BI 6. +self contained endnote marker & endnote in one + +.BR +.TP +.BI *. +unnumbered asterisk footnote/endnote, insert multiple asterisks if required + +.BR +.TP +.BI **. +another unnumbered asterisk footnote/endnote + +.BR +.TP +.BI *3. +editors notes, numbered asterisk footnote/endnote series + +.BR +.TP +.BI +2. +editors notes, numbered plus symbol footnote/endnote series + +.BR +.TP +.BI 7. + + +.BR +.TP +.BI 8. + + +.BR +.TP +.BI 9. +Table from the Wealth of Networks by Yochai Benkler + + +.BR +.TP +.BI 10. +for which you may alternatively use the full form author: title: and year: + +.BR +.TP +.BI 11. 
+Quixote and Panza, Taming Windmills (1605), pp 1000 - 1001 also, Benkler, Wealth of Networks (2006), p 1 + +.BR +.TP +.BI 12. +SiSU has worked this way in the past, though this was dropped as it was +thought the complexity outweighed the flexibility, however, the balance was +rather fine and this behaviour could be reinstated. + +.BR +.TP +.BI 13. + + + +.BR +.TP +.BI 14. + + +.BR +.TP +.BI 15. + + +.BR +.TP +.BI 16. +(which could be extended further with current back-end). As regards scaling +of the database, it is as scalable as the database (here Postgresql) and +hardware allow. + +.BR +.TP +.BI 17. +of this feature when demonstrated to an IBM software innovations evaluator +in 2004 he said to paraphrase: this could be of interest to us. We have large +document management systems, you can search hundreds of thousands of documents +and we can tell you which documents meet your search criteria, but there is no +way we can tell you without opening each document where within each your +matches are found. + +.BR + +.TP +.SH SEE ALSO + sisu(1), + sisu-epub(1), + sisu-harvest(1), + sisu-html(1), + sisu-odf(1), + sisu-pdf(1), + sisu-pg(1), + sisu-sqlite(1), + sisu-txt(1). 
+ sisu_vim(7) +.TP +.SH HOMEPAGE + More information about SiSU can be found at or +.TP +.SH SOURCE + +.TP +.SH AUTHOR + SiSU is written by Ralph Amissah diff --git a/misc/util/d/cgi/search/README b/misc/util/d/cgi/search/README new file mode 100644 index 0000000..eb8fcde --- /dev/null +++ b/misc/util/d/cgi/search/README @@ -0,0 +1,11 @@ +change db name to match name of db you create +cv.db_selected = "spine.search.sql.db"; + +~dr/bin/spine-ldc -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --cgi-sqlite-search-filename="spine-search" --output=/var/www ~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/* + +~dr/bin/spine-ldc -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=/var/www ~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/* + +cd util/d/cgi/search/src +dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. + +http://localhost/cgi-bin/spine-search? diff --git a/misc/util/d/cgi/search/dub.sdl b/misc/util/d/cgi/search/dub.sdl new file mode 100644 index 0000000..b859f42 --- /dev/null +++ b/misc/util/d/cgi/search/dub.sdl @@ -0,0 +1,14 @@ +name "spine_search" +description "A minimal D application." 
+authors "ralph" +copyright "Copyright © 2020, ralph" +license "GPL-3.0+" +dependency "d2sqlite3" version="~>0.18.3" +targetType "executable" +targetPath "./cgi-bin" +mainSourceFile "src/spine_cgi_sqlite_search.d" +configuration "default" { + targetType "executable" + targetName "spine-search" + postGenerateCommands "/usr/bin/notify-send -t 0 'D executable ready' 'spine cgi sqlite search d'" +} diff --git a/misc/util/d/cgi/search/src/spine_cgi_sqlite_search.d b/misc/util/d/cgi/search/src/spine_cgi_sqlite_search.d new file mode 100644 index 0000000..1460643 --- /dev/null +++ b/misc/util/d/cgi/search/src/spine_cgi_sqlite_search.d @@ -0,0 +1,913 @@ +/+ dub.sdl + name "spine search" + description "spine cgi search" ++/ +import std.format; +import std.range; +import std.regex; +import arsd.cgi; +import d2sqlite3; +import std.process : environment; +void cgi_function_intro(Cgi cgi) { + string header; + string table; + string form; + struct Config { + string http_request_type; + string http_host; + // string server_name; + string doc_root; + string cgi_root; + string cgi_script; + string data_path_html; + string db_path; + string query_string; + string http_url; + string request_method; + } + auto conf = Config(); + conf.http_request_type = environment.get("REQUEST_SCHEME", "http"); + conf.http_host = environment.get("HTTP_HOST", "localhost"); + // conf.server_name = environment.get("SERVER_NAME", "localhost"); + conf.doc_root = environment.get("DOCUMENT_ROOT", "/var/www/html"); + conf.cgi_root = environment.get("CONTEXT_DOCUMENT_ROOT", "/usr/lib/cgi-bin/"); + // conf.cgi_script = environment.get("SCRIPT_NAME", "/cgi-bin/spine-search"); + conf.query_string = environment.get("QUERY_STRING", ""); + conf.http_url = environment.get("HTTP_REFERER", conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" 
~ conf.query_string); + conf.db_path = "/var/www/html/sqlite/"; // conf.http_host ~ "/sqlite/"; + conf.request_method = environment.get("REQUEST_METHOD", "POST"); + struct CGI_val { + string db_selected = ""; + string sql_match_limit = ""; // radio: ( 1000 | 2500 ) + string sql_match_offset = ""; + string search_text = ""; + string results_type = ""; // index + bool checked_echo = false; + bool checked_stats = false; + bool checked_url = false; + bool checked_searched = false; + bool checked_tip = false; + bool checked_sql = false; + } + auto cv = CGI_val(); + cv.db_selected = "spine.search.db"; // config, set db name + auto text_fields() { + string canned_query_str = environment.get("QUERY_STRING", ""); + if ("query_string" in cgi.post) { + canned_query_str = environment.get("QUERY_STRING", ""); + } + string[string] canned_query; + if (conf.request_method == "POST") { + } else if (conf.request_method == "GET") { + foreach (pair_str; canned_query_str.split("&")) { + // cgi.write(pair_str ~ "
"); + string[] pair = pair_str.split("="); + canned_query[pair[0]] = pair[1]; + } + // foreach (field, content; canned_query) { + // cgi.write(field ~ ": " ~ content ~ "
"); + // } + } + static struct Rgx { + // static canned_query = ctRegex!(`\A(?P.+)\Z`, "m"); + static search_text_area = ctRegex!(`\A(?P.+)\Z`, "m"); + // static fulltext = ctRegex!(`\A(?P.+)\Z`, "m"); + static line = ctRegex!(`^(?P.+?)(?: ~|$)`, "m"); + static text = ctRegex!(`(?:^|\s~\s*)text:\s+(?P.+?)(?: ~|$)`, "m"); + static author = ctRegex!(`(?:^|\s~\s*)author:\s+(?P.+)$`, "m"); + static title = ctRegex!(`(?:^|\s~\s*)title:\s+(?P.+)$`, "m"); + static uid = ctRegex!(`(?:^|\s~\s*)uid:\s+(?P.+)$`, "m"); + static fn = ctRegex!(`(?:^|\s~\s*)fn:\s+(?P.+)$`, "m"); + static keywords = ctRegex!(`(?:^|\s~\s*)keywords:\s+(?P.+)$`, "m"); + static topic_register = ctRegex!(`(?:^|\s~\s*)topic_register:\s+(?P.+)$`, "m"); + static subject = ctRegex!(`(?:^|\s~\s*)subject:\s+(?P.+)$`, "m"); + static description = ctRegex!(`(?:^|\s~\s*)description:\s+(?P.+)$`, "m"); + static publisher = ctRegex!(`(?:^|\s~\s*)publisher:\s+(?P.+)$`, "m"); + static editor = ctRegex!(`(?:^|\s~\s*)editor:\s+(?P.+)$`, "m"); + static contributor = ctRegex!(`(?:^|\s~\s*)contributor:\s+(?P.+)$`, "m"); + static date = ctRegex!(`(?:^|\s~\s*)date:\s+(?P.+)$`, "m"); + static results_type = ctRegex!(`(?:^|\s~\s*)type:\s+(?P.+)$`, "m"); + static format = ctRegex!(`(?:^|\s~\s*)format:\s+(?P.+)$`, "m"); + static identifier = ctRegex!(`(?:^|\s~\s*)identifier:\s+(?P.+)$`, "m"); + static source = ctRegex!(`(?:^|\s~\s*)source:\s+(?P.+)$`, "m"); + static language = ctRegex!(`(?:^|\s~\s*)language:\s+(?P.+)$`, "m"); + static relation = ctRegex!(`(?:^|\s~\s*)relation:\s+(?P.+)$`, "m"); + static coverage = ctRegex!(`(?:^|\s~\s*)coverage:\s+(?P.+)$`, "m"); + static rights = ctRegex!(`(?:^|\s~\s*)rights:\s+(?P.+)$`, "m"); + static comment = ctRegex!(`(?:^|\s~\s*)comment:\s+(?P.+)$`, "m"); + // static abstract_ = ctRegex!(`(?:^|\s~\s*)abstract:\s+(?P.+)$`, "m"); + static src_filename_base = ctRegex!(`^src_filename_base:\s+(?P.+)$`, "m"); + } + struct searchFields { + string canned_query = ""; // GET canned_query == cq + 
string search_text_area = ""; // POST search_text_area == tsa + string text = ""; // text == txt + string author = ""; // author == au + string title = ""; // title == ti + string uid = ""; // uid == uid + string fn = ""; // fn == fn + string keywords = ""; // keywords == kw + string topic_register = ""; // topic_register == tr + string subject = ""; // subject == su + string description = ""; // description == de + string publisher = ""; // publisher == pb + string editor = ""; // editor == ed + string contributor = ""; // contributor == ct + string date = ""; // date == dt + string format = ""; // format == fmt + string identifier = ""; // identifier == id + string source = ""; // source == src sfn + string language = ""; // language == lng + string relation = ""; // relation == rl + string coverage = ""; // coverage == cv + string rights = ""; // rights == rgt + string comment = ""; // comment == cmt + // string abstract = ""; + string src_filename_base = ""; // src_filename_base == bfn + string results_type = ""; // results_type == rt radio + string sql_match_limit = ""; // sql_match_limit == sml radio + string sql_match_offset = ""; // sql_match_offset == smo + string stats = ""; // stats == sts checked + string echo = ""; // echo == ec checked + string url = ""; // url == url checked + string searched = ""; // searched == se checked + string sql = ""; // sql == sql checked + } + auto rgx = Rgx(); + auto got = searchFields(); + if (environment.get("REQUEST_METHOD", "POST") == "POST") { + if ("sf" in cgi.post) { + got.search_text_area = cgi.post["sf"]; + if (auto m = got.search_text_area.matchFirst(rgx.text)) { + got.text = m["matched"]; + got.canned_query ~= "sf=" ~ m["matched"]; + } else if (auto m = got.search_text_area.matchFirst(rgx.line)) { + if ( + !(m["matched"].matchFirst(rgx.author)) + && !(m["matched"].matchFirst(rgx.title)) + ) { + got.text = m["matched"]; + got.canned_query ~= "sf=" ~ m["matched"]; + } + } + if (auto m = 
got.search_text_area.matchFirst(rgx.author)) { + got.author = m["matched"]; + got.canned_query ~= "&au=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.title)) { + got.title = m["matched"]; + got.canned_query ~= "&ti=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.uid)) { + got.uid = m["matched"]; + got.canned_query ~= "&uid=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.fn)) { + got.fn = m["matched"]; + got.canned_query ~= "&fn=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.keywords)) { + got.keywords = m["matched"]; + got.canned_query ~= "&kw=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.topic_register)) { + got.topic_register = m["matched"]; + got.canned_query ~= "&tr=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.subject)) { + got.subject = m["matched"]; + got.canned_query ~= "&su=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.description)) { + got.description = m["matched"]; + got.canned_query ~= "&de=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.publisher)) { + got.publisher = m["matched"]; + got.canned_query ~= "&pb=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.editor)) { + got.editor = m["matched"]; + got.canned_query ~= "&ed=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.contributor)) { + got.contributor = m["matched"]; + got.canned_query ~= "&ct=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.date)) { + got.date = m["matched"]; + got.canned_query ~= "&dt=" ~ m["matched"]; + } + // if (auto m = got.search_text_area.matchFirst(rgx.results_type)) { + // got.results_type = m["matched"]; + // got.canned_query ~= "&rt=" ~ m["matched"]; + // } + if (auto m = got.search_text_area.matchFirst(rgx.format)) { + got.format = m["matched"]; + got.canned_query ~= "&fmt=" ~ m["matched"]; + } 
+ if (auto m = got.search_text_area.matchFirst(rgx.identifier)) { + got.identifier = m["matched"]; + got.canned_query ~= "&id=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.source)) { + got.source = m["matched"]; + got.canned_query ~= "&src=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.language)) { + got.language = m["matched"]; + got.canned_query ~= "&lng=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.relation)) { + got.relation = m["matched"]; + got.canned_query ~= "&rl=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.coverage)) { + got.coverage = m["matched"]; + got.canned_query ~= "&cv=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.rights)) { + got.rights = m["matched"]; + got.canned_query ~= "&rgt=" ~ m["matched"]; + } + if (auto m = got.search_text_area.matchFirst(rgx.comment)) { + got.comment = m["matched"]; + got.canned_query ~= "&cmt=" ~ m["matched"]; + } + // if (auto m = search_text_area.matchFirst(rgx.abstract)) { + // got.abstract = m["matched"]; + // } + if (auto m = got.search_text_area.matchFirst(rgx.src_filename_base)) { + got.src_filename_base = m["matched"]; + got.canned_query ~= "&bfn=" ~ m["matched"]; + } + } + if ("fn" in cgi.post) { + got.fn = cgi.post["fn"]; + got.canned_query ~= "&fn=" ~ cgi.post["fn"]; + } + if ("rt" in cgi.post) { + got.results_type = cgi.post["rt"]; + got.canned_query ~= "&rt=" ~ cgi.post["rt"]; + } + if ("sts" in cgi.post) { + got.stats = cgi.post["sts"]; + got.canned_query ~= "&sts=" ~ cgi.post["sts"]; + } + if ("ec" in cgi.post) { + got.echo = cgi.post["ec"]; + got.canned_query ~= "&ec=" ~ cgi.post["ec"]; + } + if ("url" in cgi.post) { + got.url = cgi.post["url"]; + got.canned_query ~= "&url=" ~ cgi.post["url"]; + } + if ("se" in cgi.post) { + got.searched = cgi.post["se"]; + got.canned_query ~= "&se=" ~ cgi.post["se"]; + } + if ("sql" in cgi.post) { + got.sql = cgi.post["sql"]; + 
got.canned_query ~= "&sql=" ~ cgi.post["sql"]; + } + if ("sml" in cgi.post) { + got.sql_match_limit = cgi.post["sml"]; + got.canned_query ~= "&sml=" ~ cgi.post["sml"]; + } + if ("smo" in cgi.post) { + got.sql_match_offset = "0"; // cgi.post["smo"]; + got.canned_query ~= "&smo=0"; // ~ cgi.post["smo"]; + } + got.canned_query = got.canned_query.strip.split(" ").join("%20"); + conf.query_string = got.canned_query; + // cgi.write("f.canned_query: " ~ got.canned_query ~ "
"); + } else if (environment.get("REQUEST_METHOD", "POST") == "GET") { + got.canned_query = environment.get("QUERY_STRING", ""); + // cgi.write("f.canned_query: " ~ got.canned_query ~ "
"); + got.search_text_area = ""; + if ("sf" in canned_query && !(canned_query["sf"]).empty) { + got.text = canned_query["sf"].split("%20").join(" "); + got.search_text_area ~= "text: " ~ got.text ~ "\n"; + } + if ("au" in canned_query && !(canned_query["au"]).empty) { + got.author = canned_query["au"].split("%20").join(" "); + got.search_text_area ~= "author: " ~ got.author ~ "\n"; + } + if ("ti" in canned_query && !(canned_query["ti"]).empty) { + got.title = canned_query["ti"].split("%20").join(" "); + got.search_text_area ~= "title: " ~ got.title ~ "\n"; + } + if ("uid" in canned_query && !(canned_query["uid"]).empty) { + got.uid = canned_query["uid"].split("%20").join(" "); + got.search_text_area ~= "uid: " ~ got.uid ~ "\n"; + } + if ("fn" in canned_query && !(canned_query["fn"]).empty) { + got.fn = canned_query["fn"].split("%20").join(" "); + got.search_text_area ~= "fn: " ~ got.fn ~ "\n"; + } + if ("kw" in canned_query && !(canned_query["kw"]).empty) { + got.keywords = canned_query["kw"].split("%20").join(" "); + got.search_text_area ~= "keywords: " ~ got.keywords ~ "\n"; + } + if ("tr" in canned_query && !(canned_query["tr"]).empty) { + got.topic_register = canned_query["tr"].split("%20").join(" "); + got.search_text_area ~= "topic_register: " ~ got.topic_register ~ "\n"; + } + if ("su" in canned_query && !(canned_query["su"]).empty) { + got.subject = canned_query["su"].split("%20").join(" "); + got.search_text_area ~= "subject: " ~ got.subject ~ "\n"; + } + if ("de" in canned_query && !(canned_query["de"]).empty) { + got.description = canned_query["de"].split("%20").join(" "); + got.search_text_area ~= "description: " ~ got.description ~ "\n"; + } + if ("pb" in canned_query && !(canned_query["pb"]).empty) { + got.publisher = canned_query["pb"].split("%20").join(" "); + got.search_text_area ~= "publisher: " ~ got.publisher ~ "\n"; + } + if ("ed" in canned_query && !(canned_query["ed"]).empty) { + got.editor = canned_query["ed"].split("%20").join(" "); + 
got.search_text_area ~= "editor: " ~ got.editor ~ "\n"; + } + if ("ct" in canned_query && !(canned_query["ct"]).empty) { + got.contributor = canned_query["ct"].split("%20").join(" "); + got.search_text_area ~= "contributor: " ~ got.contributor ~ "\n"; + } + if ("dt" in canned_query && !(canned_query["dt"]).empty) { + got.date = canned_query["dt"].split("%20").join(" "); + got.search_text_area ~= "date: " ~ got.date ~ "\n"; + } + if ("rt" in canned_query && !(canned_query["rt"]).empty) { + got.results_type = canned_query["rt"].split("%20").join(" "); + // got.search_text_area ~= "results_type: " ~ got.results_type ~ "\n"; + } + if ("fmt" in canned_query && !(canned_query["fmt"]).empty) { + got.format = canned_query["fmt"].split("%20").join(" "); + got.search_text_area ~= "format: " ~ got.format ~ "\n"; + } + if ("id" in canned_query && !(canned_query["id"]).empty) { + got.identifier = canned_query["id"].split("%20").join(" "); + got.search_text_area ~= "identifier: " ~ got.identifier ~ "\n"; + } + if ("src" in canned_query && !(canned_query["src"]).empty) { + got.source = canned_query["src"].split("%20").join(" "); + got.search_text_area ~= "source: " ~ got.source ~ "\n"; + } + if ("lng" in canned_query && !(canned_query["lng"]).empty) { + got.language = canned_query["lng"].split("%20").join(" "); + got.search_text_area ~= "language: " ~ got.language ~ "\n"; + } + if ("rl" in canned_query && !(canned_query["rl"]).empty) { + got.relation = canned_query["rl"].split("%20").join(" "); + got.search_text_area ~= "relation: " ~ got.relation ~ "\n"; + } + if ("cv" in canned_query && !(canned_query["cv"]).empty) { + got.coverage = canned_query["cv"].split("%20").join(" "); + got.search_text_area ~= "coverage: " ~ got.coverage ~ "\n"; + } + if ("rgt" in canned_query && !(canned_query["rgt"]).empty) { + got.rights = canned_query["rgt"].split("%20").join(" "); + got.search_text_area ~= "rights: " ~ got.rights ~ "\n"; + } + if ("cmt" in canned_query && 
!(canned_query["cmt"]).empty) { + got.comment = canned_query["cmt"].split("%20").join(" "); + got.search_text_area ~= "comment: " ~ got.comment ~ "\n"; + } + // if ("abstract" in canned_query && !(canned_query["abstract"]).empty) { + // got.abstract = canned_query["abstract"]; + // } + if ("bfn" in canned_query && !(canned_query["bfn"]).empty) { // search_field + got.src_filename_base = canned_query["bfn"].split("%20").join(" "); + got.search_text_area ~= "src_filename_base: " ~ got.src_filename_base ~ "\n"; + } + if ("sml" in canned_query && !(canned_query["sml"]).empty) { + got.sql_match_limit = canned_query["sml"].split("%20").join(" "); + // got.search_text_area ~= "sql_match_limit: " ~ got.sql_match_limit ~ "\n"; + } + // cgi.write("f.search_text_area: " ~ got.search_text_area ~ "
"); + } + return got; + } + auto tf = text_fields; // + struct SQL_select { + string the_body = ""; + string the_range = ""; + } + auto sql_select = SQL_select(); + string base ; // = ""; + string tip ; // = ""; + string search_note ; // = ""; + uint sql_match_offset_count = 0; + string previous_next () { + static struct Rgx { + static track_offset = ctRegex!(`(?P[&]smo=)(?P[0-9]+)`, "m"); + } + auto rgx = Rgx(); + string _previous_next = ""; + int _current_offset_value = 0; + string _set_offset_next = ""; + string _set_offset_previous = ""; + string _url = ""; + string _url_previous = ""; + string _url_next = ""; + string arrow_previous = ""; + string arrow_next = ""; + if (environment.get("REQUEST_METHOD", "POST") == "POST") { + _url = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ tf.canned_query; + } else if (environment.get("REQUEST_METHOD", "POST") == "GET") { + _url = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ environment.get("QUERY_STRING", ""); + } + if (auto m = _url.matchFirst(rgx.track_offset)) { + _current_offset_value = m.captures["offset_val"].to!int; + _set_offset_next = m.captures["offset_key"] ~ ((m.captures["offset_val"]).to!int + cv.sql_match_limit.to!int).to!string; + _url_next = _url.replace(rgx.track_offset, _set_offset_next); + if (_current_offset_value < cv.sql_match_limit.to!int) { + _url_previous = ""; + } else { + _url_previous = ""; + _set_offset_previous = m.captures["offset_key"] ~ ((m.captures["offset_val"]).to!int - cv.sql_match_limit.to!int).to!string; + _url_previous = _url.replace(rgx.track_offset, _set_offset_previous); + } + } else {// _current_offset_value = 0; + _url_next = _url ~= "&smo=" ~ cv.sql_match_limit.to!string; + } + if (_url_previous.empty) { + arrow_previous = ""; + } else { + arrow_previous = + "" + ~ "" + ~ "<< prev" + ~ " || "; + } + arrow_next = + "" + ~ "" + ~ "next >>" + ~ ""; + _previous_next = "
" ~ arrow_previous ~ arrow_next; + return _previous_next; + } + { + header = format(q"┃ + + + + + + SiSU spine search form (sample) + + + + + + + +┃", + conf.http_host, + ); + } + { + table = format(q"┃ + + + +
+ + +
+
+ SiSU + +
+ git + +
+
+ +
+ ┃"); + } + { + string post_value(string field_name, string type="box", string set="on") { + string val = ""; + switch (type) { + case "field": + val = ((field_name in cgi.post && !(cgi.post[field_name]).empty) + ? cgi.post[field_name] + : (field_name in cgi.get) + ? cgi.get[field_name] + : ""); + val = tf.search_text_area; + break; + case "box": // generic for checkbox or radio; checkbox set == "on" radio set == "name set" + val = ((field_name in cgi.post && !(cgi.post[field_name]).empty) + ? (cgi.post[field_name] == set ? "checked" : "off") + : (field_name in cgi.get) + ? (cgi.get[field_name] == set ? "checked" : "off") + : "off"); + break; + case "radio": // used generic bo + val = ((field_name in cgi.post && !(cgi.post[field_name]).empty) + ? (cgi.post[field_name] == set ? "checked" : "off") + : (field_name in cgi.get) + ? (cgi.get[field_name] == set ? "checked" : "off") + : "checked"); + break; + case "checkbox": // used generic bo + val = ((field_name in cgi.post && !(cgi.post[field_name]).empty) + ? (cgi.post[field_name] == set ? "checked" : "off") + : (field_name in cgi.get) + ? (cgi.get[field_name] == set ? "checked" : "off") + : "checked"); + break; + default: + } + return val; + } + string the_can(string fv) { + string show_the_can = post_value("url"); + string _the_can = ""; + if (show_the_can == "checked") { + tf = text_fields; + string method_get_url = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ environment.get("QUERY_STRING", ""); + string method_post_url_construct = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ tf.canned_query; + // assert(method_get_url == environment.get("HTTP_REFERER", conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ conf.query_string)); + if (conf.request_method == "POST") { + _the_can = + "" + ~ "POST: " + ~ "" + ~ method_post_url_construct + ~ "" + ~ "
"; + } else if (conf.request_method == "GET") { + _the_can = + "" + ~ "GET:  " + ~ "" + ~ method_get_url + ~ ""; + } + conf.http_url = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ tf.canned_query; + } + return _the_can; + } + string provide_tip() { + string searched_tip = post_value("se"); + string tip = ""; + if (searched_tip == "checked") { + string search_field = post_value("sf", "field"); + tf = text_fields; + tip = format(q"┃ + +database: %s; selected view: index +search string: %s %s %s %s %s %s
+%s %s %s %s %s %s +
+┃", + cv.db_selected, + (tf.text.empty ? "" : "\"text: " ~ tf.text ~ "; "), + (tf.title.empty ? "" : "\"title: " ~ tf.title ~ "; "), + (tf.author.empty ? "" : "\"author: " ~ tf.author ~ "; "), + (tf.date.empty ? "" : "\"date " ~ tf.date ~ "; "), + (tf.uid.empty ? "" : "\"uid: " ~ tf.uid ~ "; "), + (tf.fn.empty ? "" : "\"fn: " ~ tf.fn ~ "; "), + (tf.text.empty ? "" : "text: " ~ tf.text ~ "
"), + (tf.title.empty ? "" : "title: " ~ tf.title ~ "
"), + (tf.author.empty ? "" : "author: " ~ tf.author ~ "
"), + (tf.date.empty ? "" : "date: " ~ tf.date ~ "
"), + (tf.uid.empty ? "" : "\"uid: " ~ tf.uid ~ "; "), + (tf.fn.empty ? "" : "\"fn: " ~ tf.fn ~ "; "), + ); + } + return tip; + } + form = format(q"┃ +
+ + +
+ + + %s + %s + %s +
+ + + to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) +
+ + + index + text / grep; + match limit: + 1,000 + 2,500 +
+ echo query + result stats + search url + searched + available fields + sql statement + +
+
+ + +
+┃", + "spine-search", + (post_value("ec") == "checked") ? post_value("sf", "field") : "", + provide_tip, + search_note, + the_can(post_value("sf", "field")), + cv.db_selected, + cv.db_selected, + post_value("rt", "box", "idx"), + post_value("rt", "box", "txt"), + post_value("sml", "box", "1000"), + post_value("sml", "box", "2500"), + post_value("ec"), + post_value("sts"), + post_value("url"), + post_value("se"), + post_value("tip"), + post_value("sql"), + ); + { + string set_value(string field_name, string default_val) { + string val; + if (field_name in cgi.post) { + val = cgi.post[field_name]; + } else if (field_name in cgi.get) { + val = cgi.get[field_name]; + } else { val = default_val; } + return val; + } + bool set_bool(string field_name) { + bool val; + if (field_name in cgi.post + && cgi.post[field_name] == "on") { + val = true; + } else if (field_name in cgi.get + && cgi.get[field_name] == "on") { + val = true; + } else { val = false; } + return val; + } + cv.db_selected = set_value("selected_db", "spine.search.db"); // selected_db == db + cv.sql_match_limit = set_value("sml", "1000"); + cv.sql_match_offset = set_value("smo", "0"); + cv.search_text = set_value("sf", "test"); // remove test + cv.results_type = set_value("rt", "idx"); + cv.checked_echo = set_bool("ec"); + cv.checked_stats = set_bool("sts"); + cv.checked_url = set_bool("url"); + cv.checked_searched = set_bool("se"); + cv.checked_tip = set_bool("tip"); + cv.checked_sql = set_bool("sql"); + tf = text_fields; + } + } + { + cgi.write(header); + cgi.write(table); + cgi.write(form); + // cgi.write(previous_next); + { // debug environment + // foreach (k, d; environment.toAA) { + // cgi.write(k ~ ": " ~ d ~ "
"); + // } + } + { // debug cgi info + // cgi.write("db_selected: " ~ cv.db_selected ~ "
\n"); + // cgi.write("search_text: " ~ cv.search_text ~ "
\n"); + // cgi.write("sql_match_limit: " ~ cv.sql_match_limit ~ ";\n"); + // cgi.write("sql_match_offset: " ~ cv.sql_match_offset ~ ";\n"); + // cgi.write("results_type: " ~ cv.results_type ~ "
\n"); + // cgi.write("cv.checked_echo: " ~ (cv.checked_echo ? "checked" : "off") ~ "; \n"); + // cgi.write("cv.checked_stats: " ~ (cv.checked_stats ? "checked" : "off") ~ "; \n"); + // cgi.write("cv.checked_url: " ~ (cv.checked_url ? "checked" : "off") ~ "; \n"); + // cgi.write("cv.checked_searched: " ~ (cv.checked_searched ? "checked" : "off") ~ ";
\n"); + // cgi.write("cv.checked_tip: " ~ (cv.checked_tip ? "checked" : "off") ~ "; \n"); + // cgi.write("cv.checked_sql: " ~ (cv.checked_sql ? "checked" : "off") ~ "
\n"); + } + } + auto db = Database(conf.db_path ~ cv.db_selected); + { + uint sql_match_offset_counter(T)(T cv) { + sql_match_offset_count += cv.sql_match_limit.to!uint; + return sql_match_offset_count; + } + void sql_search_query() { + string select_field_like(string db_field, string search_field) { + string where_ = ""; + if (!(search_field.empty)) { + string _sf = search_field.strip.split("%20").join(" "); + if (_sf.match(r" OR ")) { + _sf = _sf.split(" OR ").join("%' OR " ~ db_field ~ " LIKE '%"); + } + if (_sf.match(r" AND ")) { + _sf = _sf.split(" AND ").join("%' AND " ~ db_field ~ " LIKE '%"); + } + _sf = "( " ~ db_field ~ " LIKE\n '%" ~ _sf ~ "%' )"; + where_ ~= format(q"┃ + %s +┃", + _sf + ); + } + return where_; + } + string[] _fields; + _fields ~= select_field_like("doc_objects.clean", tf.text); + _fields ~= select_field_like("metadata_and_text.title", tf.title); + _fields ~= select_field_like("metadata_and_text.creator_author", tf.author); + _fields ~= select_field_like("metadata_and_text.uid", tf.uid); + _fields ~= select_field_like("metadata_and_text.src_filename_base", tf.fn); + _fields ~= select_field_like("metadata_and_text.src_filename_base", tf.src_filename_base); + _fields ~= select_field_like("metadata_and_text.language_document_char", tf.language); + _fields ~= select_field_like("metadata_and_text.date_published", tf.date); + _fields ~= select_field_like("metadata_and_text.classify_keywords", tf.keywords); + _fields ~= select_field_like("metadata_and_text.classify_topic_register", tf.topic_register); + string[] fields; + foreach (f; _fields) { + if (!(f.empty)) { fields ~= f; } + } + string fields_str = ""; + fields_str ~= fields.join(" AND "); + sql_select.the_body ~= format(q"┃ +SELECT + metadata_and_text.uid, + metadata_and_text.title, + metadata_and_text.creator_author_last_first, + metadata_and_text.creator_author, + metadata_and_text.src_filename_base, + metadata_and_text.language_document_char, + metadata_and_text.date_published, + 
metadata_and_text.classify_keywords, + metadata_and_text.classify_topic_register, + doc_objects.body, + doc_objects.seg_name, + doc_objects.ocn, + metadata_and_text.uid +FROM + doc_objects, + metadata_and_text +WHERE ( + %s + ) +AND + doc_objects.uid_metadata_and_text = metadata_and_text.uid +ORDER BY + metadata_and_text.creator_author_last_first, + metadata_and_text.date_published DESC, + metadata_and_text.title, + metadata_and_text.language_document_char, + metadata_and_text.src_filename_base, + doc_objects.ocn +LIMIT %s OFFSET %s +;┃", + fields_str, + cv.sql_match_limit, + cv.sql_match_offset, + ); + (cv.checked_sql) + ? cgi.write(previous_next ~ "
" ~ sql_select.the_body.split("\n ").join(" ").split("\n").join("
") ~ "
\n") + : ""; + cgi.write(previous_next); + auto select_query_results = db.execute(sql_select.the_body).cached; + string _old_uid = ""; + if (!select_query_results.empty) { + foreach (row; select_query_results) { + if (row["uid"].as!string != _old_uid) { + _old_uid = row["uid"].as!string; + auto m = (row["date_published"].as!string).match(regex(r"^([0-9]{4})")); // breaks if row missing or no match? + cgi.write( + "
\"" + ~ row["title"].as!string ~ "\"" + ~ " (" + ~ m.hit + ~ ") " + ~ "[" + ~ row["language_document_char"].as!string + ~ "] " + ~ row["creator_author_last_first"].as!string + ~ ":
\n" + ); + } + if (cv.results_type == "txt") { + cgi.write( + "
" + ~ row["ocn"].as!string + ~ "" + ~ "
" + ~ row["body"].as!string + ); + } else { + cgi.write( + "" + ~ row["ocn"].as!string + ~ ", " + ); + } + } + cgi.write( previous_next); + } else { // offset_not_beyond_limit = false; + cgi.write("select_query_results empty

\n"); + } + } + sql_search_query; + } + { + db.close; + } + { + string tail = format(q"┃ + +┃"); + cgi.write(tail); + } +} +mixin GenericMain!cgi_function_intro; diff --git a/misc/util/rb/cgi/spine.search.cgi b/misc/util/rb/cgi/spine.search.cgi new file mode 100755 index 0000000..cfe9d73 --- /dev/null +++ b/misc/util/rb/cgi/spine.search.cgi @@ -0,0 +1,958 @@ +#!/usr/bin/env ruby +=begin + * Name: SiSU information Structuring Universe + * Author: Ralph Amissah + * http://www.jus.uio.no/sisu + * http://www.jus.uio.no/sisu/SiSU/download + + * Description: generates naive cgi search form for search of sisu database (sqlite) + * Name: SiSU generated sample cgi search form + + * Description: generated sample cgi search form for SiSU + (SiSU is a framework for document structuring, publishing and search) + + * Author: Ralph Amissah + + * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. + + * License: GPL 3 or later: + + SiSU, a framework for document structuring, publishing and search + + Copyright (C) Ralph Amissah + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation, either version 3 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program. If not, see . 
+ + If you have Internet connection, the latest version of the GPL should be + available at these locations: + + + + + * SiSU uses: + * Standard SiSU markup syntax, + * Standard SiSU meta-markup syntax, and the + * Standard SiSU object citation numbering and system + + * Hompages: + + + + * Ralph Amissah + + + +=end +begin + require 'cgi' + require 'fcgi' + require 'sqlite3' +rescue LoadError + puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' +end +@stub_default = 'search' +@image_src = "http://#{ENV['HTTP_HOST']}/image_sys" +@hosturl_cgi = "http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" +@hosturl_files = "http://#{ENV['HTTP_HOST']}" +@output_dir_structure_by = 'language' +@lingual = 'multi' +@db_name_prefix = 'spine.' +@base = "http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" +#Common TOP +@@offset = 0 +@@canned_search_url = @base +@color_heading = '#DDFFAA' +@color_match = '#ffff48' +class Form + def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') + search_note = '' if checked_searched !~ /\S/ + the_can = '' if checked_url !~ /\S/ + search_field = '' if checked_echo !~ /\S/ + @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can + @tip = if checked_tip =~ /\S/ + 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; 
identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; src_filename_base:__;
' + else '' + end + end + def submission_form + search_form =<<-WOK + + + + + <meta charset="utf-8"> + <meta name="sourcefile" content="SiSU._sst" /> + SiSU search form (sample): SiSU information Structuring Universe + + + + + + + + + +
+ + +
+
+ SiSU + +
+ git + +
+
+ +
+

+ + +
+ + + #{@tip} + #{@search_note} + #{@the_can} +
+ + + + + to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) +
+ + + index + text / grep +
+ match limit: + 1,000 + 2,500 +
+ echo query + result stats + search url + searched + available fields + sql statement +
+ checks: + default + selected + all + none +
+ + +
+ WOK + end +end +class SearchRequest #% search_for + attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:src_filename_base + def initialize(search_field='',q='') + @search_field,@q=search_field,q + @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' + if @search_field=~/\S/ + @text1 = text_to_match('text:') + @fulltext = text_to_match('fulltxt:') + @topic_register = text_to_match('topic_register:') + @title = text_to_match('title:') # DublinCore 1 - title + @author = text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author + @subject = text_to_match('subj(?:ect)?:') # DublinCore 3 - subject + @description = text_to_match('description:') # DublinCore 4 - description + @publisher = text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher + @editor = text_to_match('editor:') + @contributor = text_to_match('contributor:') # DublinCore 6 - contributor + @date = text_to_match('date:') # DublinCore 7 - date dd-mm-yy + @type = text_to_match('type:') # DublinCore 8 - type + @format = text_to_match('format:') # DublinCore 9 - format + @identifier = text_to_match('identifier:') # DublinCore 10 - identifier + @source = text_to_match('source:') # DublinCore 11 - source + @language = text_to_match('language:') # DublinCore 12 - language + @relation = text_to_match('relation:') # DublinCore 13 - relation + @coverage = text_to_match('coverage:') # DublinCore 14 - coverage + @rights = text_to_match('rights:') # DublinCore 15 - rights + @keywords = 
text_to_match('key(?:words?)?:') + @comment = text_to_match('comment:') + @abstract = text_to_match('abs(?:tract)?:') + @owner = text_to_match('owner:') + @date_created = text_to_match('date_created:') + @date_issued = text_to_match('date_issued:') + @date_modified = text_to_match('date_modified:') + @date_available = text_to_match('date_available:') + @date_valid = text_to_match('date_valid:') + @filename = text_to_match('src_filename_base:') + @text1 = text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register + else + @text1 = q['s1'] if q['s1'] =~ /\S/ + @fulltext = q['ft'] if q['ft'] =~ /\S/ + @keywords = q['key'] if q['key'] =~ /\S/ + @title = q['ti'] if q['ti'] =~ /\S/ + @author = q['au'] if q['au'] =~ /\S/ + @topic_register = q['tr'] if q['tr'] =~ /\S/ + @subject = q['sj'] if q['sj'] =~ /\S/ + @description = q['dsc'] if q['dsc'] =~ /\S/ + @publisher = q['pb'] if q['pb'] =~ /\S/ + @editor = q['cntr'] if q['cntr'] =~ /\S/ + @contributor = q['cntr'] if q['cntr'] =~ /\S/ + @date = q['dt'] if q['dt'] =~ /\S/ + @type = q['ty'] if q['ty'] =~ /\S/ + @identifier = q['id'] if q['id'] =~ /\S/ + @source = q['src'] if q['src'] =~ /\S/ + @language = q['lang'] if q['lang'] =~ /\S/ + @relation = q['rel'] if q['rel'] =~ /\S/ + @coverage = q['cov'] if q['cov'] =~ /\S/ + @rights = q['cr'] if q['cr'] =~ /\S/ + @comment = q['co'] if q['co'] =~ /\S/ + @abstract = q['ab'] if q['ab'] =~ /\S/ + @date_created = q['dtc'] if q['dtc'] =~ /\S/ + @date_issued = q['dti'] if q['dti'] =~ /\S/ + @date_modified = q['dtm'] if q['dtm'] =~ /\S/ + @date_available = q['dta'] if q['dta'] =~ /\S/ + @date_valid = q['dtv'] if q['dtv'] =~ /\S/ + @filename = if q['doc'] and q['search'] !~ /search db/ then q['doc'] + elsif q['fns'] =~ /\S/ then q['fns'] + end + @@limit = q['ltd'] if q['ltd'] =~ /\d+/ # 1000 + @@offset = q['off'] if q['off'] =~ /\d+/ # 0 + end + end + def 
text_to_match(identifier='') + m={ + string: /#{identifier}\s*(.+?)/, + string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, + word: /#{identifier}[\s(]*(\S+)/ + } + search_string=if @search_field =~m[:word] + search_string=if @search_field =~m[:braces] then m[:braces].match(@search_field)[1] + elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] + else + str=m[:word].match(@search_field)[1] + str=str.gsub(/[()]/,'') + str + end + search_string=search_string.strip.gsub(/\s+/,'+') + #else + # "__" + end + end +end +class DBI_SearchString + def initialize(l,t,q,cse=false) + @l,@t,@q=l,t,q + end + def string + search={ search: [], flag: false } + if @t =~/\S+/ or @q =~/\S+/ + if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) + elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) + end + search_construct=[] + unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). + gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") + search_construct << unescaped_search + search_construct=search_construct.join(' ') + search[:search] << search_construct + search[:flag]=true + search + end + search + end +end +class DBI_SearchStatement + attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit + def initialize(conn,search_for,q,c) + @conn=conn + @text_search_flag=false + @sql_statement={ body: '', endnotes: '', range: '' } + #@offset||=@@offset + #@offset+=@@limit + search={ text: [], endnotes: [] } + cse=(c =~/\S/) ? 
true : false + st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string + se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string + @text_search_flag=st[:flag] + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = 
DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string + if st[:flag] + search[:text] << st[:search] + end + st = DBI_SearchString.new('metadata_and_text.src_filename_base',search_for.src_filename_base,q['fns'],cse).string + if st[:flag] + search[:text] << st[:search] + end + @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 + @@offset=q['off'] if q['off']=~/\d+/ # 0 + @search_text='' + @search_text=search[:text].flatten.join(' AND ') + @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') + end + def sql_offset + @@offset + end + def sql_match_limit + @@limit + end + def sql_canned_search + @offset_next=sql_offset.to_i + sql_match_limit.to_i + @offset_previous=sql_offset.to_i - sql_match_limit.to_i + def current + @@canned_search_url.to_s + '<d=' + 
sql_match_limit.to_s + '&off=' + sql_offset.to_s + end + def next + @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s + end + def previous + @offset_previous >= 0 \ + ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) + : '' + end + def start + @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s + end + self + end + def pre_next(beyond_limit,img) + can=sql_canned_search + page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i + if beyond_limit + if page.to_s =~ /^1$/ + %{
+ pg. #{page.to_s} + +  >> + +
} + elsif page.to_s =~ /^2$/ + %{
+ + <<  + + pg. #{page.to_s} + +  >> + +
} + else + %{
+ + |<  + + + <<  + + pg. #{page.to_s} + +  >> + +
} + end + else + if page.to_s =~ /^1$/ then '' + elsif page.to_s =~ /^2$/ + %{
+ + <<  + + pg. #{page.to_s} +
} + else + %{
+ + |<  + + + <<  + + pg. #{page.to_s} +
} + end + end + end + def sql_select_body + limit ||= @@limit + offset ||= @@offset + @sql_statement[:body] = %{ + SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename_base, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg_name, doc_objects.ocn, metadata_and_text.uid + FROM doc_objects, metadata_and_text + WHERE #{@search_text} AND doc_objects.uid_metadata_and_text = metadata_and_text.uid + ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename_base, doc_objects.ocn + } + @sql_statement[:range] = %{LIMIT #{limit} OFFSET #{offset} ;} + select = @sql_statement[:body] + ' ' + @sql_statement[:range] + select + end + def sql_select_body_format + %{#{sql_select_body}} + end + def contents + @conn.execute(sql_select_body) + end +end +def tail + <<-'WOK' +


+ + + + + + +
+ + +
+
+ SiSU + +
+ git + +
+
+ +

+ Generated by + SiSU 6.3.1 2014-10-19 (2014w41/7) +
+ + SiSU © Ralph Amissah + 1993, current 2014. + All Rights Reserved. +
+ SiSU is software for document structuring, publishing and search, +
+ + www.jus.uio.no/sisu + + and + + www.sisudoc.org + + sources + + git.sisudoc.org + +
+ w3 since October 3 1993 + + ralph@amissah.com + +
+ mailing list subscription + + http://lists.sisudoc.org/listinfo/sisu + +
+ + sisu@lists.sisudoc.org + +

+
+

+ SiSU using: +
Standard SiSU markup syntax, +
Standard SiSU meta-markup syntax, and the +
Standard SiSU object citation numbering and system, (object/text identifying/locating system) +
+ © Ralph Amissah 1997, current 2014. + All Rights Reserved. +

+
+

+ + .: + +

+
+

+ SiSU is released under + GPL v3 + or later, + + http://www.gnu.org/licenses/gpl.html + +

+
+

+ SiSU, developed using + + Ruby + + on + + Debian/Gnu/Linux + + software infrastructure, + with the usual GPL (or OSS) suspects. +

+
+ + + WOK +end +@tail=tail +@counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 +@counters_txt,@counters_endn,@sql_select_body='','','' +FCGI.each_cgi do |cgi| + begin # all code goes in begin section + @search={ text: [], endnotes: [] } + q=CGI.new + @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ + @stub=$1 + cgi['db'] + else + @stub=@stub_default + @db_name_prefix + @stub + end + checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' + result_type=(cgi['view']=~/text/) \ + ? result_type={ index: '', text: 'checked'} + : result_type={ index: 'checked', text: ''} + @@limit=if cgi['sql_match_limit'].to_s=~/2500/ + checked_sql_limit={ l1000: '', l2500: 'checked'} + '2500' + else + checked_sql_limit={ l1000: 'checked', l2500: ''} + '1000' + end + checked_echo = 'checked' if cgi['echo'] =~/\S/ + checked_stats = 'checked' if cgi['stats'] =~/\S/ + checked_url = 'checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 + checked_searched = 'checked' if cgi['searched'] =~/\S/ + checked_tip = 'checked' if cgi['tip'] =~/\S/ + checked_case = 'checked' if cgi['casesense'] =~/\S/ + checked_sql = 'checked' if cgi['sql'] =~/\S/ + if cgi['checks'] =~ /check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 + checked_all = 'checked' + checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' + checked_none ='' + elsif cgi['checks'] =~ /check_none/ + checked_none = 'checked' + checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' + elsif cgi['checks'] =~ /check_selected/ + checked_selected = 'checked' + elsif cgi['checks'] =~ /check_default/ + checked_default = 'checked' + checked_echo=checked_stats=checked_url='checked' + checked_searched=checked_tip=checked_case=checked_sql='' + else + checked_selected='checked' + 
checked_echo=checked_stats=checked_url='checked' + checked_searched=checked_tip=checked_case=checked_sql='' + end + selected_db=case cgi['db'] + when /spine.sqlite/ then '' + end + db_name='spine.search.sql.db' + #db_name='spine.sqlite.db' + #db_name='sisu_sqlite.db' + db_sqlite=case cgi['db'] + when /spine.sqlite/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" + else "/var/www/sqlite/#{db_name}" + end + #when /spine.sqlite/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" + #else "/srv/complete.sisudoc.org/web/manual/#{db_name}" + #end + #@conn=SQLite3::Database.new(db_sqlite) + @conn=SQLite3::Database.new("/var/www/sqlite/spine.search.sql.db") + #@conn=SQLite3::Database.new("/var/www/spine.sqlite.db") + @conn.results_as_hash=true + search_field=cgi['find'] if cgi['find'] # =~/\S+/ + @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for + #% searches + #Canned_search.new(@base,@search_for.text1,cgi) + if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ + s1 = 's1=' + CGI.escape(@search_for.text1) if @search_for.text1 =~ /\S/ + ft = '&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext =~ /\S/ + key = 'key=' + CGI.escape(@search_for.keywords) if @search_for.keywords =~ /\S/ + ti = '&ti=' + CGI.escape(@search_for.title) if @search_for.title =~ /\S/ + au = '&au=' + CGI.escape(@search_for.author) if @search_for.author =~ /\S/ + tr = '&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register =~ /\S/ + sj = '&sj=' + CGI.escape(@search_for.subject) if @search_for.subject =~ /\S/ + dsc = '&dsc=' + CGI.escape(@search_for.description) if @search_for.description =~ /\S/ + pb = '&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher =~ /\S/ + edt = '&edt=' + CGI.escape(@search_for.editor) if @search_for.editor =~ /\S/ + cntr = '&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor =~ /\S/ 
+ dt = '&dt=' + CGI.escape(@search_for.date) if @search_for.date =~ /\S/ + ty = '&ty=' + CGI.escape(@search_for.type) if @search_for.type =~ /\S/ + id = '&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier =~ /\S/ + src = '&src=' + CGI.escape(@search_for.source) if @search_for.source =~ /\S/ + lang = '&lang=' + CGI.escape(@search_for.language) if @search_for.language =~ /\S/ + rel = '&rel=' + CGI.escape(@search_for.relation) if @search_for.relation =~ /\S/ + cov = '&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage =~ /\S/ + cr = '&cr=' + CGI.escape(@search_for.rights) if @search_for.rights =~ /\S/ + co = '&co=' + CGI.escape(@search_for.comment) if @search_for.comment =~ /\S/ + ab = '&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract =~ /\S/ + dtc = '&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created =~ /\S/ + dti = '&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued =~ /\S/ + dtm = '&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified =~ /\S/ + dta = '&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available =~ /\S/ + dtv = '&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid =~ /\S/ + fns = '&fns=' + CGI.escape(@search_for.src_filename_base) if @search_for.src_filename_base =~ /\S/ + @@canned_search_url=(checked_all =~/checked/) \ + ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" + : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" + mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 + @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" + if checked_case=~/\S/ + @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 + else + @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 + end + canned_note='search url:' + else + @@canned_search_url="#{@base}?db=#{@db}&view=index" + canned_note='search url example:' + end + if search_field =~/\S+/ + analyze_format=search_field.gsub(/\s*\n/,'; ') + elsif checked_all =~/checked/ or checked_url =~/checked/ + canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) + af=canned_search.join('; ') + af=af.gsub(/s1=/,'text: '). + gsub(/ft=/,'fulltxt: '). + gsub(/au=/,'author: '). + gsub(/ti=/,'title: '). + gsub(/fns=/,'src_filename_base: '). + gsub(/tr=/,'topic_register: '). + gsub(/%2B/,' ') + analyze_format=af + st=af.split(/\s*;\s*/) + search_field=st.join("\n") + end + green=%{} + canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) + the_can=%{#{canned_note} #{canned_search_url_txt}
} + p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' + p_filename = %{src_filename_base: #{green}#{@search_for.src_filename_base}

} if @search_for.src_filename_base =~ /\S+/ + p_text = %{text: #{green}#{@search_for.text1}
} if @search_for.text1 =~ /\S+/ + p_fulltext = %{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~ /\S+/ + p_title = %{title: #{green}#{@search_for.title}
} if @search_for.title =~ /\S+/ + p_author = %{author: #{green}#{@search_for.author}
} if @search_for.author =~ /\S+/ + p_editor = %{editor: #{green}#{@search_for.editor}
} if @search_for.editor =~ /\S+/ + p_contributor = %{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~ /\S+/ + p_date = %{date: #{green}#{@search_for.date}
} if @search_for.date =~ /\S+/ + p_rights = %{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~ /\S+/ + p_topic_register = %{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~ /\S+/ + p_subject = %{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~ /\S+/ + p_keywords = %{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~ /\S+/ + p_identifier = %{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~ /\S+/ + p_type = %{type: #{green}#{@search_for.type}
} if @search_for.type =~ /\S+/ + p_format = %{format: #{green}#{@search_for.format}
} if @search_for.format =~ /\S+/ + p_relation = %{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~ /\S+/ + p_coverage = %{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~ /\S+/ + p_description = %{description: #{green}#{@search_for.description}
} if @search_for.description =~ /\S+/ + p_abstract = %{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~ /\S+/ + p_comment = %{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~ /\S+/ + p_publisher = %{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~ /\S+/ + p_source = %{source: #{green}#{@search_for.source}
} if @search_for.source =~ /\S+/ + p_language = %{language: #{green}#{@search_for.language}
} if @search_for.language =~ /\S+/ + search_note=<<-WOK + + database: #{green}#{@db}; selected view: #{green}#{cgi['view']} + search string: "#{green}#{analyze_format}"
+ #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} + + WOK + #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} + #% dbi_canning + @header = Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form + unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ + print "Content-type: text/html\n\n" + puts (@header+@tail) + else #% searches + s1=(@search_for.text1 =~/\S/) \ + ? @search_for.text1 + : 'Unavailable' + if checked_case=~/\S/ + @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} + else + @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} + end + #% dbi_request + dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) + @text_search_flag=false + @text_search_flag=dbi_statement.text_search_flag + s_contents=dbi_statement.contents + @body_main='' + @search_regx=nil + olduid="" + if @text_search_flag + if checked_sql =~/\S/ + sql_select_body=dbi_statement.sql_select_body_format + else sql_select_body='' + end + @body_main << sql_select_body + #@body_main << '



Main Text:
' << sql_select_body + else + end + @hostpath = "#{@hosturl_files}" + #@hostpath="#{@hosturl_files}/#{@stub}" + def path_manifest(fn,ln=nil) + case @output_dir_structure_by + when 'filename' + @lingual =='mono' \ + ? "#{@hostpath}/#{fn}/sisu_manifest.html" + : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" + when 'filetype' + @lingual =='mono' \ + ? "#{@hostpath}/manifest/#{fn}.html" + : "#{@hostpath}/manifest/#{fn}.#{ln}.html" + else + "#{@hostpath}/#{ln}/manifest/#{fn}.html" + end + end + def path_html_seg(fn,ln=nil) + case @output_dir_structure_by + when 'filename' + "#{@hostpath}/#{fn}" + when 'filetype' + "#{@hostpath}/html/#{fn}" + else + "#{@hostpath}/#{ln}/html/#{fn}" + end + end + def path_toc(fn,ln=nil) + if @output_dir_structure_by =='filename' \ + or @output_dir_structure_by =='filetype' + @lingual =='mono' \ + ? "#{path_html_seg(fn,ln)}/toc.html" + : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" + else + "#{path_html_seg(fn,ln)}/toc.html" + end + end + def path_filename(fn,seg_name,ln=nil) + if @output_dir_structure_by =='filename' \ + or @output_dir_structure_by =='filetype' + @lingual =='mono' \ + ? "#{path_html_seg(fn,ln)}/#{seg_name}.html" + : "#{path_html_seg(fn,ln)}/#{seg_name}.#{ln}.html" + else + "#{path_html_seg(fn,ln)}/#{seg_name}.html" + end + end + def path_html_doc(fn,ln=nil) + case @output_dir_structure_by + when 'filename' + @lingual =='mono' \ + ? "#{path_html_seg(fn,ln)}/scroll.html" + : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" + when 'filetype' + @lingual =='mono' \ + ? 
"#{@hostpath}/html/#{fn}.html" + : "#{@hostpath}/html/#{fn}.#{ln}.html" + else + "#{@hostpath}/#{ln}/html/#{fn}.html" + end + end + #% text_objects_body + s_contents.each do |c| #% text body + location=c['src_filename_base'][/(.+?)\.(?:ssm\.sst|sst)$/,1] + file_suffix=c['src_filename_base'][/.+?\.(ssm\.sst|sst)$/,1] + lang=if location =~ /\S+?~(\S\S\S?)$/ + l=location[/\S+?~(\S\S\S?)$/,1] + location=location.gsub(/(\S+?)~\S\S\S?/,'\1') + l=".#{l}" + else '' + end + #% metadata_found_body + if c['uid'] != olduid + ti=c['title'] + can_txt_srch=(cgi['view']=~/index/) \ + ? %{search } + : %{search } + title = %{ #{ti} [#{c['language_document_char']}] by #{c['creator_author']} #{can_txt_srch}
} + title=@text_search_flag \ + ? '

'+title + : '
'+title + @counter_txt_doc+=1 + olduid=c['uid'] + else title='' + end + if @text_search_flag + if cgi['view']=~/text/ \ + or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body + text=if c['suffix'] !~/1/ #seg + if @search_for.text1 =~/\S+/ \ + or q['s1'] =~/\S+/ #% only this branch is working !! + unescaped_search=if @search_for.text1 =~/\S+/ + CGI.unescape(@search_for.text1) + elsif q['s1'] =~/\S+/ + CGI.unescape(q['s1']) + else nil + end + @search_regx=if unescaped_search #check + search_regex=[] + build=unescaped_search.scan(/\S+/).each do |g| + (g.to_s =~/(AND|OR)/) \ + ? (search_regex << '|') + : (search_regex << %{#{g.to_s}}) + end + search_regex=search_regex.join(' ') + search_regex=search_regex.gsub(/\s*\|\s*/,'|') + Regexp.new(search_regex, Regexp::IGNORECASE) + else nil + end + else nil + end + matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ + ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) + : c['body'] + %{

ocn #{c['ocn']}:

#{matched_para}} + elsif c['suffix'] =~/1/ #doc + %{#{title}

ocn #{c['ocn']}:#{c['body']}} + end + @counter_txt_ocn+=1 + output=title+text + else #elsif cgi['view']=~/index/ #% idx body + if c['suffix'] !~/1/ #seg + index=%{#{c['ocn']}, } if @text_search_flag + elsif c['suffix'] =~/1/ #doc #FIX + index=%{#{c['ocn']}, } + end + if c['seg_name'] =~/\S+/ + if @text_search_flag + @counter_txt_ocn+=1 + output=title+index + end + else + @counter_txt_ocn+=1 + output=c['suffix'] !~/1/ \ + ? title+index + : %{#{title}#{c['ocn'].sort}, } + end + end + else output=title + end + @counters_txt=if @counter_txt_doc > 0 + if checked_stats =~/\S/ + @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false + start=(@@offset.to_i+1).to_s + range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s + %{


Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} + else '' + end + else '' + end + @body_main << output #+ details + end + olduid = "" + offset=dbi_statement.sql_offset.to_s + limit=dbi_statement.sql_match_limit.to_s + @@lt_t ||=false; @@lt_e ||=false + canned=(@@lt_t or @@lt_e) \ + ? dbi_statement.pre_next(true,@image_src).to_s + : dbi_statement.pre_next(false,@image_src).to_s + limit=dbi_statement.sql_match_limit.to_s + cgi.out{ + @header.force_encoding("UTF-8") \ + + @counters_txt.force_encoding("UTF-8") \ + + @counters_endn.force_encoding("UTF-8") \ + + canned.force_encoding("UTF-8") \ + + @body_main.force_encoding("UTF-8") \ + + canned.force_encoding("UTF-8") \ + + @tail.force_encoding("UTF-8") + } #% print cgi_output_header+counters+body + end + rescue Exception => e + s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
+    s << CGI::escapeHTML(e.message) + '
' + cgi.out{s} + next + ensure # eg. disconnect from server + @conn.disconnect if @conn + end +end diff --git a/misc/util/rb/tex/dr_tex.rb b/misc/util/rb/tex/dr_tex.rb new file mode 100755 index 0000000..767742c --- /dev/null +++ b/misc/util/rb/tex/dr_tex.rb @@ -0,0 +1,70 @@ +#!/usr/bin/env ruby +require 'fileutils' +pwd = Dir.pwd +argv,texfiles_with_path,flags=[],[],[] +lngs = %{(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)} +Regexp.new(lngs, Regexp::IGNORECASE) +argv=$* +argv.sort.each{|y| (y =~/^--\S+$/i) ? (flags << y) : (texfiles_with_path << y) } +if flags.length==0 \ +|| flags.inspect =~/"--help"/ + cmd=(/([^\/]+)$/).match($0)[1] + puts < 0 + texfiles_with_path.each do |texfile_with_path| + if texfile_with_path =~/.+\.tex/ + #puts texfile_with_path + if FileTest.file?(texfile_with_path) + file_basename_with_path = texfile_with_path.sub(/\.tex$/,'') + file_basename = file_basename_with_path.sub(/.*?([^\/]+)$/,'\1') + _out_path = out_path + if file_basename =~ /\.#{lngs}$/ + lng = file_basename.match(/\.#{lngs}$/)[1] + puts file_basename + puts lng + puts _out_path + unless _out_path.match(/\/#{lng}\/pdf$/) + _out_path = "#{out_path}/#{lng}/pdf" + FileUtils::mkdir_p(_out_path) + end + end + texpdf_cmd = %{xetex -interaction=batchmode -fmt=xelatex #{texfile_with_path}\n} + puts texpdf_cmd + 2.times { |i| system(texpdf_cmd) } + if (FileTest.file?(%{#{pwd}/#{file_basename}.pdf})) && (FileTest.directory?(_out_path)) + FileUtils::Verbose::mv(%{#{pwd}/#{file_basename}.pdf}, %{#{_out_path}/#{file_basename}.pdf}) + puts (%{#{_out_path}/#{file_basename}.pdf}) + else + puts "issue with pdf file or output directory" + puts "pdf file: #{pwd}/#{file_basename}.pdf}" + puts "output dir: #{_out_path}/" + end + suffix = ['log', 'out', 'toc', 'aux'] + suffix.each { |s| FileUtils::rm_f(%{#{pwd}/#{file_basename}.#{s}})} + end + end + end +end 
+Dir.chdir(pwd) +__END__ diff --git a/org/COPYRIGHT b/org/COPYRIGHT index 2217fc4..2705e3c 100644 --- a/org/COPYRIGHT +++ b/org/COPYRIGHT @@ -1,4 +1,4 @@ -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -8,19 +8,15 @@ - Copyright: (C) 2015 - 2020 Ralph Amissah - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on SiSU markup) - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering - Hompages: - [http://www.doc_reform.org] [http://www.sisudoc.org] - - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] diff --git a/org/out_cgi_search_sqlite.org b/org/out_cgi_search_sqlite.org index 58375bf..3145870 100644 --- a/org/out_cgi_search_sqlite.org +++ b/org/out_cgi_search_sqlite.org @@ -1841,7 +1841,7 @@ configuration "default" { * cgi-search README -#+BEGIN_SRC text :NO-tangle "../util/d/cgi/search/README" +#+BEGIN_SRC text :NO-tangle "../misc/util/d/cgi/search/README" change db name to match name of db you create cv.db_selected = "spine.search.sql.db"; diff --git a/org/out_latex.org b/org/out_latex.org index 66f086c..0955687 100644 --- a/org/out_latex.org +++ b/org/out_latex.org @@ -1884,7 +1884,7 @@ string latex_tail(M)( * latex system command helper script ** latex command, ruby script -#+BEGIN_SRC ruby :tangle "../util/rb/tex/dr_tex.rb" :tangle-mode (identity #o755) :shebang #!/usr/bin/env ruby +#+BEGIN_SRC ruby :tangle "../misc/util/rb/tex/dr_tex.rb" 
:tangle-mode (identity #o755) :shebang #!/usr/bin/env ruby require 'fileutils' pwd = Dir.pwd argv,texfiles_with_path,flags=[],[],[] diff --git a/org/spine_build_scaffold.org b/org/spine_build_scaffold.org index ac1fc98..94d5cf9 100644 --- a/org/spine_build_scaffold.org +++ b/org/spine_build_scaffold.org @@ -1132,9 +1132,17 @@ spine_exe = executable('spine', !*.d !*.rb !conf.sdl +!doc +!doc/** +!man +!man/** !org +!misc +!misc/** !util !util/** +!editor-syntax-etc +!editor-syntax-etc/** !ext_lib !ext_lib/** !src diff --git a/org/spine_doc.org b/org/spine_doc.org new file mode 100644 index 0000000..49e7313 --- /dev/null +++ b/org/spine_doc.org @@ -0,0 +1,4548 @@ +-*- mode: org -*- +#+TITLE: spine (doc_reform) hub +#+DESCRIPTION: documents - structuring, various output representations & search +#+FILETAGS: :spine:hub: +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+COPYRIGHT: Copyright (C) 2015 - 2020 Ralph Amissah +#+LANGUAGE: en +#+STARTUP: content hideblocks hidestars noindent entitiespretty +#+PROPERTY: header-args :exports code +#+PROPERTY: header-args+ :noweb yes +#+PROPERTY: header-args+ :eval no +#+PROPERTY: header-args+ :results no +#+PROPERTY: header-args+ :cache no +#+PROPERTY: header-args+ :padline no + +* README +** tangle + +#+BEGIN_SRC text :tangle "../README" +<> +<> +<> +<> +<> +<> +#+END_SRC + +** project name + +#+NAME: sisu_spine_readme_info +#+BEGIN_SRC text +project_name: Spine, Doc Reform + description: [ + "documents, structuring, processing, publishing", + search, + object numbering, + static content generator, + sisu markup + ] + + author: + name: Ralph Amissah + email: ralph.amissah@gmail.com + + copyright: "(C) 2015 - 2020 Ralph Amissah, All Rights Reserved." 
+ + license: "AGPL 3 or later" + + hompage: [ + "http://www.doc_reform.org", + "http://www.sisudoc.org" + ] +#+END_SRC + +** short description + +#+NAME: sisu_spine_readme_description +#+BEGIN_SRC text +#+END_SRC + +** installation + +#+NAME: sisu_spine_readme_install +#+BEGIN_SRC text +# Installation, Compilation + +SiSU spine is written in the programming language D for which there are 3 compilers: + +- dmd +- ldc +- gdc + +D projects tend to use dub as project manager +https://code.dlang.org/packages/dub +https://code.dlang.org/packages/dub +https://github.com/dlang/dub/blob/master/source/dub/commandline.d + + dub --compiler=ldc2 -color --config=ldc -b release + + dub --compiler=dmd -color --config=dmd + + dub --compiler=gdc-10 -color --config=gdc -b release + + make ldc + + make dmd + +there has been some coalescence around the Meson build system +https://mesonbuild.com/ + + meson + + ninja -C build + + meson setup --wipe build && ninja -v -C build + + make meson + +dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. + +#+END_SRC + +** configuration + +#+NAME: sisu_spine_readme_configuration +#+BEGIN_SRC text +# Configuration + +Configuration files are yaml files + +The following paths are searched: + + ~/.dr/config_local_site + ~/path_to_pod_root/.dr/config_local_site + +e.g. 
processing + + ~spineMarkupSamples/pod/* + +will search: + + ~spineMarkupSamples/pod/.dr/config_local_site + + ~/.dr/config_local_site + +to specify an alternative configuration file to use on the command line (in this +example named "my_config"): + + spine -v --html --config=~spineMarkupSamples/pod/.dr/my_config + +here is a sample configuration file: + +flag: + act0: "--html" + act1: "--html --epub" +output: + path: "/var/www/html" +default: + language: "en" + papersize: "a4" + text_wrap: "80" + digest: "sha256" +webserv: + http: "http" + domain: "localhost" + data_http: "http" + data_domain: "localhost" + data_root_url: "http://localhost" + data_root_path: "/var/www/html" + data_root_part: "" + images_root_part: "image" + cgi_title: "≅ SiSU Spine search" + cgi_http: "http" + cgi_domain: "localhost" + cgi_bin_url: "http://localhost/cgi-bin" + cgi_bin_part: "cgi-bin" + cgi_bin_path: "/usr/lib/cgi-bin" + cgi_search_script: "spine-search" + cgi_search_script_raw_fn_d: "spine_search.d" + cgi_port: "" + cgi_user: "" + cgi_action: "http://localhost/cgi-bin/spine-search" + db_sqlite: "spine.search.db" + db_pg_table: "" + db_pg_user: "" + +#+END_SRC + +** commands help + +#+NAME: sisu_spine_readme_commands +#+BEGIN_SRC text +# Commands + +for a list of commands from the program type: + + spine -h + +at the time of writing this provides the following output: + + --abstraction document abstraction + --assert set optional assertions on + --cgi-search-form-codegen generates (pre-compiled) d code for search of specified db + --cgi-sqlite-search-filename =[filename] + --concordance file for document + --config =/path/to/config/file/including/filename + --dark alternative dark theme + --debug debug + --digest hash digest for each object + --epub process epub output + --harvest extract info on authors & topics from document header metadata + --harvest-authors extract info on authors from document header metadata + --harvest-topics extract info on topics from document header 
metadata + --hide-ocn object cite numbers + --html process html output + --html-link-harvest place links back to harvest in segmented html + --html-link-search html embedded search submission + --html-seg process html output + --html-scroll process html output + --lang =[lang code e.g. =en or =en,es] + --latex output for pdfs + --latex-color-links mono or color links for pdfs + --light default light theme + --manifest process manifest output + --ocn-off object cite numbers + --odf open document format text (--odt) + --odt open document format text + --output =/path/to/output/dir specify where to place output + --parallel parallelisation + --parallel-subprocesses nested parallelisation + --pdf latex output for pdfs + --pdf-color-links mono or color links for pdfs + --pod spine (doc reform) pod source content bundled +-q --quiet output to terminal + --section-backmatter document backmatter (default) + --section-biblio document biblio (default) + --section-blurb document blurb (default) + --section-body document body (default) + --section-bookindex document bookindex (default) + --section-endnotes document endnotes (default) + --section-glossary document glossary (default) + --section-toc table of contents (default) + --serial serial processing + --skip-output skip output + --show-config show config + --show-make show make + --show-metadata show metadata + --show-summary show summary + --source document markup source + --sqlite-discrete process discrete sqlite output + --sqlite-db-create create db, create tables + --sqlite-db-drop drop tables & db + --sqlite-db-recreate create db, create tables + --sqlite-delete sqlite output + --sqlite-db-filename =[filename].sql.db + --sqlite-insert sqlite output + --sqlite-update sqlite output + --text text output + --theme-dark alternative dark theme + --theme-light default light theme + --txt text output +-v --verbose output to terminal + --very-verbose output to terminal + --workon (reserved for some matters under development & 
testing) + --xhtml xhtml output +-h --help This help information. + +#+END_SRC + +** command examples + +#+NAME: sisu_spine_readme_examples +#+BEGIN_SRC text +# Examples + +if configuartion has been set specify just +- the desired output and +- the markup document/pod(s) to process + + spine -v --html ~spineMarkupSamples/markup/pod/sisu-manual + +if configuartion has not been set or to overide the set configration specify +- the output path as well as +- the desired output and +- the markup document/pod(s) to process + +note: ~webDocRoot should be the path to web doc root, provide a suitable output path. + + spine -v --html --html-link-search --html-link-harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --epub --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --epub --latex --odt --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +## harvest + +if you have a document collection with documents that have metadata headers a +summary of the collection can be made using the harvest command + + spine -v --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --harvest ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --harvest ~spineMarkupSamples/pod/* + +## sqlite + +### create db + +if there is no sqlite db you first need to create one, to do so +- the name of the db and +- the root path for document output +must be specified: + + spine -v \ + --sqlite-db-create --sqlite-db-filename="spine.search.db" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + + spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` + +if you have a configration file providing this information that is to be used +for a document collection you can point to the 
document collection: + + spine -v --sqlite-db-create ~spineMarkupSamples/pod + +### populate db + +must specify: +- the name of the db and +- the root path for document output + + spine -v --sqlite-update \ + --sqlite-db-filename="spine.search.db" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + + spine -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +if you have a configration file providing this information that is to be used +for a document collection you can point to the document collection: + + spine -v --sqlite-update ~spineMarkupSamples/pod/* + +### generate a cgi search form in d + + spine -v --cgi-search-form-codegen \ + --output=/var/www/html \ + ~spineMarkupSamples/pod + + spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod + + spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod/.dr/config_local_site + + spine --cgi-search-form-codegen --output=`echo ~webDocRoot` ~spineMarkupSamples/pod + + spine --cgi-search-form-codegen --cgi-sqlite-search-filename="spine_search" --output=`echo ~webDocRoot` + + spine -v --cgi-search-form-codegen \ + --sqlite-db-filename="spine.search.db" \ + --cgi-sqlite-search-filename="spine-search" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod + +#### compile the cgi search form + + cd /var/www/html/cgi # /var/www/html (default document root) + + cd ~webDocRoot/cgi + +the directory ~webDocRoot/cgi/src should contain two files +- spine_search.d (or whatever you named it) +- cgi.d (by Adam Rupee) + + dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. 
+ +should compile spine-search in ~webDocRoot/cgi/cgi-bin and copy it to the +cgi-bin directory + + spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --cgi-sqlite-search-filename="spine-search" --output=`echo ~webDocRoot` + + spine -v --sqlite-db-create ~spineMarkupSamples/pod + + spine -v --html --html-link-search --cgi-sqlite-search-filename="spine-search" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --cgi-sqlite-search-filename="spine-search" --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +### create db & search form + + spine -v \ + --sqlite-db-create --sqlite-db-filename="spine.search.db" \ + --cgi-search-form-codegen --cgi-sqlite-search-filename="spine-search" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + +### html with links to search form + + spine -v --html \ + --html-link-search \ + --output=`echo ~webDocRoot` \ + ~spineMarkupSamples/pod/* + +#+END_SRC + +* manpage +** tangle + +#+BEGIN_SRC man :tangle "../doc/man/man1/spine.1" +<> +<> +<> +<> +<> +<> +<> +<> +<> +#+END_SRC + +** manpage +*** head + +#+NAME: sisu_spine_manpage_head +#+BEGIN_SRC man +.TH "spine" "1" "2020-04-05" "0.10.0" "Spine" +.br +.SH NAME +.br +sisu - documents: markup, structuring, publishing in multiple standard formats, and search +.br +.SH SYNOPSIS +.br +sisu [--options] [filename/wildcard] + +.br +sisu --txt --html --epub --odt --pdf --wordmap --sqlite --manpage --texinfo --sisupod --source --qrcode [filename/wildcard] + +.br +sisu --pg (--createdb|update [filename/wildcard]|--dropall) + +#+END_SRC + +*** description + +#+NAME: sisu_spine_manpage_description +#+BEGIN_SRC man +.SH SISU - MANUAL, +RALPH AMISSAH + +.SH WHAT IS SISU? + +.SH INTRODUCTION - WHAT IS SISU? 
+ +.BR + +.B SiSU +is a lightweight markup based document creation and publishing framework that +is controlled from the command line. Prepare documents for +.B SiSU +using your text editor of choice, then use +.B SiSU +to generate various output document formats. + +.BR +From a single lightly prepared document (plain-text +.I UTF-8 +) sisu custom builds several standard output formats which share a common (text +object) numbering system for citation of content within a document (that also +has implications for search). The sisu engine works with an abstraction of the +document's structure and content from which it is possible to generate +different forms of representation of the document. +.B SiSU +produces: plain-text, +.I HTML, +.I XHTML, +.I XML, +.I EPUB, +.I ODF: +.I ODT +(Opendocument), +.I LaTeX, +.I PDF, +and populates an +.I SQL +database ( +.I PostgreSQL +or +.I SQLite +) with text objects, roughly, paragraph sized chunks so that document searches +are done at this level of granularity. + +.BR +Outputs share a common citation numbering system, associated with text objects +and any semantic meta-data provided about the document. + +.BR + +.B SiSU +also provides concordance files, document content certificates and manifests of +generated output. Book indexes may be made. + +.BR +Some document markup samples are provided in the package sisu -markup-samples. +Homepages: + +- + +- + +.SH COMMANDS SUMMARY + +.SH DESCRIPTION + +.BR + +.B SiSU +is a document publishing system, that from a simple single marked-up document, +produces multiple output formats including: +.I plaintext, +.I HTML, +.I XHTML, +.I XML, +.I EPUB, +.I ODT +( +.I OpenDocument +( +.I ODF +) text), +.I LaTeX, +.I PDF, +info, and +.I SQL +( +.I PostgreSQL +and +.I SQLite +) , which share text object numbers ("object citation numbering") and the same +document structure information. 
For more see: or + +#+END_SRC + +** flags +*** general + +#+NAME: sisu_spine_manpage_flags +#+BEGIN_SRC man +.SH DOCUMENT PROCESSING COMMAND FLAGS + +.TP +.B --abstraction [path + filename] +run document abstraction +.TP +.B --act[s0-9] [path + filename] +--act0 to --act9 configurable shortcuts for multiple flags, -0 to -9 synonyms, +configure in sisurc.yml; sisu default action on a specified file where no flag +is provided is --act0; --act or --acts for information on current actions +ascribed to --act0 to --act9 +.TP +.B --asciidoc [path + filename] +asciidoc, smart text (not available) +.TP +.B --cgi-search-form-codegen + generate d code search form to search db specfied needs --output=[path] and +--sqlite-db-filename=[cgi search form name] or path to configuration file +--config=[full path to config file] +.TP +.B --cgi-sqlite-search-filename=[filename] +name to give cgi-search form, (it generates a [filename].d file that requires +subsequent compilation) also required is the name of the sqlite db to be +searched by the form. +.TP +.B --concordance [path + filename] +(not implemented) +.TP +.B --config=[path to config file + filename] +.TP +.B --dark + alternative theme for html and epub output, a light (default) theme is + also provided +.TP +.B --digest (not implemented) +.TP +.B --delete [path + filename] +see --zap +.TP +.B --digests [path + filename] +not implemented +.TP +.B --epub [path + filename] +produces an epub document +.TP +.B --harvest [path to files] +extract and present info on authors & topics from document header metadata. +makes two lists of sisu output based on the sisu markup documents in a +directory: list of author and authors works (year and titles), and; list by +topic with titles and author. Makes use of header metadata fields (author, +title, date, topic_register). 
+.TP +.B --harvest-authors [path to files] +extract and present info on authors from metadata in document headers +.TP +.B --harvest-topics [path to files] +extract and present info on topics from metadata in document headers +.TP +.B --hide-ocn +turn visibility of object numbers off +.TP +.B --html [path + filename] +produces html output in two forms (i) segmented text with table of contents +(toc.html and index.html) and (ii) the document in a single file (scroll.html). +.TP +.B --html-link-harvest +within html output creates link to the document set metadata harvest output +part of --html output instruction and assumes that --harvest has been or will + be run +.TP +.B --html-link-search +within html output creates a search form for submission, requires information +on the name of the search form --search part of --html output instruction it +assumes there is a cgi search form and related document database +.TP +.B --html-scroll [path + filename] +produces html output, the document in a single file (scroll.html) only. Compare +--html-seg and --html +.TP +.B --html-seg [path + filename] +produces html output, segmented text with table of contents (toc.html and +index.html). Compare --html-scroll and --html +.TP +.B --lang=[language code, e.g. 
=en or =en,es] +provide language code of document +.TP +.B --latex [path + filename] +.I LaTeX +output for different document sizes (a4, a5, b4, letter) and orientations +(portrait, landscape) for downstream (processing and) conversion to pdf, (used +with xetex no direct link between programs provided as this is a much slower +process) +.TP +.B --latex-color-links +monochrome or color links within pdf, toggle (mono better for printing), +the default is mono for portrait and color for landscape documents +.TP +.B --light theme +for html and epub output, default, a dark alternative is provided +.TP +.B --manifest [path + filename] +produces an html summary of output generated (hyperlinked to content) and +document specific metadata (sisu_manifest.html). This step is assumed for most +processing flags. +.TP +.B --markdown [path + filename] +markdown smart text (not available) +.TP +.B --no-* +negate a toggle +.TP +.B --ocn-off +object numbers off (the c in ocn is for citation). See --hide-ocn +.TP +.B --odf [path + filename] +see --odt +.TP +.B --odt [path + filename] +produce open document output +.TP +.B --output=[path to output directories] +where to place document output +.TP +.B --parallel +parallelization on (the default except for sqlite) +.TP +.B --parallel-subprocesses +nested parallelization on (the default except for sqlite) +.TP +.B --papersize-(a4|a5|b5|letter|legal) +in conjunction with --pdf set pdf papersize, overriding any configuration +settings, to set more than one papersize repeat the option --pdf --papersize-a4 +--papersize-letter. See also --papersize=* (NOT implemented) +.BR +.B --papersize=a4,a5,b5,letter,legal +in conjunction with --pdf set pdf papersize, overriding any configuration +settings, to set more than one papersize list after the equal sign with a comma +separator --papersize=a4,letter. 
See also --papersize-* (NOT implemented) +.TP +.B --pdf [path + filename] +produces +.I LaTeX +see --latex +.TP +.B --pdf-color-links +monochrome or color links within latex for pdf. See --latex-color-links +.TP +.B --pod +markup source bundled in a zip file. +Produces a zipped file of the prepared document specified along with associated +images This provides a quick way of gathering the relevant +parts of a sisu document which can then for example be emailed. A sisupod +includes sisu markup source file, (along with associated documents if a master +file, or available in multilingual versions), together with related images. +(it should be possible in future to run spine commands directly against a pod). +.TP +.B --qrcode [path + filename] +generate QR code image of metadata (used in manifest). (not implemented) +.TP +.B --quiet +quiet less output to terminal. +.TP +.B --section-* +provides finer grain control over which parts of the document are processed +to produce output, toc, body, endnotes, glossary, biblio, bookindex and blurb +.TP +.B --section-biblio +produce document bibliography output, toggle +.TP +.B --section-blurb +produce document blurb output, toggle +.TP +.B --section-body +produce document body output, toggle +.TP +.B --section-bookindex +produce document bookindex output, toggle +.TP +.B --section-endnotes +produce document endnotes output, toggle +.TP +.B --section-endnotes +produce document glossary output, toggle +.TP +.B --serial +serial processing --no-parallel +.TP +.B --show-config +show site and document configuration instructions. Requires path to +configuration file or path to documents to be processed. 
+.TP +.B --show-make +show document make instructions +.TP +.B --show-metadata +show document metadata +.TP +.B --show-summary +show document summary +.TP +.B --source [path + filename] +document markup source +.TP +.B --sha256 +set hash digest where used to sha256 (not implemented) +.TP +.B --sha512 +set hash digest where used to sha512 (not implemented) +.TP +.B --sqlite-discrete [path + filename] +create a per document sqlite db +.TP +.B --sqlite-db-create --sqlite-db-filename="[db filename]" --output="[output path]" +create a shared db and its tables. Requires a db filename, which may be set in the configuration file or on the command line as shown +.TP +.B --sqlite-db-drop [path + db filename] +drop (remove) db and its tables +.TP +.B --sqlite-db-recreate [path + filename] +drop and re-create a shared db and its tables. Requires a db filename, which may be set in the configuration file or on the command line with --sqlite-db-filename="[db name]" +.TP +.B --sqlite-db-filename="[db name]" +provide name of sqlite db, to be created, dropped, populated or for which a search form is to be made. This information may also be set in the configuration file. +.TP +.B --sqlite-delete [path + filename] +process sqlite output, remove file +.TP +.B --sqlite-insert [path + filename] +process sqlite output, insert file. See --sqlite-update +.TP +.B --sqlite-update [path + filename] +process sqlite output, update file +.TP +.B --source [filename/wildcard] +copies sisu markup file to output directory. 
Alias -s +.TP +.B --text [filename/wildcard] +produces +.I plaintext +output +(not implemented) +.TP +.B --theme-dark +See --dark +.TP +.B --theme-light +See --light +.TP +.B --txt [filename/wildcard] +produces +.I plaintext +output +(not implemented) +.TP +.B --txt-asciidoc [filename/wildcard] +see --asciidoc +(not implemented) +.TP +.B --txt-markdown [filename/wildcard] +see --markdown +(not implemented) +.TP +.B --txt-rst [filename/wildcard] +see --rst +(not implemented) +.TP +.B --txt-textile [filename/wildcard] +see --textile +(not implemented) +.TP +.B -v +on its own, provides +.B SiSU +version information +.TP +.B -v [filename/wildcard] +see --verbose +.TP +.B --verbose [filename/wildcard] +provides verbose output of what is being generated, where output is placed (and +error messages if any). Alias -v +.TP +.B --very-verbose [filename/wildcard] +provides more verbose output of what is being generated. See --verbose. Alias +-V +.TP +.B --version +spine version +(not implemented) +.TP +.B --xhtml +xhtml output +(not implemented) + +.SH COMMAND LINE MODIFIERS + +.TP +.B --no-ocn +[with --html --pdf or --epub] switches off +.I object citation numbering. +Produce output without identifying numbers in margins of html or +.I LaTeX +/pdf output. 
+#+END_SRC + +*** db flags + +#+NAME: sisu_spine_manpage_flags_db +#+BEGIN_SRC man +.SH DATABASE COMMANDS + +.BR + +.B dbi - database interface + +.BR + +.B --pg or --pgsql +set for +.I PostgreSQL +.B --sqlite +default set for +.I SQLite +-d is modifiable with --db=[database type (PgSQL or +.I SQLite +) ] +.TP +.B --pg -v --createall +initial step, creates required relations (tables, indexes) in existing +.I PostgreSQL +database (a database should be created manually and given the same name as +working directory, as requested) (rb.dbi) [ -dv --createall +.I SQLite +equivalent] it may be necessary to run sisu -Dv --createdb initially NOTE: at +the present time for +.I PostgreSQL +it may be necessary to manually create the database. The command would be +'createdb [database name]' where database name would be SiSU_[present working +directory name (without path)]. Please use only alphanumerics and underscores. +.TP +.B --pg -v --import +[filename/wildcard] imports data specified to +.I PostgreSQL +db (rb.dbi) [ -dv --import +.I SQLite +equivalent] +.TP +.B --pg -v --update +[filename/wildcard] updates/imports specified data to +.I PostgreSQL +db (rb.dbi) [ -dv --update +.I SQLite +equivalent] +.TP +.B --pg --remove +[filename/wildcard] removes specified data to +.I PostgreSQL +db (rb.dbi) [ -d --remove +.I SQLite +equivalent] +.TP +.B --pg --dropall +kills data" and drops ( +.I PostgreSQL +or +.I SQLite +) db, tables & indexes [ -d --dropall +.I SQLite +equivalent] + +.BR +The -v is for verbose output. 
+#+END_SRC + +** configuration file + +#+NAME: sisu_spine_manpage_config +#+BEGIN_SRC man +.SH CONFIGURATION + +.BR + +default location: +.TP +~/.dr/config_local_site +.TP +.nf +flag: + act0: "--html" + act1: "--html --epub" +output: + path: "/var/www/html" +default: + language: "en" + papersize: "a4" + text_wrap: "80" + digest: "sha256" +webserv: + http: "http" + domain: "localhost" + data_http: "http" + data_domain: "localhost" + data_root_url: "http://localhost" + data_root_path: "/var/www/html" + data_root_part: "" + images_root_part: "image" + cgi_title: "≅ SiSU Spine search" + cgi_http: "http" + cgi_domain: "localhost" + cgi_bin_url: "http://localhost/cgi-bin" + cgi_bin_part: "cgi-bin" + cgi_bin_path: "/usr/lib/cgi-bin" + cgi_search_script: "spine-search" + cgi_search_script_raw_fn_d: "spine_search.d" + cgi_port: "" + cgi_user: "" + cgi_action: "http://localhost/cgi-bin/spine-search" + db_sqlite: "spine.search.db" + db_pg_table: "" + db_pg_user: "" +.fi + +.BR +#+END_SRC + +** sample pod directory + +#+NAME: sisu_spine_manpage_pod_dir_structure +#+BEGIN_SRC man +.SH SAMPLE POD DIRECTORY STRUCTURE +.BR +.TP +.nf + +pod (directory may contain multiple documents) + └── the_wealth_of_networks.yochai_benkler + ├── conf + │   └── sisu_document_make + ├── media + │   ├── image + │   │   ├── won_benkler_2_1.png + │   │   ├── won_benkler_6_1.png + │   │   ├── won_benkler_7_1.png + │   │   ├── won_benkler_7_2.png + │   │   ├── won_benkler_7_3a.png + │   │   ├── won_benkler_7_3b.png + │   │   ├── won_benkler_7_4.png + │   │   ├── won_benkler_7_5.png + │   │   ├── won_benkler_7_6.png + │   │   └── won_benkler_9_1.png + │   └── text + │   └── en + │   └── the_wealth_of_networks.yochai_benkler.sst + └── pod.manifest + +.fi +#+END_SRC + +** examples + +#+NAME: sisu_spine_manpage_cli_examples +#+BEGIN_SRC man +.SH COMMAND LINE EXAMPLES + +.TP +note: ~webDocRoot should be the path to web doc root, provide a suitable output path. 
+.TP +spine -v --html --html-link-search --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --html --html-link-search --html-link-harvest --epub --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod +.TP +spine -v --sqlite-db-create ~spineMarkupSamples/pod +.TP +spine -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --sqlite-update ~spineMarkupSamples/pod/* +.TP +spine -v --show-config +.TP +spine -v --show-config --config= ~spineMarkupSamples/pod/.dr/config_local_site_test +.TP +spine -v --show-config --config=~spineMarkupSamples/pod/.dr +.TP +spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod/.dr/config_local +.TP +cd ~webDocRoot/cgi +.TP +dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. +.TP +#+END_SRC + +** docs +*** sources + +#+NAME: sisu_spine_manpage_docs +#+BEGIN_SRC man + +.BR +Running sisu (alone without any flags, filenames or wildcards) brings up the +interactive help, as does any sisu command that is not recognised. Enter to +escape. 
+.SH HELP + +.SH SISU MANUAL + + +.BR +The most up to date information on sisu should be contained in the sisu_manual, +available at: + +.BR + + +.BR +The manual can be generated from source, found respectively, either within the +.B SiSU +tarball or installed locally at: + +.BR + ./data/doc/sisu/markup-samples/sisu_manual + +.BR + /usr/share/doc/sisu/markup-samples/sisu_manual + +.BR +move to the respective directory and type e.g.: + +.BR + sisu sisu_manual.ssm +.SH SISU MAN PAGES + + +.BR +If +.B SiSU +is installed on your system usual man commands should be available, try: + +.BR + man sisu + +.BR +Most +.B SiSU +man pages are generated directly from sisu documents that are used to prepare +the sisu manual, the sources files for which are located within the +.B SiSU +tarball at: + +.BR + ./data/doc/sisu/markup-samples/sisu_manual + +.BR +Once installed, directory equivalent to: + +.BR + /usr/share/doc/sisu/markup-samples/sisu_manual + +.BR +Available man pages are converted back to html using man2html: + +.BR + /usr/share/doc/sisu/html/ + +.BR + ./data/doc/sisu/html + +.BR +An online version of the sisu man page is available here: + +.BR + +- various sisu man pages [^1] + +.BR +- sisu.1 [^2] +.SH SISU BUILT-IN INTERACTIVE HELP, [DISCONTINUED] + + +.BR +This fell out of date and has been discontinued. +#+END_SRC + +*** markup + +#+NAME: sisu_spine_manpage_markup +#+BEGIN_SRC man +.SH INTRODUCTION TO SISU MARKUP[^3] + +.SH SUMMARY + +.BR + +.B SiSU +source documents are +.I plaintext +( +.I UTF-8 +)[^4] files + +.BR +All paragraphs are separated by an empty line. 
+ +.BR +Markup is comprised of: + +.BR +- at the top of a document, the document header made up of semantic meta-data +about the document and if desired additional processing instructions (such an +instruction to automatically number headings from a particular level down) + +.BR +- followed by the prepared substantive text of which the most important single +characteristic is the markup of different heading levels, which define the +primary outline of the document structure. Markup of substantive text includes: + +.BR + * heading levels defines document structure + +.BR + * text basic attributes, italics, bold etc. + +.BR + * grouped text (objects), which are to be treated differently, such as code + blocks or poems. + +.BR + * footnotes/endnotes + +.BR + * linked text and images + +.BR + * paragraph actions, such as indent, bulleted, numbered-lists, etc. +.SH MARKUP RULES, DOCUMENT STRUCTURE AND METADATA REQUIREMENTS + + +.BR +minimal content/structure requirement: + +.BR +[metadata] +.nf +A~ (level A [title]) + +1~ (at least one level 1 [segment/(chapter)]) +.fi + + +.BR +structure rules (document heirarchy, heading levels): + +.BR +there are two sets of heading levels ABCD (title & parts if any) and 123 +(segment & subsegments if any) + +.BR +sisu has the fllowing levels: +.nf +A~ [title] . 
+ required (== 1) followed by B~ or 1~ +B~ [part] * + followed by C~ or 1~ +C~ [subpart] * + followed by D~ or 1~ +D~ [subsubpart] * + followed by 1~ +1~ [segment (chapter)] + + required (>= 1) followed by text or 2~ +text * + followed by more text or 1~, 2~ + or relevant part *() +2~ [subsegment] * + followed by text or 3~ +text * + followed by more text or 1~, 2~ or 3~ + or relevant part, see *() +3~ [subsubsegment] * + followed by text +text * + followed by more text or 1~, 2~ or 3~ or relevant part, see *() + +*(B~ if none other used; + if C~ is last used: C~ or B~; + if D~ is used: D~, C~ or B~) +.fi + +.nf +- level A~ is the tile and is mandatory +- there can only be one level A~ + +- heading levels BCD, are optional and there may be several of each + (where all three are used corresponding to e.g. Book Part Section) + * sublevels that are used must follow each other sequentially + (alphabetically), +- heading levels A~ B~ C~ D~ are followed by other heading levels rather + than substantive text + which may be the subsequent sequential (alphabetic) heading part level + or a heading (segment) level 1~ +- there must be at least one heading (segment) level 1~ + (the level on which the text is segmented, in a book would correspond + to the Chapter level) +- additional heading levels 1~ 2~ 3~ are optional and there may be several + of each +- heading levels 1~ 2~ 3~ are followed by text (which may be followed by + the same heading level) + and/or the next lower numeric heading level (followed by text) + or indeed return to the relevant part level + (as a corollary to the rules above substantive text/ content + must be preceded by a level 1~ (2~ or 3~) heading) +.fi + +.SH MARKUP EXAMPLES + +.SH ONLINE + + +.BR +Online markup examples are available together with the respective outputs +produced from or from + + +.BR +There is of course this document, which provides a cursory overview of sisu +markup and the respective output produced: + + +.BR +an alternative 
presentation of markup syntax: +/usr/share/doc/sisu/on_markup.txt.gz +.SH INSTALLED + + +.BR +With +.B SiSU +installed sample skins may be found in: /usr/share/doc/sisu/markup-samples (or +equivalent directory) and if sisu -markup-samples is installed also under: +/usr/share/doc/sisu/markup-samples-non-free + +.SH MARKUP OF HEADERS + +.BR +Headers contain either: semantic meta-data about a document, which can be used +by any output module of the program, or; processing instructions. + +.BR +Note: the first line of a document may include information on the markup +version used in the form of a comment. Comments are a percentage mark at the +start of a paragraph (and as the first character in a line of text) followed by +a space and the comment: +.nf +% this would be a comment +.fi + +.SH SAMPLE HEADER + + +.BR +This current document is loaded by a master document that has a header similar +to this one: +.nf +% SiSU master 4.0 + +title: SiSU + subtitle: Manual + +creator: + author: Amissah, Ralph + +publisher: [publisher name] + +rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +classify: + topic_register: SiSU:manual;electronic documents:SiSU:manual + subject: ebook, epublishing, electronic book, electronic publishing, + electronic document, electronic citation, data structure, + citation systems, search + +% used_by: manual + +date: + published: 2008-05-22 + created: 2002-08-28 + issued: 2002-08-28 + available: 2002-08-28 + modified: 2010-03-03 + +make: + num_top: 1 + breaks: new=C; break=1 + bold: /Gnu|Debian|Ruby|SiSU/ + home_button_text: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + footer: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + manpage: name=sisu - documents: markup, structuring, publishing in multiple standard formats, and search; + synopsis=sisu [-abcDdeFhIiMmNnopqRrSsTtUuVvwXxYyZz0-9] [filename/wildcard ] + . sisu [-Ddcv] [instruction] + . 
sisu [-CcFLSVvW] + +@links: + { SiSU Homepage }http://www.sisudoc.org/ + { SiSU Manual }http://www.sisudoc.org/sisu/sisu_manual/ + { Book Samples & Markup Examples }http://www.jus.uio.no/sisu/SiSU/examples.html + { SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html + { SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html + { SiSU Git repo }http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary + { SiSU List Archives }http://lists.sisudoc.org/pipermail/sisu/ + { SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html + { SiSU Project @ Debian }http://qa.debian.org/developer.php?login=sisu@lists.sisudoc.org + { SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +.fi + +.SH AVAILABLE HEADERS + + +.BR +Header tags appear at the beginning of a document and provide meta information +on the document (such as the +.I Dublin Core +) , or information as to how the document as a whole is to be processed. All +header instructions take the form @headername: or on the next line and indented +by once space :subheadername: All +.I Dublin Core +meta tags are available + +.BR + +.B @identifier: +information or instructions + +.BR +where the "identifier" is a tag recognised by the program, and the +"information" or "instructions" belong to the tag/identifier specified + +.BR +Note: a header where used should only be used once; all headers apart from +@title: are optional; the @structure: header is used to describe document +structure, and can be useful to know. 
+ +.BR +This is a sample header +.nf +% SiSU 2.0 [declared file-type identifier with markup version] +.fi + +.nf +@title: [title text] [this header is the only one that is mandatory] + subtitle: [subtitle if any] + language: English +.fi + +.nf +creator: + author: [Lastname, First names] + illustrator: [Lastname, First names] + translator: [Lastname, First names] + prepared_by: [Lastname, First names] +.fi + +.nf +date: + published: [year or yyyy-mm-dd] + created: [year or yyyy-mm-dd] + issued: [year or yyyy-mm-dd] + available: [year or yyyy-mm-dd] + modified: [year or yyyy-mm-dd] + valid: [year or yyyy-mm-dd] + added_to_site: [year or yyyy-mm-dd] + translated: [year or yyyy-mm-dd] +.fi + +.nf +rights: + copyright: Copyright (C) [Year and Holder] + license: [Use License granted] + text: [Year and Holder] + translation: [Name, Year] + illustrations: [Name, Year] +.fi + +.nf +classify: + topic_register: SiSU:markup sample:book;book:novel:fantasy + type: + subject: + description: + keywords: + abstract: + loc: [Library of Congress classification] + dewey: [Dewey classification +.fi + +.nf +identify: + :isbn: [ISBN] + :oclc: +.fi + +.nf +links: { SiSU }http://www.sisudoc.org + { FSF }http://www.fsf.org +.fi + +.nf +make: + num_top: 1 + headings: [text to match for each level + (e.g. PART; Chapter; Section; Article; or another: none; BOOK|FIRST|SECOND; none; CHAPTER;) + breaks: new=:C; break=1 + promo: sisu, ruby, sisu_search_libre, open_society + bold: [regular expression of words/phrases to be made bold] + italics: [regular expression of words/phrases to italicise] + home_button_text: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + footer: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org +.fi + +.nf +original: + language: [language] +.fi + +.nf +notes: + comment: + prefix: [prefix is placed just after table of contents] +.fi + +.SH MARKUP OF SUBSTANTIVE TEXT + +.SH HEADING LEVELS + + +.BR +Heading levels are :A~ ,:B~ ,:C~ ,1~ ,2~ ,3~ ... 
:A - :C being part / section +headings, followed by other heading levels, and 1 -6 being headings followed by +substantive text or sub-headings. :A~ usually the title :A~? conditional level +1 heading (used where a stand-alone document may be imported into another) + +.BR + +.B :A~ [heading text] +Top level heading [this usually has similar content to the title @title: ] +NOTE: the heading levels described here are in 0.38 notation, see heading + +.BR + +.B :B~ [heading text] +Second level heading [this is a heading level divider] + +.BR + +.B :C~ [heading text] +Third level heading [this is a heading level divider] + +.BR + +.B 1~ [heading text] +Top level heading preceding substantive text of document or sub-heading 2, the +heading level that would normally be marked 1. or 2. or 3. etc. in a document, +and the level on which sisu by default would break html output into named +segments, names are provided automatically if none are given (a number), +otherwise takes the form 1~my_filename_for_this_segment + +.BR + +.B 2~ [heading text] +Second level heading preceding substantive text of document or sub-heading 3 , +the heading level that would normally be marked 1.1 or 1.2 or 1.3 or 2.1 etc. +in a document. + +.BR + +.B 3~ [heading text] +Third level heading preceding substantive text of document, that would normally +be marked 1.1.1 or 1.1.2 or 1.2.1 or 2.1.1 etc. in a document +.nf +1~filename level 1 heading, + +% the primary division such as Chapter that is followed by substantive text, and may be further subdivided (this is the level on which by default html segments are made) +.fi + +.SH FONT ATTRIBUTES + +.BR + +.B markup example: +.nf +normal text, *{emphasis}*, !{bold text}!, /{italics}/, _{underscore}_, "{citation}", +^{superscript}^, ,{subscript},, +{inserted text}+, -{strikethrough}-, #{monospace}# + +normal text + +*{emphasis}* [note: can be configured to be represented by bold, italics or underscore] + +!{bold text}! 
+ +/{italics}/ + +_{underscore}_ + +"{citation}" + +^{superscript}^ + +,{subscript}, + ++{inserted text}+ + +-{strikethrough}- + +#{monospace}# +.fi + + +.BR + +.B resulting output: + +.BR +normal text, +.B emphasis, +.B bold text +, +.I italics, +.I underscore, +"citation", ^superscript^, [subscript], ++inserted text++, --strikethrough--, +monospace + +.BR +normal text + +.BR + +.B emphasis +[note: can be configured to be represented by bold, italics or underscore] + +.BR + +.B bold text + +.BR + +.I italics + +.BR +.I underscore + +.BR +"citation" + +.BR +^superscript^ + +.BR +[subscript] + +.BR +++inserted text++ + +.BR +--strikethrough-- + +.BR +monospace +.SH INDENTATION AND BULLETS + + +.BR + +.B markup example: +.nf +ordinary paragraph + +_1 indent paragraph one step + +_2 indent paragraph two steps + +_9 indent paragraph nine steps +.fi + + +.BR + +.B resulting output: + +.BR +ordinary paragraph + +.BR + indent paragraph one step + +.BR + indent paragraph two steps + +.BR + indent paragraph nine steps + +.BR + +.B markup example: +.nf +_* bullet text + +_1* bullet text, first indent + +_2* bullet text, two step indent +.fi + + +.BR + +.B resulting output: + +.BR +- bullet text + +.BR + * bullet text, first indent + +.BR + * bullet text, two step indent + +.BR +Numbered List (not to be confused with headings/titles, (document structure)) + +.BR + +.B markup example: +.nf +# numbered list numbered list 1., 2., 3, etc. + +_# numbered list numbered list indented a., b., c., d., etc. 
+.fi + +.SH HANGING INDENTS + + +.BR + +.B markup example: +.nf +_0_1 first line no indent, +rest of paragraph indented one step + +_1_0 first line indented, +rest of paragraph no indent + +in each case level may be 0-9 +.fi + + +.BR + +.B resulting output: + +.BR +first line no indent, rest of paragraph indented one step; first line no + indent, rest of paragraph indented one step; first line no indent, rest of + paragraph indented one step; first line no indent, rest of paragraph indented + one step; first line no indent, rest of paragraph indented one step; first + line no indent, rest of paragraph indented one step; first line no indent, + rest of paragraph indented one step; first line no indent, rest of paragraph + indented one step; first line no indent, rest of paragraph indented one step; + +.BR +A regular paragraph. + +.BR +first line indented, rest of paragraph no indent first line indented, rest of +paragraph no indent first line indented, rest of paragraph no indent first line +indented, rest of paragraph no indent first line indented, rest of paragraph no +indent first line indented, rest of paragraph no indent first line indented, +rest of paragraph no indent first line indented, rest of paragraph no indent +first line indented, rest of paragraph no indent first line indented, rest of +paragraph no indent first line indented, rest of paragraph no indent + +.BR +in each case level may be 0-9 + +.BR + +.B live-build + A collection of scripts used to build customized +.B Debian + Livesystems. + .I live-build + was formerly known as live-helper, and even earlier known as live-package. + +.BR + +.B live-build + + A collection of scripts used to build customized +.B Debian + Livesystems. +.I live-build + was formerly known as live-helper, and even earlier known as live-package. +.SH FOOTNOTES / ENDNOTES + + +.BR +Footnotes and endnotes are marked up at the location where they would be +indicated within a text. They are automatically numbered. 
The output type +determines whether footnotes or endnotes will be produced + +.BR + +.B markup example: +.nf +~{ a footnote or endnote }~ +.fi + + +.BR + +.B resulting output: + +.BR +[^5] + +.BR + +.B markup example: +.nf +normal text~{ self contained endnote marker & endnote in one }~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text[^6] continues + +.BR + +.B markup example: +.nf +normal text ~{* unnumbered asterisk footnote/endnote, insert multiple asterisks if required }~ continues + +normal text ~{** another unnumbered asterisk footnote/endnote }~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text [^*] continues + +.BR +normal text [^**] continues + +.BR + +.B markup example: +.nf +normal text ~[* editors notes, numbered asterisk footnote/endnote series ]~ continues + +normal text ~[+ editors notes, numbered plus symbol footnote/endnote series ]~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text [^*3] continues + +.BR +normal text [^+2] continues + +.BR + +.B Alternative endnote pair notation for footnotes/endnotes: +.nf +% note the endnote marker "~^" + +normal text~^ continues + +^~ endnote text following the paragraph in which the marker occurs +.fi + + +.BR +the standard and pair notation cannot be mixed in the same document +.SH LINKS + +.SH NAKED URLS WITHIN TEXT, DEALING WITH URLS + + +.BR +urls found within text are marked up automatically. A url within text is +automatically hyperlinked to itself and by default decorated with angled +braces, unless they are contained within a code block (in which case they are +passed as normal text), or escaped by a preceding underscore (in which case the +decoration is omitted). 
+ +.BR + +.B markup example: +.nf +normal text http://www.sisudoc.org/ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text continues + +.BR +An escaped url without decoration + +.BR + +.B markup example: +.nf +normal text _http://www.sisudoc.org/ continues + +deb _http://www.jus.uio.no/sisu/archive unstable main non-free +.fi + + +.BR + +.B resulting output: + +.BR +normal text <_http://www.sisudoc.org/> continues + +.BR +deb <_http://www.jus.uio.no/sisu/archive> unstable main non-free + +.BR +where a code block is used there is neither decoration nor hyperlinking, code +blocks are discussed later in this document + +.BR + +.B resulting output: +.nf +deb http://www.jus.uio.no/sisu/archive unstable main non-free +deb-src http://www.jus.uio.no/sisu/archive unstable main non-free +.fi + +.SH LINKING TEXT + + +.BR +To link text or an image to a url the markup is as follows + +.BR + +.B markup example: +.nf +about { SiSU }http://url.org markup +.fi + + +.BR + +.B resulting output: + +.BR +aboutSiSU markup + +.BR +A shortcut notation is available so the url link may also be provided +automatically as a footnote + +.BR + +.B markup example: +.nf +about {~^ SiSU }http://url.org markup +.fi + + +.BR + +.B resulting output: + +.BR +aboutSiSU [^7] markup + +.BR +Internal document links to a tagged location, including an ocn + +.BR + +.B markup example: +.nf +about { text links }#link_text +.fi + + +.BR + +.B resulting output: + +.BR +about ⌠text links⌡⌈link_text⌋ + +.BR +Shared document collection link + +.BR + +.B markup example: +.nf +about { SiSU book markup examples }:SiSU/examples.html +.fi + + +.BR + +.B resulting output: + +.BR +about ⌠ +.B SiSU +book markup examples⌡⌈:SiSU/examples.html⌋ +.SH LINKING IMAGES + + +.BR + +.B markup example: +.nf +{ tux.png 64x80 }image + +% various url linked images + +{tux.png 64x80 "a better way" }http://www.sisudoc.org/ + +{GnuDebianLinuxRubyBetterWay.png 100x101 "Way Better - with Gnu/Linux, Debian and Ruby" 
}http://www.sisudoc.org/ + +{~^ ruby_logo.png "Ruby" }http://www.ruby-lang.org/en/ +.fi + + +.BR + +.B resulting output: + +.BR +[ tux.png ] + +.BR +tux.png 64x80 "Gnu/Linux - a better way" + +.BR +GnuDebianLinuxRubyBetterWay.png 100x101 "Way Better - with Gnu/Linux, Debian +and Ruby" + +.BR +ruby_logo.png 70x90 "Ruby" [^8] + +.BR + +.B linked url footnote shortcut +.nf +{~^ [text to link] }http://url.org + +% maps to: { [text to link] }http://url.org ~{ http://url.org }~ + +% which produces hyper-linked text within a document/paragraph, with an endnote providing the url for the text location used in the hyperlink +.fi + +.nf +text marker *~name +.fi + + +.BR +note at a heading level the same is automatically achieved by providing names +to headings 1, 2 and 3 i.e. 2~[name] and 3~[name] or in the case of +auto-heading numbering, without further intervention. +.SH LINK SHORTCUT FOR MULTIPLE VERSIONS OF A SISU DOCUMENT IN THE SAME DIRECTORY +TREE + + +.BR + +.B markup example: +.nf +!_ /{"Viral Spiral"}/, David Bollier + +{ "Viral Spiral", David Bollier [3sS]}viral_spiral.david_bollier.sst +.fi + + +.BR + +.B +.I "Viral Spiral", +David Bollier +"Viral Spiral", David Bollier + document manifest + ⌠html, segmented text⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠html, scroll, document in one⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠epub⌡「http://corundum/sisu_manual/en/epub/viral_spiral.david_bollier.epub」 + ⌠pdf, landscape⌡「http://corundum/sisu_manual/en/pdf/viral_spiral.david_bollier.pdf」 + ⌠pdf, portrait⌡「http://corundum/sisu_manual/en/pdf/viral_spiral.david_bollier.pdf」 + ⌠odf: odt, open document text⌡「http://corundum/sisu_manual/en/odt/viral_spiral.david_bollier.odt」 + ⌠xhtml scroll⌡「http://corundum/sisu_manual/en/xhtml/viral_spiral.david_bollier.xhtml」 + ⌠xml, sax⌡「http://corundum/sisu_manual/en/xml/viral_spiral.david_bollier.xml」 + ⌠xml, 
dom⌡「http://corundum/sisu_manual/en/xml/viral_spiral.david_bollier.xml」 + ⌠concordance⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠dcc, document content certificate (digests)⌡「http://corundum/sisu_manual/en/digest/viral_spiral.david_bollier.txt」 + ⌠markup source text⌡「http://corundum/sisu_manual/en/src/viral_spiral.david_bollier.sst」 + ⌠markup source (zipped) pod⌡「http://corundum/sisu_manual/en/pod/viral_spiral.david_bollier.sst.zip」 + +.SH GROUPED TEXT / BLOCKED TEXT + + +.BR +There are two markup syntaxes for blocked text, using curly braces or using +tics +.SH BLOCKED TEXT CURLY BRACE SYNTAX + + +.BR +at the start of a line on its own use name of block type with an opening curly +brace, follow with the content of the block, and close with a closing curly +brace and the name of the block type, e.g. +.nf +code{ + +this is a code block + +}code +.fi + +.nf + +poem{ + +this here is a poem + +}poem +.fi + +.SH BLOCKED TEXT TIC SYNTAX + +.nf +``` code +this is a code block + +``` + +``` poem +this here is a poem + +``` +.fi + + +.BR +start a line with three backtics, a space followed by the name of the name of +block type, follow with the content of the block, and close with three back +ticks on a line of their own, e.g. 
+.SH TABLES + + +.BR +Tables may be prepared in two either of two forms + +.BR + +.B markup example: +.nf +table{ c3; 40; 30; 30; + +This is a table +this would become column two of row one +column three of row one is here + +And here begins another row +column two of row two +column three of row two, and so on + +}table +.fi + + +.BR + +.B resulting output: +This is a table|this would become column two of row one|column three of row one is here』And here begins another row|column two of row two|column three of row two, and so on』 + + +.BR +a second form may be easier to work with in cases where there is not much +information in each column + +.BR + +.B markup example: +[^9] +.nf +!_ Table 3.1: Contributors to Wikipedia, January 2001 - June 2005 + +{table~h 24; 12; 12; 12; 12; 12; 12;} + |Jan. 2001|Jan. 2002|Jan. 2003|Jan. 2004|July 2004|June 2006 +Contributors* | 10| 472| 2,188| 9,653| 25,011| 48,721 +Active contributors** | 9| 212| 846| 3,228| 8,442| 16,945 +Very active contributors*** | 0| 31| 190| 692| 1,639| 3,016 +No. of English language articles| 25| 16,000| 101,000| 190,000| 320,000| 630,000 +No. of articles, all languages | 25| 19,000| 138,000| 490,000| 862,000|1,600,000 + +- Contributed at least ten times; ** at least 5 times in last month; *** more than 100 times in last month. +.fi + + +.BR + +.B resulting output: + +.BR + +.B Table 3.1: Contributors to Wikipedia, January 2001 - June 2005 +|Jan. 2001|Jan. 2002|Jan. 2003|Jan. 2004|July 2004|June 2006』Contributors*|10|472|2,188|9,653|25,011|48,721』Active contributors**|9|212|846|3,228|8,442|16,945』Very active contributors***|0|31|190|692|1,639|3,016』No. of English language articles|25|16,000|101,000|190,000|320,000|630,000』No. of articles, all languages|25|19,000|138,000|490,000|862,000|1,600,000』 + + +.BR +- Contributed at least ten times; ** at least 5 times in last month; *** more +than 100 times in last month. 
+.SH POEM + + +.BR + +.B basic markup: +.nf +poem{ + + Your poem here + +}poem + +Each verse in a poem is given an object number. +.fi + + +.BR + +.B markup example: +.nf +poem{ + + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + +}poem +.fi + + +.BR + +.B resulting output: + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + + +.SH GROUP + + +.BR + +.B basic markup: +.nf +group{ + + Your grouped text here + +}group + +A group is treated as an object and given a single object number. +.fi + + +.BR + +.B markup example: +.nf +group{ + + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." 
+ "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + +}group +.fi + + +.BR + +.B resulting output: + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + + +.SH CODE + + +.BR +Code tags code{ ... }code (used as with other group tags described above) are +used to escape regular sisu markup, and have been used extensively within this +document to provide examples of +.B SiSU +markup. You cannot however use code tags to escape code tags. They are however +used in the same way as group or poem tags. + +.BR +A code-block is treated as an object and given a single object number. [an +option to number each line of code may be considered at some later time] + +.BR + +.B use of code tags instead of poem compared, resulting output: +.nf + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." 
+ "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' +.fi + + +.BR +From +.B SiSU +2.7.7 on you can number codeblocks by placing a hash after the opening code tag +code{# as demonstrated here: +.nf +1 | `Fury said to a +2 | mouse, That he +3 | met in the +4 | house, +5 | "Let us +6 | both go to +7 | law: I will +8 | prosecute +9 | YOU. --Come, +10 | I'll take no +11 | denial; We +12 | must have a +13 | trial: For +14 | really this +15 | morning I've +16 | nothing +17 | to do." +18 | Said the +19 | mouse to the +20 | cur, "Such +21 | a trial, +22 | dear Sir, +23 | With +24 | no jury +25 | or judge, +26 | would be +27 | wasting +28 | our +29 | breath." +30 | "I'll be +31 | judge, I'll +32 | be jury," +33 | Said +34 | cunning +35 | old Fury: +36 | "I'll +37 | try the +38 | whole +39 | cause, +40 | and +41 | condemn +42 | you +43 | to +44 | death."' +.fi + +.SH ADDITIONAL BREAKS - LINEBREAKS WITHIN OBJECTS, COLUMN AND PAGE-BREAKS + +.SH LINE-BREAKS + + +.BR +To break a line within a "paragraph object", two backslashes \e\e +with a space before and a space or newline after them +may be used. +.nf +To break a line within a "paragraph object", +two backslashes \e\e with a space before +and a space or newline after them \e\e +may be used. +.fi + + +.BR +The html break br enclosed in angle brackets (though undocumented) is available +in versions prior to 3.0.13 and 2.9.7 (it remains available for the time being, +but is depreciated). + +.BR +To draw a dividing line dividing paragraphs, see the section on page breaks. +.SH PAGE BREAKS + + +.BR +Page breaks are only relevant and honored in some output formats. A page break +or a new page may be inserted manually using the following markup on a line on +its own: + +.BR +page new =\e= breaks the page, starts a new page. + +.BR +page break -\- breaks a column, starts a new column, if using columns, else +breaks the page, starts a new page. 
+ +.BR +page break line across page -..- draws a dividing line, dividing paragraphs + +.BR +page break: +.nf +-\e\e- +.fi + + +.BR +page (break) new: +.nf +=\e\e= +.fi + + +.BR +page (break) line across page (dividing paragraphs): +.nf +-..- +.fi + +.SH BIBLIOGRAPHY / REFERENCES + + +.BR +There are three ways to prepare a bibliography using sisu (which are mutually +exclusive): (i) manually preparing and marking up as regular text in sisu a +list of references, this is treated as a regular document segment (and placed +before endnotes if any); (ii) preparing a bibliography, marking a heading level +1~!biblio (note the exclamation mark) and preparing a bibliography using +various metadata tags including for author: title: year: a list of which is +provided below, or; (iii) as an assistance in preparing a bibliography, marking +a heading level 1~!biblio and tagging citations within footnotes for inclusion, +identifying citations and having a parser attempt to extract them and build a +bibliography of the citations provided. + +.BR +For the heading/section sequence: endnotes, bibliography then book index to +occur, the name biblio or bibliography must be given to the bibliography +section, like so: +.nf +1~!biblio~ [Note: heading marker::required title missing] +.fi + +.SH A MARKUP TAGGED METADATA BIBLIOGRAPHY SECTION + + +.BR +Here instead of writing your full citations directly in footnotes, each time +you have new material to cite, you add it to your bibliography section (if it +has not been added yet) providing the information you need against an available +list of tags (provided below). + +.BR +The required tags are au: ti: and year: [^10] a short quick example might be +as follows: +.nf +1~!biblio~ [Note: heading marker::required title missing] + +au: von Hippel, E.
+ti: Perspective: User Toolkits for Innovation +lng: (language) +jo: Journal of Product Innovation Management +vo: 18 +ed: (editor) +yr: 2001 +note: +sn: Hippel, /{User Toolkits}/ (2001) +id: vHippel_2001 +% form: + +au: Benkler, Yochai +ti: The Wealth of Networks +st: How Social Production Transforms Markets and Freedom +lng: (language) +pb: Harvard University Press +edn: (edition) +yr: 2006 +pl: U.S. +url: http://cyber.law.harvard.edu/wealth_of_networks/Main_Page +note: +sn: Benkler, /{Wealth of Networks}/ (2006) +id: Benkler2006 + +au: Quixote, Don; Panza, Sancho +ti: Taming Windmills, Keeping True +jo: Imaginary Journal +yr: 1605 +url: https://en.wikipedia.org/wiki/Don_Quixote +note: made up to provide an example of author markup for an article with two authors +sn: Quixote & Panza, /{Taming Windmills}/ (1605) +id: quixote1605 +.fi + + +.BR +Note that the section name !biblio (or !bibliography) is required for the +bibliography to be treated specially as such, and placed after the +auto-generated endnote section. + +.BR +Using this method, work goes into preparing the bibliography, the tags author +or editor, year and title are required and will be used to sort the +bibliography that is placed under the Bibliography section + +.BR +The metadata tags may include shortname (sn:) and id, if provided, which are +used for substitution within text. Every time the given id is found within the +text it will be replaced by the given short title of the work (it is for this +reason the short title has sisu markup to italicize the title), it should work +with any page numbers to be added, the short title should be one that can +easily be used to look up the full description in the bibliography. +.nf +The following footnote~{ quixote1605, pp 1000 - 1001, also Benkler2006 p 1. 
}~ +.fi + + +.BR +would be presented as: + +.BR +Quixote and Panza, +.I Taming Windmills +(1605), pp 1000 - 1001 also, Benkler, +.I Wealth of Networks, +(2006) p 1 or rather[^11] +.nf +au: author Surname, FirstNames (if multiple semi-colon separator) + (required unless editor to be used instead) +ti: title (required) +st: subtitle +jo: journal +vo: volume +ed: editor (required if author not provided) +tr: translator +src: source (generic field where others are not appropriate) +in: in (like src) +pl: place/location (state, country) +pb: publisher +edn: edition +yr: year (yyyy or yyyy-mm or yyyy-mm-dd) (required) +pg: pages +url: http://url +note: note +id: create_short_identifier e.g. authorSurnameYear + (used in substitutions: when found within text will be + replaced by the short name provided) +sn: short name e.g. Author, /{short title}/, Year + (used in substitutions: when an id is found within text + the short name will be used to replace it) +.fi + +.SH TAGGING CITATIONS FOR INCLUSION IN THE BIBLIOGRAPHY + + +.BR +Here whenever you make a citation that you wish be included in the +bibliography, you tag the citation as such using special delimiters (which are +subsequently removed from the final text produced by sisu) + +.BR +Here you would write something like the following, either in regular text or a +footnote +.nf +See .: Quixote, Don; Panza, Sancho /{Taming Windmills, Keeping True}/ (1605) :. +.fi + + +.BR + +.B SiSU +will parse for a number of patterns within the delimiters to try make out the +authors, title, date etc. and from that create a Bibliography. This is more +limited than the previously described method of preparing a tagged +bibliography, and using an id within text to identify the work, which also +lends itself to greater consistency. 
+.SH GLOSSARY + + +.BR +Using the section name 1~!glossary results in the Glossary being treated +specially as such, and placed after the auto-generated endnote section (before +the bibliography/list of references if there is one). + +.BR +The Glossary is ordinary text marked up in a manner deemed suitable for that +purpose. e.g. with the term in bold, possibly with a hanging indent. +.nf +1~!glossary~ [Note: heading marker::required title missing] + +_0_1 *{GPL}* An abbreviation that stands for "General Purpose License." ... + +_0_1 [provide your list of terms and definitions] +.fi + + +.BR +In the given example the first line is not indented subsequent lines are by one +level, and the term to be defined is in bold text. +.SH BOOK INDEX + + +.BR +To make an index append to paragraph the book index term relates to it, using +an equal sign and curly braces. + +.BR +Currently two levels are provided, a main term and if needed a sub-term. +Sub-terms are separated from the main term by a colon. +.nf + Paragraph containing main term and sub-term. + ={Main term:sub-term} +.fi + + +.BR +The index syntax starts on a new line, but there should not be an empty line +between paragraph and index markup. + +.BR +The structure of the resulting index would be: +.nf + Main term, 1 + sub-term, 1 +.fi + + +.BR +Several terms may relate to a paragraph, they are separated by a semicolon. If +the term refers to more than one paragraph, indicate the number of paragraphs. +.nf + Paragraph containing main term, second term and sub-term. + ={first term; second term: sub-term} +.fi + + +.BR +The structure of the resulting index would be: +.nf + First term, 1, + Second term, 1, + sub-term, 1 +.fi + + +.BR +If multiple sub-terms appear under one paragraph, they are separated under the +main term heading from each other by a pipe symbol. +.nf + Paragraph containing main term, second term and sub-term. 
+ ={Main term: + sub-term+2|second sub-term; + Another term + } + + A paragraph that continues discussion of the first sub-term +.fi + + +.BR +The plus one in the example provided indicates the first sub-term spans one +additional paragraph. The logical structure of the resulting index would be: +.nf + Main term, 1, + sub-term, 1-3, + second sub-term, 1, + Another term, 1 +.fi + +.SH COMPOSITE DOCUMENTS MARKUP + + +.BR +It is possible to build a document by creating a master document that requires +other documents. The documents required may be complete documents that could be +generated independently, or they could be markup snippets, prepared so as to be +easily available to be placed within another text. If the calling document is a +master document (built from other documents), it should be named with the +suffix +.B .ssm +Within this document you would provide information on the other documents that +should be included within the text. These may be other documents that would be +processed in a regular way, or markup bits prepared only for inclusion within a +master document +.B .sst +regular markup file, or +.B .ssi +(insert/information) A secondary file of the composite document is built prior +to processing with the same prefix and the suffix +.B ._sst + +.BR +basic markup for importing a document into a master document +.nf +<< filename1.sst + +<< filename2.ssi +.fi + + +.BR +The form described above should be relied on. Within the +.I Vim +editor it results in the text thus linked becoming hyperlinked to the document +it is calling in which is convenient for editing. 
+.SH SUBSTITUTIONS + + +.BR + +.B markup example: +.nf +The current Debian is ${debian_stable} the next debian will be ${debian_testing} + +Configure substitution in _sisu/sisu_document_make + +make: + substitute: /${debian_stable}/,'*{Wheezy}*' /${debian_testing}/,'*{Jessie}*' +.fi + + +.BR + +.B resulting output: + +.BR +The current +.B Debian +is +.B Jessie +the next debian will be +.B Stretch + +.BR +Configure substitution in _sisu/sisu_document_make +.SH SISU FILETYPES + + +.BR + +.B SiSU +has +.I plaintext +and binary filetypes, and can process either type of document. +.SH .SST .SSM .SSI MARKED UP PLAIN TEXT + +.TP +.B SiSU +documents are prepared as plain-text (utf-8) files with +.B SiSU +markup. They may make reference to and contain images (for example), which are +stored in the directory beneath them _sisu/image. 〔b¤SiSU +.I plaintext +markup files are of three types that may be distinguished by the file extension +used: regular text .sst; master documents, composite documents that incorporate +other text, which can be any regular text or text insert; and inserts the +contents of which are like regular text except these are marked .ssi and are +not processed. + +.BR + +.B SiSU +processing can be done directly against a sisu documents; which may be located +locally or on a remote server for which a url is provided. + +.BR + +.B SiSU +source markup can be shared with the command: + +.BR + sisu -s [filename] +.SH SISU TEXT - REGULAR FILES (.SST) + + +.BR +The most common form of document in +.B SiSU, +see the section on +.B SiSU +markup. +.SH SISU MASTER FILES (.SSM) + + +.BR +Composite documents which incorporate other +.B SiSU +documents which may be either regular +.B SiSU +text .sst which may be generated independently, or inserts prepared solely for +the purpose of being incorporated into one or more master documents. 
+ +.BR +The mechanism by which master files incorporate other documents is described as +one of the headings under +.B SiSU +markup in the +.B SiSU +manual. + +.BR +Note: Master documents may be prepared in a similar way to regular documents, +and processing will occur normally if a .sst file is renamed .ssm without +requiring any other documents; the .ssm marker flags that the document may +contain other documents. + +.BR +Note: a secondary file of the composite document is built prior to processing +with the same prefix and the suffix ._sst +.SH SISU INSERT FILES (.SSI) + + +.BR +Inserts are documents prepared solely for the purpose of being incorporated +into one or more master documents. They resemble regular +.B SiSU +text files (.sst). Since sisu -5.5.0 (6.1.0) .ssi files can like .ssm files +include other .sst or .ssm files. .ssi files cannot be called by the sisu +processor directly and can only be incorporated in other documents. Making a +file a .ssi file is a quick and convenient way of breaking up a document that +is to be included in a master document, and flagging that the file to be +incorporated .ssi is not intended that the file should be processed on its own. +.SH SISUPOD, ZIPPED BINARY CONTAINER (SISUPOD.ZIP, .SSP) + + +.BR +A sisupod is a zipped +.B SiSU +text file or set of +.B SiSU +text files and any associated images that they contain (this will be extended +to include sound and multimedia-files) +.TP +.B SiSU +.I plaintext +files rely on a recognised directory structure to find contents such as images +associated with documents, but all images for example for all documents +contained in a directory are located in the sub-directory _sisu/image. Without +the ability to create a sisupod it can be inconvenient to manually identify all +other files associated with a document. A sisupod automatically bundles all +associated files with the document that is turned into a pod.
+ +.BR +The structure of the sisupod is such that it may for example contain a single +document and its associated images; a master document and its associated +documents and anything else; or the zipped contents of a whole directory of +prepared +.B SiSU +documents. + +.BR +The command to create a sisupod is: + +.BR + sisu -S [filename] + +.BR +Alternatively, make a pod of the contents of a whole directory: + +.BR + sisu -S + +.BR + +.B SiSU +processing can be done directly against a sisupod; which may be located locally +or on a remote server for which a url is provided. + +.BR + + +.BR + +.SH CONFIGURATION + +.SH CONFIGURATION FILES + +.SH CONFIG.YML + + +.BR + +.B SiSU +configration parameters are adjusted in the configuration file, which can be +used to override the defaults set. This includes such things as which directory +interim processing should be done in and where the generated output should be +placed. + +.BR +The +.B SiSU +configuration file is a yaml file, which means indentation is significant. + +.BR + +.B SiSU +resource configuration is determined by looking at the following files if they +exist: + +.BR + ./_sisu/v7/sisurc.yml + +.BR + ./_sisu/sisurc.yml + +.BR + ~/.sisu/v7/sisurc.yml + +.BR + ~/.sisu/sisurc.yml + +.BR + /etc/sisu/v7/sisurc.yml + +.BR + /etc/sisu/sisurc.yml + +.BR +The search is in the order listed, and the first one found is used. + +.BR +In the absence of instructions in any of these it falls back to the internal +program defaults. + +.BR +Configuration determines the output and processing directories and the database +access details. + +.BR +If +.B SiSU +is installed a sample sisurc.yml may be found in /etc/sisu/sisurc.yml +.SH SISU_DOCUMENT_MAKE + + +.BR +Most sisu document headers relate to metadata, the exception is the @make: +header which provides processing related information. The default contents of +the @make header may be set by placing them in a file sisu_document_make. 
+ +.BR +The search order is as for resource configuration: + +.BR + ./_sisu/v7/sisu_document_make + +.BR + ./_sisu/sisu_document_make + +.BR + ~/.sisu/v7/sisu_document_make + +.BR + ~/.sisu/sisu_document_make + +.BR + /etc/sisu/v7/sisu_document_make + +.BR + /etc/sisu/sisu_document_make + +.BR +A sample sisu_document_make can be found in the _sisu/ directory under along +with the provided sisu markup samples. +.SH CSS - CASCADING STYLE SHEETS (FOR HTML, XHTML AND XML) + + +.BR +CSS files to modify the appearance of +.B SiSU +html, +.I XHTML +or +.I XML +may be placed in the configuration directory: ./_sisu/css ; ~/.sisu/css or; +/etc/sisu/css and these will be copied to the output directories with the +command sisu -CC. + +.BR +The basic CSS file for html output is html. css, placing a file of that name in +directory _sisu/css or equivalent will result in the default file of that name +being overwritten. + +.BR + +.I HTML: +html. css + +.BR + +.I XML +DOM: dom.css + +.BR + +.I XML +SAX: sax.css + +.BR + +.I XHTML: +xhtml. css + +.BR +The default homepage may use homepage.css or html. css + +.BR +Under consideration is to permit the placement of a CSS file with a different +name in directory _sisu/css directory or equivalent.[^12] +.SH ORGANISING CONTENT - DIRECTORY STRUCTURE AND MAPPING + + +.BR + +.B SiSU +v3 has new options for the source directory tree, and output directory +structures of which there are 3 alternatives. +.SH DOCUMENT SOURCE DIRECTORY + + +.BR +The document source directory is the directory in which sisu processing +commands are given. It contains the sisu source files (.sst .ssm .ssi), or (for +sisu v3 may contain) subdirectories with language codes which contain the sisu +source files, so all English files would go in subdirectory en/, French in fr/, +Spanish in es/ and so on. ISO 639-1 codes are used (as varied by po4a). 
A list +of available languages (and possible sub-directory names) can be obtained with +the command "sisu --help lang". The list of languages is limited to languages +supported by XeTeX polyglossia. +.SH GENERAL DIRECTORIES + +.nf + ./subject_name/ + +% files stored at this level e.g. sisu_manual.sst or +% for sisu v3 may be under language sub-directories +% e.g. + + ./subject_name/en + + ./subject_name/fr + + ./subject_name/es + + ./subject_name/_sisu + + ./subject_name/_sisu/css + + ./subject_name/_sisu/image +.fi + +.SH DOCUMENT OUTPUT DIRECTORY STRUCTURES + +.SH OUTPUT DIRECTORY ROOT + + +.BR +The output directory root can be set in the sisurc.yml file. Under the root, +subdirectories are made for each directory in which a document set resides. If +you have a directory named poems or conventions, that directory will be created +under the output directory root and the output for all documents contained in +the directory of a particular name will be generated to subdirectories beneath +that directory (poem or conventions). A document will be placed in a +subdirectory of the same name as the document with the filetype identifier +stripped (.sst .ssm) + +.BR +The last part of a directory path, representing the sub-directory in which a +document set resides, is the directory name that will be used for the output +directory. This has implications for the organisation of document collections +as it could make sense to place documents of a particular subject, or type +within a directory identifying them. This grouping as suggested could be by +subject (sales_law, english_literature); or just as conveniently by some other +classification (X University).
The mapping means it is also possible to place +in the same output directory documents that are for organisational purposes +kept separately, for example documents on a given subject of two different +institutions may be kept in two different directories of the same name, under a +directory named after each institution, and these would be output to the same +output directory. Skins could be associated with each institution on a +directory basis and resulting documents will take on the appropriate different +appearance. +.SH ALTERNATIVE OUTPUT STRUCTURES + + +.BR +There are 3 possibile output structures described as being, by language, by +filetype or by filename, the selection is made in sisurc.yml +.nf +#% output_dir_structure_by: language; filetype; or filename +output_dir_structure_by: language #(language & filetype, preferred?) +#output_dir_structure_by: filetype +#output_dir_structure_by: filename #(default, closest to original v1 & v2) +.fi + +.SH BY LANGUAGE + + +.BR +The by language directory structure places output files + +.BR +The by language directory structure separates output files by language code +(all files of a given language), and within the language directory by filetype. + +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: language +.nf + |-- en + |-- epub + |-- hashes + |-- html + | |-- viral_spiral.david_bollier + | |-- manifest + | |-- qrcode + | |-- odt + | |-- pdf + | |-- sitemaps + | |-- txt + | |-- xhtml + | `-- xml + |-- po4a + | `-- live-manual + | |-- po + | |-- fr + | `-- pot + `-- _sisu + |-- css + |-- image + |-- image_sys -> ../../_sisu/image_sys + `-- xml + |-- rnc + |-- rng + `-- xsd +.fi + + +.BR +#by: language subject_dir/en/manifest/filename.html +.SH BY FILETYPE + + +.BR +The by filetype directory structure separates output files by filetype, all +html files in one directory pdfs in another and so on. Filenames are given a +language extension. 
+ +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: filetype +.nf + |-- epub + |-- hashes + |-- html + |-- viral_spiral.david_bollier + |-- manifest + |-- qrcode + |-- odt + |-- pdf + |-- po4a + |-- live-manual + | |-- po + | |-- fr + | `-- pot + |-- _sisu + | |-- css + | |-- image + | |-- image_sys -> ../../_sisu/image_sys + | `-- xml + | |-- rnc + | |-- rng + | `-- xsd + |-- sitemaps + |-- txt + |-- xhtml + `-- xml +.fi + + +.BR +#by: filetype subject_dir/html/filename/manifest.en.html +.SH BY FILENAME + + +.BR +The by filename directory structure places most output of a particular file +(the different filetypes) in a common directory. + +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: filename +.nf + |-- epub + |-- po4a + |-- live-manual + | |-- po + | |-- fr + | `-- pot + |-- _sisu + | |-- css + | |-- image + | |-- image_sys -> ../../_sisu/image_sys + | `-- xml + | |-- rnc + | |-- rng + | `-- xsd + |-- sitemaps + |-- src + |-- pod + `-- viral_spiral.david_bollier +.fi + + +.BR +#by: filename subject_dir/filename/manifest.en.html +.SH REMOTE DIRECTORIES + +.nf + ./subject_name/ + +% containing sub_directories named after the generated files from which they are made + + ./subject_name/src + +% contains shared source files text and binary e.g. sisu_manual.sst and sisu_manual.sst.zip + + ./subject_name/_sisu + +% configuration file e.g. sisurc.yml + + ./subject_name/_sisu/skin + +% skins in various skin directories doc, dir, site, yml + + ./subject_name/_sisu/css + + ./subject_name/_sisu/image + +% images for documents contained in this directory + + ./subject_name/_sisu/mm +.fi + +.SH SISUPOD + +.nf + ./sisupod/ + +% files stored at this level e.g. sisu_manual.sst + + ./sisupod/_sisu + +% configuration file e.g. 
sisurc.yml + + ./sisupod/_sisu/skin + +% skins in various skin directories doc, dir, site, yml + + ./sisupod/_sisu/css + + ./sisupod/_sisu/image + +% images for documents contained in this directory + + ./sisupod/_sisu/mm +.fi + +.SH HOMEPAGES + + +.BR + +.B SiSU +is about the ability to auto-generate documents. Home pages are regarded as +custom built items, and are not created by +.B SiSU. +More accurately, +.B SiSU +has a default home page, which will not be appropriate for use with other +sites, and the means to provide your own home page instead in one of two ways +as part of a site's configuration, these being: + +.BR +1. through placing your home page and other custom built documents in the +subdirectory _sisu/home/ (this probably being the easier and more convenient +option) + +.BR +2. through providing what you want as the home page in a skin, + +.BR +Document sets are contained in directories, usually organised by site or +subject. Each directory can/should have its own homepage. See the section on +directory structure and organisation of content. +.SH HOME PAGE AND OTHER CUSTOM BUILT PAGES IN A SUB-DIRECTORY + + +.BR +Custom built pages, including the home page index.html may be placed within the +configuration directory _sisu/home/ in any of the locations that is searched +for the configuration directory, namely ./_sisu ; ~/_sisu ; /etc/sisu From +there they are copied to the root of the output directory with the command: + +.BR + sisu -CC +.SH MARKUP AND OUTPUT EXAMPLES + +.SH MARKUP EXAMPLES + + +.BR +Current markup examples and document output samples are provided off + or and in the sisu +-markup-sample package available off + +.BR +For some documents hardly any markup at all is required at all, other than a +header, and an indication that the levels to be taken into account by the +program in generating its output are. 
+.SH SISU MARKUP SAMPLES + + +.BR +A few additional sample books prepared as sisu markup samples, output formats +to be generated using +.B SiSU +are contained in a separate package sisu -markup-samples. sisu -markup-samples +contains books (prepared using sisu markup), that were released by their +authors various licenses mostly different Creative Commons licences that do not +permit inclusion in the +.B Debian +Project as they have requirements that do not meet the +.B Debian +Free Software Guidelines for various reasons, most commonly that they require +that the original substantive text remain unchanged, and sometimes that the +works be used only non-commercially. + +.BR + +.I Accelerando, +Charles Stross (2005) +accelerando.charles_stross.sst + +.BR + +.I Alice's Adventures in Wonderland, +Lewis Carroll (1865) +alices_adventures_in_wonderland.lewis_carroll.sst + +.BR + +.I CONTENT, +Cory Doctorow (2008) +content.cory_doctorow.sst + +.BR + +.I Democratizing Innovation, +Eric von Hippel (2005) +democratizing_innovation.eric_von_hippel.sst + +.BR + +.I Down and Out in the Magic Kingdom, +Cory Doctorow (2003) +down_and_out_in_the_magic_kingdom.cory_doctorow.sst + +.BR + +.I For the Win, +Cory Doctorow (2010) +for_the_win.cory_doctorow.sst + +.BR + +.I Free as in Freedom - Richard Stallman's Crusade for Free Software, +Sam Williams (2002) +free_as_in_freedom.richard_stallman_crusade_for_free_software.sam_williams.sst + +.BR + +.I Free as in Freedom 2.0 - Richard Stallman and the Free Software Revolution, +Sam Williams (2002), Richard M. 
Stallman (2010) +free_as_in_freedom_2.richard_stallman_and_the_free_software_revolution.sam_williams.richard_stallman.sst + +.BR + +.I Free Culture - How Big Media Uses Technology and the Law to Lock Down +Culture and Control Creativity, +Lawrence Lessig (2004) +free_culture.lawrence_lessig.sst + +.BR + +.I Free For All - How Linux and the Free Software Movement Undercut the High +Tech Titans, +Peter Wayner (2002) +free_for_all.peter_wayner.sst + +.BR + +.I GNU GENERAL PUBLIC LICENSE v2, +Free Software Foundation (1991) +gpl2.fsf.sst + +.BR + +.I GNU GENERAL PUBLIC LICENSE v3, +Free Software Foundation (2007) +gpl3.fsf.sst + +.BR + +.I Gulliver's Travels, +Jonathan Swift (1726 / 1735) +gullivers_travels.jonathan_swift.sst + +.BR + +.I Little Brother, +Cory Doctorow (2008) +little_brother.cory_doctorow.sst + +.BR + +.I The Cathederal and the Bazaar, +Eric Raymond (2000) +the_cathedral_and_the_bazaar.eric_s_raymond.sst + +.BR + +.I The Public Domain - Enclosing the Commons of the Mind, +James Boyle (2008) +the_public_domain.james_boyle.sst + +.BR + +.I The Wealth of Networks - How Social Production Transforms Markets and +Freedom, +Yochai Benkler (2006) +the_wealth_of_networks.yochai_benkler.sst + +.BR + +.I Through the Looking Glass, +Lewis Carroll (1871) +through_the_looking_glass.lewis_carroll.sst + +.BR + +.I Two Bits - The Cultural Significance of Free Software, +Christopher Kelty (2008) +two_bits.christopher_kelty.sst + +.BR + +.I UN Contracts for International Sale of Goods, +UN (1980) +un_contracts_international_sale_of_goods_convention_1980.sst + +.BR + +.I Viral Spiral, +David Bollier (2008) +viral_spiral.david_bollier.sst +.SH SISU SEARCH - INTRODUCTION + + +.BR +Because the document structure of sites created is clearly defined, and the +text +.I object citation system +is available hypothetically at least, for all forms of output, it is possible +to search the sql database, and either read results from that database, or map +the results to the html or 
other output, which has richer text markup. + +.BR + +.B SiSU +can populate a relational sql type database with documents at an object level, +including objects numbers that are shared across different output types. Making +a document corpus searchable with that degree of granularity. Basically, your +match criteria is met by these documents and at these locations within each +document, which can be viewed within the database directly or in various output +formats. + +.BR + +.B SiSU +can populate an sql database (sqlite3 or postgresql) with documents made up of +their objects. It also can generate a cgi search form that can be used to query +the database. + +.BR +In order to use the built in search functionality you would take the following +steps. + +.BR +- use sisu to populate an sql database with with a sisu markup content + +.BR + * sqlite3 should work out of the box + +.BR + * postgresql may require some initial database configuration + +.BR +- provide a way to query the database, which sisu can assist with by + +.BR + * generating a sample ruby cgi search form, required (sisu configuration + recommended) + +.BR + * adding a query field for this search form to be added to all html files + (sisu configuration required) +.SH SQL + +.SH POPULATE THE DATABASE + + +.BR +TO populate the sql database, run sisu against a sisu markup file with one of +the following sets of flags +.nf +sisu --sqlite filename.sst +.fi + + +.BR +creates an sqlite3 database containing searchable content of just the sisu +markup document selected +.nf +sisu --sqlite --update filename.sst +.fi + + +.BR +creates an sqlite3 database containing searchable content of marked up +document(s) selected by the user from a common directory +.nf +sisu --pg --update filename.sst +.fi + + +.BR +fills a postgresql database with searchable content of marked up document(s) +selected by the user from a common directory + +.BR +For postgresql the first time the command is run in a given directory the user 
+will be prompted to create the requisite database, at the time of writing the +prompt sisu provides is as follows: +.nf +no connection with pg database established, you may need to run: + createdb "SiSU.7a.current" + after that don't forget to run: + sisu --pg --createall + before attempting to populate the database +.fi + + +.BR +The named database that sisu expects to find must exist and if necessary be +created using postgresql tools. If the database exist but the database tables +do not, sisu will attempt to create the tables it needs, the equivalent of the +requested sisu --pg --createall command. + +.BR +Once this is done, the sql database is populated and ready to be queried. +.SH SQL TYPE DATABASES + + +.BR + +.B SiSU +feeds sisu markup documents into sql type databases +.I PostgreSQL +[^13] and/or +.I SQLite +[^14] database together with information related to document structure. + +.BR +This is one of the more interesting output forms, as all the structural data of +the documents are retained (though can be ignored by the user of the database +should they so choose). All site texts/documents are (currently) streamed to +four tables: + +.BR + * one containing semantic (and other) headers, including, title, author, + subject, (the + .I Dublin Core. + ..); + +.BR + * another the substantive texts by individual "paragraph" (or object) - along + with structural information, each paragraph being identifiable by its + paragraph number (if it has one which almost all of them do), and the + substantive text of each paragraph quite naturally being searchable (both in + formatted and clean text versions for searching); and + +.BR + * a third containing endnotes cross-referenced back to the paragraph from + which they are referenced (both in formatted and clean text versions for + searching). + +.BR + * a fourth table with a one to one relation with the headers table contains + full text versions of output, eg. pdf, html, xml, and + .I ascii. 
+ +.BR +There is of course the possibility to add further structures. + +.BR +At this level +.B SiSU +loads a relational database with documents chunked into objects, their smallest +logical structurally constituent parts, as text objects, with their object +citation number and all other structural information needed to construct the +document. Text is stored (at this text object level) with and without +elementary markup tagging, the stripped version being so as to facilitate ease +of searching. + +.BR +Being able to search a relational database at an object level with the +.B SiSU +citation system is an effective way of locating content generated by +.B SiSU. +As individual text objects of a document stored (and indexed) together with +object numbers, and all versions of the document have the same numbering, +complex searches can be tailored to return just the locations of the search +results relevant for all available output formats, with live links to the +precise locations in the database or in html/xml documents; or, the structural +information provided makes it possible to search the full contents of the +database and have headings in which search content appears, or to search only +headings etc. (as the +.I Dublin Core +is incorporated it is easy to make use of that as well). +.SH POSTGRESQL + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system, +postgresql dependency package +.SH DESCRIPTION + + +.BR +Information related to using postgresql with sisu (and related to the +sisu_postgresql dependency package, which is a dummy package to install +dependencies needed for +.B SiSU +to populate a postgresql database, this being part of +.B SiSU +- man sisu) . 
+.SH SYNOPSIS + + +.BR + sisu -D [instruction] [filename/wildcard if required] + +.BR + sisu -D --pg --[instruction] [filename/wildcard if required] +.SH COMMANDS + + +.BR +Mappings to two databases are provided by default, postgresql and sqlite, the +same commands are used within sisu to construct and populate databases however +-d (lowercase) denotes sqlite and -D (uppercase) denotes postgresql, +alternatively --sqlite or --pgsql may be used + +.BR + +.B -D or --pgsql +may be used interchangeably. +.SH CREATE AND DESTROY DATABASE + +.TP +.B --pgsql --createall +initial step, creates required relations (tables, indexes) in existing +(postgresql) database (a database should be created manually and given the same +name as working directory, as requested) (rb.dbi) +.TP +.B sisu -D --createdb +creates database where no database existed before +.TP +.B sisu -D --create +creates database tables where no database tables existed before +.TP +.B sisu -D --Dropall +destroys database (including all its content)! kills data and drops tables, +indexes and database associated with a given directory (and directories of the +same name). +.TP +.B sisu -D --recreate +destroys existing database and builds a new empty database structure +.SH IMPORT AND REMOVE DOCUMENTS + +.TP +.B sisu -D --import -v [filename/wildcard] +populates database with the contents of the file. Imports documents(s) +specified to a postgresql database (at an object level). +.TP +.B sisu -D --update -v [filename/wildcard] +updates file contents in database +.TP +.B sisu -D --remove -v [filename/wildcard] +removes specified document from postgresql database. +.SH SQLITE + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system. 
+.SH DESCRIPTION + + +.BR +Information related to using sqlite with sisu (and related to the sisu_sqlite +dependency package, which is a dummy package to install dependencies needed for +.B SiSU +to populate an sqlite database, this being part of +.B SiSU +- man sisu) . +.SH SYNOPSIS + + +.BR + sisu -d [instruction] [filename/wildcard if required] + +.BR + sisu -d --(sqlite|pg) --[instruction] [filename/wildcard if required] +.SH COMMANDS + + +.BR +Mappings to two databases are provided by default, postgresql and sqlite, the +same commands are used within sisu to construct and populate databases however +-d (lowercase) denotes sqlite and -D (uppercase) denotes postgresql, +alternatively --sqlite or --pgsql may be used + +.SH CREATE AND DESTROY DATABASE + +.TP +.B --sqlite --createall +initial step, creates required relations (tables, indexes) in existing (sqlite) +database (a database should be created manually and given the same name as +working directory, as requested) (rb.dbi) +.TP +.B sisu -d --createdb +creates database where no database existed before +.TP +.B sisu -d --create +creates database tables where no database tables existed before +.TP +.B sisu -d --dropall +destroys database (including all its content)! kills data and drops tables, +indexes and database associated with a given directory (and directories of the +same name). +.TP +.B sisu -d --recreate +destroys existing database and builds a new empty database structure +.SH IMPORT AND REMOVE DOCUMENTS + +.TP +.B sisu -d --import -v [filename/wildcard] +populates database with the contents of the file. Imports documents(s) +specified to an sqlite database (at an object level). +.TP +.B sisu -d --update -v [filename/wildcard] +updates file contents in database +.TP +.B sisu -d --remove -v [filename/wildcard] +removes specified document from sqlite database. 
+.SH CGI SEARCH FORM + + +.BR +For the search form, which is a single search page + +.BR +- configure the search form + +.BR +- generate the sample search form with the sisu command, (this will be based on +the configuration settings and existing found sisu databases) + +.BR +For postgresql web content you may need to edit the search cgi script. Two +things to look out for are that the user is set as needed, and that the any +different databases that you wish to be able to query are listed. + +.BR +correctly, you may want www-data rather than your username. +.nf +@user='www-data' +.fi + + +.BR +- check the search form, copy it to the appropriate cgi directory and set the +correct permissions + +.BR +For a search form to appear on each html page, you need to: + +.BR +- rely on the above mentioned configuration of the search form + +.BR +- configure the html search form to be on + +.BR +- run the html command +.SH SETUP SEARCH FORM + + +.BR +You will need a web server, httpd with cgi enabled, and a postgresql database +to which you are able to create databases. 
+ +.BR +Setup postgresql, make sure you are able to create and write to the database, +e.g.: +.nf +sudo su postgres + createuser -d -a ralph +.fi + + +.BR +You then need to create the database that sisu will use, for sisu manual in the +directory manual/en for example, (when you try to populate a database that does +not exist sisu prompts as to whether it exists): +.nf +createdb SiSU.7a.manual +.fi + + +.BR + +.B SiSU +is then able to create the required tables that allow you to populate the +database with documents in the directory for which it has been created: +.nf +sisu --pg --createall -v +.fi + + +.BR +You can then start to populate the database, in this example with a single +document: +.nf +sisu --pg --update -v en/sisu_manual.ssm +.fi + + +.BR +To create a sample search form, from within the same directory run: +.nf +sisu --sample-search-form --db-pg +.fi + + +.BR +and copy the resulting cgi form to your cgi-bin directory + +.BR +A sample setup for nginx is provided that assumes data will be stored under +/srv/www and cgi scripts under /srv/cgi +.SH SEARCH - DATABASE FRONTEND SAMPLE, UTILISING DATABASE AND SISU FEATURES, +INCLUDING OBJECT CITATION NUMBERING (BACKEND CURRENTLY POSTGRESQL) + + +.BR +Sample search frontend [^15] A small database and +sample query front-end (search from) that makes use of the citation system, .I +object citation numbering +to demonstrates functionality.[^16] + +.BR + +.B SiSU +can provide information on which documents are matched and at what locations +within each document the matches are found. These results are relevant across +all outputs using +.I object citation numbering, +which includes html, +.I XML, +.I EPUB, +.I LaTeX, +.I PDF +and indeed the +.I SQL +database. You can then refer to one of the other outputs or in the +.I SQL +database expand the text within the matched objects (paragraphs) in the +documents matched. 
+ +.BR +Note you may set results either for documents matched and object number +locations within each matched document meeting the search criteria; or display +the names of the documents matched along with the objects (paragraphs) that +meet the search criteria.[^17] +.TP +.B sisu -F --webserv-webrick +builds a cgi web search frontend for the database created + +.BR +The following is feedback on the setup on a machine provided by the help +command: + +.BR + sisu --help sql +.nf +Postgresql + user: ralph + current db set: SiSU_sisu + port: 5432 + dbi connect: DBI:Pg:database=SiSU_sisu;port=5432 + +sqlite + current db set: /home/ralph/sisu_www/sisu/sisu_sqlite.db + dbi connect DBI:SQLite:/home/ralph/sisu_www/sisu/sisu_sqlite.db +.fi + +.BR +Note on databases built + +.BR +By default, [unless otherwise specified] databases are built on a directory +basis, from collections of documents within that directory. The name of the +directory you choose to work from is used as the database name, i.e. if you are +working in a directory called /home/ralph/ebook the database SiSU_ebook is +used. 
[otherwise a manual mapping for the collection is necessary] + +.SH SEARCH FORM + +.TP +.B sisu -F +generates a sample search form, which must be copied to the web-server cgi +directory +.TP +.B sisu -F --webserv-webrick +generates a sample search form for use with the webrick server, which must be +copied to the web-server cgi directory +.TP +.B sisu -W +starts the webrick server which should be available wherever sisu is properly +installed + +.BR +The generated search form must be copied manually to the webserver directory as +instructed +.SH SISU_WEBRICK + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system +.SH SYNOPSIS + + +.BR +sisu_webrick [port] + +.BR +or + +.BR +sisu -W [port] +.SH DESCRIPTION + + +.BR +sisu_webrick is part of +.B SiSU +(man sisu) sisu_webrick starts +.B Ruby +' s Webrick web-server and points it to the directories to which +.B SiSU +output is written, providing a list of these directories (assuming +.B SiSU +is in use and they exist). + +.BR +The default port for sisu_webrick is set to 8081, this may be modified in the +yaml file: ~/.sisu/sisurc.yml a sample of which is provided as +/etc/sisu/sisurc.yml (or in the equivalent directory on your system). +.SH SUMMARY OF MAN PAGE + + +.BR +sisu_webrick, may be started on it's own with the command: sisu_webrick [port] +or using the sisu command with the -W flag: sisu -W [port] + +.BR +where no port is given and settings are unchanged the default port is 8081 +.SH DOCUMENT PROCESSING COMMAND FLAGS + + +.BR +sisu -W [port] starts +.B Ruby +Webrick web-server, serving +.B SiSU +output directories, on the port provided, or if no port is provided and the +defaults have not been changed in ~/.sisu/sisurc.yaml then on port 8081 +.SH SUMMARY OF FEATURES + + +.BR +- sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a +single +.I UTF-8 +file using a minimalistic mnemonic syntax. 
Typical literature, documents like +"War and Peace" require almost no markup, and most of the headers are optional. + +.BR +- markup is easily readable/parsable by the human eye, (basic markup is simpler +and more sparse than the most basic +.I HTML +) , [this may also be converted to +.I XML +representations of the same input/source document]. + +.BR +- markup defines document structure (this may be done once in a header +pattern-match description, or for heading levels individually); basic text +attributes (bold, italics, underscore, strike-through etc.) as required; and +semantic information related to the document (header information, extended +beyond the Dublin core and easily further extended as required); the headers +may also contain processing instructions. +.B SiSU +markup is primarily an abstraction of document structure and document metadata +to permit taking advantage of the basic strengths of existing alternative +practical standard ways of representing documents [be that browser viewing, +paper publication, sql search etc.] 
(html, epub, xml, odf, latex, pdf, sql) + +.BR +- for output produces reasonably elegant output of established industry and +institutionally accepted open standard formats.[3] takes advantage of the +different strengths of various standard formats for representing documents, +amongst the output formats currently supported are: + +.BR +* +.I HTML +- both as a single scrollable text and a segmented document + +.BR +* +.I XHTML + +.BR +* +.I EPUB + +.BR +* +.I XML +- both in sax and dom style xml structures for further development as required + +.BR +* +.I ODT +- Open Document Format text, the iso standard for document storage + +.BR +* +.I LaTeX +- used to generate pdf + +.BR +* +.I PDF +(via +.I LaTeX +) + +.BR +* +.I SQL +- population of an sql database ( +.I PostgreSQL +or +.I SQLite +) , (at the same object level that is used to cite text within a document) + +.BR +Also produces: concordance files; document content certificates (md5 or sha256 +digests of headings, paragraphs, images etc.) and html manifests (and sitemaps +of content). (b) takes advantage of the strengths implicit in these very +different output types, (e.g. PDFs produced using typesetting of +.I LaTeX, +databases populated with documents at an individual object/paragraph level, +making possible +.I granular search +(and related possibilities)) + +.BR +- ensuring content can be cited in a meaningful way regardless of selected +output format. Online publishing (and publishing in multiple document formats) +lacks a useful way of citing text internally within documents (important to +academics generally and to lawyers) as page numbers are meaningless across +browsers and formats. sisu seeks to provide a common way of pinpoint the text +within a document, (which can be utilized for citation and by search engines). 
+The outputs share a common numbering system that is meaningful (to man and +machine) across all digital outputs whether paper, screen, or database +oriented, (pdf, +.I HTML, +.I EPUB, +xml, sqlite, postgresql) , this numbering system can be used to reference +content. + +.BR +- Granular search within documents. +.I SQL +databases are populated at an object level (roughly headings, paragraphs, +verse, tables) and become searchable with that degree of granularity, the +output information provides the object/paragraph numbers which are relevant +across all generated outputs; it is also possible to look at just the matching +paragraphs of the documents in the database; [output indexing also work well +with search indexing tools like hyperestraier]. + +.BR +- long term maintainability of document collections in a world of changing +formats, having a very sparsely marked-up source document base. there is a +considerable degree of future-proofing, output representations are +"upgradeable", and new document formats may be added. e.g. addition of odf +(open document text) module in 2006, epub in 2009 and in future html5 output +sometime in future, without modification of existing prepared texts + +.BR +* +.I SQL +search aside, documents are generated as required and static once generated. 
+ +.BR +- documents produced are static files, and may be batch processed, this needs +to be done only once but may be repeated for various reasons as desired +(updated content, addition of new output formats, updated technology document +presentations/representations) + +.BR +- document source ( +.I plaintext +utf-8) if shared on the net may be used as input and processed locally to +produce the different document outputs + +.BR +- document source may be bundled together (automatically) with associated +documents (multiple language versions or master document with inclusions) and +images and sent as a zip file called a sisupod, if shared on the net these too +may be processed locally to produce the desired document outputs + +.BR +- generated document outputs may automatically be posted to remote sites. + +.BR +- for basic document generation, the only software dependency is +.B Ruby, +and a few standard Unix tools (this covers +.I plaintext, +.I HTML, +.I EPUB, +.I XML, +.I ODF, +.I LaTeX +) . To use a database you of course need that, and to convert the +.I LaTeX +generated to pdf, a latex processor like tetex or texlive. + +.BR +- as a developers tool it is flexible and extensible + +.BR +Syntax highlighting for +.B SiSU +markup is available for a number of text editors. + +.BR + +.B SiSU +is less about document layout than about finding a way with little markup to be +able to construct an abstract representation of a document that makes it +possible to produce multiple representations of it which may be rather +different from each other and used for different purposes, whether layout and +publishing, or search of content + +.BR +i.e. 
to be able to take advantage from this minimal preparation starting point +of some of the strengths of rather different established ways of representing +documents for different purposes, whether for search (relational database, or +indexed flat files generated for that purpose whether of complete documents, or +say of files made up of objects), online viewing (e.g. html, xml, pdf) , or +paper publication (e.g. pdf) ... + +.BR +the solution arrived at is by extracting structural information about the +document (about headings within the document) and by tracking objects (which +are serialized and also given hash values) in the manner described. It makes +possible representations that are quite different from those offered at +present. For example objects could be saved individually and identified by +their hashes, with an index of how the objects relate to each other to form a +document. +.TP +.BI *1. +square brackets + +.BR +.TP +.BI *2. +square brackets + +.BR +.TP +.BI +1. +square brackets + +.BR +.TP +.BI 1. + + +.BR +.TP +.BI 2. + + +.BR +.TP +.BI 3. +From sometime after SiSU 0.58 it should be possible to describe SiSU markup +using SiSU, which though not an original design goal is useful. + +.BR +.TP +.BI 4. +files should be prepared using UTF-8 character encoding + +.BR +.TP +.BI 5. +a footnote or endnote + +.BR +.TP +.BI 6. +self contained endnote marker & endnote in one + +.BR +.TP +.BI *. +unnumbered asterisk footnote/endnote, insert multiple asterisks if required + +.BR +.TP +.BI **. +another unnumbered asterisk footnote/endnote + +.BR +.TP +.BI *3. +editors notes, numbered asterisk footnote/endnote series + +.BR +.TP +.BI +2. +editors notes, numbered plus symbol footnote/endnote series + +.BR +.TP +.BI 7. + + +.BR +.TP +.BI 8. + + +.BR +.TP +.BI 9. +Table from the Wealth of Networks by Yochai Benkler + + +.BR +.TP +.BI 10. +for which you may alternatively use the full form author: title: and year: + +.BR +.TP +.BI 11. 
+Quixote and Panza, Taming Windmills (1605), pp 1000 - 1001 also, Benkler, Wealth of Networks (2006), p 1 + +.BR +.TP +.BI 12. +SiSU has worked this way in the past, though this was dropped as it was +thought the complexity outweighed the flexibility, however, the balance was +rather fine and this behaviour could be reinstated. + +.BR +.TP +.BI 13. + + + +.BR +.TP +.BI 14. + + +.BR +.TP +.BI 15. + + +.BR +.TP +.BI 16. +(which could be extended further with current back-end). As regards scaling +of the database, it is as scalable as the database (here Postgresql) and +hardware allow. + +.BR +.TP +.BI 17. +of this feature when demonstrated to an IBM software innovations evaluator +in 2004 he said to paraphrase: this could be of interest to us. We have large +document management systems, you can search hundreds of thousands of documents +and we can tell you which documents meet your search criteria, but there is no +way we can tell you without opening each document where within each your +matches are found. + +.BR + +.TP +.SH SEE ALSO + sisu(1), + sisu-epub(1), + sisu-harvest(1), + sisu-html(1), + sisu-odf(1), + sisu-pdf(1), + sisu-pg(1), + sisu-sqlite(1), + sisu-txt(1). 
+ sisu_vim(7) +.TP +.SH HOMEPAGE + More information about SiSU can be found at or +.TP +.SH SOURCE + +.TP +.SH AUTHOR + SiSU is written by Ralph Amissah +#+END_SRC diff --git a/org/spine_info.org b/org/spine_info.org index 04b91a6..3dc942c 100644 --- a/org/spine_info.org +++ b/org/spine_info.org @@ -16,7 +16,7 @@ * README -#+BEGIN_SRC txt :tangle "../README" +#+BEGIN_SRC txt :NO-tangle "../README" project_name: Spine, Doc Reform description: [ "documents, structuring, processing, publishing", @@ -45,46 +45,46 @@ project_name: Spine, Doc Reform *** project (project root) ./ #+BEGIN_SRC txt :tangle "../COPYRIGHT" -<> -<> -<> +<> +<> +<> #+END_SRC *** code source ./src #+BEGIN_SRC txt :tangle "../src/COPYRIGHT" -<> -<> -<> +<> +<> +<> #+END_SRC *** code source ./src/spine #+BEGIN_SRC txt :tangle "../src/doc_reform/COPYRIGHT" -<> -<> -<> +<> +<> +<> #+END_SRC *** org files ./org #+BEGIN_SRC txt :tangle "./COPYRIGHT" -<> -<> +<> +<> #+END_SRC -*** data (markup samples) ./data +*** doc (markup samples) ./doc -#+BEGIN_SRC txt :tangle "../data/COPYRIGHT" +#+BEGIN_SRC txt :NO-tangle "../doc/COPYRIGHT" <> #+END_SRC ** incorporate *** copyright -#+NAME: spine_copyright +#+NAME: sisu_spine_copyright #+BEGIN_SRC txt -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -96,10 +96,11 @@ project_name: Spine, Doc Reform #+END_SRC *** license +**** AGPLv3 -#+NAME: spine_license_agpl3 +#+NAME: sisu_spine_license_agpl3 #+BEGIN_SRC txt - - code under src/ + - code under src/ & org/ - License: AGPL 3 or later: Spine, Doc Reform (SiSU), a framework for document structuring, publishing and @@ -129,27 +130,33 @@ project_name: Spine, Doc Reform *** tail -#+NAME: spine_extra +#+NAME: sisu_spine_extra #+BEGIN_SRC txt - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on SiSU markup) - standard SiSU markup syntax with 
modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering + - Hompages: + [http://www.sisudoc.org] + + +#+END_SRC + +#+BEGIN_SRC txt - Hompages: [http://www.doc_reform.org] [http://www.sisudoc.org] - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - + [] #+END_SRC -*** data -#+NAME: spine_extra +*** doc + +#+NAME: sisu_spine_extra #+BEGIN_SRC txt - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] diff --git a/org/util_cgi_d_sqlite_search.org b/org/util_cgi_d_sqlite_search.org index 43844a5..07a6d15 100644 --- a/org/util_cgi_d_sqlite_search.org +++ b/org/util_cgi_d_sqlite_search.org @@ -23,7 +23,7 @@ dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/ ** 0. set program tangle -#+BEGIN_SRC d :tangle "../util/d/cgi/search/src/spine_cgi_sqlite_search.d" +#+BEGIN_SRC d :tangle "../misc/util/d/cgi/search/src/spine_cgi_sqlite_search.d" <> <> void cgi_function_intro(Cgi cgi) { @@ -1134,8 +1134,8 @@ LIMIT %s OFFSET %s * cgi-search dub.sdl -#+BEGIN_SRC d :tangle "../util/d/cgi/search/dub.sdl" -name "spine-search" +#+BEGIN_SRC d :tangle "../misc/util/d/cgi/search/dub.sdl" +name "spine_search" description "A minimal D application." 
authors "ralph" copyright "Copyright © 2020, ralph" @@ -1164,7 +1164,7 @@ configuration "default" { * cgi-search README -#+BEGIN_SRC text :tangle "../util/d/cgi/search/README" +#+BEGIN_SRC text :tangle "../misc/util/d/cgi/search/README" change db name to match name of db you create cv.db_selected = "spine.search.sql.db"; diff --git a/org/util_cgi_rb_fcgi_sqlite_search.org b/org/util_cgi_rb_fcgi_sqlite_search.org index 3460b32..124ac25 100644 --- a/org/util_cgi_rb_fcgi_sqlite_search.org +++ b/org/util_cgi_rb_fcgi_sqlite_search.org @@ -31,7 +31,7 @@ document home currently at /var/www/html (better placed at /var/www or srv/proje *** sqlite cgi search TODO **** head -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" :tangle-mode (identity #o755) :shebang #!/usr/bin/env ruby +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" :tangle-mode (identity #o755) :shebang #!/usr/bin/env ruby =begin * Name: SiSU information Structuring Universe * Author: Ralph Amissah @@ -91,7 +91,7 @@ document home currently at /var/www/html (better placed at /var/www or srv/proje **** initialize -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" begin require 'cgi' require 'fcgi' @@ -117,7 +117,7 @@ end **** form ***** initialize form -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" class Form def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') search_note = '' if checked_searched !~ /\S/ @@ -133,7 +133,7 @@ class Form ***** submission form -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" def submission_form search_form =<<-WOK @@ -218,7 +218,7 @@ end 
**** search request ***** initialize request -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" class SearchRequest #% search_for attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:src_filename_base def initialize(search_field='',q='') @@ -293,7 +293,7 @@ class SearchRequest #% sea ***** text to match -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" def text_to_match(identifier='') m={ string: /#{identifier}\s*(.+?)/, @@ -318,7 +318,7 @@ end **** search string -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" class DBI_SearchString def initialize(l,t,q,cse=false) @l,@t,@q=l,t,q @@ -346,7 +346,7 @@ end **** search statement ***** init -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" class DBI_SearchStatement attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit def initialize(conn,search_for,q,c) @@ -453,7 +453,7 @@ class DBI_SearchStatement ***** misc -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" def sql_offset @@offset end @@ -540,7 +540,7 @@ class DBI_SearchStatement ***** select statement -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" def sql_select_body limit ||= @@limit offset ||= @@offset @@ -565,7 +565,7 @@ end **** tail -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle 
"../misc/util/rb/cgi/spine.search.cgi" def tail <<-'WOK'


@@ -673,7 +673,7 @@ end **** fcgi each ***** init & start loop -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" @tail=tail @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 @counters_txt,@counters_endn,@sql_select_body='','','' @@ -748,7 +748,7 @@ FCGI.each_cgi do |cgi| ***** canned search -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" #Canned_search.new(@base,@search_for.text1,cgi) if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ s1 = 's1=' + CGI.escape(@search_for.text1) if @search_for.text1 =~ /\S/ @@ -849,7 +849,7 @@ FCGI.each_cgi do |cgi| ***** submission form -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" @header = Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ print "Content-type: text/html\n\n" @@ -944,7 +944,7 @@ FCGI.each_cgi do |cgi| ***** contents each, text body -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" #% text_objects_body s_contents.each do |c| #% text body location=c['src_filename_base'][/(.+?)\.(?:ssm\.sst|sst)$/,1] @@ -1040,7 +1040,7 @@ FCGI.each_cgi do |cgi| ***** after -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" olduid = "" offset=dbi_statement.sql_offset.to_s limit=dbi_statement.sql_match_limit.to_s diff --git a/src/COPYRIGHT b/src/COPYRIGHT index 
8cba1e7..05e171f 100644 --- a/src/COPYRIGHT +++ b/src/COPYRIGHT @@ -1,4 +1,4 @@ -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -7,7 +7,7 @@ - Copyright: (C) 2015 - 2020 Ralph Amissah - - code under src/ + - code under src/ & org/ - License: AGPL 3 or later: Spine, Doc Reform (SiSU), a framework for document structuring, publishing and @@ -34,19 +34,15 @@ [http://www.gnu.org/licenses/agpl.html] - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on SiSU markup) - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering - Hompages: - [http://www.doc_reform.org] [http://www.sisudoc.org] - - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] diff --git a/src/doc_reform/COPYRIGHT b/src/doc_reform/COPYRIGHT index 8cba1e7..05e171f 100644 --- a/src/doc_reform/COPYRIGHT +++ b/src/doc_reform/COPYRIGHT @@ -1,4 +1,4 @@ -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -7,7 +7,7 @@ - Copyright: (C) 2015 - 2020 Ralph Amissah - - code under src/ + - code under src/ & org/ - License: AGPL 3 or later: Spine, Doc Reform (SiSU), a framework for document structuring, publishing and @@ -34,19 +34,15 @@ [http://www.gnu.org/licenses/agpl.html] - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on 
SiSU markup) - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering - Hompages: - [http://www.doc_reform.org] [http://www.sisudoc.org] - - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] diff --git a/util/d/cgi/search/README b/util/d/cgi/search/README deleted file mode 100644 index eb8fcde..0000000 --- a/util/d/cgi/search/README +++ /dev/null @@ -1,11 +0,0 @@ -change db name to match name of db you create -cv.db_selected = "spine.search.sql.db"; - -~dr/bin/spine-ldc -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --cgi-sqlite-search-filename="spine-search" --output=/var/www ~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/* - -~dr/bin/spine-ldc -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=/var/www ~grotto/repo/git.repo/code/project-spine/doc-reform-markup/markup_samples/markup/pod/* - -cd util/d/cgi/search/src -dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. - -http://localhost/cgi-bin/spine-search? diff --git a/util/d/cgi/search/dub.sdl b/util/d/cgi/search/dub.sdl deleted file mode 100644 index c1c775c..0000000 --- a/util/d/cgi/search/dub.sdl +++ /dev/null @@ -1,14 +0,0 @@ -name "spine-search" -description "A minimal D application." 
-authors "ralph" -copyright "Copyright © 2020, ralph" -license "GPL-3.0+" -dependency "d2sqlite3" version="~>0.18.3" -targetType "executable" -targetPath "./cgi-bin" -mainSourceFile "src/spine_cgi_sqlite_search.d" -configuration "default" { - targetType "executable" - targetName "spine-search" - postGenerateCommands "/usr/bin/notify-send -t 0 'D executable ready' 'spine cgi sqlite search d'" -} diff --git a/util/d/cgi/search/dub.selections.json b/util/d/cgi/search/dub.selections.json deleted file mode 100644 index e6e1b26..0000000 --- a/util/d/cgi/search/dub.selections.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "fileVersion": 1, - "versions": { - "d2sqlite3": "0.18.3", - "dcgi": "0.1.0" - } -} diff --git a/util/d/cgi/search/localhostsqlitespine.search.sql.db b/util/d/cgi/search/localhostsqlitespine.search.sql.db deleted file mode 100644 index e69de29..0000000 diff --git a/util/d/cgi/search/src/spine_cgi_sqlite_search.d b/util/d/cgi/search/src/spine_cgi_sqlite_search.d deleted file mode 100644 index 1460643..0000000 --- a/util/d/cgi/search/src/spine_cgi_sqlite_search.d +++ /dev/null @@ -1,913 +0,0 @@ -/+ dub.sdl - name "spine search" - description "spine cgi search" -+/ -import std.format; -import std.range; -import std.regex; -import arsd.cgi; -import d2sqlite3; -import std.process : environment; -void cgi_function_intro(Cgi cgi) { - string header; - string table; - string form; - struct Config { - string http_request_type; - string http_host; - // string server_name; - string doc_root; - string cgi_root; - string cgi_script; - string data_path_html; - string db_path; - string query_string; - string http_url; - string request_method; - } - auto conf = Config(); - conf.http_request_type = environment.get("REQUEST_SCHEME", "http"); - conf.http_host = environment.get("HTTP_HOST", "localhost"); - // conf.server_name = environment.get("SERVER_NAME", "localhost"); - conf.doc_root = environment.get("DOCUMENT_ROOT", "/var/www/html"); - conf.cgi_root = 
environment.get("CONTEXT_DOCUMENT_ROOT", "/usr/lib/cgi-bin/"); - // conf.cgi_script = environment.get("SCRIPT_NAME", "/cgi-bin/spine-search"); - conf.query_string = environment.get("QUERY_STRING", ""); - conf.http_url = environment.get("HTTP_REFERER", conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ conf.query_string); - conf.db_path = "/var/www/html/sqlite/"; // conf.http_host ~ "/sqlite/"; - conf.request_method = environment.get("REQUEST_METHOD", "POST"); - struct CGI_val { - string db_selected = ""; - string sql_match_limit = ""; // radio: ( 1000 | 2500 ) - string sql_match_offset = ""; - string search_text = ""; - string results_type = ""; // index - bool checked_echo = false; - bool checked_stats = false; - bool checked_url = false; - bool checked_searched = false; - bool checked_tip = false; - bool checked_sql = false; - } - auto cv = CGI_val(); - cv.db_selected = "spine.search.db"; // config, set db name - auto text_fields() { - string canned_query_str = environment.get("QUERY_STRING", ""); - if ("query_string" in cgi.post) { - canned_query_str = environment.get("QUERY_STRING", ""); - } - string[string] canned_query; - if (conf.request_method == "POST") { - } else if (conf.request_method == "GET") { - foreach (pair_str; canned_query_str.split("&")) { - // cgi.write(pair_str ~ "
"); - string[] pair = pair_str.split("="); - canned_query[pair[0]] = pair[1]; - } - // foreach (field, content; canned_query) { - // cgi.write(field ~ ": " ~ content ~ "
"); - // } - } - static struct Rgx { - // static canned_query = ctRegex!(`\A(?P.+)\Z`, "m"); - static search_text_area = ctRegex!(`\A(?P.+)\Z`, "m"); - // static fulltext = ctRegex!(`\A(?P.+)\Z`, "m"); - static line = ctRegex!(`^(?P.+?)(?: ~|$)`, "m"); - static text = ctRegex!(`(?:^|\s~\s*)text:\s+(?P.+?)(?: ~|$)`, "m"); - static author = ctRegex!(`(?:^|\s~\s*)author:\s+(?P.+)$`, "m"); - static title = ctRegex!(`(?:^|\s~\s*)title:\s+(?P.+)$`, "m"); - static uid = ctRegex!(`(?:^|\s~\s*)uid:\s+(?P.+)$`, "m"); - static fn = ctRegex!(`(?:^|\s~\s*)fn:\s+(?P.+)$`, "m"); - static keywords = ctRegex!(`(?:^|\s~\s*)keywords:\s+(?P.+)$`, "m"); - static topic_register = ctRegex!(`(?:^|\s~\s*)topic_register:\s+(?P.+)$`, "m"); - static subject = ctRegex!(`(?:^|\s~\s*)subject:\s+(?P.+)$`, "m"); - static description = ctRegex!(`(?:^|\s~\s*)description:\s+(?P.+)$`, "m"); - static publisher = ctRegex!(`(?:^|\s~\s*)publisher:\s+(?P.+)$`, "m"); - static editor = ctRegex!(`(?:^|\s~\s*)editor:\s+(?P.+)$`, "m"); - static contributor = ctRegex!(`(?:^|\s~\s*)contributor:\s+(?P.+)$`, "m"); - static date = ctRegex!(`(?:^|\s~\s*)date:\s+(?P.+)$`, "m"); - static results_type = ctRegex!(`(?:^|\s~\s*)type:\s+(?P.+)$`, "m"); - static format = ctRegex!(`(?:^|\s~\s*)format:\s+(?P.+)$`, "m"); - static identifier = ctRegex!(`(?:^|\s~\s*)identifier:\s+(?P.+)$`, "m"); - static source = ctRegex!(`(?:^|\s~\s*)source:\s+(?P.+)$`, "m"); - static language = ctRegex!(`(?:^|\s~\s*)language:\s+(?P.+)$`, "m"); - static relation = ctRegex!(`(?:^|\s~\s*)relation:\s+(?P.+)$`, "m"); - static coverage = ctRegex!(`(?:^|\s~\s*)coverage:\s+(?P.+)$`, "m"); - static rights = ctRegex!(`(?:^|\s~\s*)rights:\s+(?P.+)$`, "m"); - static comment = ctRegex!(`(?:^|\s~\s*)comment:\s+(?P.+)$`, "m"); - // static abstract_ = ctRegex!(`(?:^|\s~\s*)abstract:\s+(?P.+)$`, "m"); - static src_filename_base = ctRegex!(`^src_filename_base:\s+(?P.+)$`, "m"); - } - struct searchFields { - string canned_query = ""; // GET canned_query == cq - 
string search_text_area = ""; // POST search_text_area == tsa - string text = ""; // text == txt - string author = ""; // author == au - string title = ""; // title == ti - string uid = ""; // uid == uid - string fn = ""; // fn == fn - string keywords = ""; // keywords == kw - string topic_register = ""; // topic_register == tr - string subject = ""; // subject == su - string description = ""; // description == de - string publisher = ""; // publisher == pb - string editor = ""; // editor == ed - string contributor = ""; // contributor == ct - string date = ""; // date == dt - string format = ""; // format == fmt - string identifier = ""; // identifier == id - string source = ""; // source == src sfn - string language = ""; // language == lng - string relation = ""; // relation == rl - string coverage = ""; // coverage == cv - string rights = ""; // rights == rgt - string comment = ""; // comment == cmt - // string abstract = ""; - string src_filename_base = ""; // src_filename_base == bfn - string results_type = ""; // results_type == rt radio - string sql_match_limit = ""; // sql_match_limit == sml radio - string sql_match_offset = ""; // sql_match_offset == smo - string stats = ""; // stats == sts checked - string echo = ""; // echo == ec checked - string url = ""; // url == url checked - string searched = ""; // searched == se checked - string sql = ""; // sql == sql checked - } - auto rgx = Rgx(); - auto got = searchFields(); - if (environment.get("REQUEST_METHOD", "POST") == "POST") { - if ("sf" in cgi.post) { - got.search_text_area = cgi.post["sf"]; - if (auto m = got.search_text_area.matchFirst(rgx.text)) { - got.text = m["matched"]; - got.canned_query ~= "sf=" ~ m["matched"]; - } else if (auto m = got.search_text_area.matchFirst(rgx.line)) { - if ( - !(m["matched"].matchFirst(rgx.author)) - && !(m["matched"].matchFirst(rgx.title)) - ) { - got.text = m["matched"]; - got.canned_query ~= "sf=" ~ m["matched"]; - } - } - if (auto m = 
got.search_text_area.matchFirst(rgx.author)) { - got.author = m["matched"]; - got.canned_query ~= "&au=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.title)) { - got.title = m["matched"]; - got.canned_query ~= "&ti=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.uid)) { - got.uid = m["matched"]; - got.canned_query ~= "&uid=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.fn)) { - got.fn = m["matched"]; - got.canned_query ~= "&fn=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.keywords)) { - got.keywords = m["matched"]; - got.canned_query ~= "&kw=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.topic_register)) { - got.topic_register = m["matched"]; - got.canned_query ~= "&tr=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.subject)) { - got.subject = m["matched"]; - got.canned_query ~= "&su=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.description)) { - got.description = m["matched"]; - got.canned_query ~= "&de=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.publisher)) { - got.publisher = m["matched"]; - got.canned_query ~= "&pb=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.editor)) { - got.editor = m["matched"]; - got.canned_query ~= "&ed=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.contributor)) { - got.contributor = m["matched"]; - got.canned_query ~= "&ct=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.date)) { - got.date = m["matched"]; - got.canned_query ~= "&dt=" ~ m["matched"]; - } - // if (auto m = got.search_text_area.matchFirst(rgx.results_type)) { - // got.results_type = m["matched"]; - // got.canned_query ~= "&rt=" ~ m["matched"]; - // } - if (auto m = got.search_text_area.matchFirst(rgx.format)) { - got.format = m["matched"]; - got.canned_query ~= "&fmt=" ~ m["matched"]; - } 
- if (auto m = got.search_text_area.matchFirst(rgx.identifier)) { - got.identifier = m["matched"]; - got.canned_query ~= "&id=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.source)) { - got.source = m["matched"]; - got.canned_query ~= "&src=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.language)) { - got.language = m["matched"]; - got.canned_query ~= "&lng=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.relation)) { - got.relation = m["matched"]; - got.canned_query ~= "&rl=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.coverage)) { - got.coverage = m["matched"]; - got.canned_query ~= "&cv=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.rights)) { - got.rights = m["matched"]; - got.canned_query ~= "&rgt=" ~ m["matched"]; - } - if (auto m = got.search_text_area.matchFirst(rgx.comment)) { - got.comment = m["matched"]; - got.canned_query ~= "&cmt=" ~ m["matched"]; - } - // if (auto m = search_text_area.matchFirst(rgx.abstract)) { - // got.abstract = m["matched"]; - // } - if (auto m = got.search_text_area.matchFirst(rgx.src_filename_base)) { - got.src_filename_base = m["matched"]; - got.canned_query ~= "&bfn=" ~ m["matched"]; - } - } - if ("fn" in cgi.post) { - got.fn = cgi.post["fn"]; - got.canned_query ~= "&fn=" ~ cgi.post["fn"]; - } - if ("rt" in cgi.post) { - got.results_type = cgi.post["rt"]; - got.canned_query ~= "&rt=" ~ cgi.post["rt"]; - } - if ("sts" in cgi.post) { - got.stats = cgi.post["sts"]; - got.canned_query ~= "&sts=" ~ cgi.post["sts"]; - } - if ("ec" in cgi.post) { - got.echo = cgi.post["ec"]; - got.canned_query ~= "&ec=" ~ cgi.post["ec"]; - } - if ("url" in cgi.post) { - got.url = cgi.post["url"]; - got.canned_query ~= "&url=" ~ cgi.post["url"]; - } - if ("se" in cgi.post) { - got.searched = cgi.post["se"]; - got.canned_query ~= "&se=" ~ cgi.post["se"]; - } - if ("sql" in cgi.post) { - got.sql = cgi.post["sql"]; - 
got.canned_query ~= "&sql=" ~ cgi.post["sql"]; - } - if ("sml" in cgi.post) { - got.sql_match_limit = cgi.post["sml"]; - got.canned_query ~= "&sml=" ~ cgi.post["sml"]; - } - if ("smo" in cgi.post) { - got.sql_match_offset = "0"; // cgi.post["smo"]; - got.canned_query ~= "&smo=0"; // ~ cgi.post["smo"]; - } - got.canned_query = got.canned_query.strip.split(" ").join("%20"); - conf.query_string = got.canned_query; - // cgi.write("f.canned_query: " ~ got.canned_query ~ "
"); - } else if (environment.get("REQUEST_METHOD", "POST") == "GET") { - got.canned_query = environment.get("QUERY_STRING", ""); - // cgi.write("f.canned_query: " ~ got.canned_query ~ "
"); - got.search_text_area = ""; - if ("sf" in canned_query && !(canned_query["sf"]).empty) { - got.text = canned_query["sf"].split("%20").join(" "); - got.search_text_area ~= "text: " ~ got.text ~ "\n"; - } - if ("au" in canned_query && !(canned_query["au"]).empty) { - got.author = canned_query["au"].split("%20").join(" "); - got.search_text_area ~= "author: " ~ got.author ~ "\n"; - } - if ("ti" in canned_query && !(canned_query["ti"]).empty) { - got.title = canned_query["ti"].split("%20").join(" "); - got.search_text_area ~= "title: " ~ got.title ~ "\n"; - } - if ("uid" in canned_query && !(canned_query["uid"]).empty) { - got.uid = canned_query["uid"].split("%20").join(" "); - got.search_text_area ~= "uid: " ~ got.uid ~ "\n"; - } - if ("fn" in canned_query && !(canned_query["fn"]).empty) { - got.fn = canned_query["fn"].split("%20").join(" "); - got.search_text_area ~= "fn: " ~ got.fn ~ "\n"; - } - if ("kw" in canned_query && !(canned_query["kw"]).empty) { - got.keywords = canned_query["kw"].split("%20").join(" "); - got.search_text_area ~= "keywords: " ~ got.keywords ~ "\n"; - } - if ("tr" in canned_query && !(canned_query["tr"]).empty) { - got.topic_register = canned_query["tr"].split("%20").join(" "); - got.search_text_area ~= "topic_register: " ~ got.topic_register ~ "\n"; - } - if ("su" in canned_query && !(canned_query["su"]).empty) { - got.subject = canned_query["su"].split("%20").join(" "); - got.search_text_area ~= "subject: " ~ got.subject ~ "\n"; - } - if ("de" in canned_query && !(canned_query["de"]).empty) { - got.description = canned_query["de"].split("%20").join(" "); - got.search_text_area ~= "description: " ~ got.description ~ "\n"; - } - if ("pb" in canned_query && !(canned_query["pb"]).empty) { - got.publisher = canned_query["pb"].split("%20").join(" "); - got.search_text_area ~= "publisher: " ~ got.publisher ~ "\n"; - } - if ("ed" in canned_query && !(canned_query["ed"]).empty) { - got.editor = canned_query["ed"].split("%20").join(" "); - 
got.search_text_area ~= "editor: " ~ got.editor ~ "\n"; - } - if ("ct" in canned_query && !(canned_query["ct"]).empty) { - got.contributor = canned_query["ct"].split("%20").join(" "); - got.search_text_area ~= "contributor: " ~ got.contributor ~ "\n"; - } - if ("dt" in canned_query && !(canned_query["dt"]).empty) { - got.date = canned_query["dt"].split("%20").join(" "); - got.search_text_area ~= "date: " ~ got.date ~ "\n"; - } - if ("rt" in canned_query && !(canned_query["rt"]).empty) { - got.results_type = canned_query["rt"].split("%20").join(" "); - // got.search_text_area ~= "results_type: " ~ got.results_type ~ "\n"; - } - if ("fmt" in canned_query && !(canned_query["fmt"]).empty) { - got.format = canned_query["fmt"].split("%20").join(" "); - got.search_text_area ~= "format: " ~ got.format ~ "\n"; - } - if ("id" in canned_query && !(canned_query["id"]).empty) { - got.identifier = canned_query["id"].split("%20").join(" "); - got.search_text_area ~= "identifier: " ~ got.identifier ~ "\n"; - } - if ("src" in canned_query && !(canned_query["src"]).empty) { - got.source = canned_query["src"].split("%20").join(" "); - got.search_text_area ~= "source: " ~ got.source ~ "\n"; - } - if ("lng" in canned_query && !(canned_query["lng"]).empty) { - got.language = canned_query["lng"].split("%20").join(" "); - got.search_text_area ~= "language: " ~ got.language ~ "\n"; - } - if ("rl" in canned_query && !(canned_query["rl"]).empty) { - got.relation = canned_query["rl"].split("%20").join(" "); - got.search_text_area ~= "relation: " ~ got.relation ~ "\n"; - } - if ("cv" in canned_query && !(canned_query["cv"]).empty) { - got.coverage = canned_query["cv"].split("%20").join(" "); - got.search_text_area ~= "coverage: " ~ got.coverage ~ "\n"; - } - if ("rgt" in canned_query && !(canned_query["rgt"]).empty) { - got.rights = canned_query["rgt"].split("%20").join(" "); - got.search_text_area ~= "rights: " ~ got.rights ~ "\n"; - } - if ("cmt" in canned_query && 
!(canned_query["cmt"]).empty) { - got.comment = canned_query["cmt"].split("%20").join(" "); - got.search_text_area ~= "comment: " ~ got.comment ~ "\n"; - } - // if ("abstract" in canned_query && !(canned_query["abstract"]).empty) { - // got.abstract = canned_query["abstract"]; - // } - if ("bfn" in canned_query && !(canned_query["bfn"]).empty) { // search_field - got.src_filename_base = canned_query["bfn"].split("%20").join(" "); - got.search_text_area ~= "src_filename_base: " ~ got.src_filename_base ~ "\n"; - } - if ("sml" in canned_query && !(canned_query["sml"]).empty) { - got.sql_match_limit = canned_query["sml"].split("%20").join(" "); - // got.search_text_area ~= "sql_match_limit: " ~ got.sql_match_limit ~ "\n"; - } - // cgi.write("f.search_text_area: " ~ got.search_text_area ~ "
"); - } - return got; - } - auto tf = text_fields; // - struct SQL_select { - string the_body = ""; - string the_range = ""; - } - auto sql_select = SQL_select(); - string base ; // = ""; - string tip ; // = ""; - string search_note ; // = ""; - uint sql_match_offset_count = 0; - string previous_next () { - static struct Rgx { - static track_offset = ctRegex!(`(?P[&]smo=)(?P[0-9]+)`, "m"); - } - auto rgx = Rgx(); - string _previous_next = ""; - int _current_offset_value = 0; - string _set_offset_next = ""; - string _set_offset_previous = ""; - string _url = ""; - string _url_previous = ""; - string _url_next = ""; - string arrow_previous = ""; - string arrow_next = ""; - if (environment.get("REQUEST_METHOD", "POST") == "POST") { - _url = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ tf.canned_query; - } else if (environment.get("REQUEST_METHOD", "POST") == "GET") { - _url = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ environment.get("QUERY_STRING", ""); - } - if (auto m = _url.matchFirst(rgx.track_offset)) { - _current_offset_value = m.captures["offset_val"].to!int; - _set_offset_next = m.captures["offset_key"] ~ ((m.captures["offset_val"]).to!int + cv.sql_match_limit.to!int).to!string; - _url_next = _url.replace(rgx.track_offset, _set_offset_next); - if (_current_offset_value < cv.sql_match_limit.to!int) { - _url_previous = ""; - } else { - _url_previous = ""; - _set_offset_previous = m.captures["offset_key"] ~ ((m.captures["offset_val"]).to!int - cv.sql_match_limit.to!int).to!string; - _url_previous = _url.replace(rgx.track_offset, _set_offset_previous); - } - } else {// _current_offset_value = 0; - _url_next = _url ~= "&smo=" ~ cv.sql_match_limit.to!string; - } - if (_url_previous.empty) { - arrow_previous = ""; - } else { - arrow_previous = - "" - ~ "" - ~ "<< prev" - ~ " || "; - } - arrow_next = - "" - ~ "" - ~ "next >>" - ~ ""; - _previous_next = "
" ~ arrow_previous ~ arrow_next; - return _previous_next; - } - { - header = format(q"┃ - - - - - - SiSU spine search form (sample) - - - - - - - -┃", - conf.http_host, - ); - } - { - table = format(q"┃ - - - -
- - -
-
- SiSU - -
- git - -
-
- -
- ┃"); - } - { - string post_value(string field_name, string type="box", string set="on") { - string val = ""; - switch (type) { - case "field": - val = ((field_name in cgi.post && !(cgi.post[field_name]).empty) - ? cgi.post[field_name] - : (field_name in cgi.get) - ? cgi.get[field_name] - : ""); - val = tf.search_text_area; - break; - case "box": // generic for checkbox or radio; checkbox set == "on" radio set == "name set" - val = ((field_name in cgi.post && !(cgi.post[field_name]).empty) - ? (cgi.post[field_name] == set ? "checked" : "off") - : (field_name in cgi.get) - ? (cgi.get[field_name] == set ? "checked" : "off") - : "off"); - break; - case "radio": // used generic bo - val = ((field_name in cgi.post && !(cgi.post[field_name]).empty) - ? (cgi.post[field_name] == set ? "checked" : "off") - : (field_name in cgi.get) - ? (cgi.get[field_name] == set ? "checked" : "off") - : "checked"); - break; - case "checkbox": // used generic bo - val = ((field_name in cgi.post && !(cgi.post[field_name]).empty) - ? (cgi.post[field_name] == set ? "checked" : "off") - : (field_name in cgi.get) - ? (cgi.get[field_name] == set ? "checked" : "off") - : "checked"); - break; - default: - } - return val; - } - string the_can(string fv) { - string show_the_can = post_value("url"); - string _the_can = ""; - if (show_the_can == "checked") { - tf = text_fields; - string method_get_url = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ environment.get("QUERY_STRING", ""); - string method_post_url_construct = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ tf.canned_query; - // assert(method_get_url == environment.get("HTTP_REFERER", conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ "?" ~ conf.query_string)); - if (conf.request_method == "POST") { - _the_can = - "" - ~ "POST: " - ~ "" - ~ method_post_url_construct - ~ "" - ~ "
"; - } else if (conf.request_method == "GET") { - _the_can = - "" - ~ "GET:  " - ~ "" - ~ method_get_url - ~ ""; - } - conf.http_url = conf.http_request_type ~ "://" ~ conf.http_host ~ conf.cgi_script ~ tf.canned_query; - } - return _the_can; - } - string provide_tip() { - string searched_tip = post_value("se"); - string tip = ""; - if (searched_tip == "checked") { - string search_field = post_value("sf", "field"); - tf = text_fields; - tip = format(q"┃ - -database: %s; selected view: index -search string: %s %s %s %s %s %s
-%s %s %s %s %s %s -
-┃", - cv.db_selected, - (tf.text.empty ? "" : "\"text: " ~ tf.text ~ "; "), - (tf.title.empty ? "" : "\"title: " ~ tf.title ~ "; "), - (tf.author.empty ? "" : "\"author: " ~ tf.author ~ "; "), - (tf.date.empty ? "" : "\"date " ~ tf.date ~ "; "), - (tf.uid.empty ? "" : "\"uid: " ~ tf.uid ~ "; "), - (tf.fn.empty ? "" : "\"fn: " ~ tf.fn ~ "; "), - (tf.text.empty ? "" : "text: " ~ tf.text ~ "
"), - (tf.title.empty ? "" : "title: " ~ tf.title ~ "
"), - (tf.author.empty ? "" : "author: " ~ tf.author ~ "
"), - (tf.date.empty ? "" : "date: " ~ tf.date ~ "
"), - (tf.uid.empty ? "" : "\"uid: " ~ tf.uid ~ "; "), - (tf.fn.empty ? "" : "\"fn: " ~ tf.fn ~ "; "), - ); - } - return tip; - } - form = format(q"┃ -
- - -
- - - %s - %s - %s -
- - - to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) -
- - - index - text / grep; - match limit: - 1,000 - 2,500 -
- echo query - result stats - search url - searched - available fields - sql statement - -
-
- - -
-┃", - "spine-search", - (post_value("ec") == "checked") ? post_value("sf", "field") : "", - provide_tip, - search_note, - the_can(post_value("sf", "field")), - cv.db_selected, - cv.db_selected, - post_value("rt", "box", "idx"), - post_value("rt", "box", "txt"), - post_value("sml", "box", "1000"), - post_value("sml", "box", "2500"), - post_value("ec"), - post_value("sts"), - post_value("url"), - post_value("se"), - post_value("tip"), - post_value("sql"), - ); - { - string set_value(string field_name, string default_val) { - string val; - if (field_name in cgi.post) { - val = cgi.post[field_name]; - } else if (field_name in cgi.get) { - val = cgi.get[field_name]; - } else { val = default_val; } - return val; - } - bool set_bool(string field_name) { - bool val; - if (field_name in cgi.post - && cgi.post[field_name] == "on") { - val = true; - } else if (field_name in cgi.get - && cgi.get[field_name] == "on") { - val = true; - } else { val = false; } - return val; - } - cv.db_selected = set_value("selected_db", "spine.search.db"); // selected_db == db - cv.sql_match_limit = set_value("sml", "1000"); - cv.sql_match_offset = set_value("smo", "0"); - cv.search_text = set_value("sf", "test"); // remove test - cv.results_type = set_value("rt", "idx"); - cv.checked_echo = set_bool("ec"); - cv.checked_stats = set_bool("sts"); - cv.checked_url = set_bool("url"); - cv.checked_searched = set_bool("se"); - cv.checked_tip = set_bool("tip"); - cv.checked_sql = set_bool("sql"); - tf = text_fields; - } - } - { - cgi.write(header); - cgi.write(table); - cgi.write(form); - // cgi.write(previous_next); - { // debug environment - // foreach (k, d; environment.toAA) { - // cgi.write(k ~ ": " ~ d ~ "
"); - // } - } - { // debug cgi info - // cgi.write("db_selected: " ~ cv.db_selected ~ "
\n"); - // cgi.write("search_text: " ~ cv.search_text ~ "
\n"); - // cgi.write("sql_match_limit: " ~ cv.sql_match_limit ~ ";\n"); - // cgi.write("sql_match_offset: " ~ cv.sql_match_offset ~ ";\n"); - // cgi.write("results_type: " ~ cv.results_type ~ "
\n"); - // cgi.write("cv.checked_echo: " ~ (cv.checked_echo ? "checked" : "off") ~ "; \n"); - // cgi.write("cv.checked_stats: " ~ (cv.checked_stats ? "checked" : "off") ~ "; \n"); - // cgi.write("cv.checked_url: " ~ (cv.checked_url ? "checked" : "off") ~ "; \n"); - // cgi.write("cv.checked_searched: " ~ (cv.checked_searched ? "checked" : "off") ~ ";
\n"); - // cgi.write("cv.checked_tip: " ~ (cv.checked_tip ? "checked" : "off") ~ "; \n"); - // cgi.write("cv.checked_sql: " ~ (cv.checked_sql ? "checked" : "off") ~ "
\n"); - } - } - auto db = Database(conf.db_path ~ cv.db_selected); - { - uint sql_match_offset_counter(T)(T cv) { - sql_match_offset_count += cv.sql_match_limit.to!uint; - return sql_match_offset_count; - } - void sql_search_query() { - string select_field_like(string db_field, string search_field) { - string where_ = ""; - if (!(search_field.empty)) { - string _sf = search_field.strip.split("%20").join(" "); - if (_sf.match(r" OR ")) { - _sf = _sf.split(" OR ").join("%' OR " ~ db_field ~ " LIKE '%"); - } - if (_sf.match(r" AND ")) { - _sf = _sf.split(" AND ").join("%' AND " ~ db_field ~ " LIKE '%"); - } - _sf = "( " ~ db_field ~ " LIKE\n '%" ~ _sf ~ "%' )"; - where_ ~= format(q"┃ - %s -┃", - _sf - ); - } - return where_; - } - string[] _fields; - _fields ~= select_field_like("doc_objects.clean", tf.text); - _fields ~= select_field_like("metadata_and_text.title", tf.title); - _fields ~= select_field_like("metadata_and_text.creator_author", tf.author); - _fields ~= select_field_like("metadata_and_text.uid", tf.uid); - _fields ~= select_field_like("metadata_and_text.src_filename_base", tf.fn); - _fields ~= select_field_like("metadata_and_text.src_filename_base", tf.src_filename_base); - _fields ~= select_field_like("metadata_and_text.language_document_char", tf.language); - _fields ~= select_field_like("metadata_and_text.date_published", tf.date); - _fields ~= select_field_like("metadata_and_text.classify_keywords", tf.keywords); - _fields ~= select_field_like("metadata_and_text.classify_topic_register", tf.topic_register); - string[] fields; - foreach (f; _fields) { - if (!(f.empty)) { fields ~= f; } - } - string fields_str = ""; - fields_str ~= fields.join(" AND "); - sql_select.the_body ~= format(q"┃ -SELECT - metadata_and_text.uid, - metadata_and_text.title, - metadata_and_text.creator_author_last_first, - metadata_and_text.creator_author, - metadata_and_text.src_filename_base, - metadata_and_text.language_document_char, - metadata_and_text.date_published, - 
metadata_and_text.classify_keywords, - metadata_and_text.classify_topic_register, - doc_objects.body, - doc_objects.seg_name, - doc_objects.ocn, - metadata_and_text.uid -FROM - doc_objects, - metadata_and_text -WHERE ( - %s - ) -AND - doc_objects.uid_metadata_and_text = metadata_and_text.uid -ORDER BY - metadata_and_text.creator_author_last_first, - metadata_and_text.date_published DESC, - metadata_and_text.title, - metadata_and_text.language_document_char, - metadata_and_text.src_filename_base, - doc_objects.ocn -LIMIT %s OFFSET %s -;┃", - fields_str, - cv.sql_match_limit, - cv.sql_match_offset, - ); - (cv.checked_sql) - ? cgi.write(previous_next ~ "
" ~ sql_select.the_body.split("\n ").join(" ").split("\n").join("
") ~ "
\n") - : ""; - cgi.write(previous_next); - auto select_query_results = db.execute(sql_select.the_body).cached; - string _old_uid = ""; - if (!select_query_results.empty) { - foreach (row; select_query_results) { - if (row["uid"].as!string != _old_uid) { - _old_uid = row["uid"].as!string; - auto m = (row["date_published"].as!string).match(regex(r"^([0-9]{4})")); // breaks if row missing or no match? - cgi.write( - "
\"" - ~ row["title"].as!string ~ "\"" - ~ " (" - ~ m.hit - ~ ") " - ~ "[" - ~ row["language_document_char"].as!string - ~ "] " - ~ row["creator_author_last_first"].as!string - ~ ":
\n" - ); - } - if (cv.results_type == "txt") { - cgi.write( - "
" - ~ row["ocn"].as!string - ~ "" - ~ "
" - ~ row["body"].as!string - ); - } else { - cgi.write( - "" - ~ row["ocn"].as!string - ~ ", " - ); - } - } - cgi.write( previous_next); - } else { // offset_not_beyond_limit = false; - cgi.write("select_query_results empty

\n"); - } - } - sql_search_query; - } - { - db.close; - } - { - string tail = format(q"┃ - -┃"); - cgi.write(tail); - } -} -mixin GenericMain!cgi_function_intro; diff --git a/util/rb/cgi/search.cgi b/util/rb/cgi/search.cgi deleted file mode 100755 index 0fe8ea5..0000000 --- a/util/rb/cgi/search.cgi +++ /dev/null @@ -1,937 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (sqlite) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . 
- - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Ralph Amissah - - - -=end - begin - require 'cgi' - require 'fcgi' - require 'sqlite3' - rescue LoadError - puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' - end - @stub_default='manual' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='filetype' - @lingual='multi' - @db_name_prefix='SiSU.7a.' - @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; 
identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;
' - else '' - end - end - def submission_form - search_form=<<-WOK - - - - - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - - - - - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -
-

- - -
- - - #{@tip} - #{@search_note} - #{@the_can} -
- - - - - to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) -
- - - index - text / grep -
- match limit: - 1,000 - 2,500 -
- echo query - result stats - search url - searched - available fields - sql statement -
- checks: - default - selected - all - none -
- - -
- WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - @identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - 
@abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - @date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field 
=~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q=l,t,q - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). - gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 
sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{
- pg. #{page.to_s} - -  >> - -
} - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} - -  >> - -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} - -  >> - -
} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} -
} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{#{sql_select_body}} - end - def contents - @conn.execute(sql_select_body) - end - end - def tail - <<-'WOK' -


- - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -

- Generated by - SiSU 6.3.1 2014-10-19 (2014w41/7) -
- - SiSU © Ralph Amissah - 1993, current 2014. - All Rights Reserved. -
- SiSU is software for document structuring, publishing and search, -
- - www.jus.uio.no/sisu - - and - - www.sisudoc.org - - sources - - git.sisudoc.org - -
- w3 since October 3 1993 - - ralph@amissah.com - -
- mailing list subscription - - http://lists.sisudoc.org/listinfo/sisu - -
- - sisu@lists.sisudoc.org - -

-
-

- SiSU using: -
Standard SiSU markup syntax, -
Standard SiSU meta-markup syntax, and the -
Standard SiSU object citation numbering and system, (object/text identifying/locating system) -
- © Ralph Amissah 1997, current 2014. - All Rights Reserved. -

-
-

- - .: - -

-
-

- SiSU is released under - GPL v3 - or later, - - http://www.gnu.org/licenses/gpl.html - -

-
-

- SiSU, developed using - - Ruby - - on - - Debian/Gnu/Linux - - software infrastructure, - with the usual GPL (or OSS) suspects. -

-
- - - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - 
checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=case cgi['db'] - when /SiSU.7a.manual/ then '' - end - db_name='sisu_sqlite.db' - db_sqlite=case cgi['db'] - when /SiSU.7a.manual/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - else "/srv/complete.sisudoc.org/web/manual/#{db_name}" - end - @conn=SQLite3::Database.new(db_sqlite) - @conn.results_as_hash=true - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if @search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + 
CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). - gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{#{canned_note} #{canned_search_url_txt}
} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}

} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}

} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}
} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}
} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}
} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}
} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}
} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}
} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}
} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}
} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}
} if @search_for.language =~/\S+/ - search_note=<<-WOK - - database: #{green}#{@db}; selected view: #{green}#{cgi['view']} - search string: "#{green}#{analyze_format}"
- #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - - WOK - #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - #@body_main << '



Main Text:
' << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? 
"#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? %{search } - : %{search } - title=%{toc html #{ti} [#{c['language_document_char']}] by #{c['creator_author']} manifest #{can_txt_srch}
} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '

'+title - : '
'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=[] - build=unescaped_search.scan(/\S+/).each do |g| - (g.to_s =~/(AND|OR)/) \ - ? (search_regex << '|') - : (search_regex << %{#{g.to_s}}) - end - search_regex=search_regex.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) - : c['body'] - %{

ocn #{c['ocn']}:

#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}

ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{#{c['ocn']}, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{#{c['ocn']}, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{


Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
-          s << CGI::escapeHTML(e.message) + '
' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/search_ref.cgi b/util/rb/cgi/search_ref.cgi deleted file mode 100755 index 0fe8ea5..0000000 --- a/util/rb/cgi/search_ref.cgi +++ /dev/null @@ -1,937 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (sqlite) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . 
- - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Ralph Amissah - - - -=end - begin - require 'cgi' - require 'fcgi' - require 'sqlite3' - rescue LoadError - puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' - end - @stub_default='manual' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='filetype' - @lingual='multi' - @db_name_prefix='SiSU.7a.' - @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; 
identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;
' - else '' - end - end - def submission_form - search_form=<<-WOK - - - - - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - - - - - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -
-
- - -
- - - #{@tip} - #{@search_note} - #{@the_can} -
- - - - - to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) -
- - - index - text / grep -
- match limit: - 1,000 - 2,500 -
- echo query - result stats - search url - searched - available fields - sql statement -
- checks: - default - selected - all - none -
- - -
- WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - @identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - 
@abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - @date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field 
=~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q=l,t,q - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). - gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 
sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{
- pg. #{page.to_s} - -  >> - -
} - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} - -  >> - -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} - -  >> - -
} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} -
} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{#{sql_select_body}} - end - def contents - @conn.execute(sql_select_body) - end - end - def tail - <<-'WOK' -


- - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -

- Generated by - SiSU 6.3.1 2014-10-19 (2014w41/7) -
- - SiSU © Ralph Amissah - 1993, current 2014. - All Rights Reserved. -
- SiSU is software for document structuring, publishing and search, -
- - www.jus.uio.no/sisu - - and - - www.sisudoc.org - - sources - - git.sisudoc.org - -
- w3 since October 3 1993 - - ralph@amissah.com - -
- mailing list subscription - - http://lists.sisudoc.org/listinfo/sisu - -
- - sisu@lists.sisudoc.org - -

-
-

- SiSU using: -
Standard SiSU markup syntax, -
Standard SiSU meta-markup syntax, and the -
Standard SiSU object citation numbering and system, (object/text identifying/locating system) -
- © Ralph Amissah 1997, current 2014. - All Rights Reserved. -

-
-

- - .: - -

-
-

- SiSU is released under - GPL v3 - or later, - - http://www.gnu.org/licenses/gpl.html - -

-
-

- SiSU, developed using - - Ruby - - on - - Debian/Gnu/Linux - - software infrastructure, - with the usual GPL (or OSS) suspects. -

-
- - - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - 
checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=case cgi['db'] - when /SiSU.7a.manual/ then '' - end - db_name='sisu_sqlite.db' - db_sqlite=case cgi['db'] - when /SiSU.7a.manual/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - else "/srv/complete.sisudoc.org/web/manual/#{db_name}" - end - @conn=SQLite3::Database.new(db_sqlite) - @conn.results_as_hash=true - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if @search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + 
CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). - gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{#{canned_note} #{canned_search_url_txt}
} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}

} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}

} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}
} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}
} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}
} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}
} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}
} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}
} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}
} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}
} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}
} if @search_for.language =~/\S+/ - search_note=<<-WOK - - database: #{green}#{@db}; selected view: #{green}#{cgi['view']} - search string: "#{green}#{analyze_format}"
- #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - - WOK - #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - #@body_main << '



Main Text:
' << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? 
"#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? %{search } - : %{search } - title=%{toc html #{ti} [#{c['language_document_char']}] by #{c['creator_author']} manifest #{can_txt_srch}
} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '

'+title - : '
'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=[] - build=unescaped_search.scan(/\S+/).each do |g| - (g.to_s =~/(AND|OR)/) \ - ? (search_regex << '|') - : (search_regex << %{#{g.to_s}}) - end - search_regex=search_regex.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) - : c['body'] - %{

ocn #{c['ocn']}:

#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}

ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{#{c['ocn']}, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{#{c['ocn']}, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{


Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
-          s << CGI::escapeHTML(e.message) + '
' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/sisu_7a_sqlite.cgi b/util/rb/cgi/sisu_7a_sqlite.cgi deleted file mode 100755 index 0fe8ea5..0000000 --- a/util/rb/cgi/sisu_7a_sqlite.cgi +++ /dev/null @@ -1,937 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (sqlite) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . 
- - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Ralph Amissah - - - -=end - begin - require 'cgi' - require 'fcgi' - require 'sqlite3' - rescue LoadError - puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' - end - @stub_default='manual' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='filetype' - @lingual='multi' - @db_name_prefix='SiSU.7a.' - @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; 
identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;
' - else '' - end - end - def submission_form - search_form=<<-WOK - - - - - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - - - - - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -
-
- - -
- - - #{@tip} - #{@search_note} - #{@the_can} -
- - - - - to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) -
- - - index - text / grep -
- match limit: - 1,000 - 2,500 -
- echo query - result stats - search url - searched - available fields - sql statement -
- checks: - default - selected - all - none -
- - -
- WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - @identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - 
@abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - @date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field 
=~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q=l,t,q - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). - gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 
sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{
- pg. #{page.to_s} - -  >> - -
} - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} - -  >> - -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} - -  >> - -
} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} -
} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{#{sql_select_body}} - end - def contents - @conn.execute(sql_select_body) - end - end - def tail - <<-'WOK' -


- - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -

- Generated by - SiSU 6.3.1 2014-10-19 (2014w41/7) -
- - SiSU © Ralph Amissah - 1993, current 2014. - All Rights Reserved. -
- SiSU is software for document structuring, publishing and search, -
- - www.jus.uio.no/sisu - - and - - www.sisudoc.org - - sources - - git.sisudoc.org - -
- w3 since October 3 1993 - - ralph@amissah.com - -
- mailing list subscription - - http://lists.sisudoc.org/listinfo/sisu - -
- - sisu@lists.sisudoc.org - -

-
-

- SiSU using: -
Standard SiSU markup syntax, -
Standard SiSU meta-markup syntax, and the -
Standard SiSU object citation numbering and system, (object/text identifying/locating system) -
- © Ralph Amissah 1997, current 2014. - All Rights Reserved. -

-
-

- - .: - -

-
-

- SiSU is released under - GPL v3 - or later, - - http://www.gnu.org/licenses/gpl.html - -

-
-

- SiSU, developed using - - Ruby - - on - - Debian/Gnu/Linux - - software infrastructure, - with the usual GPL (or OSS) suspects. -

-
- - - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - 
checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=case cgi['db'] - when /SiSU.7a.manual/ then '' - end - db_name='sisu_sqlite.db' - db_sqlite=case cgi['db'] - when /SiSU.7a.manual/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - else "/srv/complete.sisudoc.org/web/manual/#{db_name}" - end - @conn=SQLite3::Database.new(db_sqlite) - @conn.results_as_hash=true - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if @search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + 
CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). - gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{#{canned_note} #{canned_search_url_txt}
} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}

} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}

} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}
} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}
} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}
} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}
} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}
} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}
} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}
} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}
} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}
} if @search_for.language =~/\S+/ - search_note=<<-WOK - - database: #{green}#{@db}; selected view: #{green}#{cgi['view']} - search string: "#{green}#{analyze_format}"
- #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - - WOK - #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - #@body_main << '



Main Text:
' << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? 
"#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? %{search } - : %{search } - title=%{toc html #{ti} [#{c['language_document_char']}] by #{c['creator_author']} manifest #{can_txt_srch}
} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '

'+title - : '
'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=[] - build=unescaped_search.scan(/\S+/).each do |g| - (g.to_s =~/(AND|OR)/) \ - ? (search_regex << '|') - : (search_regex << %{#{g.to_s}}) - end - search_regex=search_regex.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) - : c['body'] - %{

ocn #{c['ocn']}:

#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}

ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{#{c['ocn']}, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{#{c['ocn']}, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{


Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
-          s << CGI::escapeHTML(e.message) + '
' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/sisu_lng.cgi b/util/rb/cgi/sisu_lng.cgi deleted file mode 100755 index 5e07a16..0000000 --- a/util/rb/cgi/sisu_lng.cgi +++ /dev/null @@ -1,935 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (pgsql) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2015, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . 
- - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Ralph Amissah - - - -=end - begin - require 'cgi' - require 'fcgi' - require 'pg' - rescue LoadError - puts 'cgi, fcgi or pg NOT FOUND (LoadError)' - end - @stub_default='samples_by_language' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='language' - @lingual='multi' - @port='5432' - @db_name_prefix='SiSU.7a.' - @user='www-data' # check user name for access to pg database: e.g. www-data or 'ralph' - @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; 
subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;
' - else '' - end - end - def submission_form - search_form=<<-WOK - - - - - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - - - - - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -
-
- - -
- - - #{@tip} - #{@search_note} - #{@the_can} -
- - - - - to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) -
- - - index - text / grep - case sensitive -
- match limit: - 1,000 - 2,500 -
- echo query - result stats - search url - searched - available fields - sql statement -
- checks: - default - selected - all - none -
- - -
- WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - @identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - 
@abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - @date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field 
=~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q,@c=l,t,q,cse - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=if @c - unescaped_search.gsub(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~\( '"). - gsub(/(.+)/,"#{@l}~\( '\\1' \)") - else - unescaped_search.gsub(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~*\( '"). - gsub(/(.+)/,"#{@l}~*\( '\\1' \)") - end - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean~[*]?\(\s*'[^']+'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean~[*]?\(\s*'[^']+'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + sql_offset.to_s - end - def 
next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{
- pg. #{page.to_s} - -  >> - -
} - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} - -  >> - -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} - -  >> - -
} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} -
} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE (#{@search_text}) AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{#{sql_select_body}} - end - def contents - @conn.exec(sql_select_body) - end - end - def tail - <<-'WOK' -


- - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -

- Generated by - SiSU 7.1.2 2015-05-18 (2015w20/1) -
- - SiSU © Ralph Amissah - 1993, current 2015. - All Rights Reserved. -
- SiSU is software for document structuring, publishing and search, -
- - www.jus.uio.no/sisu - - and - - www.sisudoc.org - - sources - - git.sisudoc.org - -
- w3 since October 3 1993 - - ralph@amissah.com - -
- mailing list subscription - - http://lists.sisudoc.org/listinfo/sisu - -
- - sisu@lists.sisudoc.org - -

-
-

- SiSU using: -
Standard SiSU markup syntax, -
Standard SiSU meta-markup syntax, and the -
Standard SiSU object citation numbering and system, (object/text identifying/locating system) -
- © Ralph Amissah 1997, current 2015. - All Rights Reserved. -

-
-

- - .: - -

-
-

- SiSU is released under - GPL v3 - or later, - - http://www.gnu.org/licenses/gpl.html - -

-
-

- SiSU, developed using - - Ruby - - on - - Debian/Gnu/Linux - - software infrastructure, - with the usual GPL (or OSS) suspects. -

-
- - - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - 
checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=%{} - @conn=PG::Connection.open(dbname: @db, port: @port, user: @user) - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if @search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) 
if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? (ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). 
- gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{#{canned_note} #{canned_search_url_txt}
} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}

} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}

} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}
} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}
} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}
} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}
} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}
} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}
} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}
} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}
} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}
} if @search_for.language =~/\S+/ - search_note=<<-WOK - - database: #{green}#{@db}; selected view: #{green}#{cgi['view']} - search string: "#{green}#{analyze_format}"
- #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - - WOK - #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? 
"#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? %{search } - : %{search } - title=%{toc html #{ti} [#{c['language_document_char']}] by #{c['creator_author']} manifest #{can_txt_srch}
} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '

'+title - : '
'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=unescaped_search.scan(/\S+/).each.map do |g| - (g.to_s =~/(AND|OR)/) \ - ? ('|') - : (%{#{g.to_s}}) - end.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) - : c['body'] - %{

ocn #{c['ocn']}:

#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}

ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{#{c['ocn']}, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{#{c['ocn']}, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{


Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
-          s << CGI::escapeHTML(e.message) + '
' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/sisu_search_pg.cgi b/util/rb/cgi/sisu_search_pg.cgi deleted file mode 100755 index 5e07a16..0000000 --- a/util/rb/cgi/sisu_search_pg.cgi +++ /dev/null @@ -1,935 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (pgsql) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2015, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . 
- - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Ralph Amissah - - - -=end - begin - require 'cgi' - require 'fcgi' - require 'pg' - rescue LoadError - puts 'cgi, fcgi or pg NOT FOUND (LoadError)' - end - @stub_default='samples_by_language' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='language' - @lingual='multi' - @port='5432' - @db_name_prefix='SiSU.7a.' - @user='www-data' # check user name for access to pg database: e.g. www-data or 'ralph' - @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; 
subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;
' - else '' - end - end - def submission_form - search_form=<<-WOK - - - - - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - - - - - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -
-
- - -
- - - #{@tip} - #{@search_note} - #{@the_can} -
- - - - - to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) -
- - - index - text / grep - case sensitive -
- match limit: - 1,000 - 2,500 -
- echo query - result stats - search url - searched - available fields - sql statement -
- checks: - default - selected - all - none -
- - -
- WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - @identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - 
@abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - @date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field 
=~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q,@c=l,t,q,cse - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=if @c - unescaped_search.gsub(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~\( '"). - gsub(/(.+)/,"#{@l}~\( '\\1' \)") - else - unescaped_search.gsub(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~*\( '"). - gsub(/(.+)/,"#{@l}~*\( '\\1' \)") - end - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean~[*]?\(\s*'[^']+'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean~[*]?\(\s*'[^']+'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + sql_offset.to_s - end - def 
next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{
- pg. #{page.to_s} - -  >> - -
} - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} - -  >> - -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} - -  >> - -
} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} -
} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE (#{@search_text}) AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{#{sql_select_body}} - end - def contents - @conn.exec(sql_select_body) - end - end - def tail - <<-'WOK' -


- - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -

- Generated by - SiSU 7.1.2 2015-05-18 (2015w20/1) -
- - SiSU © Ralph Amissah - 1993, current 2015. - All Rights Reserved. -
- SiSU is software for document structuring, publishing and search, -
- - www.jus.uio.no/sisu - - and - - www.sisudoc.org - - sources - - git.sisudoc.org - -
- w3 since October 3 1993 - - ralph@amissah.com - -
- mailing list subscription - - http://lists.sisudoc.org/listinfo/sisu - -
- - sisu@lists.sisudoc.org - -

-
-

- SiSU using: -
Standard SiSU markup syntax, -
Standard SiSU meta-markup syntax, and the -
Standard SiSU object citation numbering and system, (object/text identifying/locating system) -
- © Ralph Amissah 1997, current 2015. - All Rights Reserved. -

-
-

- - .: - -

-
-

- SiSU is released under - GPL v3 - or later, - - http://www.gnu.org/licenses/gpl.html - -

-
-

- SiSU, developed using - - Ruby - - on - - Debian/Gnu/Linux - - software infrastructure, - with the usual GPL (or OSS) suspects. -

-
- - - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - 
checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=%{} - @conn=PG::Connection.open(dbname: @db, port: @port, user: @user) - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if @search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) 
if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? (ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). 
- gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{#{canned_note} #{canned_search_url_txt}
} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}

} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}

} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}
} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}
} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}
} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}
} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}
} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}
} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}
} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}
} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}
} if @search_for.language =~/\S+/ - search_note=<<-WOK - - database: #{green}#{@db}; selected view: #{green}#{cgi['view']} - search string: "#{green}#{analyze_format}"
- #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - - WOK - #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? 
"#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? %{search } - : %{search } - title=%{toc html #{ti} [#{c['language_document_char']}] by #{c['creator_author']} manifest #{can_txt_srch}
} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '

'+title - : '
'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=unescaped_search.scan(/\S+/).each.map do |g| - (g.to_s =~/(AND|OR)/) \ - ? ('|') - : (%{#{g.to_s}}) - end.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) - : c['body'] - %{

ocn #{c['ocn']}:

#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}

ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{#{c['ocn']}, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{#{c['ocn']}, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{


Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
-          s << CGI::escapeHTML(e.message) + '
' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/sisu_search_sqlite.cgi b/util/rb/cgi/sisu_search_sqlite.cgi deleted file mode 100755 index 0fe8ea5..0000000 --- a/util/rb/cgi/sisu_search_sqlite.cgi +++ /dev/null @@ -1,937 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (sqlite) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . 
- - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Ralph Amissah - - - -=end - begin - require 'cgi' - require 'fcgi' - require 'sqlite3' - rescue LoadError - puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' - end - @stub_default='manual' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='filetype' - @lingual='multi' - @db_name_prefix='SiSU.7a.' - @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; 
identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;
' - else '' - end - end - def submission_form - search_form=<<-WOK - - - - - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - - - - - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -
-
- - -
- - - #{@tip} - #{@search_note} - #{@the_can} -
- - - - - to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) -
- - - index - text / grep -
- match limit: - 1,000 - 2,500 -
- echo query - result stats - search url - searched - available fields - sql statement -
- checks: - default - selected - all - none -
- - -
- WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - @identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - 
@abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - @date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field 
=~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q=l,t,q - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). - gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 
sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{
- pg. #{page.to_s} - -  >> - -
} - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} - -  >> - -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} - -  >> - -
} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} -
} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{#{sql_select_body}} - end - def contents - @conn.execute(sql_select_body) - end - end - def tail - <<-'WOK' -


- - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -

- Generated by - SiSU 6.3.1 2014-10-19 (2014w41/7) -
- - SiSU © Ralph Amissah - 1993, current 2014. - All Rights Reserved. -
- SiSU is software for document structuring, publishing and search, -
- - www.jus.uio.no/sisu - - and - - www.sisudoc.org - - sources - - git.sisudoc.org - -
- w3 since October 3 1993 - - ralph@amissah.com - -
- mailing list subscription - - http://lists.sisudoc.org/listinfo/sisu - -
- - sisu@lists.sisudoc.org - -

-
-

- SiSU using: -
Standard SiSU markup syntax, -
Standard SiSU meta-markup syntax, and the -
Standard SiSU object citation numbering and system, (object/text identifying/locating system) -
- © Ralph Amissah 1997, current 2014. - All Rights Reserved. -

-
-

- - .: - -

-
-

- SiSU is released under - GPL v3 - or later, - - http://www.gnu.org/licenses/gpl.html - -

-
-

- SiSU, developed using - - Ruby - - on - - Debian/Gnu/Linux - - software infrastructure, - with the usual GPL (or OSS) suspects. -

-
- - - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - 
checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=case cgi['db'] - when /SiSU.7a.manual/ then '' - end - db_name='sisu_sqlite.db' - db_sqlite=case cgi['db'] - when /SiSU.7a.manual/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - else "/srv/complete.sisudoc.org/web/manual/#{db_name}" - end - @conn=SQLite3::Database.new(db_sqlite) - @conn.results_as_hash=true - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if @search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + 
CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). - gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{#{canned_note} #{canned_search_url_txt}
} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}

} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}

} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}
} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}
} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}
} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}
} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}
} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}
} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}
} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}
} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}
} if @search_for.language =~/\S+/ - search_note=<<-WOK - - database: #{green}#{@db}; selected view: #{green}#{cgi['view']} - search string: "#{green}#{analyze_format}"
- #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - - WOK - #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - #@body_main << '



Main Text:
' << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? 
"#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? %{search } - : %{search } - title=%{toc html #{ti} [#{c['language_document_char']}] by #{c['creator_author']} manifest #{can_txt_srch}
} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '

'+title - : '
'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=[] - build=unescaped_search.scan(/\S+/).each do |g| - (g.to_s =~/(AND|OR)/) \ - ? (search_regex << '|') - : (search_regex << %{#{g.to_s}}) - end - search_regex=search_regex.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) - : c['body'] - %{

ocn #{c['ocn']}:

#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}

ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{#{c['ocn']}, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{#{c['ocn']}, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{


Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
-          s << CGI::escapeHTML(e.message) + '
' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/spine.search.cgi b/util/rb/cgi/spine.search.cgi deleted file mode 100755 index cfe9d73..0000000 --- a/util/rb/cgi/spine.search.cgi +++ /dev/null @@ -1,958 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (sqlite) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see . 
- - If you have Internet connection, the latest version of the GPL should be - available at these locations: - - - - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - - - - * Ralph Amissah - - - -=end -begin - require 'cgi' - require 'fcgi' - require 'sqlite3' -rescue LoadError - puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' -end -@stub_default = 'search' -@image_src = "http://#{ENV['HTTP_HOST']}/image_sys" -@hosturl_cgi = "http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" -@hosturl_files = "http://#{ENV['HTTP_HOST']}" -@output_dir_structure_by = 'language' -@lingual = 'multi' -@db_name_prefix = 'spine.' -@base = "http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP -@@offset = 0 -@@canned_search_url = @base -@color_heading = '#DDFFAA' -@color_match = '#ffff48' -class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note = '' if checked_searched !~ /\S/ - the_can = '' if checked_url !~ /\S/ - search_field = '' if checked_echo !~ /\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip = if checked_tip =~ /\S/ - 'text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; 
identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; src_filename_base:__;
' - else '' - end - end - def submission_form - search_form =<<-WOK - - - - - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - - - - - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -
-
- - -
- - - #{@tip} - #{@search_note} - #{@the_can} -
- - - - - to search: select which database to search (drop-down menu below); enter your search query (in the form above); and click on the search button (below) -
- - - index - text / grep -
- match limit: - 1,000 - 2,500 -
- echo query - result stats - search url - searched - available fields - sql statement -
- checks: - default - selected - all - none -
- - -
- WOK - end -end -class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:src_filename_base - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1 = text_to_match('text:') - @fulltext = text_to_match('fulltxt:') - @topic_register = text_to_match('topic_register:') - @title = text_to_match('title:') # DublinCore 1 - title - @author = text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject = text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description = text_to_match('description:') # DublinCore 4 - description - @publisher = text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor = text_to_match('editor:') - @contributor = text_to_match('contributor:') # DublinCore 6 - contributor - @date = text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type = text_to_match('type:') # DublinCore 8 - type - @format = text_to_match('format:') # DublinCore 9 - format - @identifier = text_to_match('identifier:') # DublinCore 10 - identifier - @source = text_to_match('source:') # DublinCore 11 - source - @language = text_to_match('language:') # DublinCore 12 - language - @relation = text_to_match('relation:') # DublinCore 13 - relation - @coverage = text_to_match('coverage:') # DublinCore 14 - coverage - @rights = text_to_match('rights:') # DublinCore 15 - rights - @keywords = 
text_to_match('key(?:words?)?:') - @comment = text_to_match('comment:') - @abstract = text_to_match('abs(?:tract)?:') - @owner = text_to_match('owner:') - @date_created = text_to_match('date_created:') - @date_issued = text_to_match('date_issued:') - @date_modified = text_to_match('date_modified:') - @date_available = text_to_match('date_available:') - @date_valid = text_to_match('date_valid:') - @filename = text_to_match('src_filename_base:') - @text1 = text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1 = q['s1'] if q['s1'] =~ /\S/ - @fulltext = q['ft'] if q['ft'] =~ /\S/ - @keywords = q['key'] if q['key'] =~ /\S/ - @title = q['ti'] if q['ti'] =~ /\S/ - @author = q['au'] if q['au'] =~ /\S/ - @topic_register = q['tr'] if q['tr'] =~ /\S/ - @subject = q['sj'] if q['sj'] =~ /\S/ - @description = q['dsc'] if q['dsc'] =~ /\S/ - @publisher = q['pb'] if q['pb'] =~ /\S/ - @editor = q['cntr'] if q['cntr'] =~ /\S/ - @contributor = q['cntr'] if q['cntr'] =~ /\S/ - @date = q['dt'] if q['dt'] =~ /\S/ - @type = q['ty'] if q['ty'] =~ /\S/ - @identifier = q['id'] if q['id'] =~ /\S/ - @source = q['src'] if q['src'] =~ /\S/ - @language = q['lang'] if q['lang'] =~ /\S/ - @relation = q['rel'] if q['rel'] =~ /\S/ - @coverage = q['cov'] if q['cov'] =~ /\S/ - @rights = q['cr'] if q['cr'] =~ /\S/ - @comment = q['co'] if q['co'] =~ /\S/ - @abstract = q['ab'] if q['ab'] =~ /\S/ - @date_created = q['dtc'] if q['dtc'] =~ /\S/ - @date_issued = q['dti'] if q['dti'] =~ /\S/ - @date_modified = q['dtm'] if q['dtm'] =~ /\S/ - @date_available = q['dta'] if q['dta'] =~ /\S/ - @date_valid = q['dtv'] if q['dtv'] =~ /\S/ - @filename = if q['doc'] and q['search'] !~ /search db/ then q['doc'] - elsif q['fns'] =~ /\S/ then q['fns'] - end - @@limit = q['ltd'] if q['ltd'] =~ /\d+/ # 1000 - @@offset = q['off'] if q['off'] =~ /\d+/ # 0 - end - end - def 
text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field =~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end -end -class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q=l,t,q - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). - gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end -end -class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = 
DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st = DBI_SearchString.new('metadata_and_text.src_filename_base',search_for.src_filename_base,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + 
sql_match_limit.to_s + '&off=' + sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{
- pg. #{page.to_s} - -  >> - -
} - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} - -  >> - -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} - -  >> - -
} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{
- - <<  - - pg. #{page.to_s} -
} - else - %{
- - |<  - - - <<  - - pg. #{page.to_s} -
} - end - end - end - def sql_select_body - limit ||= @@limit - offset ||= @@offset - @sql_statement[:body] = %{ - SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename_base, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg_name, doc_objects.ocn, metadata_and_text.uid - FROM doc_objects, metadata_and_text - WHERE #{@search_text} AND doc_objects.uid_metadata_and_text = metadata_and_text.uid - ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename_base, doc_objects.ocn - } - @sql_statement[:range] = %{LIMIT #{limit} OFFSET #{offset} ;} - select = @sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{#{sql_select_body}} - end - def contents - @conn.execute(sql_select_body) - end -end -def tail - <<-'WOK' -


- - - - - - -
- - -
-
- SiSU - -
- git - -
-
- -

- Generated by - SiSU 6.3.1 2014-10-19 (2014w41/7) -
- - SiSU © Ralph Amissah - 1993, current 2014. - All Rights Reserved. -
- SiSU is software for document structuring, publishing and search, -
- - www.jus.uio.no/sisu - - and - - www.sisudoc.org - - sources - - git.sisudoc.org - -
- w3 since October 3 1993 - - ralph@amissah.com - -
- mailing list subscription - - http://lists.sisudoc.org/listinfo/sisu - -
- - sisu@lists.sisudoc.org - -

-
-

- SiSU using: -
Standard SiSU markup syntax, -
Standard SiSU meta-markup syntax, and the -
Standard SiSU object citation numbering and system, (object/text identifying/locating system) -
- © Ralph Amissah 1997, current 2014. - All Rights Reserved. -

-
-

- - .: - -

-
-

- SiSU is released under - GPL v3 - or later, - - http://www.gnu.org/licenses/gpl.html - -

-
-

- SiSU, developed using - - Ruby - - on - - Debian/Gnu/Linux - - software infrastructure, - with the usual GPL (or OSS) suspects. -

-
- - - WOK -end -@tail=tail -@counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 -@counters_txt,@counters_endn,@sql_select_body='','','' -FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo = 'checked' if cgi['echo'] =~/\S/ - checked_stats = 'checked' if cgi['stats'] =~/\S/ - checked_url = 'checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched = 'checked' if cgi['searched'] =~/\S/ - checked_tip = 'checked' if cgi['tip'] =~/\S/ - checked_case = 'checked' if cgi['casesense'] =~/\S/ - checked_sql = 'checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~ /check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all = 'checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none ='' - elsif cgi['checks'] =~ /check_none/ - checked_none = 'checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~ /check_selected/ - checked_selected = 'checked' - elsif cgi['checks'] =~ /check_default/ - checked_default = 'checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - 
checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=case cgi['db'] - when /spine.sqlite/ then '' - end - db_name='spine.search.sql.db' - #db_name='spine.sqlite.db' - #db_name='sisu_sqlite.db' - db_sqlite=case cgi['db'] - when /spine.sqlite/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - else "/var/www/sqlite/#{db_name}" - end - #when /spine.sqlite/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - #else "/srv/complete.sisudoc.org/web/manual/#{db_name}" - #end - #@conn=SQLite3::Database.new(db_sqlite) - @conn=SQLite3::Database.new("/var/www/sqlite/spine.search.sql.db") - #@conn=SQLite3::Database.new("/var/www/spine.sqlite.db") - @conn.results_as_hash=true - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1 = 's1=' + CGI.escape(@search_for.text1) if @search_for.text1 =~ /\S/ - ft = '&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext =~ /\S/ - key = 'key=' + CGI.escape(@search_for.keywords) if @search_for.keywords =~ /\S/ - ti = '&ti=' + CGI.escape(@search_for.title) if @search_for.title =~ /\S/ - au = '&au=' + CGI.escape(@search_for.author) if @search_for.author =~ /\S/ - tr = '&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register =~ /\S/ - sj = '&sj=' + CGI.escape(@search_for.subject) if @search_for.subject =~ /\S/ - dsc = '&dsc=' + CGI.escape(@search_for.description) if @search_for.description =~ /\S/ - pb = '&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher =~ /\S/ - edt = '&edt=' + CGI.escape(@search_for.editor) if @search_for.editor =~ /\S/ - cntr = '&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor =~ /\S/ 
- dt = '&dt=' + CGI.escape(@search_for.date) if @search_for.date =~ /\S/ - ty = '&ty=' + CGI.escape(@search_for.type) if @search_for.type =~ /\S/ - id = '&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier =~ /\S/ - src = '&src=' + CGI.escape(@search_for.source) if @search_for.source =~ /\S/ - lang = '&lang=' + CGI.escape(@search_for.language) if @search_for.language =~ /\S/ - rel = '&rel=' + CGI.escape(@search_for.relation) if @search_for.relation =~ /\S/ - cov = '&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage =~ /\S/ - cr = '&cr=' + CGI.escape(@search_for.rights) if @search_for.rights =~ /\S/ - co = '&co=' + CGI.escape(@search_for.comment) if @search_for.comment =~ /\S/ - ab = '&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract =~ /\S/ - dtc = '&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created =~ /\S/ - dti = '&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued =~ /\S/ - dtm = '&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified =~ /\S/ - dta = '&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available =~ /\S/ - dtv = '&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid =~ /\S/ - fns = '&fns=' + CGI.escape(@search_for.src_filename_base) if @search_for.src_filename_base =~ /\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'src_filename_base: '). - gsub(/tr=/,'topic_register: '). - gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{#{canned_note} #{canned_search_url_txt}
} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename = %{src_filename_base: #{green}#{@search_for.src_filename_base}

} if @search_for.src_filename_base =~ /\S+/ - p_text = %{text: #{green}#{@search_for.text1}

} if @search_for.text1 =~ /\S+/ - p_fulltext = %{fulltxt: #{green}#{@search_for.fulltext}
} if @search_for.fulltext =~ /\S+/ - p_title = %{title: #{green}#{@search_for.title}
} if @search_for.title =~ /\S+/ - p_author = %{author: #{green}#{@search_for.author}
} if @search_for.author =~ /\S+/ - p_editor = %{editor: #{green}#{@search_for.editor}
} if @search_for.editor =~ /\S+/ - p_contributor = %{contributor: #{green}#{@search_for.contributor}
} if @search_for.contributor =~ /\S+/ - p_date = %{date: #{green}#{@search_for.date}
} if @search_for.date =~ /\S+/ - p_rights = %{rights: #{green}#{@search_for.rights}
} if @search_for.rights =~ /\S+/ - p_topic_register = %{topic_register: #{green}#{@search_for.topic_register}
} if @search_for.topic_register =~ /\S+/ - p_subject = %{subject: #{green}#{@search_for.subject}
} if @search_for.subject =~ /\S+/ - p_keywords = %{keywords: #{green}#{@search_for.keywords}
} if @search_for.keywords =~ /\S+/ - p_identifier = %{identifier: #{green}#{@search_for.identifier}
} if @search_for.identifier =~ /\S+/ - p_type = %{type: #{green}#{@search_for.type}
} if @search_for.type =~ /\S+/ - p_format = %{format: #{green}#{@search_for.format}
} if @search_for.format =~ /\S+/ - p_relation = %{relation: #{green}#{@search_for.relation}
} if @search_for.relation =~ /\S+/ - p_coverage = %{coverage: #{green}#{@search_for.coverage}
} if @search_for.coverage =~ /\S+/ - p_description = %{description: #{green}#{@search_for.description}
} if @search_for.description =~ /\S+/ - p_abstract = %{abstract: #{green}#{@search_for.abstract}
} if @search_for.abstract =~ /\S+/ - p_comment = %{comment: #{green}#{@search_for.comment}
} if @search_for.comment =~ /\S+/ - p_publisher = %{publisher: #{green}#{@search_for.publisher}
} if @search_for.publisher =~ /\S+/ - p_source = %{source: #{green}#{@search_for.source}
} if @search_for.source =~ /\S+/ - p_language = %{language: #{green}#{@search_for.language}
} if @search_for.language =~ /\S+/ - search_note=<<-WOK - - database: #{green}#{@db}; selected view: #{green}#{cgi['view']} - search string: "#{green}#{analyze_format}"
- #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - - WOK - #eg = %{canned search e.g.:
#{url}
find: #{analyze}
database: #{database}} - #% dbi_canning - @header = Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - olduid="" - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - #@body_main << '



Main Text:
' << sql_select_body - else - end - @hostpath = "#{@hosturl_files}" - #@hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg_name,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg_name}.html" - : "#{path_html_seg(fn,ln)}/#{seg_name}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg_name}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? 
"#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename_base'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename_base'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['uid'] != olduid - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? %{search } - : %{search } - title = %{ #{ti} [#{c['language_document_char']}] by #{c['creator_author']} #{can_txt_srch}
} - title=@text_search_flag \ - ? '

'+title - : '
'+title - @counter_txt_doc+=1 - olduid=c['uid'] - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=[] - build=unescaped_search.scan(/\S+/).each do |g| - (g.to_s =~/(AND|OR)/) \ - ? (search_regex << '|') - : (search_regex << %{#{g.to_s}}) - end - search_regex=search_regex.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? (c['body'].gsub(/(<\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{\\1})) - : c['body'] - %{

ocn #{c['ocn']}:

#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}

ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{#{c['ocn']}, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{#{c['ocn']}, } - end - if c['seg_name'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{


Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]
} - else '' - end - else '' - end - @body_main << output #+ details - end - olduid = "" - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{ - @header.force_encoding("UTF-8") \ - + @counters_txt.force_encoding("UTF-8") \ - + @counters_endn.force_encoding("UTF-8") \ - + canned.force_encoding("UTF-8") \ - + @body_main.force_encoding("UTF-8") \ - + canned.force_encoding("UTF-8") \ - + @tail.force_encoding("UTF-8") - } #% print cgi_output_header+counters+body - end - rescue Exception => e - s='
' + CGI::escapeHTML(e.backtrace.reverse.join("\n"))
-    s << CGI::escapeHTML(e.message) + '
' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end -end diff --git a/util/rb/tex/dr_tex.rb b/util/rb/tex/dr_tex.rb deleted file mode 100755 index 767742c..0000000 --- a/util/rb/tex/dr_tex.rb +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env ruby -require 'fileutils' -pwd = Dir.pwd -argv,texfiles_with_path,flags=[],[],[] -lngs = %{(am|bg|bn|br|ca|cs|cy|da|de|el|en|eo|es|et|eu|fi|fr|ga|gl|he|hi|hr|hy|ia|is|it|ja|ko|la|lo|lt|lv|ml|mr|nl|no|nn|oc|pl|pt|pt_BR|ro|ru|sa|se|sk|sl|sq|sr|sv|ta|te|th|tk|tr|uk|ur|vi|zh)} -Regexp.new(lngs, Regexp::IGNORECASE) -argv=$* -argv.sort.each{|y| (y =~/^--\S+$/i) ? (flags << y) : (texfiles_with_path << y) } -if flags.length==0 \ -|| flags.inspect =~/"--help"/ - cmd=(/([^\/]+)$/).match($0)[1] - puts < 0 - texfiles_with_path.each do |texfile_with_path| - if texfile_with_path =~/.+\.tex/ - #puts texfile_with_path - if FileTest.file?(texfile_with_path) - file_basename_with_path = texfile_with_path.sub(/\.tex$/,'') - file_basename = file_basename_with_path.sub(/.*?([^\/]+)$/,'\1') - _out_path = out_path - if file_basename =~ /\.#{lngs}$/ - lng = file_basename.match(/\.#{lngs}$/)[1] - puts file_basename - puts lng - puts _out_path - unless _out_path.match(/\/#{lng}\/pdf$/) - _out_path = "#{out_path}/#{lng}/pdf" - FileUtils::mkdir_p(_out_path) - end - end - texpdf_cmd = %{xetex -interaction=batchmode -fmt=xelatex #{texfile_with_path}\n} - puts texpdf_cmd - 2.times { |i| system(texpdf_cmd) } - if (FileTest.file?(%{#{pwd}/#{file_basename}.pdf})) && (FileTest.directory?(_out_path)) - FileUtils::Verbose::mv(%{#{pwd}/#{file_basename}.pdf}, %{#{_out_path}/#{file_basename}.pdf}) - puts (%{#{_out_path}/#{file_basename}.pdf}) - else - puts "issue with pdf file or output directory" - puts "pdf file: #{pwd}/#{file_basename}.pdf}" - puts "output dir: #{_out_path}/" - end - suffix = ['log', 'out', 'toc', 'aux'] - suffix.each { |s| FileUtils::rm_f(%{#{pwd}/#{file_basename}.#{s}})} - end - end - end -end 
-Dir.chdir(pwd) -__END__ -- cgit v1.2.3