diff options
author | Ralph Amissah <ralph.amissah@gmail.com> | 2020-04-11 20:51:27 -0400 |
---|---|---|
committer | Ralph Amissah <ralph.amissah@gmail.com> | 2020-05-20 11:27:25 -0400 |
commit | 3dcd083585b3f486ece3cfaa0780a6e2ec5b43fe (patch) | |
tree | fda07f176104b63fb0bc23e829d457d2926bbdb1 | |
parent | metaverse, set behavior of block & group text (diff) |
help & manpages, start work
27 files changed, 9030 insertions, 5710 deletions
@@ -14,7 +14,13 @@ !*.d !*.rb !conf.sdl +!doc +!doc/** +!man +!man/** !org +!misc +!misc/** !util !util/** !ext_lib @@ -1,4 +1,4 @@ -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -7,7 +7,7 @@ - Copyright: (C) 2015 - 2020 Ralph Amissah - - code under src/ + - code under src/ & org/ - License: AGPL 3 or later: Spine, Doc Reform (SiSU), a framework for document structuring, publishing and @@ -34,19 +34,15 @@ [http://www.gnu.org/licenses/agpl.html] - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on SiSU markup) - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering - Hompages: - [http://www.doc_reform.org] [http://www.sisudoc.org] - - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] @@ -19,3 +19,307 @@ project_name: Spine, Doc Reform "http://www.doc_reform.org", "http://www.sisudoc.org" ] + +# Installation, Compilation + +SiSU spine is written in the programming language D for which there are 3 compilers: + +- dmd +- ldc +- gdc + +D projects tend to use dub as project manager +https://code.dlang.org/packages/dub +https://code.dlang.org/packages/dub +https://github.com/dlang/dub/blob/master/source/dub/commandline.d + + dub --compiler=ldc2 -color --config=ldc -b release + + dub --compiler=dmd -color --config=dmd + + dub --compiler=gdc-10 -color --config=gdc -b release + + make ldc + + make dmd + +there has been some coalescence around the Meson build 
system +https://mesonbuild.com/ + + meson + + ninja -C build + + meson setup --wipe build && ninja -v -C build + + make meson + +dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. + +# Configuration + +Configuration files are yaml files + +The following paths are searched: + + ~/.dr/config_local_site + ~/path_to_pod_root/.dr/config_local_site + +e.g. processing + + ~spineMarkupSamples/pod/* + +will search: + + ~spineMarkupSamples/pod/.dr/config_local_site + + ~/.dr/config_local_site + +to specify an alternative configuration file to use on the command line (in this +example named "my_config"): + + spine -v --html --config=~spineMarkupSamples/pod/.dr/my_config + +here is a sample configuration file: + +flag: + act0: "--html" + act1: "--html --epub" +output: + path: "/var/www/html" +default: + language: "en" + papersize: "a4" + text_wrap: "80" + digest: "sha256" +webserv: + http: "http" + domain: "localhost" + data_http: "http" + data_domain: "localhost" + data_root_url: "http://localhost" + data_root_path: "/var/www/html" + data_root_part: "" + images_root_part: "image" + cgi_title: "≅ SiSU Spine search" + cgi_http: "http" + cgi_domain: "localhost" + cgi_bin_url: "http://localhost/cgi-bin" + cgi_bin_part: "cgi-bin" + cgi_bin_path: "/usr/lib/cgi-bin" + cgi_search_script: "spine-search" + cgi_search_script_raw_fn_d: "spine_search.d" + cgi_port: "" + cgi_user: "" + cgi_action: "http://localhost/cgi-bin/spine-search" + db_sqlite: "spine.search.db" + db_pg_table: "" + db_pg_user: "" + +# Commands + +for a list of commands from the program type: + + spine -h + +at the time of writing this provides the following output: + + --abstraction document abstraction + --assert set optional assertions on + --cgi-search-form-codegen generates (pre-compiled) d code for search of specified db + --cgi-sqlite-search-filename =[filename] + --concordance file for document + --config =/path/to/config/file/including/filename + --dark alternative dark theme + 
--debug debug + --digest hash digest for each object + --epub process epub output + --harvest extract info on authors & topics from document header metadata + --harvest-authors extract info on authors from document header metadata + --harvest-topics extract info on topics from document header metadata + --hide-ocn object cite numbers + --html process html output + --html-link-harvest place links back to harvest in segmented html + --html-link-search html embedded search submission + --html-seg process html output + --html-scroll process html output + --lang =[lang code e.g. =en or =en,es] + --latex output for pdfs + --latex-color-links mono or color links for pdfs + --light default light theme + --manifest process manifest output + --ocn-off object cite numbers + --odf open document format text (--odt) + --odt open document format text + --output =/path/to/output/dir specify where to place output + --parallel parallelisation + --parallel-subprocesses nested parallelisation + --pdf latex output for pdfs + --pdf-color-links mono or color links for pdfs + --pod spine (doc reform) pod source content bundled +-q --quiet output to terminal + --section-backmatter document backmatter (default) + --section-biblio document biblio (default) + --section-blurb document blurb (default) + --section-body document body (default) + --section-bookindex document bookindex (default) + --section-endnotes document endnotes (default) + --section-glossary document glossary (default) + --section-toc table of contents (default) + --serial serial processing + --skip-output skip output + --show-config show config + --show-make show make + --show-metadata show metadata + --show-summary show summary + --source document markup source + --sqlite-discrete process discrete sqlite output + --sqlite-db-create create db, create tables + --sqlite-db-drop drop tables & db + --sqlite-db-recreate create db, create tables + --sqlite-delete sqlite output + --sqlite-db-filename =[filename].sql.db + 
--sqlite-insert sqlite output + --sqlite-update sqlite output + --text text output + --theme-dark alternative dark theme + --theme-light default light theme + --txt text output +-v --verbose output to terminal + --very-verbose output to terminal + --workon (reserved for some matters under development & testing) + --xhtml xhtml output +-h --help This help information. + +# Examples + +if configuration has been set specify just +- the desired output and +- the markup document/pod(s) to process + + spine -v --html ~spineMarkupSamples/markup/pod/sisu-manual + +if configuration has not been set or to override the set configuration specify +- the output path as well as +- the desired output and +- the markup document/pod(s) to process + +note: ~webDocRoot should be the path to web doc root, provide a suitable output path. + + spine -v --html --html-link-search --html-link-harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --epub --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --epub --latex --odt --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +## harvest + +if you have a document collection with documents that have metadata headers a +summary of the collection can be made using the harvest command + + spine -v --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --harvest ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --harvest ~spineMarkupSamples/pod/* + +## sqlite + +### create db + +if there is no sqlite db you first need to create one, to do so +- the name of the db and +- the root path for document output +must be specified: + + spine -v \ + --sqlite-db-create --sqlite-db-filename="spine.search.db" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + + spine -v 
--sqlite-db-create --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` + +if you have a configuration file providing this information that is to be used +for a document collection you can point to the document collection: + + spine -v --sqlite-db-create ~spineMarkupSamples/pod + +### populate db + +must specify: +- the name of the db and +- the root path for document output + + spine -v --sqlite-update \ + --sqlite-db-filename="spine.search.db" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + + spine -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +if you have a configuration file providing this information that is to be used +for a document collection you can point to the document collection: + + spine -v --sqlite-update ~spineMarkupSamples/pod/* + +### generate a cgi search form in d + + spine -v --cgi-search-form-codegen \ + --output=/var/www/html \ + ~spineMarkupSamples/pod + + spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod + + spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod/.dr/config_local_site + + spine --cgi-search-form-codegen --output=`echo ~webDocRoot` ~spineMarkupSamples/pod + + spine --cgi-search-form-codegen --cgi-sqlite-search-filename="spine_search" --output=`echo ~webDocRoot` + + spine -v --cgi-search-form-codegen \ + --sqlite-db-filename="spine.search.db" \ + --cgi-sqlite-search-filename="spine-search" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod + +#### compile the cgi search form + + cd /var/www/html/cgi # /var/www/html (default document root) + + cd ~webDocRoot/cgi + +the directory ~webDocRoot/cgi/src should contain two files +- spine_search.d (or whatever you named it) +- cgi.d (by Adam D. Ruppe) + + dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. 
+ +should compile spine-search in ~webDocRoot/cgi/cgi-bin and copy it to the +cgi-bin directory + + spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --cgi-sqlite-search-filename="spine-search" --output=`echo ~webDocRoot` + + spine -v --sqlite-db-create ~spineMarkupSamples/pod + + spine -v --html --html-link-search --cgi-sqlite-search-filename="spine-search" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --cgi-sqlite-search-filename="spine-search" --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +### create db & search form + + spine -v \ + --sqlite-db-create --sqlite-db-filename="spine.search.db" \ + --cgi-search-form-codegen --cgi-sqlite-search-filename="spine-search" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + +### html with links to search form + + spine -v --html \ + --html-link-search \ + --output=`echo ~webDocRoot` \ + ~spineMarkupSamples/pod/* diff --git a/doc/man/man1/spine.1 b/doc/man/man1/spine.1 new file mode 100644 index 0000000..255119a --- /dev/null +++ b/doc/man/man1/spine.1 @@ -0,0 +1,4088 @@ +.TH "spine" "1" "2020-04-05" "0.10.0" "Spine" +.br +.SH NAME +.br +sisu - documents: markup, structuring, publishing in multiple standard formats, and search +.br +.SH SYNOPSIS +.br +sisu [--options] [filename/wildcard] + +.br +sisu --txt --html --epub --odt --pdf --wordmap --sqlite --manpage --texinfo --sisupod --source --qrcode [filename/wildcard] + +.br +sisu --pg (--createdb|update [filename/wildcard]|--dropall) + +.SH SISU - MANUAL, +RALPH AMISSAH + +.SH WHAT IS SISU? + +.SH INTRODUCTION - WHAT IS SISU? + +.BR + +.B SiSU +is a lightweight markup based document creation and publishing framework that +is controlled from the command line. 
Prepare documents for +.B SiSU +using your text editor of choice, then use +.B SiSU +to generate various output document formats. + +.BR +From a single lightly prepared document (plain-text +.I UTF-8 +) sisu custom builds several standard output formats which share a common (text +object) numbering system for citation of content within a document (that also +has implications for search). The sisu engine works with an abstraction of the +document's structure and content from which it is possible to generate +different forms of representation of the document. +.B SiSU +produces: plain-text, +.I HTML, +.I XHTML, +.I XML, +.I EPUB, +.I ODF: +.I ODT +(Opendocument), +.I LaTeX, +.I PDF, +and populates an +.I SQL +database ( +.I PostgreSQL +or +.I SQLite +) with text objects, roughly, paragraph sized chunks so that document searches +are done at this level of granularity. + +.BR +Outputs share a common citation numbering system, associated with text objects +and any semantic meta-data provided about the document. + +.BR + +.B SiSU +also provides concordance files, document content certificates and manifests of +generated output. Book indexes may be made. + +.BR +Some document markup samples are provided in the package sisu -markup-samples. +Homepages: + +- <http://www.sisudoc.org/> + +- <http://www.jus.uio.no/sisu> + +.SH COMMANDS SUMMARY + +.SH DESCRIPTION + +.BR + +.B SiSU +is a document publishing system, that from a simple single marked-up document, +produces multiple output formats including: +.I plaintext, +.I HTML, +.I XHTML, +.I XML, +.I EPUB, +.I ODT +( +.I OpenDocument +( +.I ODF +) text), +.I LaTeX, +.I PDF, +info, and +.I SQL +( +.I PostgreSQL +and +.I SQLite +) , which share text object numbers ("object citation numbering") and the same +document structure information. 
For more see: <http://sisudoc.org> or +<http://www.jus.uio.no/sisu> +.SH DOCUMENT PROCESSING COMMAND FLAGS + +.TP +.B --abstraction [path + filename] +run document abstraction +.TP +.B --act[s0-9] [path + filename] +--act0 to --act9 configurable shortcuts for multiple flags, -0 to -9 synonyms, +configure in sisurc.yml; sisu default action on a specified file where no flag +is provided is --act0; --act or --acts for information on current actions +ascribed to --act0 to --act9 +.TP +.B --asciidoc [path + filename] +asciidoc, smart text (not available) +.TP +.B --cgi-search-form-codegen + generate d code search form to search db specified; needs --output=[path] and +--sqlite-db-filename=[cgi search form name] or path to configuration file +--config=[full path to config file] +.TP +.B --cgi-sqlite-search-filename=[filename] +name to give cgi-search form, (it generates a [filename].d file that requires +subsequent compilation) also required is the name of the sqlite db to be +searched by the form. +.TP +.B --concordance [path + filename] +(not implemented) +.TP +.B --config=[path to config file + filename] +.TP +.B --dark + alternative theme for html and epub output, a light (default) theme is + also provided +.TP +.B --digest (not implemented) +.TP +.B --delete [path + filename] +see --zap +.TP +.B --digests [path + filename] +not implemented +.TP +.B --epub [path + filename] +produces an epub document +.TP +.B --harvest [path to files] +extract and present info on authors & topics from document header metadata. +makes two lists of sisu output based on the sisu markup documents in a +directory: list of author and authors works (year and titles), and; list by +topic with titles and author. Makes use of header metadata fields (author, +title, date, topic_register). 
+.TP +.B --harvest-authors [path to files] +extract and present info on authors from metadata in document headers +.TP +.B --harvest-topics [path to files] +extract and present info on topics from metadata in document headers +.TP +.B --hide-ocn +turn visibility of object numbers off +.TP +.B --html [path + filename] +produces html output in two forms (i) segmented text with table of contents +(toc.html and index.html) and (ii) the document in a single file (scroll.html). +.TP +.B --html-link-harvest +within html output creates link to the document set metadata harvest output +part of --html output instruction and assumes that --harvest has been or will + be run +.TP +.B --html-link-search +within html output creates a search form for submission, requires information +on the name of the search form --search part of --html output instruction it +assumes there is a cgi search form and related document database +.TP +.B --html-scroll [path + filename] +produces html output, the document in a single file (scroll.html) only. Compare +--html-seg and --html +.TP +.B --html-seg [path + filename] +produces html output, segmented text with table of contents (toc.html and +index.html). Compare --html-scroll and --html +.TP +.B --lang=[language code, e.g. 
=en or =en,es] +provide language code of document +.TP +.B --latex [path + filename] +.I LaTeX +output for different document sizes (a4, a5, b4, letter) and orientations +(portrait, landscape) for downstream (processing and) conversion to pdf, (used +with xetex no direct link between programs provided as this is a much slower +process) +.TP +.B --latex-color-links +monochrome or color links within pdf, toggle (mono better for printing), +the default is mono for portrait and color for landscape documents +.TP +.B --light theme +for html and epub output, default, a dark alternative is provided +.TP +.B --manifest [path + filename] +produces an html summary of output generated (hyperlinked to content) and +document specific metadata (sisu_manifest.html). This step is assumed for most +processing flags. +.TP +.B --markdown [path + filename] +markdown smart text (not available) +.TP +.B --no-* +negate a toggle +.TP +.B --ocn-off +object numbers off (the c in ocn is for citation). See --hide-ocn +.TP +.B --odf [path + filename] +see --odt +.TP +.B --odt [path + filename] +produce open document output +.TP +.B --output=[path to output directories] +where to place document output +.TP +.B --parallel +parallelization on (the default except for sqlite) +.TP +.B --parallel-subprocesses +nested parallelization on (the default except for sqlite) +.TP +.B --papersize-(a4|a5|b5|letter|legal) +in conjunction with --pdf set pdf papersize, overriding any configuration +settings, to set more than one papersize repeat the option --pdf --papersize-a4 +--papersize-letter. See also --papersize=* (NOT implemented) +.BR +.B --papersize=a4,a5,b5,letter,legal +in conjunction with --pdf set pdf papersize, overriding any configuration +settings, to set more than one papersize list after the equal sign with a comma +separator --papersize=a4,letter. 
See also --papersize-* (NOT implemented) +.TP +.B --pdf [path + filename] +produces +.I LaTeX +see --latex +.TP +.B --pdf-color-links +monochrome or color links within latex for pdf. See --latex-color-links +.TP +.B --pod +markup source bundled in a zip file. +Produces a zipped file of the prepared document specified along with associated +images. This provides a quick way of gathering the relevant +parts of a sisu document which can then for example be emailed. A sisupod +includes sisu markup source file, (along with associated documents if a master +file, or available in multilingual versions), together with related images. +(it should be possible in future to run spine commands directly against a pod). +.TP +.B --qrcode [path + filename] +generate QR code image of metadata (used in manifest). (not implemented) +.TP +.B --quiet +quiet, less output to terminal. +.TP +.B --section-* +provides finer grain control over which parts of the document are processed +to produce output, toc, body, endnotes, glossary, biblio, bookindex and blurb +.TP +.B --section-biblio +produce document bibliography output, toggle +.TP +.B --section-blurb +produce document blurb output, toggle +.TP +.B --section-body +produce document body output, toggle +.TP +.B --section-bookindex +produce document bookindex output, toggle +.TP +.B --section-endnotes +produce document endnotes output, toggle +.TP +.B --section-glossary +produce document glossary output, toggle +.TP +.B --serial +serial processing --no-parallel +.TP +.B --show-config +show site and document configuration instructions. Requires path to +configuration file or path to documents to be processed. 
+.TP +.B --show-make +show document make instructions +.TP +.B --show-metadata +show document metadata +.TP +.B --show-summary +show document summary +.TP +.B --source [path + filename] +document markup source +.TP +.B --sha256 +set hash digest where used to sha256 (not implemented) +.TP +.B --sha512 +set hash digest where used to sha512 (not implemented) +.TP +.B --sqlite-discrete [path + filename] +create a per document sqlite db +.TP +.B --sqlite-db-create --sqlite-db-filename="[db filename]" --output="[output path]" +create a shared db and its tables. Requires a db filename, which may be set in the configuration file or on the command line as shown +.TP +.B --sqlite-db-drop [path + db filename] +drop (remove) db and its tables +.TP +.B --sqlite-db-recreate [path + filename] +drop and re-create a shared db and its tables. Requires a db filename, which may be set in the configuration file or on the command line with --sqlite-db-filename="[db name]" +.TP +.B --sqlite-db-filename="[db name]" +provide name of sqlite db, to be created, dropped, populated or for which a search form is to be made. This information may also be set in the configuration file. +.TP +.B --sqlite-delete [path + filename] +process sqlite output, remove file +.TP +.B --sqlite-insert [path + filename] +process sqlite output, insert file. See --sqlite-update +.TP +.B --sqlite-update [path + filename] +process sqlite output, update file +.TP +.B --source [filename/wildcard] +copies sisu markup file to output directory. 
Alias -s +.TP +.B --text [filename/wildcard] +produces +.I plaintext +output +(not implemented) +.TP +.B --theme-dark +See --dark +.TP +.B --theme-light +See --light +.TP +.B --txt [filename/wildcard] +produces +.I plaintext +output +(not implemented) +.TP +.B --txt-asciidoc [filename/wildcard] +see --asciidoc +(not implemented) +.TP +.B --txt-markdown [filename/wildcard] +see --markdown +(not implemented) +.TP +.B --txt-rst [filename/wildcard] +see --rst +(not implemented) +.TP +.B --txt-textile [filename/wildcard] +see --textile +(not implemented) +.TP +.B -v +on its own, provides +.B SiSU +version information +.TP +.B -v [filename/wildcard] +see --verbose +.TP +.B --verbose [filename/wildcard] +provides verbose output of what is being generated, where output is placed (and +error messages if any). Alias -v +.TP +.B --very-verbose [filename/wildcard] +provides more verbose output of what is being generated. See --verbose. Alias +-V +.TP +.B --version +spine version +(not implemented) +.TP +.B --xhtml +xhtml output +(not implemented) + +.SH COMMAND LINE MODIFIERS + +.TP +.B --no-ocn +[with --html --pdf or --epub] switches off +.I object citation numbering. +Produce output without identifying numbers in margins of html or +.I LaTeX +/pdf output. +.SH DATABASE COMMANDS + +.BR + +.B dbi - database interface + +.BR + +.B --pg or --pgsql +set for +.I PostgreSQL +.B --sqlite +default set for +.I SQLite +-d is modifiable with --db=[database type (PgSQL or +.I SQLite +) ] +.TP +.B --pg -v --createall +initial step, creates required relations (tables, indexes) in existing +.I PostgreSQL +database (a database should be created manually and given the same name as +working directory, as requested) (rb.dbi) [ -dv --createall +.I SQLite +equivalent] it may be necessary to run sisu -Dv --createdb initially NOTE: at +the present time for +.I PostgreSQL +it may be necessary to manually create the database. 
The command would be +'createdb [database name]' where database name would be SiSU_[present working +directory name (without path)]. Please use only alphanumerics and underscores. +.TP +.B --pg -v --import +[filename/wildcard] imports data specified to +.I PostgreSQL +db (rb.dbi) [ -dv --import +.I SQLite +equivalent] +.TP +.B --pg -v --update +[filename/wildcard] updates/imports specified data to +.I PostgreSQL +db (rb.dbi) [ -dv --update +.I SQLite +equivalent] +.TP +.B --pg --remove +[filename/wildcard] removes specified data to +.I PostgreSQL +db (rb.dbi) [ -d --remove +.I SQLite +equivalent] +.TP +.B --pg --dropall +kills data" and drops ( +.I PostgreSQL +or +.I SQLite +) db, tables & indexes [ -d --dropall +.I SQLite +equivalent] + +.BR +The -v is for verbose output. +.SH CONFIGURATION + +.BR + +default location: +.TP +~/.dr/config_local_site +.TP +.nf +flag: + act0: "--html" + act1: "--html --epub" +output: + path: "/var/www/html" +default: + language: "en" + papersize: "a4" + text_wrap: "80" + digest: "sha256" +webserv: + http: "http" + domain: "localhost" + data_http: "http" + data_domain: "localhost" + data_root_url: "http://localhost" + data_root_path: "/var/www/html" + data_root_part: "" + images_root_part: "image" + cgi_title: "≅ SiSU Spine search" + cgi_http: "http" + cgi_domain: "localhost" + cgi_bin_url: "http://localhost/cgi-bin" + cgi_bin_part: "cgi-bin" + cgi_bin_path: "/usr/lib/cgi-bin" + cgi_search_script: "spine-search" + cgi_search_script_raw_fn_d: "spine_search.d" + cgi_port: "" + cgi_user: "" + cgi_action: "http://localhost/cgi-bin/spine-search" + db_sqlite: "spine.search.db" + db_pg_table: "" + db_pg_user: "" +.fi + +.BR +.SH SAMPLE POD DIRECTORY STRUCTURE +.BR +.TP +.nf + +pod (directory may contain multiple documents) + └── the_wealth_of_networks.yochai_benkler + ├── conf + │ └── sisu_document_make + ├── media + │ ├── image + │ │ ├── won_benkler_2_1.png + │ │ ├── won_benkler_6_1.png + │ │ ├── won_benkler_7_1.png + │ │ ├── 
won_benkler_7_2.png + │ │ ├── won_benkler_7_3a.png + │ │ ├── won_benkler_7_3b.png + │ │ ├── won_benkler_7_4.png + │ │ ├── won_benkler_7_5.png + │ │ ├── won_benkler_7_6.png + │ │ └── won_benkler_9_1.png + │ └── text + │ └── en + │ └── the_wealth_of_networks.yochai_benkler.sst + └── pod.manifest + +.fi +.SH COMMAND LINE EXAMPLES + +.TP +note: ~webDocRoot should be the path to web doc root, provide a suitable output path. +.TP +spine -v --html --html-link-search --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --html --html-link-search --html-link-harvest --epub --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod +.TP +spine -v --sqlite-db-create ~spineMarkupSamples/pod +.TP +spine -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --sqlite-update ~spineMarkupSamples/pod/* +.TP +spine -v --show-config +.TP +spine -v --show-config --config= ~spineMarkupSamples/pod/.dr/config_local_site_test +.TP +spine -v --show-config --config=~spineMarkupSamples/pod/.dr +.TP +spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod/.dr/config_local +.TP +cd ~webDocRoot/cgi +.TP +dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. +.TP + +.BR +Running sisu (alone without any flags, filenames or wildcards) brings up the +interactive help, as does any sisu command that is not recognised. Enter to +escape. 
+.SH HELP + +.SH SISU MANUAL + + +.BR +The most up to date information on sisu should be contained in the sisu_manual, +available at: + +.BR + <http://sisudoc.org/sisu/sisu_manual/> + +.BR +The manual can be generated from source, found respectively, either within the +.B SiSU +tarball or installed locally at: + +.BR + ./data/doc/sisu/markup-samples/sisu_manual + +.BR + /usr/share/doc/sisu/markup-samples/sisu_manual + +.BR +move to the respective directory and type e.g.: + +.BR + sisu sisu_manual.ssm +.SH SISU MAN PAGES + + +.BR +If +.B SiSU +is installed on your system usual man commands should be available, try: + +.BR + man sisu + +.BR +Most +.B SiSU +man pages are generated directly from sisu documents that are used to prepare +the sisu manual, the source files for which are located within the +.B SiSU +tarball at: + +.BR + ./data/doc/sisu/markup-samples/sisu_manual + +.BR +Once installed, directory equivalent to: + +.BR + /usr/share/doc/sisu/markup-samples/sisu_manual + +.BR +Available man pages are converted back to html using man2html: + +.BR + /usr/share/doc/sisu/html/ + +.BR + ./data/doc/sisu/html + +.BR +An online version of the sisu man page is available here: + +.BR + +- various sisu man pages <http://www.jus.uio.no/sisu/man/> [^1] + +.BR +- sisu.1 <http://www.jus.uio.no/sisu/man/sisu.1.html> [^2] +.SH SISU BUILT-IN INTERACTIVE HELP, [DISCONTINUED] + + +.BR +This fell out of date and has been discontinued. +.SH INTRODUCTION TO SISU MARKUP[^3] + +.SH SUMMARY + +.BR + +.B SiSU +source documents are +.I plaintext +( +.I UTF-8 +)[^4] files + +.BR +All paragraphs are separated by an empty line. 
+ +.BR +Markup is comprised of: + +.BR +- at the top of a document, the document header made up of semantic meta-data +about the document and if desired additional processing instructions (such as an +instruction to automatically number headings from a particular level down) + +.BR +- followed by the prepared substantive text of which the most important single +characteristic is the markup of different heading levels, which define the +primary outline of the document structure. Markup of substantive text includes: + +.BR + * heading levels define document structure + +.BR + * text basic attributes, italics, bold etc. + +.BR + * grouped text (objects), which are to be treated differently, such as code + blocks or poems. + +.BR + * footnotes/endnotes + +.BR + * linked text and images + +.BR + * paragraph actions, such as indent, bulleted, numbered-lists, etc. +.SH MARKUP RULES, DOCUMENT STRUCTURE AND METADATA REQUIREMENTS + + +.BR +minimal content/structure requirement: + +.BR +[metadata] +.nf +A~ (level A [title]) + +1~ (at least one level 1 [segment/(chapter)]) +.fi + + +.BR +structure rules (document hierarchy, heading levels): + +.BR +there are two sets of heading levels ABCD (title & parts if any) and 123 +(segment & subsegments if any) + +.BR +sisu has the following levels: +.nf +A~ [title] . 
+ required (== 1) followed by B~ or 1~ +B~ [part] * + followed by C~ or 1~ +C~ [subpart] * + followed by D~ or 1~ +D~ [subsubpart] * + followed by 1~ +1~ [segment (chapter)] + + required (>= 1) followed by text or 2~ +text * + followed by more text or 1~, 2~ + or relevant part *() +2~ [subsegment] * + followed by text or 3~ +text * + followed by more text or 1~, 2~ or 3~ + or relevant part, see *() +3~ [subsubsegment] * + followed by text +text * + followed by more text or 1~, 2~ or 3~ or relevant part, see *() + +*(B~ if none other used; + if C~ is last used: C~ or B~; + if D~ is used: D~, C~ or B~) +.fi + +.nf +- level A~ is the title and is mandatory +- there can only be one level A~ + +- heading levels BCD, are optional and there may be several of each + (where all three are used corresponding to e.g. Book Part Section) + * sublevels that are used must follow each other sequentially + (alphabetically), +- heading levels A~ B~ C~ D~ are followed by other heading levels rather + than substantive text + which may be the subsequent sequential (alphabetic) heading part level + or a heading (segment) level 1~ +- there must be at least one heading (segment) level 1~ + (the level on which the text is segmented, in a book would correspond + to the Chapter level) +- additional heading levels 1~ 2~ 3~ are optional and there may be several + of each +- heading levels 1~ 2~ 3~ are followed by text (which may be followed by + the same heading level) + and/or the next lower numeric heading level (followed by text) + or indeed return to the relevant part level + (as a corollary to the rules above substantive text/ content + must be preceded by a level 1~ (2~ or 3~) heading) +.fi + +.SH MARKUP EXAMPLES + +.SH ONLINE + + +.BR +Online markup examples are available together with the respective outputs +produced from <http://www.jus.uio.no/sisu/SiSU/examples.html> or from +<http://www.jus.uio.no/sisu/sisu_examples/> + +.BR +There is of course this document, which provides a cursory 
overview of sisu +markup and the respective output produced: +<http://www.jus.uio.no/sisu/sisu_markup/> + +.BR +an alternative presentation of markup syntax: +/usr/share/doc/sisu/on_markup.txt.gz +.SH INSTALLED + + +.BR +With +.B SiSU +installed sample skins may be found in: /usr/share/doc/sisu/markup-samples (or +equivalent directory) and if sisu -markup-samples is installed also under: +/usr/share/doc/sisu/markup-samples-non-free + +.SH MARKUP OF HEADERS + +.BR +Headers contain either: semantic meta-data about a document, which can be used +by any output module of the program, or; processing instructions. + +.BR +Note: the first line of a document may include information on the markup +version used in the form of a comment. Comments are a percentage mark at the +start of a paragraph (and as the first character in a line of text) followed by +a space and the comment: +.nf +% this would be a comment +.fi + +.SH SAMPLE HEADER + + +.BR +This current document is loaded by a master document that has a header similar +to this one: +.nf +% SiSU master 4.0 + +title: SiSU + subtitle: Manual + +creator: + author: Amissah, Ralph + +publisher: [publisher name] + +rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +classify: + topic_register: SiSU:manual;electronic documents:SiSU:manual + subject: ebook, epublishing, electronic book, electronic publishing, + electronic document, electronic citation, data structure, + citation systems, search + +% used_by: manual + +date: + published: 2008-05-22 + created: 2002-08-28 + issued: 2002-08-28 + available: 2002-08-28 + modified: 2010-03-03 + +make: + num_top: 1 + breaks: new=C; break=1 + bold: /Gnu|Debian|Ruby|SiSU/ + home_button_text: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + footer: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + manpage: name=sisu - documents: markup, structuring, publishing in multiple standard formats, and search; + synopsis=sisu 
[-abcDdeFhIiMmNnopqRrSsTtUuVvwXxYyZz0-9] [filename/wildcard ] + . sisu [-Ddcv] [instruction] + . sisu [-CcFLSVvW] + +@links: + { SiSU Homepage }http://www.sisudoc.org/ + { SiSU Manual }http://www.sisudoc.org/sisu/sisu_manual/ + { Book Samples & Markup Examples }http://www.jus.uio.no/sisu/SiSU/examples.html + { SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html + { SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html + { SiSU Git repo }http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary + { SiSU List Archives }http://lists.sisudoc.org/pipermail/sisu/ + { SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html + { SiSU Project @ Debian }http://qa.debian.org/developer.php?login=sisu@lists.sisudoc.org + { SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +.fi + +.SH AVAILABLE HEADERS + + +.BR +Header tags appear at the beginning of a document and provide meta information +on the document (such as the +.I Dublin Core +) , or information as to how the document as a whole is to be processed. All +header instructions take the form @headername: or on the next line and indented +by one space :subheadername: All +.I Dublin Core +meta tags are available + +.BR + +.B @identifier: +information or instructions + +.BR +where the "identifier" is a tag recognised by the program, and the +"information" or "instructions" belong to the tag/identifier specified + +.BR +Note: a header where used should only be used once; all headers apart from +@title: are optional; the @structure: header is used to describe document +structure, and can be useful to know. 
+ +.BR +This is a sample header +.nf +% SiSU 2.0 [declared file-type identifier with markup version] +.fi + +.nf +@title: [title text] [this header is the only one that is mandatory] + subtitle: [subtitle if any] + language: English +.fi + +.nf +creator: + author: [Lastname, First names] + illustrator: [Lastname, First names] + translator: [Lastname, First names] + prepared_by: [Lastname, First names] +.fi + +.nf +date: + published: [year or yyyy-mm-dd] + created: [year or yyyy-mm-dd] + issued: [year or yyyy-mm-dd] + available: [year or yyyy-mm-dd] + modified: [year or yyyy-mm-dd] + valid: [year or yyyy-mm-dd] + added_to_site: [year or yyyy-mm-dd] + translated: [year or yyyy-mm-dd] +.fi + +.nf +rights: + copyright: Copyright (C) [Year and Holder] + license: [Use License granted] + text: [Year and Holder] + translation: [Name, Year] + illustrations: [Name, Year] +.fi + +.nf +classify: + topic_register: SiSU:markup sample:book;book:novel:fantasy + type: + subject: + description: + keywords: + abstract: + loc: [Library of Congress classification] + dewey: [Dewey classification +.fi + +.nf +identify: + :isbn: [ISBN] + :oclc: +.fi + +.nf +links: { SiSU }http://www.sisudoc.org + { FSF }http://www.fsf.org +.fi + +.nf +make: + num_top: 1 + headings: [text to match for each level + (e.g. PART; Chapter; Section; Article; or another: none; BOOK|FIRST|SECOND; none; CHAPTER;) + breaks: new=:C; break=1 + promo: sisu, ruby, sisu_search_libre, open_society + bold: [regular expression of words/phrases to be made bold] + italics: [regular expression of words/phrases to italicise] + home_button_text: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + footer: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org +.fi + +.nf +original: + language: [language] +.fi + +.nf +notes: + comment: + prefix: [prefix is placed just after table of contents] +.fi + +.SH MARKUP OF SUBSTANTIVE TEXT + +.SH HEADING LEVELS + + +.BR +Heading levels are :A~ ,:B~ ,:C~ ,1~ ,2~ ,3~ ... 
:A - :C being part / section +headings, followed by other heading levels, and 1 -6 being headings followed by +substantive text or sub-headings. :A~ usually the title :A~? conditional level +1 heading (used where a stand-alone document may be imported into another) + +.BR + +.B :A~ [heading text] +Top level heading [this usually has similar content to the title @title: ] +NOTE: the heading levels described here are in 0.38 notation, see heading + +.BR + +.B :B~ [heading text] +Second level heading [this is a heading level divider] + +.BR + +.B :C~ [heading text] +Third level heading [this is a heading level divider] + +.BR + +.B 1~ [heading text] +Top level heading preceding substantive text of document or sub-heading 2, the +heading level that would normally be marked 1. or 2. or 3. etc. in a document, +and the level on which sisu by default would break html output into named +segments, names are provided automatically if none are given (a number), +otherwise takes the form 1~my_filename_for_this_segment + +.BR + +.B 2~ [heading text] +Second level heading preceding substantive text of document or sub-heading 3 , +the heading level that would normally be marked 1.1 or 1.2 or 1.3 or 2.1 etc. +in a document. + +.BR + +.B 3~ [heading text] +Third level heading preceding substantive text of document, that would normally +be marked 1.1.1 or 1.1.2 or 1.2.1 or 2.1.1 etc. in a document +.nf +1~filename level 1 heading, + +% the primary division such as Chapter that is followed by substantive text, and may be further subdivided (this is the level on which by default html segments are made) +.fi + +.SH FONT ATTRIBUTES + +.BR + +.B markup example: +.nf +normal text, *{emphasis}*, !{bold text}!, /{italics}/, _{underscore}_, "{citation}", +^{superscript}^, ,{subscript},, +{inserted text}+, -{strikethrough}-, #{monospace}# + +normal text + +*{emphasis}* [note: can be configured to be represented by bold, italics or underscore] + +!{bold text}! 
+ +/{italics}/ + +_{underscore}_ + +"{citation}" + +^{superscript}^ + +,{subscript}, + ++{inserted text}+ + +-{strikethrough}- + +#{monospace}# +.fi + + +.BR + +.B resulting output: + +.BR +normal text, +.B emphasis, +.B bold text +, +.I italics, +.I underscore, +"citation", ^superscript^, [subscript], ++inserted text++, --strikethrough--, +monospace + +.BR +normal text + +.BR + +.B emphasis +[note: can be configured to be represented by bold, italics or underscore] + +.BR + +.B bold text + +.BR + +.I italics + +.BR +.I underscore + +.BR +"citation" + +.BR +^superscript^ + +.BR +[subscript] + +.BR +++inserted text++ + +.BR +--strikethrough-- + +.BR +monospace +.SH INDENTATION AND BULLETS + + +.BR + +.B markup example: +.nf +ordinary paragraph + +_1 indent paragraph one step + +_2 indent paragraph two steps + +_9 indent paragraph nine steps +.fi + + +.BR + +.B resulting output: + +.BR +ordinary paragraph + +.BR + indent paragraph one step + +.BR + indent paragraph two steps + +.BR + indent paragraph nine steps + +.BR + +.B markup example: +.nf +_* bullet text + +_1* bullet text, first indent + +_2* bullet text, two step indent +.fi + + +.BR + +.B resulting output: + +.BR +- bullet text + +.BR + * bullet text, first indent + +.BR + * bullet text, two step indent + +.BR +Numbered List (not to be confused with headings/titles, (document structure)) + +.BR + +.B markup example: +.nf +# numbered list numbered list 1., 2., 3, etc. + +_# numbered list numbered list indented a., b., c., d., etc. 
+.fi + +.SH HANGING INDENTS + + +.BR + +.B markup example: +.nf +_0_1 first line no indent, +rest of paragraph indented one step + +_1_0 first line indented, +rest of paragraph no indent + +in each case level may be 0-9 +.fi + + +.BR + +.B resulting output: + +.BR +first line no indent, rest of paragraph indented one step; first line no + indent, rest of paragraph indented one step; first line no indent, rest of + paragraph indented one step; first line no indent, rest of paragraph indented + one step; first line no indent, rest of paragraph indented one step; first + line no indent, rest of paragraph indented one step; first line no indent, + rest of paragraph indented one step; first line no indent, rest of paragraph + indented one step; first line no indent, rest of paragraph indented one step; + +.BR +A regular paragraph. + +.BR +first line indented, rest of paragraph no indent first line indented, rest of +paragraph no indent first line indented, rest of paragraph no indent first line +indented, rest of paragraph no indent first line indented, rest of paragraph no +indent first line indented, rest of paragraph no indent first line indented, +rest of paragraph no indent first line indented, rest of paragraph no indent +first line indented, rest of paragraph no indent first line indented, rest of +paragraph no indent first line indented, rest of paragraph no indent + +.BR +in each case level may be 0-9 + +.BR + +.B live-build + A collection of scripts used to build customized +.B Debian + Livesystems. + .I live-build + was formerly known as live-helper, and even earlier known as live-package. + +.BR + +.B live-build + + A collection of scripts used to build customized +.B Debian + Livesystems. +.I live-build + was formerly known as live-helper, and even earlier known as live-package. +.SH FOOTNOTES / ENDNOTES + + +.BR +Footnotes and endnotes are marked up at the location where they would be +indicated within a text. They are automatically numbered. 
The output type +determines whether footnotes or endnotes will be produced + +.BR + +.B markup example: +.nf +~{ a footnote or endnote }~ +.fi + + +.BR + +.B resulting output: + +.BR +[^5] + +.BR + +.B markup example: +.nf +normal text~{ self contained endnote marker & endnote in one }~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text[^6] continues + +.BR + +.B markup example: +.nf +normal text ~{* unnumbered asterisk footnote/endnote, insert multiple asterisks if required }~ continues + +normal text ~{** another unnumbered asterisk footnote/endnote }~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text [^*] continues + +.BR +normal text [^**] continues + +.BR + +.B markup example: +.nf +normal text ~[* editors notes, numbered asterisk footnote/endnote series ]~ continues + +normal text ~[+ editors notes, numbered plus symbol footnote/endnote series ]~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text [^*3] continues + +.BR +normal text [^+2] continues + +.BR + +.B Alternative endnote pair notation for footnotes/endnotes: +.nf +% note the endnote marker "~^" + +normal text~^ continues + +^~ endnote text following the paragraph in which the marker occurs +.fi + + +.BR +the standard and pair notation cannot be mixed in the same document +.SH LINKS + +.SH NAKED URLS WITHIN TEXT, DEALING WITH URLS + + +.BR +urls found within text are marked up automatically. A url within text is +automatically hyperlinked to itself and by default decorated with angled +braces, unless they are contained within a code block (in which case they are +passed as normal text), or escaped by a preceding underscore (in which case the +decoration is omitted). 
+ +.BR + +.B markup example: +.nf +normal text http://www.sisudoc.org/ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text <http://www.sisudoc.org/> continues + +.BR +An escaped url without decoration + +.BR + +.B markup example: +.nf +normal text _http://www.sisudoc.org/ continues + +deb _http://www.jus.uio.no/sisu/archive unstable main non-free +.fi + + +.BR + +.B resulting output: + +.BR +normal text <_http://www.sisudoc.org/> continues + +.BR +deb <_http://www.jus.uio.no/sisu/archive> unstable main non-free + +.BR +where a code block is used there is neither decoration nor hyperlinking, code +blocks are discussed later in this document + +.BR + +.B resulting output: +.nf +deb http://www.jus.uio.no/sisu/archive unstable main non-free +deb-src http://www.jus.uio.no/sisu/archive unstable main non-free +.fi + +.SH LINKING TEXT + + +.BR +To link text or an image to a url the markup is as follows + +.BR + +.B markup example: +.nf +about { SiSU }http://url.org markup +.fi + + +.BR + +.B resulting output: + +.BR +aboutSiSU <http://www.sisudoc.org/> markup + +.BR +A shortcut notation is available so the url link may also be provided +automatically as a footnote + +.BR + +.B markup example: +.nf +about {~^ SiSU }http://url.org markup +.fi + + +.BR + +.B resulting output: + +.BR +aboutSiSU <http://www.sisudoc.org/> [^7] markup + +.BR +Internal document links to a tagged location, including an ocn + +.BR + +.B markup example: +.nf +about { text links }#link_text +.fi + + +.BR + +.B resulting output: + +.BR +about ⌠text links⌡⌈link_text⌋ + +.BR +Shared document collection link + +.BR + +.B markup example: +.nf +about { SiSU book markup examples }:SiSU/examples.html +.fi + + +.BR + +.B resulting output: + +.BR +about ⌠ +.B SiSU +book markup examples⌡⌈:SiSU/examples.html⌋ +.SH LINKING IMAGES + + +.BR + +.B markup example: +.nf +{ tux.png 64x80 }image + +% various url linked images + +{tux.png 64x80 "a better way" }http://www.sisudoc.org/ + 
+{GnuDebianLinuxRubyBetterWay.png 100x101 "Way Better - with Gnu/Linux, Debian and Ruby" }http://www.sisudoc.org/ + +{~^ ruby_logo.png "Ruby" }http://www.ruby-lang.org/en/ +.fi + + +.BR + +.B resulting output: + +.BR +[ tux.png ] + +.BR +tux.png 64x80 "Gnu/Linux - a better way" <http://www.sisudoc.org/> + +.BR +GnuDebianLinuxRubyBetterWay.png 100x101 "Way Better - with Gnu/Linux, Debian +and Ruby" <http://www.sisudoc.org/> + +.BR +ruby_logo.png 70x90 "Ruby" <http://www.ruby-lang.org/en/> [^8] + +.BR + +.B linked url footnote shortcut +.nf +{~^ [text to link] }http://url.org + +% maps to: { [text to link] }http://url.org ~{ http://url.org }~ + +% which produces hyper-linked text within a document/paragraph, with an endnote providing the url for the text location used in the hyperlink +.fi + +.nf +text marker *~name +.fi + + +.BR +note at a heading level the same is automatically achieved by providing names +to headings 1, 2 and 3 i.e. 2~[name] and 3~[name] or in the case of +auto-heading numbering, without further intervention. 
+.SH LINK SHORTCUT FOR MULTIPLE VERSIONS OF A SISU DOCUMENT IN THE SAME DIRECTORY +TREE + + +.BR + +.B markup example: +.nf +!_ /{"Viral Spiral"}/, David Bollier + +{ "Viral Spiral", David Bollier [3sS]}viral_spiral.david_bollier.sst +.fi + + +.BR + +.B +.I "Viral Spiral", +David Bollier +"Viral Spiral", David Bollier <http://corundum/sisu_manual/en/manifest/viral_spiral.david_bollier.html> + document manifest <http://corundum/sisu_manual/en/manifest/viral_spiral.david_bollier.html> + ⌠html, segmented text⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠html, scroll, document in one⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠epub⌡「http://corundum/sisu_manual/en/epub/viral_spiral.david_bollier.epub」 + ⌠pdf, landscape⌡「http://corundum/sisu_manual/en/pdf/viral_spiral.david_bollier.pdf」 + ⌠pdf, portrait⌡「http://corundum/sisu_manual/en/pdf/viral_spiral.david_bollier.pdf」 + ⌠odf: odt, open document text⌡「http://corundum/sisu_manual/en/odt/viral_spiral.david_bollier.odt」 + ⌠xhtml scroll⌡「http://corundum/sisu_manual/en/xhtml/viral_spiral.david_bollier.xhtml」 + ⌠xml, sax⌡「http://corundum/sisu_manual/en/xml/viral_spiral.david_bollier.xml」 + ⌠xml, dom⌡「http://corundum/sisu_manual/en/xml/viral_spiral.david_bollier.xml」 + ⌠concordance⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠dcc, document content certificate (digests)⌡「http://corundum/sisu_manual/en/digest/viral_spiral.david_bollier.txt」 + ⌠markup source text⌡「http://corundum/sisu_manual/en/src/viral_spiral.david_bollier.sst」 + ⌠markup source (zipped) pod⌡「http://corundum/sisu_manual/en/pod/viral_spiral.david_bollier.sst.zip」 + +.SH GROUPED TEXT / BLOCKED TEXT + + +.BR +There are two markup syntaxes for blocked text, using curly braces or using +tics +.SH BLOCKED TEXT CURLY BRACE SYNTAX + + +.BR +at the start of a line on its own use name of block type with an opening curly +brace, follow with the content of the block, and close with a 
closing curly +brace and the name of the block type, e.g. +.nf +code{ + +this is a code block + +}code +.fi + +.nf + +poem{ + +this here is a poem + +}poem +.fi + +.SH BLOCKED TEXT TIC SYNTAX + +.nf +``` code +this is a code block + +``` + +``` poem +this here is a poem + +``` +.fi + + +.BR +start a line with three backticks, a space followed by the name of the +block type, follow with the content of the block, and close with three back +ticks on a line of their own, e.g. +.SH TABLES + + +.BR +Tables may be prepared in either of two forms + +.BR + +.B markup example: +.nf +table{ c3; 40; 30; 30; + +This is a table +this would become column two of row one +column three of row one is here + +And here begins another row +column two of row two +column three of row two, and so on + +}table +.fi + + +.BR + +.B resulting output: +This is a table|this would become column two of row one|column three of row one is here』And here begins another row|column two of row two|column three of row two, and so on』 + + +.BR +a second form may be easier to work with in cases where there is not much +information in each column + +.BR + +.B markup example: +[^9] +.nf +!_ Table 3.1: Contributors to Wikipedia, January 2001 - June 2005 + +{table~h 24; 12; 12; 12; 12; 12; 12;} + |Jan. 2001|Jan. 2002|Jan. 2003|Jan. 2004|July 2004|June 2006 +Contributors* | 10| 472| 2,188| 9,653| 25,011| 48,721 +Active contributors** | 9| 212| 846| 3,228| 8,442| 16,945 +Very active contributors*** | 0| 31| 190| 692| 1,639| 3,016 +No. of English language articles| 25| 16,000| 101,000| 190,000| 320,000| 630,000 +No. of articles, all languages | 25| 19,000| 138,000| 490,000| 862,000|1,600,000 + +- Contributed at least ten times; ** at least 5 times in last month; *** more than 100 times in last month. +.fi + + +.BR + +.B resulting output: + +.BR + +.B Table 3.1: Contributors to Wikipedia, January 2001 - June 2005 +|Jan. 2001|Jan. 2002|Jan. 2003|Jan. 
2004|July 2004|June 2006』Contributors*|10|472|2,188|9,653|25,011|48,721』Active contributors**|9|212|846|3,228|8,442|16,945』Very active contributors***|0|31|190|692|1,639|3,016』No. of English language articles|25|16,000|101,000|190,000|320,000|630,000』No. of articles, all languages|25|19,000|138,000|490,000|862,000|1,600,000』 + + +.BR +- Contributed at least ten times; ** at least 5 times in last month; *** more +than 100 times in last month. +.SH POEM + + +.BR + +.B basic markup: +.nf +poem{ + + Your poem here + +}poem + +Each verse in a poem is given an object number. +.fi + + +.BR + +.B markup example: +.nf +poem{ + + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + +}poem +.fi + + +.BR + +.B resulting output: + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + + +.SH GROUP + + +.BR + +.B basic markup: +.nf +group{ + + Your grouped text here + +}group + +A group is treated as an object and given a single object number. +.fi + + +.BR + +.B markup example: +.nf +group{ + + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. 
--Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + +}group +.fi + + +.BR + +.B resulting output: + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + + +.SH CODE + + +.BR +Code tags code{ ... }code (used as with other group tags described above) are +used to escape regular sisu markup, and have been used extensively within this +document to provide examples of +.B SiSU +markup. You cannot however use code tags to escape code tags. They are however +used in the same way as group or poem tags. + +.BR +A code-block is treated as an object and given a single object number. [an +option to number each line of code may be considered at some later time] + +.BR + +.B use of code tags instead of poem compared, resulting output: +.nf + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." 
+ "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' +.fi + + +.BR +From +.B SiSU +2.7.7 on you can number codeblocks by placing a hash after the opening code tag +code{# as demonstrated here: +.nf +1 | `Fury said to a +2 | mouse, That he +3 | met in the +4 | house, +5 | "Let us +6 | both go to +7 | law: I will +8 | prosecute +9 | YOU. --Come, +10 | I'll take no +11 | denial; We +12 | must have a +13 | trial: For +14 | really this +15 | morning I've +16 | nothing +17 | to do." +18 | Said the +19 | mouse to the +20 | cur, "Such +21 | a trial, +22 | dear Sir, +23 | With +24 | no jury +25 | or judge, +26 | would be +27 | wasting +28 | our +29 | breath." +30 | "I'll be +31 | judge, I'll +32 | be jury," +33 | Said +34 | cunning +35 | old Fury: +36 | "I'll +37 | try the +38 | whole +39 | cause, +40 | and +41 | condemn +42 | you +43 | to +44 | death."' +.fi + +.SH ADDITIONAL BREAKS - LINEBREAKS WITHIN OBJECTS, COLUMN AND PAGE-BREAKS + +.SH LINE-BREAKS + + +.BR +To break a line within a "paragraph object", two backslashes \e\e +with a space before and a space or newline after them +may be used. +.nf +To break a line within a "paragraph object", +two backslashes \e\e with a space before +and a space or newline after them \e\e +may be used. +.fi + + +.BR +The html break br enclosed in angle brackets (though undocumented) is available +in versions prior to 3.0.13 and 2.9.7 (it remains available for the time being, +but is deprecated). + +.BR +To draw a dividing line dividing paragraphs, see the section on page breaks. +.SH PAGE BREAKS + + +.BR +Page breaks are only relevant and honored in some output formats. A page break +or a new page may be inserted manually using the following markup on a line on +its own: + +.BR +page new =\e\e= breaks the page, starts a new page. + +.BR +page break -\e\e- breaks a column, starts a new column, if using columns, else +breaks the page, starts a new page. 
+ +.BR +page break line across page -..- draws a dividing line, dividing paragraphs + +.BR +page break: +.nf +-\e\e- +.fi + + +.BR +page (break) new: +.nf +=\e\e= +.fi + + +.BR +page (break) line across page (dividing paragraphs): +.nf +-..- +.fi + +.SH BIBLIOGRAPHY / REFERENCES + + +.BR +There are three ways to prepare a bibliography using sisu (which are mutually +exclusive): (i) manually preparing and marking up as regular text in sisu a +list of references, this is treated as a regular document segment (and placed +before endnotes if any); (ii) preparing a bibliography, marking a heading level +1~!biblio (note the exclamation mark) and preparing a bibliography using +various metadata tags including for author: title: year: a list of which is +provided below, or; (iii) as an assistance in preparing a bibliography, marking +a heading level 1~!biblio and tagging citations within footnotes for inclusion, +identifying citations and having a parser attempt to extract them and build a +bibliography of the citations provided. + +.BR +For the heading/section sequence: endnotes, bibliography then book index to +occur, the name biblio or bibliography must be given to the bibliography +section, like so: +.nf +1~!biblio~ [Note: heading marker::required title missing] +.fi + +.SH A MARKUP TAGGED METADATA BIBLIOGRAPHY SECTION + + +.BR +Here instead of writing your full citations directly in footnotes, each time +you have new material to cite, you add it to your bibliography section (if it +has not been added yet) providing the information you need against an available +list of tags (provided below). + +.BR +The required tags are au: ti: and yr: [^10] a short, quick example might be +as follows: +.nf +1~!biblio~ [Note: heading marker::required title missing] + +au: von Hippel, E. 
+ti: Perspective: User Toolkits for Innovation +lng: (language) +jo: Journal of Product Innovation Management +vo: 18 +ed: (editor) +yr: 2001 +note: +sn: Hippel, /{User Toolkits}/ (2001) +id: vHippel_2001 +% form: + +au: Benkler, Yochai +ti: The Wealth of Networks +st: How Social Production Transforms Markets and Freedom +lng: (language) +pb: Harvard University Press +edn: (edition) +yr: 2006 +pl: U.S. +url: http://cyber.law.harvard.edu/wealth_of_networks/Main_Page +note: +sn: Benkler, /{Wealth of Networks}/ (2006) +id: Benkler2006 + +au: Quixote, Don; Panza, Sancho +ti: Taming Windmills, Keeping True +jo: Imaginary Journal +yr: 1605 +url: https://en.wikipedia.org/wiki/Don_Quixote +note: made up to provide an example of author markup for an article with two authors +sn: Quixote & Panza, /{Taming Windmills}/ (1605) +id: quixote1605 +.fi + + +.BR +Note that the section name !biblio (or !bibliography) is required for the +bibliography to be treated specially as such, and placed after the +auto-generated endnote section. + +.BR +Using this method, work goes into preparing the bibliography, the tags author +or editor, year and title are required and will be used to sort the +bibliography that is placed under the Bibliography section + +.BR +The metadata tags may include shortname (sn:) and id, if provided, which are +used for substitution within text. Every time the given id is found within the +text it will be replaced by the given short title of the work (it is for this +reason the short title has sisu markup to italicize the title), it should work +with any page numbers to be added, the short title should be one that can +easily be used to look up the full description in the bibliography. +.nf +The following footnote~{ quixote1605, pp 1000 - 1001, also Benkler2006 p 1. 
}~ +.fi + + +.BR +would be presented as: + +.BR +Quixote and Panza, +.I Taming Windmills +(1605), pp 1000 - 1001 also, Benkler, +.I Wealth of Networks, +(2006) p 1 or rather[^11] +.nf +au: author Surname, FirstNames (if multiple semi-colon separator) + (required unless editor to be used instead) +ti: title (required) +st: subtitle +jo: journal +vo: volume +ed: editor (required if author not provided) +tr: translator +src: source (generic field where others are not appropriate) +in: in (like src) +pl: place/location (state, country) +pb: publisher +edn: edition +yr: year (yyyy or yyyy-mm or yyyy-mm-dd) (required) +pg: pages +url: http://url +note: note +id: create_short_identifier e.g. authorSurnameYear + (used in substitutions: when found within text will be + replaced by the short name provided) +sn: short name e.g. Author, /{short title}/, Year + (used in substitutions: when an id is found within text + the short name will be used to replace it) +.fi + +.SH TAGGING CITATIONS FOR INCLUSION IN THE BIBLIOGRAPHY + + +.BR +Here whenever you make a citation that you wish to be included in the +bibliography, you tag the citation as such using special delimiters (which are +subsequently removed from the final text produced by sisu) + +.BR +Here you would write something like the following, either in regular text or a +footnote +.nf +See .: Quixote, Don; Panza, Sancho /{Taming Windmills, Keeping True}/ (1605) :. +.fi + + +.BR + +.B SiSU +will parse for a number of patterns within the delimiters to try to make out the +authors, title, date etc. and from that create a Bibliography. This is more +limited than the previously described method of preparing a tagged +bibliography, and using an id within text to identify the work, which also +lends itself to greater consistency. 
+.SH GLOSSARY + + +.BR +Using the section name 1~!glossary results in the Glossary being treated +specially as such, and placed after the auto-generated endnote section (before +the bibliography/list of references if there is one). + +.BR +The Glossary is ordinary text marked up in a manner deemed suitable for that +purpose. e.g. with the term in bold, possibly with a hanging indent. +.nf +1~!glossary~ [Note: heading marker::required title missing] + +_0_1 *{GPL}* An abbreviation that stands for "General Public License." ... + +_0_1 [provide your list of terms and definitions] +.fi + + +.BR +In the given example the first line is not indented, subsequent lines are indented by one +level, and the term to be defined is in bold text. +.SH BOOK INDEX + + +.BR +To make an index, append to a paragraph the book index term that relates to it, using +an equal sign and curly braces. + +.BR +Currently two levels are provided, a main term and if needed a sub-term. +Sub-terms are separated from the main term by a colon. +.nf + Paragraph containing main term and sub-term. + ={Main term:sub-term} +.fi + + +.BR +The index syntax starts on a new line, but there should not be an empty line +between paragraph and index markup. + +.BR +The structure of the resulting index would be: +.nf + Main term, 1 + sub-term, 1 +.fi + + +.BR +Several terms may relate to a paragraph, they are separated by a semicolon. If +the term refers to more than one paragraph, indicate the number of paragraphs. +.nf + Paragraph containing main term, second term and sub-term. + ={first term; second term: sub-term} +.fi + + +.BR +The structure of the resulting index would be: +.nf + First term, 1, + Second term, 1, + sub-term, 1 +.fi + + +.BR +If multiple sub-terms appear under one paragraph, they are separated under the +main term heading from each other by a pipe symbol. +.nf + Paragraph containing main term, second term and sub-term. 
+ ={Main term: + sub-term+2|second sub-term; + Another term + } + + A paragraph that continues discussion of the first sub-term +.fi + + +.BR +The plus two in the example provided indicates the first sub-term spans two +additional paragraphs. The logical structure of the resulting index would be: +.nf + Main term, 1, + sub-term, 1-3, + second sub-term, 1, + Another term, 1 +.fi + +.SH COMPOSITE DOCUMENTS MARKUP + + +.BR +It is possible to build a document by creating a master document that requires +other documents. The documents required may be complete documents that could be +generated independently, or they could be markup snippets, prepared so as to be +easily available to be placed within another text. If the calling document is a +master document (built from other documents), it should be named with the +suffix +.B .ssm +Within this document you would provide information on the other documents that +should be included within the text. These may be other documents that would be +processed in a regular way, or markup bits prepared only for inclusion within a +master document +.B .sst +regular markup file, or +.B .ssi +(insert/information) A secondary file of the composite document is built prior +to processing with the same prefix and the suffix +.B ._sst + +.BR +basic markup for importing a document into a master document +.nf +<< filename1.sst + +<< filename2.ssi +.fi + + +.BR +The form described above should be relied on. Within the +.I Vim +editor it results in the text thus linked becoming hyperlinked to the document +it is calling in, which is convenient for editing. 
+.SH SUBSTITUTIONS + + +.BR + +.B markup example: +.nf +The current Debian is ${debian_stable} the next debian will be ${debian_testing} + +Configure substitution in _sisu/sisu_document_make + +make: + substitute: /${debian_stable}/,'*{Wheezy}*' /${debian_testing}/,'*{Jessie}*' +.fi + + +.BR + +.B resulting output: + +.BR +The current +.B Debian +is +.B Wheezy +the next debian will be +.B Jessie + +.BR +Configure substitution in _sisu/sisu_document_make +.SH SISU FILETYPES + + +.BR + +.B SiSU +has +.I plaintext +and binary filetypes, and can process either type of document. +.SH .SST .SSM .SSI MARKED UP PLAIN TEXT + +.TP +.B SiSU +documents are prepared as plain-text (utf-8) files with +.B SiSU +markup. They may make reference to and contain images (for example), which are +stored in the directory beneath them _sisu/image. +.B SiSU +.I plaintext +markup files are of three types that may be distinguished by the file extension +used: regular text .sst; master documents .ssm, composite documents that +incorporate other text, which can be any regular text or text insert; and +inserts the contents of which are like regular text except these are marked +.ssi and are not processed. + +.BR + +.B SiSU +processing can be done directly against a sisu document, which may be located +locally or on a remote server for which a url is provided. + +.BR + +.B SiSU +source markup can be shared with the command: + +.BR + sisu -s [filename] +.SH SISU TEXT - REGULAR FILES (.SST) + + +.BR +The most common form of document in +.B SiSU, +see the section on +.B SiSU +markup. +.SH SISU MASTER FILES (.SSM) + + +.BR +Composite documents which incorporate other +.B SiSU +documents which may be either regular +.B SiSU +text .sst which may be generated independently, or inserts prepared solely for +the purpose of being incorporated into one or more master documents. 
+ +.BR +The mechanism by which master files incorporate other documents is described as +one of the headings under +.B SiSU +markup in the +.B SiSU +manual. + +.BR +Note: Master documents may be prepared in a similar way to regular documents, +and processing will occur normally if a .sst file is renamed .ssm without +requiring any other documents; the .ssm marker flags that the document may +contain other documents. + +.BR +Note: a secondary file of the composite document is built prior to processing +with the same prefix and the suffix ._sst +.SH SISU INSERT FILES (.SSI) + + +.BR +Inserts are documents prepared solely for the purpose of being incorporated +into one or more master documents. They resemble regular +.B SiSU +text files (.sst). Since sisu -5.5.0 (6.1.0) .ssi files can, like .ssm files, +include other .sst or .ssm files. .ssi files cannot be called by the sisu +processor directly and can only be incorporated in other documents. Making a +file a .ssi file is a quick and convenient way of breaking up a document that +is to be included in a master document, and flagging that the file to be +incorporated (.ssi) is not intended to be processed on its own. +.SH SISUPOD, ZIPPED BINARY CONTAINER (SISUPOD.ZIP, .SSP) + + +.BR +A sisupod is a zipped +.B SiSU +text file or set of +.B SiSU +text files and any associated images that they contain (this will be extended +to include sound and multimedia-files) +.TP +.B SiSU +.I plaintext +files rely on a recognised directory structure to find contents such as images +associated with documents, but all images for example for all documents +contained in a directory are located in the sub-directory _sisu/image. Without +the ability to create a sisupod it can be inconvenient to manually identify all +other files associated with a document. A sisupod automatically bundles all +associated files with the document that is turned into a pod. 
+ +.BR +The structure of the sisupod is such that it may for example contain a single +document and its associated images; a master document and its associated +documents and anything else; or the zipped contents of a whole directory of +prepared +.B SiSU +documents. + +.BR +The command to create a sisupod is: + +.BR + sisu -S [filename] + +.BR +Alternatively, make a pod of the contents of a whole directory: + +.BR + sisu -S + +.BR + +.B SiSU +processing can be done directly against a sisupod, which may be located locally +or on a remote server for which a url is provided. + +.BR +<http://www.sisudoc.org/sisu/sisu_commands> + +.BR +<http://www.sisudoc.org/sisu/sisu_manual> +.SH CONFIGURATION + +.SH CONFIGURATION FILES + +.SH CONFIG.YML + + +.BR + +.B SiSU +configuration parameters are adjusted in the configuration file, which can be +used to override the defaults set. This includes such things as which directory +interim processing should be done in and where the generated output should be +placed. + +.BR +The +.B SiSU +configuration file is a yaml file, which means indentation is significant. + +.BR + +.B SiSU +resource configuration is determined by looking at the following files if they +exist: + +.BR + ./_sisu/v7/sisurc.yml + +.BR + ./_sisu/sisurc.yml + +.BR + ~/.sisu/v7/sisurc.yml + +.BR + ~/.sisu/sisurc.yml + +.BR + /etc/sisu/v7/sisurc.yml + +.BR + /etc/sisu/sisurc.yml + +.BR +The search is in the order listed, and the first one found is used. + +.BR +In the absence of instructions in any of these it falls back to the internal +program defaults. + +.BR +Configuration determines the output and processing directories and the database +access details. + +.BR +If +.B SiSU +is installed a sample sisurc.yml may be found in /etc/sisu/sisurc.yml +.SH SISU_DOCUMENT_MAKE + + +.BR +Most sisu document headers relate to metadata, the exception is the @make: +header which provides processing related information. 
The default contents of +the @make header may be set by placing them in a file sisu_document_make. + +.BR +The search order is as for resource configuration: + +.BR + ./_sisu/v7/sisu_document_make + +.BR + ./_sisu/sisu_document_make + +.BR + ~/.sisu/v7/sisu_document_make + +.BR + ~/.sisu/sisu_document_make + +.BR + /etc/sisu/v7/sisu_document_make + +.BR + /etc/sisu/sisu_document_make + +.BR +A sample sisu_document_make can be found in the _sisu/ directory under along +with the provided sisu markup samples. +.SH CSS - CASCADING STYLE SHEETS (FOR HTML, XHTML AND XML) + + +.BR +CSS files to modify the appearance of +.B SiSU +html, +.I XHTML +or +.I XML +may be placed in the configuration directory: ./_sisu/css ; ~/.sisu/css or; +/etc/sisu/css and these will be copied to the output directories with the +command sisu -CC. + +.BR +The basic CSS file for html output is html. css, placing a file of that name in +directory _sisu/css or equivalent will result in the default file of that name +being overwritten. + +.BR + +.I HTML: +html. css + +.BR + +.I XML +DOM: dom.css + +.BR + +.I XML +SAX: sax.css + +.BR + +.I XHTML: +xhtml. css + +.BR +The default homepage may use homepage.css or html. css + +.BR +Under consideration is to permit the placement of a CSS file with a different +name in directory _sisu/css directory or equivalent.[^12] +.SH ORGANISING CONTENT - DIRECTORY STRUCTURE AND MAPPING + + +.BR + +.B SiSU +v3 has new options for the source directory tree, and output directory +structures of which there are 3 alternatives. +.SH DOCUMENT SOURCE DIRECTORY + + +.BR +The document source directory is the directory in which sisu processing +commands are given. It contains the sisu source files (.sst .ssm .ssi), or (for +sisu v3 may contain) subdirectories with language codes which contain the sisu +source files, so all English files would go in subdirectory en/, French in fr/, +Spanish in es/ and so on. ISO 639-1 codes are used (as varied by po4a). 
A list +of available languages (and possible sub-directory names) can be obtained with +the command "sisu --help lang". The list of languages is limited to languages +supported by XeTeX polyglossia. +.SH GENERAL DIRECTORIES + +.nf + ./subject_name/ + +% files stored at this level e.g. sisu_manual.sst or +% for sisu v3 may be under language sub-directories +% e.g. + + ./subject_name/en + + ./subject_name/fr + + ./subject_name/es + + ./subject_name/_sisu + + ./subject_name/_sisu/css + + ./subject_name/_sisu/image +.fi + +.SH DOCUMENT OUTPUT DIRECTORY STRUCTURES + +.SH OUTPUT DIRECTORY ROOT + + +.BR +The output directory root can be set in the sisurc.yml file. Under the root, +subdirectories are made for each directory in which a document set resides. If +you have a directory named poems or conventions, that directory will be created +under the output directory root and the output for all documents contained in +the directory of a particular name will be generated to subdirectories beneath +that directory (poems or conventions). A document will be placed in a +subdirectory of the same name as the document with the filetype identifier +stripped (.sst .ssm) + +.BR +The last part of a directory path, representing the sub-directory in which a +document set resides, is the directory name that will be used for the output +directory. This has implications for the organisation of document collections +as it could make sense to place documents of a particular subject, or type +within a directory identifying them. This grouping as suggested could be by +subject (sales_law, english_literature); or just as conveniently by some other +classification (X University). 
The mapping means it is also possible to place +in the same output directory documents that are for organisational purposes +kept separately, for example documents on a given subject of two different +institutions may be kept in two different directories of the same name, under a +directory named after each institution, and these would be output to the same +output directory. Skins could be associated with each institution on a +directory basis and resulting documents will take on the appropriate different +appearance. +.SH ALTERNATIVE OUTPUT STRUCTURES + + +.BR +There are 3 possible output structures described as being, by language, by +filetype or by filename, the selection is made in sisurc.yml +.nf +#% output_dir_structure_by: language; filetype; or filename +output_dir_structure_by: language #(language & filetype, preferred?) +#output_dir_structure_by: filetype +#output_dir_structure_by: filename #(default, closest to original v1 & v2) +.fi + +.SH BY LANGUAGE + + +.BR +The by language directory structure places output files + +.BR +The by language directory structure separates output files by language code +(all files of a given language), and within the language directory by filetype. + +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: language +.nf + |-- en + |-- epub + |-- hashes + |-- html + | |-- viral_spiral.david_bollier + | |-- manifest + | |-- qrcode + | |-- odt + | |-- pdf + | |-- sitemaps + | |-- txt + | |-- xhtml + | `-- xml + |-- po4a + | `-- live-manual + | |-- po + | |-- fr + | `-- pot + `-- _sisu + |-- css + |-- image + |-- image_sys -> ../../_sisu/image_sys + `-- xml + |-- rnc + |-- rng + `-- xsd +.fi + + +.BR +#by: language subject_dir/en/manifest/filename.html +.SH BY FILETYPE + + +.BR +The by filetype directory structure separates output files by filetype, all +html files in one directory pdfs in another and so on. Filenames are given a +language extension. 
+ +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: filetype +.nf + |-- epub + |-- hashes + |-- html + |-- viral_spiral.david_bollier + |-- manifest + |-- qrcode + |-- odt + |-- pdf + |-- po4a + |-- live-manual + | |-- po + | |-- fr + | `-- pot + |-- _sisu + | |-- css + | |-- image + | |-- image_sys -> ../../_sisu/image_sys + | `-- xml + | |-- rnc + | |-- rng + | `-- xsd + |-- sitemaps + |-- txt + |-- xhtml + `-- xml +.fi + + +.BR +#by: filetype subject_dir/html/filename/manifest.en.html +.SH BY FILENAME + + +.BR +The by filename directory structure places most output of a particular file +(the different filetypes) in a common directory. + +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: filename +.nf + |-- epub + |-- po4a + |-- live-manual + | |-- po + | |-- fr + | `-- pot + |-- _sisu + | |-- css + | |-- image + | |-- image_sys -> ../../_sisu/image_sys + | `-- xml + | |-- rnc + | |-- rng + | `-- xsd + |-- sitemaps + |-- src + |-- pod + `-- viral_spiral.david_bollier +.fi + + +.BR +#by: filename subject_dir/filename/manifest.en.html +.SH REMOTE DIRECTORIES + +.nf + ./subject_name/ + +% containing sub_directories named after the generated files from which they are made + + ./subject_name/src + +% contains shared source files text and binary e.g. sisu_manual.sst and sisu_manual.sst.zip + + ./subject_name/_sisu + +% configuration file e.g. sisurc.yml + + ./subject_name/_sisu/skin + +% skins in various skin directories doc, dir, site, yml + + ./subject_name/_sisu/css + + ./subject_name/_sisu/image + +% images for documents contained in this directory + + ./subject_name/_sisu/mm +.fi + +.SH SISUPOD + +.nf + ./sisupod/ + +% files stored at this level e.g. sisu_manual.sst + + ./sisupod/_sisu + +% configuration file e.g. 
sisurc.yml + + ./sisupod/_sisu/skin + +% skins in various skin directories doc, dir, site, yml + + ./sisupod/_sisu/css + + ./sisupod/_sisu/image + +% images for documents contained in this directory + + ./sisupod/_sisu/mm +.fi + +.SH HOMEPAGES + + +.BR + +.B SiSU +is about the ability to auto-generate documents. Home pages are regarded as +custom built items, and are not created by +.B SiSU. +More accurately, +.B SiSU +has a default home page, which will not be appropriate for use with other +sites, and the means to provide your own home page instead in one of two ways +as part of a site's configuration, these being: + +.BR +1. through placing your home page and other custom built documents in the +subdirectory _sisu/home/ (this probably being the easier and more convenient +option) + +.BR +2. through providing what you want as the home page in a skin. + +.BR +Document sets are contained in directories, usually organised by site or +subject. Each directory can/should have its own homepage. See the section on +directory structure and organisation of content. +.SH HOME PAGE AND OTHER CUSTOM BUILT PAGES IN A SUB-DIRECTORY + + +.BR +Custom built pages, including the home page index.html may be placed within the +configuration directory _sisu/home/ in any of the locations that are searched +for the configuration directory, namely ./_sisu ; ~/.sisu ; /etc/sisu. From +there they are copied to the root of the output directory with the command: + +.BR + sisu -CC +.SH MARKUP AND OUTPUT EXAMPLES + +.SH MARKUP EXAMPLES + + +.BR +Current markup examples and document output samples are provided off +<http://sisudoc.org> or <http://www.jus.uio.no/sisu> and in the sisu +-markup-sample package available off <http://git.sisudoc.org> + +.BR +For some documents hardly any markup is required at all, other than a +header, and an indication of the levels to be taken into account by the +program in generating its output. 
+.SH SISU MARKUP SAMPLES + + +.BR +A few additional sample books prepared as sisu markup samples, output formats +to be generated using +.B SiSU +are contained in a separate package sisu -markup-samples. sisu -markup-samples +contains books (prepared using sisu markup), that were released by their +authors various licenses mostly different Creative Commons licences that do not +permit inclusion in the +.B Debian +Project as they have requirements that do not meet the +.B Debian +Free Software Guidelines for various reasons, most commonly that they require +that the original substantive text remain unchanged, and sometimes that the +works be used only non-commercially. + +.BR + +.I Accelerando, +Charles Stross (2005) +accelerando.charles_stross.sst + +.BR + +.I Alice's Adventures in Wonderland, +Lewis Carroll (1865) +alices_adventures_in_wonderland.lewis_carroll.sst + +.BR + +.I CONTENT, +Cory Doctorow (2008) +content.cory_doctorow.sst + +.BR + +.I Democratizing Innovation, +Eric von Hippel (2005) +democratizing_innovation.eric_von_hippel.sst + +.BR + +.I Down and Out in the Magic Kingdom, +Cory Doctorow (2003) +down_and_out_in_the_magic_kingdom.cory_doctorow.sst + +.BR + +.I For the Win, +Cory Doctorow (2010) +for_the_win.cory_doctorow.sst + +.BR + +.I Free as in Freedom - Richard Stallman's Crusade for Free Software, +Sam Williams (2002) +free_as_in_freedom.richard_stallman_crusade_for_free_software.sam_williams.sst + +.BR + +.I Free as in Freedom 2.0 - Richard Stallman and the Free Software Revolution, +Sam Williams (2002), Richard M. 
Stallman (2010) +free_as_in_freedom_2.richard_stallman_and_the_free_software_revolution.sam_williams.richard_stallman.sst + +.BR + +.I Free Culture - How Big Media Uses Technology and the Law to Lock Down +Culture and Control Creativity, +Lawrence Lessig (2004) +free_culture.lawrence_lessig.sst + +.BR + +.I Free For All - How Linux and the Free Software Movement Undercut the High +Tech Titans, +Peter Wayner (2002) +free_for_all.peter_wayner.sst + +.BR + +.I GNU GENERAL PUBLIC LICENSE v2, +Free Software Foundation (1991) +gpl2.fsf.sst + +.BR + +.I GNU GENERAL PUBLIC LICENSE v3, +Free Software Foundation (2007) +gpl3.fsf.sst + +.BR + +.I Gulliver's Travels, +Jonathan Swift (1726 / 1735) +gullivers_travels.jonathan_swift.sst + +.BR + +.I Little Brother, +Cory Doctorow (2008) +little_brother.cory_doctorow.sst + +.BR + +.I The Cathedral and the Bazaar, +Eric Raymond (2000) +the_cathedral_and_the_bazaar.eric_s_raymond.sst + +.BR + +.I The Public Domain - Enclosing the Commons of the Mind, +James Boyle (2008) +the_public_domain.james_boyle.sst + +.BR + +.I The Wealth of Networks - How Social Production Transforms Markets and +Freedom, +Yochai Benkler (2006) +the_wealth_of_networks.yochai_benkler.sst + +.BR + +.I Through the Looking Glass, +Lewis Carroll (1871) +through_the_looking_glass.lewis_carroll.sst + +.BR + +.I Two Bits - The Cultural Significance of Free Software, +Christopher Kelty (2008) +two_bits.christopher_kelty.sst + +.BR + +.I UN Contracts for International Sale of Goods, +UN (1980) +un_contracts_international_sale_of_goods_convention_1980.sst + +.BR + +.I Viral Spiral, +David Bollier (2008) +viral_spiral.david_bollier.sst +.SH SISU SEARCH - INTRODUCTION + + +.BR +Because the document structure of sites created is clearly defined, and the +text +.I object citation system +is available hypothetically at least, for all forms of output, it is possible +to search the sql database, and either read results from that database, or map +the results to the html or 
other output, which has richer text markup. + +.BR + +.B SiSU +can populate a relational sql type database with documents at an object level, +including object numbers that are shared across different output types, making +a document corpus searchable with that degree of granularity. Basically, your +match criteria are met by these documents and at these locations within each +document, which can be viewed within the database directly or in various output +formats. + +.BR + +.B SiSU +can populate an sql database (sqlite3 or postgresql) with documents made up of +their objects. It also can generate a cgi search form that can be used to query +the database. + +.BR +In order to use the built in search functionality you would take the following +steps. + +.BR +- use sisu to populate an sql database with sisu markup content + +.BR + * sqlite3 should work out of the box + +.BR + * postgresql may require some initial database configuration + +.BR +- provide a way to query the database, which sisu can assist with by + +.BR + * generating a sample ruby cgi search form, required (sisu configuration + recommended) + +.BR + * adding a query field for this search form to be added to all html files + (sisu configuration required) +.SH SQL + +.SH POPULATE THE DATABASE + + +.BR +To populate the sql database, run sisu against a sisu markup file with one of +the following sets of flags +.nf +sisu --sqlite filename.sst +.fi + + +.BR +creates an sqlite3 database containing searchable content of just the sisu +markup document selected +.nf +sisu --sqlite --update filename.sst +.fi + + +.BR +creates an sqlite3 database containing searchable content of marked up +document(s) selected by the user from a common directory +.nf +sisu --pg --update filename.sst +.fi + + +.BR +fills a postgresql database with searchable content of marked up document(s) +selected by the user from a common directory + +.BR +For postgresql the first time the command is run in a given directory the user 
+will be prompted to create the requisite database, at the time of writing the +prompt sisu provides is as follows: +.nf +no connection with pg database established, you may need to run: + createdb "SiSU.7a.current" + after that don't forget to run: + sisu --pg --createall + before attempting to populate the database +.fi + + +.BR +The named database that sisu expects to find must exist and if necessary be +created using postgresql tools. If the database exists but the database tables +do not, sisu will attempt to create the tables it needs, the equivalent of the +requested sisu --pg --createall command. + +.BR +Once this is done, the sql database is populated and ready to be queried. +.SH SQL TYPE DATABASES + + +.BR + +.B SiSU +feeds sisu markup documents into sql type databases +.I PostgreSQL +[^13] and/or +.I SQLite +[^14] database together with information related to document structure. + +.BR +This is one of the more interesting output forms, as all the structural data of +the documents are retained (though can be ignored by the user of the database +should they so choose). All site texts/documents are (currently) streamed to +four tables: + +.BR + * one containing semantic (and other) headers, including, title, author, + subject, (the + .I Dublin Core. + ..); + +.BR + * another the substantive texts by individual "paragraph" (or object) - along + with structural information, each paragraph being identifiable by its + paragraph number (if it has one which almost all of them do), and the + substantive text of each paragraph quite naturally being searchable (both in + formatted and clean text versions for searching); + +.BR + * a third containing endnotes cross-referenced back to the paragraph from + which they are referenced (both in formatted and clean text versions for + searching); and + +.BR + * a fourth table with a one to one relation with the headers table contains + full text versions of output, e.g. pdf, html, xml, and + .I ascii. 
+ +.BR +There is of course the possibility to add further structures. + +.BR +At this level +.B SiSU +loads a relational database with documents chunked into objects, their smallest +logical structurally constituent parts, as text objects, with their object +citation number and all other structural information needed to construct the +document. Text is stored (at this text object level) with and without +elementary markup tagging, the stripped version being so as to facilitate ease +of searching. + +.BR +Being able to search a relational database at an object level with the +.B SiSU +citation system is an effective way of locating content generated by +.B SiSU. +As individual text objects of a document stored (and indexed) together with +object numbers, and all versions of the document have the same numbering, +complex searches can be tailored to return just the locations of the search +results relevant for all available output formats, with live links to the +precise locations in the database or in html/xml documents; or, the structural +information provided makes it possible to search the full contents of the +database and have headings in which search content appears, or to search only +headings etc. (as the +.I Dublin Core +is incorporated it is easy to make use of that as well). +.SH POSTGRESQL + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system, +postgresql dependency package +.SH DESCRIPTION + + +.BR +Information related to using postgresql with sisu (and related to the +sisu_postgresql dependency package, which is a dummy package to install +dependencies needed for +.B SiSU +to populate a postgresql database, this being part of +.B SiSU +- man sisu) . 
+.SH SYNOPSIS + + +.BR + sisu -D [instruction] [filename/wildcard if required] + +.BR + sisu -D --pg --[instruction] [filename/wildcard if required] +.SH COMMANDS + + +.BR +Mappings to two databases are provided by default, postgresql and sqlite, the +same commands are used within sisu to construct and populate databases however +-d (lowercase) denotes sqlite and -D (uppercase) denotes postgresql, +alternatively --sqlite or --pgsql may be used + +.BR + +.B -D or --pgsql +may be used interchangeably. +.SH CREATE AND DESTROY DATABASE + +.TP +.B --pgsql --createall +initial step, creates required relations (tables, indexes) in existing +(postgresql) database (a database should be created manually and given the same +name as working directory, as requested) (rb.dbi) +.TP +.B sisu -D --createdb +creates database where no database existed before +.TP +.B sisu -D --create +creates database tables where no database tables existed before +.TP +.B sisu -D --Dropall +destroys database (including all its content)! kills data and drops tables, +indexes and database associated with a given directory (and directories of the +same name). +.TP +.B sisu -D --recreate +destroys existing database and builds a new empty database structure +.SH IMPORT AND REMOVE DOCUMENTS + +.TP +.B sisu -D --import -v [filename/wildcard] +populates database with the contents of the file. Imports documents(s) +specified to a postgresql database (at an object level). +.TP +.B sisu -D --update -v [filename/wildcard] +updates file contents in database +.TP +.B sisu -D --remove -v [filename/wildcard] +removes specified document from postgresql database. +.SH SQLITE + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system. 
+.SH DESCRIPTION + + +.BR +Information related to using sqlite with sisu (and related to the sisu_sqlite +dependency package, which is a dummy package to install dependencies needed for +.B SiSU +to populate an sqlite database, this being part of +.B SiSU +- man sisu) . +.SH SYNOPSIS + + +.BR + sisu -d [instruction] [filename/wildcard if required] + +.BR + sisu -d --(sqlite|pg) --[instruction] [filename/wildcard if required] +.SH COMMANDS + + +.BR +Mappings to two databases are provided by default, postgresql and sqlite, the +same commands are used within sisu to construct and populate databases however +-d (lowercase) denotes sqlite and -D (uppercase) denotes postgresql, +alternatively --sqlite or --pgsql may be used + +.SH CREATE AND DESTROY DATABASE + +.TP +.B --sqlite --createall +initial step, creates required relations (tables, indexes) in existing (sqlite) +database (a database should be created manually and given the same name as +working directory, as requested) (rb.dbi) +.TP +.B sisu -d --createdb +creates database where no database existed before +.TP +.B sisu -d --create +creates database tables where no database tables existed before +.TP +.B sisu -d --dropall +destroys database (including all its content)! kills data and drops tables, +indexes and database associated with a given directory (and directories of the +same name). +.TP +.B sisu -d --recreate +destroys existing database and builds a new empty database structure +.SH IMPORT AND REMOVE DOCUMENTS + +.TP +.B sisu -d --import -v [filename/wildcard] +populates database with the contents of the file. Imports documents(s) +specified to an sqlite database (at an object level). +.TP +.B sisu -d --update -v [filename/wildcard] +updates file contents in database +.TP +.B sisu -d --remove -v [filename/wildcard] +removes specified document from sqlite database. 
+.SH CGI SEARCH FORM + + +.BR +For the search form, which is a single search page: + +.BR +- configure the search form + +.BR +- generate the sample search form with the sisu command, (this will be based on +the configuration settings and existing found sisu databases) + +.BR +For postgresql web content you may need to edit the search cgi script. Two +things to look out for are that the user is set as needed, and that any +different databases that you wish to be able to query are listed. + +.BR +To set the user correctly, you may want www-data rather than your username. +.nf +@user='www-data' +.fi + + +.BR +- check the search form, copy it to the appropriate cgi directory and set the +correct permissions + +.BR +For a search form to appear on each html page, you need to: + +.BR +- rely on the above mentioned configuration of the search form + +.BR +- configure the html search form to be on + +.BR +- run the html command +.SH SETUP SEARCH FORM + + +.BR +You will need a web server, httpd with cgi enabled, and a postgresql server +on which you are able to create databases. 
+ +.BR +Setup postgresql, make sure you are able to create and write to the database, +e.g.: +.nf +sudo su postgres + createuser -d -a ralph +.fi + + +.BR +You then need to create the database that sisu will use, for sisu manual in the +directory manual/en for example, (when you try to populate a database that does +not exist sisu prompts as to whether it exists): +.nf +createdb SiSU.7a.manual +.fi + + +.BR + +.B SiSU +is then able to create the required tables that allow you to populate the +database with documents in the directory for which it has been created: +.nf +sisu --pg --createall -v +.fi + + +.BR +You can then start to populate the database, in this example with a single +document: +.nf +sisu --pg --update -v en/sisu_manual.ssm +.fi + + +.BR +To create a sample search form, from within the same directory run: +.nf +sisu --sample-search-form --db-pg +.fi + + +.BR +and copy the resulting cgi form to your cgi-bin directory + +.BR +A sample setup for nginx is provided that assumes data will be stored under +/srv/www and cgi scripts under /srv/cgi +.SH SEARCH - DATABASE FRONTEND SAMPLE, UTILISING DATABASE AND SISU FEATURES, +INCLUDING OBJECT CITATION NUMBERING (BACKEND CURRENTLY POSTGRESQL) + + +.BR +Sample search frontend <http://search.sisudoc.org> [^15] A small database and +sample query front-end (search form) that makes use of the citation system, +.I object citation numbering +to demonstrate functionality.[^16] + +.BR + +.B SiSU +can provide information on which documents are matched and at what locations +within each document the matches are found. These results are relevant across +all outputs using +.I object citation numbering, +which includes html, +.I XML, +.I EPUB, +.I LaTeX, +.I PDF +and indeed the +.I SQL +database. You can then refer to one of the other outputs or in the +.I SQL +database expand the text within the matched objects (paragraphs) in the +documents matched. 
+ +.BR +Note you may set results either for documents matched and object number +locations within each matched document meeting the search criteria; or display +the names of the documents matched along with the objects (paragraphs) that +meet the search criteria.[^17] +.TP +.B sisu -F --webserv-webrick +builds a cgi web search frontend for the database created + +.BR +The following is feedback on the setup on a machine provided by the help +command: + +.BR + sisu --help sql +.nf +Postgresql + user: ralph + current db set: SiSU_sisu + port: 5432 + dbi connect: DBI:Pg:database=SiSU_sisu;port=5432 + +sqlite + current db set: /home/ralph/sisu_www/sisu/sisu_sqlite.db + dbi connect DBI:SQLite:/home/ralph/sisu_www/sisu/sisu_sqlite.db +.fi + +.BR +Note on databases built + +.BR +By default, [unless otherwise specified] databases are built on a directory +basis, from collections of documents within that directory. The name of the +directory you choose to work from is used as the database name, i.e. if you are +working in a directory called /home/ralph/ebook the database SiSU_ebook is +used. 
[otherwise a manual mapping for the collection is necessary] + +.SH SEARCH FORM + +.TP +.B sisu -F +generates a sample search form, which must be copied to the web-server cgi +directory +.TP +.B sisu -F --webserv-webrick +generates a sample search form for use with the webrick server, which must be +copied to the web-server cgi directory +.TP +.B sisu -W +starts the webrick server which should be available wherever sisu is properly +installed + +.BR +The generated search form must be copied manually to the webserver directory as +instructed +.SH SISU_WEBRICK + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system +.SH SYNOPSIS + + +.BR +sisu_webrick [port] + +.BR +or + +.BR +sisu -W [port] +.SH DESCRIPTION + + +.BR +sisu_webrick is part of +.B SiSU +(man sisu) sisu_webrick starts +.BR Ruby 's +Webrick web-server and points it to the directories to which +.B SiSU +output is written, providing a list of these directories (assuming +.B SiSU +is in use and they exist). + +.BR +The default port for sisu_webrick is set to 8081, this may be modified in the +yaml file: ~/.sisu/sisurc.yml a sample of which is provided as +/etc/sisu/sisurc.yml (or in the equivalent directory on your system). +.SH SUMMARY OF MAN PAGE + + +.BR +sisu_webrick, may be started on its own with the command: sisu_webrick [port] +or using the sisu command with the -W flag: sisu -W [port] + +.BR +where no port is given and settings are unchanged the default port is 8081 +.SH DOCUMENT PROCESSING COMMAND FLAGS + + +.BR +sisu -W [port] starts +.B Ruby +Webrick web-server, serving +.B SiSU +output directories, on the port provided, or if no port is provided and the +defaults have not been changed in ~/.sisu/sisurc.yml then on port 8081 +.SH SUMMARY OF FEATURES + + +.BR +- sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a +single +.I UTF-8 +file using a minimalistic mnemonic syntax.
Typical literature, documents like +"War and Peace" require almost no markup, and most of the headers are optional. + +.BR +- markup is easily readable/parsable by the human eye, (basic markup is simpler +and more sparse than the most basic +.I HTML +) , [this may also be converted to +.I XML +representations of the same input/source document]. + +.BR +- markup defines document structure (this may be done once in a header +pattern-match description, or for heading levels individually); basic text +attributes (bold, italics, underscore, strike-through etc.) as required; and +semantic information related to the document (header information, extended +beyond the Dublin core and easily further extended as required); the headers +may also contain processing instructions. +.B SiSU +markup is primarily an abstraction of document structure and document metadata +to permit taking advantage of the basic strengths of existing alternative +practical standard ways of representing documents [be that browser viewing, +paper publication, sql search etc.] 
(html, epub, xml, odf, latex, pdf, sql) + +.BR +- for output produces reasonably elegant output of established industry and +institutionally accepted open standard formats.[3] takes advantage of the +different strengths of various standard formats for representing documents, +amongst the output formats currently supported are: + +.BR +* +.I HTML +- both as a single scrollable text and a segmented document + +.BR +* +.I XHTML + +.BR +* +.I EPUB + +.BR +* +.I XML +- both in sax and dom style xml structures for further development as required + +.BR +* +.I ODT +- Open Document Format text, the iso standard for document storage + +.BR +* +.I LaTeX +- used to generate pdf + +.BR +* +.I PDF +(via +.I LaTeX +) + +.BR +* +.I SQL +- population of an sql database ( +.I PostgreSQL +or +.I SQLite +) , (at the same object level that is used to cite text within a document) + +.BR +Also produces: concordance files; document content certificates (md5 or sha256 +digests of headings, paragraphs, images etc.) and html manifests (and sitemaps +of content). (b) takes advantage of the strengths implicit in these very +different output types, (e.g. PDFs produced using typesetting of +.I LaTeX, +databases populated with documents at an individual object/paragraph level, +making possible +.I granular search +(and related possibilities)) + +.BR +- ensuring content can be cited in a meaningful way regardless of selected +output format. Online publishing (and publishing in multiple document formats) +lacks a useful way of citing text internally within documents (important to +academics generally and to lawyers) as page numbers are meaningless across +browsers and formats. sisu seeks to provide a common way of pinpointing the text +within a document, (which can be utilized for citation and by search engines).
+The outputs share a common numbering system that is meaningful (to man and +machine) across all digital outputs whether paper, screen, or database +oriented, (pdf, +.I HTML, +.I EPUB, +xml, sqlite, postgresql) , this numbering system can be used to reference +content. + +.BR +- Granular search within documents. +.I SQL +databases are populated at an object level (roughly headings, paragraphs, +verse, tables) and become searchable with that degree of granularity, the +output information provides the object/paragraph numbers which are relevant +across all generated outputs; it is also possible to look at just the matching +paragraphs of the documents in the database; [output indexing also works well +with search indexing tools like hyperestraier]. + +.BR +- long term maintainability of document collections in a world of changing +formats, having a very sparsely marked-up source document base. there is a +considerable degree of future-proofing, output representations are +"upgradeable", and new document formats may be added. e.g. addition of odf +(open document text) module in 2006, epub in 2009 and html5 output +sometime in future, without modification of existing prepared texts + +.BR +* +.I SQL +search aside, documents are generated as required and static once generated.
+ +.BR +- documents produced are static files, and may be batch processed, this needs +to be done only once but may be repeated for various reasons as desired +(updated content, addition of new output formats, updated technology document +presentations/representations) + +.BR +- document source ( +.I plaintext +utf-8) if shared on the net may be used as input and processed locally to +produce the different document outputs + +.BR +- document source may be bundled together (automatically) with associated +documents (multiple language versions or master document with inclusions) and +images and sent as a zip file called a sisupod, if shared on the net these too +may be processed locally to produce the desired document outputs + +.BR +- generated document outputs may automatically be posted to remote sites. + +.BR +- for basic document generation, the only software dependency is +.B Ruby, +and a few standard Unix tools (this covers +.I plaintext, +.I HTML, +.I EPUB, +.I XML, +.I ODF, +.I LaTeX +) . To use a database you of course need that, and to convert the +.I LaTeX +generated to pdf, a latex processor like tetex or texlive. + +.BR +- as a developers tool it is flexible and extensible + +.BR +Syntax highlighting for +.B SiSU +markup is available for a number of text editors. + +.BR + +.B SiSU +is less about document layout than about finding a way with little markup to be +able to construct an abstract representation of a document that makes it +possible to produce multiple representations of it which may be rather +different from each other and used for different purposes, whether layout and +publishing, or search of content + +.BR +i.e. 
to be able to take advantage from this minimal preparation starting point +of some of the strengths of rather different established ways of representing +documents for different purposes, whether for search (relational database, or +indexed flat files generated for that purpose whether of complete documents, or +say of files made up of objects), online viewing (e.g. html, xml, pdf) , or +paper publication (e.g. pdf) ... + +.BR +the solution arrived at is by extracting structural information about the +document (about headings within the document) and by tracking objects (which +are serialized and also given hash values) in the manner described. It makes +possible representations that are quite different from those offered at +present. For example objects could be saved individually and identified by +their hashes, with an index of how the objects relate to each other to form a +document. +.TP +.BI *1. +square brackets + +.BR +.TP +.BI *2. +square brackets + +.BR +.TP +.BI +1. +square brackets + +.BR +.TP +.BI 1. +<http://www.jus.uio.no/sisu/man/> + +.BR +.TP +.BI 2. +<http://www.jus.uio.no/sisu/man/sisu.1.html> + +.BR +.TP +.BI 3. +From sometime after SiSU 0.58 it should be possible to describe SiSU markup +using SiSU, which though not an original design goal is useful. + +.BR +.TP +.BI 4. +files should be prepared using UTF-8 character encoding + +.BR +.TP +.BI 5. +a footnote or endnote + +.BR +.TP +.BI 6. +self contained endnote marker & endnote in one + +.BR +.TP +.BI *. +unnumbered asterisk footnote/endnote, insert multiple asterisks if required + +.BR +.TP +.BI **. +another unnumbered asterisk footnote/endnote + +.BR +.TP +.BI *3. +editors notes, numbered asterisk footnote/endnote series + +.BR +.TP +.BI +2. +editors notes, numbered plus symbol footnote/endnote series + +.BR +.TP +.BI 7. +<http://www.sisudoc.org/> + +.BR +.TP +.BI 8. +<http://www.ruby-lang.org/en/> + +.BR +.TP +.BI 9. 
+Table from the Wealth of Networks by Yochai Benkler +<http://www.jus.uio.no/sisu/the_wealth_of_networks.yochai_benkler> + +.BR +.TP +.BI 10. +for which you may alternatively use the full form author: title: and year: + +.BR +.TP +.BI 11. +Quixote and Panza, Taming Windmills (1605), pp 1000 - 1001 also, Benkler, Wealth of Networks (2006), p 1 + +.BR +.TP +.BI 12. +SiSU has worked this way in the past, though this was dropped as it was +thought the complexity outweighed the flexibility, however, the balance was +rather fine and this behaviour could be reinstated. + +.BR +.TP +.BI 13. +<http://www.postgresql.org/> <http://advocacy.postgresql.org/> +<http://en.wikipedia.org/wiki/Postgresql> + +.BR +.TP +.BI 14. +<http://www.hwaci.com/sw/sqlite/> <http://en.wikipedia.org/wiki/Sqlite> + +.BR +.TP +.BI 15. +<http://search.sisudoc.org> + +.BR +.TP +.BI 16. +(which could be extended further with current back-end). As regards scaling +of the database, it is as scalable as the database (here Postgresql) and +hardware allow. + +.BR +.TP +.BI 17. +of this feature when demonstrated to an IBM software innovations evaluator +in 2004 he said to paraphrase: this could be of interest to us. We have large +document management systems, you can search hundreds of thousands of documents +and we can tell you which documents meet your search criteria, but there is no +way we can tell you without opening each document where within each your +matches are found. + +.BR + +.TP +.SH SEE ALSO + sisu(1), + sisu-epub(1), + sisu-harvest(1), + sisu-html(1), + sisu-odf(1), + sisu-pdf(1), + sisu-pg(1), + sisu-sqlite(1), + sisu-txt(1). 
+ sisu_vim(7) +.TP +.SH HOMEPAGE + More information about SiSU can be found at <http://www.sisudoc.org/> or <http://www.jus.uio.no/sisu/> +.TP +.SH SOURCE + <http://git.sisudoc.org/> +.TP +.SH AUTHOR + SiSU is written by Ralph Amissah <ralph@amissah.com> diff --git a/util/d/cgi/search/README b/misc/util/d/cgi/search/README index eb8fcde..eb8fcde 100644 --- a/util/d/cgi/search/README +++ b/misc/util/d/cgi/search/README diff --git a/util/d/cgi/search/dub.sdl b/misc/util/d/cgi/search/dub.sdl index c1c775c..b859f42 100644 --- a/util/d/cgi/search/dub.sdl +++ b/misc/util/d/cgi/search/dub.sdl @@ -1,4 +1,4 @@ -name "spine-search" +name "spine_search" description "A minimal D application." authors "ralph" copyright "Copyright © 2020, ralph" diff --git a/util/d/cgi/search/src/spine_cgi_sqlite_search.d b/misc/util/d/cgi/search/src/spine_cgi_sqlite_search.d index 1460643..1460643 100644 --- a/util/d/cgi/search/src/spine_cgi_sqlite_search.d +++ b/misc/util/d/cgi/search/src/spine_cgi_sqlite_search.d diff --git a/util/rb/cgi/spine.search.cgi b/misc/util/rb/cgi/spine.search.cgi index cfe9d73..cfe9d73 100755 --- a/util/rb/cgi/spine.search.cgi +++ b/misc/util/rb/cgi/spine.search.cgi diff --git a/util/rb/tex/dr_tex.rb b/misc/util/rb/tex/dr_tex.rb index 767742c..767742c 100755 --- a/util/rb/tex/dr_tex.rb +++ b/misc/util/rb/tex/dr_tex.rb diff --git a/org/COPYRIGHT b/org/COPYRIGHT index 2217fc4..2705e3c 100644 --- a/org/COPYRIGHT +++ b/org/COPYRIGHT @@ -1,4 +1,4 @@ -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -8,19 +8,15 @@ - Copyright: (C) 2015 - 2020 Ralph Amissah - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on SiSU markup) - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform 
object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering - Hompages: - [http://www.doc_reform.org] [http://www.sisudoc.org] - - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] diff --git a/org/out_cgi_search_sqlite.org b/org/out_cgi_search_sqlite.org index 58375bf..3145870 100644 --- a/org/out_cgi_search_sqlite.org +++ b/org/out_cgi_search_sqlite.org @@ -1841,7 +1841,7 @@ configuration "default" { * cgi-search README -#+BEGIN_SRC text :NO-tangle "../util/d/cgi/search/README" +#+BEGIN_SRC text :NO-tangle "../misc/util/d/cgi/search/README" change db name to match name of db you create cv.db_selected = "spine.search.sql.db"; diff --git a/org/out_latex.org b/org/out_latex.org index 66f086c..0955687 100644 --- a/org/out_latex.org +++ b/org/out_latex.org @@ -1884,7 +1884,7 @@ string latex_tail(M)( * latex system command helper script ** latex command, ruby script -#+BEGIN_SRC ruby :tangle "../util/rb/tex/dr_tex.rb" :tangle-mode (identity #o755) :shebang #!/usr/bin/env ruby +#+BEGIN_SRC ruby :tangle "../misc/util/rb/tex/dr_tex.rb" :tangle-mode (identity #o755) :shebang #!/usr/bin/env ruby require 'fileutils' pwd = Dir.pwd argv,texfiles_with_path,flags=[],[],[] diff --git a/org/spine_build_scaffold.org b/org/spine_build_scaffold.org index ac1fc98..94d5cf9 100644 --- a/org/spine_build_scaffold.org +++ b/org/spine_build_scaffold.org @@ -1132,9 +1132,17 @@ spine_exe = executable('spine', !*.d !*.rb !conf.sdl +!doc +!doc/** +!man +!man/** !org +!misc +!misc/** !util !util/** +!editor-syntax-etc +!editor-syntax-etc/** !ext_lib !ext_lib/** !src diff --git a/org/spine_doc.org b/org/spine_doc.org new file mode 100644 index 0000000..49e7313 --- /dev/null +++ b/org/spine_doc.org @@ -0,0 +1,4548 @@ +-*- mode: org -*- 
+#+TITLE: spine (doc_reform) hub +#+DESCRIPTION: documents - structuring, various output representations & search +#+FILETAGS: :spine:hub: +#+AUTHOR: Ralph Amissah +#+EMAIL: [[mailto:ralph.amissah@gmail.com][ralph.amissah@gmail.com]] +#+COPYRIGHT: Copyright (C) 2015 - 2020 Ralph Amissah +#+LANGUAGE: en +#+STARTUP: content hideblocks hidestars noindent entitiespretty +#+PROPERTY: header-args :exports code +#+PROPERTY: header-args+ :noweb yes +#+PROPERTY: header-args+ :eval no +#+PROPERTY: header-args+ :results no +#+PROPERTY: header-args+ :cache no +#+PROPERTY: header-args+ :padline no + +* README +** tangle + +#+BEGIN_SRC text :tangle "../README" +<<sisu_spine_readme_info>> +<<sisu_spine_readme_description>> +<<sisu_spine_readme_install>> +<<sisu_spine_readme_configuration>> +<<sisu_spine_readme_commands>> +<<sisu_spine_readme_examples>> +#+END_SRC + +** project name + +#+NAME: sisu_spine_readme_info +#+BEGIN_SRC text +project_name: Spine, Doc Reform + description: [ + "documents, structuring, processing, publishing", + search, + object numbering, + static content generator, + sisu markup + ] + + author: + name: Ralph Amissah + email: ralph.amissah@gmail.com + + copyright: "(C) 2015 - 2020 Ralph Amissah, All Rights Reserved." 
+ + license: "AGPL 3 or later" + + hompage: [ + "http://www.doc_reform.org", + "http://www.sisudoc.org" + ] +#+END_SRC + +** short description + +#+NAME: sisu_spine_readme_description +#+BEGIN_SRC text +#+END_SRC + +** installation + +#+NAME: sisu_spine_readme_install +#+BEGIN_SRC text +# Installation, Compilation + +SiSU spine is written in the programming language D for which there are 3 compilers: + +- dmd +- ldc +- gdc + +D projects tend to use dub as project manager +https://code.dlang.org/packages/dub +https://code.dlang.org/packages/dub +https://github.com/dlang/dub/blob/master/source/dub/commandline.d + + dub --compiler=ldc2 -color --config=ldc -b release + + dub --compiler=dmd -color --config=dmd + + dub --compiler=gdc-10 -color --config=gdc -b release + + make ldc + + make dmd + +there has been some coalescence around the Meson build system +https://mesonbuild.com/ + + meson + + ninja -C build + + meson setup --wipe build && ninja -v -C build + + make meson + +dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. + +#+END_SRC + +** configuration + +#+NAME: sisu_spine_readme_configuration +#+BEGIN_SRC text +# Configuration + +Configuration files are yaml files + +The following paths are searched: + + ~/.dr/config_local_site + ~/path_to_pod_root/.dr/config_local_site + +e.g. 
processing + + ~spineMarkupSamples/pod/* + +will search: + + ~spineMarkupSamples/pod/.dr/config_local_site + + ~/.dr/config_local_site + +to specify an alternative configuration file to use on the command line (in this +example named "my_config"): + + spine -v --html --config=~spineMarkupSamples/pod/.dr/my_config + +here is a sample configuration file: + +flag: + act0: "--html" + act1: "--html --epub" +output: + path: "/var/www/html" +default: + language: "en" + papersize: "a4" + text_wrap: "80" + digest: "sha256" +webserv: + http: "http" + domain: "localhost" + data_http: "http" + data_domain: "localhost" + data_root_url: "http://localhost" + data_root_path: "/var/www/html" + data_root_part: "" + images_root_part: "image" + cgi_title: "≅ SiSU Spine search" + cgi_http: "http" + cgi_domain: "localhost" + cgi_bin_url: "http://localhost/cgi-bin" + cgi_bin_part: "cgi-bin" + cgi_bin_path: "/usr/lib/cgi-bin" + cgi_search_script: "spine-search" + cgi_search_script_raw_fn_d: "spine_search.d" + cgi_port: "" + cgi_user: "" + cgi_action: "http://localhost/cgi-bin/spine-search" + db_sqlite: "spine.search.db" + db_pg_table: "" + db_pg_user: "" + +#+END_SRC + +** commands help + +#+NAME: sisu_spine_readme_commands +#+BEGIN_SRC text +# Commands + +for a list of commands from the program type: + + spine -h + +at the time of writing this provides the following output: + + --abstraction document abstraction + --assert set optional assertions on + --cgi-search-form-codegen generates (pre-compiled) d code for search of specified db + --cgi-sqlite-search-filename =[filename] + --concordance file for document + --config =/path/to/config/file/including/filename + --dark alternative dark theme + --debug debug + --digest hash digest for each object + --epub process epub output + --harvest extract info on authors & topics from document header metadata + --harvest-authors extract info on authors from document header metadata + --harvest-topics extract info on topics from document header 
metadata + --hide-ocn object cite numbers + --html process html output + --html-link-harvest place links back to harvest in segmented html + --html-link-search html embedded search submission + --html-seg process html output + --html-scroll process html output + --lang =[lang code e.g. =en or =en,es] + --latex output for pdfs + --latex-color-links mono or color links for pdfs + --light default light theme + --manifest process manifest output + --ocn-off object cite numbers + --odf open document format text (--odt) + --odt open document format text + --output =/path/to/output/dir specify where to place output + --parallel parallelisation + --parallel-subprocesses nested parallelisation + --pdf latex output for pdfs + --pdf-color-links mono or color links for pdfs + --pod spine (doc reform) pod source content bundled +-q --quiet output to terminal + --section-backmatter document backmatter (default) + --section-biblio document biblio (default) + --section-blurb document blurb (default) + --section-body document body (default) + --section-bookindex document bookindex (default) + --section-endnotes document endnotes (default) + --section-glossary document glossary (default) + --section-toc table of contents (default) + --serial serial processing + --skip-output skip output + --show-config show config + --show-make show make + --show-metadata show metadata + --show-summary show summary + --source document markup source + --sqlite-discrete process discrete sqlite output + --sqlite-db-create create db, create tables + --sqlite-db-drop drop tables & db + --sqlite-db-recreate create db, create tables + --sqlite-delete sqlite output + --sqlite-db-filename =[filename].sql.db + --sqlite-insert sqlite output + --sqlite-update sqlite output + --text text output + --theme-dark alternative dark theme + --theme-light default light theme + --txt text output +-v --verbose output to terminal + --very-verbose output to terminal + --workon (reserved for some matters under development & 
testing) + --xhtml xhtml output +-h --help This help information. + +#+END_SRC + +** command examples + +#+NAME: sisu_spine_readme_examples +#+BEGIN_SRC text +# Examples + +if configuration has been set specify just +- the desired output and +- the markup document/pod(s) to process + + spine -v --html ~spineMarkupSamples/markup/pod/sisu-manual + +if configuration has not been set or to override the set configuration specify +- the output path as well as +- the desired output and +- the markup document/pod(s) to process + +note: ~webDocRoot should be the path to web doc root, provide a suitable output path. + + spine -v --html --html-link-search --html-link-harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --epub --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --epub --latex --odt --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +## harvest + +if you have a document collection with documents that have metadata headers a +summary of the collection can be made using the harvest command + + spine -v --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --harvest ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --html-link-harvest --harvest ~spineMarkupSamples/pod/* + +## sqlite + +### create db + +if there is no sqlite db you first need to create one, to do so +- the name of the db and +- the root path for document output +must be specified: + + spine -v \ + --sqlite-db-create --sqlite-db-filename="spine.search.db" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + + spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` + +if you have a configuration file providing this information that is to be used +for a document collection you can point to the
document collection: + + spine -v --sqlite-db-create ~spineMarkupSamples/pod + +### populate db + +must specify: +- the name of the db and +- the root path for document output + + spine -v --sqlite-update \ + --sqlite-db-filename="spine.search.db" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + + spine -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +if you have a configuration file providing this information that is to be used +for a document collection you can point to the document collection: + + spine -v --sqlite-update ~spineMarkupSamples/pod/* + +### generate a cgi search form in d + + spine -v --cgi-search-form-codegen \ + --output=/var/www/html \ + ~spineMarkupSamples/pod + + spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod + + spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod/.dr/config_local_site + + spine --cgi-search-form-codegen --output=`echo ~webDocRoot` ~spineMarkupSamples/pod + + spine --cgi-search-form-codegen --cgi-sqlite-search-filename="spine_search" --output=`echo ~webDocRoot` + + spine -v --cgi-search-form-codegen \ + --sqlite-db-filename="spine.search.db" \ + --cgi-sqlite-search-filename="spine-search" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod + +#### compile the cgi search form + + cd /var/www/html/cgi # /var/www/html (default document root) + + cd ~webDocRoot/cgi + +the directory ~webDocRoot/cgi/src should contain two files +- spine_search.d (or whatever you named it) +- cgi.d (by Adam Ruppe) + + dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/.
+ +should compile spine-search in ~webDocRoot/cgi/cgi-bin and copy it to the +cgi-bin directory + + spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --cgi-sqlite-search-filename="spine-search" --output=`echo ~webDocRoot` + + spine -v --sqlite-db-create ~spineMarkupSamples/pod + + spine -v --html --html-link-search --cgi-sqlite-search-filename="spine-search" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + + spine -v --html --html-link-search --cgi-sqlite-search-filename="spine-search" --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* + +### create db & search form + + spine -v \ + --sqlite-db-create --sqlite-db-filename="spine.search.db" \ + --cgi-search-form-codegen --cgi-sqlite-search-filename="spine-search" \ + --output=/var/www/html \ + ~spineMarkupSamples/pod/* + +### html with links to search form + + spine -v --html \ + --html-link-search \ + --output=`echo ~webDocRoot` \ + ~spineMarkupSamples/pod/* + +#+END_SRC + +* manpage +** tangle + +#+BEGIN_SRC man :tangle "../doc/man/man1/spine.1" +<<sisu_spine_manpage_head>> +<<sisu_spine_manpage_description>> +<<sisu_spine_manpage_flags>> +<<sisu_spine_manpage_flags_db>> +<<sisu_spine_manpage_config>> +<<sisu_spine_manpage_pod_dir_structure>> +<<sisu_spine_manpage_cli_examples>> +<<sisu_spine_manpage_docs>> +<<sisu_spine_manpage_markup>> +#+END_SRC + +** manpage +*** head + +#+NAME: sisu_spine_manpage_head +#+BEGIN_SRC man +.TH "spine" "1" "2020-04-05" "0.10.0" "Spine" +.br +.SH NAME +.br +sisu - documents: markup, structuring, publishing in multiple standard formats, and search +.br +.SH SYNOPSIS +.br +sisu [--options] [filename/wildcard] + +.br +sisu --txt --html --epub --odt --pdf --wordmap --sqlite --manpage --texinfo --sisupod --source --qrcode [filename/wildcard] + +.br +sisu --pg (--createdb|update [filename/wildcard]|--dropall) + +#+END_SRC + +*** description 
+ +#+NAME: sisu_spine_manpage_description +#+BEGIN_SRC man +.SH SISU - MANUAL, +RALPH AMISSAH + +.SH WHAT IS SISU? + +.SH INTRODUCTION - WHAT IS SISU? + +.BR + +.B SiSU +is a lightweight markup based document creation and publishing framework that +is controlled from the command line. Prepare documents for +.B SiSU +using your text editor of choice, then use +.B SiSU +to generate various output document formats. + +.BR +From a single lightly prepared document (plain-text +.I UTF-8 +) sisu custom builds several standard output formats which share a common (text +object) numbering system for citation of content within a document (that also +has implications for search). The sisu engine works with an abstraction of the +document's structure and content from which it is possible to generate +different forms of representation of the document. +.B SiSU +produces: plain-text, +.I HTML, +.I XHTML, +.I XML, +.I EPUB, +.I ODF: +.I ODT +(Opendocument), +.I LaTeX, +.I PDF, +and populates an +.I SQL +database ( +.I PostgreSQL +or +.I SQLite +) with text objects, roughly, paragraph sized chunks so that document searches +are done at this level of granularity. + +.BR +Outputs share a common citation numbering system, associated with text objects +and any semantic meta-data provided about the document. + +.BR + +.B SiSU +also provides concordance files, document content certificates and manifests of +generated output. Book indexes may be made. + +.BR +Some document markup samples are provided in the package sisu -markup-samples. 
+Homepages: + +- <http://www.sisudoc.org/> + +- <http://www.jus.uio.no/sisu> + +.SH COMMANDS SUMMARY + +.SH DESCRIPTION + +.BR + +.B SiSU +is a document publishing system, that from a simple single marked-up document, +produces multiple output formats including: +.I plaintext, +.I HTML, +.I XHTML, +.I XML, +.I EPUB, +.I ODT +( +.I OpenDocument +( +.I ODF +) text), +.I LaTeX, +.I PDF, +info, and +.I SQL +( +.I PostgreSQL +and +.I SQLite +) , which share text object numbers ("object citation numbering") and the same +document structure information. For more see: <http://sisudoc.org> or +<http://www.jus.uio.no/sisu> +#+END_SRC + +** flags +*** general + +#+NAME: sisu_spine_manpage_flags +#+BEGIN_SRC man +.SH DOCUMENT PROCESSING COMMAND FLAGS + +.TP +.B --abstraction [path + filename] +run document abstraction +.TP +.B --act[s0-9] [path + filename] +--act0 to --act9 configurable shortcuts for multiple flags, -0 to -9 synonyms, +configure in sisurc.yml; sisu default action on a specified file where no flag +is provided is --act0; --act or --acts for information on current actions +ascribed to --act0 to --act9 +.TP +.B --asciidoc [path + filename] +asciidoc, smart text (not available) +.TP +.B --cgi-search-form-codegen + generate d code search form to search db specified, needs --output=[path] and +--sqlite-db-filename=[cgi search form name] or path to configuration file +--config=[full path to config file] +.TP +.B --cgi-sqlite-search-filename=[filename] +name to give cgi-search form, (it generates a [filename].d file that requires +subsequent compilation) also required is the name of the sqlite db to be +searched by the form.
+.TP +.B --concordance [path + filename] +(not implemented) +.TP +.B --config=[path to config file + filename] +.TP +.B --dark + alternative theme for html and epub output, a light (default) theme is + also provided +.TP +.B --digest (not implemented) +.TP +.B --delete [path + filename] +see --zap +.TP +.B --digests [path + filename] +not implemented +.TP +.B --epub [path + filename] +produces an epub document +.TP +.B --harvest [path to files] +extract and present info on authors & topics from document header metadata. +makes two lists of sisu output based on the sisu markup documents in a +directory: list of author and authors works (year and titles), and; list by +topic with titles and author. Makes use of header metadata fields (author, +title, date, topic_register). +.TP +.B --harvest-authors [path to files] +extract and present info on authors from metadata in document headers +.TP +.B --harvest-topics [path to files] +extract and present info on topics from metadata in document headers +.TP +.B --hide-ocn +turn visibility of object numbers off +.TP +.B --html [path + filename] +produces html output in two forms (i) segmented text with table of contents +(toc.html and index.html) and (ii) the document in a single file (scroll.html). +.TP +.B --html-link-harvest +within html output creates link to the document set metadata harvest output +part of --html output instruction and assumes that --harvest has been or will + be run +.TP +.B --html-link-search +within html output creates a search form for submission, requires information +on the name of the search form --search part of --html output instruction it +assumes there is a cgi search form and related document database +.TP +.B --html-scroll [path + filename] +produces html output, the document in a single file (scroll.html) only. Compare +--html-seg and --html +.TP +.B --html-seg [path + filename] +produces html output, segmented text with table of contents (toc.html and +index.html). 
Compare --html-scroll and --html +.TP +.B --lang=[language code, e.g. =en or =en,es] +provide language code of document +.TP +.B --latex [path + filename] +.I LaTeX +output for different document sizes (a4, a5, b4, letter) and orientations +(portrait, landscape) for downstream (processing and) conversion to pdf, (used +with xetex no direct link between programs provided as this is a much slower +process) +.TP +.B --latex-color-links +monochrome or color links within pdf, toggle (mono better for printing), +the default is mono for portrait and color for landscape documents +.TP +.B --light +theme for html and epub output, default, a dark alternative is provided +.TP +.B --manifest [path + filename] +produces an html summary of output generated (hyperlinked to content) and +document specific metadata (sisu_manifest.html). This step is assumed for most +processing flags. +.TP +.B --markdown [path + filename] +markdown smart text (not available) +.TP +.B --no-* +negate a toggle +.TP +.B --ocn-off +object numbers off (the c in ocn is for citation). See --hide-ocn +.TP +.B --odf [path + filename] +see --odt +.TP +.B --odt [path + filename] +produce open document output +.TP +.B --output=[path to output directories] +where to place document output +.TP +.B --parallel +parallelization on (the default except for sqlite) +.TP +.B --parallel-subprocesses +nested parallelization on (the default except for sqlite) +.TP +.B --papersize-(a4|a5|b5|letter|legal) +in conjunction with --pdf set pdf papersize, overriding any configuration +settings, to set more than one papersize repeat the option --pdf --papersize-a4 +--papersize-letter. See also --papersize=* (NOT implemented) +.TP +.B --papersize=a4,a5,b5,letter,legal +in conjunction with --pdf set pdf papersize, overriding any configuration +settings, to set more than one papersize list after the equal sign with a comma +separator --papersize=a4,letter. 
See also --papersize-* (NOT implemented) +.TP +.B --pdf [path + filename] +produces +.I LaTeX +see --latex +.TP +.B --pdf-color-links +monochrome or color links within latex for pdf. See --latex-color-links +.TP +.B --pod +markup source bundled in a zip file. +Produces a zipped file of the prepared document specified along with associated +images. This provides a quick way of gathering the relevant +parts of a sisu document which can then for example be emailed. A sisupod +includes sisu markup source file, (along with associated documents if a master +file, or available in multilingual versions), together with related images. +(it should be possible in future to run spine commands directly against a pod). +.TP +.B --qrcode [path + filename] +generate QR code image of metadata (used in manifest). (not implemented) +.TP +.B --quiet +quiet less output to terminal. +.TP +.B --section-* +provides finer grain control over which parts of the document are processed +to produce output, toc, body, endnotes, glossary, biblio, bookindex and blurb +.TP +.B --section-biblio +produce document bibliography output, toggle +.TP +.B --section-blurb +produce document blurb output, toggle +.TP +.B --section-body +produce document body output, toggle +.TP +.B --section-bookindex +produce document bookindex output, toggle +.TP +.B --section-endnotes +produce document endnotes output, toggle +.TP +.B --section-glossary +produce document glossary output, toggle +.TP +.B --serial +serial processing --no-parallel +.TP +.B --show-config +show site and document configuration instructions. Requires path to +configuration file or path to documents to be processed. 
+.TP +.B --show-make +show document make instructions +.TP +.B --show-metadata +show document metadata +.TP +.B --show-summary +show document summary +.TP +.B --source [path + filename] +document markup source +.TP +.B --sha256 +set hash digest where used to sha256 (not implemented) +.TP +.B --sha512 +set hash digest where used to sha512 (not implemented) +.TP +.B --sqlite-discrete [path + filename] +create a per document sqlite db +.TP +.B --sqlite-db-create --sqlite-db-filename="[db filename]" --output="[output path]" +create a shared db and its tables. Requires a db filename, which may be set in the configuration file or on the command line as shown +.TP +.B --sqlite-db-drop [path + db filename] +drop (remove) db and its tables +.TP +.B --sqlite-db-recreate [path + filename] +drop and re-create a shared db and its tables. Requires a db filename, which may be set in the configuration file or on the command line with --sqlite-db-filename="[db name]" +.TP +.B --sqlite-db-filename="[db name]" +provide name of sqlite db, to be created, dropped, populated or for which a search form is to be made. This information may also be set in the configuration file. +.TP +.B --sqlite-delete [path + filename] +process sqlite output, remove file +.TP +.B --sqlite-insert [path + filename] +process sqlite output, insert file. See --sqlite-update +.TP +.B --sqlite-update [path + filename] +process sqlite output, update file +.TP +.B --source [filename/wildcard] +copies sisu markup file to output directory. 
Alias -s +.TP +.B --text [filename/wildcard] +produces +.I plaintext +output +(not implemented) +.TP +.B --theme-dark +See --dark +.TP +.B --theme-light +See --light +.TP +.B --txt [filename/wildcard] +produces +.I plaintext +output +(not implemented) +.TP +.B --txt-asciidoc [filename/wildcard] +see --asciidoc +(not implemented) +.TP +.B --txt-markdown [filename/wildcard] +see --markdown +(not implemented) +.TP +.B --txt-rst [filename/wildcard] +see --rst +(not implemented) +.TP +.B --txt-textile [filename/wildcard] +see --textile +(not implemented) +.TP +.B -v +on its own, provides +.B SiSU +version information +.TP +.B -v [filename/wildcard] +see --verbose +.TP +.B --verbose [filename/wildcard] +provides verbose output of what is being generated, where output is placed (and +error messages if any). Alias -v +.TP +.B --very-verbose [filename/wildcard] +provides more verbose output of what is being generated. See --verbose. Alias +-V +.TP +.B --version +spine version +(not implemented) +.TP +.B --xhtml +xhtml output +(not implemented) + +.SH COMMAND LINE MODIFIERS + +.TP +.B --no-ocn +[with --html --pdf or --epub] switches off +.I object citation numbering. +Produce output without identifying numbers in margins of html or +.I LaTeX +/pdf output. 
+#+END_SRC + +*** db flags + +#+NAME: sisu_spine_manpage_flags_db +#+BEGIN_SRC man +.SH DATABASE COMMANDS + +.BR + +.B dbi - database interface + +.BR + +.B --pg or --pgsql +set for +.I PostgreSQL +.B --sqlite +default set for +.I SQLite +-d is modifiable with --db=[database type (PgSQL or +.I SQLite +) ] +.TP +.B --pg -v --createall +initial step, creates required relations (tables, indexes) in existing +.I PostgreSQL +database (a database should be created manually and given the same name as +working directory, as requested) (rb.dbi) [ -dv --createall +.I SQLite +equivalent] it may be necessary to run sisu -Dv --createdb initially NOTE: at +the present time for +.I PostgreSQL +it may be necessary to manually create the database. The command would be +'createdb [database name]' where database name would be SiSU_[present working +directory name (without path)]. Please use only alphanumerics and underscores. +.TP +.B --pg -v --import +[filename/wildcard] imports data specified to +.I PostgreSQL +db (rb.dbi) [ -dv --import +.I SQLite +equivalent] +.TP +.B --pg -v --update +[filename/wildcard] updates/imports specified data to +.I PostgreSQL +db (rb.dbi) [ -dv --update +.I SQLite +equivalent] +.TP +.B --pg --remove +[filename/wildcard] removes specified data from +.I PostgreSQL +db (rb.dbi) [ -d --remove +.I SQLite +equivalent] +.TP +.B --pg --dropall +kills data and drops ( +.I PostgreSQL +or +.I SQLite +) db, tables & indexes [ -d --dropall +.I SQLite +equivalent] + +.BR +The -v is for verbose output. 
+#+END_SRC + +** configuration file + +#+NAME: sisu_spine_manpage_config +#+BEGIN_SRC man +.SH CONFIGURATION + +.BR + +default location: +.TP +~/.dr/config_local_site +.TP +.nf +flag: + act0: "--html" + act1: "--html --epub" +output: + path: "/var/www/html" +default: + language: "en" + papersize: "a4" + text_wrap: "80" + digest: "sha256" +webserv: + http: "http" + domain: "localhost" + data_http: "http" + data_domain: "localhost" + data_root_url: "http://localhost" + data_root_path: "/var/www/html" + data_root_part: "" + images_root_part: "image" + cgi_title: "≅ SiSU Spine search" + cgi_http: "http" + cgi_domain: "localhost" + cgi_bin_url: "http://localhost/cgi-bin" + cgi_bin_part: "cgi-bin" + cgi_bin_path: "/usr/lib/cgi-bin" + cgi_search_script: "spine-search" + cgi_search_script_raw_fn_d: "spine_search.d" + cgi_port: "" + cgi_user: "" + cgi_action: "http://localhost/cgi-bin/spine-search" + db_sqlite: "spine.search.db" + db_pg_table: "" + db_pg_user: "" +.fi + +.BR +#+END_SRC + +** sample pod directory + +#+NAME: sisu_spine_manpage_pod_dir_structure +#+BEGIN_SRC man +.SH SAMPLE POD DIRECTORY STRUCTURE +.BR +.TP +.nf + +pod (directory may contain multiple documents) + └── the_wealth_of_networks.yochai_benkler + ├── conf + │ └── sisu_document_make + ├── media + │ ├── image + │ │ ├── won_benkler_2_1.png + │ │ ├── won_benkler_6_1.png + │ │ ├── won_benkler_7_1.png + │ │ ├── won_benkler_7_2.png + │ │ ├── won_benkler_7_3a.png + │ │ ├── won_benkler_7_3b.png + │ │ ├── won_benkler_7_4.png + │ │ ├── won_benkler_7_5.png + │ │ ├── won_benkler_7_6.png + │ │ └── won_benkler_9_1.png + │ └── text + │ └── en + │ └── the_wealth_of_networks.yochai_benkler.sst + └── pod.manifest + +.fi +#+END_SRC + +** examples + +#+NAME: sisu_spine_manpage_cli_examples +#+BEGIN_SRC man +.SH COMMAND LINE EXAMPLES + +.TP +note: ~webDocRoot should be the path to web doc root, provide a suitable output path. 
+.TP +spine -v --html --html-link-search --html-link-harvest --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --html --html-link-search --html-link-harvest --epub --harvest --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --sqlite-db-create --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod +.TP +spine -v --sqlite-db-create ~spineMarkupSamples/pod +.TP +spine -v --sqlite-update --sqlite-db-filename="spine.search.db" --output=`echo ~webDocRoot` ~spineMarkupSamples/pod/* +.TP +spine -v --sqlite-update ~spineMarkupSamples/pod/* +.TP +spine -v --show-config +.TP +spine -v --show-config --config= ~spineMarkupSamples/pod/.dr/config_local_site_test +.TP +spine -v --show-config --config=~spineMarkupSamples/pod/.dr +.TP +spine -v --cgi-search-form-codegen --config=~spineMarkupSamples/pod/.dr/config_local +.TP +cd ~webDocRoot/cgi +.TP +dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/. +.TP +#+END_SRC + +** docs +*** sources + +#+NAME: sisu_spine_manpage_docs +#+BEGIN_SRC man + +.BR +Running sisu (alone without any flags, filenames or wildcards) brings up the +interactive help, as does any sisu command that is not recognised. Enter to +escape. 
+.SH HELP + +.SH SISU MANUAL + + +.BR +The most up to date information on sisu should be contained in the sisu_manual, +available at: + +.BR + <http://sisudoc.org/sisu/sisu_manual/> + +.BR +The manual can be generated from source, found respectively, either within the +.B SiSU +tarball or installed locally at: + +.BR + ./data/doc/sisu/markup-samples/sisu_manual + +.BR + /usr/share/doc/sisu/markup-samples/sisu_manual + +.BR +move to the respective directory and type e.g.: + +.BR + sisu sisu_manual.ssm +.SH SISU MAN PAGES + + +.BR +If +.B SiSU +is installed on your system usual man commands should be available, try: + +.BR + man sisu + +.BR +Most +.B SiSU +man pages are generated directly from sisu documents that are used to prepare +the sisu manual, the sources files for which are located within the +.B SiSU +tarball at: + +.BR + ./data/doc/sisu/markup-samples/sisu_manual + +.BR +Once installed, directory equivalent to: + +.BR + /usr/share/doc/sisu/markup-samples/sisu_manual + +.BR +Available man pages are converted back to html using man2html: + +.BR + /usr/share/doc/sisu/html/ + +.BR + ./data/doc/sisu/html + +.BR +An online version of the sisu man page is available here: + +.BR + +- various sisu man pages <http://www.jus.uio.no/sisu/man/> [^1] + +.BR +- sisu.1 <http://www.jus.uio.no/sisu/man/sisu.1.html> [^2] +.SH SISU BUILT-IN INTERACTIVE HELP, [DISCONTINUED] + + +.BR +This fell out of date and has been discontinued. +#+END_SRC + +*** markup + +#+NAME: sisu_spine_manpage_markup +#+BEGIN_SRC man +.SH INTRODUCTION TO SISU MARKUP[^3] + +.SH SUMMARY + +.BR + +.B SiSU +source documents are +.I plaintext +( +.I UTF-8 +)[^4] files + +.BR +All paragraphs are separated by an empty line. 
+ +.BR +Markup is comprised of: + +.BR +- at the top of a document, the document header made up of semantic meta-data +about the document and if desired additional processing instructions (such as an +instruction to automatically number headings from a particular level down) + +.BR +- followed by the prepared substantive text of which the most important single +characteristic is the markup of different heading levels, which define the +primary outline of the document structure. Markup of substantive text includes: + +.BR + * heading levels defines document structure + +.BR + * text basic attributes, italics, bold etc. + +.BR + * grouped text (objects), which are to be treated differently, such as code + blocks or poems. + +.BR + * footnotes/endnotes + +.BR + * linked text and images + +.BR + * paragraph actions, such as indent, bulleted, numbered-lists, etc. +.SH MARKUP RULES, DOCUMENT STRUCTURE AND METADATA REQUIREMENTS + + +.BR +minimal content/structure requirement: + +.BR +[metadata] +.nf +A~ (level A [title]) + +1~ (at least one level 1 [segment/(chapter)]) +.fi + + +.BR +structure rules (document hierarchy, heading levels): + +.BR +there are two sets of heading levels ABCD (title & parts if any) and 123 +(segment & subsegments if any) + +.BR +sisu has the following levels: +.nf +A~ [title] . 
+ required (== 1) followed by B~ or 1~ +B~ [part] * + followed by C~ or 1~ +C~ [subpart] * + followed by D~ or 1~ +D~ [subsubpart] * + followed by 1~ +1~ [segment (chapter)] + + required (>= 1) followed by text or 2~ +text * + followed by more text or 1~, 2~ + or relevant part *() +2~ [subsegment] * + followed by text or 3~ +text * + followed by more text or 1~, 2~ or 3~ + or relevant part, see *() +3~ [subsubsegment] * + followed by text +text * + followed by more text or 1~, 2~ or 3~ or relevant part, see *() + +*(B~ if none other used; + if C~ is last used: C~ or B~; + if D~ is used: D~, C~ or B~) +.fi + +.nf +- level A~ is the title and is mandatory +- there can only be one level A~ + +- heading levels BCD, are optional and there may be several of each + (where all three are used corresponding to e.g. Book Part Section) + * sublevels that are used must follow each other sequentially + (alphabetically), +- heading levels A~ B~ C~ D~ are followed by other heading levels rather + than substantive text + which may be the subsequent sequential (alphabetic) heading part level + or a heading (segment) level 1~ +- there must be at least one heading (segment) level 1~ + (the level on which the text is segmented, in a book would correspond + to the Chapter level) +- additional heading levels 1~ 2~ 3~ are optional and there may be several + of each +- heading levels 1~ 2~ 3~ are followed by text (which may be followed by + the same heading level) + and/or the next lower numeric heading level (followed by text) + or indeed return to the relevant part level + (as a corollary to the rules above substantive text/ content + must be preceded by a level 1~ (2~ or 3~) heading) +.fi + +.SH MARKUP EXAMPLES + +.SH ONLINE + + +.BR +Online markup examples are available together with the respective outputs +produced from <http://www.jus.uio.no/sisu/SiSU/examples.html> or from +<http://www.jus.uio.no/sisu/sisu_examples/> + +.BR +There is of course this document, which provides a cursory 
overview of sisu +markup and the respective output produced: +<http://www.jus.uio.no/sisu/sisu_markup/> + +.BR +an alternative presentation of markup syntax: +/usr/share/doc/sisu/on_markup.txt.gz +.SH INSTALLED + + +.BR +With +.B SiSU +installed sample skins may be found in: /usr/share/doc/sisu/markup-samples (or +equivalent directory) and if sisu -markup-samples is installed also under: +/usr/share/doc/sisu/markup-samples-non-free + +.SH MARKUP OF HEADERS + +.BR +Headers contain either: semantic meta-data about a document, which can be used +by any output module of the program, or; processing instructions. + +.BR +Note: the first line of a document may include information on the markup +version used in the form of a comment. Comments are a percentage mark at the +start of a paragraph (and as the first character in a line of text) followed by +a space and the comment: +.nf +% this would be a comment +.fi + +.SH SAMPLE HEADER + + +.BR +This current document is loaded by a master document that has a header similar +to this one: +.nf +% SiSU master 4.0 + +title: SiSU + subtitle: Manual + +creator: + author: Amissah, Ralph + +publisher: [publisher name] + +rights: Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3 + +classify: + topic_register: SiSU:manual;electronic documents:SiSU:manual + subject: ebook, epublishing, electronic book, electronic publishing, + electronic document, electronic citation, data structure, + citation systems, search + +% used_by: manual + +date: + published: 2008-05-22 + created: 2002-08-28 + issued: 2002-08-28 + available: 2002-08-28 + modified: 2010-03-03 + +make: + num_top: 1 + breaks: new=C; break=1 + bold: /Gnu|Debian|Ruby|SiSU/ + home_button_text: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + footer: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + manpage: name=sisu - documents: markup, structuring, publishing in multiple standard formats, and search; + synopsis=sisu 
[-abcDdeFhIiMmNnopqRrSsTtUuVvwXxYyZz0-9] [filename/wildcard ] + . sisu [-Ddcv] [instruction] + . sisu [-CcFLSVvW] + +@links: + { SiSU Homepage }http://www.sisudoc.org/ + { SiSU Manual }http://www.sisudoc.org/sisu/sisu_manual/ + { Book Samples & Markup Examples }http://www.jus.uio.no/sisu/SiSU/examples.html + { SiSU Download }http://www.jus.uio.no/sisu/SiSU/download.html + { SiSU Changelog }http://www.jus.uio.no/sisu/SiSU/changelog.html + { SiSU Git repo }http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary + { SiSU List Archives }http://lists.sisudoc.org/pipermail/sisu/ + { SiSU @ Debian }http://packages.qa.debian.org/s/sisu.html + { SiSU Project @ Debian }http://qa.debian.org/developer.php?login=sisu@lists.sisudoc.org + { SiSU @ Wikipedia }http://en.wikipedia.org/wiki/SiSU +.fi + +.SH AVAILABLE HEADERS + + +.BR +Header tags appear at the beginning of a document and provide meta information +on the document (such as the +.I Dublin Core +) , or information as to how the document as a whole is to be processed. All +header instructions take the form @headername: or on the next line and indented +by one space :subheadername: All +.I Dublin Core +meta tags are available + +.BR + +.B @identifier: +information or instructions + +.BR +where the "identifier" is a tag recognised by the program, and the +"information" or "instructions" belong to the tag/identifier specified + +.BR +Note: a header where used should only be used once; all headers apart from +@title: are optional; the @structure: header is used to describe document +structure, and can be useful to know. 
+ +.BR +This is a sample header +.nf +% SiSU 2.0 [declared file-type identifier with markup version] +.fi + +.nf +@title: [title text] [this header is the only one that is mandatory] + subtitle: [subtitle if any] + language: English +.fi + +.nf +creator: + author: [Lastname, First names] + illustrator: [Lastname, First names] + translator: [Lastname, First names] + prepared_by: [Lastname, First names] +.fi + +.nf +date: + published: [year or yyyy-mm-dd] + created: [year or yyyy-mm-dd] + issued: [year or yyyy-mm-dd] + available: [year or yyyy-mm-dd] + modified: [year or yyyy-mm-dd] + valid: [year or yyyy-mm-dd] + added_to_site: [year or yyyy-mm-dd] + translated: [year or yyyy-mm-dd] +.fi + +.nf +rights: + copyright: Copyright (C) [Year and Holder] + license: [Use License granted] + text: [Year and Holder] + translation: [Name, Year] + illustrations: [Name, Year] +.fi + +.nf +classify: + topic_register: SiSU:markup sample:book;book:novel:fantasy + type: + subject: + description: + keywords: + abstract: + loc: [Library of Congress classification] + dewey: [Dewey classification +.fi + +.nf +identify: + :isbn: [ISBN] + :oclc: +.fi + +.nf +links: { SiSU }http://www.sisudoc.org + { FSF }http://www.fsf.org +.fi + +.nf +make: + num_top: 1 + headings: [text to match for each level + (e.g. PART; Chapter; Section; Article; or another: none; BOOK|FIRST|SECOND; none; CHAPTER;) + breaks: new=:C; break=1 + promo: sisu, ruby, sisu_search_libre, open_society + bold: [regular expression of words/phrases to be made bold] + italics: [regular expression of words/phrases to italicise] + home_button_text: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org + footer: {SiSU}http://sisudoc.org; {git}http://git.sisudoc.org +.fi + +.nf +original: + language: [language] +.fi + +.nf +notes: + comment: + prefix: [prefix is placed just after table of contents] +.fi + +.SH MARKUP OF SUBSTANTIVE TEXT + +.SH HEADING LEVELS + + +.BR +Heading levels are :A~ ,:B~ ,:C~ ,1~ ,2~ ,3~ ... 
:A - :C being part / section +headings, followed by other heading levels, and 1 -6 being headings followed by +substantive text or sub-headings. :A~ usually the title :A~? conditional level +1 heading (used where a stand-alone document may be imported into another) + +.BR + +.B :A~ [heading text] +Top level heading [this usually has similar content to the title @title: ] +NOTE: the heading levels described here are in 0.38 notation, see heading + +.BR + +.B :B~ [heading text] +Second level heading [this is a heading level divider] + +.BR + +.B :C~ [heading text] +Third level heading [this is a heading level divider] + +.BR + +.B 1~ [heading text] +Top level heading preceding substantive text of document or sub-heading 2, the +heading level that would normally be marked 1. or 2. or 3. etc. in a document, +and the level on which sisu by default would break html output into named +segments, names are provided automatically if none are given (a number), +otherwise takes the form 1~my_filename_for_this_segment + +.BR + +.B 2~ [heading text] +Second level heading preceding substantive text of document or sub-heading 3 , +the heading level that would normally be marked 1.1 or 1.2 or 1.3 or 2.1 etc. +in a document. + +.BR + +.B 3~ [heading text] +Third level heading preceding substantive text of document, that would normally +be marked 1.1.1 or 1.1.2 or 1.2.1 or 2.1.1 etc. in a document +.nf +1~filename level 1 heading, + +% the primary division such as Chapter that is followed by substantive text, and may be further subdivided (this is the level on which by default html segments are made) +.fi + +.SH FONT ATTRIBUTES + +.BR + +.B markup example: +.nf +normal text, *{emphasis}*, !{bold text}!, /{italics}/, _{underscore}_, "{citation}", +^{superscript}^, ,{subscript},, +{inserted text}+, -{strikethrough}-, #{monospace}# + +normal text + +*{emphasis}* [note: can be configured to be represented by bold, italics or underscore] + +!{bold text}! 
+ +/{italics}/ + +_{underscore}_ + +"{citation}" + +^{superscript}^ + +,{subscript}, + ++{inserted text}+ + +-{strikethrough}- + +#{monospace}# +.fi + + +.BR + +.B resulting output: + +.BR +normal text, +.B emphasis, +.B bold text +, +.I italics, +.I underscore, +"citation", ^superscript^, [subscript], ++inserted text++, --strikethrough--, +monospace + +.BR +normal text + +.BR + +.B emphasis +[note: can be configured to be represented by bold, italics or underscore] + +.BR + +.B bold text + +.BR + +.I italics + +.BR +.I underscore + +.BR +"citation" + +.BR +^superscript^ + +.BR +[subscript] + +.BR +++inserted text++ + +.BR +--strikethrough-- + +.BR +monospace +.SH INDENTATION AND BULLETS + + +.BR + +.B markup example: +.nf +ordinary paragraph + +_1 indent paragraph one step + +_2 indent paragraph two steps + +_9 indent paragraph nine steps +.fi + + +.BR + +.B resulting output: + +.BR +ordinary paragraph + +.BR + indent paragraph one step + +.BR + indent paragraph two steps + +.BR + indent paragraph nine steps + +.BR + +.B markup example: +.nf +_* bullet text + +_1* bullet text, first indent + +_2* bullet text, two step indent +.fi + + +.BR + +.B resulting output: + +.BR +- bullet text + +.BR + * bullet text, first indent + +.BR + * bullet text, two step indent + +.BR +Numbered List (not to be confused with headings/titles, (document structure)) + +.BR + +.B markup example: +.nf +# numbered list numbered list 1., 2., 3, etc. + +_# numbered list numbered list indented a., b., c., d., etc. 
+.fi + +.SH HANGING INDENTS + + +.BR + +.B markup example: +.nf +_0_1 first line no indent, +rest of paragraph indented one step + +_1_0 first line indented, +rest of paragraph no indent + +in each case level may be 0-9 +.fi + + +.BR + +.B resulting output: + +.BR +first line no indent, rest of paragraph indented one step; first line no + indent, rest of paragraph indented one step; first line no indent, rest of + paragraph indented one step; first line no indent, rest of paragraph indented + one step; first line no indent, rest of paragraph indented one step; first + line no indent, rest of paragraph indented one step; first line no indent, + rest of paragraph indented one step; first line no indent, rest of paragraph + indented one step; first line no indent, rest of paragraph indented one step; + +.BR +A regular paragraph. + +.BR +first line indented, rest of paragraph no indent first line indented, rest of +paragraph no indent first line indented, rest of paragraph no indent first line +indented, rest of paragraph no indent first line indented, rest of paragraph no +indent first line indented, rest of paragraph no indent first line indented, +rest of paragraph no indent first line indented, rest of paragraph no indent +first line indented, rest of paragraph no indent first line indented, rest of +paragraph no indent first line indented, rest of paragraph no indent + +.BR +in each case level may be 0-9 + +.BR + +.B live-build + A collection of scripts used to build customized +.B Debian + Livesystems. + .I live-build + was formerly known as live-helper, and even earlier known as live-package. + +.BR + +.B live-build + + A collection of scripts used to build customized +.B Debian + Livesystems. +.I live-build + was formerly known as live-helper, and even earlier known as live-package. +.SH FOOTNOTES / ENDNOTES + + +.BR +Footnotes and endnotes are marked up at the location where they would be +indicated within a text. They are automatically numbered. 
The output type +determines whether footnotes or endnotes will be produced + +.BR + +.B markup example: +.nf +~{ a footnote or endnote }~ +.fi + + +.BR + +.B resulting output: + +.BR +[^5] + +.BR + +.B markup example: +.nf +normal text~{ self contained endnote marker & endnote in one }~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text[^6] continues + +.BR + +.B markup example: +.nf +normal text ~{* unnumbered asterisk footnote/endnote, insert multiple asterisks if required }~ continues + +normal text ~{** another unnumbered asterisk footnote/endnote }~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text [^*] continues + +.BR +normal text [^**] continues + +.BR + +.B markup example: +.nf +normal text ~[* editors notes, numbered asterisk footnote/endnote series ]~ continues + +normal text ~[+ editors notes, numbered plus symbol footnote/endnote series ]~ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text [^*3] continues + +.BR +normal text [^+2] continues + +.BR + +.B Alternative endnote pair notation for footnotes/endnotes: +.nf +% note the endnote marker "~^" + +normal text~^ continues + +^~ endnote text following the paragraph in which the marker occurs +.fi + + +.BR +the standard and pair notation cannot be mixed in the same document +.SH LINKS + +.SH NAKED URLS WITHIN TEXT, DEALING WITH URLS + + +.BR +urls found within text are marked up automatically. A url within text is +automatically hyperlinked to itself and by default decorated with angled +braces, unless they are contained within a code block (in which case they are +passed as normal text), or escaped by a preceding underscore (in which case the +decoration is omitted). 
+ +.BR + +.B markup example: +.nf +normal text http://www.sisudoc.org/ continues +.fi + + +.BR + +.B resulting output: + +.BR +normal text <http://www.sisudoc.org/> continues + +.BR +An escaped url without decoration + +.BR + +.B markup example: +.nf +normal text _http://www.sisudoc.org/ continues + +deb _http://www.jus.uio.no/sisu/archive unstable main non-free +.fi + + +.BR + +.B resulting output: + +.BR +normal text <_http://www.sisudoc.org/> continues + +.BR +deb <_http://www.jus.uio.no/sisu/archive> unstable main non-free + +.BR +where a code block is used there is neither decoration nor hyperlinking, code +blocks are discussed later in this document + +.BR + +.B resulting output: +.nf +deb http://www.jus.uio.no/sisu/archive unstable main non-free +deb-src http://www.jus.uio.no/sisu/archive unstable main non-free +.fi + +.SH LINKING TEXT + + +.BR +To link text or an image to a url the markup is as follows + +.BR + +.B markup example: +.nf +about { SiSU }http://url.org markup +.fi + + +.BR + +.B resulting output: + +.BR +aboutSiSU <http://www.sisudoc.org/> markup + +.BR +A shortcut notation is available so the url link may also be provided +automatically as a footnote + +.BR + +.B markup example: +.nf +about {~^ SiSU }http://url.org markup +.fi + + +.BR + +.B resulting output: + +.BR +aboutSiSU <http://www.sisudoc.org/> [^7] markup + +.BR +Internal document links to a tagged location, including an ocn + +.BR + +.B markup example: +.nf +about { text links }#link_text +.fi + + +.BR + +.B resulting output: + +.BR +about ⌠text links⌡⌈link_text⌋ + +.BR +Shared document collection link + +.BR + +.B markup example: +.nf +about { SiSU book markup examples }:SiSU/examples.html +.fi + + +.BR + +.B resulting output: + +.BR +about ⌠ +.B SiSU +book markup examples⌡⌈:SiSU/examples.html⌋ +.SH LINKING IMAGES + + +.BR + +.B markup example: +.nf +{ tux.png 64x80 }image + +% various url linked images + +{tux.png 64x80 "a better way" }http://www.sisudoc.org/ + 
+{GnuDebianLinuxRubyBetterWay.png 100x101 "Way Better - with Gnu/Linux, Debian and Ruby" }http://www.sisudoc.org/ + +{~^ ruby_logo.png "Ruby" }http://www.ruby-lang.org/en/ +.fi + + +.BR + +.B resulting output: + +.BR +[ tux.png ] + +.BR +tux.png 64x80 "Gnu/Linux - a better way" <http://www.sisudoc.org/> + +.BR +GnuDebianLinuxRubyBetterWay.png 100x101 "Way Better - with Gnu/Linux, Debian +and Ruby" <http://www.sisudoc.org/> + +.BR +ruby_logo.png 70x90 "Ruby" <http://www.ruby-lang.org/en/> [^8] + +.BR + +.B linked url footnote shortcut +.nf +{~^ [text to link] }http://url.org + +% maps to: { [text to link] }http://url.org ~{ http://url.org }~ + +% which produces hyper-linked text within a document/paragraph, with an endnote providing the url for the text location used in the hyperlink +.fi + +.nf +text marker *~name +.fi + + +.BR +note at a heading level the same is automatically achieved by providing names +to headings 1, 2 and 3 i.e. 2~[name] and 3~[name] or in the case of +auto-heading numbering, without further intervention. 
+.SH LINK SHORTCUT FOR MULTIPLE VERSIONS OF A SISU DOCUMENT IN THE SAME DIRECTORY +TREE + + +.BR + +.B markup example: +.nf +!_ /{"Viral Spiral"}/, David Bollier + +{ "Viral Spiral", David Bollier [3sS]}viral_spiral.david_bollier.sst +.fi + + +.BR + +.B +.I "Viral Spiral", +David Bollier +"Viral Spiral", David Bollier <http://corundum/sisu_manual/en/manifest/viral_spiral.david_bollier.html> + document manifest <http://corundum/sisu_manual/en/manifest/viral_spiral.david_bollier.html> + ⌠html, segmented text⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠html, scroll, document in one⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠epub⌡「http://corundum/sisu_manual/en/epub/viral_spiral.david_bollier.epub」 + ⌠pdf, landscape⌡「http://corundum/sisu_manual/en/pdf/viral_spiral.david_bollier.pdf」 + ⌠pdf, portrait⌡「http://corundum/sisu_manual/en/pdf/viral_spiral.david_bollier.pdf」 + ⌠odf: odt, open document text⌡「http://corundum/sisu_manual/en/odt/viral_spiral.david_bollier.odt」 + ⌠xhtml scroll⌡「http://corundum/sisu_manual/en/xhtml/viral_spiral.david_bollier.xhtml」 + ⌠xml, sax⌡「http://corundum/sisu_manual/en/xml/viral_spiral.david_bollier.xml」 + ⌠xml, dom⌡「http://corundum/sisu_manual/en/xml/viral_spiral.david_bollier.xml」 + ⌠concordance⌡「http://corundum/sisu_manual/en/html/viral_spiral.david_bollier.html」 + ⌠dcc, document content certificate (digests)⌡「http://corundum/sisu_manual/en/digest/viral_spiral.david_bollier.txt」 + ⌠markup source text⌡「http://corundum/sisu_manual/en/src/viral_spiral.david_bollier.sst」 + ⌠markup source (zipped) pod⌡「http://corundum/sisu_manual/en/pod/viral_spiral.david_bollier.sst.zip」 + +.SH GROUPED TEXT / BLOCKED TEXT + + +.BR +There are two markup syntaxes for blocked text, using curly braces or using +tics +.SH BLOCKED TEXT CURLY BRACE SYNTAX + + +.BR +at the start of a line on its own use name of block type with an opening curly +brace, follow with the content of the block, and close with a 
closing curly +brace and the name of the block type, e.g. +.nf +code{ + +this is a code block + +}code +.fi + +.nf + +poem{ + +this here is a poem + +}poem +.fi + +.SH BLOCKED TEXT TIC SYNTAX + +.nf +``` code +this is a code block + +``` + +``` poem +this here is a poem + +``` +.fi + + +.BR +start a line with three back ticks, a space followed by the name of the +block type, follow with the content of the block, and close with three back +ticks on a line of their own, e.g. +.SH TABLES + + +.BR +Tables may be prepared in either of two forms + +.BR + +.B markup example: +.nf +table{ c3; 40; 30; 30; + +This is a table +this would become column two of row one +column three of row one is here + +And here begins another row +column two of row two +column three of row two, and so on + +}table +.fi + + +.BR + +.B resulting output: +This is a table|this would become column two of row one|column three of row one is here』And here begins another row|column two of row two|column three of row two, and so on』 + + +.BR +a second form may be easier to work with in cases where there is not much +information in each column + +.BR + +.B markup example: +[^9] +.nf +!_ Table 3.1: Contributors to Wikipedia, January 2001 - June 2005 + +{table~h 24; 12; 12; 12; 12; 12; 12;} + |Jan. 2001|Jan. 2002|Jan. 2003|Jan. 2004|July 2004|June 2006 +Contributors* | 10| 472| 2,188| 9,653| 25,011| 48,721 +Active contributors** | 9| 212| 846| 3,228| 8,442| 16,945 +Very active contributors*** | 0| 31| 190| 692| 1,639| 3,016 +No. of English language articles| 25| 16,000| 101,000| 190,000| 320,000| 630,000 +No. of articles, all languages | 25| 19,000| 138,000| 490,000| 862,000|1,600,000 + +* Contributed at least ten times; ** at least 5 times in last month; *** more than 100 times in last month. +.fi + + +.BR + +.B resulting output: + +.BR + +.B Table 3.1: Contributors to Wikipedia, January 2001 - June 2005 +|Jan. 2001|Jan. 2002|Jan. 2003|Jan. 
2004|July 2004|June 2006』Contributors*|10|472|2,188|9,653|25,011|48,721』Active contributors**|9|212|846|3,228|8,442|16,945』Very active contributors***|0|31|190|692|1,639|3,016』No. of English language articles|25|16,000|101,000|190,000|320,000|630,000』No. of articles, all languages|25|19,000|138,000|490,000|862,000|1,600,000』 + + +.BR +- Contributed at least ten times; ** at least 5 times in last month; *** more +than 100 times in last month. +.SH POEM + + +.BR + +.B basic markup: +.nf +poem{ + + Your poem here + +}poem + +Each verse in a poem is given an object number. +.fi + + +.BR + +.B markup example: +.nf +poem{ + + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + +}poem +.fi + + +.BR + +.B resulting output: + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + + +.SH GROUP + + +.BR + +.B basic markup: +.nf +group{ + + Your grouped text here + +}group + +A group is treated as an object and given a single object number. +.fi + + +.BR + +.B markup example: +.nf +group{ + + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. 
--Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + +}group +.fi + + +.BR + +.B resulting output: + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." + "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' + + +.SH CODE + + +.BR +Code tags code{ ... }code (used as with other group tags described above) are +used to escape regular sisu markup, and have been used extensively within this +document to provide examples of +.B SiSU +markup. You cannot however use code tags to escape code tags. They are however +used in the same way as group or poem tags. + +.BR +A code-block is treated as an object and given a single object number. [an +option to number each line of code may be considered at some later time] + +.BR + +.B use of code tags instead of poem compared, resulting output: +.nf + `Fury said to a + mouse, That he + met in the + house, + "Let us + both go to + law: I will + prosecute + YOU. --Come, + I'll take no + denial; We + must have a + trial: For + really this + morning I've + nothing + to do." + Said the + mouse to the + cur, "Such + a trial, + dear Sir, + With + no jury + or judge, + would be + wasting + our + breath." 
+ "I'll be + judge, I'll + be jury," + Said + cunning + old Fury: + "I'll + try the + whole + cause, + and + condemn + you + to + death."' +.fi + + +.BR +From +.B SiSU +2.7.7 on you can number codeblocks by placing a hash after the opening code tag +code{# as demonstrated here: +.nf +1 | `Fury said to a +2 | mouse, That he +3 | met in the +4 | house, +5 | "Let us +6 | both go to +7 | law: I will +8 | prosecute +9 | YOU. --Come, +10 | I'll take no +11 | denial; We +12 | must have a +13 | trial: For +14 | really this +15 | morning I've +16 | nothing +17 | to do." +18 | Said the +19 | mouse to the +20 | cur, "Such +21 | a trial, +22 | dear Sir, +23 | With +24 | no jury +25 | or judge, +26 | would be +27 | wasting +28 | our +29 | breath." +30 | "I'll be +31 | judge, I'll +32 | be jury," +33 | Said +34 | cunning +35 | old Fury: +36 | "I'll +37 | try the +38 | whole +39 | cause, +40 | and +41 | condemn +42 | you +43 | to +44 | death."' +.fi + +.SH ADDITIONAL BREAKS - LINEBREAKS WITHIN OBJECTS, COLUMN AND PAGE-BREAKS + +.SH LINE-BREAKS + + +.BR +To break a line within a "paragraph object", two backslashes \e\e +with a space before and a space or newline after them +may be used. +.nf +To break a line within a "paragraph object", +two backslashes \e\e with a space before +and a space or newline after them \e\e +may be used. +.fi + + +.BR +The html break br enclosed in angle brackets (though undocumented) is available +in versions prior to 3.0.13 and 2.9.7 (it remains available for the time being, +but is deprecated). + +.BR +To draw a dividing line dividing paragraphs, see the section on page breaks. +.SH PAGE BREAKS + + +.BR +Page breaks are only relevant and honored in some output formats. A page break +or a new page may be inserted manually using the following markup on a line on +its own: + +.BR +page new =\e\e= breaks the page, starts a new page. + +.BR +page break -\e\e- breaks a column, starts a new column, if using columns, else +breaks the page, starts a new page. 
+ +.BR +page break line across page -..- draws a dividing line, dividing paragraphs + +.BR +page break: +.nf +-\e\e- +.fi + + +.BR +page (break) new: +.nf +=\e\e= +.fi + + +.BR +page (break) line across page (dividing paragraphs): +.nf +-..- +.fi + +.SH BIBLIOGRAPHY / REFERENCES + + +.BR +There are three ways to prepare a bibliography using sisu (which are mutually +exclusive): (i) manually preparing and marking up as regular text in sisu a +list of references, this is treated as a regular document segment (and placed +before endnotes if any); (ii) preparing a bibliography, marking a heading level +1~!biblio (note the exclamation mark) and preparing a bibliography using +various metadata tags including for author: title: year: a list of which is +provided below, or; (iii) as an assistance in preparing a bibliography, marking +a heading level 1~!biblio and tagging citations within footnotes for inclusion, +identifying citations and having a parser attempt to extract them and build a +bibliography of the citations provided. + +.BR +For the heading/section sequence: endnotes, bibliography then book index to +occur, the name biblio or bibliography must be given to the bibliography +section, like so: +.nf +1~!biblio~ [Note: heading marker::required title missing] +.fi + +.SH A MARKUP TAGGED METADATA BIBLIOGRAPHY SECTION + + +.BR +Here instead of writing your full citations directly in footnotes, each time +you have new material to cite, you add it to your bibliography section (if it +has not been added yet) providing the information you need against an available +list of tags (provided below). + +.BR +The required tags are au: ti: and yr: [^10] a short quick example might be +as follows: +.nf +1~!biblio~ [Note: heading marker::required title missing] + +au: von Hippel, E. 
+ti: Perspective: User Toolkits for Innovation +lng: (language) +jo: Journal of Product Innovation Management +vo: 18 +ed: (editor) +yr: 2001 +note: +sn: Hippel, /{User Toolkits}/ (2001) +id: vHippel_2001 +% form: + +au: Benkler, Yochai +ti: The Wealth of Networks +st: How Social Production Transforms Markets and Freedom +lng: (language) +pb: Harvard University Press +edn: (edition) +yr: 2006 +pl: U.S. +url: http://cyber.law.harvard.edu/wealth_of_networks/Main_Page +note: +sn: Benkler, /{Wealth of Networks}/ (2006) +id: Benkler2006 + +au: Quixote, Don; Panza, Sancho +ti: Taming Windmills, Keeping True +jo: Imaginary Journal +yr: 1605 +url: https://en.wikipedia.org/wiki/Don_Quixote +note: made up to provide an example of author markup for an article with two authors +sn: Quixote & Panza, /{Taming Windmills}/ (1605) +id: quixote1605 +.fi + + +.BR +Note that the section name !biblio (or !bibliography) is required for the +bibliography to be treated specially as such, and placed after the +auto-generated endnote section. + +.BR +Using this method, work goes into preparing the bibliography, the tags author +or editor, year and title are required and will be used to sort the +bibliography that is placed under the Bibliography section + +.BR +The metadata tags may include shortname (sn:) and id, if provided, which are +used for substitution within text. Every time the given id is found within the +text it will be replaced by the given short title of the work (it is for this +reason the short title has sisu markup to italicize the title), it should work +with any page numbers to be added, the short title should be one that can +easily be used to look up the full description in the bibliography. +.nf +The following footnote~{ quixote1605, pp 1000 - 1001, also Benkler2006 p 1. 
}~ +.fi + + +.BR +would be presented as: + +.BR +Quixote and Panza, +.I Taming Windmills +(1605), pp 1000 - 1001 also, Benkler, +.I Wealth of Networks, +(2006) p 1 or rather[^11] +.nf +au: author Surname, FirstNames (if multiple semi-colon separator) + (required unless editor to be used instead) +ti: title (required) +st: subtitle +jo: journal +vo: volume +ed: editor (required if author not provided) +tr: translator +src: source (generic field where others are not appropriate) +in: in (like src) +pl: place/location (state, country) +pb: publisher +edn: edition +yr: year (yyyy or yyyy-mm or yyyy-mm-dd) (required) +pg: pages +url: http://url +note: note +id: create_short_identifier e.g. authorSurnameYear + (used in substitutions: when found within text will be + replaced by the short name provided) +sn: short name e.g. Author, /{short title}/, Year + (used in substitutions: when an id is found within text + the short name will be used to replace it) +.fi + +.SH TAGGING CITATIONS FOR INCLUSION IN THE BIBLIOGRAPHY + + +.BR +Here whenever you make a citation that you wish be included in the +bibliography, you tag the citation as such using special delimiters (which are +subsequently removed from the final text produced by sisu) + +.BR +Here you would write something like the following, either in regular text or a +footnote +.nf +See .: Quixote, Don; Panza, Sancho /{Taming Windmills, Keeping True}/ (1605) :. +.fi + + +.BR + +.B SiSU +will parse for a number of patterns within the delimiters to try make out the +authors, title, date etc. and from that create a Bibliography. This is more +limited than the previously described method of preparing a tagged +bibliography, and using an id within text to identify the work, which also +lends itself to greater consistency. 
+.SH GLOSSARY + + +.BR +Using the section name 1~!glossary results in the Glossary being treated +specially as such, and placed after the auto-generated endnote section (before +the bibliography/list of references if there is one). + +.BR +The Glossary is ordinary text marked up in a manner deemed suitable for that +purpose, e.g. with the term in bold, possibly with a hanging indent. +.nf +1~!glossary~ [Note: heading marker::required title missing] + +_0_1 *{GPL}* An abbreviation that stands for "General Public License." ... + +_0_1 [provide your list of terms and definitions] +.fi + + +.BR +In the given example the first line is not indented, subsequent lines are indented by one +level, and the term to be defined is in bold text. +.SH BOOK INDEX + + +.BR +To make an index, append to the paragraph the book index term that relates to it, using +an equal sign and curly braces. + +.BR +Currently two levels are provided, a main term and if needed a sub-term. +Sub-terms are separated from the main term by a colon. +.nf + Paragraph containing main term and sub-term. + ={Main term:sub-term} +.fi + + +.BR +The index syntax starts on a new line, but there should not be an empty line +between paragraph and index markup. + +.BR +The structure of the resulting index would be: +.nf + Main term, 1 + sub-term, 1 +.fi + + +.BR +Several terms may relate to a paragraph, they are separated by a semicolon. If +the term refers to more than one paragraph, indicate the number of paragraphs. +.nf + Paragraph containing main term, second term and sub-term. + ={first term; second term: sub-term} +.fi + + +.BR +The structure of the resulting index would be: +.nf + First term, 1, + Second term, 1, + sub-term, 1 +.fi + + +.BR +If multiple sub-terms appear under one paragraph, they are separated under the +main term heading from each other by a pipe symbol. +.nf + Paragraph containing main term, second term and sub-term. 
+ ={Main term: + sub-term+2|second sub-term; + Another term + } + + A paragraph that continues discussion of the first sub-term +.fi + + +.BR +The plus two in the example provided indicates the first sub-term spans two +additional paragraphs. The logical structure of the resulting index would be: +.nf + Main term, 1, + sub-term, 1-3, + second sub-term, 1, + Another term, 1 +.fi + +.SH COMPOSITE DOCUMENTS MARKUP + + +.BR +It is possible to build a document by creating a master document that requires +other documents. The documents required may be complete documents that could be +generated independently, or they could be markup snippets, prepared so as to be +easily available to be placed within another text. If the calling document is a +master document (built from other documents), it should be named with the +suffix +.B .ssm +Within this document you would provide information on the other documents that +should be included within the text. These may be other documents that would be +processed in a regular way, or markup bits prepared only for inclusion within a +master document +.B .sst +regular markup file, or +.B .ssi +(insert/information) A secondary file of the composite document is built prior +to processing with the same prefix and the suffix +.B ._sst + +.BR +basic markup for importing a document into a master document +.nf +<< filename1.sst + +<< filename2.ssi +.fi + + +.BR +The form described above should be relied on. Within the +.I Vim +editor it results in the text thus linked becoming hyperlinked to the document +it is calling in, which is convenient for editing. 
+.SH SUBSTITUTIONS + + +.BR + +.B markup example: +.nf +The current Debian is ${debian_stable} the next debian will be ${debian_testing} + +Configure substitution in _sisu/sisu_document_make + +make: + substitute: /${debian_stable}/,'*{Wheezy}*' /${debian_testing}/,'*{Jessie}*' +.fi + + +.BR + +.B resulting output: + +.BR +The current +.B Debian +is +.B Wheezy +the next debian will be +.B Jessie + +.BR +Configure substitution in _sisu/sisu_document_make +.SH SISU FILETYPES + + +.BR + +.B SiSU +has +.I plaintext +and binary filetypes, and can process either type of document. +.SH .SST .SSM .SSI MARKED UP PLAIN TEXT + +.TP +.B SiSU +documents are prepared as plain-text (utf-8) files with +.B SiSU +markup. They may make reference to and contain images (for example), which are +stored in the directory beneath them _sisu/image. +.B SiSU +.I plaintext +markup files are of three types that may be distinguished by the file extension +used: regular text .sst; master documents .ssm, composite documents that incorporate +other text, which can be any regular text or text insert; and inserts the +contents of which are like regular text except these are marked .ssi and are +not processed. + +.BR + +.B SiSU +processing can be done directly against sisu documents; which may be located +locally or on a remote server for which a url is provided. + +.BR + +.B SiSU +source markup can be shared with the command: + +.BR + sisu -s [filename] +.SH SISU TEXT - REGULAR FILES (.SST) + + +.BR +The most common form of document in +.B SiSU, +see the section on +.B SiSU +markup. +.SH SISU MASTER FILES (.SSM) + + +.BR +Composite documents which incorporate other +.B SiSU +documents which may be either regular +.B SiSU +text .sst which may be generated independently, or inserts prepared solely for +the purpose of being incorporated into one or more master documents. 
+ +.BR +The mechanism by which master files incorporate other documents is described as +one of the headings under +.B SiSU +markup in the +.B SiSU +manual. + +.BR +Note: Master documents may be prepared in a similar way to regular documents, +and processing will occur normally if a .sst file is renamed .ssm without +requiring any other documents; the .ssm marker flags that the document may +contain other documents. + +.BR +Note: a secondary file of the composite document is built prior to processing +with the same prefix and the suffix ._sst +.SH SISU INSERT FILES (.SSI) + + +.BR +Inserts are documents prepared solely for the purpose of being incorporated +into one or more master documents. They resemble regular +.B SiSU +text files (.sst). Since sisu -5.5.0 (6.1.0) .ssi files can, like .ssm files, +include other .sst or .ssm files. .ssi files cannot be called by the sisu +processor directly and can only be incorporated in other documents. Making a +file a .ssi file is a quick and convenient way of breaking up a document that +is to be included in a master document, and flagging that the .ssi file to be +incorporated is not intended to be processed on its own. +.SH SISUPOD, ZIPPED BINARY CONTAINER (SISUPOD.ZIP, .SSP) + + +.BR +A sisupod is a zipped +.B SiSU +text file or set of +.B SiSU +text files and any associated images that they contain (this will be extended +to include sound and multimedia-files) +.TP +.B SiSU +.I plaintext +files rely on a recognised directory structure to find contents such as images +associated with documents, but all images for example for all documents +contained in a directory are located in the sub-directory _sisu/image. Without +the ability to create a sisupod it can be inconvenient to manually identify all +other files associated with a document. A sisupod automatically bundles all +associated files with the document that is turned into a pod. 
+ +.BR +The structure of the sisupod is such that it may for example contain a single +document and its associated images; a master document and its associated +documents and anything else; or the zipped contents of a whole directory of +prepared +.B SiSU +documents. + +.BR +The command to create a sisupod is: + +.BR + sisu -S [filename] + +.BR +Alternatively, make a pod of the contents of a whole directory: + +.BR + sisu -S + +.BR + +.B SiSU +processing can be done directly against a sisupod; which may be located locally +or on a remote server for which a url is provided. + +.BR +<http://www.sisudoc.org/sisu/sisu_commands> + +.BR +<http://www.sisudoc.org/sisu/sisu_manual> +.SH CONFIGURATION + +.SH CONFIGURATION FILES + +.SH CONFIG.YML + + +.BR + +.B SiSU +configuration parameters are adjusted in the configuration file, which can be +used to override the defaults set. This includes such things as which directory +interim processing should be done in and where the generated output should be +placed. + +.BR +The +.B SiSU +configuration file is a yaml file, which means indentation is significant. + +.BR + +.B SiSU +resource configuration is determined by looking at the following files if they +exist: + +.BR + ./_sisu/v7/sisurc.yml + +.BR + ./_sisu/sisurc.yml + +.BR + ~/.sisu/v7/sisurc.yml + +.BR + ~/.sisu/sisurc.yml + +.BR + /etc/sisu/v7/sisurc.yml + +.BR + /etc/sisu/sisurc.yml + +.BR +The search is in the order listed, and the first one found is used. + +.BR +In the absence of instructions in any of these it falls back to the internal +program defaults. + +.BR +Configuration determines the output and processing directories and the database +access details. + +.BR +If +.B SiSU +is installed a sample sisurc.yml may be found in /etc/sisu/sisurc.yml +.SH SISU_DOCUMENT_MAKE + + +.BR +Most sisu document headers relate to metadata, the exception is the @make: +header which provides processing related information. 
The default contents of +the @make header may be set by placing them in a file sisu_document_make. + +.BR +The search order is as for resource configuration: + +.BR + ./_sisu/v7/sisu_document_make + +.BR + ./_sisu/sisu_document_make + +.BR + ~/.sisu/v7/sisu_document_make + +.BR + ~/.sisu/sisu_document_make + +.BR + /etc/sisu/v7/sisu_document_make + +.BR + /etc/sisu/sisu_document_make + +.BR +A sample sisu_document_make can be found in the _sisu/ directory along +with the provided sisu markup samples. +.SH CSS - CASCADING STYLE SHEETS (FOR HTML, XHTML AND XML) + + +.BR +CSS files to modify the appearance of +.B SiSU +html, +.I XHTML +or +.I XML +may be placed in the configuration directory: ./_sisu/css ; ~/.sisu/css or; +/etc/sisu/css and these will be copied to the output directories with the +command sisu -CC. + +.BR +The basic CSS file for html output is html.css, placing a file of that name in +directory _sisu/css or equivalent will result in the default file of that name +being overwritten. + +.BR + +.I HTML: +html.css + +.BR + +.I XML +DOM: dom.css + +.BR + +.I XML +SAX: sax.css + +.BR + +.I XHTML: +xhtml.css + +.BR +The default homepage may use homepage.css or html.css + +.BR +Under consideration is to permit the placement of a CSS file with a different +name in directory _sisu/css or equivalent.[^12] +.SH ORGANISING CONTENT - DIRECTORY STRUCTURE AND MAPPING + + +.BR + +.B SiSU +v3 has new options for the source directory tree, and output directory +structures of which there are 3 alternatives. +.SH DOCUMENT SOURCE DIRECTORY + + +.BR +The document source directory is the directory in which sisu processing +commands are given. It contains the sisu source files (.sst .ssm .ssi), or (for +sisu v3 may contain) subdirectories with language codes which contain the sisu +source files, so all English files would go in subdirectory en/, French in fr/, +Spanish in es/ and so on. ISO 639-1 codes are used (as varied by po4a). 
A list +of available languages (and possible sub-directory names) can be obtained with +the command "sisu --help lang". The list of languages is limited to languages +supported by XeTeX polyglossia. +.SH GENERAL DIRECTORIES + +.nf + ./subject_name/ + +% files stored at this level e.g. sisu_manual.sst or +% for sisu v3 may be under language sub-directories +% e.g. + + ./subject_name/en + + ./subject_name/fr + + ./subject_name/es + + ./subject_name/_sisu + + ./subject_name/_sisu/css + + ./subject_name/_sisu/image +.fi + +.SH DOCUMENT OUTPUT DIRECTORY STRUCTURES + +.SH OUTPUT DIRECTORY ROOT + + +.BR +The output directory root can be set in the sisurc.yml file. Under the root, +subdirectories are made for each directory in which a document set resides. If +you have a directory named poems or conventions, that directory will be created +under the output directory root and the output for all documents contained in +the directory of a particular name will be generated to subdirectories beneath +that directory (poems or conventions). A document will be placed in a +subdirectory of the same name as the document with the filetype identifier +stripped (.sst .ssm). + +.BR +The last part of a directory path, representing the sub-directory in which a +document set resides, is the directory name that will be used for the output +directory. This has implications for the organisation of document collections +as it could make sense to place documents of a particular subject, or type +within a directory identifying them. This grouping as suggested could be by +subject (sales_law, english_literature); or just as conveniently by some other +classification (X University). 
The mapping means it is also possible to place +in the same output directory documents that are for organisational purposes +kept separately, for example documents on a given subject of two different +institutions may be kept in two different directories of the same name, under a +directory named after each institution, and these would be output to the same +output directory. Skins could be associated with each institution on a +directory basis and resulting documents will take on the appropriate different +appearance. +.SH ALTERNATIVE OUTPUT STRUCTURES + + +.BR +There are 3 possibile output structures described as being, by language, by +filetype or by filename, the selection is made in sisurc.yml +.nf +#% output_dir_structure_by: language; filetype; or filename +output_dir_structure_by: language #(language & filetype, preferred?) +#output_dir_structure_by: filetype +#output_dir_structure_by: filename #(default, closest to original v1 & v2) +.fi + +.SH BY LANGUAGE + + +.BR +The by language directory structure places output files + +.BR +The by language directory structure separates output files by language code +(all files of a given language), and within the language directory by filetype. + +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: language +.nf + |-- en + |-- epub + |-- hashes + |-- html + | |-- viral_spiral.david_bollier + | |-- manifest + | |-- qrcode + | |-- odt + | |-- pdf + | |-- sitemaps + | |-- txt + | |-- xhtml + | `-- xml + |-- po4a + | `-- live-manual + | |-- po + | |-- fr + | `-- pot + `-- _sisu + |-- css + |-- image + |-- image_sys -> ../../_sisu/image_sys + `-- xml + |-- rnc + |-- rng + `-- xsd +.fi + + +.BR +#by: language subject_dir/en/manifest/filename.html +.SH BY FILETYPE + + +.BR +The by filetype directory structure separates output files by filetype, all +html files in one directory pdfs in another and so on. Filenames are given a +language extension. 
+ +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: filetype +.nf + |-- epub + |-- hashes + |-- html + |-- viral_spiral.david_bollier + |-- manifest + |-- qrcode + |-- odt + |-- pdf + |-- po4a + |-- live-manual + | |-- po + | |-- fr + | `-- pot + |-- _sisu + | |-- css + | |-- image + | |-- image_sys -> ../../_sisu/image_sys + | `-- xml + | |-- rnc + | |-- rng + | `-- xsd + |-- sitemaps + |-- txt + |-- xhtml + `-- xml +.fi + + +.BR +#by: filetype subject_dir/html/filename/manifest.en.html +.SH BY FILENAME + + +.BR +The by filename directory structure places most output of a particular file +(the different filetypes) in a common directory. + +.BR +Its selection is configured in sisurc.yml + +.BR +output_dir_structure_by: filename +.nf + |-- epub + |-- po4a + |-- live-manual + | |-- po + | |-- fr + | `-- pot + |-- _sisu + | |-- css + | |-- image + | |-- image_sys -> ../../_sisu/image_sys + | `-- xml + | |-- rnc + | |-- rng + | `-- xsd + |-- sitemaps + |-- src + |-- pod + `-- viral_spiral.david_bollier +.fi + + +.BR +#by: filename subject_dir/filename/manifest.en.html +.SH REMOTE DIRECTORIES + +.nf + ./subject_name/ + +% containing sub_directories named after the generated files from which they are made + + ./subject_name/src + +% contains shared source files text and binary e.g. sisu_manual.sst and sisu_manual.sst.zip + + ./subject_name/_sisu + +% configuration file e.g. sisurc.yml + + ./subject_name/_sisu/skin + +% skins in various skin directories doc, dir, site, yml + + ./subject_name/_sisu/css + + ./subject_name/_sisu/image + +% images for documents contained in this directory + + ./subject_name/_sisu/mm +.fi + +.SH SISUPOD + +.nf + ./sisupod/ + +% files stored at this level e.g. sisu_manual.sst + + ./sisupod/_sisu + +% configuration file e.g. 
sisurc.yml + + ./sisupod/_sisu/skin + +% skins in various skin directories doc, dir, site, yml + + ./sisupod/_sisu/css + + ./sisupod/_sisu/image + +% images for documents contained in this directory + + ./sisupod/_sisu/mm +.fi + +.SH HOMEPAGES + + +.BR + +.B SiSU +is about the ability to auto-generate documents. Home pages are regarded as +custom built items, and are not created by +.B SiSU. +More accurately, +.B SiSU +has a default home page, which will not be appropriate for use with other +sites, and the means to provide your own home page instead in one of two ways +as part of a site's configuration, these being: + +.BR +1. through placing your home page and other custom built documents in the +subdirectory _sisu/home/ (this probably being the easier and more convenient +option) + +.BR +2. through providing what you want as the home page in a skin, + +.BR +Document sets are contained in directories, usually organised by site or +subject. Each directory can/should have its own homepage. See the section on +directory structure and organisation of content. +.SH HOME PAGE AND OTHER CUSTOM BUILT PAGES IN A SUB-DIRECTORY + + +.BR +Custom built pages, including the home page index.html may be placed within the +configuration directory _sisu/home/ in any of the locations that is searched +for the configuration directory, namely ./_sisu ; ~/_sisu ; /etc/sisu From +there they are copied to the root of the output directory with the command: + +.BR + sisu -CC +.SH MARKUP AND OUTPUT EXAMPLES + +.SH MARKUP EXAMPLES + + +.BR +Current markup examples and document output samples are provided off +<http://sisudoc.org> or <http://www.jus.uio.no/sisu> and in the sisu +-markup-sample package available off <http://git.sisudoc.org> + +.BR +For some documents hardly any markup at all is required at all, other than a +header, and an indication that the levels to be taken into account by the +program in generating its output are. 
+.SH SISU MARKUP SAMPLES + + +.BR +A few additional sample books prepared as sisu markup samples, output formats +to be generated using +.B SiSU +are contained in a separate package sisu -markup-samples. sisu -markup-samples +contains books (prepared using sisu markup), that were released by their +authors various licenses mostly different Creative Commons licences that do not +permit inclusion in the +.B Debian +Project as they have requirements that do not meet the +.B Debian +Free Software Guidelines for various reasons, most commonly that they require +that the original substantive text remain unchanged, and sometimes that the +works be used only non-commercially. + +.BR + +.I Accelerando, +Charles Stross (2005) +accelerando.charles_stross.sst + +.BR + +.I Alice's Adventures in Wonderland, +Lewis Carroll (1865) +alices_adventures_in_wonderland.lewis_carroll.sst + +.BR + +.I CONTENT, +Cory Doctorow (2008) +content.cory_doctorow.sst + +.BR + +.I Democratizing Innovation, +Eric von Hippel (2005) +democratizing_innovation.eric_von_hippel.sst + +.BR + +.I Down and Out in the Magic Kingdom, +Cory Doctorow (2003) +down_and_out_in_the_magic_kingdom.cory_doctorow.sst + +.BR + +.I For the Win, +Cory Doctorow (2010) +for_the_win.cory_doctorow.sst + +.BR + +.I Free as in Freedom - Richard Stallman's Crusade for Free Software, +Sam Williams (2002) +free_as_in_freedom.richard_stallman_crusade_for_free_software.sam_williams.sst + +.BR + +.I Free as in Freedom 2.0 - Richard Stallman and the Free Software Revolution, +Sam Williams (2002), Richard M. 
Stallman (2010) +free_as_in_freedom_2.richard_stallman_and_the_free_software_revolution.sam_williams.richard_stallman.sst + +.BR + +.I Free Culture - How Big Media Uses Technology and the Law to Lock Down +Culture and Control Creativity, +Lawrence Lessig (2004) +free_culture.lawrence_lessig.sst + +.BR + +.I Free For All - How Linux and the Free Software Movement Undercut the High +Tech Titans, +Peter Wayner (2002) +free_for_all.peter_wayner.sst + +.BR + +.I GNU GENERAL PUBLIC LICENSE v2, +Free Software Foundation (1991) +gpl2.fsf.sst + +.BR + +.I GNU GENERAL PUBLIC LICENSE v3, +Free Software Foundation (2007) +gpl3.fsf.sst + +.BR + +.I Gulliver's Travels, +Jonathan Swift (1726 / 1735) +gullivers_travels.jonathan_swift.sst + +.BR + +.I Little Brother, +Cory Doctorow (2008) +little_brother.cory_doctorow.sst + +.BR + +.I The Cathedral and the Bazaar, +Eric Raymond (2000) +the_cathedral_and_the_bazaar.eric_s_raymond.sst + +.BR + +.I The Public Domain - Enclosing the Commons of the Mind, +James Boyle (2008) +the_public_domain.james_boyle.sst + +.BR + +.I The Wealth of Networks - How Social Production Transforms Markets and +Freedom, +Yochai Benkler (2006) +the_wealth_of_networks.yochai_benkler.sst + +.BR + +.I Through the Looking Glass, +Lewis Carroll (1871) +through_the_looking_glass.lewis_carroll.sst + +.BR + +.I Two Bits - The Cultural Significance of Free Software, +Christopher Kelty (2008) +two_bits.christopher_kelty.sst + +.BR + +.I UN Contracts for International Sale of Goods, +UN (1980) +un_contracts_international_sale_of_goods_convention_1980.sst + +.BR + +.I Viral Spiral, +David Bollier (2008) +viral_spiral.david_bollier.sst +.SH SISU SEARCH - INTRODUCTION + + +.BR +Because the document structure of sites created is clearly defined, and the +text +.I object citation system +is available hypothetically at least, for all forms of output, it is possible +to search the sql database, and either read results from that database, or map +the results to the html or 
other output, which has richer text markup. + +.BR + +.B SiSU +can populate a relational sql type database with documents at an object level, +including object numbers that are shared across different output types, making +a document corpus searchable with that degree of granularity. Basically, your +match criteria is met by these documents and at these locations within each +document, which can be viewed within the database directly or in various output +formats. + +.BR + +.B SiSU +can populate an sql database (sqlite3 or postgresql) with documents made up of +their objects. It also can generate a cgi search form that can be used to query +the database. + +.BR +In order to use the built in search functionality you would take the following +steps. + +.BR +- use sisu to populate an sql database with sisu markup content + +.BR + * sqlite3 should work out of the box + +.BR + * postgresql may require some initial database configuration + +.BR +- provide a way to query the database, which sisu can assist with by + +.BR + * generating a sample ruby cgi search form, required (sisu configuration + recommended) + +.BR + * adding a query field for this search form to be added to all html files + (sisu configuration required) +.SH SQL + +.SH POPULATE THE DATABASE + + +.BR +To populate the sql database, run sisu against a sisu markup file with one of +the following sets of flags +.nf +sisu --sqlite filename.sst +.fi + + +.BR +creates an sqlite3 database containing searchable content of just the sisu +markup document selected +.nf +sisu --sqlite --update filename.sst +.fi + + +.BR +creates an sqlite3 database containing searchable content of marked up +document(s) selected by the user from a common directory +.nf +sisu --pg --update filename.sst +.fi + + +.BR +fills a postgresql database with searchable content of marked up document(s) +selected by the user from a common directory + +.BR +For postgresql the first time the command is run in a given directory the user 
+will be prompted to create the requisite database, at the time of writing the +prompt sisu provides is as follows: +.nf +no connection with pg database established, you may need to run: + createdb "SiSU.7a.current" + after that don't forget to run: + sisu --pg --createall + before attempting to populate the database +.fi + + +.BR +The named database that sisu expects to find must exist and if necessary be +created using postgresql tools. If the database exists but the database tables +do not, sisu will attempt to create the tables it needs, the equivalent of the +requested sisu --pg --createall command. + +.BR +Once this is done, the sql database is populated and ready to be queried. +.SH SQL TYPE DATABASES + + +.BR + +.B SiSU +feeds sisu markup documents into sql type databases +.I PostgreSQL +[^13] and/or +.I SQLite +[^14] database together with information related to document structure. + +.BR +This is one of the more interesting output forms, as all the structural data of +the documents are retained (though can be ignored by the user of the database +should they so choose). All site texts/documents are (currently) streamed to +four tables: + +.BR + * one containing semantic (and other) headers, including, title, author, + subject, (the + .I Dublin Core. + ..); + +.BR + * another the substantive texts by individual "paragraph" (or object) - along + with structural information, each paragraph being identifiable by its + paragraph number (if it has one which almost all of them do), and the + substantive text of each paragraph quite naturally being searchable (both in + formatted and clean text versions for searching); and + +.BR + * a third containing endnotes cross-referenced back to the paragraph from + which they are referenced (both in formatted and clean text versions for + searching). + +.BR + * a fourth table with a one to one relation with the headers table contains + full text versions of output, eg. pdf, html, xml, and + .I ascii. 
+ +.BR +There is of course the possibility to add further structures. + +.BR +At this level +.B SiSU +loads a relational database with documents chunked into objects, their smallest +logical structurally constituent parts, as text objects, with their object +citation number and all other structural information needed to construct the +document. Text is stored (at this text object level) with and without +elementary markup tagging, the stripped version being so as to facilitate ease +of searching. + +.BR +Being able to search a relational database at an object level with the +.B SiSU +citation system is an effective way of locating content generated by +.B SiSU. +As individual text objects of a document stored (and indexed) together with +object numbers, and all versions of the document have the same numbering, +complex searches can be tailored to return just the locations of the search +results relevant for all available output formats, with live links to the +precise locations in the database or in html/xml documents; or, the structural +information provided makes it possible to search the full contents of the +database and have headings in which search content appears, or to search only +headings etc. (as the +.I Dublin Core +is incorporated it is easy to make use of that as well). +.SH POSTGRESQL + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system, +postgresql dependency package +.SH DESCRIPTION + + +.BR +Information related to using postgresql with sisu (and related to the +sisu_postgresql dependency package, which is a dummy package to install +dependencies needed for +.B SiSU +to populate a postgresql database, this being part of +.B SiSU +- man sisu) . 
+.SH SYNOPSIS + + +.BR + sisu -D [instruction] [filename/wildcard if required] + +.BR + sisu -D --pg --[instruction] [filename/wildcard if required] +.SH COMMANDS + + +.BR +Mappings to two databases are provided by default, postgresql and sqlite, the +same commands are used within sisu to construct and populate databases however +-d (lowercase) denotes sqlite and -D (uppercase) denotes postgresql, +alternatively --sqlite or --pgsql may be used + +.BR + +.B -D or --pgsql +may be used interchangeably. +.SH CREATE AND DESTROY DATABASE + +.TP +.B --pgsql --createall +initial step, creates required relations (tables, indexes) in existing +(postgresql) database (a database should be created manually and given the same +name as working directory, as requested) (rb.dbi) +.TP +.B sisu -D --createdb +creates database where no database existed before +.TP +.B sisu -D --create +creates database tables where no database tables existed before +.TP +.B sisu -D --Dropall +destroys database (including all its content)! kills data and drops tables, +indexes and database associated with a given directory (and directories of the +same name). +.TP +.B sisu -D --recreate +destroys existing database and builds a new empty database structure +.SH IMPORT AND REMOVE DOCUMENTS + +.TP +.B sisu -D --import -v [filename/wildcard] +populates database with the contents of the file. Imports documents(s) +specified to a postgresql database (at an object level). +.TP +.B sisu -D --update -v [filename/wildcard] +updates file contents in database +.TP +.B sisu -D --remove -v [filename/wildcard] +removes specified document from postgresql database. +.SH SQLITE + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system. 
+.SH DESCRIPTION + + +.BR +Information related to using sqlite with sisu (and related to the sisu_sqlite +dependency package, which is a dummy package to install dependencies needed for +.B SiSU +to populate an sqlite database, this being part of +.B SiSU +- man sisu) . +.SH SYNOPSIS + + +.BR + sisu -d [instruction] [filename/wildcard if required] + +.BR + sisu -d --(sqlite|pg) --[instruction] [filename/wildcard if required] +.SH COMMANDS + + +.BR +Mappings to two databases are provided by default, postgresql and sqlite, the +same commands are used within sisu to construct and populate databases however +-d (lowercase) denotes sqlite and -D (uppercase) denotes postgresql, +alternatively --sqlite or --pgsql may be used + +.SH CREATE AND DESTROY DATABASE + +.TP +.B --sqlite --createall +initial step, creates required relations (tables, indexes) in existing (sqlite) +database (a database should be created manually and given the same name as +working directory, as requested) (rb.dbi) +.TP +.B sisu -d --createdb +creates database where no database existed before +.TP +.B sisu -d --create +creates database tables where no database tables existed before +.TP +.B sisu -d --dropall +destroys database (including all its content)! kills data and drops tables, +indexes and database associated with a given directory (and directories of the +same name). +.TP +.B sisu -d --recreate +destroys existing database and builds a new empty database structure +.SH IMPORT AND REMOVE DOCUMENTS + +.TP +.B sisu -d --import -v [filename/wildcard] +populates database with the contents of the file. Imports documents(s) +specified to an sqlite database (at an object level). +.TP +.B sisu -d --update -v [filename/wildcard] +updates file contents in database +.TP +.B sisu -d --remove -v [filename/wildcard] +removes specified document from sqlite database. 
+.SH CGI SEARCH FORM + + +.BR +For the search form, which is a single search page + +.BR +- configure the search form + +.BR +- generate the sample search form with the sisu command, (this will be based on +the configuration settings and existing found sisu databases) + +.BR +For postgresql web content you may need to edit the search cgi script. Two +things to look out for are that the user is set as needed, and that any +different databases that you wish to be able to query are listed. + +.BR +To set the user correctly, you may want www-data rather than your username. +.nf +@user='www-data' +.fi + + +.BR +- check the search form, copy it to the appropriate cgi directory and set the +correct permissions + +.BR +For a search form to appear on each html page, you need to: + +.BR +- rely on the above mentioned configuration of the search form + +.BR +- configure the html search form to be on + +.BR +- run the html command +.SH SETUP SEARCH FORM + + +.BR +You will need a web server, httpd with cgi enabled, and a postgresql server +on which you are able to create databases. 
+ +.BR +Set up postgresql, make sure you are able to create and write to the database, +e.g.: +.nf +sudo su postgres + createuser -d -a ralph +.fi + + +.BR +You then need to create the database that sisu will use, for sisu manual in the +directory manual/en for example, (when you try to populate a database that does +not exist sisu prompts as to whether it exists): +.nf +createdb SiSU.7a.manual +.fi + + +.BR + +.B SiSU +is then able to create the required tables that allow you to populate the +database with documents in the directory for which it has been created: +.nf +sisu --pg --createall -v +.fi + + +.BR +You can then start to populate the database, in this example with a single +document: +.nf +sisu --pg --update -v en/sisu_manual.ssm +.fi + + +.BR +To create a sample search form, from within the same directory run: +.nf +sisu --sample-search-form --db-pg +.fi + + +.BR +and copy the resulting cgi form to your cgi-bin directory + +.BR +A sample setup for nginx is provided that assumes data will be stored under +/srv/www and cgi scripts under /srv/cgi +.SH SEARCH - DATABASE FRONTEND SAMPLE, UTILISING DATABASE AND SISU FEATURES, +INCLUDING OBJECT CITATION NUMBERING (BACKEND CURRENTLY POSTGRESQL) + + +.BR +Sample search frontend <http://search.sisudoc.org> [^15] A small database and +sample query front-end (search form) that makes use of the citation system, +.I object citation numbering +to demonstrate functionality.[^16] + +.BR + +.B SiSU +can provide information on which documents are matched and at what locations +within each document the matches are found. These results are relevant across +all outputs using +.I object citation numbering, +which includes html, +.I XML, +.I EPUB, +.I LaTeX, +.I PDF +and indeed the +.I SQL +database. You can then refer to one of the other outputs or in the +.I SQL +database expand the text within the matched objects (paragraphs) in the +documents matched. 
+ +.BR +Note you may set results either for documents matched and object number +locations within each matched document meeting the search criteria; or display +the names of the documents matched along with the objects (paragraphs) that +meet the search criteria.[^17] +.TP +.B sisu -F --webserv-webrick +builds a cgi web search frontend for the database created + +.BR +The following is feedback on the setup on a machine provided by the help +command: + +.BR + sisu --help sql +.nf +Postgresql + user: ralph + current db set: SiSU_sisu + port: 5432 + dbi connect: DBI:Pg:database=SiSU_sisu;port=5432 + +sqlite + current db set: /home/ralph/sisu_www/sisu/sisu_sqlite.db + dbi connect DBI:SQLite:/home/ralph/sisu_www/sisu/sisu_sqlite.db +.fi + +.BR +Note on databases built + +.BR +By default, [unless otherwise specified] databases are built on a directory +basis, from collections of documents within that directory. The name of the +directory you choose to work from is used as the database name, i.e. if you are +working in a directory called /home/ralph/ebook the database SiSU_ebook is +used. 
[otherwise a manual mapping for the collection is necessary] + +.SH SEARCH FORM + +.TP +.B sisu -F +generates a sample search form, which must be copied to the web-server cgi +directory +.TP +.B sisu -F --webserv-webrick +generates a sample search form for use with the webrick server, which must be +copied to the web-server cgi directory +.TP +.B sisu -W +starts the webrick server which should be available wherever sisu is properly +installed + +.BR +The generated search form must be copied manually to the webserver directory as +instructed +.SH SISU_WEBRICK + +.SH NAME + + +.BR + +.B SiSU +- Structured information, Serialized Units - a document publishing system +.SH SYNOPSIS + + +.BR +sisu_webrick [port] + +.BR +or + +.BR +sisu -W [port] +.SH DESCRIPTION + + +.BR +sisu_webrick is part of +.B SiSU +(man sisu). sisu_webrick starts +.BR Ruby 's +Webrick web-server and points it to the directories to which +.B SiSU +output is written, providing a list of these directories (assuming +.B SiSU +is in use and they exist). + +.BR +The default port for sisu_webrick is set to 8081, this may be modified in the +yaml file: ~/.sisu/sisurc.yml a sample of which is provided as +/etc/sisu/sisurc.yml (or in the equivalent directory on your system). +.SH SUMMARY OF MAN PAGE + + +.BR +sisu_webrick may be started on its own with the command: sisu_webrick [port] +or using the sisu command with the -W flag: sisu -W [port] + +.BR +where no port is given and settings are unchanged the default port is 8081 +.SH DOCUMENT PROCESSING COMMAND FLAGS + + +.BR +sisu -W [port] starts +.B Ruby +Webrick web-server, serving +.B SiSU +output directories, on the port provided, or if no port is provided and the +defaults have not been changed in ~/.sisu/sisurc.yml then on port 8081 +.SH SUMMARY OF FEATURES + + +.BR +- sparse/minimal markup (clean utf-8 source texts). Documents are prepared in a +single +.I UTF-8 +file using a minimalistic mnemonic syntax. 
Typical literature, documents like +"War and Peace" require almost no markup, and most of the headers are optional. + +.BR +- markup is easily readable/parsable by the human eye, (basic markup is simpler +and more sparse than the most basic +.I HTML +) , [this may also be converted to +.I XML +representations of the same input/source document]. + +.BR +- markup defines document structure (this may be done once in a header +pattern-match description, or for heading levels individually); basic text +attributes (bold, italics, underscore, strike-through etc.) as required; and +semantic information related to the document (header information, extended +beyond the Dublin core and easily further extended as required); the headers +may also contain processing instructions. +.B SiSU +markup is primarily an abstraction of document structure and document metadata +to permit taking advantage of the basic strengths of existing alternative +practical standard ways of representing documents [be that browser viewing, +paper publication, sql search etc.] 
(html, epub, xml, odf, latex, pdf, sql) + +.BR +- for output produces reasonably elegant output of established industry and +institutionally accepted open standard formats.[3] takes advantage of the +different strengths of various standard formats for representing documents, +amongst the output formats currently supported are: + +.BR +* +.I HTML +- both as a single scrollable text and a segmented document + +.BR +* +.I XHTML + +.BR +* +.I EPUB + +.BR +* +.I XML +- both in sax and dom style xml structures for further development as required + +.BR +* +.I ODT +- Open Document Format text, the iso standard for document storage + +.BR +* +.I LaTeX +- used to generate pdf + +.BR +* +.I PDF +(via +.I LaTeX +) + +.BR +* +.I SQL +- population of an sql database ( +.I PostgreSQL +or +.I SQLite +) , (at the same object level that is used to cite text within a document) + +.BR +Also produces: concordance files; document content certificates (md5 or sha256 +digests of headings, paragraphs, images etc.) and html manifests (and sitemaps +of content). (b) takes advantage of the strengths implicit in these very +different output types, (e.g. PDFs produced using typesetting of +.I LaTeX, +databases populated with documents at an individual object/paragraph level, +making possible +.I granular search +(and related possibilities)) + +.BR +- ensuring content can be cited in a meaningful way regardless of selected +output format. Online publishing (and publishing in multiple document formats) +lacks a useful way of citing text internally within documents (important to +academics generally and to lawyers) as page numbers are meaningless across +browsers and formats. sisu seeks to provide a common way of pinpointing the text +within a document, (which can be utilized for citation and by search engines). 
+The outputs share a common numbering system that is meaningful (to man and +machine) across all digital outputs whether paper, screen, or database +oriented, (pdf, +.I HTML, +.I EPUB, +xml, sqlite, postgresql) , this numbering system can be used to reference +content. + +.BR +- Granular search within documents. +.I SQL +databases are populated at an object level (roughly headings, paragraphs, +verse, tables) and become searchable with that degree of granularity, the +output information provides the object/paragraph numbers which are relevant +across all generated outputs; it is also possible to look at just the matching +paragraphs of the documents in the database; [output indexing also works well +with search indexing tools like hyperestraier]. + +.BR +- long term maintainability of document collections in a world of changing +formats, having a very sparsely marked-up source document base. There is a +considerable degree of future-proofing, output representations are +"upgradeable", and new document formats may be added. e.g. addition of odf +(open document text) module in 2006, epub in 2009 and html5 output +sometime in future, without modification of existing prepared texts + +.BR +* +.I SQL +search aside, documents are generated as required and static once generated. 
+ +.BR +- documents produced are static files, and may be batch processed, this needs +to be done only once but may be repeated for various reasons as desired +(updated content, addition of new output formats, updated technology document +presentations/representations) + +.BR +- document source ( +.I plaintext +utf-8) if shared on the net may be used as input and processed locally to +produce the different document outputs + +.BR +- document source may be bundled together (automatically) with associated +documents (multiple language versions or master document with inclusions) and +images and sent as a zip file called a sisupod, if shared on the net these too +may be processed locally to produce the desired document outputs + +.BR +- generated document outputs may automatically be posted to remote sites. + +.BR +- for basic document generation, the only software dependency is +.B Ruby, +and a few standard Unix tools (this covers +.I plaintext, +.I HTML, +.I EPUB, +.I XML, +.I ODF, +.I LaTeX +) . To use a database you of course need that, and to convert the +.I LaTeX +generated to pdf, a latex processor like tetex or texlive. + +.BR +- as a developers tool it is flexible and extensible + +.BR +Syntax highlighting for +.B SiSU +markup is available for a number of text editors. + +.BR + +.B SiSU +is less about document layout than about finding a way with little markup to be +able to construct an abstract representation of a document that makes it +possible to produce multiple representations of it which may be rather +different from each other and used for different purposes, whether layout and +publishing, or search of content + +.BR +i.e. 
to be able to take advantage from this minimal preparation starting point +of some of the strengths of rather different established ways of representing +documents for different purposes, whether for search (relational database, or +indexed flat files generated for that purpose whether of complete documents, or +say of files made up of objects), online viewing (e.g. html, xml, pdf) , or +paper publication (e.g. pdf) ... + +.BR +the solution arrived at is by extracting structural information about the +document (about headings within the document) and by tracking objects (which +are serialized and also given hash values) in the manner described. It makes +possible representations that are quite different from those offered at +present. For example objects could be saved individually and identified by +their hashes, with an index of how the objects relate to each other to form a +document. +.TP +.BI *1. +square brackets + +.BR +.TP +.BI *2. +square brackets + +.BR +.TP +.BI +1. +square brackets + +.BR +.TP +.BI 1. +<http://www.jus.uio.no/sisu/man/> + +.BR +.TP +.BI 2. +<http://www.jus.uio.no/sisu/man/sisu.1.html> + +.BR +.TP +.BI 3. +From sometime after SiSU 0.58 it should be possible to describe SiSU markup +using SiSU, which though not an original design goal is useful. + +.BR +.TP +.BI 4. +files should be prepared using UTF-8 character encoding + +.BR +.TP +.BI 5. +a footnote or endnote + +.BR +.TP +.BI 6. +self contained endnote marker & endnote in one + +.BR +.TP +.BI *. +unnumbered asterisk footnote/endnote, insert multiple asterisks if required + +.BR +.TP +.BI **. +another unnumbered asterisk footnote/endnote + +.BR +.TP +.BI *3. +editors notes, numbered asterisk footnote/endnote series + +.BR +.TP +.BI +2. +editors notes, numbered plus symbol footnote/endnote series + +.BR +.TP +.BI 7. +<http://www.sisudoc.org/> + +.BR +.TP +.BI 8. +<http://www.ruby-lang.org/en/> + +.BR +.TP +.BI 9. 
+Table from the Wealth of Networks by Yochai Benkler +<http://www.jus.uio.no/sisu/the_wealth_of_networks.yochai_benkler> + +.BR +.TP +.BI 10. +for which you may alternatively use the full form author: title: and year: + +.BR +.TP +.BI 11. +Quixote and Panza, Taming Windmills (1605), pp 1000 - 1001 also, Benkler, Wealth of Networks (2006), p 1 + +.BR +.TP +.BI 12. +SiSU has worked this way in the past, though this was dropped as it was +thought the complexity outweighed the flexibility, however, the balance was +rather fine and this behaviour could be reinstated. + +.BR +.TP +.BI 13. +<http://www.postgresql.org/> <http://advocacy.postgresql.org/> +<http://en.wikipedia.org/wiki/Postgresql> + +.BR +.TP +.BI 14. +<http://www.hwaci.com/sw/sqlite/> <http://en.wikipedia.org/wiki/Sqlite> + +.BR +.TP +.BI 15. +<http://search.sisudoc.org> + +.BR +.TP +.BI 16. +(which could be extended further with current back-end). As regards scaling +of the database, it is as scalable as the database (here Postgresql) and +hardware allow. + +.BR +.TP +.BI 17. +of this feature when demonstrated to an IBM software innovations evaluator +in 2004 he said to paraphrase: this could be of interest to us. We have large +document management systems, you can search hundreds of thousands of documents +and we can tell you which documents meet your search criteria, but there is no +way we can tell you without opening each document where within each your +matches are found. + +.BR + +.TP +.SH SEE ALSO + sisu(1), + sisu-epub(1), + sisu-harvest(1), + sisu-html(1), + sisu-odf(1), + sisu-pdf(1), + sisu-pg(1), + sisu-sqlite(1), + sisu-txt(1). 
+ sisu_vim(7) +.TP +.SH HOMEPAGE + More information about SiSU can be found at <http://www.sisudoc.org/> or <http://www.jus.uio.no/sisu/> +.TP +.SH SOURCE + <http://git.sisudoc.org/> +.TP +.SH AUTHOR + SiSU is written by Ralph Amissah <ralph@amissah.com> +#+END_SRC diff --git a/org/spine_info.org b/org/spine_info.org index 04b91a6..3dc942c 100644 --- a/org/spine_info.org +++ b/org/spine_info.org @@ -16,7 +16,7 @@ * README -#+BEGIN_SRC txt :tangle "../README" +#+BEGIN_SRC txt :NO-tangle "../README" project_name: Spine, Doc Reform description: [ "documents, structuring, processing, publishing", @@ -45,46 +45,46 @@ project_name: Spine, Doc Reform *** project (project root) ./ #+BEGIN_SRC txt :tangle "../COPYRIGHT" -<<spine_copyright>> -<<spine_license_agpl3>> -<<spine_extra>> +<<sisu_spine_copyright>> +<<sisu_spine_license_agpl3>> +<<sisu_spine_extra>> #+END_SRC *** code source ./src #+BEGIN_SRC txt :tangle "../src/COPYRIGHT" -<<spine_copyright>> -<<spine_license_agpl3>> -<<spine_extra>> +<<sisu_spine_copyright>> +<<sisu_spine_license_agpl3>> +<<sisu_spine_extra>> #+END_SRC *** code source ./src/spine #+BEGIN_SRC txt :tangle "../src/doc_reform/COPYRIGHT" -<<spine_copyright>> -<<spine_license_agpl3>> -<<spine_extra>> +<<sisu_spine_copyright>> +<<sisu_spine_license_agpl3>> +<<sisu_spine_extra>> #+END_SRC *** org files ./org #+BEGIN_SRC txt :tangle "./COPYRIGHT" -<<spine_copyright>> -<<spine_extra>> +<<sisu_spine_copyright>> +<<sisu_spine_extra>> #+END_SRC -*** data (markup samples) ./data +*** doc (markup samples) ./doc -#+BEGIN_SRC txt :tangle "../data/COPYRIGHT" +#+BEGIN_SRC txt :NO-tangle "../doc/COPYRIGHT" <<data>> #+END_SRC ** incorporate *** copyright -#+NAME: spine_copyright +#+NAME: sisu_spine_copyright #+BEGIN_SRC txt -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -96,10 +96,11 @@ project_name: Spine, Doc Reform #+END_SRC *** license +**** 
AGPLv3 -#+NAME: spine_license_agpl3 +#+NAME: sisu_spine_license_agpl3 #+BEGIN_SRC txt - - code under src/ + - code under src/ & org/ - License: AGPL 3 or later: Spine, Doc Reform (SiSU), a framework for document structuring, publishing and @@ -129,27 +130,33 @@ project_name: Spine, Doc Reform *** tail -#+NAME: spine_extra +#+NAME: sisu_spine_extra #+BEGIN_SRC txt - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on SiSU markup) - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering - Hompages: + [http://www.sisudoc.org] + + +#+END_SRC + +#+BEGIN_SRC txt + - Hompages: [http://www.doc_reform.org] [http://www.sisudoc.org] - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - + [] #+END_SRC -*** data -#+NAME: spine_extra +*** doc + +#+NAME: sisu_spine_extra #+BEGIN_SRC txt - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] diff --git a/org/util_cgi_d_sqlite_search.org b/org/util_cgi_d_sqlite_search.org index 43844a5..07a6d15 100644 --- a/org/util_cgi_d_sqlite_search.org +++ b/org/util_cgi_d_sqlite_search.org @@ -23,7 +23,7 @@ dub --force --compiler=ldc2 && sudo cp -v cgi-bin/spine-search /usr/lib/cgi-bin/ ** 0. 
set program tangle -#+BEGIN_SRC d :tangle "../util/d/cgi/search/src/spine_cgi_sqlite_search.d" +#+BEGIN_SRC d :tangle "../misc/util/d/cgi/search/src/spine_cgi_sqlite_search.d" <<cgi_sqlite_head>> <<cgi_sqlite_imports>> void cgi_function_intro(Cgi cgi) { @@ -1134,8 +1134,8 @@ LIMIT %s OFFSET %s * cgi-search dub.sdl -#+BEGIN_SRC d :tangle "../util/d/cgi/search/dub.sdl" -name "spine-search" +#+BEGIN_SRC d :tangle "../misc/util/d/cgi/search/dub.sdl" +name "spine_search" description "A minimal D application." authors "ralph" copyright "Copyright © 2020, ralph" @@ -1164,7 +1164,7 @@ configuration "default" { * cgi-search README -#+BEGIN_SRC text :tangle "../util/d/cgi/search/README" +#+BEGIN_SRC text :tangle "../misc/util/d/cgi/search/README" change db name to match name of db you create cv.db_selected = "spine.search.sql.db"; diff --git a/org/util_cgi_rb_fcgi_sqlite_search.org b/org/util_cgi_rb_fcgi_sqlite_search.org index 3460b32..124ac25 100644 --- a/org/util_cgi_rb_fcgi_sqlite_search.org +++ b/org/util_cgi_rb_fcgi_sqlite_search.org @@ -31,7 +31,7 @@ document home currently at /var/www/html (better placed at /var/www or srv/proje *** sqlite cgi search TODO **** head -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" :tangle-mode (identity #o755) :shebang #!/usr/bin/env ruby +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" :tangle-mode (identity #o755) :shebang #!/usr/bin/env ruby =begin * Name: SiSU information Structuring Universe * Author: Ralph Amissah @@ -91,7 +91,7 @@ document home currently at /var/www/html (better placed at /var/www or srv/proje **** initialize -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" begin require 'cgi' require 'fcgi' @@ -117,7 +117,7 @@ end **** form ***** initialize form -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" class Form def 
initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') search_note = '' if checked_searched !~ /\S/ @@ -133,7 +133,7 @@ class Form ***** submission form -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" def submission_form search_form =<<-WOK <!DOCTYPE html> @@ -218,7 +218,7 @@ end **** search request ***** initialize request -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" class SearchRequest #% search_for attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:src_filename_base def initialize(search_field='',q='') @@ -293,7 +293,7 @@ class SearchRequest #% sea ***** text to match -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" def text_to_match(identifier='') m={ string: /#{identifier}\s*(.+?)/, @@ -318,7 +318,7 @@ end **** search string -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" class DBI_SearchString def initialize(l,t,q,cse=false) @l,@t,@q=l,t,q @@ -346,7 +346,7 @@ end **** search statement ***** init -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" class DBI_SearchStatement attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit def initialize(conn,search_for,q,c) @@ -453,7 +453,7 @@ class DBI_SearchStatement ***** misc -#+BEGIN_SRC 
ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" def sql_offset @@offset end @@ -540,7 +540,7 @@ class DBI_SearchStatement ***** select statement -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" def sql_select_body limit ||= @@limit offset ||= @@offset @@ -565,7 +565,7 @@ end **** tail -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" def tail <<-'WOK' <br /><hr /><br /> @@ -673,7 +673,7 @@ end **** fcgi each ***** init & start loop -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" @tail=tail @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 @counters_txt,@counters_endn,@sql_select_body='','','' @@ -748,7 +748,7 @@ FCGI.each_cgi do |cgi| ***** canned search -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" #Canned_search.new(@base,@search_for.text1,cgi) if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ s1 = 's1=' + CGI.escape(@search_for.text1) if @search_for.text1 =~ /\S/ @@ -849,7 +849,7 @@ FCGI.each_cgi do |cgi| ***** submission form -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" @header = Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ print "Content-type: text/html\n\n" @@ -944,7 +944,7 @@ FCGI.each_cgi do |cgi| ***** 
contents each, text body -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" #% text_objects_body s_contents.each do |c| #% text body location=c['src_filename_base'][/(.+?)\.(?:ssm\.sst|sst)$/,1] @@ -1040,7 +1040,7 @@ FCGI.each_cgi do |cgi| ***** after -#+BEGIN_SRC ruby :tangle "../util/rb/cgi/spine.search.cgi" +#+BEGIN_SRC ruby :tangle "../misc/util/rb/cgi/spine.search.cgi" olduid = "" offset=dbi_statement.sql_offset.to_s limit=dbi_statement.sql_match_limit.to_s diff --git a/src/COPYRIGHT b/src/COPYRIGHT index 8cba1e7..05e171f 100644 --- a/src/COPYRIGHT +++ b/src/COPYRIGHT @@ -1,4 +1,4 @@ -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -7,7 +7,7 @@ - Copyright: (C) 2015 - 2020 Ralph Amissah - - code under src/ + - code under src/ & org/ - License: AGPL 3 or later: Spine, Doc Reform (SiSU), a framework for document structuring, publishing and @@ -34,19 +34,15 @@ [http://www.gnu.org/licenses/agpl.html] - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on SiSU markup) - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering - Hompages: - [http://www.doc_reform.org] [http://www.sisudoc.org] - - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] diff --git a/src/doc_reform/COPYRIGHT b/src/doc_reform/COPYRIGHT index 8cba1e7..05e171f 100644 --- a/src/doc_reform/COPYRIGHT +++ 
b/src/doc_reform/COPYRIGHT @@ -1,4 +1,4 @@ -- Name: Spine, Doc Reform +- Name: spine - SiSU Spine, Doc Reform - Description: documents, structuring, processing, publishing, search - static content generator @@ -7,7 +7,7 @@ - Copyright: (C) 2015 - 2020 Ralph Amissah - - code under src/ + - code under src/ & org/ - License: AGPL 3 or later: Spine, Doc Reform (SiSU), a framework for document structuring, publishing and @@ -34,19 +34,15 @@ [http://www.gnu.org/licenses/agpl.html] - Spine, Doc Reform (related to SiSU) uses standard: - - docReform markup syntax + - docReform markup syntax (based on SiSU markup) - standard SiSU markup syntax with modified headers and minor modifications - - docReform object numbering - - standard SiSU object citation numbering & system + - docReform object numbering (based on SiSU object citation numbering) + - standard SiSU document object numbering - Hompages: - [http://www.doc_reform.org] [http://www.sisudoc.org] - - Git - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=summary] - [http://git.sisudoc.org/gitweb/?p=code/sisu.git;a=blob;f=lib/sisu/html.rb;hb=HEAD] - Spine, Doc Reform (SiSU) markup samples Individual document content Copyright (Author) [as stated in document header] diff --git a/util/d/cgi/search/dub.selections.json b/util/d/cgi/search/dub.selections.json deleted file mode 100644 index e6e1b26..0000000 --- a/util/d/cgi/search/dub.selections.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "fileVersion": 1, - "versions": { - "d2sqlite3": "0.18.3", - "dcgi": "0.1.0" - } -} diff --git a/util/d/cgi/search/localhostsqlitespine.search.sql.db b/util/d/cgi/search/localhostsqlitespine.search.sql.db deleted file mode 100644 index e69de29..0000000 --- a/util/d/cgi/search/localhostsqlitespine.search.sql.db +++ /dev/null diff --git a/util/rb/cgi/search.cgi b/util/rb/cgi/search.cgi deleted file mode 100755 index 0fe8ea5..0000000 --- a/util/rb/cgi/search.cgi +++ /dev/null @@ -1,937 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information 
Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (sqlite) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see <http://www.gnu.org/licenses/>. 
- - If you have Internet connection, the latest version of the GPL should be - available at these locations: - <http://www.fsf.org/licenses/gpl.html> - <http://www.gnu.org/licenses/gpl.html> - <http://www.jus.uio.no/sisu/gpl.fsf> - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - <http://www.jus.uio.no/sisu> - <http://www.sisudoc.org> - - * Ralph Amissah - <ralph@amissah.com> - <ralph.amissah@gmail.com> - -=end - begin - require 'cgi' - require 'fcgi' - require 'sqlite3' - rescue LoadError - puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' - end - @stub_default='manual' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='filetype' - @lingual='multi' - @db_name_prefix='SiSU.7a.' - @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can 
- @tip=if checked_tip =~/\S/ - '<font size="2" color="#666666">text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;</font><br />' - else '' - end - end - def submission_form - search_form=<<-WOK - <!DOCTYPE html> - <html> - <head> - <title> - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - </title> - <link rel="generator" href="http://www.jus.uio.no/sisu" /> - <link rel="shortcut icon" href="http://#{ENV['HTTP_HOST']}/_sisu/image_sys/rb7.ico" /> - <link href="../_sisu/css/html.css" rel="stylesheet"> - </head> - <body lang="en" xml:lang="en"> - <table summary="band" border="0" cellpadding="3" cellspacing="0"> - <tr><td width="20%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br /><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br /><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - </table> - </td> - <td> - <label for="find"><b>SiSU (generated sample) search form (content organised by filetype)</b></label> - </td></tr> - </table> - <form action="#{@base}" id="Test Form" method="post"> - <table cellpadding="2"> - <tr><td valign=\"top\"> - <textarea id="find" name="find" type="text" rows="6" cols="40" maxlength="256">#{@search_field}</textarea> - </td> - <td valign=\"top\"> - #{@tip} - #{@search_note} - #{@the_can} - </td></tr></table> - <td valign=\"top\"><tr><td> - <!input type="text" id="find" name="find" value="#{@search_field}" /> - <!input type="text" id="find" name="find" value="" /> - <font size="2" color="#222222"> - <b>to search:</b> select which database to search (drop-down menu below); enter your search query (in 
the form above); and <b>click on the search button</b> (below) - <br /> - <select name="db" size="1"> - #{@selected_db} - <option value="SiSU.7a.manual">manual</option> - </select> - <input type="submit" value="SiSU search" /> - <input type="radio" name="view" value="index" #{@result_type[:index]}> index - <input type="radio" name="view" value="text" #{@result_type[:text]}> text / grep - <br /> - match limit: - <input type="radio" name="sql_match_limit" value="1000" #{@checked_sql_limit[:l1000]}> 1,000 - <input type="radio" name="sql_match_limit" value="2500" #{@checked_sql_limit[:l2500]}> 2,500 - <br /> - <input type="checkbox" name="echo" #{@checked_echo}> echo query - <input type="checkbox" name="stats" #{@checked_stats}> result stats - <input type="checkbox" name="url" #{@checked_url}> search url - <input type="checkbox" name="searched" #{@checked_searched}> searched - <input type="checkbox" name="tip" #{@checked_tip}> available fields - <input type="checkbox" name="sql" #{@checked_sql}> sql statement - <br /> - checks: - <input type="radio" name="checks" value="check_default" #{@checked_default}> default - <input type="radio" name="checks" value="check_selected" #{@checked_selected}> selected - <input type="radio" name="checks" value="check_all" #{@checked_all}> all - <input type="radio" name="checks" value="check_none" #{@checked_none}> none - </font> - </td></tr> - </table> - </form> - WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - 
@text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - @identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - @abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment 
or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - @date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field =~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q=l,t,q - end - def string - search={ search: [], flag: false } - if @t 
=~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). - gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{<br /><center> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - elsif page.to_s =~ /^2$/ - %{<br /><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. 
#{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - else - %{<br /><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{<br /><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - </center>} - else - %{<br /><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. 
#{page.to_s} - </center>} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{<font color="#666666" size="2">#{sql_select_body}</font>} - end - def contents - @conn.execute(sql_select_body) - end - end - def tail - <<-'WOK' - <br /><hr /><br /> -<table summary="SiSU summary" cellpadding="2" border="0"> - <!-- widget sisu --> -<tr><td valign="top" width="10%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br /><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br /><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - </table> -</td> -<td valign="top" width="45%"> -<!-- SiSU Rights --> - <p class="tiny_left"><font color="#666666" size="2"> - Generated by - SiSU 6.3.1 2014-10-19 (2014w41/7) - <br /> - <a href="http://www.sisudoc.org" > - <b>SiSU</b></a> <sup>©</sup> Ralph Amissah - 1993, current 2014. - All Rights Reserved. 
- <br /> - SiSU is software for document structuring, publishing and search, - <br /> - <a href="http://www.jus.uio.no/sisu" > - www.jus.uio.no/sisu - </a> - and - <a href="http://www.sisudoc.org" > - www.sisudoc.org - </a> - sources - <a href="http://git.sisudoc.org" > - git.sisudoc.org - </a> - <br /> - <i>w3 since October 3 1993</i> - <a href="mailto:ralph@amissah.com" > - ralph@amissah.com - </a> - <br /> - mailing list subscription - <a href="http://lists.sisudoc.org/listinfo/sisu" > - http://lists.sisudoc.org/listinfo/sisu - </a> - <br /> - <a href="mailto:sisu@lists.sisudoc.org" > - sisu@lists.sisudoc.org - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU using: - <br />Standard SiSU markup syntax, - <br />Standard SiSU meta-markup syntax, and the - <br />Standard SiSU <u>object citation numbering</u> and system, (object/text identifying/locating system) - <br /> - <sup>©</sup> Ralph Amissah 1997, current 2014. - All Rights Reserved. - </font></p> -</td></tr> - <!-- widget way better --> -<tr><td valign="top" width="10%"> - <p class="tiny_left"><font color="#666666" size="2"> - <a href="http://www.gnu.org/licenses/gpl.html"> - .: - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU is released under - <a href="http://www.gnu.org/licenses/gpl.html">GPL v3</a> - or later, - <a href="http://www.gnu.org/licenses/gpl.html"> - http://www.gnu.org/licenses/gpl.html - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU, developed using - <a href="http://www.ruby-lang.org/en/"> - Ruby - </a> - on - <a href="http://www.debian.org/"> - Debian/Gnu/Linux - </a> - software infrastructure, - with the usual GPL (or OSS) suspects. 
- </font></p> -</td></tr> -</table> - <a name="bottom" id="bottom"></a><a name="down" id="down"></a><a name="end" id="end"></a><a name="finish" id="finish"></a><a name="stop" id="stop"></a><a name="credits" id="credits"></a> - </body></html> - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - 
checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=case cgi['db'] - when /SiSU.7a.manual/ then '<option value="SiSU.7a.manual">manual</option>' - end - db_name='sisu_sqlite.db' - db_sqlite=case cgi['db'] - when /SiSU.7a.manual/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - else "/srv/complete.sisudoc.org/web/manual/#{db_name}" - end - @conn=SQLite3::Database.new(db_sqlite) - @conn.results_as_hash=true - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if 
@search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). - gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{<font size="2" color="#004000">} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{<font size="2" color="#666666">#{canned_note} <a href="#{@@canned_search_url}">#{canned_search_url_txt}</a></font><br />} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}</font><br />} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}</font><br />} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}</font><br />} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}</font><br />} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}</font><br />} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}</font><br />} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: 
#{green}#{@search_for.contributor}</font><br />} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}</font><br />} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}</font><br />} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}</font><br />} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}</font><br />} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}</font><br />} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}</font><br />} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}</font><br />} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}</font><br />} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}</font><br />} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}</font><br />} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}</font><br />} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}</font><br />} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}</font><br />} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}</font><br />} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}</font><br />} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}</font><br />} if @search_for.language =~/\S+/ - search_note=<<-WOK - <font size="2" color="#666666"> - <b>database:</b> #{green}#{@db}</font>; <b>selected view:</b> #{green}#{cgi['view']}</font> - <b>search string:</b> "#{green}#{analyze_format}</font>"<br /> - #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} 
#{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - </font> - WOK - #eg = %{canned search e.g.:<br /> <a href="#{url}">#{url}</a><br />find: #{analyze}<br />database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - #@body_main << '<p><hr><br /><b>Main Text:</b><br />' << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? 
"#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? 
%{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=text"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - : %{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=index"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - title=%{<span style="background-color: #{@color_heading}"><a href="#{path_toc(location,c['language_document_char'])}"><img border="0" width="15" height="18" src="#{@image_src}/b_toc.png" alt="toc html"> #{ti}</a></span> [#{c['language_document_char']}] by #{c['creator_author']} <a href="#{path_manifest(location,c['language_document_char'])}"><img border="0" width="15" height="15" src="#{@image_src}/b_info.png" alt="manifest"></a> #{can_txt_srch}<br />} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '<br /><hr>'+title - : '<br />'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=[] - build=unescaped_search.scan(/\S+/).each do |g| - (g.to_s =~/(AND|OR)/) \ - ? (search_regex << '|') - : (search_regex << %{#{g.to_s}}) - end - search_regex=search_regex.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? 
(c['body'].gsub(/(<a\s+href="https?:\/\/[^><\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{<span style="background-color: #{@color_match}">\\1</span>})) - : c['body'] - %{<hr><p><font size="2">ocn <b><a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a></b>:</font></p>#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}<hr><p><font size="2">ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{<a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{<a href="#{path_html_doc(location,c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{<hr /><font size="2" color="#666666">Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]</font><br />} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? 
dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='<pre>' + CGI::escapeHTML(e.backtrace.reverse.join("\n")) - s << CGI::escapeHTML(e.message) + '</pre>' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/search_ref.cgi b/util/rb/cgi/search_ref.cgi deleted file mode 100755 index 0fe8ea5..0000000 --- a/util/rb/cgi/search_ref.cgi +++ /dev/null @@ -1,937 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (sqlite) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see <http://www.gnu.org/licenses/>. - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - <http://www.fsf.org/licenses/gpl.html> - <http://www.gnu.org/licenses/gpl.html> - <http://www.jus.uio.no/sisu/gpl.fsf> - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - <http://www.jus.uio.no/sisu> - <http://www.sisudoc.org> - - * Ralph Amissah - <ralph@amissah.com> - <ralph.amissah@gmail.com> - -=end - begin - require 'cgi' - require 'fcgi' - require 'sqlite3' - rescue LoadError - puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' - end - @stub_default='manual' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='filetype' - @lingual='multi' - @db_name_prefix='SiSU.7a.' 
- @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - '<font size="2" color="#666666">text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;</font><br />' - else '' - end - end - def submission_form - search_form=<<-WOK - <!DOCTYPE html> - <html> - <head> - <title> - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - </title> - <link rel="generator" href="http://www.jus.uio.no/sisu" /> - <link rel="shortcut icon" href="http://#{ENV['HTTP_HOST']}/_sisu/image_sys/rb7.ico" /> - <link href="../_sisu/css/html.css" rel="stylesheet"> - </head> - <body lang="en" xml:lang="en"> - <table summary="band" border="0" cellpadding="3" cellspacing="0"> - 
<tr><td width="20%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br /><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br /><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - </table> - </td> - <td> - <label for="find"><b>SiSU (generated sample) search form (content organised by filetype)</b></label> - </td></tr> - </table> - <form action="#{@base}" id="Test Form" method="post"> - <table cellpadding="2"> - <tr><td valign=\"top\"> - <textarea id="find" name="find" type="text" rows="6" cols="40" maxlength="256">#{@search_field}</textarea> - </td> - <td valign=\"top\"> - #{@tip} - #{@search_note} - #{@the_can} - </td></tr></table> - <td valign=\"top\"><tr><td> - <!input type="text" id="find" name="find" value="#{@search_field}" /> - <!input type="text" id="find" name="find" value="" /> - <font size="2" color="#222222"> - <b>to search:</b> select which database to search (drop-down menu below); enter your search query (in the form above); and <b>click on the search button</b> (below) - <br /> - <select name="db" size="1"> - #{@selected_db} - <option value="SiSU.7a.manual">manual</option> - </select> - <input type="submit" value="SiSU search" /> - <input type="radio" name="view" value="index" #{@result_type[:index]}> index - <input type="radio" name="view" value="text" #{@result_type[:text]}> text / grep - <br /> - match limit: - <input type="radio" name="sql_match_limit" value="1000" #{@checked_sql_limit[:l1000]}> 1,000 - <input type="radio" name="sql_match_limit" value="2500" #{@checked_sql_limit[:l2500]}> 2,500 - <br /> - <input type="checkbox" name="echo" #{@checked_echo}> echo query - <input type="checkbox" name="stats" #{@checked_stats}> result stats - <input type="checkbox" name="url" #{@checked_url}> search url - <input type="checkbox" name="searched" #{@checked_searched}> searched - <input type="checkbox" name="tip" #{@checked_tip}> 
available fields - <input type="checkbox" name="sql" #{@checked_sql}> sql statement - <br /> - checks: - <input type="radio" name="checks" value="check_default" #{@checked_default}> default - <input type="radio" name="checks" value="check_selected" #{@checked_selected}> selected - <input type="radio" name="checks" value="check_all" #{@checked_all}> all - <input type="radio" name="checks" value="check_none" #{@checked_none}> none - </font> - </td></tr> - </table> - </form> - WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - 
@identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - @abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - 
@date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field =~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q=l,t,q - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). - gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 
sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{<br /><center> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - elsif page.to_s =~ /^2$/ - %{<br /><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - else - %{<br /><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{<br /><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - </center>} - else - %{<br /><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. 
#{page.to_s} - </center>} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{<font color="#666666" size="2">#{sql_select_body}</font>} - end - def contents - @conn.execute(sql_select_body) - end - end - def tail - <<-'WOK' - <br /><hr /><br /> -<table summary="SiSU summary" cellpadding="2" border="0"> - <!-- widget sisu --> -<tr><td valign="top" width="10%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br /><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br /><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - </table> -</td> -<td valign="top" width="45%"> -<!-- SiSU Rights --> - <p class="tiny_left"><font color="#666666" size="2"> - Generated by - SiSU 6.3.1 2014-10-19 (2014w41/7) - <br /> - <a href="http://www.sisudoc.org" > - <b>SiSU</b></a> <sup>©</sup> Ralph Amissah - 1993, current 2014. - All Rights Reserved. 
- <br /> - SiSU is software for document structuring, publishing and search, - <br /> - <a href="http://www.jus.uio.no/sisu" > - www.jus.uio.no/sisu - </a> - and - <a href="http://www.sisudoc.org" > - www.sisudoc.org - </a> - sources - <a href="http://git.sisudoc.org" > - git.sisudoc.org - </a> - <br /> - <i>w3 since October 3 1993</i> - <a href="mailto:ralph@amissah.com" > - ralph@amissah.com - </a> - <br /> - mailing list subscription - <a href="http://lists.sisudoc.org/listinfo/sisu" > - http://lists.sisudoc.org/listinfo/sisu - </a> - <br /> - <a href="mailto:sisu@lists.sisudoc.org" > - sisu@lists.sisudoc.org - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU using: - <br />Standard SiSU markup syntax, - <br />Standard SiSU meta-markup syntax, and the - <br />Standard SiSU <u>object citation numbering</u> and system, (object/text identifying/locating system) - <br /> - <sup>©</sup> Ralph Amissah 1997, current 2014. - All Rights Reserved. - </font></p> -</td></tr> - <!-- widget way better --> -<tr><td valign="top" width="10%"> - <p class="tiny_left"><font color="#666666" size="2"> - <a href="http://www.gnu.org/licenses/gpl.html"> - .: - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU is released under - <a href="http://www.gnu.org/licenses/gpl.html">GPL v3</a> - or later, - <a href="http://www.gnu.org/licenses/gpl.html"> - http://www.gnu.org/licenses/gpl.html - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU, developed using - <a href="http://www.ruby-lang.org/en/"> - Ruby - </a> - on - <a href="http://www.debian.org/"> - Debian/Gnu/Linux - </a> - software infrastructure, - with the usual GPL (or OSS) suspects. 
- </font></p> -</td></tr> -</table> - <a name="bottom" id="bottom"></a><a name="down" id="down"></a><a name="end" id="end"></a><a name="finish" id="finish"></a><a name="stop" id="stop"></a><a name="credits" id="credits"></a> - </body></html> - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - 
checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=case cgi['db'] - when /SiSU.7a.manual/ then '<option value="SiSU.7a.manual">manual</option>' - end - db_name='sisu_sqlite.db' - db_sqlite=case cgi['db'] - when /SiSU.7a.manual/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - else "/srv/complete.sisudoc.org/web/manual/#{db_name}" - end - @conn=SQLite3::Database.new(db_sqlite) - @conn.results_as_hash=true - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if 
@search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). - gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{<font size="2" color="#004000">} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{<font size="2" color="#666666">#{canned_note} <a href="#{@@canned_search_url}">#{canned_search_url_txt}</a></font><br />} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}</font><br />} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}</font><br />} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}</font><br />} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}</font><br />} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}</font><br />} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}</font><br />} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: 
#{green}#{@search_for.contributor}</font><br />} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}</font><br />} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}</font><br />} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}</font><br />} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}</font><br />} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}</font><br />} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}</font><br />} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}</font><br />} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}</font><br />} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}</font><br />} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}</font><br />} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}</font><br />} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}</font><br />} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}</font><br />} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}</font><br />} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}</font><br />} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}</font><br />} if @search_for.language =~/\S+/ - search_note=<<-WOK - <font size="2" color="#666666"> - <b>database:</b> #{green}#{@db}</font>; <b>selected view:</b> #{green}#{cgi['view']}</font> - <b>search string:</b> "#{green}#{analyze_format}</font>"<br /> - #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} 
#{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - </font> - WOK - #eg = %{canned search e.g.:<br /> <a href="#{url}">#{url}</a><br />find: #{analyze}<br />database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - #@body_main << '<p><hr><br /><b>Main Text:</b><br />' << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? 
"#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? 
%{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=text"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - : %{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=index"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - title=%{<span style="background-color: #{@color_heading}"><a href="#{path_toc(location,c['language_document_char'])}"><img border="0" width="15" height="18" src="#{@image_src}/b_toc.png" alt="toc html"> #{ti}</a></span> [#{c['language_document_char']}] by #{c['creator_author']} <a href="#{path_manifest(location,c['language_document_char'])}"><img border="0" width="15" height="15" src="#{@image_src}/b_info.png" alt="manifest"></a> #{can_txt_srch}<br />} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '<br /><hr>'+title - : '<br />'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=[] - build=unescaped_search.scan(/\S+/).each do |g| - (g.to_s =~/(AND|OR)/) \ - ? (search_regex << '|') - : (search_regex << %{#{g.to_s}}) - end - search_regex=search_regex.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? 
(c['body'].gsub(/(<a\s+href="https?:\/\/[^><\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{<span style="background-color: #{@color_match}">\\1</span>})) - : c['body'] - %{<hr><p><font size="2">ocn <b><a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a></b>:</font></p>#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}<hr><p><font size="2">ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{<a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{<a href="#{path_html_doc(location,c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{<hr /><font size="2" color="#666666">Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]</font><br />} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? 
dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='<pre>' + CGI::escapeHTML(e.backtrace.reverse.join("\n")) - s << CGI::escapeHTML(e.message) + '</pre>' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/sisu_7a_sqlite.cgi b/util/rb/cgi/sisu_7a_sqlite.cgi deleted file mode 100755 index 0fe8ea5..0000000 --- a/util/rb/cgi/sisu_7a_sqlite.cgi +++ /dev/null @@ -1,937 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (sqlite) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see <http://www.gnu.org/licenses/>. - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - <http://www.fsf.org/licenses/gpl.html> - <http://www.gnu.org/licenses/gpl.html> - <http://www.jus.uio.no/sisu/gpl.fsf> - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - <http://www.jus.uio.no/sisu> - <http://www.sisudoc.org> - - * Ralph Amissah - <ralph@amissah.com> - <ralph.amissah@gmail.com> - -=end - begin - require 'cgi' - require 'fcgi' - require 'sqlite3' - rescue LoadError - puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' - end - @stub_default='manual' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='filetype' - @lingual='multi' - @db_name_prefix='SiSU.7a.' 
- @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - '<font size="2" color="#666666">text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;</font><br />' - else '' - end - end - def submission_form - search_form=<<-WOK - <!DOCTYPE html> - <html> - <head> - <title> - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - </title> - <link rel="generator" href="http://www.jus.uio.no/sisu" /> - <link rel="shortcut icon" href="http://#{ENV['HTTP_HOST']}/_sisu/image_sys/rb7.ico" /> - <link href="../_sisu/css/html.css" rel="stylesheet"> - </head> - <body lang="en" xml:lang="en"> - <table summary="band" border="0" cellpadding="3" cellspacing="0"> - 
<tr><td width="20%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br /><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br /><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - </table> - </td> - <td> - <label for="find"><b>SiSU (generated sample) search form (content organised by filetype)</b></label> - </td></tr> - </table> - <form action="#{@base}" id="Test Form" method="post"> - <table cellpadding="2"> - <tr><td valign=\"top\"> - <textarea id="find" name="find" type="text" rows="6" cols="40" maxlength="256">#{@search_field}</textarea> - </td> - <td valign=\"top\"> - #{@tip} - #{@search_note} - #{@the_can} - </td></tr></table> - <td valign=\"top\"><tr><td> - <!input type="text" id="find" name="find" value="#{@search_field}" /> - <!input type="text" id="find" name="find" value="" /> - <font size="2" color="#222222"> - <b>to search:</b> select which database to search (drop-down menu below); enter your search query (in the form above); and <b>click on the search button</b> (below) - <br /> - <select name="db" size="1"> - #{@selected_db} - <option value="SiSU.7a.manual">manual</option> - </select> - <input type="submit" value="SiSU search" /> - <input type="radio" name="view" value="index" #{@result_type[:index]}> index - <input type="radio" name="view" value="text" #{@result_type[:text]}> text / grep - <br /> - match limit: - <input type="radio" name="sql_match_limit" value="1000" #{@checked_sql_limit[:l1000]}> 1,000 - <input type="radio" name="sql_match_limit" value="2500" #{@checked_sql_limit[:l2500]}> 2,500 - <br /> - <input type="checkbox" name="echo" #{@checked_echo}> echo query - <input type="checkbox" name="stats" #{@checked_stats}> result stats - <input type="checkbox" name="url" #{@checked_url}> search url - <input type="checkbox" name="searched" #{@checked_searched}> searched - <input type="checkbox" name="tip" #{@checked_tip}> 
available fields - <input type="checkbox" name="sql" #{@checked_sql}> sql statement - <br /> - checks: - <input type="radio" name="checks" value="check_default" #{@checked_default}> default - <input type="radio" name="checks" value="check_selected" #{@checked_selected}> selected - <input type="radio" name="checks" value="check_all" #{@checked_all}> all - <input type="radio" name="checks" value="check_none" #{@checked_none}> none - </font> - </td></tr> - </table> - </form> - WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - 
@identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - @abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - 
@date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field =~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q=l,t,q - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). - gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 
sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{<br /><center> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - elsif page.to_s =~ /^2$/ - %{<br /><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - else - %{<br /><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{<br /><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - </center>} - else - %{<br /><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. 
#{page.to_s} - </center>} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{<font color="#666666" size="2">#{sql_select_body}</font>} - end - def contents - @conn.execute(sql_select_body) - end - end - def tail - <<-'WOK' - <br /><hr /><br /> -<table summary="SiSU summary" cellpadding="2" border="0"> - <!-- widget sisu --> -<tr><td valign="top" width="10%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br /><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br /><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - </table> -</td> -<td valign="top" width="45%"> -<!-- SiSU Rights --> - <p class="tiny_left"><font color="#666666" size="2"> - Generated by - SiSU 6.3.1 2014-10-19 (2014w41/7) - <br /> - <a href="http://www.sisudoc.org" > - <b>SiSU</b></a> <sup>©</sup> Ralph Amissah - 1993, current 2014. - All Rights Reserved. 
- <br /> - SiSU is software for document structuring, publishing and search, - <br /> - <a href="http://www.jus.uio.no/sisu" > - www.jus.uio.no/sisu - </a> - and - <a href="http://www.sisudoc.org" > - www.sisudoc.org - </a> - sources - <a href="http://git.sisudoc.org" > - git.sisudoc.org - </a> - <br /> - <i>w3 since October 3 1993</i> - <a href="mailto:ralph@amissah.com" > - ralph@amissah.com - </a> - <br /> - mailing list subscription - <a href="http://lists.sisudoc.org/listinfo/sisu" > - http://lists.sisudoc.org/listinfo/sisu - </a> - <br /> - <a href="mailto:sisu@lists.sisudoc.org" > - sisu@lists.sisudoc.org - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU using: - <br />Standard SiSU markup syntax, - <br />Standard SiSU meta-markup syntax, and the - <br />Standard SiSU <u>object citation numbering</u> and system, (object/text identifying/locating system) - <br /> - <sup>©</sup> Ralph Amissah 1997, current 2014. - All Rights Reserved. - </font></p> -</td></tr> - <!-- widget way better --> -<tr><td valign="top" width="10%"> - <p class="tiny_left"><font color="#666666" size="2"> - <a href="http://www.gnu.org/licenses/gpl.html"> - .: - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU is released under - <a href="http://www.gnu.org/licenses/gpl.html">GPL v3</a> - or later, - <a href="http://www.gnu.org/licenses/gpl.html"> - http://www.gnu.org/licenses/gpl.html - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU, developed using - <a href="http://www.ruby-lang.org/en/"> - Ruby - </a> - on - <a href="http://www.debian.org/"> - Debian/Gnu/Linux - </a> - software infrastructure, - with the usual GPL (or OSS) suspects. 
- </font></p> -</td></tr> -</table> - <a name="bottom" id="bottom"></a><a name="down" id="down"></a><a name="end" id="end"></a><a name="finish" id="finish"></a><a name="stop" id="stop"></a><a name="credits" id="credits"></a> - </body></html> - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - 
checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=case cgi['db'] - when /SiSU.7a.manual/ then '<option value="SiSU.7a.manual">manual</option>' - end - db_name='sisu_sqlite.db' - db_sqlite=case cgi['db'] - when /SiSU.7a.manual/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - else "/srv/complete.sisudoc.org/web/manual/#{db_name}" - end - @conn=SQLite3::Database.new(db_sqlite) - @conn.results_as_hash=true - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if 
@search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). - gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{<font size="2" color="#004000">} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{<font size="2" color="#666666">#{canned_note} <a href="#{@@canned_search_url}">#{canned_search_url_txt}</a></font><br />} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}</font><br />} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}</font><br />} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}</font><br />} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}</font><br />} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}</font><br />} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}</font><br />} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: 
#{green}#{@search_for.contributor}</font><br />} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}</font><br />} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}</font><br />} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}</font><br />} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}</font><br />} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}</font><br />} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}</font><br />} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}</font><br />} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}</font><br />} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}</font><br />} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}</font><br />} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}</font><br />} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}</font><br />} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}</font><br />} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}</font><br />} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}</font><br />} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}</font><br />} if @search_for.language =~/\S+/ - search_note=<<-WOK - <font size="2" color="#666666"> - <b>database:</b> #{green}#{@db}</font>; <b>selected view:</b> #{green}#{cgi['view']}</font> - <b>search string:</b> "#{green}#{analyze_format}</font>"<br /> - #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} 
#{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - </font> - WOK - #eg = %{canned search e.g.:<br /> <a href="#{url}">#{url}</a><br />find: #{analyze}<br />database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - #@body_main << '<p><hr><br /><b>Main Text:</b><br />' << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? 
"#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? 
%{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=text"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - : %{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=index"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - title=%{<span style="background-color: #{@color_heading}"><a href="#{path_toc(location,c['language_document_char'])}"><img border="0" width="15" height="18" src="#{@image_src}/b_toc.png" alt="toc html"> #{ti}</a></span> [#{c['language_document_char']}] by #{c['creator_author']} <a href="#{path_manifest(location,c['language_document_char'])}"><img border="0" width="15" height="15" src="#{@image_src}/b_info.png" alt="manifest"></a> #{can_txt_srch}<br />} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '<br /><hr>'+title - : '<br />'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=[] - build=unescaped_search.scan(/\S+/).each do |g| - (g.to_s =~/(AND|OR)/) \ - ? (search_regex << '|') - : (search_regex << %{#{g.to_s}}) - end - search_regex=search_regex.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? 
(c['body'].gsub(/(<a\s+href="https?:\/\/[^><\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{<span style="background-color: #{@color_match}">\\1</span>})) - : c['body'] - %{<hr><p><font size="2">ocn <b><a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a></b>:</font></p>#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}<hr><p><font size="2">ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{<a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{<a href="#{path_html_doc(location,c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{<hr /><font size="2" color="#666666">Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]</font><br />} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? 
dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='<pre>' + CGI::escapeHTML(e.backtrace.reverse.join("\n")) - s << CGI::escapeHTML(e.message) + '</pre>' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/sisu_lng.cgi b/util/rb/cgi/sisu_lng.cgi deleted file mode 100755 index 5e07a16..0000000 --- a/util/rb/cgi/sisu_lng.cgi +++ /dev/null @@ -1,935 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (pgsql) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2015, Ralph Amissah, All Rights Reserved. - - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see <http://www.gnu.org/licenses/>. - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - <http://www.fsf.org/licenses/gpl.html> - <http://www.gnu.org/licenses/gpl.html> - <http://www.jus.uio.no/sisu/gpl.fsf> - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - <http://www.jus.uio.no/sisu> - <http://www.sisudoc.org> - - * Ralph Amissah - <ralph@amissah.com> - <ralph.amissah@gmail.com> - -=end - begin - require 'cgi' - require 'fcgi' - require 'pg' - rescue LoadError - puts 'cgi, fcgi or pg NOT FOUND (LoadError)' - end - @stub_default='samples_by_language' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='language' - @lingual='multi' - @port='5432' - @db_name_prefix='SiSU.7a.' - @user='www-data' # check user name for access to pg database: e.g. 
www-data or 'ralph' - @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - '<font size="2" color="#666666">text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;</font><br>' - else '' - end - end - def submission_form - search_form=<<-WOK - <!DOCTYPE html> - <html> - <head> - <title> - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - </title> - <link rel="generator" href="http://www.jus.uio.no/sisu" /> - <link rel="shortcut icon" href="http://#{ENV['HTTP_HOST']}/_sisu/image_sys/rb7.ico" /> - <link href="../_sisu/css/html.css" rel="stylesheet"> - </head> - <body lang="en" xml:lang="en"> - <table summary="band" border="0" cellpadding="3" 
cellspacing="0"> - <tr><td width="20%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br><a href="http://git.sisudoc.org/gitweb/" target="_top"> - git - </a> - </td></tr> - </table> - </td> - <td> - <label for="find"><b>SiSU search form (sample) (content organised by language)</b></label> - </td></tr> - </table> - <form action="#{@base}" id="Test Form" method="post"> - <table cellpadding="2"> - <tr><td valign=\"top\"> - <textarea id="find" name="find" type="text" rows="6" cols="40" maxlength="256">#{@search_field}</textarea> - </td> - <td valign=\"top\"> - #{@tip} - #{@search_note} - #{@the_can} - </td></tr></table> - <td valign=\"top\"><tr><td> - <!input type="text" id="find" name="find" value="#{@search_field}" /> - <!input type="text" id="find" name="find" value="" /> - <font size="2" color="#222222"> - <b>to search:</b> select which database to search (drop-down menu below); enter your search query (in the form above); and <b>click on the search button</b> (below) - <br> - <select name="db" size="1"> - #{@selected_db} - <option value="SiSU.7a.samples_by_language">samples_by_language</option> - <option value="SiSUv6c_manual">manual</option> - </select> - <input type="submit" value="SiSU search" /> - <input type="radio" name="view" value="index" #{@result_type[:index]}> index - <input type="radio" name="view" value="text" #{@result_type[:text]}> text / grep - <input type="checkbox" name="casesense" #{@checked_case}> case sensitive - <br> - match limit: - <input type="radio" name="sql_match_limit" value="1000" #{@checked_sql_limit[:l1000]}> 1,000 - <input type="radio" name="sql_match_limit" value="2500" #{@checked_sql_limit[:l2500]}> 2,500 - <br> - <input type="checkbox" name="echo" #{@checked_echo}> echo query - <input type="checkbox" name="stats" #{@checked_stats}> result stats - <input type="checkbox" name="url" 
#{@checked_url}> search url - <input type="checkbox" name="searched" #{@checked_searched}> searched - <input type="checkbox" name="tip" #{@checked_tip}> available fields - <input type="checkbox" name="sql" #{@checked_sql}> sql statement - <br> - checks: - <input type="radio" name="checks" value="check_default" #{@checked_default}> default - <input type="radio" name="checks" value="check_selected" #{@checked_selected}> selected - <input type="radio" name="checks" value="check_all" #{@checked_all}> all - <input type="radio" name="checks" value="check_none" #{@checked_none}> none - </font> - </td></tr> - </table> - </form> - WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - 
@date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - @identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - @abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - 
@date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - @date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field =~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q,@c=l,t,q,cse - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=if @c - unescaped_search.gsub(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~\( '"). - gsub(/(.+)/,"#{@l}~\( '\\1' \)") - else - unescaped_search.gsub(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~*\( '"). 
- gsub(/(.+)/,"#{@l}~*\( '\\1' \)") - end - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if 
st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - 
@search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean~[*]?\(\s*'[^']+'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean~[*]?\(\s*'[^']+'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{<br><center> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - elsif page.to_s =~ /^2$/ - %{<br><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - else - %{<br><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. 
#{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{<br><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - </center>} - else - %{<br><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - </center>} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE (#{@search_text}) AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{<font color="#666666" size="2">#{sql_select_body}</font>} - end - def contents - @conn.exec(sql_select_body) - end - end - def tail - <<-'WOK' - <br><hr /><br> -<table summary="SiSU summary" cellpadding="2" border="0"> - <!-- widget sisu --> -<tr><td valign="top" width="10%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - 
</table> -</td> -<td valign="top" width="45%"> -<!-- SiSU Rights --> - <p class="tiny_left"><font color="#666666" size="2"> - Generated by - SiSU 7.1.2 2015-05-18 (2015w20/1) - <br> - <a href="http://www.sisudoc.org" > - <b>SiSU</b></a> <sup>©</sup> Ralph Amissah - 1993, current 2015. - All Rights Reserved. - <br> - SiSU is software for document structuring, publishing and search, - <br> - <a href="http://www.jus.uio.no/sisu" > - www.jus.uio.no/sisu - </a> - and - <a href="http://www.sisudoc.org" > - www.sisudoc.org - </a> - sources - <a href="http://git.sisudoc.org" > - git.sisudoc.org - </a> - <br> - <i>w3 since October 3 1993</i> - <a href="mailto:ralph@amissah.com" > - ralph@amissah.com - </a> - <br> - mailing list subscription - <a href="http://lists.sisudoc.org/listinfo/sisu" > - http://lists.sisudoc.org/listinfo/sisu - </a> - <br> - <a href="mailto:sisu@lists.sisudoc.org" > - sisu@lists.sisudoc.org - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU using: - <br>Standard SiSU markup syntax, - <br>Standard SiSU meta-markup syntax, and the - <br>Standard SiSU <u>object citation numbering</u> and system, (object/text identifying/locating system) - <br> - <sup>©</sup> Ralph Amissah 1997, current 2015. - All Rights Reserved. 
- </font></p> -</td></tr> - <!-- widget way better --> -<tr><td valign="top" width="10%"> - <p class="tiny_left"><font color="#666666" size="2"> - <a href="http://www.gnu.org/licenses/gpl.html"> - .: - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU is released under - <a href="http://www.gnu.org/licenses/gpl.html">GPL v3</a> - or later, - <a href="http://www.gnu.org/licenses/gpl.html"> - http://www.gnu.org/licenses/gpl.html - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU, developed using - <a href="http://www.ruby-lang.org/en/"> - Ruby - </a> - on - <a href="http://www.debian.org/"> - Debian/Gnu/Linux - </a> - software infrastructure, - with the usual GPL (or OSS) suspects. - </font></p> -</td></tr> -</table> - <a name="bottom" id="bottom"></a><a name="down" id="down"></a><a name="end" id="end"></a><a name="finish" id="finish"></a><a name="stop" id="stop"></a><a name="credits" id="credits"></a> - </body></html> - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? 
result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=%{<option value="#{@db_name_prefix}#{@stub}">#{@stub}</option>} - @conn=PG::Connection.open(dbname: @db, port: @port, user: @user) - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + 
CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if @search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + 
CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? (ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). 
- gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{<font size="2" color="#004000">} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{<font size="2" color="#666666">#{canned_note} <a href="#{@@canned_search_url}">#{canned_search_url_txt}</a></font><br>} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}</font><br>} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}</font><br>} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}</font><br>} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}</font><br>} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}</font><br>} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}</font><br>} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: #{green}#{@search_for.contributor}</font><br>} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}</font><br>} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}</font><br>} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}</font><br>} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}</font><br>} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}</font><br>} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}</font><br>} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}</font><br>} if @search_for.type =~/\S+/ - p_format=%{format: 
#{green}#{@search_for.format}</font><br>} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}</font><br>} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}</font><br>} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}</font><br>} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}</font><br>} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}</font><br>} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}</font><br>} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}</font><br>} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}</font><br>} if @search_for.language =~/\S+/ - search_note=<<-WOK - <font size="2" color="#666666"> - <b>database:</b> #{green}#{@db}</font>; <b>selected view:</b> #{green}#{cgi['view']}</font> - <b>search string:</b> "#{green}#{analyze_format}</font>"<br> - #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - </font> - WOK - #eg = %{canned search e.g.:<br> <a href="#{url}">#{url}</a><br>find: #{analyze}<br>database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 
=~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? 
"#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? %{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=text"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - : %{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=index"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - title=%{<span style="background-color: #{@color_heading}"><a href="#{path_toc(location,c['language_document_char'])}"><img border="0" width="15" height="18" src="#{@image_src}/b_toc.png" alt="toc html"> #{ti}</a></span> [#{c['language_document_char']}] by #{c['creator_author']} <a href="#{path_manifest(location,c['language_document_char'])}"><img border="0" width="15" height="15" src="#{@image_src}/b_info.png" alt="manifest"></a> #{can_txt_srch}<br>} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '<br><hr>'+title - : '<br>'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! 
- unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=unescaped_search.scan(/\S+/).each.map do |g| - (g.to_s =~/(AND|OR)/) \ - ? ('|') - : (%{#{g.to_s}}) - end.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? (c['body'].gsub(/(<a\s+href="https?:\/\/[^><\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{<span style="background-color: #{@color_match}">\\1</span>})) - : c['body'] - %{<hr><p><font size="2">ocn <b><a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a></b>:</font></p>#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}<hr><p><font size="2">ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{<a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{<a href="#{path_html_doc(location,c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? 
true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{<hr /><font size="2" color="#666666">Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]</font><br>} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='<pre>' + CGI::escapeHTML(e.backtrace.reverse.join("\n")) - s << CGI::escapeHTML(e.message) + '</pre>' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/sisu_search_pg.cgi b/util/rb/cgi/sisu_search_pg.cgi deleted file mode 100755 index 5e07a16..0000000 --- a/util/rb/cgi/sisu_search_pg.cgi +++ /dev/null @@ -1,935 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (pgsql) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2015, Ralph Amissah, All Rights Reserved. 
- - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see <http://www.gnu.org/licenses/>. - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - <http://www.fsf.org/licenses/gpl.html> - <http://www.gnu.org/licenses/gpl.html> - <http://www.jus.uio.no/sisu/gpl.fsf> - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - <http://www.jus.uio.no/sisu> - <http://www.sisudoc.org> - - * Ralph Amissah - <ralph@amissah.com> - <ralph.amissah@gmail.com> - -=end - begin - require 'cgi' - require 'fcgi' - require 'pg' - rescue LoadError - puts 'cgi, fcgi or pg NOT FOUND (LoadError)' - end - @stub_default='samples_by_language' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='language' - @lingual='multi' - @port='5432' - @db_name_prefix='SiSU.7a.' - @user='www-data' # check user name for access to pg database: e.g. 
www-data or 'ralph' - @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - '<font size="2" color="#666666">text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;</font><br>' - else '' - end - end - def submission_form - search_form=<<-WOK - <!DOCTYPE html> - <html> - <head> - <title> - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - </title> - <link rel="generator" href="http://www.jus.uio.no/sisu" /> - <link rel="shortcut icon" href="http://#{ENV['HTTP_HOST']}/_sisu/image_sys/rb7.ico" /> - <link href="../_sisu/css/html.css" rel="stylesheet"> - </head> - <body lang="en" xml:lang="en"> - <table summary="band" border="0" cellpadding="3" 
cellspacing="0"> - <tr><td width="20%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br><a href="http://git.sisudoc.org/gitweb/" target="_top"> - git - </a> - </td></tr> - </table> - </td> - <td> - <label for="find"><b>SiSU search form (sample) (content organised by language)</b></label> - </td></tr> - </table> - <form action="#{@base}" id="Test Form" method="post"> - <table cellpadding="2"> - <tr><td valign=\"top\"> - <textarea id="find" name="find" type="text" rows="6" cols="40" maxlength="256">#{@search_field}</textarea> - </td> - <td valign=\"top\"> - #{@tip} - #{@search_note} - #{@the_can} - </td></tr></table> - <td valign=\"top\"><tr><td> - <!input type="text" id="find" name="find" value="#{@search_field}" /> - <!input type="text" id="find" name="find" value="" /> - <font size="2" color="#222222"> - <b>to search:</b> select which database to search (drop-down menu below); enter your search query (in the form above); and <b>click on the search button</b> (below) - <br> - <select name="db" size="1"> - #{@selected_db} - <option value="SiSU.7a.samples_by_language">samples_by_language</option> - <option value="SiSUv6c_manual">manual</option> - </select> - <input type="submit" value="SiSU search" /> - <input type="radio" name="view" value="index" #{@result_type[:index]}> index - <input type="radio" name="view" value="text" #{@result_type[:text]}> text / grep - <input type="checkbox" name="casesense" #{@checked_case}> case sensitive - <br> - match limit: - <input type="radio" name="sql_match_limit" value="1000" #{@checked_sql_limit[:l1000]}> 1,000 - <input type="radio" name="sql_match_limit" value="2500" #{@checked_sql_limit[:l2500]}> 2,500 - <br> - <input type="checkbox" name="echo" #{@checked_echo}> echo query - <input type="checkbox" name="stats" #{@checked_stats}> result stats - <input type="checkbox" name="url" 
#{@checked_url}> search url - <input type="checkbox" name="searched" #{@checked_searched}> searched - <input type="checkbox" name="tip" #{@checked_tip}> available fields - <input type="checkbox" name="sql" #{@checked_sql}> sql statement - <br> - checks: - <input type="radio" name="checks" value="check_default" #{@checked_default}> default - <input type="radio" name="checks" value="check_selected" #{@checked_selected}> selected - <input type="radio" name="checks" value="check_all" #{@checked_all}> all - <input type="radio" name="checks" value="check_none" #{@checked_none}> none - </font> - </td></tr> - </table> - </form> - WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - 
@date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - @identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - @abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - 
@date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - @date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field =~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q,@c=l,t,q,cse - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=if @c - unescaped_search.gsub(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~\( '"). - gsub(/(.+)/,"#{@l}~\( '\\1' \)") - else - unescaped_search.gsub(/\s*(AND|OR)\s*/,"' \) \\1 #{@l}~*\( '"). 
- gsub(/(.+)/,"#{@l}~*\( '\\1' \)") - end - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if 
st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - 
@search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean~[*]?\(\s*'[^']+'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean~[*]?\(\s*'[^']+'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{<br><center> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - elsif page.to_s =~ /^2$/ - %{<br><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - else - %{<br><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. 
#{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{<br><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - </center>} - else - %{<br><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - </center>} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE (#{@search_text}) AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{<font color="#666666" size="2">#{sql_select_body}</font>} - end - def contents - @conn.exec(sql_select_body) - end - end - def tail - <<-'WOK' - <br><hr /><br> -<table summary="SiSU summary" cellpadding="2" border="0"> - <!-- widget sisu --> -<tr><td valign="top" width="10%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - 
</table> -</td> -<td valign="top" width="45%"> -<!-- SiSU Rights --> - <p class="tiny_left"><font color="#666666" size="2"> - Generated by - SiSU 7.1.2 2015-05-18 (2015w20/1) - <br> - <a href="http://www.sisudoc.org" > - <b>SiSU</b></a> <sup>©</sup> Ralph Amissah - 1993, current 2015. - All Rights Reserved. - <br> - SiSU is software for document structuring, publishing and search, - <br> - <a href="http://www.jus.uio.no/sisu" > - www.jus.uio.no/sisu - </a> - and - <a href="http://www.sisudoc.org" > - www.sisudoc.org - </a> - sources - <a href="http://git.sisudoc.org" > - git.sisudoc.org - </a> - <br> - <i>w3 since October 3 1993</i> - <a href="mailto:ralph@amissah.com" > - ralph@amissah.com - </a> - <br> - mailing list subscription - <a href="http://lists.sisudoc.org/listinfo/sisu" > - http://lists.sisudoc.org/listinfo/sisu - </a> - <br> - <a href="mailto:sisu@lists.sisudoc.org" > - sisu@lists.sisudoc.org - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU using: - <br>Standard SiSU markup syntax, - <br>Standard SiSU meta-markup syntax, and the - <br>Standard SiSU <u>object citation numbering</u> and system, (object/text identifying/locating system) - <br> - <sup>©</sup> Ralph Amissah 1997, current 2015. - All Rights Reserved. 
- </font></p> -</td></tr> - <!-- widget way better --> -<tr><td valign="top" width="10%"> - <p class="tiny_left"><font color="#666666" size="2"> - <a href="http://www.gnu.org/licenses/gpl.html"> - .: - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU is released under - <a href="http://www.gnu.org/licenses/gpl.html">GPL v3</a> - or later, - <a href="http://www.gnu.org/licenses/gpl.html"> - http://www.gnu.org/licenses/gpl.html - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU, developed using - <a href="http://www.ruby-lang.org/en/"> - Ruby - </a> - on - <a href="http://www.debian.org/"> - Debian/Gnu/Linux - </a> - software infrastructure, - with the usual GPL (or OSS) suspects. - </font></p> -</td></tr> -</table> - <a name="bottom" id="bottom"></a><a name="down" id="down"></a><a name="end" id="end"></a><a name="finish" id="finish"></a><a name="stop" id="stop"></a><a name="credits" id="credits"></a> - </body></html> - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? 
result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=%{<option value="#{@db_name_prefix}#{@stub}">#{@stub}</option>} - @conn=PG::Connection.open(dbname: @db, port: @port, user: @user) - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + 
CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if @search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + 
CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? (ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). 
- gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{<font size="2" color="#004000">} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{<font size="2" color="#666666">#{canned_note} <a href="#{@@canned_search_url}">#{canned_search_url_txt}</a></font><br>} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}</font><br>} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}</font><br>} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}</font><br>} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}</font><br>} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}</font><br>} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}</font><br>} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: #{green}#{@search_for.contributor}</font><br>} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}</font><br>} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}</font><br>} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}</font><br>} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}</font><br>} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}</font><br>} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}</font><br>} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}</font><br>} if @search_for.type =~/\S+/ - p_format=%{format: 
#{green}#{@search_for.format}</font><br>} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}</font><br>} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}</font><br>} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}</font><br>} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}</font><br>} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}</font><br>} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}</font><br>} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}</font><br>} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}</font><br>} if @search_for.language =~/\S+/ - search_note=<<-WOK - <font size="2" color="#666666"> - <b>database:</b> #{green}#{@db}</font>; <b>selected view:</b> #{green}#{cgi['view']}</font> - <b>search string:</b> "#{green}#{analyze_format}</font>"<br> - #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} #{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - </font> - WOK - #eg = %{canned search e.g.:<br> <a href="#{url}">#{url}</a><br>find: #{analyze}<br>database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 
=~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? 
"#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? %{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=text"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - : %{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=index"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - title=%{<span style="background-color: #{@color_heading}"><a href="#{path_toc(location,c['language_document_char'])}"><img border="0" width="15" height="18" src="#{@image_src}/b_toc.png" alt="toc html"> #{ti}</a></span> [#{c['language_document_char']}] by #{c['creator_author']} <a href="#{path_manifest(location,c['language_document_char'])}"><img border="0" width="15" height="15" src="#{@image_src}/b_info.png" alt="manifest"></a> #{can_txt_srch}<br>} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '<br><hr>'+title - : '<br>'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! 
- unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=unescaped_search.scan(/\S+/).each.map do |g| - (g.to_s =~/(AND|OR)/) \ - ? ('|') - : (%{#{g.to_s}}) - end.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? (c['body'].gsub(/(<a\s+href="https?:\/\/[^><\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{<span style="background-color: #{@color_match}">\\1</span>})) - : c['body'] - %{<hr><p><font size="2">ocn <b><a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a></b>:</font></p>#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}<hr><p><font size="2">ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{<a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{<a href="#{path_html_doc(location,c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? 
true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{<hr /><font size="2" color="#666666">Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]</font><br>} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='<pre>' + CGI::escapeHTML(e.backtrace.reverse.join("\n")) - s << CGI::escapeHTML(e.message) + '</pre>' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end diff --git a/util/rb/cgi/sisu_search_sqlite.cgi b/util/rb/cgi/sisu_search_sqlite.cgi deleted file mode 100755 index 0fe8ea5..0000000 --- a/util/rb/cgi/sisu_search_sqlite.cgi +++ /dev/null @@ -1,937 +0,0 @@ -#!/usr/bin/env ruby -=begin - * Name: SiSU information Structuring Universe - * Author: Ralph Amissah - * http://www.jus.uio.no/sisu - * http://www.jus.uio.no/sisu/SiSU/download - - * Description: generates naive cgi search form for search of sisu database (sqlite) - * Name: SiSU generated sample cgi search form - - * Description: generated sample cgi search form for SiSU - (SiSU is a framework for document structuring, publishing and search) - - * Author: Ralph Amissah - - * Copyright: (C) 1997 - 2014, Ralph Amissah, All Rights Reserved. 
- - * License: GPL 3 or later: - - SiSU, a framework for document structuring, publishing and search - - Copyright (C) Ralph Amissah - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the Free - Software Foundation, either version 3 of the License, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, but WITHOUT - ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - You should have received a copy of the GNU General Public License along with - this program. If not, see <http://www.gnu.org/licenses/>. - - If you have Internet connection, the latest version of the GPL should be - available at these locations: - <http://www.fsf.org/licenses/gpl.html> - <http://www.gnu.org/licenses/gpl.html> - <http://www.jus.uio.no/sisu/gpl.fsf> - - * SiSU uses: - * Standard SiSU markup syntax, - * Standard SiSU meta-markup syntax, and the - * Standard SiSU object citation numbering and system - - * Hompages: - <http://www.jus.uio.no/sisu> - <http://www.sisudoc.org> - - * Ralph Amissah - <ralph@amissah.com> - <ralph.amissah@gmail.com> - -=end - begin - require 'cgi' - require 'fcgi' - require 'sqlite3' - rescue LoadError - puts 'cgi, fcgi or sqlite3 NOT FOUND (LoadError)' - end - @stub_default='manual' - @image_src="http://#{ENV['HTTP_HOST']}/_sisu/image_sys" - @hosturl_cgi="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}" - @hosturl_files="http://#{ENV['HTTP_HOST']}" - @output_dir_structure_by='filetype' - @lingual='multi' - @db_name_prefix='SiSU.7a.' 
- @base="http://#{ENV['HTTP_HOST']}#{ENV['PATH_INFO']}#{ENV['SCRIPT_NAME']}" -#Common TOP - @@offset=0 - @@canned_search_url=@base - @color_heading='#DDFFAA' - @color_match='#ffff48' - class Form - def initialize(base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can='') - search_note='' if checked_searched !~/\S/ - the_can='' if checked_url !~/\S/ - search_field='' if checked_echo !~/\S/ - @base,@search_field,@selected_db,@result_type,@checked_sql_limit,@checked_tip,@checked_stats,@checked_searched,@checked_url,@checked_case,@checked_echo,@checked_sql,@checked_all,@checked_none,@checked_selected,@checked_default,@search_note,@the_can=base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can - @tip=if checked_tip =~/\S/ - '<font size="2" color="#666666">text:__; fulltxt:__; keywords:__; title:__; author:__; topic_register:__; subject:__; description:__; publisher:__; editor:__; contributor:__; date:__; type:__; format:__; identifier:__; source:__; language:__; relation:__; coverage:__; rights:__; comment:__; abstract:__; filename:__;</font><br />' - else '' - end - end - def submission_form - search_form=<<-WOK - <!DOCTYPE html> - <html> - <head> - <title> - <meta charset="utf-8"> - <meta name="sourcefile" content="SiSU._sst" /> - SiSU search form (sample): SiSU information Structuring Universe - </title> - <link rel="generator" href="http://www.jus.uio.no/sisu" /> - <link rel="shortcut icon" href="http://#{ENV['HTTP_HOST']}/_sisu/image_sys/rb7.ico" /> - <link href="../_sisu/css/html.css" rel="stylesheet"> - </head> - <body lang="en" xml:lang="en"> - <table summary="band" border="0" cellpadding="3" cellspacing="0"> - 
<tr><td width="20%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br /><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br /><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - </table> - </td> - <td> - <label for="find"><b>SiSU (generated sample) search form (content organised by filetype)</b></label> - </td></tr> - </table> - <form action="#{@base}" id="Test Form" method="post"> - <table cellpadding="2"> - <tr><td valign=\"top\"> - <textarea id="find" name="find" type="text" rows="6" cols="40" maxlength="256">#{@search_field}</textarea> - </td> - <td valign=\"top\"> - #{@tip} - #{@search_note} - #{@the_can} - </td></tr></table> - <td valign=\"top\"><tr><td> - <!input type="text" id="find" name="find" value="#{@search_field}" /> - <!input type="text" id="find" name="find" value="" /> - <font size="2" color="#222222"> - <b>to search:</b> select which database to search (drop-down menu below); enter your search query (in the form above); and <b>click on the search button</b> (below) - <br /> - <select name="db" size="1"> - #{@selected_db} - <option value="SiSU.7a.manual">manual</option> - </select> - <input type="submit" value="SiSU search" /> - <input type="radio" name="view" value="index" #{@result_type[:index]}> index - <input type="radio" name="view" value="text" #{@result_type[:text]}> text / grep - <br /> - match limit: - <input type="radio" name="sql_match_limit" value="1000" #{@checked_sql_limit[:l1000]}> 1,000 - <input type="radio" name="sql_match_limit" value="2500" #{@checked_sql_limit[:l2500]}> 2,500 - <br /> - <input type="checkbox" name="echo" #{@checked_echo}> echo query - <input type="checkbox" name="stats" #{@checked_stats}> result stats - <input type="checkbox" name="url" #{@checked_url}> search url - <input type="checkbox" name="searched" #{@checked_searched}> searched - <input type="checkbox" name="tip" #{@checked_tip}> 
available fields - <input type="checkbox" name="sql" #{@checked_sql}> sql statement - <br /> - checks: - <input type="radio" name="checks" value="check_default" #{@checked_default}> default - <input type="radio" name="checks" value="check_selected" #{@checked_selected}> selected - <input type="radio" name="checks" value="check_all" #{@checked_all}> all - <input type="radio" name="checks" value="check_none" #{@checked_none}> none - </font> - </td></tr> - </table> - </form> - WOK - end - end - class SearchRequest #% search_for - attr_accessor :text1,:fulltext,:keywords,:title,:author,:topic_register,:subject,:description,:publisher,:editor,:contributor,:date,:type,:format,:identifier,:source,:language,:relation,:coverage,:rights,:comment,:abstract,:owner,:date_created,:date_issued,:date_modified,:date_available,:date_valid,:filename - def initialize(search_field='',q='') - @search_field,@q=search_field,q - @text1=@fulltext=@keywords=@title=@author=@topic_register=@subject=@description=@publisher=@editor=@contributor=@date=@type=@format=@identifier=@source=@language=@relation=@coverage=@rights=@comment=@abstract=@owner=@date_created=@date_issued=@date_modified=@date_available=@date_valid=@filename='' - if @search_field=~/\S/ - @text1=text_to_match('text:') - @fulltext=text_to_match('fulltxt:') - @topic_register=text_to_match('topic_register:') - @title=text_to_match('title:') # DublinCore 1 - title - @author=text_to_match('(?:author|creator)s?:') # DublinCore 2 - creator/author - @subject=text_to_match('subj(?:ect)?:') # DublinCore 3 - subject - @description=text_to_match('description:') # DublinCore 4 - description - @publisher=text_to_match('pub(?:lisher)?:') # DublinCore 5 - publisher - @editor=text_to_match('editor:') - @contributor=text_to_match('contributor:') # DublinCore 6 - contributor - @date=text_to_match('date:') # DublinCore 7 - date dd-mm-yy - @type=text_to_match('type:') # DublinCore 8 - type - @format=text_to_match('format:') # DublinCore 9 - format - 
@identifier=text_to_match('identifier:') # DublinCore 10 - identifier - @source=text_to_match('source:') # DublinCore 11 - source - @language=text_to_match('language:') # DublinCore 12 - language - @relation=text_to_match('relation:') # DublinCore 13 - relation - @coverage=text_to_match('coverage:') # DublinCore 14 - coverage - @rights=text_to_match('rights:') # DublinCore 15 - rights - @keywords=text_to_match('key(?:words?)?:') - @comment=text_to_match('comment:') - @abstract=text_to_match('abs(?:tract)?:') - @owner=text_to_match('owner:') - @date_created=text_to_match('date_created:') - @date_issued=text_to_match('date_issued:') - @date_modified=text_to_match('date_modified:') - @date_available=text_to_match('date_available:') - @date_valid=text_to_match('date_valid:') - @filename=text_to_match('filename:') - @text1=text_to_match unless @keywords or @author or @title or @text1 or @fulltext or @comment or @abstract or @rights or @subject or @publisher or @date or @filename or @topic_register - else - @text1=q['s1'] if q['s1']=~/\S/ - @fulltext=q['ft'] if q['ft']=~/\S/ - @keywords=q['key'] if q['key']=~/\S/ - @title=q['ti'] if q['ti']=~/\S/ - @author=q['au'] if q['au']=~/\S/ - @topic_register=q['tr'] if q['tr']=~/\S/ - @subject=q['sj'] if q['sj']=~/\S/ - @description=q['dsc'] if q['dsc']=~/\S/ - @publisher=q['pb'] if q['pb']=~/\S/ - @editor=q['cntr'] if q['cntr']=~/\S/ - @contributor=q['cntr'] if q['cntr']=~/\S/ - @date=q['dt'] if q['dt']=~/\S/ - @type=q['ty'] if q['ty']=~/\S/ - @identifier=q['id'] if q['id']=~/\S/ - @source=q['src'] if q['src']=~/\S/ - @language=q['lang'] if q['lang']=~/\S/ - @relation=q['rel'] if q['rel']=~/\S/ - @coverage=q['cov'] if q['cov']=~/\S/ - @rights=q['cr'] if q['cr']=~/\S/ - @comment=q['co'] if q['co']=~/\S/ - @abstract=q['ab'] if q['ab']=~/\S/ - @date_created=q['dtc'] if q['dtc']=~/\S/ - @date_issued=q['dti'] if q['dti']=~/\S/ - @date_modified=q['dtm'] if q['dtm']=~/\S/ - @date_available=q['dta'] if q['dta']=~/\S/ - 
@date_valid=q['dtv'] if q['dtv']=~/\S/ - @filename=if q['doc'] and q['search'] !~/search db/ then q['doc'] - elsif q['fns']=~/\S/ then q['fns'] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - end - end - def text_to_match(identifier='') - m={ - string: /#{identifier}\s*(.+?)/, - string: /#{identifier}\s*(.+?)(?:;|\n|\r|$)/, - word: /#{identifier}[\s(]*(\S+)/ - } - search_string=if @search_field =~m[:word] - search_string=if @search_field =~m[:braces] then m[:braces].match(@search_field)[1] - elsif @search_field =~m[:string] then m[:string].match(@search_field)[1] - else - str=m[:word].match(@search_field)[1] - str=str.gsub(/[()]/,'') - str - end - search_string=search_string.strip.gsub(/\s+/,'+') - #else - # "__" - end - end - end - class DBI_SearchString - def initialize(l,t,q,cse=false) - @l,@t,@q=l,t,q - end - def string - search={ search: [], flag: false } - if @t =~/\S+/ or @q =~/\S+/ - if @t =~/\S+/ then unescaped_search=CGI.unescape(@t) - elsif @q =~/\S+/ then unescaped_search=CGI.unescape(@q) - end - search_construct=[] - unescaped_search=unescaped_search.gsub(/\s*(AND|OR)\s*/,"%' \) \\1 #{@l} LIKE \( '%"). - gsub(/(.+)/,"#{@l} LIKE \( '%\\1%' \)") - search_construct << unescaped_search - search_construct=search_construct.join(' ') - search[:search] << search_construct - search[:flag]=true - search - end - search - end - end - class DBI_SearchStatement - attr_reader :text_search_flag,:sql_select_body_format,:sql_offset,:sql_limit - def initialize(conn,search_for,q,c) - @conn=conn - @text_search_flag=false - @sql_statement={ body: '', endnotes: '', range: '' } - #@offset||=@@offset - #@offset+=@@limit - search={ text: [], endnotes: [] } - cse=(c =~/\S/) ? 
true : false - st=DBI_SearchString.new('doc_objects.clean',search_for.text1,q['s1'],cse).string - se=DBI_SearchString.new('endnotes.clean',search_for.text1,q['s1'],cse).string - @text_search_flag=st[:flag] - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.fulltext',search_for.fulltext,q['ft'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.title',search_for.title,q['ti'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_author',search_for.author,q['au'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_topic_register',search_for.topic_register,q['tr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_subject',search_for.subject,q['sj'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.classify_keywords',search_for.keywords,q['key'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_description',search_for.description,q['dsc'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.publisher',search_for.publisher,q['pb'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_editor',search_for.editor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.creator_contributor',search_for.contributor,q['cntr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.date_published',search_for.date,q['dt'],cse).string - if st[:flag] - search[:text] << st[:search] - end - 
st=DBI_SearchString.new('metadata_and_text.notes_type',search_for.type,q['ty'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.original_source',search_for.source,q['src'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.language_document_char',search_for.language,q['lang'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_relation',search_for.relation,q['rel'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_coverage',search_for.coverage,q['cov'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.rights_all',search_for.rights,q['cr'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_comment',search_for.comment,q['co'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.notes_abstract',search_for.abstract,q['ab'],cse).string - if st[:flag] - search[:text] << st[:search] - end - st=DBI_SearchString.new('metadata_and_text.src_filename',search_for.filename,q['fns'],cse).string - if st[:flag] - search[:text] << st[:search] - end - @@limit=q['ltd'] if q['ltd']=~/\d+/ # 1000 - @@offset=q['off'] if q['off']=~/\d+/ # 0 - @search_text='' - @search_text=search[:text].flatten.join(' AND ') - @search_text=@search_text.gsub(/(doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\)\s+(?:(?:AND|OR)\s+doc_objects\.clean\s+LIKE\s+\(\s*'%[^']+%'\s*\))+)/,'(\1)') - end - def sql_offset - @@offset - end - def sql_match_limit - @@limit - end - def sql_canned_search - @offset_next=sql_offset.to_i + sql_match_limit.to_i - @offset_previous=sql_offset.to_i - sql_match_limit.to_i - def current - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 
sql_offset.to_s - end - def next - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_next.to_s - end - def previous - @offset_previous >= 0 \ - ? (@@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + @offset_previous.to_s) - : '' - end - def start - @@canned_search_url.to_s + '<d=' + sql_match_limit.to_s + '&off=' + 0.to_s - end - self - end - def pre_next(beyond_limit,img) - can=sql_canned_search - page=(sql_offset.to_i + sql_match_limit.to_i)/sql_match_limit.to_i - if beyond_limit - if page.to_s =~ /^1$/ - %{<br /><center> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - elsif page.to_s =~ /^2$/ - %{<br /><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - else - %{<br /><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - <a href="#{can.next}"> - <img border="0" width="22" height="22" src="#{img}/arrow_next_red.png" alt=" >>" /> - </a> - </center>} - end - else - if page.to_s =~ /^1$/ then '' - elsif page.to_s =~ /^2$/ - %{<br /><center> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. #{page.to_s} - </center>} - else - %{<br /><center> - <a href="#{can.start}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="|< " /> - </a> - <a href="#{can.previous}"> - <img border="0" width="22" height="22" src="#{img}/arrow_prev_red.png" alt="<< " /> - </a> - pg. 
#{page.to_s} - </center>} - end - end - end - def sql_select_body - limit ||=@@limit - offset ||=@@offset - @sql_statement[:body]=%{SELECT metadata_and_text.title, metadata_and_text.creator_author, metadata_and_text.src_filename, metadata_and_text.language_document_char, metadata_and_text.notes_suffix, doc_objects.body, doc_objects.seg, doc_objects.ocn, metadata_and_text.tid FROM doc_objects, metadata_and_text WHERE #{@search_text} AND doc_objects.metadata_tid = metadata_and_text.tid ORDER BY metadata_and_text.language_document_char, metadata_and_text.title, metadata_and_text.src_filename, doc_objects.ocn} - @sql_statement[:range]=%{LIMIT #{limit} OFFSET #{offset} ;} - select=@sql_statement[:body] + ' ' + @sql_statement[:range] - select - end - def sql_select_body_format - %{<font color="#666666" size="2">#{sql_select_body}</font>} - end - def contents - @conn.execute(sql_select_body) - end - end - def tail - <<-'WOK' - <br /><hr /><br /> -<table summary="SiSU summary" cellpadding="2" border="0"> - <!-- widget sisu --> -<tr><td valign="top" width="10%"> - <table summary="home button / home information" border="0" cellpadding="3" cellspacing="0"> - <tr><td align="left"> - <br /><a href="http://sisudoc.org/" target="_top"> - <b>SiSU</b> - </a> - <br /><a href="http://git.sisudoc.org/" target="_top"> - git - </a> - </td></tr> - </table> -</td> -<td valign="top" width="45%"> -<!-- SiSU Rights --> - <p class="tiny_left"><font color="#666666" size="2"> - Generated by - SiSU 6.3.1 2014-10-19 (2014w41/7) - <br /> - <a href="http://www.sisudoc.org" > - <b>SiSU</b></a> <sup>©</sup> Ralph Amissah - 1993, current 2014. - All Rights Reserved. 
- <br /> - SiSU is software for document structuring, publishing and search, - <br /> - <a href="http://www.jus.uio.no/sisu" > - www.jus.uio.no/sisu - </a> - and - <a href="http://www.sisudoc.org" > - www.sisudoc.org - </a> - sources - <a href="http://git.sisudoc.org" > - git.sisudoc.org - </a> - <br /> - <i>w3 since October 3 1993</i> - <a href="mailto:ralph@amissah.com" > - ralph@amissah.com - </a> - <br /> - mailing list subscription - <a href="http://lists.sisudoc.org/listinfo/sisu" > - http://lists.sisudoc.org/listinfo/sisu - </a> - <br /> - <a href="mailto:sisu@lists.sisudoc.org" > - sisu@lists.sisudoc.org - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU using: - <br />Standard SiSU markup syntax, - <br />Standard SiSU meta-markup syntax, and the - <br />Standard SiSU <u>object citation numbering</u> and system, (object/text identifying/locating system) - <br /> - <sup>©</sup> Ralph Amissah 1997, current 2014. - All Rights Reserved. - </font></p> -</td></tr> - <!-- widget way better --> -<tr><td valign="top" width="10%"> - <p class="tiny_left"><font color="#666666" size="2"> - <a href="http://www.gnu.org/licenses/gpl.html"> - .: - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU is released under - <a href="http://www.gnu.org/licenses/gpl.html">GPL v3</a> - or later, - <a href="http://www.gnu.org/licenses/gpl.html"> - http://www.gnu.org/licenses/gpl.html - </a> - </font></p> -</td><td valign="top" width="45%"> - <p class="tiny_left"><font color="#666666" size="2"> - SiSU, developed using - <a href="http://www.ruby-lang.org/en/"> - Ruby - </a> - on - <a href="http://www.debian.org/"> - Debian/Gnu/Linux - </a> - software infrastructure, - with the usual GPL (or OSS) suspects. 
- </font></p> -</td></tr> -</table> - <a name="bottom" id="bottom"></a><a name="down" id="down"></a><a name="end" id="end"></a><a name="finish" id="finish"></a><a name="stop" id="stop"></a><a name="credits" id="credits"></a> - </body></html> - WOK - end - @tail=tail - @counter_txt_doc,@counter_txt_ocn,@counter_endn_doc,@counter_endn_ocn=0,0,0,0 - @counters_txt,@counters_endn,@sql_select_body='','','' - FCGI.each_cgi do |cgi| - begin # all code goes in begin section - @search={ text: [], endnotes: [] } - q=CGI.new - @db=if cgi['db'] =~ /#{@db_name_prefix}(\S+)/ - @stub=$1 - cgi['db'] - else - @stub=@stub_default - @db_name_prefix + @stub - end - checked_url,checked_stats,checked_searched,checked_tip,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,selected_db='','','','','','','','','' - result_type=(cgi['view']=~/text/) \ - ? result_type={ index: '', text: 'checked'} - : result_type={ index: 'checked', text: ''} - @@limit=if cgi['sql_match_limit'].to_s=~/2500/ - checked_sql_limit={ l1000: '', l2500: 'checked'} - '2500' - else - checked_sql_limit={ l1000: 'checked', l2500: ''} - '1000' - end - checked_echo='checked' if cgi['echo'] =~/\S/ - checked_stats='checked' if cgi['stats'] =~/\S/ - checked_url='checked' if cgi['url'] =~/\S/ or cgi['u'].to_i==1 - checked_searched='checked' if cgi['searched'] =~/\S/ - checked_tip='checked' if cgi['tip'] =~/\S/ - checked_case='checked' if cgi['casesense'] =~/\S/ - checked_sql='checked' if cgi['sql'] =~/\S/ - if cgi['checks'] =~/check_all/ or cgi['check_all'] =~/\S/ or cgi['a'].to_i==1 - checked_all='checked' - checked_echo=checked_stats=checked_url=checked_searched=checked_tip=checked_sql='checked' - checked_none='' - elsif cgi['checks'] =~/check_none/ - checked_none='checked' - checked_all=checked_url=checked_stats=checked_searched=checked_tip=checked_echo=checked_sql='' - elsif cgi['checks'] =~/check_selected/ - checked_selected='checked' - elsif cgi['checks'] =~/check_default/ - 
checked_default='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - else - checked_selected='checked' - checked_echo=checked_stats=checked_url='checked' - checked_searched=checked_tip=checked_case=checked_sql='' - end - selected_db=case cgi['db'] - when /SiSU.7a.manual/ then '<option value="SiSU.7a.manual">manual</option>' - end - db_name='sisu_sqlite.db' - db_sqlite=case cgi['db'] - when /SiSU.7a.manual/ then "/srv/complete.sisudoc.org/web/manual/#{db_name}" - else "/srv/complete.sisudoc.org/web/manual/#{db_name}" - end - @conn=SQLite3::Database.new(db_sqlite) - @conn.results_as_hash=true - search_field=cgi['find'] if cgi['find'] # =~/\S+/ - @search_for=SearchRequest.new(search_field,q) #.analyze #% search_for - #% searches - #Canned_search.new(@base,@search_for.text1,cgi) - if @search_for.text1=~/\S+/ or @search_for.fulltext=~/\S+/ or @search_for.author=~/\S+/ or @search_for.topic_register=~/\S+/ #and search_field =~/\S/ - s1='s1=' + CGI.escape(@search_for.text1) if @search_for.text1=~/\S/ - ft='&ft=' + CGI.escape(@search_for.fulltext) if @search_for.fulltext=~/\S/ - key='key=' + CGI.escape(@search_for.keywords) if @search_for.keywords=~/\S/ - ti='&ti=' + CGI.escape(@search_for.title) if @search_for.title=~/\S/ - au='&au=' + CGI.escape(@search_for.author) if @search_for.author=~/\S/ - tr='&tr=' + CGI.escape(@search_for.topic_register) if @search_for.topic_register=~/\S/ - sj='&sj=' + CGI.escape(@search_for.subject) if @search_for.subject=~/\S/ - dsc='&dsc=' + CGI.escape(@search_for.description) if @search_for.description=~/\S/ - pb='&pb=' + CGI.escape(@search_for.publisher) if @search_for.publisher=~/\S/ - edt='&edt=' + CGI.escape(@search_for.editor) if @search_for.editor=~/\S/ - cntr='&cntr=' + CGI.escape(@search_for.contributor) if @search_for.contributor=~/\S/ - dt='&dt=' + CGI.escape(@search_for.date) if @search_for.date=~/\S/ - ty='&ty=' + CGI.escape(@search_for.type) if 
@search_for.type=~/\S/ - id='&id=' + CGI.escape(@search_for.identifier) if @search_for.identifier=~/\S/ - src='&src=' + CGI.escape(@search_for.source) if @search_for.source=~/\S/ - lang='&lang=' + CGI.escape(@search_for.language) if @search_for.language=~/\S/ - rel='&rel=' + CGI.escape(@search_for.relation) if @search_for.relation=~/\S/ - cov='&cov=' + CGI.escape(@search_for.coverage) if @search_for.coverage=~/\S/ - cr='&cr=' + CGI.escape(@search_for.rights) if @search_for.rights=~/\S/ - co='&co=' + CGI.escape(@search_for.comment) if @search_for.comment=~/\S/ - ab='&ab=' + CGI.escape(@search_for.abstract) if @search_for.abstract=~/\S/ - dtc='&dtc=' + CGI.escape(@search_for.date_created) if @search_for.date_created=~/\S/ - dti='&dti=' + CGI.escape(@search_for.date_issued) if @search_for.date_issued=~/\S/ - dtm='&dtm=' + CGI.escape(@search_for.date_modified) if @search_for.date_modified=~/\S/ - dta='&dta=' + CGI.escape(@search_for.date_available) if @search_for.date_available=~/\S/ - dtv='&dtv=' + CGI.escape(@search_for.date_valid) if @search_for.date_valid=~/\S/ - fns='&fns=' + CGI.escape(@search_for.filename) if @search_for.filename=~/\S/ - @@canned_search_url=(checked_all =~/checked/) \ - ? "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}&a=1" - : "#{@base}?#{s1}#{ft}#{key}#{ti}#{au}#{tr}#{sj}#{dsc}#{pb}#{edt}#{cntr}#{dt}#{ty}#{id}#{src}#{lang}#{rel}#{cov}#{cr}#{co}#{ab}#{dtc}#{dti}#{dtm}#{dta}#{dtv}#{fns}&db=#{cgi['db']}&view=#{cgi['view']}" - mod=ft=~/\S+/ ? 
(ft.gsub(/ft/,'s1')) : s1 - @canned_base_url="#{@base}?#{mod}&db=#{cgi['db']}" - if checked_case=~/\S/ - @search[:text][1]=%{doc_objects.clean~'#{@search_for.text1}'} #s1 - else - @search[:text][1]=%{doc_objects.clean~*'#{@search_for.text1}'} #s1 - end - canned_note='search url:' - else - @@canned_search_url="#{@base}?db=#{@db}&view=index" - canned_note='search url example:' - end - if search_field =~/\S+/ - analyze_format=search_field.gsub(/\s*\n/,'; ') - elsif checked_all =~/checked/ or checked_url =~/checked/ - canned_search=@@canned_search_url.scan(/(?:s1|ft|au|ti|fns|tr)=[^&]+/) - af=canned_search.join('; ') - af=af.gsub(/s1=/,'text: '). - gsub(/ft=/,'fulltxt: '). - gsub(/au=/,'author: '). - gsub(/ti=/,'title: '). - gsub(/fns=/,'filename: '). - gsub(/tr=/,'topic_register: '). - gsub(/%2B/,' ') - analyze_format=af - st=af.split(/\s*;\s*/) - search_field=st.join("\n") - end - green=%{<font size="2" color="#004000">} - canned_search_url_txt=CGI.escapeHTML(@@canned_search_url) - the_can=%{<font size="2" color="#666666">#{canned_note} <a href="#{@@canned_search_url}">#{canned_search_url_txt}</a></font><br />} - p_text=p_fulltext=p_keywords=p_title=p_author=p_topic_register=p_subject=p_description=p_publisher=p_editor=p_contributor=p_date=p_type=p_format=p_identifier=p_source=p_language=p_relation=p_coverage=p_rights=p_comment=p_abstract=p_filename='' - p_filename=%{filename: #{green}#{@search_for.filename}</font><br />} if @search_for.filename =~/\S+/ - p_text=%{text: #{green}#{@search_for.text1}</font><br />} if @search_for.text1 =~/\S+/ - p_fulltext=%{fulltxt: #{green}#{@search_for.fulltext}</font><br />} if @search_for.fulltext =~/\S+/ - p_title=%{title: #{green}#{@search_for.title}</font><br />} if @search_for.title =~/\S+/ - p_author=%{author: #{green}#{@search_for.author}</font><br />} if @search_for.author =~/\S+/ - p_editor=%{editor: #{green}#{@search_for.editor}</font><br />} if @search_for.editor=~/\S+/ - p_contributor=%{contributor: 
#{green}#{@search_for.contributor}</font><br />} if @search_for.contributor =~/\S+/ - p_date=%{date: #{green}#{@search_for.date}</font><br />} if @search_for.date =~/\S+/ - p_rights=%{rights: #{green}#{@search_for.rights}</font><br />} if @search_for.rights =~/\S+/ - p_topic_register=%{topic_register: #{green}#{@search_for.topic_register}</font><br />} if @search_for.topic_register =~/\S+/ - p_subject=%{subject: #{green}#{@search_for.subject}</font><br />} if @search_for.subject =~/\S+/ - p_keywords=%{keywords: #{green}#{@search_for.keywords}</font><br />} if @search_for.keywords =~/\S+/ - p_identifier=%{identifier: #{green}#{@search_for.identifier}</font><br />} if @search_for.identifier =~/\S+/ - p_type=%{type: #{green}#{@search_for.type}</font><br />} if @search_for.type =~/\S+/ - p_format=%{format: #{green}#{@search_for.format}</font><br />} if @search_for.format =~/\S+/ - p_relation=%{relation: #{green}#{@search_for.relation}</font><br />} if @search_for.relation =~/\S+/ - p_coverage=%{coverage: #{green}#{@search_for.coverage}</font><br />} if @search_for.coverage =~/\S+/ - p_description=%{description: #{green}#{@search_for.description}</font><br />} if @search_for.description =~/\S+/ - p_abstract=%{abstract: #{green}#{@search_for.abstract}</font><br />} if @search_for.abstract =~/\S+/ - p_comment=%{comment: #{green}#{@search_for.comment}</font><br />} if @search_for.comment =~/\S+/ - p_publisher=%{publisher: #{green}#{@search_for.publisher}</font><br />} if @search_for.publisher =~/\S+/ - p_source=%{source: #{green}#{@search_for.source}</font><br />} if @search_for.source =~/\S+/ - p_language=%{language: #{green}#{@search_for.language}</font><br />} if @search_for.language =~/\S+/ - search_note=<<-WOK - <font size="2" color="#666666"> - <b>database:</b> #{green}#{@db}</font>; <b>selected view:</b> #{green}#{cgi['view']}</font> - <b>search string:</b> "#{green}#{analyze_format}</font>"<br /> - #{p_text} #{p_fulltext} #{p_keywords} #{p_title} #{p_author} 
#{p_topic_register} #{p_subject} #{p_description} #{p_publisher} #{p_editor} #{p_contributor} #{p_date} #{p_type} #{p_format} #{p_identifier} #{p_source} #{p_language} #{p_relation} #{p_coverage} #{p_rights} #{p_comment} #{p_abstract} #{p_filename} - </font> - WOK - #eg = %{canned search e.g.:<br /> <a href="#{url}">#{url}</a><br />find: #{analyze}<br />database: #{database}} - #% dbi_canning - @header=Form.new(@base,search_field,selected_db,result_type,checked_sql_limit,checked_tip,checked_stats,checked_searched,checked_url,checked_case,checked_echo,checked_sql,checked_all,checked_none,checked_selected,checked_default,search_note,the_can).submission_form #% form - unless q['s1'] =~/\S/ or q['au'] =~/\S/ or @search[:text][1] =~/\S/ - print "Content-type: text/html\n\n" - puts (@header+@tail) - else #% searches - s1=(@search_for.text1 =~/\S/) \ - ? @search_for.text1 - : 'Unavailable' - if checked_case=~/\S/ - @search[:text]<<%{doc_objects.clean~'#{CGI.unescape(s1)}'} - else - @search[:text]<<%{doc_objects.clean~*'#{CGI.unescape(s1)}'} - end - #% dbi_request - dbi_statement=DBI_SearchStatement.new(@conn,@search_for,q,checked_case) - @text_search_flag=false - @text_search_flag=dbi_statement.text_search_flag - s_contents=dbi_statement.contents - @body_main='' - @search_regx=nil - oldtid=0 - if @text_search_flag - if checked_sql =~/\S/ - sql_select_body=dbi_statement.sql_select_body_format - else sql_select_body='' - end - @body_main << sql_select_body - #@body_main << '<p><hr><br /><b>Main Text:</b><br />' << sql_select_body - else - end - @hostpath="#{@hosturl_files}/#{@stub}" - def path_manifest(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{@hostpath}/#{fn}/sisu_manifest.html" - : "#{@hostpath}/#{fn}/sisu_manifest.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? 
"#{@hostpath}/manifest/#{fn}.html" - : "#{@hostpath}/manifest/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/manifest/#{fn}.html" - end - end - def path_html_seg(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - "#{@hostpath}/#{fn}" - when 'filetype' - "#{@hostpath}/html/#{fn}" - else - "#{@hostpath}/#{ln}/html/#{fn}" - end - end - def path_toc(fn,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/toc.html" - : "#{path_html_seg(fn,ln)}/toc.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/toc.html" - end - end - def path_filename(fn,seg,ln=nil) - if @output_dir_structure_by =='filename' \ - or @output_dir_structure_by =='filetype' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/#{seg}.html" - : "#{path_html_seg(fn,ln)}/#{seg}.#{ln}.html" - else - "#{path_html_seg(fn,ln)}/#{seg}.html" - end - end - def path_html_doc(fn,ln=nil) - case @output_dir_structure_by - when 'filename' - @lingual =='mono' \ - ? "#{path_html_seg(fn,ln)}/scroll.html" - : "#{path_html_seg(fn,ln)}/scroll.#{ln}.html" - when 'filetype' - @lingual =='mono' \ - ? "#{@hostpath}/html/#{fn}.html" - : "#{@hostpath}/html/#{fn}.#{ln}.html" - else - "#{@hostpath}/#{ln}/html/#{fn}.html" - end - end - #% text_objects_body - s_contents.each do |c| #% text body - location=c['src_filename'][/(.+?)\.(?:ssm\.sst|sst)$/,1] - file_suffix=c['src_filename'][/.+?\.(ssm\.sst|sst)$/,1] - lang=if location =~ /\S+?~(\S\S\S?)$/ - l=location[/\S+?~(\S\S\S?)$/,1] - location=location.gsub(/(\S+?)~\S\S\S?/,'\1') - l=".#{l}" - else '' - end - #% metadata_found_body - if c['tid'].to_i != oldtid.to_i - ti=c['title'] - can_txt_srch=(cgi['view']=~/index/) \ - ? 
%{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=text"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - : %{<a href="#{@canned_base_url}&fns=#{c['src_filename']}&lang=#{c['language_document_char']}&view=index"><img border="0" width="24" height="16" src="#{@image_src}/b_search.png" alt="search"></a> } - title=%{<span style="background-color: #{@color_heading}"><a href="#{path_toc(location,c['language_document_char'])}"><img border="0" width="15" height="18" src="#{@image_src}/b_toc.png" alt="toc html"> #{ti}</a></span> [#{c['language_document_char']}] by #{c['creator_author']} <a href="#{path_manifest(location,c['language_document_char'])}"><img border="0" width="15" height="15" src="#{@image_src}/b_info.png" alt="manifest"></a> #{can_txt_srch}<br />} if file_suffix=~/s/ #hmm watch file_suffix - title=@text_search_flag \ - ? '<br /><hr>'+title - : '<br />'+title - @counter_txt_doc+=1 - oldtid=c['tid'].to_i - else title='' - end - if @text_search_flag - if cgi['view']=~/text/ \ - or (cgi['view']!~/index/ and cgi['search'] !~/search db/) #% txt body - text=if c['suffix'] !~/1/ #seg - if @search_for.text1 =~/\S+/ \ - or q['s1'] =~/\S+/ #% only this branch is working !! - unescaped_search=if @search_for.text1 =~/\S+/ - CGI.unescape(@search_for.text1) - elsif q['s1'] =~/\S+/ - CGI.unescape(q['s1']) - else nil - end - @search_regx=if unescaped_search #check - search_regex=[] - build=unescaped_search.scan(/\S+/).each do |g| - (g.to_s =~/(AND|OR)/) \ - ? (search_regex << '|') - : (search_regex << %{#{g.to_s}}) - end - search_regex=search_regex.join(' ') - search_regex=search_regex.gsub(/\s*\|\s*/,'|') - Regexp.new(search_regex, Regexp::IGNORECASE) - else nil - end - else nil - end - matched_para=(@search_regx.to_s.class==String && @search_regx.to_s=~/\S\S+/) \ - ? 
(c['body'].gsub(/(<a\s+href="https?:\/\/[^><\s]+#{@search_regx}[^>]+?>|#{@search_regx})/mi,%{<span style="background-color: #{@color_match}">\\1</span>})) - : c['body'] - %{<hr><p><font size="2">ocn <b><a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a></b>:</font></p>#{matched_para}} - elsif c['suffix'] =~/1/ #doc - %{#{title}<hr><p><font size="2">ocn #{c['ocn']}:#{c['body']}} - end - @counter_txt_ocn+=1 - output=title+text - else #elsif cgi['view']=~/index/ #% idx body - if c['suffix'] !~/1/ #seg - index=%{<a href="#{path_filename(location,c['seg'],c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } if @text_search_flag - elsif c['suffix'] =~/1/ #doc #FIX - index=%{<a href="#{path_html_doc(location,c['language_document_char'])}##{c['ocn']}">#{c['ocn']}</a>, } - end - if c['seg'] =~/\S+/ - if @text_search_flag - @counter_txt_ocn+=1 - output=title+index - end - else - @counter_txt_ocn+=1 - output=c['suffix'] !~/1/ \ - ? title+index - : %{#{title}#{c['ocn'].sort}, } - end - end - else output=title - end - @counters_txt=if @counter_txt_doc > 0 - if checked_stats =~/\S/ - @@lt_t=(@counter_txt_ocn==dbi_statement.sql_match_limit.to_i) ? true : false - start=(@@offset.to_i+1).to_s - range=(@@offset.to_i+@counter_txt_ocn.to_i).to_s - %{<hr /><font size="2" color="#666666">Found #{@counter_txt_ocn} times in the main body of #{@counter_txt_doc} documents [ matches #{start} to #{range} ]</font><br />} - else '' - end - else '' - end - @body_main << output #+ details - end - oldtid = 0 - offset=dbi_statement.sql_offset.to_s - limit=dbi_statement.sql_match_limit.to_s - @@lt_t ||=false; @@lt_e ||=false - canned=(@@lt_t or @@lt_e) \ - ? 
dbi_statement.pre_next(true,@image_src).to_s - : dbi_statement.pre_next(false,@image_src).to_s - limit=dbi_statement.sql_match_limit.to_s - cgi.out{@header.force_encoding("UTF-8") + @counters_txt.force_encoding("UTF-8") + @counters_endn.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @body_main.force_encoding("UTF-8") + canned.force_encoding("UTF-8") + @tail.force_encoding("UTF-8")} #% print cgi_output_header+counters+body - end - rescue Exception => e - s='<pre>' + CGI::escapeHTML(e.backtrace.reverse.join("\n")) - s << CGI::escapeHTML(e.message) + '</pre>' - cgi.out{s} - next - ensure # eg. disconnect from server - @conn.disconnect if @conn - end - end |