aboutsummaryrefslogtreecommitdiffhomepage
path: root/data/doc/manuals_generated/sisu_manual/sisu_faq/sax.xml
diff options
context:
space:
mode:
Diffstat (limited to 'data/doc/manuals_generated/sisu_manual/sisu_faq/sax.xml')
-rw-r--r--data/doc/manuals_generated/sisu_manual/sisu_faq/sax.xml549
1 files changed, 549 insertions, 0 deletions
diff --git a/data/doc/manuals_generated/sisu_manual/sisu_faq/sax.xml b/data/doc/manuals_generated/sisu_manual/sisu_faq/sax.xml
new file mode 100644
index 00000000..5815b2b8
--- /dev/null
+++ b/data/doc/manuals_generated/sisu_manual/sisu_faq/sax.xml
@@ -0,0 +1,549 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?xml-stylesheet type="text/css" href="../_sisu/css/sax.css"?>
+<!-- Document processing information:
+ * Generated by: SiSU 0.59.0 of 2007w38/0 (2007-09-23)
+ * Ruby version: ruby 1.8.6 (2007-06-07 patchlevel 36) [i486-linux]
+ *
+ * Last Generated on: Sun Sep 23 04:12:03 +0100 2007
+ * SiSU http://www.jus.uio.no/sisu
+-->
+
+<document>
+<head>
+ <meta>Title:</meta>
+ <title class="dc">
+ SiSU - SiSU information Structuring Universe / Structured information, Serialized Units - FAQ - Frequently Asked/Answered Questions
+ </title>
+ <br />
+ <meta>Creator:</meta>
+ <creator class="dc">
+ Ralph Amissah
+ </creator>
+ <br />
+ <meta>Rights:</meta>
+ <rights class="dc">
+ Copyright (C) Ralph Amissah 2007, part of SiSU documentation, License GPL 3
+ </rights>
+ <br />
+ <meta>Type:</meta>
+ <type class="dc">
+ information
+ </type>
+ <br />
+ <meta>Subject:</meta>
+ <subject class="dc">
+ ebook, epublishing, electronic book, electronic publishing, electronic document, electronic citation, data structure, citation systems, search
+ </subject>
+ <br />
+ <meta>Date created:</meta>
+ <date_created class="extra">
+ 2006-09-06
+ </date_created>
+ <br />
+ <meta>Date available:</meta>
+ <date_available class="extra">
+ 2006-09-06
+ </date_available>
+ <br />
+ <meta>Date issued:</meta>
+ <date_issued class="extra">
+ 2006-09-06
+ </date_issued>
+ <br />
+ <meta>Date modified:</meta>
+ <date_modified class="extra">
+ 2007-09-16
+ </date_modified>
+ <br />
+ <meta>Date:</meta>
+ <date class="dc">
+ 2007-09-16
+ </date>
+ <br />
+</head>
+<body>
+<object id="1">
+ <ocn>1</ocn>
+ <text class="h1">
+ SiSU - SiSU information Structuring Universe / Structured information,
+Serialized Units - FAQ - Frequently Asked/Answered Questions,<br />
+Ralph Amissah
+ </text>
+</object>
+<object id="2">
+ <ocn>2</ocn>
+ <text class="h4">
+ 1. FAQ - Frequently Asked/Answered Questions
+ </text>
+</object>
+<object id="3">
+ <ocn>3</ocn>
+ <text class="h5">
+ 1.1 Why are urls produced with the -v (and -u) flag that point to a web
+server on port 8081?
+ </text>
+</object>
+<object id="4">
+ <ocn>4</ocn>
+ <text class="norm">
+ Try the following rune:
+ </text>
+</object>
+<object id="5">
+ <ocn>5</ocn>
+ <text class="indent_bullet">
+ sisu -W
+ </text>
+</object>
+<object id="6">
+ <ocn>6</ocn>
+ <text class="norm">
+ This should start the ruby webserver. It should be done after having
+produced some output as it scans the output directory for what to
+serve.
+ </text>
+</object>
+<object id="7">
+ <ocn>7</ocn>
+ <text class="h5">
+ 1.2 I cannot find my output, where is it?
+ </text>
+</object>
+<object id="8">
+ <ocn>8</ocn>
+ <text class="norm">
+ The following should provide help on output paths:
+ </text>
+</object>
+<object id="9">
+ <ocn>9</ocn>
+ <text class="indent_bullet">
+ sisu --help env
+ </text>
+</object>
+<object id="10">
+ <ocn>10</ocn>
+ <text class="indent_bullet">
+ sisu -V [same as the previous command]
+ </text>
+</object>
+<object id="11">
+ <ocn>11</ocn>
+ <text class="indent_bullet">
+ sisu --help directory
+ </text>
+</object>
+<object id="12">
+ <ocn>12</ocn>
+ <text class="indent_bullet">
+ sisu --help path
+ </text>
+</object>
+<object id="13">
+ <ocn>13</ocn>
+ <text class="indent_bullet">
+ sisu -U [filename]
+ </text>
+</object>
+<object id="14">
+ <ocn>14</ocn>
+ <text class="indent_bullet">
+ man sisu
+ </text>
+</object>
+<object id="15">
+ <ocn>15</ocn>
+ <text class="h5">
+ 1.3 I do not get any pdf output, why?
+ </text>
+</object>
+<object id="16">
+ <ocn>16</ocn>
+ <text class="norm">
+ <b>SiSU</b> produces LaTeX and pdflatex is run against that to generate
+pdf files.
+ </text>
+</object>
+<object id="17">
+ <ocn>17</ocn>
+ <text class="norm">
+ If you use <b>Debian</b> the following will install the required
+dependencies
+ </text>
+</object>
+<object id="18">
+ <ocn>18</ocn>
+ <text class="indent_bullet">
+ aptitude install sisu-pdf
+ </text>
+</object>
+<object id="19">
+ <ocn>19</ocn>
+ <text class="norm">
+ the following packages are required: tetex-bin, tetex-extra, latex-ucs
+ </text>
+</object>
+<object id="20">
+ <ocn>20</ocn>
+ <text class="h5">
+ 1.4 Where is the latex (or some other interim) output?
+ </text>
+</object>
+<object id="21">
+ <ocn>21</ocn>
+ <text class="norm">
+ Try adding -M (for maintenance) to your command flags, e.g.:
+ </text>
+</object>
+<object id="22">
+ <ocn>22</ocn>
+ <text class="indent_bullet">
+ sisu -HpMv [filename]
+ </text>
+</object>
+<object id="23">
+ <ocn>23</ocn>
+ <text class="norm">
+ this should result in the interim processing output being retained, and
+information being provided on where to find it.
+ </text>
+</object>
+<object id="24">
+ <ocn>24</ocn>
+ <text class="indent_bullet">
+ sisu --help directory
+ </text>
+</object>
+<object id="25">
+ <ocn>25</ocn>
+ <text class="indent_bullet">
+ sisu --help path
+ </text>
+</object>
+<object id="26">
+ <ocn>26</ocn>
+ <text class="norm">
+ should also provide some relevant information as to where it is placed.
+ </text>
+</object>
+<object id="27">
+ <ocn>27</ocn>
+ <text class="h5">
+ 1.5 Why isn't SiSU markup XML?
+ </text>
+</object>
+<object id="28">
+ <ocn>28</ocn>
+ <text class="norm">
+ I worked with text and (though I find XML immensely valuable) disliked
+noise ... better to sidestep the question and say:
+ </text>
+</object>
+<object id="29">
+ <ocn>29</ocn>
+ <text class="norm">
+ <b>SiSU</b> currently "understands" three XML input representations -
+or more accurately, converts from three forms of XML to native
+<b>SiSU</b> markup for processing. The three types correspond to SAX
+(structure described), DOM (structure embedded, whole document must be
+read before structure is correctly discernable) and node based (a tree)
+forms of XML document structure representation. Problem is I use them
+very seldom and check that all is as it should be with them seldom, so
+I would not be surprised if something breaks there, but as far as I
+know they are working. I will check and add an XML markup help page
+before the next release. There already is a bit of information in the
+man page under the title <b>SiSU</b> VERSION CONVERSION
+ </text>
+</object>
+<object id="30">
+ <ocn>30</ocn>
+ <text class="indent1">
+ sisu --to-sax [filename/wildcard]
+ </text>
+</object>
+<object id="31">
+ <ocn>31</ocn>
+ <text class="indent1">
+ sisu --to-dom [filename/wildcard]
+ </text>
+</object>
+<object id="32">
+ <ocn>32</ocn>
+ <text class="indent1">
+ sisu --to-node [filename/wildcard]
+ </text>
+</object>
+<object id="33">
+ <ocn>33</ocn>
+ <text class="norm">
+ The XML should be well formed... must check, but lacks sensible
+headers. Suggestions welcome as to what to make of them. [For the
+present time I am satisfied that I can convert (both ways) between 3
+forms of XML representation and <b>SiSU</b> markup].
+ </text>
+</object>
+<object id="34">
+ <ocn>34</ocn>
+ <text class="indent1">
+ sisu --from-xml2sst [filename/wildcard]
+ </text>
+</object>
+<object id="35">
+ <ocn>35</ocn>
+ <text class="h5">
+ 1.6 LaTeX claims to be a document preparation system for high-quality
+typesetting. Can the same be said about SiSU?
+ </text>
+</object>
+<object id="36">
+ <ocn>36</ocn>
+ <text class="norm">
+ <b>SiSU</b> is not really about type-setting.
+ </text>
+</object>
+<object id="37">
+ <ocn>37</ocn>
+ <text class="norm">
+ LaTeX is the ultimate computer instruction type-setting language for
+paper based publication.
+ </text>
+</object>
+<object id="38">
+ <ocn>38</ocn>
+ <text class="norm">
+ LaTeX is able to control just about everything that happens on page and
+pixel: the positioning of letters, kerning, space variation between
+characters, words and paragraphs, formulae, etc.
+ </text>
+</object>
+<object id="39">
+ <ocn>39</ocn>
+ <text class="norm">
+ <b>SiSU</b> is not really about type-setting at all. It is about a
+lightweight markup instruction that provides enough information for an
+abstraction of the document's structure and objects, from which
+different forms of representation of the document can be generated.
+ </text>
+</object>
+<object id="40">
+ <ocn>40</ocn>
+ <text class="norm">
+ <b>SiSU</b> with very little markup instruction is able to produce
+relatively high quality pdf by virtue of being able to generate usable
+default LaTeX; it produces "quality" html by generating the html
+directly; likewise it populates an SQL database in a useful way with
+the document in object sized chunks and its meta-data. But <b>SiSU</b>
+works on an abstraction of the document's structure and content and
+custom builds suitable uniform output. The html for browser viewing and
+pdf for paper viewing/publishing are rather different things with
+different needs for layout - as indeed is what is needed to store
+information in a database in searchable objects.
+ </text>
+</object>
+<object id="41">
+ <ocn>41</ocn>
+ <text class="norm">
+ The pdfs or html produced for example by open office based on open
+document format and other office/word processor suites usually attempt
+to have similar looking outputs - your document rendered in html looks
+much the same, or in pdf... sisu is less this way, it seeks to have a
+starting point with as little information about appearance as possible,
+and to come up with the best possible appearance for each output that
+can be derived based on this minimal information.
+ </text>
+</object>
+<object id="42">
+ <ocn>42</ocn>
+ <text class="norm">
+ Where there are large document sets, it provides consistency in
+appearance in each output format for the documents.
+ </text>
+</object>
+<object id="43">
+ <ocn>43</ocn>
+ <text class="norm">
+ The excuse for going this way is, it is a waste of time to think much
+about appearance when working on substantive content, it is the
+substantive content that is relevant, not the way it looks beyond the
+basic informational tags - and yet you want to be able to take
+advantage of as many useful different ways of representing documents as
+are available, and for various types of output to be/look as good as
+it can for each medium/format in which it is presented, (with different
+mediums having different focuses) and <b>SiSU</b> tries to achieve this
+from minimal markup.
+ </text>
+</object>
+<object id="44">
+ <ocn>44</ocn>
+ <text class="h5">
+ 1.7 How do I create a GIN or GiST index in PostgreSQL for use in SiSU?
+ </text>
+</object>
+<object id="45">
+ <ocn>45</ocn>
+ <text class="norm">
+ This at present needs to be done "manually" and it is probably
+necessary to alter the sample search form. The following is a helpful
+response from one of the contributors of GIN to PostgreSQL, Oleg
+Bartunov, 2006-12-06:
+ </text>
+</object>
+<object id="46">
+ <ocn>46</ocn>
+ <text class="norm">
+ "I have tsearch2 slides which introduces tsearch2 &lt;<link
+xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple"
+xlink:href="http://www.sai.msu.su/&#126;megera/wiki/tsearch2slides">http://www.sai.msu.su/&#126;megera/wiki/tsearch2slides</link>&gt;
+ </text>
+</object>
+<object id="47">
+ <ocn>47</ocn>
+ <text class="norm">
+ FTS in PostgreSQL is provided by tsearch2, which should work without
+any indices (GiST or GIN) ! Indices provide performance, not
+functionality.
+ </text>
+</object>
+<object id="48">
+ <ocn>48</ocn>
+ <text class="norm">
+ In your example I'd do ( simple way, just for demo):
+ </text>
+</object>
+<object id="49">
+ <ocn>49</ocn>
+ <text class="norm">
+ <b>0.</b> compile, install tsearch2 and load tsearch2 into your
+database
+ </text>
+</object>
+<object id="50">
+ <ocn>50</ocn>
+ <text class="indent1">
+ cd contrib/tsearch2;
+make &amp;&amp; make install &amp;&amp; make installcheck; psql DB
+&lt; tsearch2.sql
+ </text>
+</object>
+<object id="51">
+ <ocn>51</ocn>
+ <text class="norm">
+ <b>1.</b> Add column fts, which holds tsvector
+ </text>
+</object>
+<object id="52">
+ <ocn>52</ocn>
+ <text class="indent1">
+ alter table documents add column fts tsvector;
+ </text>
+</object>
+<object id="53">
+ <ocn>53</ocn>
+ <text class="norm">
+ <b>2.</b> Fill fts column
+ </text>
+</object>
+<object id="54">
+ <ocn>54</ocn>
+ <text class="indent1">
+ update document set fts = to_tsvector(clean);
+ </text>
+</object>
+<object id="55">
+ <ocn>55</ocn>
+ <text class="norm">
+ <b>3.</b> create index - just for performance !
+ </text>
+</object>
+<object id="56">
+ <ocn>56</ocn>
+ <text class="indent1">
+ create index fts_gin_idx on document using gin(fts);
+ </text>
+</object>
+<object id="57">
+ <ocn>57</ocn>
+ <text class="norm">
+ <b>4.</b> Run vacuum
+ </text>
+</object>
+<object id="58">
+ <ocn>58</ocn>
+ <text class="indent1">
+ vacuum analyze document;
+ </text>
+</object>
+<object id="59">
+ <ocn>59</ocn>
+ <text class="norm">
+ That's all.
+ </text>
+</object>
+<object id="60">
+ <ocn>60</ocn>
+ <text class="norm">
+ Now you can search:
+ </text>
+</object>
+<object id="61">
+ <ocn>61</ocn>
+ <text class="norm">
+ select lid, metadata_tid, rank_cd(fts, q,2)as rank from document,
+plainto_tsquery('markup syntax') q where q @@ fts order by rank desc
+limit 10;
+ </text>
+</object>
+<object id="62">
+ <ocn>62</ocn>
+ <text class="h5">
+ 1.8 Where is version 1.0?
+ </text>
+</object>
+<object id="63">
+ <ocn>63</ocn>
+ <text class="norm">
+ <b>SiSU</b> works pretty well as it is supposed to. Version 1.0 will
+have the current markup, and directory structure. At this point it is
+largely a matter of choice as to when the name change is made.
+ </text>
+</object>
+<object id="64">
+ <ocn>64</ocn>
+ <text class="norm">
+ The feature set for html,<en>1</en> LaTeX/pdf and opendocument is in
+place. XML, and plaintext are in order.
+ </text>
+ <endnote notenumber="1">
+ <number>1</number>
+ <note>
+ html w3c compliance has been largely met.
+ </note>
+ </endnote>
+</object>
+<object id="65">
+ <ocn>65</ocn>
+ <text class="norm">
+ html and LaTeX/pdf may be regarded as reference copy outputs
+ </text>
+</object>
+<object id="66">
+ <ocn>66</ocn>
+ <text class="norm">
+ With regard to the populating of sql databases (postgresql and sqlite),
+there is a bit to be done.
+ </text>
+</object>
+<object id="67">
+ <ocn>67</ocn>
+ <text class="norm">
+ We are still almost there.
+ </text>
+</object>
+<object id="0">
+ <ocn>0</ocn>
+ <text class="h4">
+ Endnotes
+ </text>
+</object>
+</body>
+</document>