diff options
Diffstat (limited to 'lib/sisu/v1/dal_doc_str.rb')
-rw-r--r-- | lib/sisu/v1/dal_doc_str.rb | 265 |
1 files changed, 265 insertions, 0 deletions
# coding: utf-8
=begin

 * Name: SiSU

 * Description: a framework for document structuring, publishing and search

 * Author: Ralph Amissah

 * Copyright: (C) 1997 - 2009 Ralph Amissah All Rights Reserved.

 * License: GPL 3 or later:

   SiSU, a framework for document structuring, publishing and search

   Copyright (C) Ralph Amissah

   This program is free software: you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the Free
   Software Foundation, either version 3 of the License, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   You should have received a copy of the GNU General Public License along with
   this program. If not, see <http://www.gnu.org/licenses/>.

   If you have Internet connection, the latest version of the GPL should be
   available at these locations:
   <http://www.fsf.org/licensing/licenses/gpl.html>
   <http://www.gnu.org/licenses/gpl.html>

   <http://www.jus.uio.no/sisu/gpl.fsf/toc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/doc.html>
   <http://www.jus.uio.no/sisu/gpl.fsf/plain.txt>

 * SiSU uses:
   * Standard SiSU markup syntax,
   * Standard SiSU meta-markup syntax, and the
   * Standard SiSU object citation numbering and system

 * Homepages:
   <http://www.jus.uio.no/sisu>
   <http://www.sisudoc.org>

 * Download:
   <http://www.jus.uio.no/sisu/SiSU/download.html>

 * Ralph Amissah
   <ralph@amissah.com>
   <ralph.amissah@gmail.com>

 ** Description: document abstraction

=end
module SiSU_document_structure
  require "#{SiSU_lib}/dal_doc_str_tables"               # dal_doc_str_tables.rb
  require "#{SiSU_lib}/dal_doc_str_code"                 # dal_doc_str_code.rb

  # Rewrites the heading markup of one paragraph into the internal
  # "#{Mx[:lv_o]}N:name#{Mx[:lv_c]}" level-marker form.
  #
  # +md+ must respond to +markup_version+ and +lv1+..+lv6+; +para+ is the
  # paragraph string and is modified IN PLACE (sub!/gsub!).
  # NOTE(review): Mx is defined elsewhere in SiSU; Mx[:lv_o]/Mx[:lv_c] look
  # like the opening/closing delimiters of a level marker - confirm.
  class Structure
    # 0.38 markup prefix (regex source) => internal heading level.
    # Order is the order in which the substitutions are applied.
    LEVEL_PREFIXES_0_38=[
      ['3~',6],['2~',5],['1~',4],[':?C~',3],[':?B~',2],[':?A~',1]
    ].freeze

    # Substitutions applied, in this order, to "@level:"/"@markup:" header
    # lines of 0.38 documents (digits first, so that the digits produced by
    # the letter substitutions are not remapped a second time).
    HEADER_LEVEL_SUBSTITUTIONS=[
      [/3/,'6'],[/2/,'5'],[/1/,'4'],[/:?C/,'3'],[/:?B/,'2'],[/:?A/,'1']
    ].freeze

    def initialize(md,para)
      @md,@para=md,para
    end

    # Normalizes, then marks up, the paragraph; returns the paragraph.
    def structure
      structure_markup_normalize
      structure_markup
      @para
    end

    # Prefixes the paragraph with a level marker when it matches one of the
    # document's heading patterns (@md.lv1 .. @md.lv6, first match wins) and
    # does not already contain a level marker. Returns the paragraph.
    def structure_markup
      return @para if @para =~/#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/
      level=(1..6).find { |n| @para =~/^\s*#{@md.public_send("lv#{n}")}/ }
      if level
        # sub! is used for its in-place effect only: it returns nil when
        # nothing was replaced, and assigning that result (as was done
        # previously) turned the paragraph into nil.
        @para.sub!(/(?:<[:!]#{level}!?>\s*)?(.+)/,"#{Mx[:lv_o]}#{level}:#{Mx[:lv_c]} \\1")
      end
      @para
    end

    # For markup version >= 0.38: converts the A~ B~ C~ 1~ 2~ 3~ heading
    # prefixes to internal levels 1-6 and turns 4~ 5~ 6~ into '!_'.
    # Lines starting "@level: " or "@markup: " additionally have every
    # 1/2/3/A/B/C they contain renumbered. Returns the paragraph.
    #
    # Original author's note: convert internal representation, consider making
    # 0.38 structure default ([A-C1-6] instead of [1-9]), requires downstream
    # changes. Keep and implement (requires downstream changes):
    #   @para.gsub!(/^6~/,'9~'); @para.gsub!(/^5~/,'8~'); @para.gsub!(/^4~/,'7~')
    def structure_markup_normalize
      return @para unless @md.markup_version.to_f >= 0.38
      @para.gsub!(/^[456]~/,'!_')
      LEVEL_PREFIXES_0_38.each { |prefix,level| convert_level_prefix(prefix,level) }
      if @para =~/^@(?:level|markup):\s/
        HEADER_LEVEL_SUBSTITUTIONS.each { |pattern,replacement| @para.gsub!(pattern,replacement) }
      end
      @para
    end

    # For markup version < 0.38: converts the 1~ .. 6~ heading prefixes to
    # internal levels 1-6 and turns 7~ 8~ 9~ into '!_'. Returns the paragraph.
    def structure_marks
      return @para unless @md.markup_version.to_f < 0.38
      (1..6).each { |level| convert_level_prefix("#{level}~",level) }
      @para.gsub!(/^[789]~/,'!_')
      @para
    end

    private

    # Replaces a line-initial heading prefix (given as regex source) with the
    # internal marker for +level+: first the named form ("1~name"), then the
    # unnamed form ("1~" followed by whitespace, which is consumed).
    def convert_level_prefix(prefix,level)
      @para.gsub!(/^#{prefix}(\S+)/,"#{Mx[:lv_o]}#{level}:\\1#{Mx[:lv_c]}")
      @para.gsub!(/^#{prefix}\s+/,"#{Mx[:lv_o]}#{level}:#{Mx[:lv_c]}")
    end
  end

  # Read-only wrapper around the per-paragraph hash built by OCN#ocn
  # (keys :txt, :node, :ocn, :lv, :type).
  # Note: inside this module the name shadows Ruby's core ::Struct.
  class Struct
    def initialize(o)
      @o=o
    end

    # The readers are defined once at class level; previously they were
    # nested inside #structure and redefined on every call.
    def txt
      @o[:txt]
    end

    def node
      @o[:node]
    end

    def ocn
      @o[:ocn]
    end

    def lv
      @o[:lv]
    end

    def type
      @o[:type]
    end

    # Returns self, so that Struct.new(o).structure yields the wrapper.
    def structure
      self
    end
  end

  # Assigns object citation numbers (ocn) and node counts to the paragraphs
  # of a document.
  #
  # +md+ must respond to +lv1+..+lv6+ (used as the right-hand side of =~);
  # +data+ must respond to +each+, yielding the paragraphs.
  class OCN
    def initialize(md,data)
      @md,@data=md,data
    end

    # Walks the paragraphs and returns an Array with one Struct per
    # paragraph (also kept in @o_array). For each paragraph:
    #   txt  - the paragraph
    #   node - running node count; counting starts at the first paragraph
    #          beginning with a level-1 marker
    #   ocn  - running object number, 0 for paragraphs flagged as unnumbered,
    #          nil for paragraphs excluded from numbering
    #   lv   - "<level>:<n>" for headings, "o<n>" for other objects,
    #          "u<n>" for unnumbered ones
    #   type - "h<n>" heading, "t<n>" table, "c<n>" code block, "g<n>" group,
    #          "i<n>" image, "p<n>" paragraph, "u<n>" unnumbered
    # Paragraphs without any non-whitespace character, and comment lines
    # ("% " .. "%%%% "), yield a Struct whose fields are all nil.
    # Paragraph strings may be modified in place.
    def ocn #and auto segment numbering increment
      data=@data
      @o_array=[]
      node=ocn=ocnh=ocno=ocnp=ocnt=ocnc=ocng=ocni=ocnu=0 # h heading, o other, p paragraph, t table, c code, g group, i image, u unnumbered
      # lv/type are carried over from the previous numbered object when a
      # heading marker of level 7-9 is met (no branch below sets them then).
      ocn_dv=ocn_sp=0
      heading_count=Array.new(7,0)                       # index 1..6: headings seen per level
      node_count_flag=false
      headings='' #where headings omitted an alternative form of ocn heading numbering is required for html and other linking...
      #headings=if @md.ocn.inspect =~/skip=headings/; '^(?:[A-C]|[1-9])~\S* |'
      #else ''
      #end
      regex_exclude_ocn_and_node = /#{headings}^%{1,4}\s|#{Rx[:meta]}|^@\S+?:\s|^0~|^4~endnotes|^#{Mx[:lv_o]}4:endnotes#{Mx[:lv_c]}|^<\/center>|<:ee>|<:e[:_]>|^\^~ |<:e[:_]\d+?>|^#{Mx[:fa_o]}p[bn]#{Mx[:fa_c]}|^<:\#|<:- |<[:!]!4|<table|<\/table>|<td|<\/td>|<th|<\/th>|<tr>|<\/tr>|<hr width|<:4-endnotes>|\[endnotes\]|<:zz>|<:isbn-|<:journal-|<:conference-|#{Mx[:br_endnotes]}/i #ocn here # added with Tune.code
      regex_exclude_ocn = /^(?:alt|code|group|poem|table)\{|^\}(?:alt|code|group|poem|table)|^\}table$/ #ocn here # added with Tune.code
      data.each do |para|
        o={}
        adjust_markup=true
        if para =~/\w|\S|<|\(/
          if para !~ regex_exclude_ocn_and_node
            node_count_flag=true if para=~/^#{Mx[:lv_o]}1:/
            node+=1 if node_count_flag
            if para !~ regex_exclude_ocn # regex_exclude_large previously excluded
              if para=~/<:#>|~#|-#/ # |^\s*\*\s*\*\s*\*\s*$ <-consider leaving un-numbered
                ocnu+=1
                #if para=~/-#/ #if implemented would remove need to keep <-#>
                #  ocnk+=1
                #  ocn_dv,ocn_sp="k#{ocnk}","u#{ocnu}"
                #else
                #  ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}"
                #end
                #para.gsub!(/<~#>|<-#>/,'') if para #get rid of need
                para.gsub!(/#{Mx[:fa_o]}~##{Mx[:fa_c]}/,'')
                ocn_dv,ocn_sp="u#{ocnu}","u#{ocnu}"
                o[:txt],o[:node],o[:ocn],o[:lv],o[:type]=para,node,0,ocn_dv,ocn_sp
              else
                ocn+=1
                level=heading_level(para)
                if level || para=~/^#{Mx[:lv_o]}[1-9]:\S*?#{Mx[:lv_c]}/
                  ocnh+=1 #heading
                  if level
                    heading_count[level]+=1
                    ocn_dv,ocn_sp="#{level}:#{heading_count[level]}","h#{ocnh}"
                  end
                else
                  ocno+=1
                  if para=~/#{Mx[:gr_o]}TZ#{Mx[:gr_c]}/ #table
                    ocnt+=1
                    ocn_dv,ocn_sp="o#{ocno}","t#{ocnt}"
                  elsif para=~/^#{Mx[:gr_o]}code#{Mx[:gr_c]}/ #code block
                    ocnc+=1
                    ocn_dv,ocn_sp="o#{ocno}","c#{ocnc}"
                  elsif para=~/^#{Mx[:gr_o]}(?:group|alt|verse)#{Mx[:gr_c]}/ #group, poem
                    ocng+=1
                    ocn_dv,ocn_sp="o#{ocno}","g#{ocng}"
                  elsif para=~/#{Mx[:lnk_o]}\S+?\.(?:png|jpg|gif)\s+/m #image
                    ocni+=1
                    ocn_dv,ocn_sp="o#{ocno}","i#{ocni}"
                  else #paragraph
                    ocnp+=1
                    ocn_dv,ocn_sp="o#{ocno}","p#{ocnp}"
                  end
                end
                o[:txt],o[:node],o[:ocn],o[:lv],o[:type]=para,node,ocn,ocn_dv,ocn_sp
              end
            else o[:txt],o[:node],o[:ocn],o[:lv],o[:type]=para,node,nil,nil,nil
            end
          else
            # Excluded from numbering; comment lines are not recorded at all.
            # Previously `para` was rebound here to the Array returned by the
            # multiple assignment (or to ''), so the checks below ran
            # Array#=~ : a no-op with a deprecation warning on older Rubies,
            # NoMethodError since Ruby 3.2. The resulting behaviour (no markup
            # adjustment for these paragraphs) is kept, now explicitly.
            o[:txt],o[:node],o[:ocn],o[:lv],o[:type]=para,node,nil,nil,nil unless para =~/^%{1,4}\s/
            adjust_markup=false
          end
        end
        adjust_block_markup(para,o) if adjust_markup
        @o_array << Struct.new(o).structure
      end
      @o_array
    end

    private

    # Returns the first level n in 1..6 for which +para+ starts with the
    # level-n marker or matches @md.lv<n>; nil when there is none.
    def heading_level(para)
      (1..6).find do |n|
        para=~/^#{Mx[:lv_o]}#{n}:(\S*?)#{Mx[:lv_c]}/ || para =~@md.public_send("lv#{n}")
      end
    end

    # In-place adjustments of grouped-text and table paragraphs:
    # - collapses double newlines in code/verse/alt/group blocks,
    # - puts a newline in front of a code-end marker,
    # - appends "~<ocn>;<lv>;<type>" (values taken from +o+) to a table header.
    def adjust_block_markup(para,o)
      para.gsub!(/\n\n/,"\n") if para =~/#{Mx[:gr_o]}(?:code|verse|alt|group)#{Mx[:gr_c]}/ #newlines taken out
      para.gsub!(/(#{Mx[:gr_o]}(?:code-end)#{Mx[:gr_c]})/,"\n\\1") if para =~/#{Mx[:gr_o]}(?:code-end)#{Mx[:gr_c]}/ #newlines added check
      if para =~/#{Mx[:gr_o]}Th?#{Mx[:tc_p]}/u
        para.gsub!(/(#{Mx[:gr_o]}Th?#{Mx[:tc_p]}.+?)#{Mx[:gr_c]}/u,"\\1#{Mx[:tc_p]}~#{o[:ocn]};#{o[:lv]};#{o[:type]}#{Mx[:gr_c]}")
      end
    end
  end

  # Code handling, implemented in dal_doc_str_code.rb.
  class Code < SiSU_document_structure_code::Code
  end

  # Table handling, implemented in dal_doc_str_tables.rb.
  class Tables < SiSU_document_structure_tables::Tables
  end
end
__END__