=begin
* Name: SiSU
* Description: a framework for document structuring, publishing and search
* Author: Ralph Amissah
* Copyright: (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007 Ralph Amissah All Rights Reserved.
* License: GPL 3 or later:
SiSU, a framework for document structuring, publishing and search
Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007 Ralph Amissah
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
If you have Internet connection, the latest version of the GPL should be
available at these locations:
* SiSU uses:
* Standard SiSU markup syntax,
* Standard SiSU meta-markup syntax, and the
* Standard SiSU object citation numbering and system
* Homepages:
* Download:
* Ralph Amissah
** Description: Syntax for markup, input markup syntaxes, determined here
=end
module Syntax
# Plain holder: keeps the three constructor arguments in instance variables.
# No reader methods are defined in this class.
class Words
  # line, md, mkp - stored unchanged as @line, @md and @mkp.
  def initialize(line,md,mkp)
    @line=line
    @md=md
    @mkp=mkp
  end
end
class Markup
# Stores the document state and builds the token-scanning regexes used by
# wordlist_italics (@line_scan_ital) and wordlist_bold (@line_scan_bold).
#
# md   - document metadata object. When it responds to make_italic / make_bold
#        and the result can be indexed with :str, that string is used as a word
#        list. The default '' responds to neither, so only the skin lists apply.
# data - the lines to process; stored in @data and iterated by songsheet/tech.
#
# Also reads the skin from SiSU_Env::Get_init and the remote url from
# SiSU_Env::Info_env.
#
# A word list is used only when its accessor chain exists AND its :str is set.
# Each scan regex is nil when no word list is usable; the wordlist_* methods
# apply the same test before scanning.
def initialize(md='',data='')
  @data,@md=data,md
  @vz=SiSU_Env::Get_init.instance.skin
  @data_new=[]
  url_and_stub=SiSU_Env::Info_env.new.url
  @output_url="#{url_and_stub.remote}"
  # tokens that must be kept whole and never decorated (urls, images, markers)
  @http_m='\{.+?\}(?:https?|file)://\S+|(?:https?|file):\S+|\.\.\/\S+|\S+?\.png\b|[*]~\S+|^0~.+|<:(?:code|group|alt|verse)(?:-end)?>|<:br>'
  @manmkp_ital='[i/]\\{.+?\\}[i/]'
  tail_m_ital=%q{(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$)}
  tail_m_bold=%q{(?:(?:<\/i>)?(?:\s|[.,;:?!'")]|~\^|~\\\{\s|$))?}
  # NOTE(review): the alternation below contains a literal line break next to
  # \n; it looks like residue of markup lost from this copy - confirm upstream.
  bold_line=%q{^!_\s.+?(?:
|\n|$)}
  # Previously a list whose :str was defined but nil was still interpolated,
  # leaving an empty alternative that matched bare whitespace and end-of-line.
  ital_lists=[]
  if (defined? @md.make_italic[:str]) && @md.make_italic[:str]
    ital_lists << @md.make_italic[:str]
  end
  if (defined? @vz.markup_make_italic[:str]) && @vz.markup_make_italic[:str]
    ital_lists << @vz.markup_make_italic[:str]
  end
  # The lists are always wrapped in one group so the tail applies to every
  # alternative, as the bold regex below already does.
  @line_scan_ital=if ital_lists.empty?
    nil
  else
    /#@http_m|#{bold_line}|#@manmkp_ital#{tail_m_ital}|(?:#{ital_lists.join('|')})#{tail_m_ital}|\S+|\n/
  end
  @manmkp_bold='^!_\s.+?(?:\n|$)|[*!b]\\{.+?\\}[*!b]|[*!][a-zA-Z0-9\-_]+[*!]'
  bold_lists=[]
  if (defined? @md.make_bold[:str]) && @md.make_bold[:str]
    bold_lists << @md.make_bold[:str]
  end
  if (defined? @vz.markup_make_bold[:str]) && @vz.markup_make_bold[:str]
    bold_lists << @vz.markup_make_bold[:str]
  end
  @line_scan_bold=if bold_lists.empty?
    nil
  else
    /#@http_m|#{bold_line}|(?:#{([@manmkp_bold]+bold_lists).join('|')})#{tail_m_bold}|\S+|\n/
  end
end
# Runs every entry of @data through the processing chain
# pre -> wordlist_italics -> wordlist_bold -> bodymarkup,
# appends each result to @data_new and returns @data_new.
def songsheet
  @data.each do |raw|
    @data_new << bodymarkup(wordlist_bold(wordlist_italics(pre(raw))))
  end
  @data_new
end
# Returns a copy of line. When the copy contains a table opener such as
# {t ...} or {table~h ...}, every newline in it is prefixed with ';;'
# (markup for alternative tables). The argument itself is not modified.
def pre(line)
  copy=line.dup
  copy.gsub!(/(\n)/,';;\1') if copy =~/\{(?:t|table)(?:~h)?\s*c?[\d; ]*\}/
  copy
end
# Applies the italic word list to one line and returns the result; the
# argument itself is left untouched (a dup is processed).
#
# Nothing is done unless the document (@md.make_italic[:str]) or the skin
# (@vz.markup_make_italic[:str]) supplies a word list, and lines starting with
# 0~, a 1-4 character % comment marker, or <:code are returned as they are.
# Otherwise the line is split with @line_scan_ital, every token that is not
# manual italic markup or a protected url/marker token is passed through the
# word-list substitution, and the tokens are re-joined with single spaces.
def wordlist_italics(line)
  line=line.dup
  has_list=(defined?(@md.make_italic[:str]) && @md.make_italic[:str]) ||
    (defined?(@vz.markup_make_italic[:str]) && @vz.markup_make_italic[:str])
  return line unless has_list
  return line if line =~/^(?:0~|%{1,4}\s|<:code)/ #!~/^(?:[0-6]~|!_|%+\s)/
  protected_token=/#@manmkp_ital|#@http_m/
  tokens=line.scan(@line_scan_ital).flatten.compact #compact reinstated
  tokens.each do |token|
    next if protected_token.match(token)
    if defined?(@md.make_italic[:regx]) && @md.make_italic[:regx]
      token.gsub!(@md.make_italic[:regx],'\1')
    elsif defined?(@vz.markup_make_italic) && @vz.markup_make_italic
      # NOTE(review): the skin value is passed to gsub! directly, while the
      # guard above indexes it with [:str] - confirm which shape is intended.
      token.gsub!(@vz.markup_make_italic,'\1')
    end
  end
  tokens.join(' ')
end
# Strips a leading bold-paragraph marker ("!_ " or "7~ ".."9~ ") from given,
# in place. Four substitutions are tried in order, each keeping the captured
# text; the result of the last gsub! (nil when it changed nothing) is returned.
# NOTE(review): the first pattern holds a literal line break and the
# replacements carry no decoration - possibly text lost from this copy; confirm
# against the upstream source.
def embolden(given)
  rules=[
    [/(?:^!_|^[7-9]~)\s+(.+?)(
)/,'\1\2'],
    [/(?:^!_|^[7-9]~)\s+(.+?)\s+((?:[*]~\S+\s*)+)/,'\1 \2'],
    [/(?:^!_|^[7-9]~)\s+(.+?)\s*([~-]#)$/,'\1 \2'],
    [/(?:^!_\s+|^[7-9]~\s+)(.*)?\s*$/,'\1']
  ]
  rules.map { |pattern,replacement| given.gsub!(pattern,replacement) }.last
end
# Applies the bold word list to one line and returns the result; the argument
# itself is left untouched (a dup is processed).
#
# Without a word list (neither @md.make_bold[:str] nor
# @vz.markup_make_bold[:str] set) only bold-paragraph markers are handled:
# embolden is called on lines that start with "!_ " or "7~ ".."9~ " and are not
# excluded by /^(?:[0-9]~|%+\s)/.
# With a word list, lines starting with a digit~ heading marker, a % comment
# marker or <:code are returned as they are; other lines are split with
# @line_scan_bold, protected tokens (manual bold markup, urls/markers) are only
# passed to embolden when they carry a paragraph marker, every other token goes
# through the word-list substitution, and tokens are re-joined with single spaces.
def wordlist_bold(line)
  line=line.dup
  para_marker=/(?:^!_|^[7-9]~)\s+/
  has_list=(defined?(@md.make_bold[:str]) && @md.make_bold[:str]) ||
    (defined?(@vz.markup_make_bold[:str]) && @vz.markup_make_bold[:str])
  unless has_list
    embolden(line) if line !~/^(?:[0-9]~|%+\s)/ && line =~ para_marker
    return line
  end
  return line if line =~/^(?:[0-9]~|%+\s|<:code)/
  protected_token=/#@manmkp_bold|#@http_m/
  tokens=line.scan(@line_scan_bold).flatten.compact
  tokens.each do |token|
    if protected_token.match(token)
      embolden(token) if token =~ para_marker #bold paragraph/emphasize
    elsif defined?(@md.make_bold[:regx]) && @md.make_bold[:regx] #document header: 0~bold [bold word list]
      token.gsub!(@md.make_bold[:regx],'\1')
    elsif defined?(@vz.markup_make_bold) && @vz.markup_make_bold #defaults and skin adjusted bold word list
      # NOTE(review): the skin value is passed to gsub! directly, while the
      # guard above indexes it with [:str] - confirm which shape is intended.
      token.gsub!(@vz.markup_make_bold,'\1')
    end
  end
  tokens.join(' ')
end
# Applies the body-text markup rules to one line and returns the result.
# The argument is duplicated first, so the caller's string is not modified.
#
# Three cases, decided by the two regex tests in the if/elsif below:
# * line does not match /^0~|<:codeline>|<:code-end>/: comment lines are
#   blanked, the line is split on whitespace, tokens are cleaned and re-joined
#   with single spaces, and then the long chain of gsub! rules runs in the
#   order written (title/creator expansion from @md, url handling using
#   @output_url, escaped special characters, name markers, endnote url
#   shortcut, inline markup forms, bullets/indents, trailing-space cleanup).
# * otherwise, line matches /^<:code(?:-end)?>|<:codeline>/: angle brackets are
#   prefixed with an underscore, <:...> markers are restored, and <:codeline>
#   becomes a newline followed by a space.
# * otherwise (0~ header lines): nothing is changed.
#
# NOTE(review): in this copy several statements are not well-formed Ruby (an
# unterminated regex literal, an unterminated '\' string, stray "back, clumsy"
# lines) and many patterns/replacements look incomplete: one gsub! has an empty
# pattern, many replacements are just '\1\2', and some literals contain raw
# line breaks. This looks like tag/entity text lost from the file - restore
# from the upstream source before relying on this method.
def bodymarkup(line)
# << http://www.jus.uio.no/sisu/sisu_markup_table/markup >>
# See: data/sisu/sample/document_samples_sisu_markup/
# !{emphasis}! e{emphasis}e emphasis
# *{bold text}* b{bold}b bold text
# _{underline}_ u{underline}u underline
# /{italics}/ i{italics}i italics
# "{citation}" c{citation}c citation #blockquote?
# ^{superscript}^ superscript
# ,{subscript}, subscript
# +{inserted text}+ inserted text
# -{deleted text}- deleted text
# {url address}:url
# {image.png}imageurl
# {image.png}png
# ~{endnote}~
# !_ #bold/emphasise paragraph
# _" #blockquote paragraph
# _1 <:i1> #indent paragraph 1 step
# _2 <:i2> #indent paragraph 2 steps
# _3 <:i3> #indent paragraph 3 steps
# _4 <:i4> #indent paragraph 4 steps
# _* #bullet (list) ●
# _1* #bullet (list) indented
# _1* #bullet (list) indented
# # #numbered (list) level 1
# _# #numbered (list) level 2
# work on a copy; every gsub! below mutates this copy only
line=line.dup
if line !~/^0~|<:codeline>|<:code-end>/
#special characters: ~ { } < > - _ / also used : ^ ! #
line_array=[]
line.gsub!(/^%{1,4} .+/mi,'') #remove comments
# word stays nil for header/comment lines, which skips the per-token pass
word=line.scan(/\S+|\n/) unless line =~/^(?:0~\S|%+\s)/
if word
word.each do |w| # _ - / # | : ! ^ ~
unless w =~/^[0-9]~|~\{|\}~|~\[|\]~|^\^~|~\^|\*~\S+|~#|\{t?~|\{table/
w.gsub!(/\\~/,'~') #escaped special character
w.gsub!(/~/,'~')
end
w.gsub!(/^\<$/,'<') #escaped special character
w.gsub!(/^\>$/,'>') #escaped special character
line_array << w
end
line=line_array.join(' ')
line=line.strip
end
line.gsub!(/^(1~\??) @title\s+(?:(by\s+)?(?:@creator|@author))\s*$/,"\\1 #{@md.title} - #{@md.subtitle},
\\2#{@md.dc_creator}")
line.gsub!(/^(1~\??) @title\s*$/,"\\1 #{@md.title} - #{@md.subtitle}") #
line.gsub!(/^([23]~\??) (?:(by\s+)?(?:@creator|@author))\s*$/,"\\1 \\2#{@md.dc_creator}") #
line.gsub!(/<((?:https?|file):\/\/\S+?)>/,'< \1 >') #catch problem markup
line.gsub!(/\}\.\.\/(\S+)/,"\}#@output_url/\\1") #means you are not supporting relative links (only relevant in html), converted to static here
line.gsub!(/<:=(\S+?)>/,'{ c_\1.png 14x14 }http://www.jus.uio.no/sisu') #adjustment 2005w30
line.gsub!(//,'<:\1>') #escaped special character
line.gsub!(/\\~/,'~') #escaped special character
line.gsub!(/\\\{/,'{') #escaped special character
line.gsub!(/\\\}/,'}') #escaped special character
line.gsub!(/\\\<,'<<') #escaped special character
line.gsub!(/\\\>>/,'>>') #escaped special character
line.gsub!(/\\\,'<') #escaped special character
line.gsub!(/\\\>/,'>') #escaped special character
line.gsub!(/\\\_/,'_') #escaped special character
line.gsub!(/\\\-/,'-') #escaped special character
line.gsub!(/\\\+/,'+') #escaped special character
line.gsub!(/\\\//,'/') #escaped special character
line.gsub!(/\\\#/,'#') #escaped special character
line.gsub!(/\\\&/,'&') #& #escaped special character
line.gsub!(/\\\|/,'|') #not really a sisu special character but made available as possibility
line.gsub!(/\\\:/,':') #not really a sisu special character but made available as possibility
line.gsub!(/\\\!/,'!') #not really a sisu special character but made available as possibility
line.gsub!(/\\\^/,'^') #not really a sisu special character but made available as possibility
line.gsub!(/\\\,/,',') #not really a sisu special character but made available as possibility
#ADD -->
line.gsub!(/\\\\/,'\') #escaped special character
line.gsub!(/\\\*/,'*') #escaped special character
line.gsub!(/\\\!/,'!') #escaped special character
line.gsub!(/(?:^| )\*~([a-z0-9._-]+)/i,' <:name#\1>') #html name marker
line.gsub!(/^([56]~)(\S+)(.+)/,'\1\2 \3 <:name#\2>') #html name marker , however at present takes you to correct position within sub-toc, will nneed to clean from sub-toc leaving in main body only
line.gsub!(/(^| )\{~\^ (.+?)\s*\}((?:https?|file|ftp):\S+?)([;,.]?(?=\s[^~]|$))/,'\1{ \2 }\3\4 ~{ \3 }~ ') #text url endnote url shortcut {~^ [text] }http://url is { [text] }http://url ~{ http://url }~ [plus adjustment for commas] #means for this class, non-object, un-numbered ~# will not work # shortcut should not be used in conjunction with rebgular matches #reversed order, and addition of no-tilde..
line.gsub!(/(^| )\{~\^ (.+?)\s*\}((?:https?|file|ftp):\S+)\s+~\{(.+?)\}~/,'\1{ \2 }\3 ~{ \3 \4 }~') # watch
line.gsub!(/<:?br>/,'
') #xml requires
# depreciated -->
line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)e\{(.+?)\}e/,'\1\2') #emphasis
line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)b\{(.+?)\}b/,'\1\2') #bold
line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)u\{(.+?)\}u/,'\1\2') #underscore
line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)c\{(.+?)\}c/,'\1\2') #cite /blockquote?
line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)i\{(.+?)\}i/,'\1\2') #italics
# depreciated ^
line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)!\{(.+?)\}!/,'\1\2') #emphasis
line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)\*\{(.+?)\}\*/,'\1\2') #bold
line.gsub!(/(^|\s+|['"]| |[\(\[\{]|\>)_\{(.+?)\}_/,'\1\2') #underscore
line.gsub!(/(^|\s+|['"]| |[\(\[]|\(|\>)\/\{(.+?)\}\//,'\1\2') #italics
line.gsub!(/(^|\s+|['"]| |\(|\>)\"\{(.+?)\}\"/,'\1\2') #cite /blockquote?
line.gsub!(/(^|[^\\])\^\{(.+?)\}\^/,'\1\2') #superscript
line.gsub!(/(^|\s+|['"]| |\(|\>|\S)9\{(.+?)\}9/,'\1\2') #superscript
line.gsub!(/(^|[^\\]),\{(.+?)\},/,'\1\2') #subscript
line.gsub!(/(^|\s+|['"]| |\(|\>)6\{(.+?)\}6/,'\1\2') #subscript
line.gsub!(/(^|\s+|['"]| |\(|\>)\+\{(.+?)\}\+/,'\1\2') #inserted text
line.gsub!(/(^|\s+|['"]| |\(|\>)v\{(.+?)\}v/,'\1\2') #inserted text
line.gsub!(/(^|\s+|['"]| |\(|\>)-\{(.+?)\}-/,'\1\2') #strikethrough - deleted text
line.gsub!(/(^|\s+|['"]| |\(|\>)x\{(.+?)\}x/,'\1\2') #deleted text
line.gsub!(/(^|\s+|['"]| |\(|\>)\*(\S+?)\*/,'\1\2') #bold single word, watch
line.gsub!(/(^|\s+|['"]| |\(|\>)\!(\S+?)\!/,'\1\2') #bold single word, watch
line.gsub!(/(^|\s+|['"]| |\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([^a-zA-Z0-9]|[ ,.;:'"~$]|$)/,'\1\2\3') #italics single word, watch
line.gsub!(/(^|\s+|['"]| |\(|\>)_(\S+?)_([.,!'")]?(?:\s|$))/,'\1\2\3') #underscore single word, watch (made more complicated by url decoration escape tag (_url))
line.gsub!(/(^|\s+)-([^{]\S+?)-( |$)/,'\1\2\3') #underscore single word, watch
line.gsub!(/(^|\s+|['"]| |\(|\>|\d+)\^(\S+?)\^/,'\1\2') #superscript single word, watch digit added
line.gsub!(/<[:e]\s+(.+?)!?>/,'~{ \1 }~') # not tested
line.gsub!(/^\s*_([1-9])(\*+)\s*/,'<:i\1> _* ') #bullets, shortcut
line.gsub!(/^\s*_([1-9])\s+/,'<:i\1> ') #indent
line.gsub!(/(?:
|
)\s*_[12]\s+/,'
') #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ')
line.gsub!(/<:?br>/,'
') #adjustment 2004w41, from # line.gsub!(/
/,'
')
##added
#line.gsub!(/(?:^!_\s+|^[7-9]~\s+|<:b>)(.*)?([~-]#)$/i,'\1 \2') #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost!
#line.gsub!(/(?:^!_\s+|^[7-9]~\s+|<:b>)(.*)?\s*$/i,'\1') #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost!
#line.gsub!(/(?:(?:^| )!_ |^[7-9]~ |<:b>)(.*)\n/mi,'\1 ') #bold paragraph/emphasize #may wish to remove think about 7{ 8{ conversion not satisfactory, as information is lost!
#line.gsub!(/^_" (.*)\n/i,'\1
') #blockquotes #introduce KEEP
line.gsub!(/<:hi>/,'') # bright yellow rgb(255,255,0) pale yellow rgb(255,255,200)
line.gsub!(/<:\/hi>/,'')
line.gsub!(/(<:verse>.+)/m,"\\1\n")
line.gsub!(/[ ]+($)/,'\1')
if line =~/(<:(?:verse|group)>)/; line.gsub!(/(<:(?:verse|group)>)/i,"\\1\n") #cosmetic
else line.gsub!(/(
)/i,"\\1\n")
end
elsif line =~/^<:code(?:-end)?>|<:codeline>/ # /^<:code>/ #should be enough # underscore used as escape for angle brackets
line.gsub!(/([<>])/,'_\1')
line.gsub!(/_<:(\S+?)_>/,'<:\1>') #convert <:\S+> back, clumsy
line.gsub!(/_<(br(?: \/)?)_>/,'<\1>') #convert
back, clumsy
line.gsub!(/(^|\s)<(br(?: \/)?)>([\s,.]|$)/,'\1<\2>\3') #convert
back, clumsy
line.gsub!(/<:codeline>/,"\n ") #temporary fix, prefer: #line.gsub!(/<:codeline>/,"\n")
else # 0~
end
line
end
# Script markup planned to be more strict for technical documents.
#
# Prints 'tech', then rewrites every entry of @data in place by applying the
# rules below in order, and returns @data. Markup forms handled:
#   !{emphasis}!    e{emphasis}e   emphasis
#   *{bold text}*   b{bold}b       bold text
#   _{underline}_   u{underline}u  underline
#   /{italics}/     i{italics}i    italics
#   "{citation}"    c{citation}c   citation
#   ^{superscript}^ 9{..}9         superscript
#   ,{subscript},   6{..}6         subscript
#   +{inserted text}+ v{..}v       inserted text
#   -{deleted text}-  x{..}x       deleted text
#   *word* !word! /word/ _word_ ^word^  single-word forms
#   _1 .. _9 indents and _1* .. _9* bullets become <:iN> markers
# NOTE(review): the inline rules keep only the captured text and the final
# break rule substitutes a bare newline; the decoration that presumably wrapped
# them is not present in this copy - confirm against the upstream source.
def tech
  puts 'tech'
  inner_only='\1\2'
  rules=[
    [/(^|\s+|['"]|[\(\[]|\>)e\{(.+?)\}e/,inner_only], #emphasis
    [/(^|\s+|['"]|[\(\[]|\>)b\{(.+?)\}b/,inner_only], #bold
    [/(^|\s+|['"]|[\(\[]|\>)u\{(.+?)\}u/,inner_only], #underscore
    [/(^|\s+|['"]|[\(\[]|\>)c\{(.+?)\}c/,inner_only], #cite
    [/(^|\s+|['"]|[\(\[]|\>)i\{(.+?)\}i/,inner_only], #italics
    [/(^|\s+|['"]|[\(\[]|\>)!\{(.+?)\}!/,inner_only], #emphasis
    [/(^|\s+|['"]|[\(\[]|\>)\*\{(.+?)\}\*/,inner_only], #bold
    [/(^|\s+|['"]|[\(\[]|\>)_\{(.+?)\}_/,inner_only], #underscore
    [/(^|\s+|['"]|[\(\[]|\(|\>)\/\{(.+?)\}\//,inner_only], #italics
    [/(^|\s+|['"]|\(|\>)\"\{(.+?)\}\"/,inner_only],
    [/(^|\s+|['"]|\(|\>|\S)\^\{(.+?)\}\^/,inner_only],
    [/(^|\s+|['"]|\(|\>|\S)9\{(.+?)\}9/,inner_only],
    [/(^|\s+|['"]|\(|\>),\{(.+?)\},/,inner_only],
    [/(^|\s+|['"]|\(|\>)6\{(.+?)\}6/,inner_only],
    [/(^|\s+|['"]|\(|\>)\+\{(.+?)\}\+/,inner_only],
    [/(^|\s+|['"]|\(|\>)v\{(.+?)\}v/,inner_only],
    [/(^|\s+|['"]|\(|\>)-\{(.+?)\}-/,inner_only],
    [/(^|\s+|['"]|\(|\>)x\{(.+?)\}x/,inner_only],
    [/(^|\s+|['"]|\(|\>)\*(\S+?)\*/,inner_only], #bold single word, watch
    [/(^|\s+|['"]|\(|\>)\!(\S+?)\!/,inner_only], #bold single word, watch
    [/(^|\s+|['"]|\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([ ,.;:'"~$]|[^a-zA-Z0-9])/,'\1\2\3'], #italics single word, watch
    [/(^|\s+|['"]|\(|\>)_(\S+?)_/,inner_only], #underscore single word, watch
    [/(^|\s+|['"]|\(|\>|\d+)\^(\S+?)\^/,inner_only], #superscript single word, watch digit added
    [/^\s*_([1-9])(\*+)\s*/,'<:i\1> _* '], # bullets, shortcut
    [/^\s*_([1-9])\s+/,'<:i\1> '],
    [/<:?br>/,'
']
  ]
  @data.each do |line|
    rules.each { |pattern,replacement| line.gsub!(pattern,replacement) }
  end
  @data
end
end
end
__END__
NOTE:
downstream, code blocks are not yet honoured, e.g. content within angle brackets is removed