sisu-0.54.0 (minor additions to syntax, indent range extended) from upstream

* indent levels (1-9) added [previously two levels] done for plaintext, html, xml, odf, latex/pdf [minor syntax addition (_3 - _9), version number change]
* bullet indent levels (1-9) added [previously two levels] done for plaintext, html, odf, latex/pdf [minor syntax addition (_3* - _9*), version number change]
* url decoration, open close, default angle brackets <http://url>
* fixes, some line-breaking for plaintext (and text groups for) odf and xml
* css, modifications for extended indents in html and xml [sisu -CC (for update)]
* vim syntax highlighter updated to take account of extended indent range
* rant installer minor changes for future ruby
* conversion script in data/sisu/conf/convert/sisu_convert
author: Ralph Amissah <ralph.amissah@gmail.com> 2007-06-14 10:21:17 +0100
committer: Ralph Amissah <ralph.amissah@gmail.com> 2007-06-14 10:21:17 +0100
commit: b130d967b295b548d837ea5f603a6b0fec7a08a8 (patch)
tree: 7f56fde49ae81ec8161d43239ce50af8674d8d49 /data
parent: gitignore (diff)
2 files changed, 265 insertions, 138 deletions
diff --git a/data/sisu/conf/convert/sisu_convert b/data/sisu/conf/convert/sisu_convert
index d7876083..9a734678 100644
--- a/data/sisu/conf/convert/sisu_convert
+++ b/data/sisu/conf/convert/sisu_convert
@@ -27,47 +27,47 @@ module CONVERT
     end
 	  def headerBasic
       <<WOK
-0~title 
+@title: 
 
-0~subtitle 
+@subtitle: 
 
-0~creator 
+@creator: 
 
-0~type 
+@type: 
 
-0~subject
+@subject: 
 
-0~date
+@date: 
 
-0~date.available
+@date.available: 
 
-0~publisher SiSU
+@publisher: SiSU
 
-0~rights 
+@rights: 
 
-0~level
+@level:
 
 WOK
 	  end
 	  def headerDefault
       <<WOK
-0~title 
+@title: 
 
-0~subtitle 
+@subtitle: 
 
-0~creator 
+@creator: 
 
-0~type 
+@type: 
 
-0~subject
+@subject: 
 
-0~date
+@date: 
 
-0~date.available
+@date.available: 
 
-0~publisher SiSU
+@publisher: SiSU
 
-0~rights ...
+@rights: 
 
 WOK
     end
@@ -79,7 +79,7 @@ WOK
       else
         pre << headerBasic
       end
-	  	@filename_wv=File.new(%{,,#{@filename}.er9}, "w+")
+	  	@filename_wv=File.new(%{,,#{@filename}.sst},'w+')
       @filename_wv << pre
       @data.each do |x| 
         y = x.split("\n")
@@ -98,11 +98,11 @@ WOK
     end
 	  def songsheet
       data=@data
-    	print "Convert to SiSU file from Word97 << gvim ,,#{@filename}.er9 >\n" #: <<#{@@html_title}>>
-	    data=WareWord97.new(data.collect, @filename, @instruct).strip
-	    data=WareWord97.new(data.collect, @filename, @instruct).strip
-	    data=WareWord97.new(data.collect, @filename, @instruct).markup_rules
-	  	data=MyOutput.new(data.collect, @filename, @instruct).hardOutput
+    	print "Convert to SiSU file from Word97 << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>>
+	    data=WareWord97.new(data.collect,@filename,@instruct).strip
+	    data=WareWord97.new(data.collect,@filename,@instruct).strip
+	    data=WareWord97.new(data.collect,@filename,@instruct).markup_rules
+	  	data=MyOutput.new(data.collect,@filename,@instruct).hardOutput
 	  end
     def strip
       data=@data
@@ -110,12 +110,12 @@ WOK
       endnote_no=1
       data.each do |para|
 		  	para.strip!
-        para.gsub!(/<u>\s*<\/u>/, '')
-        para.gsub!(/<\/u>\s*<u>/, '')
-        para.gsub!(/<b>\s*<\/b>/, '')
-        para.gsub!(/<\/b>\s*<b>/, '')
-        para.gsub!(/<i>\s*<\/i>/, '')
-        para.gsub!(/<\/i>\s*<i>/, '')
+        para.gsub!(/<u>\s*<\/u>/,'')
+        para.gsub!(/<\/u>\s*<u>/,'')
+        para.gsub!(/<b>\s*<\/b>/,'')
+        para.gsub!(/<\/b>\s*<b>/,'')
+        para.gsub!(/<i>\s*<\/i>/,'')
+        para.gsub!(/<\/i>\s*<i>/,'')
       	tuned_file << para unless para == nil
 		  end
       tuned_file
@@ -126,15 +126,16 @@ WOK
       endnote_no=1
       data.each do |para|
 		  	para.strip!
-        para.gsub!(/\s+/, ' ')
-        para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i, "4{ \\1 \\2") #watch case insensitivity
-        para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i, "3{ \\1 \\2") #watch case insensitivity
-        para.gsub!(/^<b>(\d+\.\d+\.\d+)(.+?)<\/b>/i, "6{ \\1 \\2") #numeric, decide what to do, can be different
-        para.gsub!(/^<b>(\d+\.\d+)(.+?)<\/b>/i, "5{ \\1 \\2") #numeric, decide what to do, can be different
-        para.gsub!(/^<b>(\d+)(.+?)<\/b>/i, "4{ \\1 \\2") #numeric, decide what to do, can be different
-        para.gsub!(/<u>(.+?)<\/u>/, "_{\\1}_")
-        para.gsub!(/<b>(.+?)<\/b>/, "*{\\1}*")
-        para.gsub!(/<i>(.+?)<\/i>/, "/{\\1}/")
+        para.gsub!(/\s+/,' ')
+        para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i,'4~ \1 \2') #watch case insensitivity
+        para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i,'3~ \1 \2') #watch case insensitivity
+        para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i,'6~ \1 \2') #numeric, decide what to do, can be different
+        para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i,'5~ \1 \2') #numeric, decide what to do, can be different
+        para.gsub!(/^<b>(\d.+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different
+        #para.gsub!(/^<b>([\d.]+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different
+        para.gsub!(/<u>(.+?)<\/u>/,'_{\1}_')
+        para.gsub!(/<b>(.+?)<\/b>/,'*{\1}*')
+        para.gsub!(/<i>(.+?)<\/i>/,'/{\1}/')
       	tuned_file << para unless para == nil
 		  end
       tuned_file
@@ -148,10 +149,11 @@ WOK
     end
 	  def songsheet
       data=@data
-    	print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.er9 >\n" #: <<#{@@html_title}>>
+    	print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>>
       #data=Html.new(data.collect, @filename, @instruct).space_paragraphs
       #data=Html.new(data.split(''), @filename, @instruct).space_paragraphs
-      data=Html.new(data.split("\n"), @filename, @instruct).space_paragraphs
+      data=Html.new(data.join.split(/\n\n+/), @filename, @instruct).space_paragraphs
+      #data=Html.new(data.split("\n"), @filename, @instruct).space_paragraphs
       #data=Html.new(data.collect.join.split("\n"), @filename, @instruct).space_paragraphs
       data=Html.new(data.collect, @filename, @instruct).multiline
       data=Html.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules
@@ -164,18 +166,18 @@ WOK
       tuned_file=Array.new
       data.each do |para|
 		  	para.strip!
-        para.gsub!(/\r/, '')
+        para.gsub!(/\r/,'')
         #para.gsub!(/\n/, ' ') #PROBLEM, serious time issues on a few files also for \n (or multiline matches which is less surprising), edit out if necessary
-        para.gsub!(/<\/?p>/i, 'zZz')
-        para.gsub!(/<\/?\s*p(?:\s+ALIGN=.+?)?>/i, "zZz") #all manner of <p> para.gsub!(/<\/?p>/i, "\n\n")
-        para.gsub!(/<p\s+(class|align).+?>/i, "zZz") #
-        para.gsub!(/<\/p>/i, "zZz") # repeat actually
-        para.gsub!(/<(?:dir|tr|br)>/i, "zZz") #
+        para.gsub!(/<\/?p>/i,'zZz')
+        para.gsub!(/<\/?\s*p(?:\s+ALIGN=.+?)?>/i,'zZz') #all manner of <p> para.gsub!(/<\/?p>/i, "\n\n")
+        para.gsub!(/<p\s+(class|align).+?>/i,'zZz') #
+        para.gsub!(/<\/p>/i,'zZz') # repeat actually
+        para.gsub!(/<(?:dir|tr|br)>/i,'zZz') #
         #para.gsub!(/<(?:\/\s*)?(?:dir|tr|br)>/i, "zZz") #
-        para.gsub!(/(<\/center>)/i, "\\1zZz")
-        para.gsub!(/(<\/h[1-6]>)/i, "\\1zZz")
-        para.gsub!(/ \s+/i, ' ')
-        para.gsub!(/(?:\s*zZz\s*)+/i, "zZz") #
+        para.gsub!(/(<\/center>)/i,'\1zZz')
+        para.gsub!(/(<\/h[1-6]>)/i,'\1zZz')
+        para.gsub!(/ \s+/i,' ')
+        para.gsub!(/(?:\s*zZz\s*)+/i,'zZz') #
       	tuned_file << para unless para == nil
       end
       tuned_file
@@ -185,10 +187,10 @@ WOK
       sub.each do |x|
         if x=~/(<\/blockquote>)/i
           m = $1
-          res << x[/(.+?)#{m}/mi, 1].gsub!(/zZz/,"zZz_1 ") if x =~/.+?#{m}/mi 
-          res << x[/#{m}(.+)/mi, 1]
+          res << x[/(.+?)#{m}/mi,1].gsub!(/zZz/,'zZz_1 ') if x =~/.+?#{m}/mi 
+          res << x[/#{m}(.+)/mi,1]
         else
-          res << x #[/(.+)/mi, 1]
+          res << x #[/(.+)/mi,1]
         end
       end
       res.join
@@ -197,26 +199,27 @@ WOK
       data=@data
       tuned_file=Array.new
       data.each do |para|
-        para.gsub!(/\n/, ' ')
-        para.gsub!(/ \s+/mi, ' ')
+        para.gsub!(/\n/,' ')
+        para.gsub!(/ \s+/mi,' ')
        #ALL HERE could be very time EXPENSIVE but tamed? compromise ... /mi
-        para.gsub!(/<([biu]|h[1-6])>(?:zZz)?([^<]+)?zZz(.+?)<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>")
-        para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)(?:<\/center>)?zZz(.+?)?<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>")
-        #para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/center>zZz(.+?)?<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>")
-        para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/\1>/i, "zZz<\\1>\\2</\\1>")
-        para.gsub!(/<(h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i, "zZz<\\1>\\2</\\1>zZz") #does catch some h1, h2 etc, too expensive to have biu
-        #para.gsub!(/<([biu]|h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>") #may go too far? useful for h1 h2 etc, remove biu?
-        #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>")
-        #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i, "zZz<\\1>\\2 \\3</\\1>")
+        para.gsub!(/<([biu]|h[1-6])>(?:zZz)?([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>')
+        para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)(?:<\/center>)?zZz(.+?)?<\/\1>/i,'zZz<\1>\2 \3</\1>')
+        #para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/center>zZz(.+?)?<\/\1>/i,'zZz<\1>\2 \3</\1>')
+        para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/\1>/i,'zZz<\1>\2</\1>')
+        para.gsub!(/<(h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i,'zZz<\1>\2</\1>zZz') #does catch some h1, h2 etc, too expensive to have biu
+        #para.gsub!(/<([biu]|h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i,'zZz<\1>\2 \3</\1>') #may go too far? useful for h1 h2 etc, remove biu?
+        #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>')
+        #para.gsub!(/<([biu]|h[1-6])>([^<]+)?zZz(.+?)<\/\1>/i,'zZz<\1>\2 \3</\1>')
        ### SERIOUS PROBLEM INTRODUCED
        # sub = para.split(/<blockquote>/i)
        # para = blockquotes(sub) if sub.length > 0 #check was on >1 could have serious repercussions 2004w29
-        para.gsub!(/zZz(\s*zZz)*/, "\n\n")
+        para.gsub!(/zZz(\s*zZz)*/,"\n\n")
       	tuned_file << para << "\n\n" unless para == nil
       end
       tuned_file
     end
     def markup_rules
+      @@flag_blockquote=false
       data=@data
       tuned_file=Array.new
       data.each do |para|
@@ -224,65 +227,186 @@ WOK
           #p para.grep(/<a href="(http:\/\/.+?)">/i)
           #m=$1
           #para.gsub!(/(?:&lt;\s*)?<a href="#{m}">#{m}<\/a>(?:\s*&gt;)?\.?/i, "#{m}")
-          para.gsub!(/(?:&lt;\s*)?<a href="(http:\/\/.+?)">http:\/\/.+?<\/a>(?:\s*&gt;)?\.?/i, "\\1") #risk that url & url are not to match
+          para.gsub!(/(?:&lt;\s*)?<a href="(http:\/\/.+?)">http:\/\/.+?<\/a>(?:\s*&gt;)?\.?/i,'\1') #risk that url & url are not to match
           #para.gsub!(/(?:&lt;\s*)?<a href="(\w+\.html)">(http:\/\/.+?\/\1)<\/a>(?:\s*&gt;)?\.?/i, "\\2") #does not match
         end
+        if para=~/<BLOCKQUOTE>/i
+          @@flag_blockquote=true
+        end
+        if @@flag_blockquote
+          para.gsub!(/^/,'_1 ') unless para.empty? or para =~/^\s*<\/?blockquote?>\s*$/i
+        end
+        if para=~/<\/BLOCKQUOTE>/i
+          @@flag_blockquote=false
+        end
+        para.gsub!(/<\/?blockquote?>/i,'')
         ### clean
-        para.gsub!(/^\s+/i, '')
-        para.gsub!(/<([bui]|em|su[pb])>\s*<\/\1>/i, '')
-        para.gsub!(/<\/?center>/i, '')
-        para.gsub!(/\s*<\/dir>/i, '')
-        para.gsub!(/<hr>/i, '')
-        para.gsub!(/\s*<a href=".+?\.html#(?:[a-z_]+)?(?:[a-z0-9_-]|\*)+">\[(\*+)\]<\/a>/i, "^{[\\1]}^ ") #other endnote marker
-        para.gsub!(/<a href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"(?:\s+name=".+?")?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~e ') #endnote marker
-        para.gsub!(/<a name=".+?"\s+href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~e ') #endnote marker
-        para.gsub!(/<a name="(?:[a-z$]+)?[0-9_-]+">\s*(<\/a>)?\s*\d+\.?\s*(<\/a>)?\s*/i, '~{{ ') #endnote
-        #para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i, "\\1{ \\2") #
-        para.gsub!(/<h([1-6])(?: align=.+?)?>\s*(.+?)\s*<\/h\1>\s*/i, "\\1{ \\2") #
-        para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i, "4{ \\1 \\2") #watch case insensitivity
-        para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i, "3{ \\1 \\2") #watch case insensitivity
-        para.gsub!(/^<b>(\d+\.\d+\.\d+)(.+?)<\/b>/i, "6{ \\1 \\2") #numeric, decide what to do, can be different
-        para.gsub!(/^<b>(\d+\.\d+)(.+?)<\/b>/i, "5{ \\1 \\2") #numeric, decide what to do, can be different
-        para.gsub!(/^<b>(\d+)(.+?)<\/b>/i, "4{ \\1 \\2") #numeric, decide what to do, can be different
+        para.gsub!(/^\s+/i,'')
+        para.gsub!(/<([bui]|em|su[pb])>\s*<\/\1>/i,'')
+        para.gsub!(/<\/?center>/i,'')
+        para.gsub!(/\s*<\/dir>/i,'')
+        para.gsub!(/<hr>/i,'')
+        para.gsub!(/\s*<a href=".+?\.html#(?:[a-z_]+)?(?:[a-z0-9_-]|\*)+">\[(\*+)\]<\/a>/i,'^{[\1]}^ ') #other endnote marker
+        para.gsub!(/<a href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"(?:\s+name=".+?")?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i,'~^ ') #endnote marker
+        para.gsub!(/<a name=".+?"\s+href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i,'~^ ') #endnote marker
+        para.gsub!(/<a name="(?:[a-z$]+)?[0-9_-]+">\s*(<\/a>)?\s*\d+\.?\s*(<\/a>)?\s*/i,'^~ ') #endnote
+        #para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') #
+        para.gsub!(/<h([1-6])(?: align=.+?)?>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') #
+        para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i,'4~ \1 \2') #watch case insensitivity
+        para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i,'3~ \1 \2') #watch case insensitivity
+        para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i,'6~ \1 \2') #numeric, decide what to do, can be different
+        para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i,'5~ \1 \2') #numeric, decide what to do, can be different
+        para.gsub!(/^<b>(\d+\.?)(.+?)<\/b>/i,'4~ \1 \2') #numeric, decide what to do, can be different
         #<a name="ii"></a><B>
-        para.gsub!(/^(<a name=".+?">)(?:<small>)?<(?:b|strong)>\s*(.+?)\s*<\/(?:b|strong)>/i, "5{ \\2 \\1") #watch
-        para.gsub!(/^(<(a name|A NAME)=".+?">)(\s*|<\/[aA]>)?([A-Z][A-Z])+/, "5{ \\2 \\1") #watch
-        para.gsub!(/^(\s+|<p>)?(<a name=".+?">)(\s*|<\/a>)?<b>/i, "5{ \\2 \\1") #watch
-        para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i, "\\1{ \\2") #
-        para.gsub!(/^<b>\s*(.+?)<\/b>\s*(<\/i>\s*)?$/i, "4{ \\1\\2") # wish it all were less messy
-        para.gsub!(/^<i>\s*([^"(].+?)<\/i>\s*(<\/b>\s*)?$/i, "5{ \\1\\2") # wish it all were less messy
-        para.gsub!(/<\/?[biu]>/i, '') if para =~/[1-6]\{/
-        para.gsub!(/<u>\s*(.+?)\s*<\/u>/i, "_{\\1}_")
-        para.gsub!(/<(b|strong)>\s*(.+?)\s*<\/\1>/i, "*{\\2}*")
-        para.gsub!(/<(i|em)>\s*(.+?)\s*<\/\1>/i, "/{\\2}/")
-        para.gsub!(/<sup>\s*(.+?)\s*<\/sup>/i, "^{\\1}^")
-        para.gsub!(/(([\/\*!_])\{.+?\}\2)\s\s+/i, "\\1 ")
-        para.gsub!(/(([\/\*!_])\{.+?\}\2)\s+([.,;?\)])\s+/i, "\\1\\3 ")
-        para.gsub!(/(([\/\*!_])\{.+?\}\2)(["'])\s+/i, "\\1\\3 ")
-        para.gsub!(/(([\/\*!_])\{.+?\}\2)\s*([a-z0-9])/i, "\\1 \\3")
-        para.gsub!(/(([\/\*_])\{.+?\}\2)\s*([a-z0-9])/i, "\\1 \\3")
-        para.gsub!(/([a-z0-9])(([\/\*_])\{.+?\}\3)/i, " \\1 \\2") #eg this/{problem}/
-        para.gsub!(/([\/\*_])\{([,.;; ]+)\}\1/i, "\\2") #eg /{,}/ or *{ }* etc.
-        para.gsub!(/ \s+/i, ' ')
+        para.gsub!(/^(<a name=".+?">)(?:<small>)?<(?:b|strong)>\s*(.+?)\s*<\/(?:b|strong)>/i,'5~ \2 \1') #watch
+        para.gsub!(/^(<(a name|A NAME)=".+?">)(\s*|<\/[aA]>)?([A-Z][A-Z])+/,'5~ \2 \1') #watch
+        para.gsub!(/^(\s+|<p>)?(<a name=".+?">)(\s*|<\/a>)?<b>/i,'5~ \2 \1') #watch
+        para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i,'\1~ \2') #
+        para.gsub!(/^<b>\s*(.+?)<\/b>\s*(<\/i>\s*)?$/i,'4~ \1\2') # wish it all were less messy
+        para.gsub!(/^<i>\s*([^"(].+?)<\/i>\s*(<\/b>\s*)?$/i,'5~ \1\2') # wish it all were less messy
+        para.gsub!(/<\/?[biu]>/i,'') if para =~/[1-6]\{/
+        para.gsub!(/<u>\s*(.+?)\s*<\/u>/i,'_{\1}_')
+        para.gsub!(/<(b|strong)>\s*(.+?)\s*<\/\1>/i,'*{\2}*')
+        para.gsub!(/<(i|em)>\s*(.+?)\s*<\/\1>/i,'/{\2}/')
+        para.gsub!(/<sup>\s*(.+?)\s*<\/sup>/i,'^{\1}^')
+        para.gsub!(/(([\/\*!_])\{.+?\}\2)\s\s+/i,'\1 ')
+        para.gsub!(/(([\/\*!_])\{.+?\}\2)\s+([.,;?\)])\s+/i,'\1\3 ')
+        para.gsub!(/(([\/\*!_])\{.+?\}\2)(["'])\s+/i,'\1\3 ')
+        para.gsub!(/(([\/\*!_])\{.+?\}\2)\s*([a-z0-9])/i,'\1 \3')
+        para.gsub!(/(([\/\*_])\{.+?\}\2)\s*([a-z0-9])/i,'\1 \3')
+        para.gsub!(/([a-z0-9])(([\/\*_])\{.+?\}\3)/i,' \1 \2') #eg this/{problem}/
+        para.gsub!(/([\/\*_])\{([,.;; ]+)\}\1/i,'\2') #eg /{,}/ or *{ }* etc.
+        para.gsub!(/ \s+/i,' ')
         #para.gsub!(/\/\{\*\{/i, '*{/{')
         #para.gsub!(/\}\*\}\//i, '}/}*')
-        para.gsub!(/&quot;/i, '"')
-        para.gsub!(/&amp;/i, 'and')
-        para.gsub!(/<!doctype html public .+/i, '')
-        para.gsub!(/<\/?(?:html|head|body|font|small)>/i, '')
-        para.gsub!(/<\/(?:title)>/i, '')
-        para.gsub!(/<title>/i, '#{~title? ')
-        para.gsub!(/<blockquote>(.+?)<\/blockquote>/mi, "\n\n_1 \\1\n\n")
-        para.gsub!(/<div align=.+?>|<\/div>|<font size=.+?>|<\/a><\/em><\/strong>/i, '')
-        para.gsub!(/~e\s+\.\s*/i, ".~e ") #check vim equiv # %s/\~e\s\+\.\s*/.\~e /c
-        para.gsub!(/\s+~e\s+/i, "~e ")
-        para.gsub!(/ \s+/i, ' ')
-        para.gsub!(/\s+$/i, '')
-        para.gsub!(/^(?:<\/[bi]>)+$/i, '')
-        para.gsub!(/^(?:(?:<i>)+<b>|(?:<b>)+<i>)\s*([^"(].+?)/i, "5{ \\1\\2") # wish it all were less messy
-        para.gsub!(/^(?:<\/?(?:[ib]|em)>\s*)+$/i, '') # cleaning up left over <i> etc.
-        para.gsub!(/<(?:i|em)>\s*(.+)/i, "/{\\1}/") # using up left over <i>
-        para.gsub!(/<b>\s*(.+)/i, "*{\\1}*") # using up left over <b>
+        para.gsub!(/&quot;/i,'"')
+        para.gsub!(/&amp;/i,'and')
+        para.gsub!(/<!doctype html public .+/i,'')
+        para.gsub!(/<\/?(?:html|head|body|font|small)>/i,'')
+        para.gsub!(/<\/(?:title)>/i,'')
+        para.gsub!(/<title>/i,'#{~title? ')
+        para.gsub!(/<blockquote>(.+?)<\/blockquote>/mi,"\n\n_1 \\1\n\n")
+        para.gsub!(/<div align=.+?>|<\/div>|<font size=.+?>|<\/a><\/em><\/strong>/i,'')
+        para.gsub!(/~^\s+\.\s*/i,'.~^ ') #check vim equiv # %s/\~e\s\+\.\s*/.\~e /c
+        para.gsub!(/\s+~^\s+/i,'~^ ')
+        para.gsub!(/ \s+/i,' ')
+        para.gsub!(/\s+$/i,'')
+        para.gsub!(/^(?:<\/[bi]>)+$/i,'')
+        para.gsub!(/^(?:(?:<i>)+<b>|(?:<b>)+<i>)\s*([^"(].+?)/i,'5~ \1\2') # wish it all were less messy
+        para.gsub!(/^(?:<\/?(?:[ib]|em)>\s*)+$/i,'') # cleaning up left over <i> etc.
+        para.gsub!(/<(?:i|em)>\s*(.+)/i,'/{\1}/') # using up left over <i>
+        para.gsub!(/<b>\s*(.+)/i,'*{\1}*') # using up left over <b>
+        para.gsub!(/<dd>([\d.]+)/i,'5~ \1')
+        para.gsub!(/<dd>(?:&nbsp;)+([\d.]+)/i,'6~ \1')
+        para.gsub!(/<dd>(\([a-z]\))/i,'7~ \1')
+        para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">(.+?)(<\/a>)/i,'\1\3\2\4')
+        para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">/i,'\1\3\2')
+        para.gsub!(/http\/\/(\S+)/i,'http:\/\/\1')
+        para.gsub!(/\s*<a href="\S+?">(http:\/\/\S+?)<\/a>\s*/i,' \1 ')
+        para.gsub!(/([a-zA-Z.,!?;:])([*\/_-]\{)/,'\1 \2')
+        para.gsub!(/^\s*(&nbsp;){10,12}/i,'_2 ')
+        para.gsub!(/^\s*(&nbsp;){4,5}/i,'_1 ')
+        para.gsub!(/&#9;/,' ') #check
+        ## glyphs & tildes
+        para.gsub!(/&iexcl;/,  '¡') #'Inverted exclamation     
+        para.gsub!(/&cent;/,   '¢') #'Cent sign               ¢
+        para.gsub!(/&pound;/,  '£') #'Pound sign              £
+        para.gsub!(/&curren;/, '¤') #'General currency sign    
+        para.gsub!(/&yen;/,    '¥') #'Yen sign                ¥
+        para.gsub!(/&brvbar;/, '¦') #'Broken vertical bar      
+        para.gsub!(/&sect;/,   '§') #'Section sign            §
+        para.gsub!(/&uml;/,    '¨') #'Umlaut                   
+        para.gsub!(/&copy;/,   '©') #'Copyright               ©
+        para.gsub!(/&ordf;/,   'ª') #'Feminine ordinal        ª
+        para.gsub!(/&laquo;/,  '«') #'Left angle quote        «
+        para.gsub!(/&not;/,    '¬') #'Not sign                 
+        para.gsub!(/&shy;/,    '') #'Soft hyphen              
+        para.gsub!(/&reg;/,    '®') #'Registered trademark    ®
+        para.gsub!(/&macr;/,   '¯') #'Macron accent            
+        para.gsub!(/&deg;/,    '°') #'Degree sign             °
+        para.gsub!(/&plusmin;/,'±') #'Plus or minus           ±
+        para.gsub!(/&sup2;/,   '²') #'Superscript 2           ²
+        para.gsub!(/&sup3;/,   '³') #'Superscript 3           ³
+        para.gsub!(/&acute;/,  '') #'Acute accent             
+        para.gsub!(/&micro;/,  'µ') #'Micro sign (Greek mu)   µ
+        para.gsub!(/&para;/,   '¶') #'Paragraph sign          ¶
+        para.gsub!(/&middot;/, '·') #'Middle dot               
+        para.gsub!(/&cedil;/,  '¸') #'Cedilla                  
+        para.gsub!(/&sup1;/,   '¹') #'Superscript 1           ¹
+        para.gsub!(/&ordm;/,   'º') #'Masculine ordinal       º
+        para.gsub!(/&raquo;/,  '»') #'Right angle quote        
+        para.gsub!(/&frac14;/, '¼') #'Fraction one quarter    ¼
+        para.gsub!(/&frac12;/, '½') #'Fraction on half        ½
+        para.gsub!(/&frac34;/, '¾') #'Fraction three quarters ¾
+        para.gsub!(/&iquest;/, '¿') #'Inverted question mark  ¿
+        para.gsub!(/&Agrave;/, 'À') #'Capital A, grave accent À
+        para.gsub!(/&Aacute;/, 'Á') #'Capital A, acute accent Á
+        para.gsub!(/&Acirc;/,  'Â') #'Capital A, circumflex accent Â
+        para.gsub!(/&Atilde;/, 'Ã') #'Capital A, tilde        Ã
+        para.gsub!(/&Auml;/,   'Ä') #'Capital A, umlaut       Ä
+        para.gsub!(/&Aring;/,  'Å') #'Capital A, ring         Å
+        para.gsub!(/&AElig;/,  'Æ') #'Capital AE ligature     Æ
+        para.gsub!(/&Ccedil;/, 'Ç') #'Capital C, cedilla      Ç
+        para.gsub!(/&Egrave;/, 'È') #'Capital E, grave accent È
+        para.gsub!(/&Eacute;/, 'É') #'Capital E, acute accent É
+        para.gsub!(/&Ecirc;/,  'Ê') #'Capital E, circumflex accent Ê
+        para.gsub!(/&Euml;/,   'Ë') #'Capital E, umlaut       Ë
+        para.gsub!(/&Igrave;/, 'Ì') #'Capital I, grave accent Ì
+        para.gsub!(/&Iacute;/, 'Í') #'Capital I, acute accent Í
+        para.gsub!(/&Icirc;/,  'Î') #'Capital I, circumflex accent Î
+        para.gsub!(/&Iuml;/,   'Ï') #'Capital I, umlaut       Ï
+        para.gsub!(/&ETH;/,    'Ð') #'Capital eth, Icelandic   
+        para.gsub!(/&Ntilde;/, 'Ñ') #'Capital N, tilde        Ñ
+        para.gsub!(/&Ograve;/, 'Ò') #'Capital O, grave accent Ò
+        para.gsub!(/&Oacute;/, 'Ó') #'Capital O, acute accent Ó
+        para.gsub!(/&Ocirc;/,  'Ô') #'Capital O, circumflex accent Ô
+        para.gsub!(/&Otilde;/, 'Õ') #'Capital O, tilde        Õ
+        para.gsub!(/&Ouml;/,   'Ö') #'Capital O, umlaut       Ö
+        para.gsub!(/&times;/,  '×') #'Multiply sign           ×
+        para.gsub!(/&Oslash;/, 'Ø') #'Capital O, slash        Ø
+        para.gsub!(/&Ugrave;/, 'Ù') #'Capital U, grave accent Ù
+        para.gsub!(/&Uacute;/, 'Ú') #'Capital U, acute accent Ú
+        para.gsub!(/&Ucirc;/,  'Û') #'Capital U, circumflex accent Û
+        para.gsub!(/&Uuml;/,   'Ü') #'Capital U, umlaut       Ü
+        para.gsub!(/&Yacute;/, 'Ý') #'Capital Y, acute accent Ý
+        para.gsub!(/&THORN;/,  'Þ') #'Capital thorn, Icelandic     Þ
+        para.gsub!(/&szlig;/,  'ß') #'Small sz ligature, German    ß
+        para.gsub!(/&agrave;/, 'à') #'Small a, grave accent   à
+        para.gsub!(/&aacute;/, 'á') #'Small a, acute accent   á
+        para.gsub!(/&acirc;/,  'â') #'Small a, circumflex accent   â
+        para.gsub!(/&atilde;/, 'ã') #'Small a, tilde          ã
+        para.gsub!(/&auml;/,   'ä') #'Small a, umlaut         ä
+        para.gsub!(/&aring;/,  'å') #'Small a, ring           å
+        para.gsub!(/&aelig;/,  'æ') #'Small ae ligature       æ
+        para.gsub!(/&ccedil;/, 'ç') #'Small c, cedilla        ç
+        para.gsub!(/&egrave;/, 'è') #'Small e, grave accent   è
+        para.gsub!(/&eacute;/, 'é') #'Small e, acute accent   é
+        para.gsub!(/&ecirc;/,  'ê') #'Small e, circumflex accent   ê
+        para.gsub!(/&euml;/,   'ë') #'Small e, umlaut         ë
+        para.gsub!(/&igrave;/, 'ì') #'Small i, grave accent   ì
+        para.gsub!(/&iacute;/, 'í') #'Small i, acute accent   í
+        para.gsub!(/&icirc;/,  'î') #'Small i, circumflex accent   î
+        para.gsub!(/&iuml;/,   'ï') #'Small i, umlaut         ï
+        para.gsub!(/&eth;/,    'ð') #'Small eth, Icelandic    ð
+        para.gsub!(/&ntilde;/, 'ñ') #'Small n, tilde          ñ
+        para.gsub!(/&ograve;/, 'ò') #'Small o, grave accent   ò
+        para.gsub!(/&oacute;/, 'ó') #'Small o, acute accent   ó
+        para.gsub!(/&ocirc;/,  'ô') #'Small o, circumflex accent   ô
+        para.gsub!(/&otilde;/, 'õ') #'Small o, tilde          õ
+        para.gsub!(/&ouml;/,   'ö') #'Small o, umlaut         ö
+        para.gsub!(/&divide;/, '÷') #'Divide sign             ÷
+        para.gsub!(/&oslash;/, 'ø') #'Small o, slash          ø
+        para.gsub!(/&ugrave;/, 'ù') #'Small u, grave accent   ù
+        para.gsub!(/&uacute;/, 'ú') #'Small u, acute accent   ú
+        para.gsub!(/&ucirc;/,  'û') #'Small u, circumflex accent   û
+        para.gsub!(/&uuml;/,   'ü') #'Small u, umlaut         ü
+        para.gsub!(/&yacute;/, 'ý') #'Small y, acute accent   ý
+        para.gsub!(/&thorn;/,  'þ') #'Small thorn, Icelandic  þ
+        para.gsub!(/&yuml;/,   'ÿ') #'Smally y, umlaut        ÿ
+        ##
+        para.gsub!(/\s\s+/,' ')
+        para.gsub!(/\t+/,' ')
+        #para.gsub!(/	+/,' ')
         #para.gsub!(/^(?:<(?:\/)?[bi]>)+$/i, '')
       	tuned_file << para unless para == nil
 		  end
@@ -297,7 +421,7 @@ WOK
     end
 	  def songsheet
       data=@data
-    	print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.er9 >\n" #: <<#{@@html_title}>>
+    	print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n" #: <<#{@@html_title}>>
 	    data=Default.new(data.collect, @filename, @instruct).space_paragraphs
       data=Default.new(data.collect, @filename, @instruct).multiline
       data=Default.new(data.collect.join.split("\n\n"), @filename, @instruct).markup_rules
@@ -308,9 +432,9 @@ WOK
       data=@data
       tuned_file=Array.new
       data.each do |para|
-        para.gsub!(/<i>(Id\.?)(\s|$)/i, "/\{\\1\}\\2/")
-        para.gsub!(/^(~\{\{ .+?)(<\/LI>\s*|<\/OL>\s*)+$/i, "\\1")
-        para.gsub!(/\/\{Id\.\s*<\/LI>\s*\}\//i, '/{Id.}/')
+        para.gsub!(/<i>(Id\.?)(\s|$)/i,'/\{\1\}\2/')
+        para.gsub!(/^(~\{\{ .+?)(<\/LI>\s*|<\/OL>\s*)+$/i,'\1')
+        para.gsub!(/\/\{Id\.\s*<\/LI>\s*\}\//i,'/{Id.}/')
       	tuned_file << para unless para == nil
 		  end
       tuned_file
@@ -328,7 +452,7 @@ initial SiSU markup from other file formats
 
 WOK
 end
-def doWord(argv, instruct)
+def do_word(argv, instruct)
   argv.each do  |f|
     if f =~/.+?\.doc$/
       @argv << f[/(.+?)\.doc$/, 1]
@@ -337,12 +461,12 @@ def doWord(argv, instruct)
     end
   end
   @argv.each do |filename|
-    system(%{wvWare -x #{@dir.home}/.sisu/convert/wvSiSU.xml #{filename}.doc > #{filename}.wv})
+    system(%{wvWare -x #{@dir.path.home}/.sisu/convert/wvSiSU.xml #{filename}.doc > #{filename}.wv})
     file_array=IO.readlines("#{filename}.wv", "")
   	CONVERT::WareWord97.new(file_array, filename, instruct).songsheet # metaverse created here
   end
 end
-def doHtml(argv, instruct)
+def do_html(argv, instruct)
   argv.each do  |f|
     if f =~/.+?\.html$/
       @argv << f[/(.+?)\.html$/, 1]
@@ -351,11 +475,11 @@ def doHtml(argv, instruct)
     end
   end
   @argv.each do |filename|
-    file_array=IO.readlines("#{filename}.html", "\n\r")
-  	CONVERT::Html.new(file_array, filename, instruct).songsheet # metaverse created here
+    file_array=IO.readlines("#{filename}.html","\n\r")
+  	CONVERT::Html.new(file_array,filename,instruct).songsheet # metaverse created here
   end
 end
-def doDefault(argv, instruct)
+def do_default(argv, instruct)
   argv.each do  |f|
     if f =~/.+?\.html$/
       @argv << f[/(.+?)\.html$/, 1]
@@ -371,18 +495,21 @@ end
 def cases(argv, instruct)
 	case instruct
 		when/^--(word(97)?|doc)$/i #creates minimal sisu_small.gz package to send
-      doWord(argv, instruct)
+      do_word(argv, instruct)
 		when/^--(html)$/i #creates sisu.gz package to send
-      doHtml(argv, instruct)
+      do_html(argv, instruct)
 		when/^--(default)$/i #creates sisu.gz package to send
-      doDefault(argv, instruct)
+      do_default(argv, instruct)
 		else
       help
 	end
 end
-require 'zxy_sysenv.rb'
+$KCODE='u'
+branch='v0'
+SiSU_lib="sisu/#{branch}"
+require "#{SiSU_lib}/sysenv"
 include SiSU_Env
-@dir=SiSU_Env::Info_dir.new
+@dir=SiSU_Env::Info_env.new
 @argv=Array.new
 argv=$*
 instruct = "#{argv[0].to_s}"
diff --git a/data/sisu/conf/vim/addons/syntax/sisu.vim b/data/sisu/conf/vim/addons/syntax/sisu.vim
index 13d22bb1..8a114b64 100644
--- a/data/sisu/conf/vim/addons/syntax/sisu.vim
+++ b/data/sisu/conf/vim/addons/syntax/sisu.vim
@@ -10,7 +10,7 @@ else
 endif
 "% 11 Errors?
 syn match sisu_error contains=sisu_link,sisu_error_wspace "<![^ei]\S\+!>"
-"% 10 Markers: Endnote Identifiers, Pagebreaks etc.: 
+"% 10 Markers: Endnote Identifiers, Pagebreaks etc.:
 if !exists("sisu_no_identifiers")
   syn match   sisu_mark_endnote                      "\~^"
   syn match   sisu_contain       contains=@NoSpell   "</\?sub>"
@@ -26,7 +26,7 @@ if !exists("sisu_no_identifiers")
   "metaverse specific
   syn match   sisu_ocn           contains=@NoSpell   "<\~\d\+;\w\d\+;\w\d\+>"
   syn match   sisu_marktail                          "<\~#>"
-  syn match   sisu_markpara      contains=@NoSpell   "<:i[12]>"
+  syn match   sisu_markpara      contains=@NoSpell   "<:i[1-9]>"
   syn match   sisu_link                              " \*\~\S\+"
   syn match   sisu_action                            "^<:insert\d\+>"
   syn match   sisu_contain                           "<:e>"
@@ -63,7 +63,7 @@ syn region sisu_linked contains=sisu_fontface,sisu_strikeout,sisu_number,sisu_co
 syn region sisu_linked contains=sisu_fontface,sisu_strikeout,sisu_number,sisu_control,sisu_identifier,sisu_error matchgroup=sisu_link start="{" end="}image" oneline
 "%   some line operations
 syn region sisu_control contains=sisu_strikeout,sisu_identifier,sisu_content_endnote,sisu_mark_endnote,sisu_error,sisu_error_wspace matchgroup=sisu_control start="\(\(^\| \)!_ \|<:b>\)" end="$"
-syn region sisu_normal contains=sisu_strikeout,sisu_identifier,sisu_content_endnote,sisu_mark_endnote,sisu_link,sisu_linked,sisu_error,sisu_error_wspace matchgroup=sisu_markpara start="^_\([12*]\|[12]\*\) " end="$"
+syn region sisu_normal contains=sisu_strikeout,sisu_identifier,sisu_content_endnote,sisu_mark_endnote,sisu_link,sisu_linked,sisu_error,sisu_error_wspace matchgroup=sisu_markpara start="^_\([1-9*]\|[1-9]\*\) " end="$"
 syn region sisu_normal contains=sisu_strikeout,sisu_identifier,sisu_content_endnote,sisu_mark_endnote,sisu_link,sisu_linked,sisu_error,sisu_error_wspace matchgroup=sisu_markpara start="^\(#[ 1]\|_# \)" end="$"
 syn region sisu_comment matchgroup=sisu_comment start="^%\{1,2\} " end="$"
 "%   font face curly brackets
@@ -74,7 +74,7 @@ syn region sisu_identifier contains=sisu_strikeout,sisu_number,sisu_control,sisu
 syn region sisu_underline contains=sisu_strikeout,sisu_number,sisu_control,sisu_identifier,sisu_error matchgroup=sisu_fontface start="+{" end="}+"
 syn region sisu_identifier contains=sisu_strikeout,sisu_number,sisu_control,sisu_identifier,sisu_error matchgroup=sisu_fontface start="\^{" end="}\^"
 syn region sisu_identifier contains=sisu_strikeout,sisu_number,sisu_control,sisu_identifier,sisu_error matchgroup=sisu_fontface start=",{" end="},"
-syn region sisu_strikeout contains=sisu_error matchgroup=sisu_fontface start="-{" end="}-" 
+syn region sisu_strikeout contains=sisu_error matchgroup=sisu_fontface start="-{" end="}-"
 syn region sisu_html contains=sisu_error contains=sisu_strikeout matchgroup=sisu_contain start="<a href=\".\{-}\">" end="</a>" oneline
 "%   single words bold italicise etc. "workon
 syn region sisu_control contains=sisu_error matchgroup=sisu_control start="\([ (]\|^\)\*[^\|{\n\~\\]"hs=e-1 end="\*"he=e-0 skip="[a-zA-Z0-9']" oneline
author	Ralph Amissah <ralph.amissah@gmail.com>	2007-06-14 10:21:17 +0100
committer	Ralph Amissah <ralph.amissah@gmail.com>	2007-06-14 10:21:17 +0100
commit	b130d967b295b548d837ea5f603a6b0fec7a08a8 (patch)
tree	7f56fde49ae81ec8161d43239ce50af8674d8d49 /data
parent	gitignore (diff)