diff options
author | Ralph Amissah <ralph@amissah.com> | 2012-03-19 22:07:29 -0400 |
---|---|---|
committer | Ralph Amissah <ralph@amissah.com> | 2012-03-19 22:07:33 -0400 |
commit | 6811ac91f21a434fc7d967c11e1b20f33918c6ea (patch) | |
tree | 30f39674ca96a79f8a604a9f02d571f24320e221 /data/sisu/v3dv/conf/convert/sisu_convert | |
parent | v3: 3.2.0 version & changelog "opened" (diff) |
v3: 3.2 branch is main (v3dv --> v3); dev (v3dv) branch directories removed
* v3dv (3.2) "merged" into v3 (previously 3.1) (& removed)
* conf/sisu/v3dv --> conf/sisu/v3
* data/sisu/v3dv --> data/sisu/v3
* lib/sisu/v3dv --> lib/sisu/v3
* bin/sisu* (v3dv references changed to v3)
* (--dev modifier (superfluous for the time being) runs main v3 branch)
Diffstat (limited to 'data/sisu/v3dv/conf/convert/sisu_convert')
-rw-r--r-- | data/sisu/v3dv/conf/convert/sisu_convert | 519 |
1 files changed, 0 insertions, 519 deletions
#!/usr/bin/env ruby
# encoding: utf-8
# = sisu - SiSU information Structuring Universe
#
# Copyright (c) Ralph Amissah 1997,2004
#
# Ralph Amissah mailto:ralph@amissah.com
#
# * Name: SiSU information Structuring Universe
# * Author: Ralph@Amissah.com
# * Description: document conversion tool, to sisu from other formats
# * License: GPL 3 or later
# * Notes: word conversion uses wvWare and wvSiSU.xml (a modified/stripped wvHtml.xml)
# * http://wvware.sourceforge.net/
# * http://sourceforge.net/projects/wvware
# * <url:sisu.lnk>|sisu.lnk|@|^|
# * <url:sisu>
require 'shellwords'

module CONVERT
  # Skeleton SiSU 2.0 document header placed at the top of every converted file.
  HEADER = <<WOK
% SiSU 2.0

@title:
 :subtitle:

@creator:
 :author:

@classify:
 :topic_register:

@date:
 :published:

@rights:
 :copyright:
 :license:

WOK

  # Writes converted paragraphs to ",,<filename>.sst", preceded by the header.
  class MyOutput
    # data:     collection of paragraph strings (nil entries are dropped)
    # filename: output name without extension
    # instruct: the command-line instruction (e.g. "--default")
    def initialize(data, filename, instruct)
      @data = data.to_a.compact
      @filename = filename
      @instruct = instruct
    end

    # Header used for every instruction other than one matching /default/.
    def headerBasic
      HEADER.dup
    end

    # Header used when the instruction matches /default/ (currently the same text).
    def headerDefault
      HEADER.dup
    end

    # Writes the header, then each non-empty line of each paragraph followed
    # by a blank line. The block form of File.open closes the file even if
    # writing raises.
    def hardOutput
      header = (@instruct =~ /default/) ? headerDefault : headerBasic
      File.open(",,#{@filename}.sst", 'w+') do |out|
        out << header # a String, not an Array: Array#to_s is #inspect on Ruby >= 1.9
        @data.each do |chunk|
          chunk.split("\n").each do |line| # cleaner output this way
            line.strip!
            out.puts "#{line}\n\n" unless line.empty?
          end
        end
      end
    end
  end

  # Converts wvWare output (produced from a Word97 .doc) to initial SiSU markup.
  class WareWord97
    def initialize(data, filename, instruct)
      @data = data
      @filename = filename
      @instruct = instruct
    end

    # Runs the pipeline: two tag-stripping passes, the markup rules, then output.
    def songsheet
      print "Convert to SiSU file from Word97 << gvim ,,#{@filename}.sst >\n"
      data = WareWord97.new(@data, @filename, @instruct).strip
      data = WareWord97.new(data, @filename, @instruct).strip
      data = WareWord97.new(data, @filename, @instruct).markup_rules
      MyOutput.new(data, @filename, @instruct).hardOutput
    end

    # Removes empty <u>/<b>/<i> pairs and joins adjacent runs of the same tag.
    # Paragraph strings are modified in place; returns them as an array.
    def strip
      @data.map do |para|
        para.strip!
        %w[u b i].each do |tag|
          para.gsub!(/<#{tag}>\s*<\/#{tag}>/, '')
          para.gsub!(/<\/#{tag}>\s*<#{tag}>/, '')
        end
        para
      end
    end

    # Maps leading bold text to heading levels and inline tags to SiSU markup.
    # Paragraph strings are modified in place; returns them as an array.
    def markup_rules
      @data.map do |para|
        para.strip!
        para.gsub!(/\s+/, ' ')
        para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i, '1~ \1 \2') # watch case insensitivity
        para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i, ':C~ \1 \2') # watch case insensitivity
        para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i, '3~ \1 \2') # numeric, decide what to do, can be different
        para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i, '2~ \1 \2') # numeric, decide what to do, can be different
        para.gsub!(/^<b>(\d.+?)<\/b>/i, '4~ \1') # only one capture group here, so no \2
        para.gsub!(/<u>(.+?)<\/u>/, '_{\1}_')
        para.gsub!(/<b>(.+?)<\/b>/, '!{\1}!')
        para.gsub!(/<i>(.+?)<\/i>/, '/{\1}/')
        para
      end
    end
  end

  # Converts HTML to initial SiSU markup.
  class Html
    # Named HTML character entities and the text each is replaced with.
    # NOTE(review): the per-entity patterns had been entity-decoded into
    # self-replacements; the names below are reconstructed from the line
    # descriptions (Latin-1 set) - confirm against the upstream file.
    # 'plusmin' is kept as found; 'plusmn' is the standard entity name.
    ENTITY_GLYPHS = {
      'iexcl' => '¡', 'cent' => '¢', 'pound' => '£', 'curren' => '¤',
      'yen' => '¥', 'brvbar' => '¦', 'sect' => '§', 'uml' => '¨',
      'copy' => '©', 'ordf' => 'ª', 'laquo' => '«', 'not' => '¬',
      'shy' => '', 'reg' => '®', 'macr' => '¯', 'deg' => '°',
      'plusmin' => '±', 'plusmn' => '±', 'sup2' => '²', 'sup3' => '³',
      'acute' => '', 'micro' => 'µ', 'para' => '¶', 'middot' => '·',
      'cedil' => '¸', 'sup1' => '¹', 'ordm' => 'º', 'raquo' => '»',
      'frac14' => '¼', 'frac12' => '½', 'frac34' => '¾', 'iquest' => '¿',
      'Agrave' => 'À', 'Aacute' => 'Á', 'Acirc' => 'Â', 'Atilde' => 'Ã',
      'Auml' => 'Ä', 'Aring' => 'Å', 'AElig' => 'Æ', 'Ccedil' => 'Ç',
      'Egrave' => 'È', 'Eacute' => 'É', 'Ecirc' => 'Ê', 'Euml' => 'Ë',
      'Igrave' => 'Ì', 'Iacute' => 'Í', 'Icirc' => 'Î', 'Iuml' => 'Ï',
      'ETH' => 'Ð', 'Ntilde' => 'Ñ', 'Ograve' => 'Ò', 'Oacute' => 'Ó',
      'Ocirc' => 'Ô', 'Otilde' => 'Õ', 'Ouml' => 'Ö', 'times' => '×',
      'Oslash' => 'Ø', 'Ugrave' => 'Ù', 'Uacute' => 'Ú', 'Ucirc' => 'Û',
      'Uuml' => 'Ü', 'Yacute' => 'Ý', 'THORN' => 'Þ', 'szlig' => 'ß',
      'agrave' => 'à', 'aacute' => 'á', 'acirc' => 'â', 'atilde' => 'ã',
      'auml' => 'ä', 'aring' => 'å', 'aelig' => 'æ', 'ccedil' => 'ç',
      'egrave' => 'è', 'eacute' => 'é', 'ecirc' => 'ê', 'euml' => 'ë',
      'igrave' => 'ì', 'iacute' => 'í', 'icirc' => 'î', 'iuml' => 'ï',
      'eth' => 'ð', 'ntilde' => 'ñ', 'ograve' => 'ò', 'oacute' => 'ó',
      'ocirc' => 'ô', 'otilde' => 'õ', 'ouml' => 'ö', 'divide' => '÷',
      'oslash' => 'ø', 'ugrave' => 'ù', 'uacute' => 'ú', 'ucirc' => 'û',
      'uuml' => 'ü', 'yacute' => 'ý', 'thorn' => 'þ', 'yuml' => 'ÿ'
    }.freeze

    # Case-sensitive on purpose: &Agrave; and &agrave; are different glyphs.
    ENTITY_PATTERN = /&(#{ENTITY_GLYPHS.keys.join('|')});/

    def initialize(data, filename, instruct)
      @data = data
      @filename = filename
      @instruct = instruct
    end

    # Runs the pipeline: re-split the input on blank lines, mark paragraph
    # breaks, normalise tags that span breaks, apply the markup rules, output.
    def songsheet
      print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n"
      data = Html.new(@data.join.split(/\n\n+/), @filename, @instruct).space_paragraphs
      data = Html.new(data, @filename, @instruct).multiline
      data = Html.new(data.join.split("\n\n"), @filename, @instruct).markup_rules
      MyOutput.new(data, @filename, @instruct).hardOutput
    end

    # Replaces paragraph-level tags with the placeholder "zZz", which
    # #multiline later turns into blank-line paragraph breaks.
    def space_paragraphs
      @data.map do |para|
        para.strip!
        para.gsub!(/\r/, '')
        # NOTE: substituting \n here caused serious time issues on a few files
        para.gsub!(/<\/?p>/i, 'zZz')
        para.gsub!(/<\/?\s*p(?:\s+ALIGN=.+?)?>/i, 'zZz') # all manner of <p>
        para.gsub!(/<p\s+(class|align).+?>/i, 'zZz')
        para.gsub!(/<\/p>/i, 'zZz') # repeat actually
        para.gsub!(/<(?:dir|tr|br)>/i, 'zZz')
        para.gsub!(/(<\/center>)/i, '\1zZz')
        para.gsub!(/(<\/h[1-6]>)/i, '\1zZz')
        para.gsub!(/ \s+/i, ' ')
        para.gsub!(/(?:\s*zZz\s*)+/i, 'zZz')
        para
      end
    end

    # Marks placeholders inside a blockquote with the "_1 " indent marker.
    # Not called from within this file; the original carried a warning that
    # some blockquotes went missing.
    def blockquotes(sub = '')
      res = []
      Array(sub).each do |x|
        if x =~ /(<\/blockquote>)/i
          closer = Regexp.escape($1)
          # gsub rather than gsub!: gsub! returns nil when nothing is
          # replaced, which silently dropped the text before the closing tag
          res << x[/(.+?)#{closer}/mi, 1].gsub(/zZz/, 'zZz_1 ') if x =~ /.+?#{closer}/mi
          res << x[/#{closer}(.+)/mi, 1]
        else
          res << x
        end
      end
      res.join
    end

    # Joins lines, moves "zZz" placeholders out of <b>/<i>/<u>/<hN> elements,
    # then turns each run of placeholders into a blank line. Returns each
    # paragraph followed by a "\n\n" element.
    def multiline
      tuned_file = []
      @data.each do |para|
        para.gsub!(/\n/, ' ')
        para.gsub!(/ \s+/mi, ' ')
        # all of these could be very time expensive
        para.gsub!(/<([biu]|h[1-6])>(?:zZz)?([^<]+)?zZz(.+?)<\/\1>/i, 'zZz<\1>\2 \3</\1>')
        para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)(?:<\/center>)?zZz(.+?)?<\/\1>/i, 'zZz<\1>\2 \3</\1>')
        para.gsub!(/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/\1>/i, 'zZz<\1>\2</\1>')
        para.gsub!(/<(h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i, 'zZz<\1>\2</\1>zZz') # too expensive to have biu
        para.gsub!(/zZz(\s*zZz)*/, "\n\n")
        tuned_file << para << "\n\n"
      end
      tuned_file
    end

    # Applies the HTML -> SiSU substitutions to each paragraph in place and
    # returns the paragraphs. The order of the substitutions matters.
    def markup_rules
      in_blockquote = false # carried across paragraphs until </blockquote> is seen
      @data.map do |para|
        if para =~ /<a href="(http:\/\/.+?)">/i
          # risk that url & url are not to match
          para.gsub!(/(?:<\s*)?<a href="(http:\/\/.+?)">http:\/\/.+?<\/a>(?:\s*>)?\.?/i, '\1')
        end
        in_blockquote = true if para =~ /<BLOCKQUOTE>/i
        if in_blockquote
          para.gsub!(/^/, '_1 ') unless para.empty? || para =~ /^\s*<\/?blockquote?>\s*$/i
        end
        in_blockquote = false if para =~ /<\/BLOCKQUOTE>/i
        para.gsub!(/<\/?blockquote?>/i, '')
        ### clean
        para.gsub!(/^\s+/i, '')
        para.gsub!(/<([bui]|em|su[pb])>\s*<\/\1>/i, '')
        para.gsub!(/<\/?center>/i, '')
        para.gsub!(/\s*<\/dir>/i, '')
        para.gsub!(/<hr>/i, '')
        para.gsub!(/\s*<a href=".+?\.html#(?:[a-z_]+)?(?:[a-z0-9_-]|\*)+">\[(\*+)\]<\/a>/i, '^{[\1]}^ ') # other endnote marker
        para.gsub!(/<a href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"(?:\s+name=".+?")?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~^ ') # endnote marker
        para.gsub!(/<a name=".+?"\s+href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~^ ') # endnote marker
        para.gsub!(/<a name="(?:[a-z$]+)?[0-9_-]+">\s*(<\/a>)?\s*\d+\.?\s*(<\/a>)?\s*/i, '^~ ') # endnote
        ### headings
        para.gsub!(/<h([1-6])(?: align=.+?)?>\s*(.+?)\s*<\/h\1>\s*/i, '\1~ \2')
        para.gsub!(/^<b>(Chapter|Article)(.+?)<\/b>/i, '4~ \1 \2') # watch case insensitivity
        para.gsub!(/^<b>(Part|Section|Book)(.+?)<\/b>/i, '3~ \1 \2') # watch case insensitivity
        para.gsub!(/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i, '6~ \1 \2') # numeric, decide what to do, can be different
        para.gsub!(/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i, '5~ \1 \2') # numeric, decide what to do, can be different
        para.gsub!(/^<b>(\d+\.?)(.+?)<\/b>/i, '4~ \1 \2') # numeric, decide what to do, can be different
        # e.g. <a name="ii"></a><B>
        para.gsub!(/^(<a name=".+?">)(?:<small>)?<(?:b|strong)>\s*(.+?)\s*<\/(?:b|strong)>/i, '5~ \2 \1') # watch
        para.gsub!(/^(<(a name|A NAME)=".+?">)(\s*|<\/[aA]>)?([A-Z][A-Z])+/, '5~ \2 \1') # watch
        para.gsub!(/^(\s+|<p>)?(<a name=".+?">)(\s*|<\/a>)?<b>/i, '5~ \2 \1') # watch
        para.gsub!(/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i, '\1~ \2')
        para.gsub!(/^<b>\s*(.+?)<\/b>\s*(<\/i>\s*)?$/i, '4~ \1\2') # wish it all were less messy
        para.gsub!(/^<i>\s*([^"(].+?)<\/i>\s*(<\/b>\s*)?$/i, '5~ \1\2') # wish it all were less messy
        para.gsub!(/<\/?[biu]>/i, '') if para =~ /[1-6]\{/
        ### inline font faces
        para.gsub!(/<u>\s*(.+?)\s*<\/u>/i, '_{\1}_')
        para.gsub!(/<(b|strong)>\s*(.+?)\s*<\/\1>/i, '*{\2}*')
        para.gsub!(/<(i|em)>\s*(.+?)\s*<\/\1>/i, '/{\2}/')
        para.gsub!(/<sup>\s*(.+?)\s*<\/sup>/i, '^{\1}^')
        para.gsub!(/(([\/\*!_])\{.+?\}\2)\s\s+/i, '\1 ')
        para.gsub!(/(([\/\*!_])\{.+?\}\2)\s+([.,;?\)])\s+/i, '\1\3 ')
        para.gsub!(/(([\/\*!_])\{.+?\}\2)(["'])\s+/i, '\1\3 ')
        para.gsub!(/(([\/\*!_])\{.+?\}\2)\s*([a-z0-9])/i, '\1 \3')
        para.gsub!(/(([\/\*_])\{.+?\}\2)\s*([a-z0-9])/i, '\1 \3')
        para.gsub!(/([a-z0-9])(([\/\*_])\{.+?\}\3)/i, ' \1 \2') # eg this/{problem}/
        para.gsub!(/([\/\*_])\{([,.; ]+)\}\1/i, '\2') # eg /{,}/ or *{ }* etc.
        para.gsub!(/ \s+/i, ' ')
        # NOTE(review): the next two patterns appeared entity-decoded (a bare
        # double quote and a bare ampersand); restored to the entities they
        # presumably named - confirm against the upstream file.
        para.gsub!(/&quot;/i, '"')
        para.gsub!(/&amp;/i, 'and')
        ### document-level tags
        para.gsub!(/<!doctype html public .+/i, '')
        para.gsub!(/<\/?(?:html|head|body|font|small)>/i, '')
        para.gsub!(/<\/(?:title)>/i, '')
        para.gsub!(/<title>/i, '#{~title? ')
        para.gsub!(/<blockquote>(.+?)<\/blockquote>/mi, "\n\n_1 \\1\n\n")
        para.gsub!(/<div align=.+?>|<\/div>|<font size=.+?>|<\/a><\/em><\/strong>/i, '')
        # the caret is escaped: unescaped it is a line anchor, and an anchor
        # directly after a literal "~" can never match
        para.gsub!(/~\^\s+\.\s*/i, '.~^ ') # vim equiv: %s/\~e\s\+\.\s*/.\~e /c
        para.gsub!(/\s+~\^\s+/i, '~^ ')
        para.gsub!(/ \s+/i, ' ')
        para.gsub!(/\s+$/i, '')
        ### left-over tags
        para.gsub!(/^(?:<\/[bi]>)+$/i, '')
        para.gsub!(/^(?:(?:<i>)+<b>|(?:<b>)+<i>)\s*([^"(].+?)/i, '5~ \1') # wish it all were less messy
        para.gsub!(/^(?:<\/?(?:[ib]|em)>\s*)+$/i, '') # cleaning up left over <i> etc.
        para.gsub!(/<(?:i|em)>\s*(.+)/i, '/{\1}/') # using up left over <i>
        para.gsub!(/<b>\s*(.+)/i, '*{\1}*') # using up left over <b>
        ### definition lists, anchors, urls, indents
        para.gsub!(/<dd>([\d.]+)/i, '5~ \1')
        # NOTE(review): the three space patterns below appeared as plain
        # spaces, which the earlier whitespace collapsing makes unmatchable;
        # restored to &nbsp; - confirm against the upstream file.
        para.gsub!(/<dd>(?:&nbsp;)+([\d.]+)/i, '6~ \1')
        para.gsub!(/<dd>(\([a-z]\))/i, '7~ \1')
        para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">(.+?)(<\/a>)/i, '\1\3\2\4')
        para.gsub!(/^([1-9]~)( .+?)<a name="(\S+?)">/i, '\1\3\2')
        # plain slashes: in a replacement string "\/" is emitted with its backslash
        para.gsub!(/http\/\/(\S+)/i, 'http://\1')
        para.gsub!(/\s*<a href="\S+?">(http:\/\/\S+?)<\/a>\s*/i, ' \1 ')
        para.gsub!(/([a-zA-Z.,!?;:])([*\/_-]\{)/, '\1 \2')
        para.gsub!(/^\s*(&nbsp;){10,12}/i, '_2 ')
        para.gsub!(/^\s*(&nbsp;){4,5}/i, '_1 ')
        para.gsub!(/\t/, ' ') # check
        ## glyphs & tildes
        para.gsub!(ENTITY_PATTERN) { ENTITY_GLYPHS[$1] }
        ##
        para.gsub!(/\s\s+/, ' ')
        para.gsub!(/\t+/, ' ')
        para
      end
    end
  end

  # HTML conversion with one extra clean-up pass (#markup_default).
  # The constructor is inherited from Html.
  class Default < Html
    # Same pipeline as Html#songsheet, except that the input is not re-split
    # on blank lines first and #markup_default runs before output.
    def songsheet
      print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n"
      data = Default.new(@data, @filename, @instruct).space_paragraphs
      data = Default.new(data, @filename, @instruct).multiline
      data = Default.new(data.join.split("\n\n"), @filename, @instruct).markup_rules
      data = Default.new(data, @filename, @instruct).markup_default
      MyOutput.new(data, @filename, @instruct).hardOutput
    end

    # Italicises a left-over "<i>Id." and strips trailing list tags.
    def markup_default
      @data.map do |para|
        # no backslashes before the braces: a replacement string emits "\{"
        # literally, which also stopped the third rule below from matching
        para.gsub!(/<i>(Id\.?)(\s|$)/i, '/{\1}/\2')
        para.gsub!(/^(~\{\{ .+?)(<\/LI>\s*|<\/OL>\s*)+$/i, '\1')
        para.gsub!(/\/\{Id\.\s*<\/LI>\s*\}\//i, '/{Id.}/')
        para
      end
    end
  end
end

# Prints usage.
def help
  puts <<WOK
conversion program
initial SiSU markup from other file formats

  zxy_convert --word     does initial conversion from word97 to sisu markup, expects [filename].doc (can also use --doc)
  zxy_convert --html     does initial conversion from html to sisu markup, expects [filename].html
  zxy_convert --default  does initial conversion from default html to sisu markup, expects [filename].html

WOK
end

# Appends to @argv the stem of each argument that ends with +suffix+ (a regex
# source string); prints a notice for every argument that does not.
def collect_stems(argv, suffix, label)
  argv.each do |f|
    stem = f[/(.+?)#{suffix}$/, 1]
    if stem
      @argv << stem
    else
      print "not #{label}? << #{f} >> "
    end
  end
end

# Converts each [filename].doc: runs wvWare into [filename].wv, then converts
# that to SiSU markup.
def do_word(argv, instruct)
  collect_stems(argv, '\.doc', '.doc')
  @argv.each do |filename|
    # file names come from the command line, so quote them for the shell
    conf = Shellwords.escape("#{@dir.path.home}/.sisu/convert/wvSiSU.xml")
    src = Shellwords.escape("#{filename}.doc")
    out = Shellwords.escape("#{filename}.wv")
    system(%{wvWare -x #{conf} #{src} > #{out}})
    file_array = IO.readlines("#{filename}.wv", "") # "" separator reads paragraph by paragraph
    CONVERT::WareWord97.new(file_array, filename, instruct).songsheet # metaverse created here
  end
end

# Converts each [filename].html (or .htm) to SiSU markup.
def do_html(argv, instruct)
  collect_stems(argv, '\.html?', '.html')
  @argv.each do |filename|
    file_end = %w[html htm].find { |ext| FileTest.file?("#{filename}.#{ext}") }
    unless file_end
      # previously fell through with a nil extension and raised on "<filename>."
      print "file not found << #{filename}.html >> "
      next
    end
    file_array = IO.readlines("#{filename}.#{file_end}", "\n\r")
    CONVERT::Html.new(file_array, filename, instruct).songsheet # metaverse created here
  end
end

# Converts each [filename].html to SiSU markup using the Default rules.
def do_default(argv, instruct)
  collect_stems(argv, '\.html', '.html')
  @argv.each do |filename|
    unless FileTest.file?("#{filename}.html")
      print "file not found << #{filename}.html >> "
      next
    end
    file_array = IO.readlines("#{filename}.html", "\n\r")
    CONVERT::Default.new(file_array, filename, instruct).songsheet # metaverse created here
  end
end

# Dispatches on the instruction; anything unrecognised prints the help text.
def cases(argv, instruct)
  case instruct
  when /^--(word(97)?|doc)$/i # Word97 .doc input
    do_word(argv, instruct)
  when /^--(html)$/i # HTML input
    do_html(argv, instruct)
  when /^--(default)$/i # HTML input, Default rules
    do_default(argv, instruct)
  else
    help
  end
end

$KCODE = 'u' if RUBY_VERSION < '1.9' # no effect (and a warning) on later versions
@argv = []
argv = $*
SiSU_version_dir = (argv.inspect =~ /--v1/) ? 'v1' : 'v2'
SiSU_lib = "sisu/#{SiSU_version_dir}"
require "#{SiSU_lib}/sysenv"
include SiSU_Env
@dir = SiSU_Env::Info_env.new
# non-bang chomp: nil.to_s is a frozen string on Ruby >= 2.7
instruct = argv.shift.to_s.chomp
instruct = 'help' if instruct.empty?
cases(argv, instruct)