diff options
Diffstat (limited to 'data/sisu/v1/conf/convert/sisu_convert')
-rw-r--r-- | data/sisu/v1/conf/convert/sisu_convert | 519 |
1 files changed, 519 insertions, 0 deletions
#!/usr/bin/env ruby
# encoding: utf-8
# = sisu - SiSU information Structuring Universe
#
# Copyright (c) Ralph Amissah 1997,2004
#
# Ralph Amissah mailto:ralph@amissah.com
#
# * Name: SiSU information Structuring Universe
# * Author: Ralph@Amissah.com
# * Description: document conversion tool, to sisu from other formats
# * arch-tag: document conversion tool to sisu markup
# * $Date: 2004/10/16 15:51:06 $
# * $Id: sisu_convert,v 1.37 2004/10/16 15:51:06 ralph Exp $
# * License: GPL 3 or later
# * Notes: word conversion uses wvWare and wvSiSU.xml (a modified/stripped wvHtml.xml)
# * http://wvware.sourceforge.net/
# * http://sourceforge.net/projects/wvware
# * <url:sisu.lnk>|sisu.lnk|@|^|
# * <url:sisu>
# * <url:zxy_param.rb>|zxy_param.rb|@|^|

# Converters that turn wvWare (Word97) or HTML text into initial SiSU markup.
# Every converter takes (data, filename, instruct): an array of text chunks,
# the input file name without its extension, and the command line switch.
module CONVERT
  # Writes converted paragraphs, preceded by an empty SiSU header template,
  # to the file ",,<filename>.sst" in the current directory.
  class MyOutput
    # Header fields emitted for every document, in output order.
    HEADER_FIELDS = [
      '@title:', '@subtitle:', '@creator:', '@type:', '@subject:',
      '@date:', '@date.available:', '@publisher: SiSU', '@rights:'
    ].freeze

    def initialize(data, filename, instruct)
      @data = data.compact
      @filename = filename
      @instruct = instruct
    end

    # Header template used unless the instruction contains "default";
    # it is the default template plus a trailing "@level:" field.
    def headerBasic
      headerDefault + "@level:\n\n"
    end

    # Header template used when the instruction contains "default".
    def headerDefault
      HEADER_FIELDS.map { |field| "#{field}\n\n" }.join
    end

    # Writes the header followed by every non-empty line of @data, each
    # stripped and followed by a blank line. Returns @data.
    def hardOutput
      header = @instruct =~ /default/ ? headerDefault : headerBasic
      # Block form closes the file even if a write fails.
      File.open(",,#{@filename}.sst", 'w+') do |out|
        out << header
        @data.each do |chunk|
          chunk.split("\n").each do |line| # cleaner output this way
            line.strip!
            out.puts "#{line}\n\n" unless line.empty?
          end
        end
      end
      @data
    end
  end

  # Converts wvWare output (produced with wvSiSU.xml) to SiSU markup.
  class WareWord97
    # Empty or back-to-back emphasis tags removed by #strip.
    REDUNDANT_TAGS = [
      /<u>\s*<\/u>/, /<\/u>\s*<u>/,
      /<b>\s*<\/b>/, /<\/b>\s*<b>/,
      /<i>\s*<\/i>/, /<\/i>\s*<i>/
    ].freeze

    # Ordered [pattern, replacement] pairs applied by #markup_rules.
    MARKUP_RULES = [
      [/\s+/, ' '],
      [/^<b>(Chapter|Article)(.+?)<\/b>/i, '4~ \1 \2'],     # watch case insensitivity
      [/^<b>(Part|Section|Book)(.+?)<\/b>/i, '3~ \1 \2'],   # watch case insensitivity
      [/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i, '6~ \1 \2'],    # numeric, decide what to do, can be different
      [/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i, '5~ \1 \2'],         # numeric, decide what to do, can be different
      # Only one group exists here; the trailing space is what the original
      # '4~ \1 \2' produced, because the missing \2 expanded to nothing.
      [/^<b>(\d.+?)<\/b>/i, '4~ \1 '],
      [/<u>(.+?)<\/u>/, '_{\1}_'],
      [/<b>(.+?)<\/b>/, '*{\1}*'],
      [/<i>(.+?)<\/i>/, '/{\1}/']
    ].freeze

    def initialize(data, filename, instruct)
      @data = data
      @filename = filename
      @instruct = instruct
    end

    # Runs the whole Word97 pipeline: strip (twice), markup rules, output.
    def songsheet
      print "Convert to SiSU file from Word97 << gvim ,,#{@filename}.sst >\n"
      data = @data
      # The strip pass is run twice, as in the original pipeline.
      2.times { data = WareWord97.new(data, @filename, @instruct).strip }
      data = WareWord97.new(data, @filename, @instruct).markup_rules
      MyOutput.new(data, @filename, @instruct).hardOutput
    end

    # Strips each paragraph in place and removes empty or back-to-back
    # <u>/<b>/<i> tags. Returns the paragraphs as a new array.
    def strip
      @data.map do |para|
        para.strip!
        REDUNDANT_TAGS.each { |pattern| para.gsub!(pattern, '') }
        para
      end
    end

    # Rewrites bold headings as SiSU heading levels and the remaining
    # emphasis tags as SiSU emphasis markup, in place. Returns a new array.
    def markup_rules
      @data.map do |para|
        para.strip!
        MARKUP_RULES.each { |pattern, replacement| para.gsub!(pattern, replacement) }
        para
      end
    end
  end

  # Converts HTML to SiSU markup. 'zZz' is the temporary paragraph-break
  # marker inserted by #space_paragraphs and resolved by #multiline.
  class Html
    # Ordered rules marking paragraph boundaries with 'zZz'.
    PARAGRAPH_RULES = [
      [/\r/, ''],
      # Replacing "\n" here was dropped upstream: serious time issues on a few files.
      [/<\/?p>/i, 'zZz'],
      [/<\/?\s*p(?:\s+ALIGN=.+?)?>/i, 'zZz'], # all manner of <p>
      [/<p\s+(class|align).+?>/i, 'zZz'],
      [/<\/p>/i, 'zZz'],                      # repeat actually
      [/<(?:dir|tr|br)>/i, 'zZz'],
      [/(<\/center>)/i, '\1zZz'],
      [/(<\/h[1-6]>)/i, '\1zZz'],
      [/ \s+/i, ' '],
      [/(?:\s*zZz\s*)+/i, 'zZz']
    ].freeze

    # Ordered rules pulling break markers out of <b>/<i>/<u>/<hN> elements.
    # All of these could be time expensive; tamed as a compromise.
    MULTILINE_RULES = [
      [/\n/, ' '],
      [/ \s+/mi, ' '],
      [/<([biu]|h[1-6])>(?:zZz)?([^<]+)?zZz(.+?)<\/\1>/i, 'zZz<\1>\2 \3</\1>'],
      [/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)(?:<\/center>)?zZz(.+?)?<\/\1>/i, 'zZz<\1>\2 \3</\1>'],
      [/<([biu]|h[1-6])>(?:<center>|zZz)+(.+?)<\/\1>/i, 'zZz<\1>\2</\1>'],
      # does catch some h1, h2 etc; too expensive to have biu
      [/<(h[1-6])>(.+?)(?:<center>|zZz)+<\/\1>/i, 'zZz<\1>\2</\1>zZz'],
      [/zZz(\s*zZz)*/, "\n\n"]
    ].freeze

    # Ordered clean-up, endnote and heading rules (applied first).
    HEADING_RULES = [
      [/<\/?blockquote?>/i, ''],
      ### clean
      [/^\s+/i, ''],
      [/<([bui]|em|su[pb])>\s*<\/\1>/i, ''],
      [/<\/?center>/i, ''],
      [/\s*<\/dir>/i, ''],
      [/<hr>/i, ''],
      [/\s*<a href=".+?\.html#(?:[a-z_]+)?(?:[a-z0-9_-]|\*)+">\[(\*+)\]<\/a>/i, '^{[\1]}^ '], # other endnote marker
      [/<a href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"(?:\s+name=".+?")?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~^ '], # endnote marker
      [/<a name=".+?"\s+href=".+?\.html#(?:[a-z_$]+)?[0-9_-]+"?>\[[a-z]?\d+\](?:<\/[bi]>)?<\/a>/i, '~^ '],    # endnote marker
      [/<a name="(?:[a-z$]+)?[0-9_-]+">\s*(<\/a>)?\s*\d+\.?\s*(<\/a>)?\s*/i, '^~ '],                           # endnote
      [/<h([1-6])(?: align=.+?)?>\s*(.+?)\s*<\/h\1>\s*/i, '\1~ \2'],
      [/^<b>(Chapter|Article)(.+?)<\/b>/i, '4~ \1 \2'],    # watch case insensitivity
      [/^<b>(Part|Section|Book)(.+?)<\/b>/i, '3~ \1 \2'],  # watch case insensitivity
      [/^<b>(\d+\.\d+\.\d+\.?)(.+?)<\/b>/i, '6~ \1 \2'],   # numeric, decide what to do, can be different
      [/^<b>(\d+\.\d+\.?)(.+?)<\/b>/i, '5~ \1 \2'],        # numeric, decide what to do, can be different
      [/^<b>(\d+\.?)(.+?)<\/b>/i, '4~ \1 \2'],             # numeric, decide what to do, can be different
      # e.g. <a name="ii"></a><B>
      [/^(<a name=".+?">)(?:<small>)?<(?:b|strong)>\s*(.+?)\s*<\/(?:b|strong)>/i, '5~ \2 \1'], # watch
      [/^(<(a name|A NAME)=".+?">)(\s*|<\/[aA]>)?([A-Z][A-Z])+/, '5~ \2 \1'],                  # watch
      [/^(\s+|<p>)?(<a name=".+?">)(\s*|<\/a>)?<b>/i, '5~ \2 \1'],                             # watch
      [/<h([1-6])>\s*(.+?)\s*<\/h\1>\s*/i, '\1~ \2'],
      [/^<b>\s*(.+?)<\/b>\s*(<\/i>\s*)?$/i, '4~ \1\2'],        # wish it all were less messy
      [/^<i>\s*([^"(].+?)<\/i>\s*(<\/b>\s*)?$/i, '5~ \1\2']    # wish it all were less messy
    ].freeze

    # Ordered emphasis, tidy-up and link rules (applied after HEADING_RULES).
    INLINE_RULES = [
      [/<u>\s*(.+?)\s*<\/u>/i, '_{\1}_'],
      [/<(b|strong)>\s*(.+?)\s*<\/\1>/i, '*{\2}*'],
      [/<(i|em)>\s*(.+?)\s*<\/\1>/i, '/{\2}/'],
      [/<sup>\s*(.+?)\s*<\/sup>/i, '^{\1}^'],
      [/(([\/\*!_])\{.+?\}\2)\s\s+/i, '\1 '],
      [/(([\/\*!_])\{.+?\}\2)\s+([.,;?\)])\s+/i, '\1\3 '],
      [/(([\/\*!_])\{.+?\}\2)(["'])\s+/i, '\1\3 '],
      [/(([\/\*!_])\{.+?\}\2)\s*([a-z0-9])/i, '\1 \3'],
      [/(([\/\*_])\{.+?\}\2)\s*([a-z0-9])/i, '\1 \3'],
      [/([a-z0-9])(([\/\*_])\{.+?\}\3)/i, ' \1 \2'], # eg this/{problem}/
      [/([\/\*_])\{([,.;; ]+)\}\1/i, '\2'],          # eg /{,}/ or *{ }* etc.
      [/ \s+/i, ' '],
      # Entity names restored: extraction had decoded them, leaving a no-op
      # quote rule and an ampersand rule that clobbered every later entity.
      [/&quot;/i, '"'],
      [/&amp;/i, 'and'],
      [/<!doctype html public .+/i, ''],
      [/<\/?(?:html|head|body|font|small)>/i, ''],
      [/<\/(?:title)>/i, ''],
      [/<title>/i, '#{~title? '],
      [/<blockquote>(.+?)<\/blockquote>/mi, "\n\n_1 \\1\n\n"],
      [/<div align=.+?>|<\/div>|<font size=.+?>|<\/a><\/em><\/strong>/i, ''],
      # The caret is escaped: unescaped it is a line anchor and these two
      # rules could never match. vim equiv: %s/\~e\s\+\.\s*/.\~e /c
      [/~\^\s+\.\s*/i, '.~^ '],
      [/\s+~\^\s+/i, '~^ '],
      [/ \s+/i, ' '],
      [/\s+$/i, ''],
      [/^(?:<\/[bi]>)+$/i, ''],
      [/^(?:(?:<i>)+<b>|(?:<b>)+<i>)\s*([^"(].+?)/i, '5~ \1'], # wish it all were less messy
      [/^(?:<\/?(?:[ib]|em)>\s*)+$/i, ''],                      # cleaning up left over <i> etc.
      [/<(?:i|em)>\s*(.+)/i, '/{\1}/'],                         # using up left over <i>
      [/<b>\s*(.+)/i, '*{\1}*'],                                # using up left over <b>
      [/<dd>([\d.]+)/i, '5~ \1'],
      [/<dd>(?:&nbsp;)+([\d.]+)/i, '6~ \1'],
      [/<dd>(\([a-z]\))/i, '7~ \1'],
      [/^([1-9]~)( .+?)<a name="(\S+?)">(.+?)(<\/a>)/i, '\1\3\2\4'],
      [/^([1-9]~)( .+?)<a name="(\S+?)">/i, '\1\3\2'],
      # A '\/' in a replacement string is emitted with its backslash, so the
      # repaired scheme is written with plain slashes.
      [/http\/\/(\S+)/i, 'http://\1'],
      [/\s*<a href="\S+?">(http:\/\/\S+?)<\/a>\s*/i, ' \1 '],
      [/([a-zA-Z.,!?;:])([*\/_-]\{)/, '\1 \2'],
      [/^\s*(&nbsp;){10,12}/i, '_2 '],
      [/^\s*(&nbsp;){4,5}/i, '_1 '],
      [/\t/, ' '] # check
    ].freeze

    # HTML Latin-1 entity names and the glyph each is replaced with.
    # 'shy' and 'acute' are dropped; 'plusmin' is the misspelling the
    # original rule matched and is kept next to the correct 'plusmn'.
    ENTITY_GLYPHS = {
      'iexcl' => '¡', 'cent' => '¢', 'pound' => '£', 'curren' => '¤',
      'yen' => '¥', 'brvbar' => '¦', 'sect' => '§', 'uml' => '¨',
      'copy' => '©', 'ordf' => 'ª', 'laquo' => '«', 'not' => '¬',
      'shy' => '', 'reg' => '®', 'macr' => '¯', 'deg' => '°',
      'plusmn' => '±', 'plusmin' => '±', 'sup2' => '²', 'sup3' => '³',
      'acute' => '', 'micro' => 'µ', 'para' => '¶', 'middot' => '·',
      'cedil' => '¸', 'sup1' => '¹', 'ordm' => 'º', 'raquo' => '»',
      'frac14' => '¼', 'frac12' => '½', 'frac34' => '¾', 'iquest' => '¿',
      'Agrave' => 'À', 'Aacute' => 'Á', 'Acirc' => 'Â', 'Atilde' => 'Ã',
      'Auml' => 'Ä', 'Aring' => 'Å', 'AElig' => 'Æ', 'Ccedil' => 'Ç',
      'Egrave' => 'È', 'Eacute' => 'É', 'Ecirc' => 'Ê', 'Euml' => 'Ë',
      'Igrave' => 'Ì', 'Iacute' => 'Í', 'Icirc' => 'Î', 'Iuml' => 'Ï',
      'ETH' => 'Ð', 'Ntilde' => 'Ñ', 'Ograve' => 'Ò', 'Oacute' => 'Ó',
      'Ocirc' => 'Ô', 'Otilde' => 'Õ', 'Ouml' => 'Ö', 'times' => '×',
      'Oslash' => 'Ø', 'Ugrave' => 'Ù', 'Uacute' => 'Ú', 'Ucirc' => 'Û',
      'Uuml' => 'Ü', 'Yacute' => 'Ý', 'THORN' => 'Þ', 'szlig' => 'ß',
      'agrave' => 'à', 'aacute' => 'á', 'acirc' => 'â', 'atilde' => 'ã',
      'auml' => 'ä', 'aring' => 'å', 'aelig' => 'æ', 'ccedil' => 'ç',
      'egrave' => 'è', 'eacute' => 'é', 'ecirc' => 'ê', 'euml' => 'ë',
      'igrave' => 'ì', 'iacute' => 'í', 'icirc' => 'î', 'iuml' => 'ï',
      'eth' => 'ð', 'ntilde' => 'ñ', 'ograve' => 'ò', 'oacute' => 'ó',
      'ocirc' => 'ô', 'otilde' => 'õ', 'ouml' => 'ö', 'divide' => '÷',
      'oslash' => 'ø', 'ugrave' => 'ù', 'uacute' => 'ú', 'ucirc' => 'û',
      'uuml' => 'ü', 'yacute' => 'ý', 'thorn' => 'þ', 'yuml' => 'ÿ'
    }.freeze

    # Matches any entity listed in ENTITY_GLYPHS (names are case sensitive).
    ENTITY_PATTERN = /&(#{ENTITY_GLYPHS.keys.join('|')});/

    def initialize(data, filename, instruct)
      @data = data
      @filename = filename
      @instruct = instruct
    end

    # Runs the HTML pipeline: re-split on blank lines, mark paragraph
    # breaks, resolve them, apply the markup rules, write the output.
    def songsheet
      print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n"
      data = Html.new(@data.join.split(/\n\n+/), @filename, @instruct).space_paragraphs
      data = Html.new(data, @filename, @instruct).multiline
      data = Html.new(data.join.split("\n\n"), @filename, @instruct).markup_rules
      MyOutput.new(data, @filename, @instruct).hardOutput
    end

    # Strips each chunk and replaces paragraph-level tags with 'zZz',
    # in place. Returns the chunks as a new array.
    def space_paragraphs
      @data.map do |para|
        para.strip!
        apply_rules(para, PARAGRAPH_RULES)
        para
      end
    end

    # Not called by any pipeline here; upstream noted that some blockquotes
    # went missing. One cause was visible: the in-place gsub! returns nil when
    # no 'zZz' is present, which dropped the quoted text, so gsub is used.
    # sub:: pieces of a paragraph split on <blockquote> (a String is split
    #       into lines first). Returns the pieces joined into one String.
    def blockquotes(sub='')
      sub = sub.split(/^/) if sub.is_a?(String)
      res = []
      sub.each do |piece|
        if piece =~ /(<\/blockquote>)/i
          closing = Regexp.escape($1)
          quoted = piece[/(.+?)#{closing}/mi, 1]
          res << quoted.gsub(/zZz/, 'zZz_1 ') if quoted
          res << piece[/#{closing}(.+)/mi, 1]
        else
          res << piece
        end
      end
      res.join
    end

    # Joins lines within each chunk, moves break markers out of inline and
    # heading elements and turns every 'zZz' run into a blank line.
    # Returns a new array in which each chunk is followed by "\n\n".
    def multiline
      tuned_file = []
      @data.each do |para|
        apply_rules(para, MULTILINE_RULES)
        tuned_file << para << "\n\n"
      end
      tuned_file
    end

    # Applies the HTML to SiSU rules to every paragraph in place, indenting
    # paragraphs that fall between <blockquote> and </blockquote> with '_1 '.
    # Returns the paragraphs as a new array.
    def markup_rules
      in_blockquote = false # local; nothing else read the old class variable
      @data.map do |para|
        if para =~ /<a href="(http:\/\/.+?)">/i
          # Collapse a link whose text is itself a URL down to the href.
          # Risk that url & url are not to match.
          # NOTE(review): the optional '<' / '>' may have been &lt; / &gt;
          # before extraction decoded entities; left as shown.
          para.gsub!(/(?:<\s*)?<a href="(http:\/\/.+?)">http:\/\/.+?<\/a>(?:\s*>)?\.?/i, '\1')
        end
        in_blockquote = true if para =~ /<BLOCKQUOTE>/i
        if in_blockquote
          para.gsub!(/^/, '_1 ') unless para.empty? || para =~ /^\s*<\/?blockquote?>\s*$/i
        end
        in_blockquote = false if para =~ /<\/BLOCKQUOTE>/i
        apply_rules(para, HEADING_RULES)
        para.gsub!(/<\/?[biu]>/i, '') if para =~ /[1-6]\{/
        apply_rules(para, INLINE_RULES)
        ## glyphs & tildes: one pass is enough, no glyph contains '&'
        para.gsub!(ENTITY_PATTERN) { ENTITY_GLYPHS[$1] }
        para.gsub!(/\s\s+/, ' ')
        para.gsub!(/\t+/, ' ')
        para
      end
    end

    private

    # Applies each [pattern, replacement] pair to para, in place and in order.
    def apply_rules(para, rules)
      rules.each { |pattern, replacement| para.gsub!(pattern, replacement) }
    end
  end

  # HTML conversion with an extra clean-up pass (#markup_default); unlike
  # Html#songsheet the input is not re-split on blank lines first.
  class Default < Html
    # Ordered rules applied by #markup_default.
    # NOTE(review): the first replacement is kept exactly as written; its
    # backslashes before the braces are emitted literally, so it may have
    # been meant as '/{\1}/\2'. Confirm before changing.
    DEFAULT_RULES = [
      [/<i>(Id\.?)(\s|$)/i, '/\{\1\}\2/'],
      [/^(~\{\{ .+?)(<\/LI>\s*|<\/OL>\s*)+$/i, '\1'],
      [/\/\{Id\.\s*<\/LI>\s*\}\//i, '/{Id.}/']
    ].freeze

    # Runs the default pipeline: mark breaks, resolve them, markup rules,
    # default clean-up, then output.
    def songsheet
      print "Convert to SiSU file from #{@filename}.html << gvim ,,#{@filename}.sst >\n"
      data = Default.new(@data, @filename, @instruct).space_paragraphs
      data = Default.new(data, @filename, @instruct).multiline
      data = Default.new(data.join.split("\n\n"), @filename, @instruct).markup_rules
      data = Default.new(data, @filename, @instruct).markup_default
      MyOutput.new(data, @filename, @instruct).hardOutput
    end

    # Applies DEFAULT_RULES to every paragraph in place. Returns a new array.
    def markup_default
      @data.map do |para|
        apply_rules(para, DEFAULT_RULES)
        para
      end
    end
  end
end

# Prints the usage summary.
def help
  puts <<WOK
conversion program
initial SiSU markup from other file formats

  zxy_convert --word     does initial conversion from word97 to sisu markup, expects [filename].doc (can also use --doc)
  zxy_convert --html     does initial conversion from html to sisu markup, expects [filename].html
  zxy_convert --default  does initial conversion from default html to sisu markup, expects [filename].html

WOK
end

# Appends to @argv the base name of every argument ending in ".<extension>"
# and prints a notice for each argument that does not. Returns @argv.
def collect_basenames(argv, extension)
  argv.each do |f|
    if f =~ /.+?\.#{extension}$/
      @argv << f[/(.+?)\.#{extension}$/, 1]
    else
      print "not .#{extension}? << #{f} >> "
    end
  end
  @argv
end

# Converts each .doc argument: runs wvWare with wvSiSU.xml to produce
# <name>.wv, then converts that file to SiSU markup.
def do_word(argv, instruct)
  collect_basenames(argv, 'doc').each do |filename|
    system(%{wvWare -x #{@dir.path.home}/.sisu/convert/wvSiSU.xml #{filename}.doc > #{filename}.wv})
    file_array = IO.readlines("#{filename}.wv", '') # '' reads paragraph by paragraph
    CONVERT::WareWord97.new(file_array, filename, instruct).songsheet # metaverse created here
  end
end

# Converts each .html argument with CONVERT::Html.
def do_html(argv, instruct)
  collect_basenames(argv, 'html').each do |filename|
    # NOTE(review): "\n\r" is kept as written; confirm "\r\n" was not meant.
    file_array = IO.readlines("#{filename}.html", "\n\r")
    CONVERT::Html.new(file_array, filename, instruct).songsheet # metaverse created here
  end
end

# Converts each .html argument with CONVERT::Default.
def do_default(argv, instruct)
  collect_basenames(argv, 'html').each do |filename|
    # NOTE(review): "\n\r" is kept as written; confirm "\r\n" was not meant.
    file_array = IO.readlines("#{filename}.html", "\n\r")
    CONVERT::Default.new(file_array, filename, instruct).songsheet # metaverse created here
  end
end

# Dispatches on the command line switch; anything unrecognised prints help.
def cases(argv, instruct)
  case instruct
  when /^--(word(97)?|doc)$/i
    do_word(argv, instruct)
  when /^--(html)$/i
    do_html(argv, instruct)
  when /^--(default)$/i
    do_default(argv, instruct)
  else
    help
  end
end

# $KCODE only has an effect before Ruby 1.9; later versions warn and ignore it.
$KCODE = 'u' if RUBY_VERSION < '1.9'
# NOTE(review): this file sits under data/sisu/v1 but loads the v0 library; confirm.
branch = 'v0'
SiSU_lib = "sisu/#{branch}"
require "#{SiSU_lib}/sysenv"
include SiSU_Env
@dir = SiSU_Env::Info_env.new
@argv = []
argv = $*
# The first argument is the switch; the rest are file names.
instruct = argv.shift.to_s.chomp
instruct = 'help' if instruct.empty?
cases(argv, instruct)