diff options
Diffstat (limited to 'lib/sisu/v0/texpdf_format.rb')
| -rw-r--r-- | lib/sisu/v0/texpdf_format.rb | 472 | 
1 files changed, 329 insertions, 143 deletions
| diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index 03bdd184..9e7fccde 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -284,6 +284,7 @@ WOK        @dp=@@dp ||=SiSU_Env::Info_env.new.digest.pattern        @tx=SiSU_Env::Get_init.instance.tex        @url_brace=SiSU_Viz::Skin.new.url_decoration +      @tex2pdf=@@tex3pdf ||=SiSU_Env::System_call.new.tex2pdf_engine      end      def longtable_landscape        @end_table='\end{longtable}' @@ -432,14 +433,14 @@ WOK        end        @string      end -    def special_characters_1(para)             # ~ ^ $ & % _ { }  #LaTeX special characters - KEEP list +    def pdftex_special_characters_1(string)             # ~ ^ $ & % _ { }  #LaTeX special characters - KEEP list        #p @@utf_8.list        #@string=Iconv.conv('ISO-8859-1', 'UTF-8', @string) -      word=@string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ +      word=string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/        para_array=[] -      if word +      string=if word          word.each do |w| # _ - / # | : ! ^ ~ -          unless para =~/^(?:0~|%+ |<!Th?¡ )/um +          unless string =~/^(?:0~|%+ |<!Th?¡ )/um              w.gsub!(/[\\]?~/,'<=tilde>') unless w=~/^[1-6]~|~\{|\}~|~\[|\]~|^\^~\s|~\^|\*~\S+|~#|\{t~|<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/              w.gsub!(/&#(?:126|152);/,'<=tilde>') #126 usual              #w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual @@ -447,162 +448,334 @@ WOK            end            para_array << w          end -        para=para_array.join(' ') -        @string=para.strip +        string=para_array.join(' ') +        string=string.strip +        string +      else ''        end -      @string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'') -      @string.gsub!(/.+?<-#>/,'') -      @string.gsub!(/<EOF>/,'') -      @string.gsub!(/<ENDNOTES?>/,'') +      string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'') +      string.gsub!(/.+?<-#>/,'') +      string.gsub!(/<EOF>/,'') +      string.gsub!(/<ENDNOTES?>/,'')        #problem sequence -> -      @string.gsub!(/&(?:nbsp);/,'<=hardspace>')                                 # < SiSU special character also LaTeX -      @string.gsub!(/&(?:lt|#060);/,'<=lt>')                                     # < SiSU special character also LaTeX -      @string.gsub!(/&(?:gt|#062);/,'<=gt>')                                     # > SiSU special character also LaTeX -      @string.gsub!(/{/,'<=curlyopen>')                                     # { SiSU special character also LaTeX -      @string.gsub!(/}/,'<=curlyclose>')                                    # } SiSU special character also LaTeX -      @string.gsub!(/&#(?:126|152);/,'<=tilde>')                                 # ~ SiSU special character also LaTeX -      @string.gsub!(/#/,'\#')                                               # # SiSU special character also LaTeX -      @string.gsub!(/!/,'!')                                                # ! SiSU not really special sisu character but done, also LaTeX -      @string.gsub!(/*/,'*')                                                # * should you wish to escape astrisk e.g. describing \*{bold}* -      @string.gsub!(/-/,'-')                                                # - SiSU special character also LaTeX -      @string.gsub!(/+/,'+')                                                # + SiSU special character also LaTeX -      @string.gsub!(/,/,',')                                                # + SiSU special character also LaTeX -      @string.gsub!(/&/,'<=amp>') #unless @string=~/<:code>/                   # / SiSU special character also LaTeX -      @string.gsub!(///,'<=slash>')                                         # / SiSU special character also LaTeX -      @string.gsub!(/\/,'<=backslash>')                                     # \ SiSU special character also LaTeX -      @string.gsub!(/_/,'<=underscore>')                                    # _ SiSU special character also LaTeX -      @string.gsub!(/|/,'|')                                                # | SiSU not really special sisu character but done, also LaTeX -      @string.gsub!(/:/,':')                                                # : SiSU not really special sisu character but done, also LaTeX -      @string.gsub!(/^|\^/,'<=caret>')                                      # ^ SiSU not really special sisu character but done, also LaTeX -      @string.gsub!(/\#/,'<=hash>') +      string.gsub!(/&(?:nbsp);/,'<=hardspace>')                                 # < SiSU special character also LaTeX +      string.gsub!(/&(?:lt|#060);/,'<=lt>')                                     # < SiSU special character also LaTeX +      string.gsub!(/&(?:gt|#062);/,'<=gt>')                                     # > SiSU special character also LaTeX +      string.gsub!(/{/,'<=curlyopen>')                                     # { SiSU special character also LaTeX +      string.gsub!(/}/,'<=curlyclose>')                                    # } SiSU special character also LaTeX +      string.gsub!(/&#(?:126|152);/,'<=tilde>')                                 # ~ SiSU special character also LaTeX +      string.gsub!(/#/,'\#')                                               # # SiSU special character also LaTeX +      string.gsub!(/!/,'!')                                                # ! SiSU not really special sisu character but done, also LaTeX +      string.gsub!(/*/,'*')                                                # * should you wish to escape astrisk e.g. describing \*{bold}* +      string.gsub!(/-/,'-')                                                # - SiSU special character also LaTeX +      string.gsub!(/+/,'+')                                                # + SiSU special character also LaTeX +      string.gsub!(/,/,',')                                                # + SiSU special character also LaTeX +      string.gsub!(/&/,'<=amp>') #unless @string=~/<:code>/                   # / SiSU special character also LaTeX +      string.gsub!(///,'<=slash>')                                         # / SiSU special character also LaTeX +      string.gsub!(/\/,'<=backslash>')                                     # \ SiSU special character also LaTeX +      string.gsub!(/_/,'<=underscore>')                                    # _ SiSU special character also LaTeX +      string.gsub!(/|/,'|')                                                # | SiSU not really special sisu character but done, also LaTeX +      string.gsub!(/:/,':')                                                # : SiSU not really special sisu character but done, also LaTeX +      string.gsub!(/^|\^/,'<=caret>')                                      # ^ SiSU not really special sisu character but done, also LaTeX +      string.gsub!(/\#/,'<=hash>')        ##watch placement, problem sequence ^ -      @string.gsub!(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ') -      @string.gsub!(/<:pb>/,'\newpage') -      @string.gsub!(/<:pn>/,'\clearpage') -      @string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript -    end -    def special_characters_2(para) -      @string.gsub!(/œ/,'\oe ') -      @string.gsub!(/\$/,'\$') -      @string.gsub!(/\#/,'\#') -      @string.gsub!(/\%/,'\%') -      @string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes -      if @string !~/^\s*<:image|\}:image\s/ -        @string.gsub!(/_/,'\_') +      string.gsub!(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ') +      string.gsub!(/<:pb>/,'\newpage') +      string.gsub!(/<:pn>/,'\clearpage') +      string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript +      string +    end +    def pdftex_special_characters_2(string) +      string.gsub!(/œ/,'\oe ') +      string.gsub!(/\$/,'\$') +      string.gsub!(/\#/,'\#') +      string.gsub!(/\%/,'\%') +      string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes +      if string !~/^\s*<:image|\}:image\s/ +        string.gsub!(/_/,'\_')        end -      @string.gsub!(/\{/,'\{') -      @string.gsub!(/\}/,'\}') -      @string.gsub!(/ /,'~') # ~ character for hardspace +      string.gsub!(/\{/,'\{') +      string.gsub!(/\}/,'\}') +      string.gsub!(/ /,'~') # ~ character for hardspace        # sequence important must appear after removal of { and } -      @string.gsub!(/&\S+?;/,'') #hmmm +      string.gsub!(/&\S+?;/,'') #hmmm        # sequence imortant place before removal of & -      if @string=~/<:code>/;        @@flag_code=true -      elsif @string=~/<:code-end>/; @@flag_code=false +      if string=~/<:code>/;        @@flag_code=true +      elsif string=~/<:code-end>/; @@flag_code=false        end -      if @@flag_code; @string.gsub!(/&/,'{\\\&}') -      else @string.gsub!(/(\s+&\s+)/,' and ') +      if @@flag_code; string.gsub!(/&/,'{\\\&}') +      else string.gsub!(/(\s+&\s+)/,' and ')        end -      @string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #@string.gsub!(/§ /,'\S ') -      @string.gsub!(/£/u,'\pounds') -      @string.gsub!(/&\S+?;/,' ') -      @string.gsub!(/<a href=".+?">/,' ') -      @string.gsub!(/<\/a>/,' ') -      @string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case -      @string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url -      @string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration +      string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #string.gsub!(/§ /,'\S ') +      string.gsub!(/£/u,'\pounds') +      string.gsub!(/&\S+?;/,' ') +      string.gsub!(/<a href=".+?">/,' ') +      string.gsub!(/<\/a>/,' ') +      string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case +      string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url +      string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration        unless @@flag_code -        @string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start +        string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start        else #code-block: angle brackets special characters, note _ already escaped -        @string.gsub!(/\\_</,'{\UseTextSymbol{OML}{<}}') -        @string.gsub!(/\\_>/,'{\UseTextSymbol{OML}{>}}') +        string.gsub!(/\\_</,'{\UseTextSymbol{OML}{<}}') +        string.gsub!(/\\_>/,'{\UseTextSymbol{OML}{>}}')        end -      @string.gsub!(/<:ee>/,'') -      @string.gsub!(/<!>/,' ') +      string.gsub!(/<:ee>/,'') +      string.gsub!(/<!>/,' ')        #proposed change, insert, but may be redundant -      @string.gsub!(/ \/><:i[12]>(.+?)(?:\}~|<br)/,' \begin{ParagraphIndent}{0.01\columnwidth}\1\end{ParagraphIndent} ') # footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder -      @string.gsub!(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area -      @string.gsub!(/<b>(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}') -      @string.gsub!(/<em>(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') -      @string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') -      @string.gsub!(/<h\d+>(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') -      @string.gsub!(/<i>(.+?)<\/i>/,'\emph{\1}') -      @string.gsub!(/<italic>(.+?)<\/italic>/,'\emph{\1}') -      @string.gsub!(/<u>(.+?)<\/u>/,'\uline{\1}') # ulem -      @string.gsub!(/<cite>(.+?)<\/cite>/,"``\\1''") # quote -      @string.gsub!(/<ins>(.+?)<\/ins>/,'\uline{\1}') # ulem -      @string.gsub!(/<del>(.+?)<\/del>/,'\sout{\1}') # ulem -      @string.gsub!(/<sub>(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$") -      @string.gsub!(/<sup>(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$") +      string.gsub!(/ \/><:i[12]>(.+?)(?:\}~|<br)/,' \begin{ParagraphIndent}{0.01\columnwidth}\1\end{ParagraphIndent} ') # footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder +      string.gsub!(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area +      string.gsub!(/<b>(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}') +      string.gsub!(/<em>(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') +      string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') +      string.gsub!(/<h\d+>(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') +      string.gsub!(/<i>(.+?)<\/i>/,'\emph{\1}') +      string.gsub!(/<italic>(.+?)<\/italic>/,'\emph{\1}') +      string.gsub!(/<u>(.+?)<\/u>/,'\uline{\1}') # ulem +      string.gsub!(/<cite>(.+?)<\/cite>/,"``\\1''") # quote +      string.gsub!(/<ins>(.+?)<\/ins>/,'\uline{\1}') # ulem +      string.gsub!(/<del>(.+?)<\/del>/,'\sout{\1}') # ulem +      string.gsub!(/<sub>(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$") +      string.gsub!(/<sup>(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$")        unless @@flag_code -        @string.gsub!(/"(.+?)"/,"``\\1''")  # quote marks / quotations open & close " need condition exclude for code -        @string.gsub!(/\s+"/,' ``')                                          # open " -        @string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1``')       # open " -        @string.gsub!(/"(\s|\.|,|:|;)/,"''\\1")                              # close " -        @string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,"''\\1")      # close " -        @string.gsub!(/"(\.|,)/,"''")                                        # close " -        @string.gsub!(/\s+'/,' `')                                           # open ' -        @string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`')        # open ' +        string.gsub!(/"(.+?)"/,'“\1”')  # quote marks / quotations open & close " need condition exclude for code +        string.gsub!(/\s+"/,' “')                                # open " +        string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1“')  # open " +        string.gsub!(/"(\s|\.|,|:|;)/,'”\1')                     # close " +        string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,'”\1')  # close " +        string.gsub!(/"(\.|,)/,'”')                              # close " +        string.gsub!(/\s+'/,' `')                                # open ' +        string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`')  # open '        end -      @string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_ -      @string.gsub!(/(<font.*?>|<\/font>)/,'') -      @string.gsub!(/\s*<sup>(\S+?)<\/sup>/,'^\1') -      @string.gsub!(/(<sup>|<\/sup>)/,'') -      @string +      string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_ +      string.gsub!(/(<font.*?>|<\/font>)/,'') +      string.gsub!(/\s*<sup>(\S+?)<\/sup>/,'^\1') +      string.gsub!(/(<sup>|<\/sup>)/,'') +      string +    end +    def pdftex_special_characters_3(string) +      string.gsub!(/<br(\s*[^\/][^>])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder +      string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder +      #problem sequence (another kludge) -> +      string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') +      string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') +      #string.gsub!(/<=lt>/,'\<') +      #string.gsub!(/<=gt>/,'\>') +      string.gsub!(/<=underscore>/,'\_') +      string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text +      string.gsub!(/<=tilde>/,'{\~~}') +      string.gsub!(/<=pipe>/,'{\textbar}') +      string.gsub!(/<=caret>/,'{\^{~}}') +      #string.gsub!(/<=caret>/,'\^{}') +      string.gsub!(/<=exclaim>/,'\Verbatim{!}') +      string.gsub!(/<=hash>/,'{\#}') +      #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') +      #string.gsub!(/<=slash>/,'{\slash}') +      string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 +      string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005 +      #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') +      string.gsub!(/<=slash>/,'{/}') +      string.gsub!(/<=backslash>/,'{\textbackslash}') +      #string.gsub!(/<=asterisk>/,'*') +      #string.gsub!(/<=exclaim>/,'!') +      #string.gsub!(/<=asterisk>/,'{\ast}') +      #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic +      #copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' +      string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic +      string      end -    def special_characters_3(para) -      @string.gsub!(/<br(\s*[^\/][^>])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder -      @string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder +    def xetex_special_characters_1(string)             # ~ ^ $ & % _ { }  #LaTeX special characters - KEEP list +      #p @@utf_8.list +      #string=Iconv.conv('ISO-8859-1', 'UTF-8', @string) +      word=string.scan(/\S+|\n/) #unless line =~/^(?:0~\S|%+\s)/ +      para_array=[] +      string=if word +        word.each do |w| # _ - / # | : ! ^ ~ +          unless string =~/^(?:0~|%+ |<!Th?¡ )/um +            w.gsub!(/[\\]?~/,'<=tilde>') unless w=~/^[1-6]~|~\{|\}~|~\[|\]~|^\^~\s|~\^|\*~\S+|~#|\{t~|<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ +            w.gsub!(/&#(?:126|152);/,'<=tilde>') #126 usual +            #w.gsub!(/&#(?:126|152);/,'<=tilde>') unless w=~/https?:\/\/\S+/ #126 usual +            w.gsub!(/\\?\|||/,'<=pipe>') #unless w=~/<~\d+;(?:[ohmu]|[0-6]:)\d+;\w\d+>/ # | SiSU not really special sisu character but done, also LaTeX +          end +          para_array << w +        end +        string=para_array.join(' ') +        string=string.strip +        string +      else '' +      end +      string.gsub!(/<~\d+;(?:\w|[0-6]:)\d+;[umdv]\d+><#@dp:#@dp>/,'') +      string.gsub!(/.+?<-#>/,'') +      string.gsub!(/<EOF>/,'') +      string.gsub!(/<ENDNOTES?>/,'') +      #problem sequence -> +      string.gsub!(/&(?:nbsp);/,'<=hardspace>')                                 # < SiSU special character also LaTeX +      string.gsub!(/&(?:lt|#060);/,'<=lt>')                                     # < SiSU special character also LaTeX +      string.gsub!(/&(?:gt|#062);/,'<=gt>')                                     # > SiSU special character also LaTeX +      string.gsub!(/{/,'<=curlyopen>')                                     # { SiSU special character also LaTeX +      string.gsub!(/}/,'<=curlyclose>')                                    # } SiSU special character also LaTeX +      string.gsub!(/&#(?:126|152);/,'<=tilde>')                                 # ~ SiSU special character also LaTeX +      string.gsub!(/#/,'\#')                                               # # SiSU special character also LaTeX +      string.gsub!(/!/,'!')                                                # ! SiSU not really special sisu character but done, also LaTeX +      string.gsub!(/*/,'*')                                                # * should you wish to escape astrisk e.g. describing \*{bold}* +      string.gsub!(/-/,'-')                                                # - SiSU special character also LaTeX +      string.gsub!(/+/,'+')                                                # + SiSU special character also LaTeX +      string.gsub!(/,/,',')                                                # + SiSU special character also LaTeX +      string.gsub!(/&/,'<=amp>') #unless @string=~/<:code>/                   # / SiSU special character also LaTeX +      string.gsub!(///,'<=slash>')                                         # / SiSU special character also LaTeX +      string.gsub!(/\/,'<=backslash>')                                     # \ SiSU special character also LaTeX +      string.gsub!(/_/,'<=underscore>')                                    # _ SiSU special character also LaTeX +      string.gsub!(/|/,'|')                                                # | SiSU not really special sisu character but done, also LaTeX +      string.gsub!(/:/,':')                                                # : SiSU not really special sisu character but done, also LaTeX +      string.gsub!(/^|\^/,'<=caret>')                                      # ^ SiSU not really special sisu character but done, also LaTeX +      string.gsub!(/\#/,'<=hash>') +      ##watch placement, problem sequence ^ +      string.gsub!(/<sup><font face=symbol>&atild;<\/font><\/sup>/,' ') +      string.gsub!(/<:pb>/,'\newpage') +      string.gsub!(/<:pn>/,'\clearpage') +      string.gsub!(/\\copy(right|mark)?/,'<=copymark>') # ok problem with superscript +      string +    end +    def xetex_special_characters_2(string) +      string.gsub!(/œ/,'\oe ') +      string.gsub!(/\$/,'\$') +      string.gsub!(/\#/,'\#') +      string.gsub!(/\%/,'\%') +      string.gsub!(/\~/,'\~') #revist, should not be necessary to mark remaining tildes +      if string !~/^\s*<:image|\}:image\s/ +        string.gsub!(/_/,'\_') +      end +      string.gsub!(/\{/,'\{') +      string.gsub!(/\}/,'\}') +      string.gsub!(/ /,'~') # ~ character for hardspace +      # sequence important must appear after removal of { and } +      string.gsub!(/&\S+?;/,'') #hmmm +      # sequence imortant place before removal of & +      if string=~/<:code>/;        @@flag_code=true +      elsif string=~/<:code-end>/; @@flag_code=false +      end +      if @@flag_code; string.gsub!(/&/,'{\\\&}') +      else string.gsub!(/(\s+&\s+)/,' and ') +      end +      string.gsub!(/§/u,'\S') #latex: space between next character not preserved? #string.gsub!(/§ /,'\S ') +      string.gsub!(/£/u,'\pounds') +      string.gsub!(/&\S+?;/,' ') +      string.gsub!(/<a href=".+?">/,' ') +      string.gsub!(/<\/a>/,' ') +      string.gsub!(/[^\}>_]((?:https?|file|ftp):\/\/\S+?)(<\/\S>)/,' \begin{scriptsize}\href{\1}{\1} \end{scriptsize}\2') #special case +      string.gsub!(/((?:^|\s)[}])((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\1\begin{scriptsize}\\href{\2}{\2}\end{scriptsize}\3') #special case \{ e.g. \}http://url +      string.gsub!(/\B(?:\\_|\\)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/,'\begin{scriptsize}\\href{\1}{\1}\end{scriptsize}\2') #specially escaped url no decoration +      unless @@flag_code +        string.gsub!(/(^|\s)((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?=\s|$))/,"\\1#{@url_brace.tex_open}\\begin{scriptsize}\\href{\\2}{\\2}\\end{scriptsize}#{@url_brace.tex_close}\\3") #url matching with decoration <url> positive lookahead, sequence issue with { linked }http://url cannot use \b at start +      else #code-block: angle brackets special characters, note _ already escaped +        string.gsub!(/\\_</,'{\UseTextSymbol{OML}{<}}') +        string.gsub!(/\\_>/,'{\UseTextSymbol{OML}{>}}') +      end +      string.gsub!(/<:ee>/,'') +      string.gsub!(/<!>/,' ') +      #proposed change, insert, but may be redundant +      string.gsub!(/ \/><:i[12]>(.+?)(?:\}~|<br)/,' \begin{ParagraphIndent}{0.01\columnwidth}\1\end{ParagraphIndent} ') # footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder +      string.gsub!(/<(br|p)>|<\/\s*(br|p)>|<(br|p)\s*\/>/," #{@@tex_backslash*2} ") # Work Area +      string.gsub!(/<b>(.+?)<\/b>/,'\begin{bfseries}\1 \end{bfseries}') +      string.gsub!(/<em>(.+?)<\/em>/,'\begin{bfseries}\1 \end{bfseries}') +      string.gsub!(/<(bold|strong)>(.+?)<\/(bold|strong)>/,'\begin{bfseries}\1 \end{bfseries}') +      string.gsub!(/<h\d+>(.+?)<\/h\d+>/,'\begin{bfseries}\1 \end{bfseries}') +      string.gsub!(/<i>(.+?)<\/i>/,'\emph{\1}') +      string.gsub!(/<italic>(.+?)<\/italic>/,'\emph{\1}') +      string.gsub!(/<u>(.+?)<\/u>/,'\uline{\1}') # ulem +      string.gsub!(/<cite>(.+?)<\/cite>/,"``\\1''") # quote +      string.gsub!(/<ins>(.+?)<\/ins>/,'\uline{\1}') # ulem +      string.gsub!(/<del>(.+?)<\/del>/,'\sout{\1}') # ulem +      string.gsub!(/<sub>(.+?)<\/sub>/,"\$_{\\textrm{\\1}}\$") +      string.gsub!(/<sup>(.+?)<\/sup>/,"\$^{\\textrm{\\1}}\$") +      unless @@flag_code +        string.gsub!(/"(.+?)"/,'“\1”')  # quote marks / quotations open & close " need condition exclude for code +        string.gsub!(/\s+"/,' “')                                # open " +        string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*"/,'\1“')  # open " +        string.gsub!(/"(\s|\.|,|:|;)/,'”\1')                     # close " +        string.gsub!(/"([1-6-]#{@@tilde}\S*|<.+?>)?\s*$/,'”\1')  # close " +        string.gsub!(/"(\.|,)/,'”')                              # close " +        string.gsub!(/\s+'/,' `')                                # open ' +        string.gsub!(/^([1-6-]#{@@tilde}\S*|<.+?>)?\s*'/,'\1`')  # open ' +      end +      #string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 \begin{math} \bullet \end{math}~~') #bullets - added 2004w17 watch \\_ +      string.gsub!(/^(<:i[1-9]>)?\s*\\_\*\s*/,'\1 ● ~~') +      string.gsub!(/(<font.*?>|<\/font>)/,'') +      string.gsub!(/\s*<sup>(\S+?)<\/sup>/,'^\1') +      string.gsub!(/(<sup>|<\/sup>)/,'') +      string +    end +    def xetex_special_characters_3(string) +      string.gsub!(/<br(\s*[^\/][^>])/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder +      string.gsub!(/([^<][^b][^r]\s+)\/>/,'\1') # clean up, incredibly messy :-( footnote indents, problems if match exists in ordinary paragraphs? check! Work Area 200501 a bit tricky as must be able to match multiple times, and to clean remainder        #problem sequence (another kludge) -> -      @string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') -      @string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') -      #@string.gsub!(/<=lt>/,'\<') -      #@string.gsub!(/<=gt>/,'\>') -      @string.gsub!(/<=underscore>/,'\_') -      @string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text -      @string.gsub!(/<=tilde>/,'{\~~}') -      @string.gsub!(/<=pipe>/,'{\textbar}') -      @string.gsub!(/<=caret>/,'{\^{~}}') -      #@string.gsub!(/<=caret>/,'\^{}') -      @string.gsub!(/<=exclaim>/,'\Verbatim{!}') -      @string.gsub!(/<=hash>/,'{\#}') -      #@string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') -      #@string.gsub!(/<=slash>/,'{\slash}') -      @string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 -      @string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005 -      #@string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') -      @string.gsub!(/<=slash>/,'{/}') -      @string.gsub!(/<=backslash>/,'{\textbackslash}') -      #@string.gsub!(/<=asterisk>/,'*') -      #@string.gsub!(/<=exclaim>/,'!') -      #@string.gsub!(/<=asterisk>/,'{\ast}') -      #@string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic +      string.gsub!(/<=lt>/,'{\UseTextSymbol{OML}{<}}') +      string.gsub!(/<=gt>/,'{\UseTextSymbol{OML}{>}}') +      #string.gsub!(/<=lt>/,'\<') +      #string.gsub!(/<=gt>/,'\>') +      string.gsub!(/<=underscore>/,'\_') +      string.gsub!(/(\href\{http:\/\/\S+?)(?:(?:<=tilde>)(\S+))+\}/,'\1\~\2}') #tildes in urls \href treated differently from text +      string.gsub!(/<=tilde>/,'{\~~}') +      string.gsub!(/<=pipe>/,'{\textbar}') +      string.gsub!(/<=caret>/,'{\^{~}}') +      #string.gsub!(/<=caret>/,'\^{}') +      string.gsub!(/<=exclaim>/,'\Verbatim{!}') +      string.gsub!(/<=hash>/,'{\#}') +      #string.gsub!(/<=hash>/,'{\UseTextSymbol{OT1}{#}}') +      #string.gsub!(/<=slash>/,'{\slash}') +      string.gsub!(/<=hardspace>/,'{~}') #changed ... 2005 +      string.gsub!(/<=amp>/,'{\\\&}') #changed ... 2005 +      #string.gsub!(/<=amp>/,'{\UseTextSymbol{OT1}{&}}') +      string.gsub!(/<=slash>/,'{/}') +      string.gsub!(/<=backslash>/,'{\textbackslash}') +      #string.gsub!(/<=asterisk>/,'*') +      #string.gsub!(/<=exclaim>/,'!') +      #string.gsub!(/<=asterisk>/,'{\ast}') +      #string.gsub!(/<=copymark>/,"^{\\copyright} ") # watch has been problematic        #copymark='{\\begin{small}\\raisebox{1ex}{\\copyright}\\end{small}} ' -      @string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic -      @string +      string.gsub!(/<=copymark>\s*(.+)?\s+(<\\~\d+;\w(?:[0-6]:)?\d+;\w\d+><#@dp:#@dp>)/,"^\\copyright \\textnormal{\\1} \\2") # watch likely to be problematic +      string      end -    def special_characters_curly(para) -      @string.gsub!(/<=curlyopen>/,'\{') -      @string.gsub!(/<=curlyclose>/,'\}') -      @string +    def special_characters_curly(string) +      string.gsub!(/<=curlyopen>/,'\{') +      string.gsub!(/<=curlyclose>/,'\}') +      string      end -    def special_characters_unsafe_1(para) #depreciated, make obsolete + + +    def special_characters_unsafe_1(string) #depreciated, make obsolete        # some substitutions are sequence sensitive, rearrange with care. -      @string.gsub!(/\\backslash (copyright|clearpage|newpage)/,"\\\\\\1")  #kludge bad solution, find out where tail is sent through specChar ! -    end -    def special_characters_unsafe_2(para) -    end -    def special_characters_unsafe_3(para) +      string.gsub!(/\\backslash (copyright|clearpage|newpage)/,"\\\\\\1")  #kludge bad solution, find out where tail is sent through specChar ! +      string      end      def special_characters                                                       #special characters - some substitutions are sequence sensitive, rearrange with care. -      special_characters_1(@string) -      special_characters_unsafe_1(@string) -      special_characters_2(@string) -      special_characters_3(@string) +      string=@string +      case @tex2pdf +      when /pdf/ +        string=pdftex_special_characters_1(string) unless string.nil? +        string=special_characters_unsafe_1(string) unless string.nil? #pdftex_special_characters_unsafe_1(@string) +        string=pdftex_special_characters_2(string) unless string.nil? +        string=pdftex_special_characters_3(string) unless string.nil? +      when /xe/ +        string=xetex_special_characters_1(string) unless string.nil? +        string=special_characters_unsafe_1(string) unless string.nil? #xetex_special_characters_unsafe_1(@string) +        string=xetex_special_characters_2(string) unless string.nil? #issues with xetex +        string=xetex_special_characters_3(string) unless string.nil? +      end +      @string=string      end      def special_characters_safe                                                  #special characters - some substitutions are sequence sensitive, rearrange with care. -      special_characters_1(@string) -      special_characters_2(@string) -      #special_characters_3(@string) +      string=@string +      case @tex2pdf +      when /pdf/ +        string=pdftex_special_characters_1(@string) unless string.nil? +        string=pdftex_special_characters_2(@string) unless string.nil? +        #special_characters_3(@string) +      when /xe/ +        string=xetex_special_characters_1(@string) unless string.nil? +        string=xetex_special_characters_2(@string) unless string.nil? # remove this to start with, causes issues +      end +      @string=string      end      def heading_major(para,lev)        title=@md.title @@ -947,17 +1120,27 @@ WOK        end      end      def tex_head_encode -      case @md.file_encoding -      when /iso-?8859/i                                                         #% iso8859 -      <<WOK -\\usepackage[latin1]{inputenc} +      case @tex2pdf +      when /xe/ +        <<WOK +\\usepackage{babel} +\\usepackage{ucs} +\\usepackage{fontspec} +\\usepackage{xunicode}  WOK -      else                                                                      #% utf-8 assumed -      <<WOK +      when /pdf/ +        if @md.file_encoding =~ /iso-?8859/i                                                         #% iso8859 +        <<WOK +% \\usepackage[latin1]{inputenc} +\\usepackage{fontspec} +WOK +        else                                                                      #% utf-8 assumed +        <<WOK  \\usepackage{babel}  \\usepackage{ucs}  \\usepackage[utf8x]{inputenc}  WOK +        end        end      end      def tex_head_info @@ -1099,7 +1282,7 @@ WOK  \\usepackage{url}  \\usepackage{alltt}  \\usepackage{thumbpdf} -\\usepackage[pdftex, +\\usepackage[#{@tex2pdf},    #{color.strip}    pdftitle={#@string1},  %  pdftitle={Untitled}, @@ -1125,6 +1308,9 @@ WOK    pdfstartview=FitH  ]  {hyperref} +%% trace lost characters +% \\tracinglostchars = 1 +% \\tracingonline = 1  \\usepackage[usenames]{color}  \\definecolor{myblack}{rgb}{0,0,0}  \\definecolor{myred}{rgb}{0.75,0,0} | 
