From fdd1489c82a274615e46e3c67fc5707e3fb0465f Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Mon, 9 Jul 2007 11:19:48 +0100 Subject: improved url matching, and texpdf tolerance and indentation levels set --- lib/sisu/v0/shared_xml.rb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib/sisu/v0/shared_xml.rb') diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index bcdf44f5..995044db 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -347,8 +347,7 @@ module SiSU_XML_munge def markup(para='') wordlist=para.scan(/\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip - para.gsub!(/(^|\s+)<\s+/,'\1< ') - para.gsub!(/\s+>(\s+|$)/,' >\1') + para.gsub!(/(^|\s+)<\s+/,'\1< '); para.gsub!(/\s+>(\s+|$)/,' >\1') para.gsub!(/<:pb>\s*/,'') para.gsub!(/<+[-~]#>+/,'') para.gsub!(/<0;\w\d+;[um]\d+><#@dp:#@dp>/,'') @@ -357,8 +356,12 @@ module SiSU_XML_munge #para.gsub!(/^_\*\s+/,'* ') para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))\s+(\d+)x(\d+)(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1[\\2] \\5}) para.gsub!(/(^|\s)\{\s*(\S+?\.(?:jpg|png|gif))(\s+[^}]+)?\}(https?:\/\/\S+)/,%{\\1\\2}) - para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,'\1\2\4') - para.gsub!(/(^|\s)(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/,%{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) + para.gsub!(/(^|\s)\{([^}]+)\}(https?:\/\/[^"><]+?)([,.:;"><]?(?:\s|$))/, + '\1\2\4') #watch, compare html_tune + para.gsub!(/(^|\s)((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/, + %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) + para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later + #para.gsub!(/(^|\s)[_\\]((?:https?|ftp):\/\/\S+?\.[^'"><\s]+?)([.,]?(?:\s|$))/,'\1\2\3') #escaped urls not linked, deal with later para.gsub!(/ /,' ') #clean para end -- cgit v1.2.3