From 9432d0345a982f2c64c684cc913b704cca5660f5 Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Fri, 5 Sep 2008 01:24:49 -0400 Subject: character given to represent non-break space in internal code (dal, constants and affected downstream code) --- lib/sisu/v0/constants.rb | 2 +- lib/sisu/v0/dal.rb | 18 +++++++------- lib/sisu/v0/dal_doc_str_code.rb | 2 +- lib/sisu/v0/dal_syntax.rb | 45 +++++++++++++++++----------------- lib/sisu/v0/db_import.rb | 2 +- lib/sisu/v0/html_tune.rb | 1 + lib/sisu/v0/manpage.rb | 6 ++--- lib/sisu/v0/odf.rb | 7 +----- lib/sisu/v0/shared_xml.rb | 8 +++--- lib/sisu/v0/spell.rb | 3 ++- lib/sisu/v0/sst_do_inline_footnotes.rb | 2 +- lib/sisu/v0/sst_to_s_xml_dom.rb | 2 +- lib/sisu/v0/texinfo_format.rb | 3 +-- lib/sisu/v0/texpdf_format.rb | 8 +++--- lib/sisu/v0/wikispeak.rb | 2 +- 15 files changed, 54 insertions(+), 57 deletions(-) diff --git a/lib/sisu/v0/constants.rb b/lib/sisu/v0/constants.rb index 2c08bdfa..96878fcf 100644 --- a/lib/sisu/v0/constants.rb +++ b/lib/sisu/v0/constants.rb @@ -90,6 +90,7 @@ Mx[:gl_bullet]= "#{Mx[:gl_o]}●#{Mx[:gl_c]}" #non substantive text sort: <-#> <~#> Mx[:pa_non_object_dummy_heading]="#{Mx[:pa_o]}-##{Mx[:pa_c]}" #unnumbered paragraph, delete when not required [used in dummy headings, eg. for segmented html] (place marker at end of paragraph) Mx[:pa_non_object_no_heading]="#{Mx[:pa_o]}~##{Mx[:pa_c]}" #unnumbered paragraph (place marker at end of paragraph) +Mx[:nbsp]= '▭ ' Mx[:br_line]= "#{Mx[:mk_o]}br#{Mx[:mk_c]}" Mx[:br_paragraph]= "#{Mx[:mk_o]}br#{Mx[:mk_c]}" Mx[:br_nl]= "#{Mx[:mk_o]}nl#{Mx[:mk_c]}" @@ -172,4 +173,3 @@ check: ▶this is text or an image◀http:// ◀this is text or an image▶ http:// - diff --git a/lib/sisu/v0/dal.rb b/lib/sisu/v0/dal.rb index 6bfbd76e..25b7528e 100644 --- a/lib/sisu/v0/dal.rb +++ b/lib/sisu/v0/dal.rb @@ -407,14 +407,14 @@ module SiSU_DAL if describe tuned_file_tmp << if @u.remote #to double space <:br> at beginning of entry if describe =~/^~\^ / - "     {#{describe} }#{@u.remote}/#{url_dir}/#{o_f} " + "#{Mx[:nbsp]*4} {#{describe} }#{@u.remote}/#{url_dir}/#{o_f} " else - "     { #{describe} }#{@u.remote}/#{url_dir}/#{o_f} " + "#{Mx[:nbsp]*4} { #{describe} }#{@u.remote}/#{url_dir}/#{o_f} " end else if describe =~/^~\^ / - "     {#{describe} }../#{url_dir}/#{o_f} " - else "     { #{describe} }../#{url_dir}/#{o_f} " + "#{Mx[:nbsp]*4} {#{describe} }../#{url_dir}/#{o_f} " + else "#{Mx[:nbsp]*4} { #{describe} }../#{url_dir}/#{o_f} " end end end @@ -428,13 +428,13 @@ module SiSU_DAL if describe tuned_file_tmp << if @u.remote x=if describe =~/zip/ - "     {#{describe} }#{@u.src_pod}/#{o_f} " - else "     {#{describe} }#{@u.src_txt}/#{o_f} " + "#{Mx[:nbsp]*4} {#{describe} }#{@u.src_pod}/#{o_f} " + else "#{Mx[:nbsp]*4} {#{describe} }#{@u.src_txt}/#{o_f} " end else x=if describe =~/zip/ - "     { #{describe} }../pod/#{o_f} " - else "     { #{describe} }../zip/#{o_f} " + "#{Mx[:nbsp]*4} { #{describe} }../pod/#{o_f} " + else "#{Mx[:nbsp]*4} { #{describe} }../zip/#{o_f} " end end end @@ -1137,7 +1137,7 @@ module SiSU_DAL #% same as db clean --> s=s.gsub(/(.+?)<\/del>/,'DELETED(\1)') # deletions s=s.gsub(/(\d+)<\/sup>/,'[\1]') - s=s.gsub(/(?: \\;)+/,' ') + s=s.gsub(/(?:#{Mx[:nbsp]})+/,' ') #s=s.gsub(//,"[TABLE]\n") # tables #s=s.gsub(//,'\1') # tables #s=s.gsub(/¡¡\d+¡/,' ') # tables diff --git a/lib/sisu/v0/dal_doc_str_code.rb b/lib/sisu/v0/dal_doc_str_code.rb index 82ada040..27e20d51 100644 --- a/lib/sisu/v0/dal_doc_str_code.rb +++ b/lib/sisu/v0/dal_doc_str_code.rb @@ -170,7 +170,7 @@ module SiSU_document_structure_code data.each do |line| if line =~/\S/ \ and line !~/^code\{|^\}code|#{Mx[:gr_o]}code.+/ - line.gsub!(/\s\s/,'  ') + line.gsub!(/\s\s/,"#{Mx[:nbsp]}#{Mx[:nbsp]}") line.gsub!(/^/,"#{Mx[:gr_o]}codeline#{Mx[:gr_c]}") if type=='code' # try sort for texpdf special case if line =~/(?:https?|file|ftp):\/\/\S+$/ line.gsub!(/$/," #{Mx[:br_nl]}") diff --git a/lib/sisu/v0/dal_syntax.rb b/lib/sisu/v0/dal_syntax.rb index 8e439870..acdec0e4 100644 --- a/lib/sisu/v0/dal_syntax.rb +++ b/lib/sisu/v0/dal_syntax.rb @@ -260,6 +260,7 @@ module SiSU_Syntax line.gsub!(/\}\.\.\/(\S+)/,"\}#@output_url/\\1") #means you are not supporting relative links (only relevant in html), converted to static here line.gsub!(/<:=(\S+?)>/,'{ c_\1.png 14x14 }http://www.jus.uio.no/sisu') #adjustment 2005w30 line.gsub!(//,'<:\1>') #escaped special character + line.gsub!(/ /,"#{Mx[:nbsp]}") #escaped special character line.gsub!(/\\~/,"#{Mx[:gl_o]}#126#{Mx[:gl_c]}") #escaped special character line.gsub!(/\\\{/,"#{Mx[:gl_o]}#123#{Mx[:gl_c]}") #escaped special character line.gsub!(/\\\}/,"#{Mx[:gl_o]}#125#{Mx[:gl_c]}") #escaped special character @@ -298,31 +299,31 @@ module SiSU_Syntax #line.gsub!(/(^| )\{~\^ (.+?)\s*\}((?:https?|file|ftp):\S+)\s+~\{(.+?)\}~/,'\1{ \2 }\3 ~{ \3 \4 }~') # watch line.gsub!(/<:?p([nb])>/,"#{Mx[:fa_o]}p\\1#{Mx[:fa_c]}") #indent used in endnotes, not implemented, replace when ready with: line.gsub!(/(?:
|
)\s*_([12])\s+/,'
<:i\1> ') # depreciated -->#{Mx[:fa_c]} - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)e\{(.+?)\}e/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #emphasis - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)b\{(.+?)\}b/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)u\{(.+?)\}u/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)c\{(.+?)\}c/,"\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite /blockquote? - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)i\{(.+?)\}i/,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)e\{(.+?)\}e/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #emphasis + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)b\{(.+?)\}b/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)u\{(.+?)\}u/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)c\{(.+?)\}c/,"\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite /blockquote? + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)i\{(.+?)\}i/,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics # depreciated ^ - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)!\{(.+?)\}!/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #emphasis - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)\*\{(.+?)\}\*/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)_\{(.+?)\}_/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[]|\(|\>)\/\{(.+?)\}\//,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\"\{(.+?)\}\"/,"\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite /blockquote? + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)!\{(.+?)\}!/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #emphasis + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)\*\{(.+?)\}\*/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[\{]|\>)_\{(.+?)\}_/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}") #underscore + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|#{Mx[:lnk_o]}|[\(\[]|\(|\>)\/\{(.+?)\}\//,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}") #italics + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\"\{(.+?)\}\"/,"\\1#{Mx[:fa_cite_o]}\\2#{Mx[:fa_c_o]}cite#{Mx[:fa_c]}") #cite /blockquote? line.gsub!(/(^|[^\\])\^\{(.+?)\}\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |\(|\>|\S)9\{(.+?)\}9/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|\(|\>|\S)9\{(.+?)\}9/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript line.gsub!(/(^|[^\\]),\{(.+?)\},/,"\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") #subscript - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)6\{(.+?)\}6/,"\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") #subscript - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\+\{(.+?)\}\+/,"\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") #inserted text - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)v\{(.+?)\}v/,"\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") #inserted text - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)-\{(.+?)\}-/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") #strikethrough - deleted text - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)x\{(.+?)\}x/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") #deleted text - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\*(\S+?)\*/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\!(\S+?)\!/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([^a-zA-Z0-9]|[ ,.;:'"~$]|$)/,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}\\3") #italics single word, watch - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)_(\S+?)_([.,!'")]?(?:\s|$))/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}\\3") #underscore single word, watch (made more complicated by url decoration escape tag (_url)) + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)6\{(.+?)\}6/,"\\1#{Mx[:fa_subscript_o]}\\2#{Mx[:fa_subscript_c]}") #subscript + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\+\{(.+?)\}\+/,"\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") #inserted text + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)v\{(.+?)\}v/,"\\1#{Mx[:fa_insert_o]}\\2#{Mx[:fa_insert_c]}") #inserted text + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)-\{(.+?)\}-/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") #strikethrough - deleted text + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)x\{(.+?)\}x/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}") #deleted text + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\*(\S+?)\*/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\!(\S+?)\!/,"\\1#{Mx[:fa_bold_o]}\\2#{Mx[:fa_bold_c]}") #bold single word, watch + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)\/([\(\)a-zA-Z0-9']+?)\/([^a-zA-Z0-9]|[ ,.;:'"~$]|$)/,"\\1#{Mx[:fa_italics_o]}\\2#{Mx[:fa_italics_c]}\\3") #italics single word, watch + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>)_(\S+?)_([.,!'")]?(?:\s|$))/,"\\1#{Mx[:fa_underscore_o]}\\2#{Mx[:fa_underscore_c]}\\3") #underscore single word, watch (made more complicated by url decoration escape tag (_url)) line.gsub!(/(^|#{Mx[:gl_c]}|\s+)-([^{]\S+?)-( |$)/,"\\1#{Mx[:fa_strike_o]}\\2#{Mx[:fa_strike_c]}\\3") #underscore single word, watch - line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]| |#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>|\d+)\^(\S+?)\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript single word, watch digit added + line.gsub!(/(^|#{Mx[:gl_c]}|\s+|['"]|#{Mx[:nbsp]}|#{Mx[:fa_o_c]}|#{Mx[:fa_c]}|\(|\>|\d+)\^(\S+?)\^/,"\\1#{Mx[:fa_superscript_o]}\\2#{Mx[:fa_superscript_c]}") #superscript single word, watch digit added line.gsub!(/<[:e]\s+(.+?)!?>/,"#{Mx[:en_a_o]}\\1#{Mx[:en_a_c]}") #not tested line.gsub!(/^\s*_\*\s*/,"#{Mx[:gl_bullet]}") #bullets, shortcut #line.gsub!(/^\s*_(\*+)\s*/,"#{Mx[:gl_bullet]}") #bullets, shortcut @@ -355,7 +356,7 @@ module SiSU_Syntax line.gsub!(/_<:(\S+?)_>/,'<:\1>') #convert <:\S+> back, clumsy line.gsub!(/_<(br(?: \/)?)_>/,'<\1>') #convert

back, clumsy line.gsub!(/(^|#{Mx[:gl_c]}|\s)<(br(?: \/)?)>([\s,.]|$)/,'\1<\2>\3') #convert

back, clumsy - line.gsub!(/#{Mx[:gr_o]}codeline#{Mx[:gr_c]}/,"\n  ") #temporary fix, prefer: #line.gsub!(/<:codeline>/,"\n") + line.gsub!(/#{Mx[:gr_o]}codeline#{Mx[:gr_c]}/,"\n#{Mx[:nbsp]}#{Mx[:nbsp]}") #temporary fix, prefer: #line.gsub!(/<:codeline>/,"\n") else # 0~ end line diff --git a/lib/sisu/v0/db_import.rb b/lib/sisu/v0/db_import.rb index 52bb3ad2..f02ccd3f 100644 --- a/lib/sisu/v0/db_import.rb +++ b/lib/sisu/v0/db_import.rb @@ -147,7 +147,7 @@ module SiSU_DB_import def strip_markup(string) #define rules, make same as in dal clean string.gsub!(/#{Mx[:fa_superscript_o]}(\d+)#{Mx[:fa_superscript_c]}/,'[\1]') string.gsub!(/#{Mx[:pa_o]}:i[1-9]#{Mx[:pa_c]}/,'') - string.gsub!(/(?: \\;)+/,' ') + string.gsub!(/(?: \\;|#{Mx[:nbsp]})+/,' ') string.gsub!(/#{Mx[:gr_o]}T[h]?#{Mx[:tc_p]}.+?#{Mx[:gr_c]}/u,"[TABLE]\n") #tables #CHECK should take whole table string.gsub!(/#{Mx[:tc_o]}#{Mx[:tc_p]}#{Mx[:tc_p]}\d+(.+)#{Mx[:tc_c]}/u,'\1') #tables string.gsub!(/#{Mx[:tc_p]}#{Mx[:tc_p]}\d+#{Mx[:tc_p]}/u,' ') #tables diff --git a/lib/sisu/v0/html_tune.rb b/lib/sisu/v0/html_tune.rb index f06b8764..99f2d7f4 100644 --- a/lib/sisu/v0/html_tune.rb +++ b/lib/sisu/v0/html_tune.rb @@ -245,6 +245,7 @@ module SiSU_Tune para.gsub!(/#{Mx[:fa_cite_o]}(.+?)#{Mx[:fa_cite_c]}/,'\1') para.gsub!(/#{Mx[:fa_strike_o]}(.+?)#{Mx[:fa_strike_c]}/,'\1') para.gsub!(/#{Mx[:mk_o]}:name#(\S+?)#{Mx[:mk_c]}/,'') + para.gsub!(/#{Mx[:nbsp]}/,' ') para.gsub!(/<(p|br)>/,'<\1 />') para=SiSU_Tune::Clean_html.new(para).clean @tuned_file << para diff --git a/lib/sisu/v0/manpage.rb b/lib/sisu/v0/manpage.rb index 77d6e408..597099ed 100644 --- a/lib/sisu/v0/manpage.rb +++ b/lib/sisu/v0/manpage.rb @@ -144,7 +144,7 @@ module SiSU_manpage end wrap=util.line_wrap if wrap =~ /^\s*[\d*+]+\s+.+?\s*\Z/m - wrap.gsub!(/(^| |\s|\*)\\\*/,'\1\\\\\*') #man page requires + wrap.gsub!(/(^| |#{Mx[:nbsp]}|\s|\*)\\\*/,'\1\\\\\*') #man page requires wrap.gsub!(/\s\.(\S+)/,' \\.\1') wrap.gsub!(/(["''])/,"\\\\\\1") # quotation marks need escape wrap.gsub!(/^\s*([\d*+]+)\s+(.+?)\s*\Z/m, <<\s]+?)([.,]?(?:\s|$))/,'\1\2\3') para.gsub!(/(.+?)<\/a>/m,'\1') para.gsub!(/#{Mx[:mk_o]}name#\S+?#{Mx[:mk_c]}/,'') # remove name links - para.gsub!(/ /,' ') # decide on + para.gsub!(/ |#{Mx[:nbsp]}/,' ') # decide on para.gsub!(/(["''])/,"\\\\\\1") # quotation marks need escape para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") para.gsub!(/^(?:^|[^_\\])#{Mx[:lnk_o]}\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*#{Mx[:lnk_c]}\S+/,'[image: "\1"]') diff --git a/lib/sisu/v0/odf.rb b/lib/sisu/v0/odf.rb index bf23f91f..d6558634 100644 --- a/lib/sisu/v0/odf.rb +++ b/lib/sisu/v0/odf.rb @@ -307,7 +307,7 @@ module SiSU_ODF para end def group_clean(para) - para.gsub!(/&nbsp;| /,' ') + para.gsub!(/&nbsp;| |#{Mx[:nbsp]}/,' ') para.gsub!(//,'>') para.gsub!(/<(text:span text:style-name="T[1-5]"|\/text:span)>/,'<\1>') #works, not ideal para.gsub!(/#{Mx[:br_line]}/,'
') @@ -447,9 +447,6 @@ module SiSU_ODF word=para.scan(/\S+|\n/) if word word.each do |w| # _ - / # | : ! ^ ~ - unless w =~/#{Mx[:id_o]}~\S+?;\S+?;\S+?#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}|#{Mx[:gr_o]}.+?#{Mx[:gr_c]}|<[:!][^<>]+?>/ - w.gsub!(/^<([^<>][^<>][^<>][^<>]+?)>$/,'<\1>') #refix - end unless para =~/^(?:#{Rx[:meta]}|%+ )/m w.gsub!(/&#(?:126|152);/,'~') #126 usual if w !~/&\S{1,7};/ \ @@ -488,12 +485,10 @@ module SiSU_ODF para.gsub!(/#{Mx[:fa_underscore_o]}(.+?)#{Mx[:fa_underscore_c]}/,'\1') para.gsub!(/#{Mx[:fa_superscript_o]}(.+?)#{Mx[:fa_superscript_c]}/,'\1') para.gsub!(/#{Mx[:fa_subscript_o]}(.+?)#{Mx[:fa_subscript_c]}/,'\1') - para.gsub!(/[`’]/,"'") para.gsub!(/­/u,'-') para.gsub!(/ /u, ' ') # space identify para.gsub!(/ /u, ' ') # space identify para.gsub!(/·/u,'*') - para.gsub!(/[“”]/u,'""') para.gsub!(/[­–—]/u,'-') #— – chk para.gsub!(/ < /i,'<') para.gsub!(/\\copy(?:right)?\b/,'©') diff --git a/lib/sisu/v0/shared_xml.rb b/lib/sisu/v0/shared_xml.rb index 7ecc52bb..437f5482 100644 --- a/lib/sisu/v0/shared_xml.rb +++ b/lib/sisu/v0/shared_xml.rb @@ -422,12 +422,12 @@ module SiSU_XML_munge %{\\1#{@url_brace.xml_open}\\2#{@url_brace.xml_close}\\3}) para.gsub!(/\b[_\\]((?:https?|file|ftp):\/\/\S+?\.[^'"><\s]+?)([;.,]?(?:\s|$))/, '\1\2') #escaped urls not linked, deal with later - para.gsub!(/ /,' ') + para.gsub!(/ |#{Mx[:nbsp]}/,' ') #para.gsub!(/ /,' ') #clean else para.gsub!(/(^|[^}])_/m,'\1>') #code-block: angle brackets special characters para.gsub!(/(^|[^}])_/m,'\1>') - para.gsub!(/ /,' ') + para.gsub!(/ |#{Mx[:nbsp]}/,' ') end para end @@ -443,7 +443,7 @@ module SiSU_XML_munge para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/, "#{@dir.url.images_local}\/\\1") - para.gsub!(/ /,' ') + para.gsub!(/ |#{Mx[:nbsp]}/,' ') #para.gsub!(/ /,' ') #clean wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip @@ -466,7 +466,7 @@ module SiSU_XML_munge para.gsub!(/&([^;]{1,5})/,'&\1') #sort, rough estimate, revisit #WATCH found in node not sax para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/, "#{@dir.url.images_local}\/\\1") - para.gsub!(/ /,' ') + para.gsub!(/ |#{Mx[:nbsp]}/,' ') #para.gsub!(/ /,' ') #clean wordlist=para.scan(/&[#0-9a-z]+;|\S+|\n/) #\n needed for tables, check though added 2005w17 para=tidywords(wordlist).join(' ').strip diff --git a/lib/sisu/v0/spell.rb b/lib/sisu/v0/spell.rb index 477a472f..8f89c470 100644 --- a/lib/sisu/v0/spell.rb +++ b/lib/sisu/v0/spell.rb @@ -72,7 +72,8 @@ module Utility end def check @input.each do |data| - data.gsub!(/(https?|www|ftp|gopher|png|jpg|gif|html|htm| )\S+/i,' ') + data.gsub!(/(https?|www|ftp|gopher|png|jpg|gif|html|htm)\S+/i,' ') + data.gsub!(/( |#{Mx[:nbsp]})/i,' ') data.gsub!(/<\/?(table|tr|td|b|p|href).*?>/i,' ') data.gsub!(/(<==.+|<:\S+>||^0~.+|\{\{\{|~)/,' ') data.gsub!(/(["|<>)(\n'`'.;&_-]|\=)/,' ') diff --git a/lib/sisu/v0/sst_do_inline_footnotes.rb b/lib/sisu/v0/sst_do_inline_footnotes.rb index ba1f6379..514eb2c8 100644 --- a/lib/sisu/v0/sst_do_inline_footnotes.rb +++ b/lib/sisu/v0/sst_do_inline_footnotes.rb @@ -473,7 +473,7 @@ module SiSU_Convert_footnotes #% same as db clean --> s=s.gsub(/(.+?)<\/del>/,'DELETED(\1)') # deletions s=s.gsub(/(\d+)<\/sup>/,'[\1]') - s=s.gsub(/(?: \\;)+/,' ') + s=s.gsub(/(?: \\;|#{Mx[:nbsp]})+/,' ') #checking source Mx not necessary s=s.gsub(/\{.+?\.(?:png|jpg|gif).+?\}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search s=s.gsub(/#{Mx[:lnk_o]}.+?\.(?:png|jpg|gif).+?#{Mx[:lnk_c]}(?:https?|file|ftp)\\\:\S+ /,' [image] ') # else image names found in search s=s.gsub(/\s\s+/,' ') diff --git a/lib/sisu/v0/sst_to_s_xml_dom.rb b/lib/sisu/v0/sst_to_s_xml_dom.rb index efb60a88..30dc370a 100644 --- a/lib/sisu/v0/sst_to_s_xml_dom.rb +++ b/lib/sisu/v0/sst_to_s_xml_dom.rb @@ -367,7 +367,7 @@ WOK "#{dir.url.images_local}/\\1") para.gsub!(/#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/, "#{dir.url.images_local}/\\1") - para.gsub!(/ /,' ') + para.gsub!(/ |#{Mx[:nbsp]}/,' ') # checking source Mx not necessary para=SiSU_document_structure::Structure.new(@md,para).structure @trans.char_enc.utf8(para) if @sys.locale =~/utf-?8/i #% utf8 para.gsub!(/^0~(\S+)/,"#{Mx[:lv_o]}@\\1#{Mx[:lv_c]}") diff --git a/lib/sisu/v0/texinfo_format.rb b/lib/sisu/v0/texinfo_format.rb index cdfa6a75..22fd7a84 100644 --- a/lib/sisu/v0/texinfo_format.rb +++ b/lib/sisu/v0/texinfo_format.rb @@ -367,8 +367,7 @@ WOK @txt.gsub!(/@/i,'@@') @txt.gsub!(/\{/,'@{'); @txt.gsub!(/\}/,'@}') #@txt.gsub!(/(^|[\s*!\/#_-])\{/,'\1@{'); @txt.gsub!(/\}([\s*!\/#_-]|$)/,'@}\1') - @txt.gsub!(/  /,' ') # ~ character for hardspace - @txt.gsub!(/ /,' ') # ~ character for hardspace + @txt.gsub!(/(?: |#{Mx[:nbsp]})+/,' ') # ~ character for hardspace @txt.gsub!(/&(\S+?);/,' ') @txt.gsub!(/&/,'<=and>') @txt.gsub!(/(\s+&\s+)/,' and ') diff --git a/lib/sisu/v0/texpdf_format.rb b/lib/sisu/v0/texpdf_format.rb index b6a1e1f2..58a057d8 100644 --- a/lib/sisu/v0/texpdf_format.rb +++ b/lib/sisu/v0/texpdf_format.rb @@ -987,7 +987,7 @@ WOK string.gsub!(/.+?<-#>/,'') string.gsub!(/#{Mx[:br_eof]}|#{Mx[:br_endnotes]}/,'') #problem sequence -> - string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX + string.gsub!(/&(?:nbsp);|#{Mx[:nbsp]}/,'<=hardspace>') # < SiSU special character also LaTeX string.gsub!(/#{Mx[:gl_o]}#nbsp#{Mx[:gl_c]}/,'<=hardspace>') # < SiSU special character also LaTeX string.gsub!(/#{Mx[:gl_o]}(?:#lt|#060)#{Mx[:gl_c]}/,'<=lt>') # < SiSU special character also LaTeX string.gsub!(/#{Mx[:gl_o]}(?:#gt|#062)#{Mx[:gl_c]}/,'<=gt>') # > SiSU special character also LaTeX @@ -1027,7 +1027,7 @@ WOK end string.gsub!(/\{/,'\{') #string.gsub!(/\}/,'\}') - string.gsub!(/ /,'~') # ~ character for hardspace + string.gsub!(/ |#{Mx[:nbsp]}/,'~') # ~ character for hardspace # sequence important must appear after removal of { and } string.gsub!(/&\S+?;/,'') #hmmm # sequence imortant place before removal of & @@ -1155,7 +1155,7 @@ WOK string.gsub!(/#{Mx[:br_endnotes]}/,'') #string.gsub!(//,'') #problem sequence -> - string.gsub!(/&(?:nbsp);/,'<=hardspace>') # < SiSU special character also LaTeX + string.gsub!(/&(?:nbsp);|#{Mx[:nbsp]}/,'<=hardspace>') # < SiSU special character also LaTeX string.gsub!(/&(?:lt|#060);/,'<=lt>') # < SiSU special character also LaTeX string.gsub!(/#{Mx[:gl_o]}#(?:gt|062)#{Mx[:gl_c]}/,'<=gt>') # > SiSU special character also LaTeX #string.gsub!(/#{Mx[:gl_o]}(&#(?:[a-z]+|[0-9]+);)#{Mx[:gl_c]}/,'\1') @@ -1194,7 +1194,7 @@ WOK end string.gsub!(/\{/,'\{') string.gsub!(/\}/,'\}') - string.gsub!(/ /,'~') # ~ character for hardspace + string.gsub!(/ |#{Mx[:nbsp]}/,'~') # ~ character for hardspace # sequence important must appear after removal of { and } string.gsub!(/&\S+?;/,'') #hmmm # sequence imortant place before removal of & diff --git a/lib/sisu/v0/wikispeak.rb b/lib/sisu/v0/wikispeak.rb index 0e8d3989..d9cb3cbd 100644 --- a/lib/sisu/v0/wikispeak.rb +++ b/lib/sisu/v0/wikispeak.rb @@ -260,7 +260,7 @@ WOK para.gsub!(/^\s*#{Mx[:id_o]}~\d+;(?:\w|[0-6]:)\d+;\w\d+#{Mx[:id_c]}#{Mx[:id_o]}#@dp:#@dp#{Mx[:id_c]}$/,'') # remove empty lines - check para.gsub!(/
(.+?)<\/a>/m,'\1') para.gsub!(/<:name#\S+?>/,'') # remove name links - para.gsub!(/ /,' ') # decide on + para.gsub!(/ |#{Mx[:nbsp]}/,' ') # decide on para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}(\S+?\.(?:png|jpg|gif)) .+?#{Mx[:lnk_c]}(?:(?:https?|file|ftp):\/\/\S+|image)/,' [ \1 ]') #"[ #{dir.url.images_local}\/\\1 ]") para.gsub!(/(?:^|[^_\\])#{Mx[:lnk_o]}\s*\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*#{Mx[:lnk_c]}\S+/,'[image: "\1"]') #para.gsub!(/^\{\S+?\.(?:png|jpg|gif)\s+.+?"(.*?)"\s*\}\S+/,'[image: "\1"]') -- cgit v1.2.3