From daf2b7fe5f1479cecbe0c7283b34e47324d10e1b Mon Sep 17 00:00:00 2001 From: Ralph Amissah Date: Sat, 16 Feb 2008 00:33:34 +0000 Subject: markup tag match refinement --- CHANGELOG | 15 ++++++++------- lib/sisu/v0/shared_sem.rb | 31 +++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c35925bb..d6676e53 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,13 +9,14 @@ Reverse Chronological: %% STABLE MANIFEST -%% sisu_0.64.5.orig.tar.gz (2008-02-15:06/5) -http://www.jus.uio.no/sisu/pkg/src/sisu_0.64.5.orig.tar.gz - sisu_0.64.5.orig.tar.gz - sisu_0.64.5-1.dsc - sisu_0.64.5-1.diff.gz - - * semantic tag, refinement to regex matching +%% sisu_0.65.0.orig.tar.gz (2008-02-16:06/6) +http://www.jus.uio.no/sisu/pkg/src/sisu_0.65.0.orig.tar.gz + sisu_0.65.0.orig.tar.gz + sisu_0.65.0-1.dsc + sisu_0.65.0-1.diff.gz + + * semantic tag, match refinement related, (consider a possible extension) + [version bump for tags] %% sisu_0.64.4.orig.tar.gz (2008-02-14:06/4) http://www.jus.uio.no/sisu/pkg/src/sisu_0.64.4.orig.tar.gz diff --git a/lib/sisu/v0/shared_sem.rb b/lib/sisu/v0/shared_sem.rb index 0c9e485c..f6303ee0 100644 --- a/lib/sisu/v0/shared_sem.rb +++ b/lib/sisu/v0/shared_sem.rb @@ -69,25 +69,36 @@ module SiSU_sem /^<:code>/ end def each_csc - /\b[a-z]+[:;]\{|\}[:;][a-z]+\b/ + /\b[a-z]+[:;]\{|\}[:;][a-z]+\b/m + #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)[:;]\{|\}[:;](?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m end def each_c - /\b[a-z]+:\{|\}:[a-z]+\b/ + /\b[a-z]+:\{|\}:[a-z]+\b/m + #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+):\{|\}:(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m end def each_sc - /\b[a-z]+;\{|\};[a-z]+\b/ + /\b[a-z]+;\{|\};[a-z]+\b/m + #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+);\{|\};(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m end def pair_csc - /(([a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/ + /(([a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/m + #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+)(?::\{(.+?)\}:\2|;\{(.+?)\};\2))/m end def pair_c - /(([a-z]+):\{(.+?)\}:\2)/ + /(([a-z]+):\{(.+?)\}:\2)/m + #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+):\{(.+?)\}:\2)/m end def pair_sc - /(([a-z]+);\{.+?\};\2)/ + /(([a-z]+);\{.+?\};\2)/m + #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+);\{.+?\};\2)/m end def whole_csc_ae - /(([a-z]+)(?::\[(.+?)\]:\2|;\[(.+?)\];\2))/ + /(([a-z]+)(?::\[(.+?)\]:\2|;\[(.+?)\];\2))/m + #/(([a-z]+(?:[_.][a-z]+)+|[a-z]+)(?::\[(.+?)\]:\2|;\[(.+?)\];\2))/m + end + def each_csc_ae + /\b[a-z]+[:;]\[|\][:;][a-z]+\b/m + #/\b(?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)[:;]\[|\][:;](?:[a-z]+(?:[_.][a-z]+)+|[a-z]+)\b/m end self end @@ -99,7 +110,11 @@ module SiSU_sem @para end def sem_marker_added_extra_parts - @para.gsub!(rgx.whole_csc_ae,'') + unless @para =~ rgx.exclude + @para.gsub!(rgx.whole_csc_ae,'') + if para =~rgx.each_csc_ae; puts "WARNING semantic taggin error: #@para" + end + end @para end def all -- cgit v1.2.3