0
0
mirror of https://github.com/vim/vim.git synced 2025-09-26 04:04:07 -04:00

updated for version 7.4.497

Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)
This commit is contained in:
Bram Moolenaar
2014-11-05 14:27:36 +01:00
parent 4f7e821f26
commit fda3729a06
14 changed files with 244 additions and 45 deletions

View File

@@ -1879,6 +1879,9 @@ test check:
cd testdir; $(MAKE) -f Makefile $(GUI_TESTTARGET) VIMPROG=../$(VIMTARGET) $(GUI_TESTARG) SCRIPTSOURCE=../$(SCRIPTSOURCE)
$(MAKE) -f Makefile unittest
benchmark:
cd testdir; $(MAKE) -f Makefile benchmark VIMPROG=../$(VIMTARGET) SCRIPTSOURCE=../$(SCRIPTSOURCE)
unittesttargets:
$(MAKE) -f Makefile $(UNITTEST_TARGETS)

View File

@@ -8011,13 +8011,10 @@ static regengine_T bt_regengine =
bt_regcomp,
bt_regfree,
bt_regexec_nl,
bt_regexec_multi
#ifdef DEBUG
,(char_u *)""
#endif
bt_regexec_multi,
(char_u *)""
};
#include "regexp_nfa.c"
static regengine_T nfa_regengine =
@@ -8025,18 +8022,14 @@ static regengine_T nfa_regengine =
nfa_regcomp,
nfa_regfree,
nfa_regexec_nl,
nfa_regexec_multi
#ifdef DEBUG
,(char_u *)""
#endif
nfa_regexec_multi,
(char_u *)""
};
/* Which regexp engine to use? Needed for vim_regcomp().
* Must match with 'regexpengine'. */
static int regexp_engine = 0;
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
#ifdef DEBUG
static char_u regname[][30] = {
"AUTOMATIC Regexp Engine",
@@ -8083,10 +8076,8 @@ vim_regcomp(expr_arg, re_flags)
regexp_engine = AUTOMATIC_ENGINE;
}
}
#ifdef DEBUG
bt_regengine.expr = expr;
nfa_regengine.expr = expr;
#endif
/*
* First try the NFA engine, unless backtracking was requested.
@@ -8096,7 +8087,8 @@ vim_regcomp(expr_arg, re_flags)
else
prog = bt_regengine.regcomp(expr, re_flags);
if (prog == NULL) /* error compiling regexp with initial engine */
/* Check for error compiling regexp with initial engine. */
if (prog == NULL)
{
#ifdef BT_REGEXP_DEBUG_LOG
if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
@@ -8114,13 +8106,27 @@ vim_regcomp(expr_arg, re_flags)
}
#endif
/*
* If the NFA engine failed, the backtracking engine won't work either.
* If the NFA engine failed, try the backtracking engine.
* Disabled for now, both engines fail on the same patterns.
* Re-enable when regcomp() fails when the pattern would work better
* with the other engine.
*
if (regexp_engine == AUTOMATIC_ENGINE)
{
prog = bt_regengine.regcomp(expr, re_flags);
regexp_engine == BACKTRACKING_ENGINE;
}
*/
}
if (prog != NULL)
{
/* Store the info needed to call regcomp() again when the engine turns
* out to be very slow when executing it. */
prog->re_engine = regexp_engine;
prog->re_flags = re_flags;
}
return prog;
}
@@ -8135,20 +8141,75 @@ vim_regfree(prog)
prog->engine->regfree(prog);
}
#ifdef FEAT_EVAL
static void report_re_switch __ARGS((char_u *pat));
static void
report_re_switch(pat)
char_u *pat;
{
if (p_verbose > 0)
{
verbose_enter();
MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
MSG_PUTS(pat);
verbose_leave();
}
}
#endif
static int vim_regexec_both __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int nl));
/*
* Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
* Uses curbuf for line count and 'iskeyword'.
* When "nl" is TRUE consider a "\n" in "line" to be a line break.
*
* Return TRUE if there is a match, FALSE if not.
*/
static int
vim_regexec_both(rmp, line, col, nl)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
int nl;
{
int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
/* NFA engine aborted because it's very slow. */
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
&& result == NFA_TOO_EXPENSIVE)
{
int save_p_re = p_re;
int re_flags = rmp->regprog->re_flags;
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
if (pat != NULL)
{
#ifdef FEAT_EVAL
report_re_switch(pat);
#endif
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL)
result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
vim_free(pat);
}
p_re = save_p_re;
}
return result;
}
int
vim_regexec(rmp, line, col)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
regmatch_T *rmp;
char_u *line;
colnr_T col;
{
return rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE);
return vim_regexec_both(rmp, line, col, FALSE);
}
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
@@ -8158,11 +8219,11 @@ vim_regexec(rmp, line, col)
*/
int
vim_regexec_nl(rmp, line, col)
regmatch_T *rmp;
char_u *line;
colnr_T col;
regmatch_T *rmp;
char_u *line;
colnr_T col;
{
return rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE);
return vim_regexec_both(rmp, line, col, TRUE);
}
#endif
@@ -8183,5 +8244,32 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
colnr_T col; /* column to start looking for match */
proftime_T *tm; /* timeout limit or NULL */
{
return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
int result = rmp->regprog->engine->regexec_multi(
rmp, win, buf, lnum, col, tm);
/* NFA engine aborted because it's very slow. */
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
&& result == NFA_TOO_EXPENSIVE)
{
int save_p_re = p_re;
int re_flags = rmp->regprog->re_flags;
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
if (pat != NULL)
{
#ifdef FEAT_EVAL
report_re_switch(pat);
#endif
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL)
result = rmp->regprog->engine->regexec_multi(
rmp, win, buf, lnum, col, tm);
vim_free(pat);
}
p_re = save_p_re;
}
return result;
}

View File

@@ -27,6 +27,18 @@
*/
#define NFA_MAX_BRACES 20
/*
* In the NFA engine: how many states are allowed
*/
#define NFA_MAX_STATES 100000
#define NFA_TOO_EXPENSIVE -1
/* Which regexp engine to use? Needed for vim_regcomp().
* Must match with 'regexpengine'. */
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
typedef struct regengine regengine_T;
/*
@@ -38,6 +50,8 @@ typedef struct regprog
{
regengine_T *engine;
unsigned regflags;
unsigned re_engine; /* automatic, backtracking or nfa engine */
unsigned re_flags; /* second argument for vim_regcomp() */
} regprog_T;
/*
@@ -47,9 +61,11 @@ typedef struct regprog
*/
typedef struct
{
/* These two members implement regprog_T */
/* These four members implement regprog_T */
regengine_T *engine;
unsigned regflags;
unsigned re_engine;
unsigned re_flags; /* second argument for vim_regcomp() */
int regstart;
char_u reganch;
@@ -81,9 +97,11 @@ struct nfa_state
*/
typedef struct
{
/* These two members implement regprog_T */
/* These three members implement regprog_T */
regengine_T *engine;
unsigned regflags;
unsigned re_engine;
unsigned re_flags; /* second argument for vim_regcomp() */
nfa_state_T *start; /* points into state[] */
@@ -96,9 +114,7 @@ typedef struct
#ifdef FEAT_SYN_HL
int reghasz;
#endif
#ifdef DEBUG
char_u *pattern;
#endif
int nsubexp; /* number of () */
int nstate;
nfa_state_T state[1]; /* actually longer.. */
@@ -151,9 +167,7 @@ struct regengine
void (*regfree)(regprog_T *);
int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, int);
long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*);
#ifdef DEBUG
char_u *expr;
#endif
};
#endif /* _REGEXP_H */

View File

@@ -5522,6 +5522,13 @@ nfa_regmatch(prog, start, submatch, m)
nextlist->n = 0; /* clear nextlist */
nextlist->has_pim = FALSE;
++nfa_listid;
if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES)
{
/* too many states, retry with old engine */
nfa_match = NFA_TOO_EXPENSIVE;
goto theend;
}
thislist->id = nfa_listid;
nextlist->id = nfa_listid + 1;
@@ -5704,6 +5711,11 @@ nfa_regmatch(prog, start, submatch, m)
*/
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE)
{
nfa_match = result;
goto theend;
}
/* for \@! and \@<! it is a match when the result is
* FALSE */
@@ -5817,6 +5829,11 @@ nfa_regmatch(prog, start, submatch, m)
/* First try matching the pattern. */
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE)
{
nfa_match = result;
goto theend;
}
if (result)
{
int bytelen;
@@ -6760,6 +6777,7 @@ nfa_regtry(prog, col)
int i;
regsubs_T subs, m;
nfa_state_T *start = prog->start;
int result;
#ifdef ENABLE_LOG
FILE *f;
#endif
@@ -6791,8 +6809,11 @@ nfa_regtry(prog, col)
clear_sub(&m.synt);
#endif
if (nfa_regmatch(prog, start, &subs, &m) == FALSE)
result = nfa_regmatch(prog, start, &subs, &m);
if (result == FALSE)
return 0;
else if (result == NFA_TOO_EXPENSIVE)
return result;
cleanup_subexpr();
if (REG_MULTI)
@@ -6929,9 +6950,7 @@ nfa_regexec_both(line, startcol)
nfa_nsubexpr = prog->nsubexp;
nfa_listid = 1;
nfa_alt_listid = 2;
#ifdef DEBUG
nfa_regengine.expr = prog->pattern;
#endif
if (prog->reganch && col > 0)
return 0L;
@@ -6979,9 +6998,7 @@ nfa_regexec_both(line, startcol)
retval = nfa_regtry(prog, col);
#ifdef DEBUG
nfa_regengine.expr = NULL;
#endif
theend:
return retval;
@@ -7003,9 +7020,7 @@ nfa_regcomp(expr, re_flags)
if (expr == NULL)
return NULL;
#ifdef DEBUG
nfa_regengine.expr = expr;
#endif
init_class_tab();
@@ -7082,10 +7097,8 @@ nfa_regcomp(expr, re_flags)
/* Remember whether this pattern has any \z specials in it. */
prog->reghasz = re_has_z;
#endif
#ifdef DEBUG
prog->pattern = vim_strsave(expr);
nfa_regengine.expr = NULL;
#endif
out:
vim_free(post_start);
@@ -7099,9 +7112,7 @@ fail:
#ifdef ENABLE_LOG
nfa_postfix_dump(expr, FAIL);
#endif
#ifdef DEBUG
nfa_regengine.expr = NULL;
#endif
goto out;
}
@@ -7115,9 +7126,7 @@ nfa_regfree(prog)
if (prog != NULL)
{
vim_free(((nfa_regprog_T *)prog)->match_text);
#ifdef DEBUG
vim_free(((nfa_regprog_T *)prog)->pattern);
#endif
vim_free(prog);
}
}

View File

@@ -87,6 +87,7 @@ clean:
-if exist Xfind rd /s /q Xfind
-if exist viminfo del viminfo
-del test.log
-if exists benchmark.out del benchmark.out
.in.out:
-if exist $*.failed del $*.failed
@@ -103,3 +104,11 @@ clean:
nolog:
-del test.log
benchmark:
bench_re_freeze.out
bench_re_freeze.out: bench_re_freeze.vim
-if exist benchmark.out del benchmark.out
$(VIMPROG) -u dos.vim -U NONE --noplugin $*.in
@IF EXIST benchmark.out ( type benchmark.out )

View File

@@ -12,11 +12,13 @@ ifneq (sh.exe, $(SHELL))
DEL = rm -f
MV = mv
CP = cp
CAT = cat
DIRSLASH = /
else
DEL = del
MV = rename
CP = copy
CAT = type
DIRSLASH = \\
endif
@@ -72,6 +74,8 @@ SCRIPTS32 = test50.out test70.out
SCRIPTS_GUI = test16.out
SCRIPTS_BENCH = bench_re_freeze.out
.SUFFIXES: .in .out
vimall: fixff $(SCRIPTS16) $(SCRIPTS) $(SCRIPTS_GUI) $(SCRIPTS32)
@@ -80,6 +84,8 @@ vimall: fixff $(SCRIPTS16) $(SCRIPTS) $(SCRIPTS_GUI) $(SCRIPTS32)
nongui: fixff $(SCRIPTS16) $(SCRIPTS)
echo ALL DONE
benchmark: $(SCRIPTS_BENCH)
small:
echo ALL DONE
@@ -114,3 +120,8 @@ clean:
-$(DEL) X*
-$(DEL) test.ok
-$(DEL) viminfo
bench_re_freeze.out: bench_re_freeze.vim
-$(DEL) benchmark.out
$(VIMPROG) -u dos.vim -U NONE --noplugin $*.in
$(CAT) benchmark.out

View File

@@ -50,6 +50,8 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
test_signs.out \
test_utf8.out
SCRIPTS_BENCH = bench_re_freeze.out
.SUFFIXES: .in .out
all: /tmp $(SCRIPTS)
@@ -57,6 +59,8 @@ all: /tmp $(SCRIPTS)
$(SCRIPTS): $(VIMPROG)
benchmark: $(SCRIPTS_BENCH)
clean:
-rm -rf *.out Xdotest test.ok tiny.vim small.vim mbyte.vim viminfo
@@ -75,3 +79,10 @@ clean:
# Create a directory for temp files
/tmp:
-mkdir /tmp
bench_re_freeze.out: bench_re_freeze.vim
-del $*.failed test.ok benchmark.out
copy $*.ok test.ok
$(VIMPROG) -u os2.vim --noplugin -s dotest.in $*.in
type benchmark.out

View File

@@ -48,12 +48,16 @@ SCRIPTS = test1.out test2.out test3.out test4.out test5.out test6.out \
SCRIPTS_GUI = test16.out
SCRIPTS_BENCH = bench_re_freeze.out
.SUFFIXES: .in .out
nongui: nolog $(SCRIPTS) report
gui: nolog $(SCRIPTS) $(SCRIPTS_GUI) report
benchmark: $(SCRIPTS_BENCH)
report:
@echo
@echo 'Test results:'
@@ -65,7 +69,7 @@ report:
$(SCRIPTS) $(SCRIPTS_GUI): $(VIMPROG)
RM_ON_RUN = test.out X* viminfo
RM_ON_START = tiny.vim small.vim mbyte.vim mzscheme.vim lua.vim test.ok
RM_ON_START = tiny.vim small.vim mbyte.vim mzscheme.vim lua.vim test.ok benchmark.out
RUN_VIM = VIMRUNTIME=$(SCRIPTSOURCE); export VIMRUNTIME; $(VALGRIND) $(VIMPROG) -u unix.vim -U NONE --noplugin -s dotest.in
clean:
@@ -120,5 +124,14 @@ test49.out: test49.vim
test60.out: test60.vim
bench_re_freeze.out: bench_re_freeze.vim
-rm -rf benchmark.out $(RM_ON_RUN)
# Sleep a moment to avoid that the xterm title is messed up.
# 200 msec is sufficient, but only modern sleep supports a fraction of
# a second, fall back to a second if it fails.
@-/bin/sh -c "sleep .2 > /dev/null 2>&1 || sleep 1"
-$(RUN_VIM) $*.in
@/bin/sh -c "if test -f benchmark.out; then cat benchmark.out; fi"
nolog:
-rm -f test.log

View File

@@ -0,0 +1,13 @@
Test for Benchmarking RE engine
STARTTEST
:so small.vim
:if !has("reltime") | qa! | endif
:set nocp cpo&vim
:so bench_re_freeze.vim
:call Measure('samples/re.freeze.txt', '\s\+\%#\@<!$', '+5')
:/^" Benchmark/,$w! benchmark.out
:qa!
ENDTEST
" Benchmark_results:

View File

@@ -0,0 +1,13 @@
"Test for benchmarking the RE engine
so small.vim
if !has("reltime") | finish | endif
func! Measure(file, pattern, arg)
for re in range(3)
let sstart=reltime()
let cmd=printf("../vim -u NONE -N --cmd ':set re=%d'".
\ " -c 'call search(\"%s\", \"\", \"\", 10000)' -c ':q!' %s", re, escape(a:pattern, '\\'), empty(a:arg) ? '' : a:arg)
call system(cmd. ' '. a:file)
$put =printf('file: %s, re: %d, time: %s', a:file, re, reltimestr(reltime(sstart)))
endfor
endfunc

File diff suppressed because one or more lines are too long

View File

@@ -741,6 +741,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
497,
/**/
496,
/**/