1
0
forked from aniani/vim

updated for version 7.4.497

Problem:    With some regexp patterns the NFA engine uses many states and
            becomes very slow.  To the user it looks like Vim freezes.
Solution:   When the number of states reaches a limit fall back to the old
            engine. (Christian Brabandt)
This commit is contained in:
Bram Moolenaar 2014-11-05 14:27:36 +01:00
parent 4f7e821f26
commit fda3729a06
14 changed files with 244 additions and 45 deletions

View File

@ -102,6 +102,9 @@ SRC_ALL = \
src/testdir/pythonx/topmodule/submodule/subsubmodule/subsubsubmodule.py \
src/testdir/python_after/*.py \
src/testdir/python_before/*.py \
src/testdir/bench*.in \
src/testdir/bench*.vim \
src/testdir/samples.*.txt \
src/proto.h \
src/proto/blowfish.pro \
src/proto/buffer.pro \

View File

@ -5626,6 +5626,10 @@ A jump table for the options with a short description can be found at |Q_op|.
Note that when using the NFA engine and the pattern contains something
that is not supported the pattern will not match. This is only useful
for debugging the regexp engine.
Using automatic selection enables Vim to switch the engine, if the
default engine becomes too costly. E.g., when the NFA engine uses too
many states. This should prevent Vim from hanging on a combination of
a complex pattern with long text.
*'relativenumber'* *'rnu'* *'norelativenumber'* *'nornu'*
'relativenumber' 'rnu' boolean (default off)

View File

@ -1879,6 +1879,9 @@ test check:
cd testdir; $(MAKE) -f Makefile $(GUI_TESTTARGET) VIMPROG=../$(VIMTARGET) $(GUI_TESTARG) SCRIPTSOURCE=../$(SCRIPTSOURCE)
$(MAKE) -f Makefile unittest
benchmark:
cd testdir; $(MAKE) -f Makefile benchmark VIMPROG=../$(VIMTARGET) SCRIPTSOURCE=../$(SCRIPTSOURCE)
unittesttargets:
$(MAKE) -f Makefile $(UNITTEST_TARGETS)

View File

@ -8011,13 +8011,10 @@ static regengine_T bt_regengine =
bt_regcomp,
bt_regfree,
bt_regexec_nl,
bt_regexec_multi
#ifdef DEBUG
,(char_u *)""
#endif
bt_regexec_multi,
(char_u *)""
};
#include "regexp_nfa.c"
static regengine_T nfa_regengine =
@ -8025,18 +8022,14 @@ static regengine_T nfa_regengine =
nfa_regcomp,
nfa_regfree,
nfa_regexec_nl,
nfa_regexec_multi
#ifdef DEBUG
,(char_u *)""
#endif
nfa_regexec_multi,
(char_u *)""
};
/* Which regexp engine to use? Needed for vim_regcomp().
* Must match with 'regexpengine'. */
static int regexp_engine = 0;
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
#ifdef DEBUG
static char_u regname[][30] = {
"AUTOMATIC Regexp Engine",
@ -8083,10 +8076,8 @@ vim_regcomp(expr_arg, re_flags)
regexp_engine = AUTOMATIC_ENGINE;
}
}
#ifdef DEBUG
bt_regengine.expr = expr;
nfa_regengine.expr = expr;
#endif
/*
* First try the NFA engine, unless backtracking was requested.
@ -8096,7 +8087,8 @@ vim_regcomp(expr_arg, re_flags)
else
prog = bt_regengine.regcomp(expr, re_flags);
if (prog == NULL) /* error compiling regexp with initial engine */
/* Check for error compiling regexp with initial engine. */
if (prog == NULL)
{
#ifdef BT_REGEXP_DEBUG_LOG
if (regexp_engine != BACKTRACKING_ENGINE) /* debugging log for NFA */
@ -8114,13 +8106,27 @@ vim_regcomp(expr_arg, re_flags)
}
#endif
/*
* If the NFA engine failed, the backtracking engine won't work either.
* If the NFA engine failed, try the backtracking engine.
* Disabled for now, both engines fail on the same patterns.
* Re-enable when regcomp() fails when the pattern would work better
* with the other engine.
*
if (regexp_engine == AUTOMATIC_ENGINE)
{
prog = bt_regengine.regcomp(expr, re_flags);
regexp_engine == BACKTRACKING_ENGINE;
}
*/
}
if (prog != NULL)
{
/* Store the info needed to call regcomp() again when the engine turns
* out to be very slow when executing it. */
prog->re_engine = regexp_engine;
prog->re_flags = re_flags;
}
return prog;
}
@ -8135,20 +8141,75 @@ vim_regfree(prog)
prog->engine->regfree(prog);
}
#ifdef FEAT_EVAL
static void report_re_switch __ARGS((char_u *pat));
static void
report_re_switch(pat)
char_u *pat;
{
if (p_verbose > 0)
{
verbose_enter();
MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
MSG_PUTS(pat);
verbose_leave();
}
}
#endif
static int vim_regexec_both __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int nl));
/*
* Match a regexp against a string.
* "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
* Uses curbuf for line count and 'iskeyword'.
* When "nl" is TRUE consider a "\n" in "line" to be a line break.
*
* Return TRUE if there is a match, FALSE if not.
*/
static int
vim_regexec_both(rmp, line, col, nl)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
int nl;
{
int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
/* NFA engine aborted because it's very slow. */
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
&& result == NFA_TOO_EXPENSIVE)
{
int save_p_re = p_re;
int re_flags = rmp->regprog->re_flags;
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
if (pat != NULL)
{
#ifdef FEAT_EVAL
report_re_switch(pat);
#endif
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL)
result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
vim_free(pat);
}
p_re = save_p_re;
}
return result;
}
int
vim_regexec(rmp, line, col)
regmatch_T *rmp;
char_u *line; /* string to match against */
colnr_T col; /* column to start looking for match */
regmatch_T *rmp;
char_u *line;
colnr_T col;
{
return rmp->regprog->engine->regexec_nl(rmp, line, col, FALSE);
return vim_regexec_both(rmp, line, col, FALSE);
}
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
@ -8158,11 +8219,11 @@ vim_regexec(rmp, line, col)
*/
int
vim_regexec_nl(rmp, line, col)
regmatch_T *rmp;
char_u *line;
colnr_T col;
regmatch_T *rmp;
char_u *line;
colnr_T col;
{
return rmp->regprog->engine->regexec_nl(rmp, line, col, TRUE);
return vim_regexec_both(rmp, line, col, TRUE);
}
#endif
@ -8183,5 +8244,32 @@ vim_regexec_multi(rmp, win, buf, lnum, col, tm)
colnr_T col; /* column to start looking for match */
proftime_T *tm; /* timeout limit or NULL */
{
return rmp->regprog->engine->regexec_multi(rmp, win, buf, lnum, col, tm);
int result = rmp->regprog->engine->regexec_multi(
rmp, win, buf, lnum, col, tm);
/* NFA engine aborted because it's very slow. */
if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
&& result == NFA_TOO_EXPENSIVE)
{
int save_p_re = p_re;
int re_flags = rmp->regprog->re_flags;
char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
p_re = BACKTRACKING_ENGINE;
vim_regfree(rmp->regprog);
if (pat != NULL)
{
#ifdef FEAT_EVAL
report_re_switch(pat);
#endif
rmp->regprog = vim_regcomp(pat, re_flags);
if (rmp->regprog != NULL)
result = rmp->regprog->engine->regexec_multi(
rmp, win, buf, lnum, col, tm);
vim_free(pat);
}
p_re = save_p_re;
}
return result;
}

View File

@ -27,6 +27,18 @@
*/
#define NFA_MAX_BRACES 20
/*
* In the NFA engine: how many states are allowed
*/
#define NFA_MAX_STATES 100000
#define NFA_TOO_EXPENSIVE -1
/* Which regexp engine to use? Needed for vim_regcomp().
* Must match with 'regexpengine'. */
#define AUTOMATIC_ENGINE 0
#define BACKTRACKING_ENGINE 1
#define NFA_ENGINE 2
typedef struct regengine regengine_T;
/*
@ -38,6 +50,8 @@ typedef struct regprog
{
regengine_T *engine;
unsigned regflags;
unsigned re_engine; /* automatic, backtracking or nfa engine */
unsigned re_flags; /* second argument for vim_regcomp() */
} regprog_T;
/*
@ -47,9 +61,11 @@ typedef struct regprog
*/
typedef struct
{
/* These two members implement regprog_T */
/* These four members implement regprog_T */
regengine_T *engine;
unsigned regflags;
unsigned re_engine;
unsigned re_flags; /* second argument for vim_regcomp() */
int regstart;
char_u reganch;
@ -81,9 +97,11 @@ struct nfa_state
*/
typedef struct
{
/* These two members implement regprog_T */
/* These three members implement regprog_T */
regengine_T *engine;
unsigned regflags;
unsigned re_engine;
unsigned re_flags; /* second argument for vim_regcomp() */
nfa_state_T *start; /* points into state[] */
@ -96,9 +114,7 @@ typedef struct
#ifdef FEAT_SYN_HL
int reghasz;
#endif
#ifdef DEBUG
char_u *pattern;
#endif
int nsubexp; /* number of () */
int nstate;
nfa_state_T state[1]; /* actually longer.. */
@ -151,9 +167,7 @@ struct regengine
void (*regfree)(regprog_T *);
int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, int);
long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*);
#ifdef DEBUG
char_u *expr;
#endif
};
#endif /* _REGEXP_H */

View File

@ -5522,6 +5522,13 @@ nfa_regmatch(prog, start, submatch, m)
nextlist->n = 0; /* clear nextlist */
nextlist->has_pim = FALSE;
++nfa_listid;
if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES)
{
/* too many states, retry with old engine */
nfa_match = NFA_TOO_EXPENSIVE;
goto theend;
}
thislist->id = nfa_listid;
nextlist->id = nfa_listid + 1;
@ -5704,6 +5711,11 @@ nfa_regmatch(prog, start, submatch, m)
*/
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE)
{
nfa_match = result;
goto theend;
}
/* for \@! and \@<! it is a match when the result is
* FALSE */
@ -5817,6 +5829,11 @@ nfa_regmatch(prog, start, submatch, m)
/* First try matching the pattern. */
result = recursive_regmatch(t->state, NULL, prog,
submatch, m, &listids);
if (result == NFA_TOO_EXPENSIVE)
{
nfa_match = result;
goto theend;
}
if (result)
{
int bytelen;
@ -6760,6 +6777,7 @@ nfa_regtry(prog, col)
int i;
regsubs_T subs, m;
nfa_state_T *start = prog->start;
int result;
#ifdef ENABLE_LOG
FILE *f;
#endif
@ -6791,8 +6809,11 @@ nfa_regtry(prog, col)
clear_sub(&m.synt);
#endif
if (nfa_regmatch(prog, start, &subs, &m) == FALSE)
result = nfa_regmatch(prog, start, &subs, &m);
if (result == FALSE)
return 0;
else if (result == NFA_TOO_EXPENSIVE)
return result;
cleanup_subexpr();
if (REG_MULTI)
@ -6929,9 +6950,7 @@ nfa_regexec_both(line, startcol)
nfa_nsubexpr = prog->nsubexp;
nfa_listid = 1;
nfa_alt_listid = 2;
#ifdef DEBUG
nfa_regengine.expr = prog->pattern;
#endif
if (prog->reganch && col > 0)
return 0L;
@ -6979,9 +6998,7 @@ nfa_regexec_both(line, startcol)
retval = nfa_regtry(prog, col);
#ifdef DEBUG
nfa_regengine.expr = NULL;
#endif
theend:
return retval;
@ -7003,9 +7020,7 @@ nfa_regcomp(expr, re_flags)
if (expr == NULL)
return NULL;
#ifdef DEBUG
nfa_regengine.expr = expr;
#endif
init_class_tab();
@ -7082,10 +7097,8 @@ nfa_regcomp(expr, re_flags)
/* Remember whether this pattern has any \z specials in it. */
prog->reghasz = re_has_z;
#endif
#ifdef DEBUG
prog->pattern = vim_strsave(expr);
nfa_regengine.expr = NULL;
#endif
out:
vim_free(post_start);
@ -7099,9 +7112,7 @@ fail:
#ifdef ENABLE_LOG
nfa_postfix_dump(expr, FAIL);
#endif
#ifdef DEBUG
nfa_regengine.expr = NULL;
#endif
goto out;
}
@ -7115,9 +7126,7 @@ nfa_regfree(prog)
if (prog != NULL)
{
vim_free(((nfa_regprog_T *)prog)->match_text);
#ifdef DEBUG
vim_free(((nfa_regprog_T *)prog)->pattern);
#endif
vim_free(prog);
}
}

View File

@ -87,6 +87,7 @@ clean:
-if exist Xfind rd /s /q Xfind
-if exist viminfo del viminfo
-del test.log
-if exists benchmark.out del benchmark.out
.in.out:
-if exist $*.failed del $*.failed
@ -103,3 +104,11 @@ clean:
nolog:
-del test.log
benchmark:
bench_re_freeze.out
bench_re_freeze.out: bench_re_freeze.vim
-if exist benchmark.out del benchmark.out
$(VIMPROG) -u dos.vim -U NONE --noplugin $*.in
@IF EXIST benchmark.out ( type benchmark.out )

View File

@ -12,11 +12,13 @@ ifneq (sh.exe, $(SHELL))
DEL = rm -f
MV = mv
CP = cp
CAT = cat
DIRSLASH = /
else
DEL = del
MV = rename
CP = copy
CAT = type
DIRSLASH = \\
endif
@ -72,6 +74,8 @@ SCRIPTS32 = test50.out test70.out
SCRIPTS_GUI = test16.out
SCRIPTS_BENCH = bench_re_freeze.out
.SUFFIXES: .in .out
vimall: fixff $(SCRIPTS16) $(SCRIPTS) $(SCRIPTS_GUI) $(SCRIPTS32)
@ -80,6 +84,8 @@ vimall: fixff $(SCRIPTS16) $(SCRIPTS) $(SCRIPTS_GUI) $(SCRIPTS32)
nongui: fixff $(SCRIPTS16) $(SCRIPTS)
echo ALL DONE
benchmark: $(SCRIPTS_BENCH)
small:
echo ALL DONE
@ -114,3 +120,8 @@ clean:
-$(DEL) X*
-$(DEL) test.ok
-$(DEL) viminfo
bench_re_freeze.out: bench_re_freeze.vim
-$(DEL) benchmark.out
$(VIMPROG) -u dos.vim -U NONE --noplugin $*.in
$(CAT) benchmark.out

View File

@ -50,6 +50,8 @@ SCRIPTS = test1.out test3.out test4.out test5.out test6.out \
test_signs.out \
test_utf8.out
SCRIPTS_BENCH = bench_re_freeze.out
.SUFFIXES: .in .out
all: /tmp $(SCRIPTS)
@ -57,6 +59,8 @@ all: /tmp $(SCRIPTS)
$(SCRIPTS): $(VIMPROG)
benchmark: $(SCRIPTS_BENCH)
clean:
-rm -rf *.out Xdotest test.ok tiny.vim small.vim mbyte.vim viminfo
@ -75,3 +79,10 @@ clean:
# Create a directory for temp files
/tmp:
-mkdir /tmp
bench_re_freeze.out: bench_re_freeze.vim
-del $*.failed test.ok benchmark.out
copy $*.ok test.ok
$(VIMPROG) -u os2.vim --noplugin -s dotest.in $*.in
type benchmark.out

View File

@ -48,12 +48,16 @@ SCRIPTS = test1.out test2.out test3.out test4.out test5.out test6.out \
SCRIPTS_GUI = test16.out
SCRIPTS_BENCH = bench_re_freeze.out
.SUFFIXES: .in .out
nongui: nolog $(SCRIPTS) report
gui: nolog $(SCRIPTS) $(SCRIPTS_GUI) report
benchmark: $(SCRIPTS_BENCH)
report:
@echo
@echo 'Test results:'
@ -65,7 +69,7 @@ report:
$(SCRIPTS) $(SCRIPTS_GUI): $(VIMPROG)
RM_ON_RUN = test.out X* viminfo
RM_ON_START = tiny.vim small.vim mbyte.vim mzscheme.vim lua.vim test.ok
RM_ON_START = tiny.vim small.vim mbyte.vim mzscheme.vim lua.vim test.ok benchmark.out
RUN_VIM = VIMRUNTIME=$(SCRIPTSOURCE); export VIMRUNTIME; $(VALGRIND) $(VIMPROG) -u unix.vim -U NONE --noplugin -s dotest.in
clean:
@ -120,5 +124,14 @@ test49.out: test49.vim
test60.out: test60.vim
bench_re_freeze.out: bench_re_freeze.vim
-rm -rf benchmark.out $(RM_ON_RUN)
# Sleep a moment to avoid that the xterm title is messed up.
# 200 msec is sufficient, but only modern sleep supports a fraction of
# a second, fall back to a second if it fails.
@-/bin/sh -c "sleep .2 > /dev/null 2>&1 || sleep 1"
-$(RUN_VIM) $*.in
@/bin/sh -c "if test -f benchmark.out; then cat benchmark.out; fi"
nolog:
-rm -f test.log

View File

@ -0,0 +1,13 @@
Test for Benchmarking RE engine
STARTTEST
:so small.vim
:if !has("reltime") | qa! | endif
:set nocp cpo&vim
:so bench_re_freeze.vim
:call Measure('samples/re.freeze.txt', '\s\+\%#\@<!$', '+5')
:/^" Benchmark/,$w! benchmark.out
:qa!
ENDTEST
" Benchmark_results:

View File

@ -0,0 +1,13 @@
"Test for benchmarking the RE engine
so small.vim
if !has("reltime") | finish | endif
func! Measure(file, pattern, arg)
for re in range(3)
let sstart=reltime()
let cmd=printf("../vim -u NONE -N --cmd ':set re=%d'".
\ " -c 'call search(\"%s\", \"\", \"\", 10000)' -c ':q!' %s", re, escape(a:pattern, '\\'), empty(a:arg) ? '' : a:arg)
call system(cmd. ' '. a:file)
$put =printf('file: %s, re: %d, time: %s', a:file, re, reltimestr(reltime(sstart)))
endfor
endfunc

File diff suppressed because one or more lines are too long

View File

@ -741,6 +741,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
497,
/**/
496,
/**/