0
0
mirror of https://github.com/vim/vim.git synced 2025-09-26 04:04:07 -04:00

updated for version 7.3.1037

Problem:    Look-behind matching is very slow on long lines.
Solution:   Add a byte limit to how far back an attempt is made.
This commit is contained in:
Bram Moolenaar
2013-05-29 18:45:11 +02:00
parent f9e56b2b03
commit 75eb1610e1
5 changed files with 123 additions and 13 deletions

View File

@@ -701,6 +701,7 @@ static void regmbc __ARGS((int c));
# define CASEMBC(x) # define CASEMBC(x)
#endif #endif
static void reginsert __ARGS((int, char_u *)); static void reginsert __ARGS((int, char_u *));
static void reginsert_nr __ARGS((int op, long val, char_u *opnd));
static void reginsert_limits __ARGS((int, long, long, char_u *)); static void reginsert_limits __ARGS((int, long, long, char_u *));
static char_u *re_put_long __ARGS((char_u *pr, long_u val)); static char_u *re_put_long __ARGS((char_u *pr, long_u val));
static int read_limits __ARGS((long *, long *)); static int read_limits __ARGS((long *, long *));
@@ -1781,7 +1782,9 @@ regpiece(flagp)
case Magic('@'): case Magic('@'):
{ {
int lop = END; int lop = END;
int nr;
nr = getdecchrs();
switch (no_Magic(getchr())) switch (no_Magic(getchr()))
{ {
case '=': lop = MATCH; break; /* \@= */ case '=': lop = MATCH; break; /* \@= */
@@ -1803,6 +1806,13 @@ regpiece(flagp)
*flagp |= HASLOOKBH; *flagp |= HASLOOKBH;
} }
regtail(ret, regnode(END)); /* operand ends */ regtail(ret, regnode(END)); /* operand ends */
if (lop == BEHIND || lop == NOBEHIND)
{
if (nr < 0)
nr = 0; /* no limit is same as zero limit */
reginsert_nr(lop, nr, ret);
}
else
reginsert(lop, ret); reginsert(lop, ret);
break; break;
} }
@@ -2778,6 +2788,38 @@ reginsert(op, opnd)
*place = NUL; *place = NUL;
} }
/*
* Insert an operator in front of already-emitted operand.
* Add a number to the operator.
*/
static void
reginsert_nr(op, val, opnd)
int op;
long val;
char_u *opnd;
{
char_u *src;
char_u *dst;
char_u *place;
if (regcode == JUST_CALC_SIZE)
{
regsize += 7;
return;
}
src = regcode;
regcode += 7;
dst = regcode;
while (src > opnd)
*--dst = *--src;
place = opnd; /* Op node, where operand used to be. */
*place++ = op;
*place++ = NUL;
*place++ = NUL;
place = re_put_long(place, (long_u)val);
}
/* /*
* Insert an operator in front of already-emitted operand. * Insert an operator in front of already-emitted operand.
* The operator has the given limit values as operands. Also set next pointer. * The operator has the given limit values as operands. Also set next pointer.
@@ -3182,7 +3224,7 @@ gethexchrs(maxinputlen)
} }
/* /*
* get and return the value of the decimal string immediately after the * Get and return the value of the decimal string immediately after the
* current position. Return -1 for invalid. Consumes all digits. * current position. Return -1 for invalid. Consumes all digits.
*/ */
static int static int
@@ -3200,6 +3242,7 @@ getdecchrs()
nr *= 10; nr *= 10;
nr += c - '0'; nr += c - '0';
++regparse; ++regparse;
curchr = -1; /* no longer valid */
} }
if (i == 0) if (i == 0)
@@ -5432,7 +5475,7 @@ regmatch(scan)
/* save the position after the found match for next */ /* save the position after the found match for next */
reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos); reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
/* start looking for a match with operand at the current /* Start looking for a match with operand at the current
* position. Go back one character until we find the * position. Go back one character until we find the
* result, hitting the start of the line or the previous * result, hitting the start of the line or the previous
* line (for multi-line matching). * line (for multi-line matching).
@@ -5444,7 +5487,7 @@ regmatch(scan)
rp->rs_state = RS_BEHIND2; rp->rs_state = RS_BEHIND2;
reg_restore(&rp->rs_un.regsave, &backpos); reg_restore(&rp->rs_un.regsave, &backpos);
scan = OPERAND(rp->rs_scan); scan = OPERAND(rp->rs_scan) + 4;
} }
break; break;
@@ -5472,9 +5515,12 @@ regmatch(scan)
} }
else else
{ {
long limit;
/* No match or a match that doesn't end where we want it: Go /* No match or a match that doesn't end where we want it: Go
* back one character. May go to previous line once. */ * back one character. May go to previous line once. */
no = OK; no = OK;
limit = OPERAND_MIN(rp->rs_scan);
if (REG_MULTI) if (REG_MULTI)
{ {
if (rp->rs_un.regsave.rs_u.pos.col == 0) if (rp->rs_un.regsave.rs_u.pos.col == 0)
@@ -5493,6 +5539,7 @@ regmatch(scan)
} }
} }
else else
{
#ifdef FEAT_MBYTE #ifdef FEAT_MBYTE
if (has_mbyte) if (has_mbyte)
rp->rs_un.regsave.rs_u.pos.col -= rp->rs_un.regsave.rs_u.pos.col -=
@@ -5501,19 +5548,32 @@ regmatch(scan)
else else
#endif #endif
--rp->rs_un.regsave.rs_u.pos.col; --rp->rs_un.regsave.rs_u.pos.col;
if (limit > 0
&& ((rp->rs_un.regsave.rs_u.pos.lnum
< behind_pos.rs_u.pos.lnum
? (colnr_T)STRLEN(regline)
: behind_pos.rs_u.pos.col)
- rp->rs_un.regsave.rs_u.pos.col > limit))
no = FAIL;
}
} }
else else
{ {
if (rp->rs_un.regsave.rs_u.ptr == regline) if (rp->rs_un.regsave.rs_u.ptr == regline)
no = FAIL; no = FAIL;
else else
--rp->rs_un.regsave.rs_u.ptr; {
mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
if (limit > 0 && (long)(behind_pos.rs_u.ptr
- rp->rs_un.regsave.rs_u.ptr) > limit)
no = FAIL;
}
} }
if (no == OK) if (no == OK)
{ {
/* Advanced, prepare for finding match again. */ /* Advanced, prepare for finding match again. */
reg_restore(&rp->rs_un.regsave, &backpos); reg_restore(&rp->rs_un.regsave, &backpos);
scan = OPERAND(rp->rs_scan); scan = OPERAND(rp->rs_scan) + 4;
if (status == RA_MATCH) if (status == RA_MATCH)
{ {
/* We did match, so subexpr may have been changed, /* We did match, so subexpr may have been changed,
@@ -7773,7 +7833,7 @@ static int regexp_engine = 0;
#ifdef DEBUG #ifdef DEBUG
static char_u regname[][30] = { static char_u regname[][30] = {
"AUTOMATIC Regexp Engine", "AUTOMATIC Regexp Engine",
"BACKTACKING Regexp Engine", "BACKTRACKING Regexp Engine",
"NFA Regexp Engine" "NFA Regexp Engine"
}; };
#endif #endif

View File

@@ -1331,6 +1331,16 @@ nfa_regpiece()
case '=': case '=':
EMIT(NFA_PREV_ATOM_NO_WIDTH); EMIT(NFA_PREV_ATOM_NO_WIDTH);
break; break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
case '!': case '!':
case '<': case '<':
case '>': case '>':
@@ -3817,7 +3827,9 @@ nfa_regmatch(start, submatch, m)
* because recursive calls should only start in the first position. * because recursive calls should only start in the first position.
* Also don't start a match past the first line. */ * Also don't start a match past the first line. */
if (nfa_match == FALSE && start->c == NFA_MOPEN + 0 if (nfa_match == FALSE && start->c == NFA_MOPEN + 0
&& reglnum == 0 && clen != 0) && reglnum == 0 && clen != 0
&& (ireg_maxcol == 0
|| (colnr_T)(reginput - regline) < ireg_maxcol))
{ {
#ifdef ENABLE_LOG #ifdef ENABLE_LOG
fprintf(log_fd, "(---) STARTSTATE\n"); fprintf(log_fd, "(---) STARTSTATE\n");

View File

@@ -336,6 +336,14 @@ STARTTEST
:"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo']) :"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
:call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i']) :call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
:" :"
:"""" Look-behind with limit
:call add(tl, [0, '<\@<=span.', 'xxspanxx<spanyyy', 'spany'])
:call add(tl, [0, '<\@1<=span.', 'xxspanxx<spanyyy', 'spany'])
:call add(tl, [0, '<\@2<=span.', 'xxspanxx<spanyyy', 'spany'])
:call add(tl, [0, '\(<<\)\@<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
:call add(tl, [0, '\(<<\)\@1<=span.', 'xxspanxxxx<spanxx<<spanyyy'])
:call add(tl, [0, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
:"
:"""" Run the tests :"""" Run the tests
:" :"
:" :"
@@ -406,6 +414,12 @@ Gop:"
y$Gop:" y$Gop:"
:" :"
:" :"
:" Check a pattern with a look beind crossing a line boundary
/^Behind:
/\(<\_[xy]\+\)\@3<=start
:.yank
Gop:"
:"
:/\%#=1^Results/,$wq! test.out :/\%#=1^Results/,$wq! test.out
ENDTEST ENDTEST
@@ -423,4 +437,12 @@ ghi
xjk xjk
lmn lmn
Behind:
asdfasd<yyy
xxstart1
asdfasd<yy
xxxxstart2
asdfasd<yy
xxxstart3
Results of test64: Results of test64:

View File

@@ -719,6 +719,18 @@ OK 2 - \(\i\+\) \1
OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9 OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9 OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9 OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
OK 0 - <\@<=span.
OK 1 - <\@<=span.
OK 0 - <\@1<=span.
OK 1 - <\@1<=span.
OK 0 - <\@2<=span.
OK 1 - <\@2<=span.
OK 0 - \(<<\)\@<=span.
OK 1 - \(<<\)\@<=span.
OK 0 - \(<<\)\@1<=span.
OK 1 - \(<<\)\@1<=span.
OK 0 - \(<<\)\@2<=span.
OK 1 - \(<<\)\@2<=span.
192.168.0.1 192.168.0.1
192.168.0.1 192.168.0.1
192.168.0.1 192.168.0.1
@@ -726,3 +738,5 @@ OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
<T="5">Ta 5</Title> <T="5">Ta 5</Title>
<T="7">Ac 7</Title> <T="7">Ac 7</Title>
ghi ghi
xxxstart3

View File

@@ -728,6 +728,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
1037,
/**/ /**/
1036, 1036,
/**/ /**/