mirror of
https://github.com/vim/vim.git
synced 2025-09-26 04:04:07 -04:00
updated for version 7.3.1037
Problem: Look-behind matching is very slow on long lines. Solution: Add a byte limit to how far back an attempt is made.
This commit is contained in:
72
src/regexp.c
72
src/regexp.c
@@ -701,6 +701,7 @@ static void regmbc __ARGS((int c));
|
|||||||
# define CASEMBC(x)
|
# define CASEMBC(x)
|
||||||
#endif
|
#endif
|
||||||
static void reginsert __ARGS((int, char_u *));
|
static void reginsert __ARGS((int, char_u *));
|
||||||
|
static void reginsert_nr __ARGS((int op, long val, char_u *opnd));
|
||||||
static void reginsert_limits __ARGS((int, long, long, char_u *));
|
static void reginsert_limits __ARGS((int, long, long, char_u *));
|
||||||
static char_u *re_put_long __ARGS((char_u *pr, long_u val));
|
static char_u *re_put_long __ARGS((char_u *pr, long_u val));
|
||||||
static int read_limits __ARGS((long *, long *));
|
static int read_limits __ARGS((long *, long *));
|
||||||
@@ -1781,7 +1782,9 @@ regpiece(flagp)
|
|||||||
case Magic('@'):
|
case Magic('@'):
|
||||||
{
|
{
|
||||||
int lop = END;
|
int lop = END;
|
||||||
|
int nr;
|
||||||
|
|
||||||
|
nr = getdecchrs();
|
||||||
switch (no_Magic(getchr()))
|
switch (no_Magic(getchr()))
|
||||||
{
|
{
|
||||||
case '=': lop = MATCH; break; /* \@= */
|
case '=': lop = MATCH; break; /* \@= */
|
||||||
@@ -1803,6 +1806,13 @@ regpiece(flagp)
|
|||||||
*flagp |= HASLOOKBH;
|
*flagp |= HASLOOKBH;
|
||||||
}
|
}
|
||||||
regtail(ret, regnode(END)); /* operand ends */
|
regtail(ret, regnode(END)); /* operand ends */
|
||||||
|
if (lop == BEHIND || lop == NOBEHIND)
|
||||||
|
{
|
||||||
|
if (nr < 0)
|
||||||
|
nr = 0; /* no limit is same as zero limit */
|
||||||
|
reginsert_nr(lop, nr, ret);
|
||||||
|
}
|
||||||
|
else
|
||||||
reginsert(lop, ret);
|
reginsert(lop, ret);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -2778,6 +2788,38 @@ reginsert(op, opnd)
|
|||||||
*place = NUL;
|
*place = NUL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Insert an operator in front of already-emitted operand.
|
||||||
|
* Add a number to the operator.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
reginsert_nr(op, val, opnd)
|
||||||
|
int op;
|
||||||
|
long val;
|
||||||
|
char_u *opnd;
|
||||||
|
{
|
||||||
|
char_u *src;
|
||||||
|
char_u *dst;
|
||||||
|
char_u *place;
|
||||||
|
|
||||||
|
if (regcode == JUST_CALC_SIZE)
|
||||||
|
{
|
||||||
|
regsize += 7;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
src = regcode;
|
||||||
|
regcode += 7;
|
||||||
|
dst = regcode;
|
||||||
|
while (src > opnd)
|
||||||
|
*--dst = *--src;
|
||||||
|
|
||||||
|
place = opnd; /* Op node, where operand used to be. */
|
||||||
|
*place++ = op;
|
||||||
|
*place++ = NUL;
|
||||||
|
*place++ = NUL;
|
||||||
|
place = re_put_long(place, (long_u)val);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Insert an operator in front of already-emitted operand.
|
* Insert an operator in front of already-emitted operand.
|
||||||
* The operator has the given limit values as operands. Also set next pointer.
|
* The operator has the given limit values as operands. Also set next pointer.
|
||||||
@@ -3182,7 +3224,7 @@ gethexchrs(maxinputlen)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* get and return the value of the decimal string immediately after the
|
* Get and return the value of the decimal string immediately after the
|
||||||
* current position. Return -1 for invalid. Consumes all digits.
|
* current position. Return -1 for invalid. Consumes all digits.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
@@ -3200,6 +3242,7 @@ getdecchrs()
|
|||||||
nr *= 10;
|
nr *= 10;
|
||||||
nr += c - '0';
|
nr += c - '0';
|
||||||
++regparse;
|
++regparse;
|
||||||
|
curchr = -1; /* no longer valid */
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
@@ -5432,7 +5475,7 @@ regmatch(scan)
|
|||||||
/* save the position after the found match for next */
|
/* save the position after the found match for next */
|
||||||
reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
|
reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
|
||||||
|
|
||||||
/* start looking for a match with operand at the current
|
/* Start looking for a match with operand at the current
|
||||||
* position. Go back one character until we find the
|
* position. Go back one character until we find the
|
||||||
* result, hitting the start of the line or the previous
|
* result, hitting the start of the line or the previous
|
||||||
* line (for multi-line matching).
|
* line (for multi-line matching).
|
||||||
@@ -5444,7 +5487,7 @@ regmatch(scan)
|
|||||||
rp->rs_state = RS_BEHIND2;
|
rp->rs_state = RS_BEHIND2;
|
||||||
|
|
||||||
reg_restore(&rp->rs_un.regsave, &backpos);
|
reg_restore(&rp->rs_un.regsave, &backpos);
|
||||||
scan = OPERAND(rp->rs_scan);
|
scan = OPERAND(rp->rs_scan) + 4;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -5472,9 +5515,12 @@ regmatch(scan)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
long limit;
|
||||||
|
|
||||||
/* No match or a match that doesn't end where we want it: Go
|
/* No match or a match that doesn't end where we want it: Go
|
||||||
* back one character. May go to previous line once. */
|
* back one character. May go to previous line once. */
|
||||||
no = OK;
|
no = OK;
|
||||||
|
limit = OPERAND_MIN(rp->rs_scan);
|
||||||
if (REG_MULTI)
|
if (REG_MULTI)
|
||||||
{
|
{
|
||||||
if (rp->rs_un.regsave.rs_u.pos.col == 0)
|
if (rp->rs_un.regsave.rs_u.pos.col == 0)
|
||||||
@@ -5493,6 +5539,7 @@ regmatch(scan)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
#ifdef FEAT_MBYTE
|
#ifdef FEAT_MBYTE
|
||||||
if (has_mbyte)
|
if (has_mbyte)
|
||||||
rp->rs_un.regsave.rs_u.pos.col -=
|
rp->rs_un.regsave.rs_u.pos.col -=
|
||||||
@@ -5501,19 +5548,32 @@ regmatch(scan)
|
|||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
--rp->rs_un.regsave.rs_u.pos.col;
|
--rp->rs_un.regsave.rs_u.pos.col;
|
||||||
|
if (limit > 0
|
||||||
|
&& ((rp->rs_un.regsave.rs_u.pos.lnum
|
||||||
|
< behind_pos.rs_u.pos.lnum
|
||||||
|
? (colnr_T)STRLEN(regline)
|
||||||
|
: behind_pos.rs_u.pos.col)
|
||||||
|
- rp->rs_un.regsave.rs_u.pos.col > limit))
|
||||||
|
no = FAIL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (rp->rs_un.regsave.rs_u.ptr == regline)
|
if (rp->rs_un.regsave.rs_u.ptr == regline)
|
||||||
no = FAIL;
|
no = FAIL;
|
||||||
else
|
else
|
||||||
--rp->rs_un.regsave.rs_u.ptr;
|
{
|
||||||
|
mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
|
||||||
|
if (limit > 0 && (long)(behind_pos.rs_u.ptr
|
||||||
|
- rp->rs_un.regsave.rs_u.ptr) > limit)
|
||||||
|
no = FAIL;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (no == OK)
|
if (no == OK)
|
||||||
{
|
{
|
||||||
/* Advanced, prepare for finding match again. */
|
/* Advanced, prepare for finding match again. */
|
||||||
reg_restore(&rp->rs_un.regsave, &backpos);
|
reg_restore(&rp->rs_un.regsave, &backpos);
|
||||||
scan = OPERAND(rp->rs_scan);
|
scan = OPERAND(rp->rs_scan) + 4;
|
||||||
if (status == RA_MATCH)
|
if (status == RA_MATCH)
|
||||||
{
|
{
|
||||||
/* We did match, so subexpr may have been changed,
|
/* We did match, so subexpr may have been changed,
|
||||||
@@ -7773,7 +7833,7 @@ static int regexp_engine = 0;
|
|||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
static char_u regname[][30] = {
|
static char_u regname[][30] = {
|
||||||
"AUTOMATIC Regexp Engine",
|
"AUTOMATIC Regexp Engine",
|
||||||
"BACKTACKING Regexp Engine",
|
"BACKTRACKING Regexp Engine",
|
||||||
"NFA Regexp Engine"
|
"NFA Regexp Engine"
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
@@ -1331,6 +1331,16 @@ nfa_regpiece()
|
|||||||
case '=':
|
case '=':
|
||||||
EMIT(NFA_PREV_ATOM_NO_WIDTH);
|
EMIT(NFA_PREV_ATOM_NO_WIDTH);
|
||||||
break;
|
break;
|
||||||
|
case '0':
|
||||||
|
case '1':
|
||||||
|
case '2':
|
||||||
|
case '3':
|
||||||
|
case '4':
|
||||||
|
case '5':
|
||||||
|
case '6':
|
||||||
|
case '7':
|
||||||
|
case '8':
|
||||||
|
case '9':
|
||||||
case '!':
|
case '!':
|
||||||
case '<':
|
case '<':
|
||||||
case '>':
|
case '>':
|
||||||
@@ -3817,7 +3827,9 @@ nfa_regmatch(start, submatch, m)
|
|||||||
* because recursive calls should only start in the first position.
|
* because recursive calls should only start in the first position.
|
||||||
* Also don't start a match past the first line. */
|
* Also don't start a match past the first line. */
|
||||||
if (nfa_match == FALSE && start->c == NFA_MOPEN + 0
|
if (nfa_match == FALSE && start->c == NFA_MOPEN + 0
|
||||||
&& reglnum == 0 && clen != 0)
|
&& reglnum == 0 && clen != 0
|
||||||
|
&& (ireg_maxcol == 0
|
||||||
|
|| (colnr_T)(reginput - regline) < ireg_maxcol))
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_LOG
|
#ifdef ENABLE_LOG
|
||||||
fprintf(log_fd, "(---) STARTSTATE\n");
|
fprintf(log_fd, "(---) STARTSTATE\n");
|
||||||
|
@@ -336,6 +336,14 @@ STARTTEST
|
|||||||
:"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
|
:"call add(tl, [2, '\(\i\+\) \1', 'xgoo goox', 'goo goo', 'goo'])
|
||||||
:call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
|
:call add(tl, [2, '\(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9', 'xabcddefghiabcddefghix', 'abcddefghiabcddefghi', 'a', 'b', 'c', 'dd', 'e', 'f', 'g', 'h', 'i'])
|
||||||
:"
|
:"
|
||||||
|
:"""" Look-behind with limit
|
||||||
|
:call add(tl, [0, '<\@<=span.', 'xxspanxx<spanyyy', 'spany'])
|
||||||
|
:call add(tl, [0, '<\@1<=span.', 'xxspanxx<spanyyy', 'spany'])
|
||||||
|
:call add(tl, [0, '<\@2<=span.', 'xxspanxx<spanyyy', 'spany'])
|
||||||
|
:call add(tl, [0, '\(<<\)\@<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
|
||||||
|
:call add(tl, [0, '\(<<\)\@1<=span.', 'xxspanxxxx<spanxx<<spanyyy'])
|
||||||
|
:call add(tl, [0, '\(<<\)\@2<=span.', 'xxspanxxxx<spanxx<<spanyyy', 'spany', '<<'])
|
||||||
|
:"
|
||||||
:"""" Run the tests
|
:"""" Run the tests
|
||||||
:"
|
:"
|
||||||
:"
|
:"
|
||||||
@@ -406,6 +414,12 @@ Gop:"
|
|||||||
y$Gop:"
|
y$Gop:"
|
||||||
:"
|
:"
|
||||||
:"
|
:"
|
||||||
|
:" Check a pattern with a look-behind crossing a line boundary
|
||||||
|
/^Behind:
|
||||||
|
/\(<\_[xy]\+\)\@3<=start
|
||||||
|
:.yank
|
||||||
|
Gop:"
|
||||||
|
:"
|
||||||
:/\%#=1^Results/,$wq! test.out
|
:/\%#=1^Results/,$wq! test.out
|
||||||
ENDTEST
|
ENDTEST
|
||||||
|
|
||||||
@@ -423,4 +437,12 @@ ghi
|
|||||||
xjk
|
xjk
|
||||||
lmn
|
lmn
|
||||||
|
|
||||||
|
Behind:
|
||||||
|
asdfasd<yyy
|
||||||
|
xxstart1
|
||||||
|
asdfasd<yy
|
||||||
|
xxxxstart2
|
||||||
|
asdfasd<yy
|
||||||
|
xxxstart3
|
||||||
|
|
||||||
Results of test64:
|
Results of test64:
|
||||||
|
@@ -719,6 +719,18 @@ OK 2 - \(\i\+\) \1
|
|||||||
OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
|
OK 0 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
|
||||||
OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
|
OK 1 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
|
||||||
OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
|
OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
|
||||||
|
OK 0 - <\@<=span.
|
||||||
|
OK 1 - <\@<=span.
|
||||||
|
OK 0 - <\@1<=span.
|
||||||
|
OK 1 - <\@1<=span.
|
||||||
|
OK 0 - <\@2<=span.
|
||||||
|
OK 1 - <\@2<=span.
|
||||||
|
OK 0 - \(<<\)\@<=span.
|
||||||
|
OK 1 - \(<<\)\@<=span.
|
||||||
|
OK 0 - \(<<\)\@1<=span.
|
||||||
|
OK 1 - \(<<\)\@1<=span.
|
||||||
|
OK 0 - \(<<\)\@2<=span.
|
||||||
|
OK 1 - \(<<\)\@2<=span.
|
||||||
192.168.0.1
|
192.168.0.1
|
||||||
192.168.0.1
|
192.168.0.1
|
||||||
192.168.0.1
|
192.168.0.1
|
||||||
@@ -726,3 +738,5 @@ OK 2 - \(a\)\(b\)\(c\)\(dd\)\(e\)\(f\)\(g\)\(h\)\(i\)\1\2\3\4\5\6\7\8\9
|
|||||||
<T="5">Ta 5</Title>
|
<T="5">Ta 5</Title>
|
||||||
<T="7">Ac 7</Title>
|
<T="7">Ac 7</Title>
|
||||||
ghi
|
ghi
|
||||||
|
|
||||||
|
xxxstart3
|
||||||
|
@@ -728,6 +728,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
1037,
|
||||||
/**/
|
/**/
|
||||||
1036,
|
1036,
|
||||||
/**/
|
/**/
|
||||||
|
Reference in New Issue
Block a user