mirror of
https://github.com/vim/vim.git
synced 2025-07-26 11:04:33 -04:00
updated for version 7.3.1150
Problem: New regexpengine: Slow when a look-behind match does not have a width specified. Solution: Try to compute the maximum width.
This commit is contained in:
parent
473de61b04
commit
e7766eeaa5
263
src/regexp_nfa.c
263
src/regexp_nfa.c
@ -38,19 +38,19 @@ enum
|
|||||||
NFA_START_COLL, /* [abc] start */
|
NFA_START_COLL, /* [abc] start */
|
||||||
NFA_END_COLL, /* [abc] end */
|
NFA_END_COLL, /* [abc] end */
|
||||||
NFA_START_NEG_COLL, /* [^abc] start */
|
NFA_START_NEG_COLL, /* [^abc] start */
|
||||||
NFA_END_NEG_COLL, /* [^abc] end (only used in postfix) */
|
NFA_END_NEG_COLL, /* [^abc] end (postfix only) */
|
||||||
NFA_RANGE, /* range of the two previous items (only
|
NFA_RANGE, /* range of the two previous items
|
||||||
* used in postfix) */
|
* (postfix only) */
|
||||||
NFA_RANGE_MIN, /* low end of a range */
|
NFA_RANGE_MIN, /* low end of a range */
|
||||||
NFA_RANGE_MAX, /* high end of a range */
|
NFA_RANGE_MAX, /* high end of a range */
|
||||||
|
|
||||||
NFA_CONCAT, /* concatenate two previous items (only
|
NFA_CONCAT, /* concatenate two previous items (postfix
|
||||||
* used in postfix) */
|
* only) */
|
||||||
NFA_OR,
|
NFA_OR, /* \| (postfix only) */
|
||||||
NFA_STAR, /* greedy * */
|
NFA_STAR, /* greedy * (posfix only) */
|
||||||
NFA_STAR_NONGREEDY, /* non-greedy * */
|
NFA_STAR_NONGREEDY, /* non-greedy * (postfix only) */
|
||||||
NFA_QUEST, /* greedy \? */
|
NFA_QUEST, /* greedy \? (postfix only) */
|
||||||
NFA_QUEST_NONGREEDY, /* non-greedy \? */
|
NFA_QUEST_NONGREEDY, /* non-greedy \? (postfix only) */
|
||||||
|
|
||||||
NFA_BOL, /* ^ Begin line */
|
NFA_BOL, /* ^ Begin line */
|
||||||
NFA_EOL, /* $ End line */
|
NFA_EOL, /* $ End line */
|
||||||
@ -153,8 +153,6 @@ enum
|
|||||||
|
|
||||||
/* NFA_FIRST_NL */
|
/* NFA_FIRST_NL */
|
||||||
NFA_ANY, /* Match any one character. */
|
NFA_ANY, /* Match any one character. */
|
||||||
NFA_ANYOF, /* Match any character in this string. */
|
|
||||||
NFA_ANYBUT, /* Match any character not in this string. */
|
|
||||||
NFA_IDENT, /* Match identifier char */
|
NFA_IDENT, /* Match identifier char */
|
||||||
NFA_SIDENT, /* Match identifier char but no digit */
|
NFA_SIDENT, /* Match identifier char but no digit */
|
||||||
NFA_KWORD, /* Match keyword char */
|
NFA_KWORD, /* Match keyword char */
|
||||||
@ -496,8 +494,8 @@ nfa_get_regstart(start, depth)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Figure out if the NFA state list contains just literal text and nothing
|
* Figure out if the NFA state list contains just literal text and nothing
|
||||||
* else. If so return a string with what must match after regstart.
|
* else. If so return a string in allocated memory with what must match after
|
||||||
* Otherwise return NULL.
|
* regstart. Otherwise return NULL.
|
||||||
*/
|
*/
|
||||||
static char_u *
|
static char_u *
|
||||||
nfa_get_match_text(start)
|
nfa_get_match_text(start)
|
||||||
@ -2577,6 +2575,225 @@ st_pop(p, stack)
|
|||||||
return **p;
|
return **p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Estimate the maximum byte length of anything matching "state".
|
||||||
|
* When unknown or unlimited return -1.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
nfa_max_width(startstate, depth)
|
||||||
|
nfa_state_T *startstate;
|
||||||
|
int depth;
|
||||||
|
{
|
||||||
|
int l, r;
|
||||||
|
nfa_state_T *state = startstate;
|
||||||
|
int len = 0;
|
||||||
|
|
||||||
|
/* detect looping in a NFA_SPLIT */
|
||||||
|
if (depth > 4)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
switch (state->c)
|
||||||
|
{
|
||||||
|
case NFA_END_INVISIBLE:
|
||||||
|
case NFA_END_INVISIBLE_NEG:
|
||||||
|
/* the end, return what we have */
|
||||||
|
return len;
|
||||||
|
|
||||||
|
case NFA_SPLIT:
|
||||||
|
/* two alternatives, use the maximum */
|
||||||
|
l = nfa_max_width(state->out, depth + 1);
|
||||||
|
r = nfa_max_width(state->out1, depth + 1);
|
||||||
|
if (l < 0 || r < 0)
|
||||||
|
return -1;
|
||||||
|
return len + (l > r ? l : r);
|
||||||
|
|
||||||
|
case NFA_ANY:
|
||||||
|
case NFA_START_COLL:
|
||||||
|
case NFA_START_NEG_COLL:
|
||||||
|
/* matches some character, including composing chars */
|
||||||
|
#ifdef FEAT_MBYTE
|
||||||
|
if (enc_utf8)
|
||||||
|
len += MB_MAXBYTES;
|
||||||
|
else if (has_mbyte)
|
||||||
|
len += 2;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
++len;
|
||||||
|
if (state->c != NFA_ANY)
|
||||||
|
{
|
||||||
|
/* skip over the characters */
|
||||||
|
state = state->out1->out;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case NFA_DIGIT:
|
||||||
|
case NFA_WHITE:
|
||||||
|
case NFA_HEX:
|
||||||
|
case NFA_OCTAL:
|
||||||
|
/* ascii */
|
||||||
|
++len;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case NFA_IDENT:
|
||||||
|
case NFA_SIDENT:
|
||||||
|
case NFA_KWORD:
|
||||||
|
case NFA_SKWORD:
|
||||||
|
case NFA_FNAME:
|
||||||
|
case NFA_SFNAME:
|
||||||
|
case NFA_PRINT:
|
||||||
|
case NFA_SPRINT:
|
||||||
|
case NFA_NWHITE:
|
||||||
|
case NFA_NDIGIT:
|
||||||
|
case NFA_NHEX:
|
||||||
|
case NFA_NOCTAL:
|
||||||
|
case NFA_WORD:
|
||||||
|
case NFA_NWORD:
|
||||||
|
case NFA_HEAD:
|
||||||
|
case NFA_NHEAD:
|
||||||
|
case NFA_ALPHA:
|
||||||
|
case NFA_NALPHA:
|
||||||
|
case NFA_LOWER:
|
||||||
|
case NFA_NLOWER:
|
||||||
|
case NFA_UPPER:
|
||||||
|
case NFA_NUPPER:
|
||||||
|
/* possibly non-ascii */
|
||||||
|
#ifdef FEAT_MBYTE
|
||||||
|
if (has_mbyte)
|
||||||
|
len += 3;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
++len;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case NFA_START_INVISIBLE:
|
||||||
|
case NFA_START_INVISIBLE_NEG:
|
||||||
|
case NFA_START_INVISIBLE_BEFORE:
|
||||||
|
case NFA_START_INVISIBLE_BEFORE_NEG:
|
||||||
|
/* zero-width, out1 points to the END state */
|
||||||
|
state = state->out1->out;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case NFA_BACKREF1:
|
||||||
|
case NFA_BACKREF2:
|
||||||
|
case NFA_BACKREF3:
|
||||||
|
case NFA_BACKREF4:
|
||||||
|
case NFA_BACKREF5:
|
||||||
|
case NFA_BACKREF6:
|
||||||
|
case NFA_BACKREF7:
|
||||||
|
case NFA_BACKREF8:
|
||||||
|
case NFA_BACKREF9:
|
||||||
|
#ifdef FEAT_SYN_HL
|
||||||
|
case NFA_ZREF1:
|
||||||
|
case NFA_ZREF2:
|
||||||
|
case NFA_ZREF3:
|
||||||
|
case NFA_ZREF4:
|
||||||
|
case NFA_ZREF5:
|
||||||
|
case NFA_ZREF6:
|
||||||
|
case NFA_ZREF7:
|
||||||
|
case NFA_ZREF8:
|
||||||
|
case NFA_ZREF9:
|
||||||
|
#endif
|
||||||
|
case NFA_NEWL:
|
||||||
|
case NFA_SKIP:
|
||||||
|
/* unknown width */
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
case NFA_BOL:
|
||||||
|
case NFA_EOL:
|
||||||
|
case NFA_BOF:
|
||||||
|
case NFA_EOF:
|
||||||
|
case NFA_BOW:
|
||||||
|
case NFA_EOW:
|
||||||
|
case NFA_MOPEN:
|
||||||
|
case NFA_MOPEN1:
|
||||||
|
case NFA_MOPEN2:
|
||||||
|
case NFA_MOPEN3:
|
||||||
|
case NFA_MOPEN4:
|
||||||
|
case NFA_MOPEN5:
|
||||||
|
case NFA_MOPEN6:
|
||||||
|
case NFA_MOPEN7:
|
||||||
|
case NFA_MOPEN8:
|
||||||
|
case NFA_MOPEN9:
|
||||||
|
#ifdef FEAT_SYN_HL
|
||||||
|
case NFA_ZOPEN:
|
||||||
|
case NFA_ZOPEN1:
|
||||||
|
case NFA_ZOPEN2:
|
||||||
|
case NFA_ZOPEN3:
|
||||||
|
case NFA_ZOPEN4:
|
||||||
|
case NFA_ZOPEN5:
|
||||||
|
case NFA_ZOPEN6:
|
||||||
|
case NFA_ZOPEN7:
|
||||||
|
case NFA_ZOPEN8:
|
||||||
|
case NFA_ZOPEN9:
|
||||||
|
case NFA_ZCLOSE:
|
||||||
|
case NFA_ZCLOSE1:
|
||||||
|
case NFA_ZCLOSE2:
|
||||||
|
case NFA_ZCLOSE3:
|
||||||
|
case NFA_ZCLOSE4:
|
||||||
|
case NFA_ZCLOSE5:
|
||||||
|
case NFA_ZCLOSE6:
|
||||||
|
case NFA_ZCLOSE7:
|
||||||
|
case NFA_ZCLOSE8:
|
||||||
|
case NFA_ZCLOSE9:
|
||||||
|
#endif
|
||||||
|
case NFA_MCLOSE:
|
||||||
|
case NFA_MCLOSE1:
|
||||||
|
case NFA_MCLOSE2:
|
||||||
|
case NFA_MCLOSE3:
|
||||||
|
case NFA_MCLOSE4:
|
||||||
|
case NFA_MCLOSE5:
|
||||||
|
case NFA_MCLOSE6:
|
||||||
|
case NFA_MCLOSE7:
|
||||||
|
case NFA_MCLOSE8:
|
||||||
|
case NFA_MCLOSE9:
|
||||||
|
case NFA_NOPEN:
|
||||||
|
case NFA_NCLOSE:
|
||||||
|
|
||||||
|
case NFA_LNUM_GT:
|
||||||
|
case NFA_LNUM_LT:
|
||||||
|
case NFA_COL_GT:
|
||||||
|
case NFA_COL_LT:
|
||||||
|
case NFA_VCOL_GT:
|
||||||
|
case NFA_VCOL_LT:
|
||||||
|
case NFA_MARK_GT:
|
||||||
|
case NFA_MARK_LT:
|
||||||
|
case NFA_VISUAL:
|
||||||
|
case NFA_LNUM:
|
||||||
|
case NFA_CURSOR:
|
||||||
|
case NFA_COL:
|
||||||
|
case NFA_VCOL:
|
||||||
|
case NFA_MARK:
|
||||||
|
|
||||||
|
case NFA_ZSTART:
|
||||||
|
case NFA_ZEND:
|
||||||
|
case NFA_OPT_CHARS:
|
||||||
|
case NFA_SKIP_CHAR:
|
||||||
|
case NFA_START_PATTERN:
|
||||||
|
case NFA_END_PATTERN:
|
||||||
|
case NFA_COMPOSING:
|
||||||
|
case NFA_END_COMPOSING:
|
||||||
|
/* zero-width */
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if (state->c < 0)
|
||||||
|
/* don't know what this is */
|
||||||
|
return -1;
|
||||||
|
/* normal character */
|
||||||
|
len += MB_CHAR2LEN(state->c);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* normal way to continue */
|
||||||
|
state = state->out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* unrecognized */
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* Convert a postfix form into its equivalent NFA.
|
* Convert a postfix form into its equivalent NFA.
|
||||||
* Return the NFA start state on success, NULL otherwise.
|
* Return the NFA start state on success, NULL otherwise.
|
||||||
@ -2856,8 +3073,6 @@ post2nfa(postfix, end, nfa_calc_size)
|
|||||||
s = alloc_state(start_state, e.start, s1);
|
s = alloc_state(start_state, e.start, s1);
|
||||||
if (s == NULL)
|
if (s == NULL)
|
||||||
goto theend;
|
goto theend;
|
||||||
if (before)
|
|
||||||
s->val = n; /* store the count */
|
|
||||||
if (pattern)
|
if (pattern)
|
||||||
{
|
{
|
||||||
/* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */
|
/* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */
|
||||||
@ -2871,6 +3086,14 @@ post2nfa(postfix, end, nfa_calc_size)
|
|||||||
{
|
{
|
||||||
patch(e.out, s1);
|
patch(e.out, s1);
|
||||||
PUSH(frag(s, list1(&s1->out)));
|
PUSH(frag(s, list1(&s1->out)));
|
||||||
|
if (before)
|
||||||
|
{
|
||||||
|
if (n <= 0)
|
||||||
|
/* See if we can guess the maximum width, it avoids a
|
||||||
|
* lot of pointless tries. */
|
||||||
|
n = nfa_max_width(e.start, 0);
|
||||||
|
s->val = n; /* store the count */
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -4088,9 +4311,8 @@ recursive_regmatch(state, prog, submatch, m, listids)
|
|||||||
|
|
||||||
/* Go back the specified number of bytes, or as far as the
|
/* Go back the specified number of bytes, or as far as the
|
||||||
* start of the previous line, to try matching "\@<=" or
|
* start of the previous line, to try matching "\@<=" or
|
||||||
* not matching "\@<!".
|
* not matching "\@<!". This is very ineffecient, limit the number of
|
||||||
* TODO: This is very inefficient! Would be better to
|
* bytes if possible. */
|
||||||
* first check for a match with what follows. */
|
|
||||||
if (state->val <= 0)
|
if (state->val <= 0)
|
||||||
{
|
{
|
||||||
if (REG_MULTI)
|
if (REG_MULTI)
|
||||||
@ -4386,7 +4608,7 @@ skip_to_start(c, colp)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for a match with match_text.
|
* Check for a match with match_text.
|
||||||
* Called after skip_to_start() has find regstart.
|
* Called after skip_to_start() has found regstart.
|
||||||
* Returns zero for no match, 1 for a match.
|
* Returns zero for no match, 1 for a match.
|
||||||
*/
|
*/
|
||||||
static long
|
static long
|
||||||
@ -4736,7 +4958,6 @@ nfa_regmatch(prog, start, submatch, m)
|
|||||||
#ifdef FEAT_SYN_HL
|
#ifdef FEAT_SYN_HL
|
||||||
|| (cout >= NFA_ZCLOSE && cout <= NFA_ZCLOSE9)
|
|| (cout >= NFA_ZCLOSE && cout <= NFA_ZCLOSE9)
|
||||||
#endif
|
#endif
|
||||||
|| cout == NFA_NCLOSE
|
|
||||||
|| t->pim != NULL
|
|| t->pim != NULL
|
||||||
|| (t->state->c != NFA_START_INVISIBLE_BEFORE
|
|| (t->state->c != NFA_START_INVISIBLE_BEFORE
|
||||||
&& t->state->c != NFA_START_INVISIBLE_BEFORE_NEG
|
&& t->state->c != NFA_START_INVISIBLE_BEFORE_NEG
|
||||||
|
@ -728,6 +728,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
1150,
|
||||||
/**/
|
/**/
|
||||||
1149,
|
1149,
|
||||||
/**/
|
/**/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user