forked from aniani/vim
updated for version 7.3.1073
Problem: New regexp engine may run out of states. Solution: Allocate states dynamically. Also make the test report errors.
This commit is contained in:
@@ -233,7 +233,7 @@ static long nfa_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf,
|
|||||||
|
|
||||||
/* helper functions used when doing re2post() ... regatom() parsing */
|
/* helper functions used when doing re2post() ... regatom() parsing */
|
||||||
#define EMIT(c) do { \
|
#define EMIT(c) do { \
|
||||||
if (post_ptr >= post_end) \
|
if (post_ptr >= post_end && realloc_post_list() == FAIL) \
|
||||||
return FAIL; \
|
return FAIL; \
|
||||||
*post_ptr++ = c; \
|
*post_ptr++ = c; \
|
||||||
} while (0)
|
} while (0)
|
||||||
@@ -256,11 +256,11 @@ nfa_regcomp_start(expr, re_flags)
|
|||||||
nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
|
nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
|
||||||
|
|
||||||
/* Some items blow up in size, such as [A-z]. Add more space for that.
|
/* Some items blow up in size, such as [A-z]. Add more space for that.
|
||||||
* TODO: some patterns may still fail. */
|
* When it is still not enough realloc_post_list() will be used. */
|
||||||
nstate_max += 1000;
|
nstate_max += 1000;
|
||||||
|
|
||||||
/* Size for postfix representation of expr. */
|
/* Size for postfix representation of expr. */
|
||||||
postfix_size = sizeof(*post_start) * nstate_max;
|
postfix_size = sizeof(int) * nstate_max;
|
||||||
|
|
||||||
post_start = (int *)lalloc(postfix_size, TRUE);
|
post_start = (int *)lalloc(postfix_size, TRUE);
|
||||||
if (post_start == NULL)
|
if (post_start == NULL)
|
||||||
@@ -276,6 +276,31 @@ nfa_regcomp_start(expr, re_flags)
|
|||||||
return OK;
|
return OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate more space for post_start. Called when
|
||||||
|
* running above the estimated number of states.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
realloc_post_list()
|
||||||
|
{
|
||||||
|
int nstate_max = post_end - post_start;
|
||||||
|
int new_max = nstate_max + 1000;
|
||||||
|
int *new_start;
|
||||||
|
int *old_start;
|
||||||
|
|
||||||
|
new_start = (int *)lalloc(new_max * sizeof(int), TRUE);
|
||||||
|
if (new_start == NULL)
|
||||||
|
return FAIL;
|
||||||
|
mch_memmove(new_start, post_start, nstate_max * sizeof(int));
|
||||||
|
vim_memset(new_start + nstate_max, 0, 1000 * sizeof(int));
|
||||||
|
old_start = post_start;
|
||||||
|
post_start = new_start;
|
||||||
|
post_ptr = new_start + (post_ptr - old_start);
|
||||||
|
post_end = post_start + new_max;
|
||||||
|
vim_free(old_start);
|
||||||
|
return OK;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Search between "start" and "end" and try to recognize a
|
* Search between "start" and "end" and try to recognize a
|
||||||
* character class in expanded form. For example [0-9].
|
* character class in expanded form. For example [0-9].
|
||||||
@@ -1306,7 +1331,8 @@ nfa_regpiece()
|
|||||||
int greedy = TRUE; /* Braces are prefixed with '-' ? */
|
int greedy = TRUE; /* Braces are prefixed with '-' ? */
|
||||||
char_u *old_regparse, *new_regparse;
|
char_u *old_regparse, *new_regparse;
|
||||||
int c2;
|
int c2;
|
||||||
int *old_post_ptr, *my_post_start;
|
int old_post_pos;
|
||||||
|
int my_post_start;
|
||||||
int old_regnpar;
|
int old_regnpar;
|
||||||
int quest;
|
int quest;
|
||||||
|
|
||||||
@@ -1317,7 +1343,7 @@ nfa_regpiece()
|
|||||||
* <atom>{m,n} is next */
|
* <atom>{m,n} is next */
|
||||||
old_regnpar = regnpar;
|
old_regnpar = regnpar;
|
||||||
/* store current pos in the postfix form, for \{m,n} involving 0s */
|
/* store current pos in the postfix form, for \{m,n} involving 0s */
|
||||||
my_post_start = post_ptr;
|
my_post_start = (int)(post_ptr - post_start);
|
||||||
|
|
||||||
ret = nfa_regatom();
|
ret = nfa_regatom();
|
||||||
if (ret == FAIL)
|
if (ret == FAIL)
|
||||||
@@ -1430,14 +1456,14 @@ nfa_regpiece()
|
|||||||
if (maxval == 0)
|
if (maxval == 0)
|
||||||
{
|
{
|
||||||
/* Ignore result of previous call to nfa_regatom() */
|
/* Ignore result of previous call to nfa_regatom() */
|
||||||
post_ptr = my_post_start;
|
post_ptr = post_start + my_post_start;
|
||||||
/* NFA_SKIP_CHAR has 0-length and works everywhere */
|
/* NFA_SKIP_CHAR has 0-length and works everywhere */
|
||||||
EMIT(NFA_SKIP_CHAR);
|
EMIT(NFA_SKIP_CHAR);
|
||||||
return OK;
|
return OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ignore previous call to nfa_regatom() */
|
/* Ignore previous call to nfa_regatom() */
|
||||||
post_ptr = my_post_start;
|
post_ptr = post_start + my_post_start;
|
||||||
/* Save pos after the repeated atom and the \{} */
|
/* Save pos after the repeated atom and the \{} */
|
||||||
new_regparse = regparse;
|
new_regparse = regparse;
|
||||||
|
|
||||||
@@ -1449,13 +1475,13 @@ nfa_regpiece()
|
|||||||
curchr = -1;
|
curchr = -1;
|
||||||
/* Restore count of parenthesis */
|
/* Restore count of parenthesis */
|
||||||
regnpar = old_regnpar;
|
regnpar = old_regnpar;
|
||||||
old_post_ptr = post_ptr;
|
old_post_pos = (int)(post_ptr - post_start);
|
||||||
if (nfa_regatom() == FAIL)
|
if (nfa_regatom() == FAIL)
|
||||||
return FAIL;
|
return FAIL;
|
||||||
/* after "minval" times, atoms are optional */
|
/* after "minval" times, atoms are optional */
|
||||||
if (i + 1 > minval)
|
if (i + 1 > minval)
|
||||||
EMIT(quest);
|
EMIT(quest);
|
||||||
if (old_post_ptr != my_post_start)
|
if (old_post_pos != my_post_start)
|
||||||
EMIT(NFA_CONCAT);
|
EMIT(NFA_CONCAT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1572,9 +1598,9 @@ nfa_regconcat()
|
|||||||
nfa_regbranch()
|
nfa_regbranch()
|
||||||
{
|
{
|
||||||
int ch;
|
int ch;
|
||||||
int *old_post_ptr;
|
int old_post_pos;
|
||||||
|
|
||||||
old_post_ptr = post_ptr;
|
old_post_pos = (int)(post_ptr - post_start);
|
||||||
|
|
||||||
/* First branch, possibly the only one */
|
/* First branch, possibly the only one */
|
||||||
if (nfa_regconcat() == FAIL)
|
if (nfa_regconcat() == FAIL)
|
||||||
@@ -1587,18 +1613,18 @@ nfa_regbranch()
|
|||||||
skipchr();
|
skipchr();
|
||||||
EMIT(NFA_NOPEN);
|
EMIT(NFA_NOPEN);
|
||||||
EMIT(NFA_PREV_ATOM_NO_WIDTH);
|
EMIT(NFA_PREV_ATOM_NO_WIDTH);
|
||||||
old_post_ptr = post_ptr;
|
old_post_pos = (int)(post_ptr - post_start);
|
||||||
if (nfa_regconcat() == FAIL)
|
if (nfa_regconcat() == FAIL)
|
||||||
return FAIL;
|
return FAIL;
|
||||||
/* if concat is empty, skip a input char. But do emit a node */
|
/* if concat is empty, skip a input char. But do emit a node */
|
||||||
if (old_post_ptr == post_ptr)
|
if (old_post_pos == (int)(post_ptr - post_start))
|
||||||
EMIT(NFA_SKIP_CHAR);
|
EMIT(NFA_SKIP_CHAR);
|
||||||
EMIT(NFA_CONCAT);
|
EMIT(NFA_CONCAT);
|
||||||
ch = peekchr();
|
ch = peekchr();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Even if a branch is empty, emit one node for it */
|
/* Even if a branch is empty, emit one node for it */
|
||||||
if (old_post_ptr == post_ptr)
|
if (old_post_pos == (int)(post_ptr - post_start))
|
||||||
EMIT(NFA_SKIP_CHAR);
|
EMIT(NFA_SKIP_CHAR);
|
||||||
|
|
||||||
return OK;
|
return OK;
|
||||||
|
@@ -348,6 +348,9 @@ STARTTEST
|
|||||||
:call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"])
|
:call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"])
|
||||||
:call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"])
|
:call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"])
|
||||||
:"
|
:"
|
||||||
|
:"""" Requiring lots of states.
|
||||||
|
:call add(tl, [0, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"])
|
||||||
|
:"
|
||||||
:"
|
:"
|
||||||
:"""" Run the tests
|
:"""" Run the tests
|
||||||
:"
|
:"
|
||||||
@@ -361,7 +364,11 @@ STARTTEST
|
|||||||
: continue
|
: continue
|
||||||
: endif
|
: endif
|
||||||
: let &regexpengine = engine
|
: let &regexpengine = engine
|
||||||
: let l = matchlist(text, pat)
|
: try
|
||||||
|
: let l = matchlist(text, pat)
|
||||||
|
: catch
|
||||||
|
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
|
||||||
|
: endtry
|
||||||
:" check the match itself
|
:" check the match itself
|
||||||
: if len(l) == 0 && len(t) > matchidx
|
: if len(l) == 0 && len(t) > matchidx
|
||||||
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
|
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
|
||||||
|
@@ -740,6 +740,8 @@ OK 2 - \_[^8-9]\+
|
|||||||
OK 0 - \_[^a]\+
|
OK 0 - \_[^a]\+
|
||||||
OK 1 - \_[^a]\+
|
OK 1 - \_[^a]\+
|
||||||
OK 2 - \_[^a]\+
|
OK 2 - \_[^a]\+
|
||||||
|
OK 0 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
|
||||||
|
OK 1 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
|
||||||
192.168.0.1
|
192.168.0.1
|
||||||
192.168.0.1
|
192.168.0.1
|
||||||
192.168.0.1
|
192.168.0.1
|
||||||
|
@@ -85,7 +85,11 @@ STARTTEST
|
|||||||
: continue
|
: continue
|
||||||
: endif
|
: endif
|
||||||
: let &regexpengine = engine
|
: let &regexpengine = engine
|
||||||
: let l = matchlist(text, pat)
|
: try
|
||||||
|
: let l = matchlist(text, pat)
|
||||||
|
: catch
|
||||||
|
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
|
||||||
|
: endtry
|
||||||
:" check the match itself
|
:" check the match itself
|
||||||
: if len(l) == 0 && len(t) > matchidx
|
: if len(l) == 0 && len(t) > matchidx
|
||||||
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
|
: $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
|
||||||
|
@@ -728,6 +728,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
1073,
|
||||||
/**/
|
/**/
|
||||||
1072,
|
1072,
|
||||||
/**/
|
/**/
|
||||||
|
Reference in New Issue
Block a user