0
0
mirror of https://github.com/vim/vim.git synced 2025-09-23 03:43:49 -04:00

updated for version 7.3.1017

Problem:    Zero width match changes length of match.
Solution:   For a zero width match put new states in the current position in
            the state list.
This commit is contained in:
Bram Moolenaar
2013-05-25 20:19:50 +02:00
parent f47ca63dbc
commit 4b41706477
5 changed files with 99 additions and 37 deletions

View File

@@ -29,8 +29,6 @@
typedef struct regengine regengine_T; typedef struct regengine regengine_T;
typedef struct thread thread_T;
/* /*
* Structure returned by vim_regcomp() to pass on to vim_regexec(). * Structure returned by vim_regcomp() to pass on to vim_regexec().
* This is the general structure. For the actual matcher, two specific * This is the general structure. For the actual matcher, two specific

View File

@@ -2471,24 +2471,27 @@ theend:
* NFA execution code. * NFA execution code.
****************************************************************/ ****************************************************************/
/* thread_T contains runtime information of a NFA state */ /* nfa_thread_T contains runtime information of a NFA state */
struct thread typedef struct
{ {
nfa_state_T *state; nfa_state_T *state;
regsub_T sub; /* submatch info */ regsub_T sub; /* Submatch info. TODO: expensive! */
}; } nfa_thread_T;
typedef struct typedef struct
{ {
thread_T *t; nfa_thread_T *t;
int n; int n;
} List; } nfa_list_T;
static void addstate __ARGS((List *l, nfa_state_T *state, regsub_T *m, int off, int lid, int *match)); static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid, int *match));
static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *match, int *ip));
static void static void
addstate(l, state, m, off, lid, match) addstate(l, state, m, off, lid, match)
List *l; /* runtime state list */ nfa_list_T *l; /* runtime state list */
nfa_state_T *state; /* state to update */ nfa_state_T *state; /* state to update */
regsub_T *m; /* pointers to subexpressions */ regsub_T *m; /* pointers to subexpressions */
int off; /* byte offset, when -1 go to next line */ int off; /* byte offset, when -1 go to next line */
@@ -2497,7 +2500,7 @@ addstate(l, state, m, off, lid, match)
{ {
regsub_T save; regsub_T save;
int subidx = 0; int subidx = 0;
thread_T *lastthread; nfa_thread_T *lastthread;
if (l == NULL || state == NULL) if (l == NULL || state == NULL)
return; return;
@@ -2533,7 +2536,7 @@ addstate(l, state, m, off, lid, match)
state->lastlist = lid; state->lastlist = lid;
lastthread = &l->t[l->n++]; lastthread = &l->t[l->n++];
lastthread->state = state; lastthread->state = state;
lastthread->sub = *m; lastthread->sub = *m; /* TODO: expensive! */
} }
} }
@@ -2697,6 +2700,54 @@ addstate(l, state, m, off, lid, match)
} }
} }
/*
 * Like addstate(), but the new state(s) are put at position "*ip", replacing
 * the thread that is currently there.
 * Used for zero-width matches, next state to use is the added one.
 * This makes sure the order of states to be tried does not change, which
 * matters for alternatives.
 *
 * On return "*ip" is set to one before the first inserted state when states
 * were moved into place.  "*ip" is left unchanged when the current thread was
 * the last one in the list (the new states already follow it) or when
 * addstate() did not add anything.
 */
    static void
addstate_here(l, state, m, lid, matchp, ip)
    nfa_list_T		*l;	/* runtime state list */
    nfa_state_T		*state; /* state to update */
    regsub_T		*m;	/* pointers to subexpressions */
    int			lid;	/* list ID, passed on to addstate() */
    int			*matchp;	/* found match? (passed on to addstate()) */
    int			*ip;	/* in/out: index of the current thread in "l" */
{
    int tlen = l->n;		/* list length before adding */
    int count;			/* number of threads addstate() appended */
    int i = *ip;

    /* first add the state(s) at the end, so that we know how many there are */
    addstate(l, state, m, 0, lid, matchp);

    /* when "*ip" was at the end of the list, nothing to do */
    if (i + 1 == tlen)
	return;

    count = l->n - tlen;

    /* No state got added: leave the list alone.  Falling through to the
     * "overwrite" case here would copy the last thread of the list over the
     * current one, which changes the order in which threads are tried. */
    if (count == 0)
	return;

    /* re-order to put the new state(s) at the current position */
    if (count == 1)
    {
	/* overwrite the current state */
	l->t[i] = l->t[l->n - 1];
    }
    else
    {
	/* Make space for the new states, then move them from the end to the
	 * current position.  The first move shifts everything after "i"
	 * (including the just added states) up by "count - 1" entries, after
	 * which the added states start at index "l->n - 1".
	 * NOTE(review): this writes up to "count - 1" entries past "l->n" and
	 * thus relies on "l->t" having that much spare room.  nfa_list_T has
	 * no capacity field to check against - confirm with the allocation in
	 * nfa_regmatch(). */
	mch_memmove(&(l->t[i + count]),
		&(l->t[i + 1]),
		sizeof(nfa_thread_T) * (l->n - i - 1));
	mch_memmove(&(l->t[i]),
		&(l->t[l->n - 1]),
		sizeof(nfa_thread_T) * count);
    }

    /* the current thread was replaced, the list is one entry shorter */
    --l->n;

    /* Step back one, so that advancing the index by one continues with the
     * first inserted state. */
    *ip = i - 1;
}
/* /*
* Check character class "class" against current character c. * Check character class "class" against current character c.
*/ */
@@ -2872,17 +2923,17 @@ nfa_regmatch(start, submatch, m)
int match = FALSE; int match = FALSE;
int flag = 0; int flag = 0;
int old_reglnum = -1; int old_reglnum = -1;
int go_to_nextline; int go_to_nextline = FALSE;
thread_T *t; nfa_thread_T *t;
char_u *old_reginput = NULL; char_u *old_reginput = NULL;
char_u *old_regline = NULL; char_u *old_regline = NULL;
List list[3]; nfa_list_T list[3];
List *listtbl[2][2]; nfa_list_T *listtbl[2][2];
List *ll; nfa_list_T *ll;
int listid = 1; int listid = 1;
List *thislist; nfa_list_T *thislist;
List *nextlist; nfa_list_T *nextlist;
List *neglist; nfa_list_T *neglist;
int *listids = NULL; int *listids = NULL;
int j = 0; int j = 0;
#ifdef NFA_REGEXP_DEBUG_LOG #ifdef NFA_REGEXP_DEBUG_LOG
@@ -2896,10 +2947,10 @@ nfa_regmatch(start, submatch, m)
#endif #endif
/* Allocate memory for the lists of nodes */ /* Allocate memory for the lists of nodes */
size = (nstate + 1) * sizeof(thread_T); size = (nstate + 1) * sizeof(nfa_thread_T);
list[0].t = (thread_T *)lalloc(size, TRUE); list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
list[1].t = (thread_T *)lalloc(size, TRUE); list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
list[2].t = (thread_T *)lalloc(size, TRUE); list[2].t = (nfa_thread_T *)lalloc(size, TRUE);
if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL)
goto theend; goto theend;
vim_memset(list[0].t, 0, size); vim_memset(list[0].t, 0, size);
@@ -3056,8 +3107,8 @@ nfa_regmatch(start, submatch, m)
* nfa_regmatch(). Submatches are stored in *m, and used in * nfa_regmatch(). Submatches are stored in *m, and used in
* the parent call. */ * the parent call. */
if (start->c == NFA_MOPEN + 0) if (start->c == NFA_MOPEN + 0)
addstate(thislist, t->state->out, &t->sub, 0, listid, addstate_here(thislist, t->state->out, &t->sub, listid,
&match); &match, &i);
else else
{ {
*m = t->sub; *m = t->sub;
@@ -3130,8 +3181,8 @@ nfa_regmatch(start, submatch, m)
t->sub.end[j] = m->end[j]; t->sub.end[j] = m->end[j];
} }
/* t->state->out1 is the corresponding END_INVISIBLE node */ /* t->state->out1 is the corresponding END_INVISIBLE node */
addstate(thislist, t->state->out1->out, &t->sub, 0, listid, addstate_here(thislist, t->state->out1->out, &t->sub,
&match); listid, &match, &i);
} }
else else
{ {
@@ -3142,14 +3193,14 @@ nfa_regmatch(start, submatch, m)
case NFA_BOL: case NFA_BOL:
if (reginput == regline) if (reginput == regline)
addstate(thislist, t->state->out, &t->sub, 0, listid, addstate_here(thislist, t->state->out, &t->sub, listid,
&match); &match, &i);
break; break;
case NFA_EOL: case NFA_EOL:
if (c == NUL) if (c == NUL)
addstate(thislist, t->state->out, &t->sub, 0, listid, addstate_here(thislist, t->state->out, &t->sub, listid,
&match); &match, &i);
break; break;
case NFA_BOW: case NFA_BOW:
@@ -3176,8 +3227,8 @@ nfa_regmatch(start, submatch, m)
&& vim_iswordc_buf(reginput[-1], reg_buf))) && vim_iswordc_buf(reginput[-1], reg_buf)))
bow = FALSE; bow = FALSE;
if (bow) if (bow)
addstate(thislist, t->state->out, &t->sub, 0, listid, addstate_here(thislist, t->state->out, &t->sub, listid,
&match); &match, &i);
break; break;
} }
@@ -3204,8 +3255,8 @@ nfa_regmatch(start, submatch, m)
|| (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf)))
eow = FALSE; eow = FALSE;
if (eow) if (eow)
addstate(thislist, t->state->out, &t->sub, 0, listid, addstate_here(thislist, t->state->out, &t->sub, listid,
&match); &match, &i);
break; break;
} }

View File

@@ -270,6 +270,7 @@ STARTTEST
:call add(tl, ['aa \zsax', ' ax']) " must match before \zs :call add(tl, ['aa \zsax', ' ax']) " must match before \zs
:call add(tl, ['abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match']) :call add(tl, ['abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match'])
:call add(tl, ['\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last']) :call add(tl, ['\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last'])
:call add(tl, ['\>\zs.', 'aword. ', '.'])
:"""" Tests for \@ features :"""" Tests for \@ features
:call add(tl, ['abc\@=', 'abc', 'ab']) :call add(tl, ['abc\@=', 'abc', 'ab'])
@@ -299,6 +300,12 @@ STARTTEST
:call add(tl, ['\%u0020', 'yes no', ' ']) :call add(tl, ['\%u0020', 'yes no', ' '])
:call add(tl, ['\%U00000020', 'yes no', ' ']) :call add(tl, ['\%U00000020', 'yes no', ' '])
:"""" Alternatives, must use first longest match
:call add(tl, ['goo\|go', 'google', 'goo'])
:call add(tl, ['\<goo\|\<go', 'google', 'goo'])
:call add(tl, ['\<goo\|go', 'google', 'goo'])
:"""" Run the tests :"""" Run the tests
:" :"

View File

@@ -209,6 +209,7 @@ OK - abc\zsdd
OK - aa \zsax OK - aa \zsax
OK - abc \zsmatch\ze abc OK - abc \zsmatch\ze abc
OK - \v(a \zsif .*){2} OK - \v(a \zsif .*){2}
OK - \>\zs.
OK - abc\@= OK - abc\@=
OK - abc\@=cd OK - abc\@=cd
OK - abc\@= OK - abc\@=
@@ -231,4 +232,7 @@ OK - \%o40
OK - \%x20 OK - \%x20
OK - \%u0020 OK - \%u0020
OK - \%U00000020 OK - \%U00000020
OK - goo\|go
OK - \<goo\|\<go
OK - \<goo\|go
192.168.0.1 192.168.0.1

View File

@@ -728,6 +728,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
1017,
/**/ /**/
1016, 1016,
/**/ /**/