forked from aniani/vim
patch 8.2.2278: falling back to old regexp engine can break some patterns
Problem: Falling back to the old regexp engine can break some patterns. Solution: Do not fall back once [[:lower:]] or [[:upper:]] is used. (Christian Brabandt, closes #7572)
This commit is contained in:
@@ -294,6 +294,7 @@ init_class_tab(void)
|
|||||||
|
|
||||||
static char_u *regparse; // Input-scan pointer.
|
static char_u *regparse; // Input-scan pointer.
|
||||||
static int regnpar; // () count.
|
static int regnpar; // () count.
|
||||||
|
static int wants_nfa; // regex should use NFA engine
|
||||||
#ifdef FEAT_SYN_HL
|
#ifdef FEAT_SYN_HL
|
||||||
static int regnzpar; // \z() count.
|
static int regnzpar; // \z() count.
|
||||||
static int re_has_z; // \z item detected
|
static int re_has_z; // \z item detected
|
||||||
@@ -381,6 +382,9 @@ static int cstrncmp(char_u *s1, char_u *s2, int *n);
|
|||||||
static char_u *cstrchr(char_u *, int);
|
static char_u *cstrchr(char_u *, int);
|
||||||
static int re_mult_next(char *what);
|
static int re_mult_next(char *what);
|
||||||
static int reg_iswordc(int);
|
static int reg_iswordc(int);
|
||||||
|
#ifdef FEAT_EVAL
|
||||||
|
static void report_re_switch(char_u *pat);
|
||||||
|
#endif
|
||||||
|
|
||||||
static regengine_T bt_regengine;
|
static regengine_T bt_regengine;
|
||||||
static regengine_T nfa_regengine;
|
static regengine_T nfa_regengine;
|
||||||
@@ -2662,7 +2666,7 @@ vim_regcomp(char_u *expr_arg, int re_flags)
|
|||||||
if (prog == NULL)
|
if (prog == NULL)
|
||||||
{
|
{
|
||||||
#ifdef BT_REGEXP_DEBUG_LOG
|
#ifdef BT_REGEXP_DEBUG_LOG
|
||||||
if (regexp_engine != BACKTRACKING_ENGINE) // debugging log for NFA
|
if (regexp_engine == BACKTRACKING_ENGINE) // debugging log for BT engine
|
||||||
{
|
{
|
||||||
FILE *f;
|
FILE *f;
|
||||||
f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
|
f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
|
||||||
@@ -2686,6 +2690,9 @@ vim_regcomp(char_u *expr_arg, int re_flags)
|
|||||||
&& called_emsg == called_emsg_before)
|
&& called_emsg == called_emsg_before)
|
||||||
{
|
{
|
||||||
regexp_engine = BACKTRACKING_ENGINE;
|
regexp_engine = BACKTRACKING_ENGINE;
|
||||||
|
#ifdef FEAT_EVAL
|
||||||
|
report_re_switch(expr);
|
||||||
|
#endif
|
||||||
prog = bt_regengine.regcomp(expr, re_flags);
|
prog = bt_regengine.regcomp(expr, re_flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -253,6 +253,12 @@ static int nfa_re_flags; // re_flags passed to nfa_regcomp()
|
|||||||
static int *post_start; // holds the postfix form of r.e.
|
static int *post_start; // holds the postfix form of r.e.
|
||||||
static int *post_end;
|
static int *post_end;
|
||||||
static int *post_ptr;
|
static int *post_ptr;
|
||||||
|
|
||||||
|
// Set when the pattern should use the NFA engine.
|
||||||
|
// E.g. [[:upper:]] only allows 8bit characters for BT engine,
|
||||||
|
// while NFA engine handles multibyte characters correctly.
|
||||||
|
static int wants_nfa;
|
||||||
|
|
||||||
static int nstate; // Number of states in the NFA.
|
static int nstate; // Number of states in the NFA.
|
||||||
static int istate; // Index in the state vector, used in alloc_state()
|
static int istate; // Index in the state vector, used in alloc_state()
|
||||||
|
|
||||||
@@ -306,6 +312,7 @@ nfa_regcomp_start(
|
|||||||
return FAIL;
|
return FAIL;
|
||||||
post_ptr = post_start;
|
post_ptr = post_start;
|
||||||
post_end = post_start + nstate_max;
|
post_end = post_start + nstate_max;
|
||||||
|
wants_nfa = FALSE;
|
||||||
rex.nfa_has_zend = FALSE;
|
rex.nfa_has_zend = FALSE;
|
||||||
rex.nfa_has_backref = FALSE;
|
rex.nfa_has_backref = FALSE;
|
||||||
|
|
||||||
@@ -1707,6 +1714,7 @@ collection:
|
|||||||
EMIT(NFA_CLASS_GRAPH);
|
EMIT(NFA_CLASS_GRAPH);
|
||||||
break;
|
break;
|
||||||
case CLASS_LOWER:
|
case CLASS_LOWER:
|
||||||
|
wants_nfa = TRUE;
|
||||||
EMIT(NFA_CLASS_LOWER);
|
EMIT(NFA_CLASS_LOWER);
|
||||||
break;
|
break;
|
||||||
case CLASS_PRINT:
|
case CLASS_PRINT:
|
||||||
@@ -1719,6 +1727,7 @@ collection:
|
|||||||
EMIT(NFA_CLASS_SPACE);
|
EMIT(NFA_CLASS_SPACE);
|
||||||
break;
|
break;
|
||||||
case CLASS_UPPER:
|
case CLASS_UPPER:
|
||||||
|
wants_nfa = TRUE;
|
||||||
EMIT(NFA_CLASS_UPPER);
|
EMIT(NFA_CLASS_UPPER);
|
||||||
break;
|
break;
|
||||||
case CLASS_XDIGIT:
|
case CLASS_XDIGIT:
|
||||||
@@ -2137,9 +2146,15 @@ nfa_regpiece(void)
|
|||||||
|
|
||||||
// The engine is very inefficient (uses too many states) when the
|
// The engine is very inefficient (uses too many states) when the
|
||||||
// maximum is much larger than the minimum and when the maximum is
|
// maximum is much larger than the minimum and when the maximum is
|
||||||
// large. Bail out if we can use the other engine.
|
// large. However, when maxval is MAX_LIMIT, it is okay, as this
|
||||||
|
// will emit NFA_STAR.
|
||||||
|
// Bail out if we can use the other engine, but only when the
|
||||||
|
// pattern does not need the NFA engine (e.g. [[:upper:]]\{2,\}
|
||||||
|
// does not work with characters > 8 bit with the BT engine)
|
||||||
if ((nfa_re_flags & RE_AUTO)
|
if ((nfa_re_flags & RE_AUTO)
|
||||||
&& (maxval > 500 || maxval > minval + 200))
|
&& (maxval > 500 || maxval > minval + 200)
|
||||||
|
&& (maxval != MAX_LIMIT && minval < 200)
|
||||||
|
&& !wants_nfa)
|
||||||
return FAIL;
|
return FAIL;
|
||||||
|
|
||||||
// Ignore previous call to nfa_regatom()
|
// Ignore previous call to nfa_regatom()
|
||||||
|
@@ -510,6 +510,52 @@ func Test_match_start_of_line_combining()
|
|||||||
bwipe!
|
bwipe!
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
" Check that [[:upper:]] matches for automatic engine
|
||||||
|
func Test_match_char_class_upper()
|
||||||
|
new
|
||||||
|
let _engine=&regexpengine
|
||||||
|
|
||||||
|
" Test 1: [[:upper:]]\{2,\}
|
||||||
|
set regexpengine=0
|
||||||
|
call setline(1, ['05. ПЕСНЯ О ГЕРОЯХ муз. А. Давиденко, М. Коваля и Б. Шехтера ...', '05. PJESNJA O GJEROJAKH mus. A. Davidjenko, M. Kovalja i B. Shjekhtjera ...'])
|
||||||
|
call cursor(1,1)
|
||||||
|
let search_cmd='norm /\<[[:upper:]]\{2,\}\>' .. "\<CR>"
|
||||||
|
exe search_cmd
|
||||||
|
call assert_equal(4, searchcount().total, 'TEST 1')
|
||||||
|
set regexpengine=1
|
||||||
|
exe search_cmd
|
||||||
|
call assert_equal(2, searchcount().total, 'TEST 1')
|
||||||
|
set regexpengine=2
|
||||||
|
exe search_cmd
|
||||||
|
call assert_equal(4, searchcount().total, 'TEST 1')
|
||||||
|
|
||||||
|
" Test 2: [[:upper:]].\+
|
||||||
|
let search_cmd='norm /\<[[:upper:]].\+\>' .. "\<CR>"
|
||||||
|
set regexpengine=0
|
||||||
|
exe search_cmd
|
||||||
|
call assert_equal(2, searchcount().total, 'TEST 2')
|
||||||
|
set regexpengine=1
|
||||||
|
exe search_cmd
|
||||||
|
call assert_equal(1, searchcount().total, 'TEST 2')
|
||||||
|
set regexpengine=2
|
||||||
|
exe search_cmd
|
||||||
|
call assert_equal(2, searchcount().total, 'TEST 2')
|
||||||
|
|
||||||
|
" Test 3: [[:lower:]]\+
|
||||||
|
let search_cmd='norm /\<[[:lower:]]\+\>' .. "\<CR>"
|
||||||
|
set regexpengine=0
|
||||||
|
exe search_cmd
|
||||||
|
call assert_equal(4, searchcount().total, 'TEST 3 lower')
|
||||||
|
set regexpengine=1
|
||||||
|
exe search_cmd
|
||||||
|
call assert_equal(2, searchcount().total, 'TEST 3 lower')
|
||||||
|
set regexpengine=2
|
||||||
|
exe search_cmd
|
||||||
|
call assert_equal(4, searchcount().total, 'TEST 3 lower')
|
||||||
|
|
||||||
|
" clean up
|
||||||
|
let &regexpengine=_engine
|
||||||
|
bwipe!
|
||||||
|
endfunc
|
||||||
|
|
||||||
" vim: shiftwidth=2 sts=2 expandtab
|
" vim: shiftwidth=2 sts=2 expandtab
|
||||||
|
@@ -750,6 +750,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
2278,
|
||||||
/**/
|
/**/
|
||||||
2277,
|
2277,
|
||||||
/**/
|
/**/
|
||||||
|
Reference in New Issue
Block a user