mirror of
https://github.com/vim/vim.git
synced 2025-09-24 03:44:06 -04:00
patch 8.2.0938: NFA regexp uses tolower() to compare ignore-case
Problem: NFA regexp uses tolower() to compare ignore-case. (Thayne McCombs) Solution: Use utf_fold() when possible. (ref. neovim #12456)
This commit is contained in:
@@ -747,7 +747,7 @@ diff_write_buffer(buf_T *buf, diffin_T *din)
|
|||||||
|
|
||||||
// xdiff doesn't support ignoring case, fold-case the text.
|
// xdiff doesn't support ignoring case, fold-case the text.
|
||||||
c = PTR2CHAR(s);
|
c = PTR2CHAR(s);
|
||||||
c = enc_utf8 ? utf_fold(c) : MB_TOLOWER(c);
|
c = MB_CASEFOLD(c);
|
||||||
orig_len = mb_ptr2len(s);
|
orig_len = mb_ptr2len(s);
|
||||||
if (mb_char2bytes(c, cbuf) != orig_len)
|
if (mb_char2bytes(c, cbuf) != orig_len)
|
||||||
// TODO: handle byte length difference
|
// TODO: handle byte length difference
|
||||||
|
@@ -93,6 +93,7 @@
|
|||||||
#define MB_ISUPPER(c) vim_isupper(c)
|
#define MB_ISUPPER(c) vim_isupper(c)
|
||||||
#define MB_TOLOWER(c) vim_tolower(c)
|
#define MB_TOLOWER(c) vim_tolower(c)
|
||||||
#define MB_TOUPPER(c) vim_toupper(c)
|
#define MB_TOUPPER(c) vim_toupper(c)
|
||||||
|
#define MB_CASEFOLD(c) (enc_utf8 ? utf_fold(c) : MB_TOLOWER(c))
|
||||||
|
|
||||||
// Use our own isdigit() replacement, because on MS-Windows isdigit() returns
|
// Use our own isdigit() replacement, because on MS-Windows isdigit() returns
|
||||||
// non-zero for superscript 1. Also avoids that isdigit() crashes for numbers
|
// non-zero for superscript 1. Also avoids that isdigit() crashes for numbers
|
||||||
|
@@ -5459,7 +5459,7 @@ find_match_text(colnr_T startcol, int regstart, char_u *match_text)
|
|||||||
{
|
{
|
||||||
c1 = PTR2CHAR(match_text + len1);
|
c1 = PTR2CHAR(match_text + len1);
|
||||||
c2 = PTR2CHAR(rex.line + col + len2);
|
c2 = PTR2CHAR(rex.line + col + len2);
|
||||||
if (c1 != c2 && (!rex.reg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2)))
|
if (c1 != c2 && (!rex.reg_ic || MB_CASEFOLD(c1) != MB_CASEFOLD(c2)))
|
||||||
{
|
{
|
||||||
match = FALSE;
|
match = FALSE;
|
||||||
break;
|
break;
|
||||||
@@ -6271,11 +6271,11 @@ nfa_regmatch(
|
|||||||
}
|
}
|
||||||
if (rex.reg_ic)
|
if (rex.reg_ic)
|
||||||
{
|
{
|
||||||
int curc_low = MB_TOLOWER(curc);
|
int curc_low = MB_CASEFOLD(curc);
|
||||||
int done = FALSE;
|
int done = FALSE;
|
||||||
|
|
||||||
for ( ; c1 <= c2; ++c1)
|
for ( ; c1 <= c2; ++c1)
|
||||||
if (MB_TOLOWER(c1) == curc_low)
|
if (MB_CASEFOLD(c1) == curc_low)
|
||||||
{
|
{
|
||||||
result = result_if_matched;
|
result = result_if_matched;
|
||||||
done = TRUE;
|
done = TRUE;
|
||||||
@@ -6287,8 +6287,8 @@ nfa_regmatch(
|
|||||||
}
|
}
|
||||||
else if (state->c < 0 ? check_char_class(state->c, curc)
|
else if (state->c < 0 ? check_char_class(state->c, curc)
|
||||||
: (curc == state->c
|
: (curc == state->c
|
||||||
|| (rex.reg_ic && MB_TOLOWER(curc)
|
|| (rex.reg_ic && MB_CASEFOLD(curc)
|
||||||
== MB_TOLOWER(state->c))))
|
== MB_CASEFOLD(state->c))))
|
||||||
{
|
{
|
||||||
result = result_if_matched;
|
result = result_if_matched;
|
||||||
break;
|
break;
|
||||||
@@ -6713,7 +6713,7 @@ nfa_regmatch(
|
|||||||
result = (c == curc);
|
result = (c == curc);
|
||||||
|
|
||||||
if (!result && rex.reg_ic)
|
if (!result && rex.reg_ic)
|
||||||
result = MB_TOLOWER(c) == MB_TOLOWER(curc);
|
result = MB_CASEFOLD(c) == MB_CASEFOLD(curc);
|
||||||
// If rex.reg_icombine is not set only skip over the character
|
// If rex.reg_icombine is not set only skip over the character
|
||||||
// itself. When it is set skip over composing characters.
|
// itself. When it is set skip over composing characters.
|
||||||
if (result && enc_utf8 && !rex.reg_icombine)
|
if (result && enc_utf8 && !rex.reg_icombine)
|
||||||
@@ -6882,7 +6882,7 @@ nfa_regmatch(
|
|||||||
// cheaper than adding a state that won't match.
|
// cheaper than adding a state that won't match.
|
||||||
c = PTR2CHAR(rex.input + clen);
|
c = PTR2CHAR(rex.input + clen);
|
||||||
if (c != prog->regstart && (!rex.reg_ic
|
if (c != prog->regstart && (!rex.reg_ic
|
||||||
|| MB_TOLOWER(c) != MB_TOLOWER(prog->regstart)))
|
|| MB_CASEFOLD(c) != MB_CASEFOLD(prog->regstart)))
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_LOG
|
#ifdef ENABLE_LOG
|
||||||
fprintf(log_fd, " Skipping start state, regstart does not match\n");
|
fprintf(log_fd, " Skipping start state, regstart does not match\n");
|
||||||
|
@@ -355,4 +355,23 @@ func Test_ambiwidth()
|
|||||||
set regexpengine& ambiwidth&
|
set regexpengine& ambiwidth&
|
||||||
endfunc
|
endfunc
|
||||||
|
|
||||||
|
func Run_regexp_ignore_case()
|
||||||
|
call assert_equal('iIİ', substitute('iIİ', '\([iIİ]\)', '\1', 'g'))
|
||||||
|
|
||||||
|
call assert_equal('iIx', substitute('iIİ', '\c\([İ]\)', 'x', 'g'))
|
||||||
|
call assert_equal('xxİ', substitute('iIİ', '\(i\c\)', 'x', 'g'))
|
||||||
|
call assert_equal('iIx', substitute('iIİ', '\(İ\c\)', 'x', 'g'))
|
||||||
|
call assert_equal('iIx', substitute('iIİ', '\c\(\%u0130\)', 'x', 'g'))
|
||||||
|
call assert_equal('iIx', substitute('iIİ', '\c\([\u0130]\)', 'x', 'g'))
|
||||||
|
call assert_equal('iIx', substitute('iIİ', '\c\([\u012f-\u0131]\)', 'x', 'g'))
|
||||||
|
endfunc
|
||||||
|
|
||||||
|
func Test_regexp_ignore_case()
|
||||||
|
set regexpengine=1
|
||||||
|
call Run_regexp_ignore_case()
|
||||||
|
set regexpengine=2
|
||||||
|
call Run_regexp_ignore_case()
|
||||||
|
set regexpengine&
|
||||||
|
endfunc
|
||||||
|
|
||||||
" vim: shiftwidth=2 sts=2 expandtab
|
" vim: shiftwidth=2 sts=2 expandtab
|
||||||
|
@@ -754,6 +754,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
938,
|
||||||
/**/
|
/**/
|
||||||
937,
|
937,
|
||||||
/**/
|
/**/
|
||||||
|
Reference in New Issue
Block a user