forked from aniani/vim
updated for version 7.3.1015
Problem: New regexp engine: Matching composing characters is wrong.
Solution: Fix matching composing characters.
This commit is contained in:
@@ -716,6 +716,7 @@ nfa_regatom()
|
|||||||
* the composing char is matched here. */
|
* the composing char is matched here. */
|
||||||
if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
|
if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
|
||||||
{
|
{
|
||||||
|
old_regparse = regparse;
|
||||||
c = getchr();
|
c = getchr();
|
||||||
goto nfa_do_multibyte;
|
goto nfa_do_multibyte;
|
||||||
}
|
}
|
||||||
@@ -1217,9 +1218,11 @@ collection:
|
|||||||
|
|
||||||
nfa_do_multibyte:
|
nfa_do_multibyte:
|
||||||
/* Length of current char with composing chars. */
|
/* Length of current char with composing chars. */
|
||||||
if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse)))
|
if (enc_utf8 && (clen != (plen = (*mb_ptr2len)(old_regparse))
|
||||||
|
|| utf_iscomposing(c)))
|
||||||
{
|
{
|
||||||
/* A base character plus composing characters.
|
/* A base character plus composing characters, or just one
|
||||||
|
* or more composing characters.
|
||||||
* This requires creating a separate atom as if enclosing
|
* This requires creating a separate atom as if enclosing
|
||||||
* the characters in (), where NFA_COMPOSING is the ( and
|
* the characters in (), where NFA_COMPOSING is the ( and
|
||||||
* NFA_END_COMPOSING is the ). Note that right now we are
|
* NFA_END_COMPOSING is the ). Note that right now we are
|
||||||
@@ -1400,7 +1403,6 @@ nfa_regpiece()
|
|||||||
/* Save pos after the repeated atom and the \{} */
|
/* Save pos after the repeated atom and the \{} */
|
||||||
new_regparse = regparse;
|
new_regparse = regparse;
|
||||||
|
|
||||||
new_regparse = regparse;
|
|
||||||
quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY);
|
quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY);
|
||||||
for (i = 0; i < maxval; i++)
|
for (i = 0; i < maxval; i++)
|
||||||
{
|
{
|
||||||
@@ -3218,11 +3220,19 @@ nfa_regmatch(start, submatch, m)
|
|||||||
result = OK;
|
result = OK;
|
||||||
sta = t->state->out;
|
sta = t->state->out;
|
||||||
len = 0;
|
len = 0;
|
||||||
|
if (utf_iscomposing(sta->c))
|
||||||
|
{
|
||||||
|
/* Only match composing character(s), ignore base
|
||||||
|
* character. Used for ".{composing}" and "{composing}"
|
||||||
|
* (no preceding character). */
|
||||||
|
len += mb_char2len(c);
|
||||||
|
}
|
||||||
if (ireg_icombine)
|
if (ireg_icombine)
|
||||||
{
|
{
|
||||||
/* If \Z was present, then ignore composing characters. */
|
/* If \Z was present, then ignore composing characters.
|
||||||
|
* When ignoring the base character this always matches. */
|
||||||
/* TODO: How about negated? */
|
/* TODO: How about negated? */
|
||||||
if (sta->c != c)
|
if (len == 0 && sta->c != c)
|
||||||
result = FAIL;
|
result = FAIL;
|
||||||
len = n;
|
len = n;
|
||||||
while (sta->c != NFA_END_COMPOSING)
|
while (sta->c != NFA_END_COMPOSING)
|
||||||
|
@@ -38,6 +38,15 @@ STARTTEST
|
|||||||
:"""" Test composing character matching
|
:"""" Test composing character matching
|
||||||
:call add(tl, ['.ม', 'xม่x yมy', 'yม'])
|
:call add(tl, ['.ม', 'xม่x yมy', 'yม'])
|
||||||
:call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
|
:call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
|
||||||
|
:call add(tl, ["\u05b9", " x\u05b9 ", "x\u05b9"])
|
||||||
|
:call add(tl, [".\u05b9", " x\u05b9 ", "x\u05b9"])
|
||||||
|
:call add(tl, ["\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
|
||||||
|
:call add(tl, [".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
|
||||||
|
:call add(tl, ["\u05bb\u05b9", " x\u05b9\u05bb "])
|
||||||
|
:call add(tl, [".\u05bb\u05b9", " x\u05b9\u05bb "])
|
||||||
|
:call add(tl, ["\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
|
||||||
|
:call add(tl, [".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
|
||||||
|
|
||||||
|
|
||||||
:"""" Test \Z
|
:"""" Test \Z
|
||||||
:call add(tl, ['ú\Z', 'x'])
|
:call add(tl, ['ú\Z', 'x'])
|
||||||
@@ -50,6 +59,8 @@ STARTTEST
|
|||||||
:call add(tl, ["ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
|
:call add(tl, ["ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
|
||||||
:call add(tl, ["ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
|
:call add(tl, ["ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
|
||||||
:call add(tl, ["ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
|
:call add(tl, ["ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
|
||||||
|
:call add(tl, ["\u05b9\\+\\Z", "xyz", "xyz"])
|
||||||
|
:call add(tl, ["\\Z\u05b9\\+", "xyz", "xyz"])
|
||||||
|
|
||||||
:"""" Combining different tests and features
|
:"""" Combining different tests and features
|
||||||
:call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd'])
|
:call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd'])
|
||||||
|
@@ -11,6 +11,14 @@ OK - \f\+
|
|||||||
OK - \%#=1\f\+
|
OK - \%#=1\f\+
|
||||||
OK - .ม
|
OK - .ม
|
||||||
OK - .ม่
|
OK - .ม่
|
||||||
|
OK - ֹ
|
||||||
|
OK - .ֹ
|
||||||
|
OK - ֹֻ
|
||||||
|
OK - .ֹֻ
|
||||||
|
OK - ֹֻ
|
||||||
|
OK - .ֹֻ
|
||||||
|
OK - ֹ
|
||||||
|
OK - .ֹ
|
||||||
OK - ú\Z
|
OK - ú\Z
|
||||||
OK - יהוה\Z
|
OK - יהוה\Z
|
||||||
OK - יְהוָה\Z
|
OK - יְהוָה\Z
|
||||||
@@ -21,4 +29,6 @@ OK - קֹx\Z
|
|||||||
OK - קֹx\Z
|
OK - קֹx\Z
|
||||||
OK - קx\Z
|
OK - קx\Z
|
||||||
OK - קx\Z
|
OK - קx\Z
|
||||||
|
OK - ֹ\+\Z
|
||||||
|
OK - \Zֹ\+
|
||||||
OK - [^[=a=]]\+
|
OK - [^[=a=]]\+
|
||||||
|
@@ -728,6 +728,8 @@ static char *(features[]) =
|
|||||||
|
|
||||||
static int included_patches[] =
|
static int included_patches[] =
|
||||||
{ /* Add new patch number below this line */
|
{ /* Add new patch number below this line */
|
||||||
|
/**/
|
||||||
|
1015,
|
||||||
/**/
|
/**/
|
||||||
1014,
|
1014,
|
||||||
/**/
|
/**/
|
||||||
|
Reference in New Issue
Block a user