1
0
forked from aniani/vim

updated for version 7.3.1015

Problem:    New regexp engine: Matching composing characters is wrong.
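Solution:   Fix matching composing characters: also create a composing atom
            when the pattern has only composing characters (no base
            character), and ignore the base character when matching it.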
This commit is contained in:
Bram Moolenaar
2013-05-25 14:42:03 +02:00
parent 152e7890c1
commit 56d58d51bf
4 changed files with 38 additions and 5 deletions

View File

@@ -716,6 +716,7 @@ nfa_regatom()
* the composing char is matched here. */ * the composing char is matched here. */
if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr())) if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
{ {
old_regparse = regparse;
c = getchr(); c = getchr();
goto nfa_do_multibyte; goto nfa_do_multibyte;
} }
@@ -1217,9 +1218,11 @@ collection:
nfa_do_multibyte: nfa_do_multibyte:
/* Length of current char with composing chars. */ /* Length of current char with composing chars. */
if (enc_utf8 && clen != (plen = (*mb_ptr2len)(old_regparse))) if (enc_utf8 && (clen != (plen = (*mb_ptr2len)(old_regparse))
|| utf_iscomposing(c)))
{ {
/* A base character plus composing characters. /* A base character plus composing characters, or just one
* or more composing characters.
* This requires creating a separate atom as if enclosing * This requires creating a separate atom as if enclosing
* the characters in (), where NFA_COMPOSING is the ( and * the characters in (), where NFA_COMPOSING is the ( and
* NFA_END_COMPOSING is the ). Note that right now we are * NFA_END_COMPOSING is the ). Note that right now we are
@@ -1400,7 +1403,6 @@ nfa_regpiece()
/* Save pos after the repeated atom and the \{} */ /* Save pos after the repeated atom and the \{} */
new_regparse = regparse; new_regparse = regparse;
new_regparse = regparse;
quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY); quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY);
for (i = 0; i < maxval; i++) for (i = 0; i < maxval; i++)
{ {
@@ -3218,11 +3220,19 @@ nfa_regmatch(start, submatch, m)
result = OK; result = OK;
sta = t->state->out; sta = t->state->out;
len = 0; len = 0;
if (utf_iscomposing(sta->c))
{
/* Only match composing character(s), ignore base
* character. Used for ".{composing}" and "{composing}"
* (no preceding character). */
len += mb_char2len(c);
}
if (ireg_icombine) if (ireg_icombine)
{ {
/* If \Z was present, then ignore composing characters. */ /* If \Z was present, then ignore composing characters.
* When ignoring the base character this always matches. */
/* TODO: How about negated? */ /* TODO: How about negated? */
if (sta->c != c) if (len == 0 && sta->c != c)
result = FAIL; result = FAIL;
len = n; len = n;
while (sta->c != NFA_END_COMPOSING) while (sta->c != NFA_END_COMPOSING)

View File

@@ -38,6 +38,15 @@ STARTTEST
:"""" Test composing character matching :"""" Test composing character matching
:call add(tl, ['.ม', 'xม่x yมy', 'yม']) :call add(tl, ['.ม', 'xม่x yมy', 'yม'])
:call add(tl, ['.ม่', 'xม่x yมy', 'xม่']) :call add(tl, ['.ม่', 'xม่x yมy', 'xม่'])
:call add(tl, ["\u05b9", " x\u05b9 ", "x\u05b9"])
:call add(tl, [".\u05b9", " x\u05b9 ", "x\u05b9"])
:call add(tl, ["\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, [".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
:call add(tl, ["\u05bb\u05b9", " x\u05b9\u05bb "])
:call add(tl, [".\u05bb\u05b9", " x\u05b9\u05bb "])
:call add(tl, ["\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
:call add(tl, [".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
:"""" Test \Z :"""" Test \Z
:call add(tl, ['ú\Z', 'x']) :call add(tl, ['ú\Z', 'x'])
@@ -50,6 +59,8 @@ STARTTEST
:call add(tl, ["ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"]) :call add(tl, ["ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
:call add(tl, ["ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"]) :call add(tl, ["ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
:call add(tl, ["ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"]) :call add(tl, ["ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
:call add(tl, ["\u05b9\\+\\Z", "xyz", "xyz"])
:call add(tl, ["\\Z\u05b9\\+", "xyz", "xyz"])
:"""" Combining different tests and features :"""" Combining different tests and features
:call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd']) :call add(tl, ['[^[=a=]]\+', 'ddaãâbcd', 'dd'])

View File

@@ -11,6 +11,14 @@ OK - \f\+
OK - \%#=1\f\+ OK - \%#=1\f\+
OK - .ม OK - .ม
OK - .ม่ OK - .ม่
OK - ֹ
OK - .ֹ
OK - ֹֻ
OK - .ֹֻ
OK - ֹֻ
OK - .ֹֻ
OK - ֹ
OK - .ֹ
OK - ú\Z OK - ú\Z
OK - יהוה\Z OK - יהוה\Z
OK - יְהוָה\Z OK - יְהוָה\Z
@@ -21,4 +29,6 @@ OK - קֹx\Z
OK - קֹx\Z OK - קֹx\Z
OK - קx\Z OK - קx\Z
OK - קx\Z OK - קx\Z
OK - ֹ\+\Z
OK - \Zֹ\+
OK - [^[=a=]]\+ OK - [^[=a=]]\+

View File

@@ -728,6 +728,8 @@ static char *(features[]) =
static int included_patches[] = static int included_patches[] =
{ /* Add new patch number below this line */ { /* Add new patch number below this line */
/**/
1015,
/**/ /**/
1014, 1014,
/**/ /**/