patch 8.2.1893: fuzzy matching does not support multiple words

Problem: Fuzzy matching does not support multiple words. Solution: Add support for matching white space separated words. (Yegappan Lakshmanan, closes #7163)
2020-10-23 16:50:30 +02:00
parent 9c24cd11e2
commit 8ded5b647a
4 changed files with 195 additions and 59 deletions
--- a/runtime/doc/eval.txt
+++ b/runtime/doc/eval.txt
@@ -7367,8 +7367,15 @@ matchfuzzy({list}, {str} [, {dict}])			*matchfuzzy()*
 		the strings in {list} that fuzzy match {str}. The strings in
 		the returned list are sorted based on the matching score.
 		The optional {dict} argument always supports the following
 		items:
 		    matchseq	When this item is present and {str} contains
 				multiple words separated by white space, then
 				returns only matches that contain the words in
 				the given sequence.
 		If {list} is a list of dictionaries, then the optional {dict}
-		argument supports the following items:
+		argument supports the following additional items:
 		    key		key of the item which is fuzzy matched against
 				{str}. The value of this item should be a
 				string.
@@ -7382,6 +7389,9 @@ matchfuzzy({list}, {str} [, {dict}])			*matchfuzzy()*
 		matching is NOT supported.  The maximum supported {str} length
 		is 256.
 		When {str} has multiple words each separated by white space,
 		then the list of strings that have all the words is returned.
 		If there are no matching strings or there is an error, then an
 		empty list is returned. If the length of {str} is greater than
 		256, then an empty list is returned.
@@ -7401,7 +7411,12 @@ matchfuzzy({list}, {str} [, {dict}])			*matchfuzzy()*
 		   :echo v:oldfiles->matchfuzzy("test")
 <		results in a list of file names fuzzy matching "test". >
 		   :let l = readfile("buffer.c")->matchfuzzy("str")
-<		results in a list of lines in "buffer.c" fuzzy matching "str".
+<		results in a list of lines in "buffer.c" fuzzy matching "str". >
 		   :echo ['one two', 'two one']->matchfuzzy('two one')
 <		results in ['two one', 'one two']. >
 		   :echo ['one two', 'two one']->matchfuzzy('two one',
 						\ {'matchseq': 1})
 <		results in ['two one'].
 matchfuzzypos({list}, {str} [, {dict}])			*matchfuzzypos()*
 		Same as |matchfuzzy()|, but returns the list of matched
--- a/src/search.c
+++ b/src/search.c
@@ -4203,16 +4203,16 @@ the_end:
 * Ported from the lib_fts library authored by Forrest Smith.
 * https://github.com/forrestthewoods/lib_fts/tree/master/code
 *
- * Blog describing the algorithm:
+ * The following blog describes the fuzzy matching algorithm:
 * https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/
 *
 * Each matching string is assigned a score. The following factors are checked:
- *   Matched letter
+ *   - Matched letter
- *   Unmatched letter
+ *   - Unmatched letter
- *   Consecutively matched letters
+ *   - Consecutively matched letters
- *   Proximity to start
+ *   - Proximity to start
- *   Letter following a separator (space, underscore)
+ *   - Letter following a separator (space, underscore)
- *   Uppercase letter following lowercase (aka CamelCase)
+ *   - Uppercase letter following lowercase (aka CamelCase)
 *
 * Matched letters are good. Unmatched letters are bad. Matching near the start
 * is good. Matching the first letter in the middle of a phrase is good.
@@ -4222,16 +4222,17 @@ the_end:
 * File paths are different from file names. File extensions may be ignorable.
 * Single words care about consecutive matches but not separators or camel
 * case.
- *   Score starts at 0
+ *   Score starts at 100
 *   Matched letter: +0 points
 *   Unmatched letter: -1 point
- *   Consecutive match bonus: +5 points
+ *   Consecutive match bonus: +15 points
- *   Separator bonus: +10 points
+ *   First letter bonus: +15 points
- *   Camel case bonus: +10 points
+ *   Separator bonus: +30 points
- *   Unmatched leading letter: -3 points (max: -9)
+ *   Camel case bonus: +30 points
 *   Unmatched leading letter: -5 points (max: -15)
 *
 * There is some nuance to this. Scores don’t have an intrinsic meaning. The
- * score range isn’t 0 to 100. It’s roughly [-50, 50]. Longer words have a
+ * score range isn’t 0 to 100. It’s roughly [50, 150]. Longer words have a
 * lower minimum score due to unmatched letter penalty. Longer search patterns
 * have a higher maximum score due to match bonuses.
 *
@@ -4247,6 +4248,7 @@ the_end:
 */
 typedef struct
 {
    int		idx;		// used for stable sort
    listitem_T	*item;
    int		score;
    list_T	*lmatchpos;
@@ -4267,6 +4269,8 @@ typedef struct
 #define MAX_LEADING_LETTER_PENALTY -15
 // penalty for every letter that doesn't match
 #define UNMATCHED_LETTER_PENALTY -1
 // penalty for gap in matching positions (-2 * k)
 #define GAP_PENALTY	-2
 // Score for a string that doesn't fuzzy match the pattern
 #define SCORE_NONE	-9999
@@ -4319,6 +4323,8 @@ fuzzy_match_compute_score(
 	    // Sequential
 	    if (currIdx == (prevIdx + 1))
 		score += SEQUENTIAL_BONUS;
 	    else
 		score += GAP_PENALTY * (currIdx - prevIdx);
 	}
 	// Check for bonuses based on neighbor character value
@@ -4334,7 +4340,7 @@ fuzzy_match_compute_score(
 		while (sidx < currIdx)
 		{
 		    neighbor = (*mb_ptr2char)(p);
-		    (void)mb_ptr2char_adv(&p);
+		    MB_PTR_ADV(p);
 		    sidx++;
 		}
 		curr = (*mb_ptr2char)(p);
@@ -4362,6 +4368,10 @@ fuzzy_match_compute_score(
    return score;
 }
 /*
 * Perform a recursive search for fuzzy matching 'fuzpat' in 'str'.
 * Return the number of matching characters.
 */
    static int
 fuzzy_match_recursive(
 	char_u		*fuzpat,
@@ -4386,11 +4396,11 @@ fuzzy_match_recursive(
    // Count recursions
    ++*recursionCount;
    if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT)
-	return FALSE;
+	return 0;
    // Detect end of strings
    if (*fuzpat == '\0' || *str == '\0')
-	return FALSE;
+	return 0;
    // Loop through fuzpat and str looking for a match
    first_match = TRUE;
@@ -4411,7 +4421,7 @@ fuzzy_match_recursive(
 	    // Supplied matches buffer was too short
 	    if (nextMatch >= maxMatches)
-		return FALSE;
+		return 0;
 	    // "Copy-on-Write" srcMatches into matches
 	    if (first_match && srcMatches)
@@ -4444,12 +4454,12 @@ fuzzy_match_recursive(
 	    // Advance
 	    matches[nextMatch++] = strIdx;
 	    if (has_mbyte)
-		(void)mb_ptr2char_adv(&fuzpat);
+		MB_PTR_ADV(fuzpat);
 	    else
 		++fuzpat;
 	}
 	if (has_mbyte)
-	    (void)mb_ptr2char_adv(&str);
+	    MB_PTR_ADV(str);
 	else
 	    ++str;
 	strIdx++;
@@ -4469,12 +4479,12 @@ fuzzy_match_recursive(
 	// Recursive score is better than "this"
 	memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0]));
 	*outScore = bestRecursiveScore;
-	return TRUE;
+	return nextMatch;
    }
    else if (matched)
-	return TRUE;		// "this" score is better than recursive
+	return nextMatch;	// "this" score is better than recursive
-    return FALSE;		// no match
+    return 0;		// no match
 }
 /*
@@ -4485,45 +4495,110 @@ fuzzy_match_recursive(
 * Scores values have no intrinsic meaning.  Possible score range is not
 * normalized and varies with pattern.
 * Recursion is limited internally (default=10) to prevent degenerate cases
- * (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
+ * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
 * Uses matchidx_T for match indices. Patterns are limited to MAXMATCHES
 * characters.
 *
- * Returns TRUE if 'fuzpat' matches 'str'. Also returns the match score in
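+ * Returns TRUE if 'pat_arg' matches 'str'. When 'matchseq' is set, 'pat_arg'
+ * is matched as a whole; otherwise every white space separated word in
+ * 'pat_arg' must match 'str'. Also returns the accumulated match score in
 * 'outScore' and the matching character positions in 'matches'.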
 */
    static int
 fuzzy_match(
 	char_u		*str,
-	char_u		*fuzpat,
+	char_u		*pat_arg,
 	int		matchseq,
 	int		*outScore,
 	matchidx_T	*matches,
 	int		maxMatches)
 {
    int		recursionCount = 0;
    int		len = MB_CHARLEN(str);
    char_u	*save_pat;
    char_u	*pat;
    char_u	*p;
    int		complete = FALSE;
    int		score = 0;
    int		numMatches = 0;
    int		matchCount;
    *outScore = 0;
-    return fuzzy_match_recursive(fuzpat, str, 0, outScore, str, len, NULL,
+    save_pat = vim_strsave(pat_arg);
-	    matches, maxMatches, 0, &recursionCount);
+    if (save_pat == NULL)
 	return FALSE;
    pat = save_pat;
    p = pat;
    // Try matching each word in 'pat_arg' in 'str'
    while (TRUE)
    {
 	if (matchseq)
 	    complete = TRUE;
 	else
 	{
 	    // Extract one word from the pattern (separated by space)
 	    p = skipwhite(p);
 	    if (*p == NUL)
 		break;
 	    pat = p;
 	    while (*p != NUL && !VIM_ISWHITE(PTR2CHAR(p)))
 	    {
 		if (has_mbyte)
 		    MB_PTR_ADV(p);
 		else
 		    ++p;
 	    }
 	    if (*p == NUL)		// processed all the words
 		complete = TRUE;
 	    *p = NUL;
 	}
 	score = 0;
 	recursionCount = 0;
 	matchCount = fuzzy_match_recursive(pat, str, 0, &score, str, len, NULL,
 				matches + numMatches, maxMatches - numMatches,
 				0, &recursionCount);
 	if (matchCount == 0)
 	{
 	    numMatches = 0;
 	    break;
 	}
 	// Accumulate the match score and the number of matches
 	*outScore += score;
 	numMatches += matchCount;
 	if (complete)
 	    break;
 	// try matching the next word
 	++p;
    }
    vim_free(save_pat);
    return numMatches != 0;
 }
 /*
 * Sort the fuzzy matches in the descending order of the match score.
 * For items with same score, retain the order using the index (stable sort)
 */
    static int
-fuzzy_item_compare(const void *s1, const void *s2)
+fuzzy_match_item_compare(const void *s1, const void *s2)
 {
    int		v1 = ((fuzzyItem_T *)s1)->score;
    int		v2 = ((fuzzyItem_T *)s2)->score;
    int		idx1 = ((fuzzyItem_T *)s1)->idx;
    int		idx2 = ((fuzzyItem_T *)s2)->idx;
-    return v1 == v2 ? 0 : v1 > v2 ? -1 : 1;
+    return v1 == v2 ? (idx1 - idx2) : v1 > v2 ? -1 : 1;
 }
 /*
 * Fuzzy search the string 'str' in a list of 'items' and return the matching
 * strings in 'fmatchlist'.
 * If 'matchseq' is TRUE, then for multi-word search strings, match all the
 * words in sequence.
 * If 'items' is a list of strings, then search for 'str' in the list.
 * If 'items' is a list of dicts, then either use 'key' to lookup the string
 * for each item or use 'item_cb' Funcref function to get the string.
@@ -4531,9 +4606,10 @@ fuzzy_item_compare(const void *s1, const void *s2)
 * matches for each item.
 */
    static void
-match_fuzzy(
+fuzzy_match_in_list(
 	list_T		*items,
 	char_u		*str,
 	int		matchseq,
 	char_u		*key,
 	callback_T	*item_cb,
 	int		retmatchpos,
@@ -4561,6 +4637,7 @@ match_fuzzy(
 	char_u		*itemstr;
 	typval_T	rettv;
 	ptrs[i].idx = i;
 	ptrs[i].item = li;
 	ptrs[i].score = SCORE_NONE;
 	itemstr = NULL;
@@ -4593,25 +4670,34 @@ match_fuzzy(
 	}
 	if (itemstr != NULL
-		&& fuzzy_match(itemstr, str, &score, matches,
+		&& fuzzy_match(itemstr, str, matchseq, &score, matches,
 		    sizeof(matches) / sizeof(matches[0])))
 	{
 	    // Copy the list of matching positions in itemstr to a list, if
 	    // 'retmatchpos' is set.
 	    if (retmatchpos)
 	    {
-		int	j;
+		int	j = 0;
-		int	strsz;
+		char_u	*p;
 		ptrs[i].lmatchpos = list_alloc();
 		if (ptrs[i].lmatchpos == NULL)
 		    goto done;
-		strsz = MB_CHARLEN(str);
+
-		for (j = 0; j < strsz; j++)
+		p = str;
 		while (*p != NUL)
 		{
-		    if (list_append_number(ptrs[i].lmatchpos,
+		    if (!VIM_ISWHITE(PTR2CHAR(p)))
-				matches[j]) == FAIL)
+		    {
-			goto done;
+			if (list_append_number(ptrs[i].lmatchpos,
 				    matches[j]) == FAIL)
 			    goto done;
 			j++;
 		    }
 		    if (has_mbyte)
 			MB_PTR_ADV(p);
 		    else
 			++p;
 		}
 	    }
 	    ptrs[i].score = score;
@@ -4627,7 +4713,7 @@ match_fuzzy(
 	// Sort the list by the descending order of the match score
 	qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T),
-		fuzzy_item_compare);
+		fuzzy_match_item_compare);
 	// For matchfuzzy(), return a list of matched strings.
 	//	    ['str1', 'str2', 'str3']
@@ -4687,6 +4773,7 @@ do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos)
    callback_T	cb;
    char_u	*key = NULL;
    int		ret;
    int		matchseq = FALSE;
    CLEAR_POINTER(&cb);
@@ -4737,6 +4824,8 @@ do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos)
 		return;
 	    }
 	}
 	if ((di = dict_find(d, (char_u *)"matchseq", -1)) != NULL)
 	    matchseq = TRUE;
    }
    // get the fuzzy matches
@@ -4762,8 +4851,8 @@ do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos)
 	    goto done;
    }
-    match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), key,
+    fuzzy_match_in_list(argvars[0].vval.v_list, tv_get_string(&argvars[1]),
-	    &cb, retmatchpos, rettv->vval.v_list);
+	    matchseq, key, &cb, retmatchpos, rettv->vval.v_list);
 done:
    free_callback(&cb);
--- a/src/testdir/test_matchfuzzy.vim
+++ b/src/testdir/test_matchfuzzy.vim
@@ -22,16 +22,15 @@ func Test_matchfuzzy()
  call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa'))
  call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len())
  call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
  " matches with same score should not be reordered
  let l = ['abc1', 'abc2', 'abc3']
  call assert_equal(l, l->matchfuzzy('abc'))
  " Tests for match preferences
  " preference for camel case match
  call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo'))
  " preference for match after a separator (_ or space)
-  if has("win32")
+  call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo'))
    call assert_equal(['onetwo', 'one two', 'one_two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo'))
  else
    call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo'))
  endif
  " preference for leading letter match
  call assert_equal(['onetwo', 'xonetwo'], ['xonetwo', 'onetwo']->matchfuzzy('onetwo'))
  " preference for sequential match
@@ -42,6 +41,17 @@ func Test_matchfuzzy()
  call assert_equal(['one', 'onex', 'onexx'], ['onexx', 'one', 'onex']->matchfuzzy('one'))
  " prefer complete matches over separator matches
  call assert_equal(['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c'], ['.vim/vimrc', '.vim/vimrc_colors', '.vim/v_i_m_r_c']->matchfuzzy('vimrc'))
  " gap penalty
  call assert_equal(['xxayybxxxx', 'xxayyybxxx', 'xxayyyybxx'], ['xxayyyybxx', 'xxayyybxxx', 'xxayybxxxx']->matchfuzzy('ab'))
  " match multiple words (separated by space)
  call assert_equal(['foo bar baz'], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('baz foo'))
  call assert_equal([], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('one two'))
  call assert_equal([], ['foo bar']->matchfuzzy(" \t "))
  " test for matching a sequence of words
  call assert_equal(['bar foo'], ['foo bar', 'bar foo', 'foobar', 'barfoo']->matchfuzzy('bar foo', {'matchseq' : 1}))
  call assert_equal([#{text: 'two one'}], [#{text: 'one two'}, #{text: 'two one'}]->matchfuzzy('two one', #{key: 'text', matchseq: v:true}))
  %bw!
  eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)})
@@ -49,6 +59,7 @@ func Test_matchfuzzy()
  call assert_equal(1, len(l))
  call assert_match('needle', l[0])
  " Test for fuzzy matching dicts
  let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}]
  call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'text_cb' : {v -> v.val}}))
  call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'key' : 'val'}))
@@ -64,6 +75,9 @@ func Test_matchfuzzy()
  call assert_fails("let x = matchfuzzy(l, 'cam', test_null_dict())", 'E715:')
  call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : test_null_string()})", 'E475:')
  call assert_fails("let x = matchfuzzy(l, 'foo', {'text_cb' : test_null_function()})", 'E475:')
  " matches with same score should not be reordered
  let l = [#{text: 'abc', id: 1}, #{text: 'abc', id: 2}, #{text: 'abc', id: 3}]
  call assert_equal(l, l->matchfuzzy('abc', #{key: 'text'}))
  let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}]
  call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:')
@@ -75,7 +89,7 @@ func Test_matchfuzzy()
  let &encoding = save_enc
 endfunc
-" Test for the fuzzymatchpos() function
+" Test for the matchfuzzypos() function
 func Test_matchfuzzypos()
  call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'curl'], 'rl'))
  call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'one', 'curl'], 'rl'))
@@ -83,6 +97,10 @@ func Test_matchfuzzypos()
        \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]],
        \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello'))
  call assert_equal([['aaaaaaa'], [[0, 1, 2]]], matchfuzzypos(['aaaaaaa'], 'aaa'))
  call assert_equal([['a  b'], [[0, 3]]], matchfuzzypos(['a  b'], 'a  b'))
  call assert_equal([['a  b'], [[0, 3]]], matchfuzzypos(['a  b'], 'a    b'))
  call assert_equal([['a  b'], [[0]]], matchfuzzypos(['a  b'], '  a  '))
  call assert_equal([[], []], matchfuzzypos(['a  b'], '  '))
  call assert_equal([[], []], matchfuzzypos(['world', 'curl'], 'ab'))
  let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256))
  call assert_equal(range(256), x[1][0])
@@ -104,6 +122,12 @@ func Test_matchfuzzypos()
  " best recursive match
  call assert_equal([['xoone'], [[2, 3, 4]]], matchfuzzypos(['xoone'], 'one'))
  " match multiple words (separated by space)
  call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo'))
  call assert_equal([[], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two'))
  call assert_equal([[], []], ['foo bar']->matchfuzzypos(" \t "))
  call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]]], ['grace']->matchfuzzypos('race ace grace'))
  let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}]
  call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]],
        \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}}))
@@ -126,6 +150,7 @@ func Test_matchfuzzypos()
  call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:')
 endfunc
 " Test for matchfuzzy() with multibyte characters
 func Test_matchfuzzy_mbyte()
  CheckFeature multi_lang
  call assert_equal(['ンヹㄇヺヴ'], matchfuzzy(['ンヹㄇヺヴ'], 'ヹヺ'))
@@ -136,19 +161,19 @@ func Test_matchfuzzy_mbyte()
  call assert_equal(['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'],
        \ matchfuzzy(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ'))
  " match multiple words (separated by space)
  call assert_equal(['세 마리의 작은 돼지'], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('돼지 마리의'))
  call assert_equal([], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('파란 하늘'))
  " preference for camel case match
  call assert_equal(['oneĄwo', 'oneąwo'],
        \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo'))
  " preference for complete match then match after separator (_ or space)
-  if has("win32")
+  call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']),
    " order is different between Windows and Unix :(
    " It's important that the complete match is first
    call assert_equal(['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ'],
          \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ'))
  else
    call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']),
          \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ'))
-  endif
+  " preference for match after a separator (_ or space)
  call assert_equal(['ㄓㄔabㄟㄠ', 'ㄓㄔa_bㄟㄠ', 'ㄓㄔa bㄟㄠ'],
        \ ['ㄓㄔa_bㄟㄠ', 'ㄓㄔa bㄟㄠ', 'ㄓㄔabㄟㄠ']->matchfuzzy('ㄓㄔabㄟㄠ'))
  " preference for leading letter match
  call assert_equal(['ŗŝţũŵż', 'xŗŝţũŵż'],
        \ ['xŗŝţũŵż', 'ŗŝţũŵż']->matchfuzzy('ŗŝţũŵż'))
@@ -163,6 +188,7 @@ func Test_matchfuzzy_mbyte()
        \ ['ŗŝţxx', 'ŗŝţ', 'ŗŝţx']->matchfuzzy('ŗŝţ'))
 endfunc
 " Test for matchfuzzypos() with multibyte characters
 func Test_matchfuzzypos_mbyte()
  CheckFeature multi_lang
  call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]]],
@@ -183,9 +209,13 @@ func Test_matchfuzzypos_mbyte()
  call assert_equal(range(256), x[1][0])
  call assert_equal([[], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257)))
  " match multiple words (separated by space)
  call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의'))
  call assert_equal([[], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘'))
  " match in a long string
-  call assert_equal([[repeat('♪', 300) .. '✗✗✗'], [[300, 301, 302]]],
+  call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]]],
-        \ matchfuzzypos([repeat('♪', 300) .. '✗✗✗'], '✗✗✗'))
+        \ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ'))
  " preference for camel case match
  call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ'))
  " preference for match after a separator (_ or space)
--- a/src/version.c
+++ b/src/version.c
@@ -750,6 +750,8 @@ static char *(features[]) =
 static int included_patches[] =
 {   /* Add new patch number below this line */
 /**/
    1893,
 /**/
    1892,
 /**/