diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt index 84deecae6f..e2206f0b46 100644 --- a/runtime/doc/options.txt +++ b/runtime/doc/options.txt @@ -1,4 +1,4 @@ -*options.txt* For Vim version 9.1. Last change: 2025 Mar 28 +*options.txt* For Vim version 9.1. Last change: 2025 Apr 04 VIM REFERENCE MANUAL by Bram Moolenaar @@ -2989,7 +2989,10 @@ A jump table for the options with a short description can be found at |Q_op|. difference. word Use internal diff to perform a |word|-wise diff and highlight the - difference. + difference. Non-alphanumeric + multi-byte characters such as emoji + and CJK characters are considered + individual words. internal Use the internal diff library. This is ignored when 'diffexpr' is set. *E960* diff --git a/src/diff.c b/src/diff.c index 3adcdb7dba..e694cf20cd 100644 --- a/src/diff.c +++ b/src/diff.c @@ -3309,10 +3309,17 @@ diff_find_change_inline_diff( char_u *s; for (s = curline; *s != NUL;) { - // Always use the first buffer's 'iskeyword' to have a consistent diff int new_in_keyword = FALSE; if (diff_flags & DIFF_INLINE_WORD) - new_in_keyword = vim_iswordp_buf(s, curtab->tp_diffbuf[file1_idx]); + { + // Always use the first buffer's 'iskeyword' to have a + // consistent diff. + // For multibyte chars, only treat alphanumeric chars + // (class 2) as "word", as other classes such as emojis and + // CJK ideographs do not usually benefit from word diff as + // Vim doesn't have a good way to segment them. 
+ new_in_keyword = (mb_get_class_buf(s, curtab->tp_diffbuf[file1_idx]) == 2); + } if (in_keyword && !new_in_keyword) { ga_append(curstr, NL); diff --git a/src/mbyte.c b/src/mbyte.c index a38ab24f32..cc8d628ed5 100644 --- a/src/mbyte.c +++ b/src/mbyte.c @@ -828,8 +828,8 @@ remove_bom(char_u *s) * Get class of pointer: * 0 for blank or NUL * 1 for punctuation - * 2 for an (ASCII) word character - * >2 for other word characters + * 2 for an alphanumeric word character + * >2 for other word characters, including CJK and emoji */ int mb_get_class(char_u *p) diff --git a/src/testdir/dumps/Test_diff_inline_word_03.dump b/src/testdir/dumps/Test_diff_inline_word_03.dump new file mode 100644 index 0000000000..30efaed3a6 --- /dev/null +++ b/src/testdir/dumps/Test_diff_inline_word_03.dump @@ -0,0 +1,20 @@ +| +0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|⛵️*2&#ff404010|一*0&#ffd7ff255|二|三*2&#ff404010|ひ*0&#ffd7ff255|ら|が*0&#4040ff13|な*0&#ffd7ff255|Δ+2&#ff404010|έ|λ|τ|α| +0&#ffd7ff255|Δ+2&#ff404010|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a||+1&#ffffff0| +0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|🛸*2&#ff404010|一*0&#ffd7ff255|二|四*2&#ff404010|ひ*0&#ffd7ff255|ら|な|δ+2&#ff404010|έ|λ|τ|α| +0&#ffd7ff255|δ+2&#ff404010|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a|r| +|~+0#4040ff13#ffffff0| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|~| @35||+1#0000000&|~+0#4040ff13&| @35 +|X+3#0000000&|d|i|f|i|l|e|1| @10|1|,|1| 
@11|A|l@1| |X+1&&|d|i|f|i|l|e|2| @10|1|,|1| @11|A|l@1 +|:+0&&> @73 diff --git a/src/testdir/test_diffmode.vim b/src/testdir/test_diffmode.vim index 1b5e5c0819..d0c2f18e4e 100644 --- a/src/testdir/test_diffmode.vim +++ b/src/testdir/test_diffmode.vim @@ -2444,6 +2444,11 @@ func Test_diff_inline() call term_sendkeys(buf, ":windo set iskeyword&\<CR>:1wincmd w\<CR>") + " word diff: test handling of multi-byte characters. Only alphanumeric chars + " (e.g. Greek alphabet, but not CJK/emoji) count as words. + call WriteDiffFiles(buf, ["🚀⛵️一二三ひらがなΔέλτα Δelta foobar"], ["🚀🛸一二四ひらなδέλτα δelta foobar"]) + call VerifyInternal(buf, "Test_diff_inline_word_03", " diffopt+=inline:word") + " char diff: should slide highlight to whitespace boundary if possible for " better readability (by using forced indent-heuristics). A wrong result " would be if the highlight is "Bar, prefix". It should be "prefixBar, " diff --git a/src/version.c b/src/version.c index d1ba7adf5a..3e45e2f95f 100644 --- a/src/version.c +++ b/src/version.c @@ -704,6 +704,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1276, /**/ 1275, /**/