0
0
mirror of https://github.com/vim/vim.git synced 2025-09-29 04:34:16 -04:00

patch 8.2.0901: formatting CJK text isn't optimal

Problem:    Formatting CJK text isn't optimal.
Solution:   Properly break CJK lines. (closes #3875)
This commit is contained in:
Bram Moolenaar
2020-06-04 18:22:13 +02:00
parent 9155825b24
commit e52702f003
9 changed files with 329 additions and 9 deletions

View File

@@ -45,10 +45,12 @@ internal_format(
int c) // character to be inserted (can be NUL)
{
int cc;
int skip_pos;
int save_char = NUL;
int haveto_redraw = FALSE;
int fo_ins_blank = has_format_option(FO_INS_BLANK);
int fo_multibyte = has_format_option(FO_MBYTE_BREAK);
int fo_rigor_tw = has_format_option(FO_RIGOROUS_TW);
int fo_white_par = has_format_option(FO_WHITE_PAR);
int first_line = TRUE;
colnr_T leader_len;
@@ -125,6 +127,7 @@ internal_format(
curwin->w_cursor.col = startcol;
foundcol = 0;
skip_pos = 0;
// Find position to break at.
// Stop at first entered white when 'formatoptions' has 'v'
@@ -189,8 +192,11 @@ internal_format(
if (curwin->w_cursor.col <= (colnr_T)wantcol)
break;
}
else if (cc >= 0x100 && fo_multibyte)
else if ((cc >= 0x100 || !utf_allow_break_before(cc)) && fo_multibyte)
{
int ncc;
int allow_break;
// Break after or before a multi-byte character.
if (curwin->w_cursor.col != startcol)
{
@@ -199,8 +205,14 @@ internal_format(
break;
col = curwin->w_cursor.col;
inc_cursor();
// Don't change end_foundcol if already set.
if (foundcol != curwin->w_cursor.col)
ncc = gchar_cursor();
allow_break =
(enc_utf8 && utf_allow_break(cc, ncc))
|| enc_dbcs;
// If we have already checked this position, skip!
if (curwin->w_cursor.col != skip_pos && allow_break)
{
foundcol = curwin->w_cursor.col;
end_foundcol = foundcol;
@@ -213,6 +225,7 @@ internal_format(
if (curwin->w_cursor.col == 0)
break;
ncc = cc;
col = curwin->w_cursor.col;
dec_cursor();
@@ -220,16 +233,65 @@ internal_format(
if (WHITECHAR(cc))
continue; // break with space
// Don't break until after the comment leader
// Don't break until after the comment leader.
if (curwin->w_cursor.col < leader_len)
break;
curwin->w_cursor.col = col;
skip_pos = curwin->w_cursor.col;
foundcol = curwin->w_cursor.col;
end_foundcol = foundcol;
allow_break =
(enc_utf8 && utf_allow_break(cc, ncc))
|| enc_dbcs;
// Must handle this to respect line break prohibition.
if (allow_break)
{
foundcol = curwin->w_cursor.col;
end_foundcol = foundcol;
}
if (curwin->w_cursor.col <= (colnr_T)wantcol)
break;
{
int ncc_allow_break =
(enc_utf8 && utf_allow_break_before(ncc)) || enc_dbcs;
if (allow_break)
break;
if (!ncc_allow_break && !fo_rigor_tw)
{
// Enable at most 1 punct hang outside of textwidth.
if (curwin->w_cursor.col == startcol)
{
// We are inserting a non-breakable char, postpone
// line break check to next insert.
end_foundcol = foundcol = 0;
break;
}
// Neither cc nor ncc is NUL if we are here, so
// it's safe to inc_cursor.
col = curwin->w_cursor.col;
inc_cursor();
cc = ncc;
ncc = gchar_cursor();
// handle insert
ncc = (ncc != NUL) ? ncc : c;
allow_break =
(enc_utf8 && utf_allow_break(cc, ncc))
|| enc_dbcs;
if (allow_break)
{
// Break only when we are not at end of line.
end_foundcol = foundcol =
ncc == NUL? 0 : curwin->w_cursor.col;
break;
}
curwin->w_cursor.col = col;
}
}
}
if (curwin->w_cursor.col == 0)
break;