Fix 'getccol()' and 'getgoal()' functions for multibyte UTF-8 characters

These functions convert the byte offset into the column number
(getccol()) and vice versa (getgoal()).

Getting this right means that moving up and down the text gets us the
right columns, rather than moving randomly left and right when you move
up and down.  We also won't end up in the middle of a UTF-8 character,
because we're not just moving into some random byte offset, we're moving
into a proper column.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Linus Torvalds 2012-07-15 14:36:38 -07:00
parent 1edeced67c
commit ddd45dbff1
2 changed files with 22 additions and 8 deletions

14
basic.c
View File

@ -24,25 +24,31 @@
*/
static int getgoal(struct line *dlp)
{
	/*
	 * Walk the line one decoded character at a time, tracking the
	 * display column each character ends on, and return the byte
	 * offset of the first character that would end past `curgoal`
	 * (or the line length if the whole line fits).
	 *
	 * Advancing by the decoded width rather than by one byte keeps the
	 * returned offset on a character boundary, never in the middle of
	 * a multibyte UTF-8 sequence.
	 */
	int col = 0;			/* column reached so far */
	int dbo = 0;			/* byte offset into the line */
	int len = llength(dlp);

	/* `<` rather than `!=` so an over-long width can never skip past len */
	while (dbo < len) {
		unicode_t c;
		int width = utf8_to_unicode(dlp->l_text, dbo, len, &c);
		int newcol = col;

		/* Take tabs, ^X and \xx hex characters into account */
		if (c == '\t')
			newcol |= tabmask;	/* jump to just before the next tab stop */
		else if (c < 0x20 || c == 0x7F)
			++newcol;		/* two columns in total: ^X */
		else if (c >= 0x80 && c <= 0xa0)
			newcol += 2;		/* three columns in total: \xx */
		++newcol;

		if (newcol > curgoal)
			break;
		col = newcol;
		dbo += width;
	}
	return dbo;
}

View File

@ -124,16 +124,24 @@ int getcline(void)
*/
int getccol(int bflg)
{
int c, i, col;
col = 0;
for (i = 0; i < curwp->w_doto; ++i) {
c = lgetc(curwp->w_dotp, i);
int i, col;
struct line *dlp = curwp->w_dotp;
int byte_offset = curwp->w_doto;
int len = llength(dlp);
col = i = 0;
while (i < byte_offset) {
unicode_t c;
i += utf8_to_unicode(dlp->l_text, i, len, &c);
if (c != ' ' && c != '\t' && bflg)
break;
if (c == '\t')
col |= tabmask;
else if (c < 0x20 || c == 0x7F)
++col;
else if (c >= 0x80 && c <= 0xa0)
col += 2;
++col;
}
return col;