Make cursor movement (largely) understand UTF-8 character boundaries

This is not perfect: it may do odd things if the text is not truly UTF-8,
and moving up and down across lines that contain UTF-8 still moves the
cursor oddly (because the byte offset within the line stays constant,
rather than the character offset). But with this change you can actually
open the UTF-8 example file and move around in it, and at least some of
the movement makes sense.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Linus Torvalds 2012-07-10 16:40:36 -07:00
parent e62cdf04cf
commit 6b793211c2
3 changed files with 31 additions and 11 deletions

26
basic.c
View File

@ -15,6 +15,7 @@
#include "edef.h"
#include "efunc.h"
#include "line.h"
#include "utf8.h"
/*
* This routine, given a pointer to a struct line, and the current cursor goal
@ -74,8 +75,15 @@ int backchar(int f, int n)
curwp->w_dotp = lp;
curwp->w_doto = llength(lp);
curwp->w_flag |= WFMOVE;
} else
curwp->w_doto--;
} else {
do {
unsigned char c;
curwp->w_doto--;
c = lgetc(curwp->w_dotp, curwp->w_doto);
if (is_beginning_utf8(c))
break;
} while (curwp->w_doto);
}
}
return TRUE;
}
@ -100,14 +108,22 @@ int forwchar(int f, int n)
if (n < 0)
return backchar(f, -n);
while (n--) {
if (curwp->w_doto == llength(curwp->w_dotp)) {
int len = llength(curwp->w_dotp);
if (curwp->w_doto == len) {
if (curwp->w_dotp == curbp->b_linep)
return FALSE;
curwp->w_dotp = lforw(curwp->w_dotp);
curwp->w_doto = 0;
curwp->w_flag |= WFMOVE;
} else
curwp->w_doto++;
} else {
do {
unsigned char c;
curwp->w_doto++;
c = lgetc(curwp->w_dotp, curwp->w_doto);
if (is_beginning_utf8(c))
break;
} while (curwp->w_doto < len);
}
}
return TRUE;
}

View File

@ -528,7 +528,6 @@ static void updall(struct window *wp)
void updpos(void)
{
struct line *lp;
int c;
int i;
/* find the current row */
@ -543,13 +542,13 @@ void updpos(void)
curcol = 0;
i = 0;
while (i < curwp->w_doto) {
c = lgetc(lp, i++);
unicode_t c;
int bytes;
bytes = utf8_to_unicode(lp->l_text, i, curwp->w_doto, &c);
i += bytes;
if (c == '\t')
curcol |= tabmask;
else if (c < 0x20 || c == 0x7f)
++curcol;
else if (c >= 0x80 && c <= 0xa0)
curcol+=2;
++curcol;
}

5
utf8.h
View File

@ -6,4 +6,9 @@ typedef unsigned int unicode_t;
unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res);
unsigned unicode_to_utf8(unsigned int c, char *utf8);
/*
 * Tell whether byte c may begin a UTF-8 encoded character.
 *
 * Returns 0 when the two most significant bits of c are "10" (the form
 * UTF-8 uses for continuation bytes) and 1 for every other byte value.
 */
static inline int is_beginning_utf8(unsigned char c)
{
	const unsigned char top_two_bits = c & 0xc0;

	return top_two_bits != 0x80;
}
#endif