1
0
mirror of https://github.com/rfivet/uemacs.git synced 2024-06-09 13:30:43 +00:00

Make cursor movement (largely) understand UTF-8 character boundaries

Ok, so it may do odd things if it's not truly utf-8, and when moving up
and down lines that have utf-8 the cursor moves oddly (because the byte
offset within the line stays constant, rather than the character
offset), but with this you can actually open the UTF8 example file and
move around it, and at least some of the movement makes sense.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Linus Torvalds 2012-07-10 16:40:36 -07:00
parent e62cdf04cf
commit 6b793211c2
3 changed files with 31 additions and 11 deletions

26
basic.c
View File

@ -15,6 +15,7 @@
#include "edef.h" #include "edef.h"
#include "efunc.h" #include "efunc.h"
#include "line.h" #include "line.h"
#include "utf8.h"
/* /*
* This routine, given a pointer to a struct line, and the current cursor goal * This routine, given a pointer to a struct line, and the current cursor goal
@ -74,8 +75,15 @@ int backchar(int f, int n)
curwp->w_dotp = lp; curwp->w_dotp = lp;
curwp->w_doto = llength(lp); curwp->w_doto = llength(lp);
curwp->w_flag |= WFMOVE; curwp->w_flag |= WFMOVE;
} else } else {
curwp->w_doto--; do {
unsigned char c;
curwp->w_doto--;
c = lgetc(curwp->w_dotp, curwp->w_doto);
if (is_beginning_utf8(c))
break;
} while (curwp->w_doto);
}
} }
return TRUE; return TRUE;
} }
@ -100,14 +108,22 @@ int forwchar(int f, int n)
if (n < 0) if (n < 0)
return backchar(f, -n); return backchar(f, -n);
while (n--) { while (n--) {
if (curwp->w_doto == llength(curwp->w_dotp)) { int len = llength(curwp->w_dotp);
if (curwp->w_doto == len) {
if (curwp->w_dotp == curbp->b_linep) if (curwp->w_dotp == curbp->b_linep)
return FALSE; return FALSE;
curwp->w_dotp = lforw(curwp->w_dotp); curwp->w_dotp = lforw(curwp->w_dotp);
curwp->w_doto = 0; curwp->w_doto = 0;
curwp->w_flag |= WFMOVE; curwp->w_flag |= WFMOVE;
} else } else {
curwp->w_doto++; do {
unsigned char c;
curwp->w_doto++;
c = lgetc(curwp->w_dotp, curwp->w_doto);
if (is_beginning_utf8(c))
break;
} while (curwp->w_doto < len);
}
} }
return TRUE; return TRUE;
} }

View File

@ -528,7 +528,6 @@ static void updall(struct window *wp)
void updpos(void) void updpos(void)
{ {
struct line *lp; struct line *lp;
int c;
int i; int i;
/* find the current row */ /* find the current row */
@ -543,13 +542,13 @@ void updpos(void)
curcol = 0; curcol = 0;
i = 0; i = 0;
while (i < curwp->w_doto) { while (i < curwp->w_doto) {
c = lgetc(lp, i++); unicode_t c;
int bytes;
bytes = utf8_to_unicode(lp->l_text, i, curwp->w_doto, &c);
i += bytes;
if (c == '\t') if (c == '\t')
curcol |= tabmask; curcol |= tabmask;
else if (c < 0x20 || c == 0x7f)
++curcol;
else if (c >= 0x80 && c <= 0xa0)
curcol+=2;
++curcol; ++curcol;
} }

5
utf8.h
View File

@ -6,4 +6,9 @@ typedef unsigned int unicode_t;
unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res); unsigned utf8_to_unicode(char *line, unsigned index, unsigned len, unicode_t *res);
unsigned unicode_to_utf8(unsigned int c, char *utf8); unsigned unicode_to_utf8(unsigned int c, char *utf8);
/*
 * Tell whether byte c can begin a UTF-8 encoded character.
 *
 * UTF-8 continuation bytes carry the bit pattern 10xxxxxx. Any byte whose
 * top two bits are something else (plain ASCII, or the lead byte of a
 * multi-byte sequence) is reported as a beginning.
 *
 * Returns 1 when c is not a continuation byte, 0 when it is.
 */
static inline int is_beginning_utf8(unsigned char c)
{
	/* c is at most 0xff, so the shift leaves just its top two bits (0..3). */
	const unsigned top_two_bits = c >> 6;

	return top_two_bits != 2;
}
#endif #endif