mirror of
https://github.com/rfivet/uemacs.git
synced 2024-12-18 23:36:23 -05:00
Fix 'getccol()' and 'getgoal()' functions for multibyte UTF-8 characters
These functions convert the byte offset into the column number (getccol()) and vice versa (getgoal()). Getting this right means that moving up and down the text gets us the right columns, rather than moving randomly left and right when you move up and down. We also won't end up in the middle of a utf-8 character, because we're not just moving into some random byte offset, we're moving into a proper column. Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
1edeced67c
commit
ddd45dbff1
14
basic.c
14
basic.c
@ -24,25 +24,31 @@
|
|||||||
*/
|
*/
|
||||||
static int getgoal(struct line *dlp)
|
static int getgoal(struct line *dlp)
|
||||||
{
|
{
|
||||||
int c;
|
|
||||||
int col;
|
int col;
|
||||||
int newcol;
|
int newcol;
|
||||||
int dbo;
|
int dbo;
|
||||||
|
int len = llength(dlp);
|
||||||
|
|
||||||
col = 0;
|
col = 0;
|
||||||
dbo = 0;
|
dbo = 0;
|
||||||
while (dbo != llength(dlp)) {
|
while (dbo != len) {
|
||||||
c = lgetc(dlp, dbo);
|
unicode_t c;
|
||||||
|
int width = utf8_to_unicode(dlp->l_text, dbo, len, &c);
|
||||||
newcol = col;
|
newcol = col;
|
||||||
|
|
||||||
|
/* Take tabs, ^X and \xx hex characters into account */
|
||||||
if (c == '\t')
|
if (c == '\t')
|
||||||
newcol |= tabmask;
|
newcol |= tabmask;
|
||||||
else if (c < 0x20 || c == 0x7F)
|
else if (c < 0x20 || c == 0x7F)
|
||||||
++newcol;
|
++newcol;
|
||||||
|
else if (c >= 0x80 && c <= 0xa0)
|
||||||
|
newcol += 2;
|
||||||
|
|
||||||
++newcol;
|
++newcol;
|
||||||
if (newcol > curgoal)
|
if (newcol > curgoal)
|
||||||
break;
|
break;
|
||||||
col = newcol;
|
col = newcol;
|
||||||
++dbo;
|
dbo += width;
|
||||||
}
|
}
|
||||||
return dbo;
|
return dbo;
|
||||||
}
|
}
|
||||||
|
16
random.c
16
random.c
@ -124,16 +124,24 @@ int getcline(void)
|
|||||||
*/
|
*/
|
||||||
int getccol(int bflg)
|
int getccol(int bflg)
|
||||||
{
|
{
|
||||||
int c, i, col;
|
int i, col;
|
||||||
col = 0;
|
struct line *dlp = curwp->w_dotp;
|
||||||
for (i = 0; i < curwp->w_doto; ++i) {
|
int byte_offset = curwp->w_doto;
|
||||||
c = lgetc(curwp->w_dotp, i);
|
int len = llength(dlp);
|
||||||
|
|
||||||
|
col = i = 0;
|
||||||
|
while (i < byte_offset) {
|
||||||
|
unicode_t c;
|
||||||
|
|
||||||
|
i += utf8_to_unicode(dlp->l_text, i, len, &c);
|
||||||
if (c != ' ' && c != '\t' && bflg)
|
if (c != ' ' && c != '\t' && bflg)
|
||||||
break;
|
break;
|
||||||
if (c == '\t')
|
if (c == '\t')
|
||||||
col |= tabmask;
|
col |= tabmask;
|
||||||
else if (c < 0x20 || c == 0x7F)
|
else if (c < 0x20 || c == 0x7F)
|
||||||
++col;
|
++col;
|
||||||
|
else if (c >= 0x80 && c <= 0xa0)
|
||||||
|
col += 2;
|
||||||
++col;
|
++col;
|
||||||
}
|
}
|
||||||
return col;
|
return col;
|
||||||
|
Loading…
Reference in New Issue
Block a user