Merge with master

2025-06-30 22:19:29 -04:00 · 2006-12-22 18:06:26 +01:00 · 2006-12-22 18:06:26 +01:00 · f477f41c8b
commit f477f41c8b
parent 2530b502b9 e9dfc8f815
3 changed files with 362 additions and 295 deletions
--- a/po/fr.po
+++ b/po/fr.po
--- a/src/document/html/renderer.c
+++ b/src/document/html/renderer.c
@@ -404,8 +404,20 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
 		return 0;

 	if (part->document) {
+		/* Reallocate LINE(y).chars[] to be large enough.  The
+		 * last parameter of realloc_line is the index of the
+		 * last element to which we may want to write,
+		 * i.e. one less than the required size of the array.
+		 * Compute the required size by assuming that each
+		 * byte of input will need at most one character cell.
+		 * (All double-cell characters take up at least two
+		 * bytes in UTF-8, and there are no triple-cell or
+		 * wider characters.)  However, if there already is an
+		 * incomplete character in part->document->buf, then
+		 * the first byte of input can result in a double-cell
+		 * character, so we must reserve one extra element.  */
 		if (realloc_line(html_context, part->document,
-		                 Y(y), X(x) + charslen - 1))
+		                 Y(y), X(x) + charslen))
 			return 0;
 		if (utf8) {
 			unsigned char *end = chars + charslen;
@@ -459,7 +471,7 @@ set_hline(struct html_context *html_context, unsigned char *chars, int charslen,
 								part->document->buf[i] = *chars++;
 							}
 							part->document->buf_length = i;
-							return x - x2;
+							break;
 						}
 					} else {
 good_char:
@@ -492,6 +504,13 @@ good_char:
 			}

 		}
+		/* Assert that we haven't written past the end of the
+		 * LINE(y).chars array.  @x here is one greater than
+		 * the last one used in POS(x, y).  Instead of this,
+		 * we could assert(X(x) < LINE(y).length) immediately
+		 * before each @copy_screen_chars call above, but
+		 * those are in an inner loop that should be fast.  */
+		assert(X(x) <= LINE(y).length);
 		len = x - x2;
 	} else {
 		if (utf8) {
--- a/src/intl/charsets.c
+++ b/src/intl/charsets.c
@@ -640,39 +640,70 @@ utf8_to_unicode(unsigned char **string, unsigned char *end)

 	switch (length) {
 		case 1:
+			if (str[0] >= 0x80) {
+invalid_utf8:
+				++*string;
+				return UCS_REPLACEMENT_CHARACTER;
+			}
 			u = str[0];
 			break;
 		case 2:
+			if ((str[1] & 0xc0) != 0x80)
+				goto invalid_utf8;
 			u = (str[0] & 0x1f) << 6;
 			u += (str[1] & 0x3f);
+			if (u < 0x80)
+				goto invalid_utf8;
 			break;
 		case 3:
+			if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80)
+				goto invalid_utf8;
 			u = (str[0] & 0x0f) << 12;
 			u += ((str[1] & 0x3f) << 6);
 			u += (str[2] & 0x3f);
+			if (u < 0x800)
+				goto invalid_utf8;
 			break;
 		case 4:
+			if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80
+			    || (str[3] & 0xc0) != 0x80)
+				goto invalid_utf8;
 			u = (str[0] & 0x0f) << 18;
 			u += ((str[1] & 0x3f) << 12);
 			u += ((str[2] & 0x3f) << 6);
 			u += (str[3] & 0x3f);
+			if (u < 0x10000)
+				goto invalid_utf8;
 			break;
 		case 5:
+			if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80
+			    || (str[3] & 0xc0) != 0x80 || (str[4] & 0xc0) != 0x80)
+				goto invalid_utf8;
 			u = (str[0] & 0x0f) << 24;
 			u += ((str[1] & 0x3f) << 18);
 			u += ((str[2] & 0x3f) << 12);
 			u += ((str[3] & 0x3f) << 6);
 			u += (str[4] & 0x3f);
+			if (u < 0x200000)
+				goto invalid_utf8;
 			break;
 		case 6:
-		default:
+			if ((str[1] & 0xc0) != 0x80 || (str[2] & 0xc0) != 0x80
+			    || (str[3] & 0xc0) != 0x80 || (str[4] & 0xc0) != 0x80
+			    || (str[5] & 0xc0) != 0x80)
+				goto invalid_utf8;
 			u = (str[0] & 0x01) << 30;
 			u += ((str[1] & 0x3f) << 24);
 			u += ((str[2] & 0x3f) << 18);
 			u += ((str[3] & 0x3f) << 12);
 			u += ((str[4] & 0x3f) << 6);
 			u += (str[5] & 0x3f);
+			if (u < 0x4000000)
+				goto invalid_utf8;
 			break;
+		default:
+			INTERNAL("utf8char_len_tab out of range");
+			goto invalid_utf8;
 	}
 	*string = str + length;
 	return u;