mirror of
				https://github.com/netwide-assembler/nasm.git
				synced 2025-10-10 00:25:06 -04:00 
			
		
		
		
	SPDX is an international standard for documenting software license requirements. Remove the existing headers and replace with a brief SPDX preamble. See: https://spdx.dev/use/specifications/ The script used to convert the files is added to "tools", and the file header templates in headers/ are updated. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
		
			
				
	
	
		
			330 lines
		
	
	
		
			8.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			330 lines
		
	
	
		
			8.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* SPDX-License-Identifier: BSD-2-Clause */
 | |
| /* Copyright 1996-2009 The NASM Authors - All Rights Reserved */
 | |
| 
 | |
| /*
 | |
|  * strfunc.c
 | |
|  *
 | |
|  * String transformation functions
 | |
|  */
 | |
| 
 | |
| #include "nasmlib.h"
 | |
| #include "nasm.h"
 | |
| 
 | |
| /*
 | |
|  * Convert a string in UTF-8 format to UTF-16LE
 | |
|  */
 | |
| static size_t utf8_to_16le(uint8_t *str, size_t len, char *op)
 | |
| {
 | |
| #define EMIT(x) do { if (op) { WRITESHORT(op,x); } outlen++; } while(0)
 | |
| 
 | |
|     size_t outlen = 0;
 | |
|     int expect = 0;
 | |
|     uint8_t c;
 | |
|     uint32_t v = 0, vmin = 0;
 | |
| 
 | |
|     while (len--) {
 | |
|         c = *str++;
 | |
| 
 | |
|         if (expect) {
 | |
|             if ((c & 0xc0) != 0x80) {
 | |
|                 expect = 0;
 | |
|                 return -1;
 | |
|             } else {
 | |
|                 v = (v << 6) | (c & 0x3f);
 | |
|                 if (!--expect) {
 | |
|                     if (v < vmin || v > 0x10ffff ||
 | |
|                         (v >= 0xd800 && v <= 0xdfff)) {
 | |
|                         return -1;
 | |
|                     } else if (v > 0xffff) {
 | |
|                         v -= 0x10000;
 | |
|                         EMIT(0xd800 | (v >> 10));
 | |
|                         EMIT(0xdc00 | (v & 0x3ff));
 | |
|                     } else {
 | |
|                         EMIT(v);
 | |
|                     }
 | |
|                 }
 | |
|                 continue;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (c < 0x80) {
 | |
|             EMIT(c);
 | |
|         } else if (c < 0xc0 || c >= 0xfe) {
 | |
|             /* Invalid UTF-8 */
 | |
|             return -1;
 | |
|         } else if (c < 0xe0) {
 | |
|             v = c & 0x1f;
 | |
|             expect = 1;
 | |
|             vmin = 0x80;
 | |
|         } else if (c < 0xf0) {
 | |
|             v = c & 0x0f;
 | |
|             expect = 2;
 | |
|             vmin = 0x800;
 | |
|         } else if (c < 0xf8) {
 | |
|             v = c & 0x07;
 | |
|             expect = 3;
 | |
|             vmin = 0x10000;
 | |
|         } else if (c < 0xfc) {
 | |
|             v = c & 0x03;
 | |
|             expect = 4;
 | |
|             vmin = 0x200000;
 | |
|         } else {
 | |
|             v = c & 0x01;
 | |
|             expect = 5;
 | |
|             vmin = 0x4000000;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return expect ? (size_t)-1 : outlen << 1;
 | |
| 
 | |
| #undef EMIT
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Convert a string in UTF-8 format to UTF-16BE
 | |
|  */
 | |
| static size_t utf8_to_16be(uint8_t *str, size_t len, char *op)
 | |
| {
 | |
| #define EMIT(x)                                 \
 | |
|     do {                                        \
 | |
|         uint16_t _y = (x);                      \
 | |
|         if (op) {                               \
 | |
|             WRITECHAR(op, _y >> 8);             \
 | |
|             WRITECHAR(op, _y);                  \
 | |
|         }                                       \
 | |
|         outlen++;                               \
 | |
|     } while (0)                                 \
 | |
| 
 | |
|     size_t outlen = 0;
 | |
|     int expect = 0;
 | |
|     uint8_t c;
 | |
|     uint32_t v = 0, vmin = 0;
 | |
| 
 | |
|     while (len--) {
 | |
|         c = *str++;
 | |
| 
 | |
|         if (expect) {
 | |
|             if ((c & 0xc0) != 0x80) {
 | |
|                 expect = 0;
 | |
|                 return -1;
 | |
|             } else {
 | |
|                 v = (v << 6) | (c & 0x3f);
 | |
|                 if (!--expect) {
 | |
|                     if (v < vmin || v > 0x10ffff ||
 | |
|                         (v >= 0xd800 && v <= 0xdfff)) {
 | |
|                         return -1;
 | |
|                     } else if (v > 0xffff) {
 | |
|                         v -= 0x10000;
 | |
|                         EMIT(0xdc00 | (v & 0x3ff));
 | |
|                         EMIT(0xd800 | (v >> 10));
 | |
|                     } else {
 | |
|                         EMIT(v);
 | |
|                     }
 | |
|                 }
 | |
|                 continue;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (c < 0x80) {
 | |
|             EMIT(c);
 | |
|         } else if (c < 0xc0 || c >= 0xfe) {
 | |
|             /* Invalid UTF-8 */
 | |
|             return -1;
 | |
|         } else if (c < 0xe0) {
 | |
|             v = c & 0x1f;
 | |
|             expect = 1;
 | |
|             vmin = 0x80;
 | |
|         } else if (c < 0xf0) {
 | |
|             v = c & 0x0f;
 | |
|             expect = 2;
 | |
|             vmin = 0x800;
 | |
|         } else if (c < 0xf8) {
 | |
|             v = c & 0x07;
 | |
|             expect = 3;
 | |
|             vmin = 0x10000;
 | |
|         } else if (c < 0xfc) {
 | |
|             v = c & 0x03;
 | |
|             expect = 4;
 | |
|             vmin = 0x200000;
 | |
|         } else {
 | |
|             v = c & 0x01;
 | |
|             expect = 5;
 | |
|             vmin = 0x4000000;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return expect ? (size_t)-1 : outlen << 1;
 | |
| 
 | |
| #undef EMIT
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Convert a string in UTF-8 format to UTF-32LE
 | |
|  */
 | |
| static size_t utf8_to_32le(uint8_t *str, size_t len, char *op)
 | |
| {
 | |
| #define EMIT(x) do { if (op) { WRITELONG(op,x); } outlen++; } while(0)
 | |
| 
 | |
|     size_t outlen = 0;
 | |
|     int expect = 0;
 | |
|     uint8_t c;
 | |
|     uint32_t v = 0, vmin = 0;
 | |
| 
 | |
|     while (len--) {
 | |
|         c = *str++;
 | |
| 
 | |
|         if (expect) {
 | |
|             if ((c & 0xc0) != 0x80) {
 | |
|                 return -1;
 | |
|             } else {
 | |
|                 v = (v << 6) | (c & 0x3f);
 | |
|                 if (!--expect) {
 | |
|                     if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
 | |
|                         return -1;
 | |
|                     } else {
 | |
|                         EMIT(v);
 | |
|                     }
 | |
|                 }
 | |
|                 continue;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (c < 0x80) {
 | |
|             EMIT(c);
 | |
|         } else if (c < 0xc0 || c >= 0xfe) {
 | |
|             /* Invalid UTF-8 */
 | |
|             return -1;
 | |
|         } else if (c < 0xe0) {
 | |
|             v = c & 0x1f;
 | |
|             expect = 1;
 | |
|             vmin = 0x80;
 | |
|         } else if (c < 0xf0) {
 | |
|             v = c & 0x0f;
 | |
|             expect = 2;
 | |
|             vmin = 0x800;
 | |
|         } else if (c < 0xf8) {
 | |
|             v = c & 0x07;
 | |
|             expect = 3;
 | |
|             vmin = 0x10000;
 | |
|         } else if (c < 0xfc) {
 | |
|             v = c & 0x03;
 | |
|             expect = 4;
 | |
|             vmin = 0x200000;
 | |
|         } else {
 | |
|             v = c & 0x01;
 | |
|             expect = 5;
 | |
|             vmin = 0x4000000;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return expect ? (size_t)-1 : outlen << 2;
 | |
| 
 | |
| #undef EMIT
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Convert a string in UTF-8 format to UTF-32BE
 | |
|  */
 | |
| static size_t utf8_to_32be(uint8_t *str, size_t len, char *op)
 | |
| {
 | |
| #define EMIT(x)                                         \
 | |
|     do {                                                \
 | |
|         uint32_t _y = (x);                              \
 | |
|         if (op) {                                       \
 | |
|             WRITECHAR(op,_y >> 24);                     \
 | |
|             WRITECHAR(op,_y >> 16);                     \
 | |
|             WRITECHAR(op,_y >> 8);                      \
 | |
|             WRITECHAR(op,_y);                           \
 | |
|         }                                               \
 | |
|         outlen++;                                       \
 | |
|     } while (0)
 | |
| 
 | |
|     size_t outlen = 0;
 | |
|     int expect = 0;
 | |
|     uint8_t c;
 | |
|     uint32_t v = 0, vmin = 0;
 | |
| 
 | |
|     while (len--) {
 | |
|         c = *str++;
 | |
| 
 | |
|         if (expect) {
 | |
|             if ((c & 0xc0) != 0x80) {
 | |
|                 return -1;
 | |
|             } else {
 | |
|                 v = (v << 6) | (c & 0x3f);
 | |
|                 if (!--expect) {
 | |
|                     if (v < vmin || (v >= 0xd800 && v <= 0xdfff)) {
 | |
|                         return -1;
 | |
|                     } else {
 | |
|                         EMIT(v);
 | |
|                     }
 | |
|                 }
 | |
|                 continue;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (c < 0x80) {
 | |
|             EMIT(c);
 | |
|         } else if (c < 0xc0 || c >= 0xfe) {
 | |
|             /* Invalid UTF-8 */
 | |
|             return -1;
 | |
|         } else if (c < 0xe0) {
 | |
|             v = c & 0x1f;
 | |
|             expect = 1;
 | |
|             vmin = 0x80;
 | |
|         } else if (c < 0xf0) {
 | |
|             v = c & 0x0f;
 | |
|             expect = 2;
 | |
|             vmin = 0x800;
 | |
|         } else if (c < 0xf8) {
 | |
|             v = c & 0x07;
 | |
|             expect = 3;
 | |
|             vmin = 0x10000;
 | |
|         } else if (c < 0xfc) {
 | |
|             v = c & 0x03;
 | |
|             expect = 4;
 | |
|             vmin = 0x200000;
 | |
|         } else {
 | |
|             v = c & 0x01;
 | |
|             expect = 5;
 | |
|             vmin = 0x4000000;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return expect ? (size_t)-1 : outlen << 2;
 | |
| 
 | |
| #undef EMIT
 | |
| }
 | |
| 
 | |
| typedef size_t (*transform_func)(uint8_t *, size_t, char *);
 | |
| 
 | |
| /*
 | |
|  * Apply a specific string transform and return it in a nasm_malloc'd
 | |
|  * buffer, returning the length.  On error, returns (size_t)-1 and no
 | |
|  * buffer is allocated.
 | |
|  */
 | |
| size_t string_transform(char *str, size_t len, char **out, enum strfunc func)
 | |
| {
 | |
|     /* This should match enum strfunc in nasm.h */
 | |
|     static const transform_func str_transforms[] = {
 | |
|         utf8_to_16le,
 | |
|         utf8_to_16le,
 | |
|         utf8_to_16be,
 | |
|         utf8_to_32le,
 | |
|         utf8_to_32le,
 | |
|         utf8_to_32be,
 | |
|     };
 | |
|     transform_func transform = str_transforms[func];
 | |
|     size_t outlen;
 | |
|     uint8_t *s = (uint8_t *)str;
 | |
|     char *buf;
 | |
| 
 | |
|     outlen = transform(s, len, NULL);
 | |
|     if (outlen == (size_t)-1)
 | |
|         return -1;
 | |
| 
 | |
|     *out = buf = nasm_malloc(outlen+1);
 | |
|     buf[outlen] = '\0'; /* Forcibly null-terminate the buffer */
 | |
|     return transform(s, len, buf);
 | |
| }
 |