mirror of
				https://github.com/netwide-assembler/nasm.git
				synced 2025-10-10 00:25:06 -04:00 
			
		
		
		
	stdscan: handle $-escaped symbols starting with $
For a symbol to start with $, it needs to be escaped with a second dollar sign: $$. This was not handled correctly; instead, $$ was seen as TOKEN_BASE. Fix this.

Reported-by: E. C. Masloch <pushbx@ulukai.org>
Fixes: https://bugzilla.nasm.us/show_bug.cgi?id=3392922
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
		
							
								
								
									
										110
									
								
								asm/stdscan.c
									
									
									
									
									
								
							
							
						
						
									
										110
									
								
								asm/stdscan.c
									
									
									
									
									
								
							| @@ -312,68 +312,86 @@ int stdscan(void *private_data, struct tokenval *tv) | |||||||
|     return i; |     return i; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /* Skip id chars and return an appropriate string */ | ||||||
|  | static int stdscan_symbol(struct tokenval *tv) | ||||||
|  | { | ||||||
|  |     char *p = scan.bufptr; | ||||||
|  |     const char *r = p; | ||||||
|  |     size_t len; | ||||||
|  |  | ||||||
|  |     p++;                        /* Leading character already verified */ | ||||||
|  |  | ||||||
|  |     /* Skip the entire symbol but only copy up to IDLEN_MAX characters */ | ||||||
|  |     while (nasm_isidchar(*p)) | ||||||
|  |         p++; | ||||||
|  |  | ||||||
|  |     scan.bufptr = p; | ||||||
|  |     len = p - r; | ||||||
|  |     if (len >= IDLEN_MAX) | ||||||
|  |         len = IDLEN_MAX - 1; | ||||||
|  |  | ||||||
|  |     tv->t_len  = len; | ||||||
|  |     tv->t_charptr = stdscan_copy(r, len); | ||||||
|  |     return tv->t_type = TOKEN_ID; | ||||||
|  | } | ||||||
|  |  | ||||||
| static int stdscan_token(struct tokenval *tv) | static int stdscan_token(struct tokenval *tv) | ||||||
| { | { | ||||||
|     const char *r; |     const char *r; | ||||||
|  |  | ||||||
|     /* we have a token; either an id, a number, operator or char */ |     /* we have a token; either an id, a number, operator or char */ | ||||||
|     if (nasm_isidstart(*scan.bufptr) || |     if (nasm_isidstart(*scan.bufptr)) { | ||||||
|         (*scan.bufptr == '$' && nasm_isidstart(scan.bufptr[1]))) { |  | ||||||
|         /* now we've got an identifier */ |  | ||||||
|         bool is_sym = false; |  | ||||||
|         int token_type; |         int token_type; | ||||||
|  |  | ||||||
|         if (*scan.bufptr == '$') { |         stdscan_symbol(tv); | ||||||
|             is_sym = true; |  | ||||||
|             scan.bufptr++; |         if (tv->t_len <= MAX_KEYWORD) { | ||||||
|         } |             /* Check to see if it is a keyword of some kind */ | ||||||
|  |  | ||||||
|         r = scan.bufptr++; |             token_type = nasm_token_hash(tv->t_charptr, tv); | ||||||
|         /* read the entire buffer to advance the buffer pointer but... */ |             if (unlikely(tv->t_flag & TFLAG_WARN)) { | ||||||
|         while (nasm_isidchar(*scan.bufptr)) |                 /*! ptr [on] non-NASM keyword used in other assemblers | ||||||
|             scan.bufptr++; |                  *!  warns about keywords used in other assemblers that | ||||||
|  |                  *!  might indicate a mistake in the source code. | ||||||
|         /* ... copy only up to IDLEN_MAX-1 characters */ |                  *!  Currently only the MASM \c{PTR} keyword is | ||||||
|         tv->t_charptr = stdscan_copy(r, scan.bufptr - r < IDLEN_MAX ? |                  *!  recognized. If (limited) MASM compatibility is | ||||||
|                                      scan.bufptr - r : IDLEN_MAX - 1); |                  *!  desired, the \c{%use masm} macro package is | ||||||
|  |                  *!  available, see \k{pkg_masm}; however, carefully note | ||||||
|         if (is_sym || scan.bufptr - r > MAX_KEYWORD) |                  *!  the caveats listed. | ||||||
|             return tv->t_type = TOKEN_ID;       /* bypass all other checks */ |                  */ | ||||||
|  |                 nasm_warn(WARN_PTR, "`%s' is not a NASM keyword", | ||||||
|         token_type = nasm_token_hash(tv->t_charptr, tv); |                           tv->t_charptr); | ||||||
|         if (unlikely(tv->t_flag & TFLAG_WARN)) { |             } | ||||||
|             /*! ptr [on] non-NASM keyword used in other assemblers |  | ||||||
|              *!  warns about keywords used in other assemblers that |             if (likely(!(tv->t_flag & TFLAG_BRC))) { | ||||||
|              *!  might indicate a mistake in the source code. |                 /* most of the tokens fall into this case */ | ||||||
|              *!  Currently only the MASM \c{PTR} keyword is |                 return token_type; | ||||||
|              *!  recognized. If (limited) MASM compatibility is |             } | ||||||
|              *!  desired, the \c{%use masm} macro package is |  | ||||||
|              *!  available, see \k{pkg_masm}; however, carefully note |  | ||||||
|              *!  the caveats listed. |  | ||||||
|              */ |  | ||||||
|             nasm_warn(WARN_PTR, "`%s' is not a NASM keyword", |  | ||||||
|                        tv->t_charptr); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         if (likely(!(tv->t_flag & TFLAG_BRC))) { |  | ||||||
|             /* most of the tokens fall into this case */ |  | ||||||
|             return token_type; |  | ||||||
|         } else { |  | ||||||
|             return tv->t_type = TOKEN_ID; |  | ||||||
|         } |         } | ||||||
|  |         return tv->t_type = TOKEN_ID; | ||||||
|     } else if (*scan.bufptr == '$' && !nasm_isnumchar(scan.bufptr[1])) { |     } else if (*scan.bufptr == '$' && !nasm_isnumchar(scan.bufptr[1])) { | ||||||
|         /* |         /* | ||||||
|          * It's a $ sign with no following hex number; this must |          * It's a $ sign with no following hex number; this must | ||||||
|          * mean it's a Here token ($), evaluating to the current |          * mean it's a Here token ($), evaluating to the current | ||||||
|          * assembly location, or a Base token ($$), evaluating to |          * assembly location, a Base token ($$), evaluating to | ||||||
|          * the base of the current segment. |          * the base of the current segment, or an identifier beginning | ||||||
|  |          * with $ (escaped by a previous $). | ||||||
|          */ |          */ | ||||||
|         scan.bufptr++; |         scan.bufptr++; | ||||||
|         if (*scan.bufptr == '$') { |         if (*scan.bufptr == '$') { | ||||||
|             scan.bufptr++; |             if (nasm_isidchar(scan.bufptr[1])) { | ||||||
|             return tv->t_type = TOKEN_BASE; |                 /* $-escaped symbol starting with $ */ | ||||||
|  |                 return stdscan_symbol(tv); | ||||||
|  |             } else { | ||||||
|  |                 scan.bufptr++; | ||||||
|  |                 return tv->t_type = TOKEN_BASE; | ||||||
|  |             } | ||||||
|  |         } else if (nasm_isidstart(*scan.bufptr)) { | ||||||
|  |             /* $-escaped symbol that does NOT start with $ */ | ||||||
|  |             return stdscan_symbol(tv); | ||||||
|  |         } else { | ||||||
|  |             return tv->t_type = TOKEN_HERE; | ||||||
|         } |         } | ||||||
|         return tv->t_type = TOKEN_HERE; |  | ||||||
|     } else if (nasm_isnumstart(*scan.bufptr)) {   /* now we've got a number */ |     } else if (nasm_isnumstart(*scan.bufptr)) {   /* now we've got a number */ | ||||||
|         bool rn_error; |         bool rn_error; | ||||||
|         bool is_hex = false; |         bool is_hex = false; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user