stdscan: handle $-escaped symbols starting with $

For a symbol to start with $, it needs to be escaped with a second dollar sign: $$. This was not handled correctly, instead $$ was seen as TOKEN_BASE. Fix this. Reported-by: E. C. Masloch <pushbx@ulukai.org> Fixes: https://bugzilla.nasm.us/show_bug.cgi?id=3392922 Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
2025-10-10 00:25:06 -04:00 · 2025-09-02 17:55:22 -07:00
parent 5201aab90f
commit 9c3d6ff000
1 changed files with 64 additions and 46 deletions
--- a/asm/stdscan.c
+++ b/asm/stdscan.c
@@ -312,68 +312,86 @@ int stdscan(void *private_data, struct tokenval *tv)
    return i;
 }

+/* Skip id chars and return an appropriate string */
+static int stdscan_symbol(struct tokenval *tv)
+{
+    char *p = scan.bufptr;
+    const char *r = p;
+    size_t len;
+
+    p++;                        /* Leading character already verified */
+
+    /* Skip the entire symbol but only copy up to IDLEN_MAX characters */
+    while (nasm_isidchar(*p))
+        p++;
+
+    scan.bufptr = p;
+    len = p - r;
+    if (len >= IDLEN_MAX)
+        len = IDLEN_MAX - 1;
+
+    tv->t_len  = len;
+    tv->t_charptr = stdscan_copy(r, len);
+    return tv->t_type = TOKEN_ID;
+}
+
 static int stdscan_token(struct tokenval *tv)
 {
    const char *r;

    /* we have a token; either an id, a number, operator or char */
-    if (nasm_isidstart(*scan.bufptr) ||
-        (*scan.bufptr == '$' && nasm_isidstart(scan.bufptr[1]))) {
-        /* now we've got an identifier */
-        bool is_sym = false;
+    if (nasm_isidstart(*scan.bufptr)) {
        int token_type;

-        if (*scan.bufptr == '$') {
-            is_sym = true;
-            scan.bufptr++;
-        }
-
-        r = scan.bufptr++;
-        /* read the entire buffer to advance the buffer pointer but... */
-        while (nasm_isidchar(*scan.bufptr))
-            scan.bufptr++;
-
-        /* ... copy only up to IDLEN_MAX-1 characters */
-        tv->t_charptr = stdscan_copy(r, scan.bufptr - r < IDLEN_MAX ?
-                                     scan.bufptr - r : IDLEN_MAX - 1);
-
-        if (is_sym || scan.bufptr - r > MAX_KEYWORD)
-            return tv->t_type = TOKEN_ID;       /* bypass all other checks */
-
-        token_type = nasm_token_hash(tv->t_charptr, tv);
-        if (unlikely(tv->t_flag & TFLAG_WARN)) {
-            /*! ptr [on] non-NASM keyword used in other assemblers
-             *!  warns about keywords used in other assemblers that
-             *!  might indicate a mistake in the source code.
-             *!  Currently only the MASM \c{PTR} keyword is
-             *!  recognized. If (limited) MASM compatibility is
-             *!  desired, the \c{%use masm} macro package is
-             *!  available, see \k{pkg_masm}; however, carefully note
-             *!  the caveats listed.
-             */
-            nasm_warn(WARN_PTR, "`%s' is not a NASM keyword",
-                       tv->t_charptr);
-        }
-
-        if (likely(!(tv->t_flag & TFLAG_BRC))) {
-            /* most of the tokens fall into this case */
-            return token_type;
-        } else {
-            return tv->t_type = TOKEN_ID;
+        stdscan_symbol(tv);
+
+        if (tv->t_len <= MAX_KEYWORD) {
+            /* Check to see if it is a keyword of some kind */
+
+            token_type = nasm_token_hash(tv->t_charptr, tv);
+            if (unlikely(tv->t_flag & TFLAG_WARN)) {
+                /*! ptr [on] non-NASM keyword used in other assemblers
+                 *!  warns about keywords used in other assemblers that
+                 *!  might indicate a mistake in the source code.
+                 *!  Currently only the MASM \c{PTR} keyword is
+                 *!  recognized. If (limited) MASM compatibility is
+                 *!  desired, the \c{%use masm} macro package is
+                 *!  available, see \k{pkg_masm}; however, carefully note
+                 *!  the caveats listed.
+                 */
+                nasm_warn(WARN_PTR, "`%s' is not a NASM keyword",
+                          tv->t_charptr);
+            }
+
+            if (likely(!(tv->t_flag & TFLAG_BRC))) {
+                /* most of the tokens fall into this case */
+                return token_type;
+            }
        }
+        return tv->t_type = TOKEN_ID;
    } else if (*scan.bufptr == '$' && !nasm_isnumchar(scan.bufptr[1])) {
        /*
         * It's a $ sign with no following hex number; this must
         * mean it's a Here token ($), evaluating to the current
-         * assembly location, or a Base token ($$), evaluating to
-         * the base of the current segment.
+         * assembly location, a Base token ($$), evaluating to
+         * the base of the current segment, or an identifier beginning
+         * with $ (escaped by a previous $).
         */
        scan.bufptr++;
        if (*scan.bufptr == '$') {
-            scan.bufptr++;
-            return tv->t_type = TOKEN_BASE;
+            if (nasm_isidchar(scan.bufptr[1])) {
+                /* $-escaped symbol starting with $ */
+                return stdscan_symbol(tv);
+            } else {
+                scan.bufptr++;
+                return tv->t_type = TOKEN_BASE;
+            }
+        } else if (nasm_isidstart(*scan.bufptr)) {
+            /* $-escaped symbol that does NOT start with $ */
+            return stdscan_symbol(tv);
+        } else {
+            return tv->t_type = TOKEN_HERE;
        }
-        return tv->t_type = TOKEN_HERE;
    } else if (nasm_isnumstart(*scan.bufptr)) {   /* now we've got a number */
        bool rn_error;
        bool is_hex = false;