directives: correctly handle quoted strings in directives

Quoted strings should be valid inside directives, including ones that contain a ] character. Also check for garbage on the line after the directive. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
2025-11-08 23:27:15 -05:00 · 2025-10-10 10:41:09 -07:00
parent 5a6b276b89
commit c0aec6969b
3 changed files with 81 additions and 37 deletions
--- a/asm/directiv.c
+++ b/asm/directiv.c
@@ -22,6 +22,7 @@
 #include "listing.h"
 #include "labels.h"
 #include "iflag.h"
+#include "quote.h"

 struct cpunames {
    const char *name;
@@ -178,34 +179,51 @@ static int get_bits(const char *value)

 static enum directive parse_directive_line(char **directive, char **value)
 {
-    char *p, *q, *buf;
+    char *p, *q, *eol, *buf;
+    char c;

    buf = nasm_skip_spaces(*directive);

    /*
     * It should be enclosed in [ ].
-     * XXX: we don't check there is nothing else on the remainder of the
-     * line, except a possible comment.
+     *
+     * Strip off the comments.  We should really strip the comments in
+     * generic code, not here.  While we're at it, it would be better
+     * to pass the backend a series of tokens instead of a raw string,
+     * and actually process quoted strings for it, kind of like argv
+     * is handled in C.
     */
    if (*buf != '[')
        return D_none;
-    q = strchr(buf, ']');
-    if (!q)
-        return D_corrupt;
+
+    q = buf;
+    while ((c = *q) != ']') {
+        switch (c) {
+        case '\0':
+        case ';':
+            return D_corrupt;   /* No ] in directive */
+        case '\'':
+        case '\"':
+        case '`':
+            q = nasm_skip_string(q);
+            if (!*q++)
+                return D_corrupt;
+            break;
+        default:
+            q++;
+            break;
+        }
+    }

    /*
-     * Strip off the comments.  XXX: this doesn't account for quoted
-     * strings inside a directive.  We should really strip the
-     * comments in generic code, not here.  While we're at it, it
-     * would be better to pass the backend a series of tokens instead
-     * of a raw string, and actually process quoted strings for it,
-     * like of like argv is handled in C.
+     * Found the ] at the end of the directive. Make sure there isn't
+     * anything else at the end of the line, except a possible
+     * comment.
     */
-    p = strchr(buf, ';');
-    if (p) {
-        if (p < q) /* ouch! somewhere inside */
-            return D_corrupt;
-        *p = '\0';
+    eol = nasm_skip_spaces(q+1);
+    if (*eol != '\0' && *eol != ';') {
+        nasm_warn(WARN_DIRECTIVE_GARBAGE_EOL,
+                  "garbage found on line after directive");
    }

    /* no brace, no trailing spaces */
@@ -264,7 +282,7 @@ bool process_directives(char *directive)

    switch (d) {
    case D_none:
-        return D_none;      /* Not a directive */
+        return false;

    case D_corrupt:
 	nasm_nonfatal("invalid directive line");
@@ -285,6 +303,12 @@ bool process_directives(char *directive)
            default:
                panic();
            }
+        } else if (d < D_pseudo_ops) {
+            nasm_nonfatal("internal error: unimplemented directive [%s]",
+                          directive);
+            break;
+        } else {
+            goto unknown;
        }
        break;

@@ -617,11 +641,10 @@ bool process_directives(char *directive)
        break;
    }

-
    /* A common error message */
    if (bad_param) {
        nasm_nonfatal("invalid parameter to [%s] directive", directive);
    }

-    return d != D_none;
+    return true;
 }
--- a/asm/directiv.dat
+++ b/asm/directiv.dat
@@ -17,6 +17,12 @@
 ;; In the future, this will be turned into a general list of keywords
 ;; to be parsed in special contexts.
 ;;
+;; #special tokens are used to separate different classes of tokens.
+;; Make sure to add new tokens in the correct place. If a token is used
+;; both as a directive and in pragmas, it belongs in the directive part
+;; of the list. Tokens used in pragmas *only* go at the end of the
+;; list.
+;;

 ; --- General configuration
 #name directive
@@ -25,10 +31,15 @@
 #header directiv.h

 ; --- Special enum values
+;     These must be first in the list.
 #special none = 0			; Must be zero
 #special unknown
 #special corrupt

+; --- True directives
+;     These tokens are accepted as directives by the global code or
+;     ignored if not implemented.
+
 ; --- Global directives
 absolute
 bits
@@ -40,16 +51,29 @@ dollarhex
 extern
 float
 global
-static
 list
-section
-segment
-warning
-sectalign
 pragma
 required
+sectalign
+section
+segment
+static
+warning
+
+; --- Common output directives/pragmas
+prefix
+suffix
+postfix
+gprefix
+gsuffix
+gpostfix
+lprefix
+lsuffix
+lpostfix

 ; --- Pseudo-op list, for the benefit of %isdirective
+;     Tokens put in this part will be rejected both as directives and
+;     as pragmas.
 #special pseudo_ops
 db
 dw
@@ -71,6 +95,8 @@ incbin
 equ

 ; --- Format-specific directives
+;     Tokens put in this part of the list will be forwarded to the
+;     backend ofmt->directive() method.
 #special ofmt
 export				; outcoff, outobj
 group				; outobj
@@ -83,7 +109,8 @@ osabi				; outelf
 safeseh				; outcoff
 uppercase			; outieee, outobj

-; --- The following are tokens used in pragmas, not actual directives
+; --- The following are tokens used in pragmas, not actual directives.
+;     They will not be accepted as directives.
 #special pragma_tokens

 ; --- Assembler pragmas
@@ -92,17 +119,6 @@ limit
 ; --- Listing pragmas
 options

-; --- Common output pragmas
-prefix
-suffix
-postfix
-gprefix
-gsuffix
-gpostfix
-lprefix
-lsuffix
-lpostfix
-
 ; --- Backend-specific pragmas
 subsections_via_symbols		; macho
 no_dead_strip			; macho
--- a/asm/warnings.dat
+++ b/asm/warnings.dat
@@ -9,6 +9,11 @@ db-empty [on] no operand for data declaration
    This is permitted, but often indicative of an error.
    See \k{db}.

+directive-garbage-eol [err] garbage after directive
+    Text was found after a directive. This is a warning so it can be
+    suppressed, because previous versions of NASM did not check for
+    this condition.
+
 ea-absolute [on] absolute address cannot be RIP-relative
    Warns that an address that is inherently absolute cannot
    be generated with RIP-relative encoding using \c{REL},