From 88477764f37a8462c7c01f2b235ef4efd08c765f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 30 Dec 2018 07:54:48 -0800 Subject: [PATCH 1/9] ELF: add support for the ELF "merge" attribute Add support for the "merge" attribute in ELF, along with the associated "strings" and size specifier attributes. Fix a few places where we used "int", but a larger type really ought to have been used. Be a bit more lax about respecifying attributes. For example, align= can be respecified; the highest resulting value is used. Signed-off-by: H. Peter Anvin --- doc/changes.src | 3 + doc/nasmdoc.src | 25 +++++++- output/outelf.c | 147 +++++++++++++++++++++++++++++++++++++----------- output/outelf.h | 1 + 4 files changed, 142 insertions(+), 34 deletions(-) diff --git a/doc/changes.src b/doc/changes.src index a4df0473..6fd19943 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -12,6 +12,9 @@ since 2007. \b Suppress nuisance "\c{label changed during code generation}" messages after a real error. +\b Add support for the \c{merge} and \c{strings} attributes on ELF +sections. See \k{elfsect}. + \S{cl-2.14.02} Version 2.14.02 \b Fix crash due to multiple errors or warnings during the code diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index ea6f10f2..bcfcad90 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -256,9 +256,12 @@ Object File Format \IA{sectalign}{sectalign} \IR{solaris x86} Solaris x86 \IA{standard section names}{standardized section names} +\IR{strings, elf attribute} \c{strings} \IR{symbols, exporting from dlls} symbols, exporting from DLLs \IR{symbols, importing from dlls} symbols, importing from DLLs \IR{test subdirectory} \c{test} subdirectory +\IR{thread local storage in elf} thread local storage, in \c{elf} +\IR{thread local storage in mach-o} thread local storage, in \c{macho} \IR{tlink} \c{TLINK} \IR{underscore, in c symbols} underscore, in C symbols \IR{unicode} Unicode @@ -5951,6 +5954,26 @@ contents given, such as a BSS section. 
\I{section alignment, in elf}\I{alignment, in elf sections}alignment requirements of the section. +\b \c{ent=} or \c{entsize=} specifies the fundamental data item size +for a section which contains either fixed-sized data structures or +strings; this is generally used with the \c{merge} attribute (see +below.) + +\b \c{byte}, \c{word}, \c{dword}, \c{qword}, \c{tword}, \c{oword}, +\c{yword}, or \c{zword} are shorthand for \c{entsize=}, and also +set the default alignment. + +\b \i{strings, ELF attribute}\c{strings} indicates that this section +contains exclusively null-terminated strings. By default these are +assumed to be byte strings, but a size specifier can be used to +override that. + +\b \i\c{merge} indicates that duplicate data elements in this section +should be merged with data elements from other object files. Data +elements can be either fixed-sized objects or null-terminated strings +(with the \c{strings} attribute.) A size specifier is required unless +\c{strings} is specified, in which case the size defaults to \c{byte}. + \b \i\c{tls} defines the section to be one which contains thread local variables. @@ -8213,7 +8236,7 @@ then the correct first instruction in the code section will not be seen because the starting point skipped over it. This isn't really ideal. -To avoid this, you can specify a `\i\c{synchronisation}' point, or indeed +To avoid this, you can specify a `\i{synchronisation}' point, or indeed as many synchronisation points as you like (although NDISASM can only handle 2147483647 sync points internally). 
The definition of a sync point is this: NDISASM guarantees to hit sync points exactly during diff --git a/output/outelf.c b/output/outelf.c index de99d076..c0d19e8b 100644 --- a/output/outelf.c +++ b/output/outelf.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * - * - * Copyright 1996-2017 The NASM Authors - All Rights Reserved + * + * Copyright 1996-2018 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. * @@ -14,7 +14,7 @@ * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF @@ -97,8 +97,10 @@ static int64_t elf_foffs; static void elf_write(void); static void elf_sect_write(struct elf_section *, const void *, size_t); static void elf_sect_writeaddr(struct elf_section *, int64_t, size_t); -static void elf_section_header(int, int, uint64_t, void *, bool, uint64_t, int, int, - int, int); +static void elf_section_header(int name, int type, uint64_t flags, + void *data, bool is_saa, uint64_t datalen, + int link, int info, + uint64_t align, uint64_t entsize); static void elf_write_sections(void); static struct SAA *elf_build_symtab(int32_t *, int32_t *); static struct SAA *elf_build_reltab(uint64_t *, struct elf_reloc *); @@ -211,9 +213,11 @@ const struct elf_known_section elf_known_sections[] = { /* parse section attributes */ static void elf_section_attrib(char *name, char *attr, int pass, uint32_t *flags_and, uint32_t *flags_or, - uint64_t *align, int *type) + uint64_t *alignp, uint64_t *entsize, int *type) { char *opt, *val, *next; + uint64_t align = 0; + uint64_t xalign = 0; opt = nasm_skip_spaces(attr); if (!opt || !*opt) @@ -225,14 +229,14 @@ static 
void elf_section_attrib(char *name, char *attr, int pass, nasm_error(ERR_NONFATAL, "section align without value specified"); } else { - *align = atoi(val); - if (*align == 0) { - *align = SHA_ANY; - } else if (!is_power2(*align)) { + bool err; + uint64_t a = readnum(val, &err); + if (a && !is_power2(a)) { nasm_error(ERR_NONFATAL, "section alignment %"PRId64" is not a power of two", - *align); - *align = SHA_ANY; + a); + } else if (a > align) { + align = a; } } } else if (!nasm_stricmp(opt, "alloc")) { @@ -250,16 +254,64 @@ static void elf_section_attrib(char *name, char *attr, int pass, } else if (!nasm_stricmp(opt, "write")) { *flags_and |= SHF_WRITE; *flags_or |= SHF_WRITE; - } else if (!nasm_stricmp(opt, "tls")) { - *flags_and |= SHF_TLS; - *flags_or |= SHF_TLS; } else if (!nasm_stricmp(opt, "nowrite")) { *flags_and |= SHF_WRITE; *flags_or &= ~SHF_WRITE; + } else if (!nasm_stricmp(opt, "tls")) { + *flags_and |= SHF_TLS; + *flags_or |= SHF_TLS; + } else if (!nasm_stricmp(opt, "notls")) { + *flags_and |= SHF_TLS; + *flags_or &= ~SHF_TLS; + } else if (!nasm_stricmp(opt, "merge")) { + *flags_and |= SHF_MERGE; + *flags_or |= SHF_MERGE; + } else if (!nasm_stricmp(opt, "nomerge")) { + *flags_and |= SHF_MERGE; + *flags_or &= ~SHF_MERGE; + } else if (!nasm_stricmp(opt, "strings")) { + *flags_and |= SHF_STRINGS; + *flags_or |= SHF_STRINGS; + } else if (!nasm_stricmp(opt, "nostrings")) { + *flags_and |= SHF_STRINGS; + *flags_or &= ~SHF_STRINGS; } else if (!nasm_stricmp(opt, "progbits")) { *type = SHT_PROGBITS; } else if (!nasm_stricmp(opt, "nobits")) { *type = SHT_NOBITS; + } else if (!nasm_stricmp(opt, "ent") || !nasm_stricmp(opt,"entsize")) { + bool err; + uint64_t es; + if (!val) { + nasm_error(ERR_NONFATAL, + "section attribute %s without value specified", opt); + } else { + es = readnum(val, &err); + if (err) { + nasm_error(ERR_NONFATAL, + "invalid value %s for section attribute %s", + val, opt); + } else { + *entsize = es; + } + } + } else if (!nasm_stricmp(opt, 
"byte")) { + xalign = *entsize = 1; + } else if (!nasm_stricmp(opt, "word")) { + xalign = *entsize = 2; + } else if (!nasm_stricmp(opt, "dword")) { + xalign = *entsize = 4; + } else if (!nasm_stricmp(opt, "qword")) { + xalign = *entsize = 8; + } else if (!nasm_stricmp(opt, "tword")) { + *entsize = 10; + xalign = 2; + } else if (!nasm_stricmp(opt, "oword")) { + xalign = *entsize = 16; + } else if (!nasm_stricmp(opt, "yword")) { + xalign = *entsize = 32; + } else if (!nasm_stricmp(opt, "zword")) { + xalign = *entsize = 64; } else if (pass == 1) { nasm_error(ERR_WARNING, "Unknown section attribute '%s' ignored on" @@ -267,6 +319,14 @@ static void elf_section_attrib(char *name, char *attr, int pass, } opt = next; } + + if (!align) + align = xalign; + + if (!align) + align = SHA_ANY; + + *alignp = align; } static enum directive_result @@ -389,7 +449,7 @@ static void add_sectname(const char *firsthalf, const char *secondhalf) shstrtablen += len + 1; } -static int elf_make_section(char *name, int type, int flags, int align) +static int elf_make_section(char *name, int type, int flags, uint64_t align) { struct elf_section *s; @@ -420,7 +480,8 @@ static int32_t elf_section_names(char *name, int pass, int *bits) { char *p; uint32_t flags, flags_and, flags_or; - uint64_t align; + uint64_t align, entsize; + struct elf_section *s; int type, i; if (!name) { @@ -431,10 +492,10 @@ static int32_t elf_section_names(char *name, int pass, int *bits) p = nasm_skip_word(name); if (*p) *p++ = '\0'; - flags_and = flags_or = type = align = 0; + flags_and = flags_or = type = align = entsize = 0; elf_section_attrib(name, p, pass, &flags_and, - &flags_or, &align, &type); + &flags_or, &align, &entsize, &type); if (!strcmp(name, ".shstrtab") || !strcmp(name, ".symtab") || @@ -461,15 +522,34 @@ static int32_t elf_section_names(char *name, int pass, int *bits) flags = (ks->flags & ~flags_and) | flags_or; i = elf_make_section(name, type, flags, align); - } else if (pass == 1) { - if ((type && 
sects[i]->type != type) - || (align && sects[i]->align != align) - || (flags_and && ((sects[i]->flags & flags_and) != flags_or))) - nasm_error(ERR_WARNING, "incompatible section attributes ignored on" - " redeclaration of section `%s'", name); } - return sects[i]->index; + s = sects[i]; + + if (pass == 1) { + if ((type && s->type != type) + || ((s->flags & flags_and) != flags_or) + || (entsize && s->entsize && entsize != s->entsize)) { + nasm_error(ERR_WARNING, + "incompatible section attributes ignored on" + " redeclaration of section `%s'", name); + } + } + + if (align > s->align) + s->align = align; + + if (entsize && !s->entsize) + s->entsize = entsize; + + if (pass == 2 && (flags_or & SHF_MERGE) && s->entsize == 0) { + if (!(s->flags & SHF_STRINGS)) + nasm_error(ERR_NONFATAL, + "section attribute merge specified without an entry size"); + s->entsize = 1; + } + + return s->index; } static void elf_deflabel(char *name, int32_t segment, int64_t offset, @@ -868,7 +948,7 @@ static void elf32_out(int32_t segto, const void *data, " segment base references"); } else { if (wrt == NO_SEG) { - /* + /* * The if() is a hack to deal with compilers which * don't handle switch() statements with 64-bit * expressions. @@ -1693,9 +1773,9 @@ static void elf_write(void) /* The normal sections */ for (i = 0; i < nsects; i++) { elf_section_header(p - shstrtab, sects[i]->type, sects[i]->flags, - (sects[i]->type == SHT_PROGBITS ? 
- sects[i]->data : NULL), true, - sects[i]->len, 0, 0, sects[i]->align, 0); + sects[i]->data, true, + sects[i]->len, 0, 0, + sects[i]->align, sects[i]->entsize); p += strlen(p) + 1; } @@ -2129,7 +2209,8 @@ static struct SAA *elf_build_reltab(uint64_t *len, struct elf_reloc *r) static void elf_section_header(int name, int type, uint64_t flags, void *data, bool is_saa, uint64_t datalen, - int link, int info, int align, int eltsize) + int link, int info, + uint64_t align, uint64_t entsize) { union { Elf32_Shdr shdr32; @@ -2153,7 +2234,7 @@ static void elf_section_header(int name, int type, uint64_t flags, shdr.shdr32.sh_link = cpu_to_le32(link); shdr.shdr32.sh_info = cpu_to_le32(info); shdr.shdr32.sh_addralign = cpu_to_le32(align); - shdr.shdr32.sh_entsize = cpu_to_le32(eltsize); + shdr.shdr32.sh_entsize = cpu_to_le32(entsize); } else { nasm_assert(is_elf64()); @@ -2168,7 +2249,7 @@ static void elf_section_header(int name, int type, uint64_t flags, shdr.shdr64.sh_link = cpu_to_le32(link); shdr.shdr64.sh_info = cpu_to_le32(info); shdr.shdr64.sh_addralign = cpu_to_le64(align); - shdr.shdr64.sh_entsize = cpu_to_le64(eltsize); + shdr.shdr64.sh_entsize = cpu_to_le64(entsize); } nasm_write(&shdr, is_elf64() ? sizeof(shdr.shdr64) : sizeof(shdr.shdr32), ofile); diff --git a/output/outelf.h b/output/outelf.h index 8eef73ae..59d8d929 100644 --- a/output/outelf.h +++ b/output/outelf.h @@ -141,6 +141,7 @@ struct elf_section { int type; /* SHT_PROGBITS or SHT_NOBITS */ uint64_t align; /* alignment: power of two */ uint64_t flags; /* section flags */ + uint64_t entsize; /* entry size */ char *name; struct SAA *rel; uint64_t rellen; From 52266ad42490f48b91a70efb5c2f93ea281eeb60 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Sun, 30 Dec 2018 07:56:59 -0800 Subject: [PATCH 2/9] NASM 2.14.03rc2 --- version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version b/version index 0c7ba9ad..7936fece 100644 --- a/version +++ b/version @@ -1 +1 @@ -2.14.03rc1 +2.14.03rc2 From 69fa3c2e8e09c1cda8334bec1a7d022cdf877383 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sat, 12 Jan 2019 00:35:20 -0800 Subject: [PATCH 3/9] outelf: hash sections for performance Use a hash table to look up sections by name, and an RAA to look up sections by index; thus remove O(n) searches. This becomes important since ELF uses sections for dead code elimination. Signed-off-by: H. Peter Anvin --- output/outelf.c | 75 ++++++++++++++++++++++++++++++----------------- output/outelf.h | 3 +- test/manysecs.asm | 6 ++++ 3 files changed, 56 insertions(+), 28 deletions(-) create mode 100644 test/manysecs.asm diff --git a/output/outelf.c b/output/outelf.c index c0d19e8b..bd5a3e6d 100644 --- a/output/outelf.c +++ b/output/outelf.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2018 The NASM Authors - All Rights Reserved + * Copyright 1996-2019 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. 
* @@ -50,6 +50,7 @@ #include "outform.h" #include "outlib.h" #include "rbtree.h" +#include "hashtbl.h" #include "ver.h" #include "dwarf.h" @@ -77,6 +78,9 @@ static struct RAA *bsym; static struct SAA *strs; static uint32_t strslen; +static struct RAAPTR *section_by_index; +static struct hash_table section_by_name; + static struct elf_symbol *fwds; static char elf_module[FILENAME_MAX]; @@ -375,6 +379,11 @@ elf_directive(enum directive directive, char *value, int pass) static void elf_init(void) { + static const char * const reserved_sections[] = { + ".shstrtab", ".strtab", ".symtab", ".symtab_shndx", NULL + }; + const char * const *p; + strlcpy(elf_module, inname, sizeof(elf_module)); sects = NULL; nsects = sectlen = 0; @@ -391,11 +400,23 @@ static void elf_init(void) fwds = NULL; + hash_init(&section_by_name, HASH_MEDIUM); + section_by_index = raa_init_ptr(); + + /* + * Add reserved section names to the section hash, with NULL + * as the data pointer + */ + for (p = reserved_sections; *p; p++) { + struct hash_insert hi; + hash_find(&section_by_name, *p, &hi); + hash_add(&hi, *p, NULL); + } + /* * FIXME: tlsie is Elf32 only and * gottpoff is Elfx32|64 only. 
*/ - elf_gotpc_sect = seg_alloc(); backend_label("..gotpc", elf_gotpc_sect + 1, 0L); elf_gotoff_sect = seg_alloc(); @@ -431,6 +452,8 @@ static void elf_cleanup(void) nasm_free(r); } } + hash_free(&section_by_name); + raa_free_ptr(section_by_index); nasm_free(sects); saa_free(syms); raa_free(bsym); @@ -449,7 +472,8 @@ static void add_sectname(const char *firsthalf, const char *secondhalf) shstrtablen += len + 1; } -static int elf_make_section(char *name, int type, int flags, uint64_t align) +static struct elf_section * +elf_make_section(char *name, int type, int flags, uint64_t align) { struct elf_section *s; @@ -468,12 +492,13 @@ static int elf_make_section(char *name, int type, int flags, uint64_t align) s->type = type; s->flags = flags; s->align = align; + s->shndx = nsects + 1; if (nsects >= sectlen) sects = nasm_realloc(sects, (sectlen += SECT_DELTA) * sizeof(*sects)); sects[nsects++] = s; - return nsects - 1; + return s; } static int32_t elf_section_names(char *name, int pass, int *bits) @@ -481,8 +506,10 @@ static int32_t elf_section_names(char *name, int pass, int *bits) char *p; uint32_t flags, flags_and, flags_or; uint64_t align, entsize; + void **hp; struct elf_section *s; - int type, i; + struct hash_insert hi; + int type; if (!name) { *bits = ofmt->maxbits; @@ -497,18 +524,15 @@ static int32_t elf_section_names(char *name, int pass, int *bits) elf_section_attrib(name, p, pass, &flags_and, &flags_or, &align, &entsize, &type); - if (!strcmp(name, ".shstrtab") || - !strcmp(name, ".symtab") || - !strcmp(name, ".strtab")) { - nasm_error(ERR_NONFATAL, "attempt to redefine reserved section" - "name `%s'", name); - return NO_SEG; - } - - for (i = 0; i < nsects; i++) - if (!strcmp(name, sects[i]->name)) - break; - if (i == nsects) { + hp = hash_find(&section_by_name, name, &hi); + if (hp) { + s = *hp; + if (!s) { + nasm_error(ERR_NONFATAL, "attempt to redefine reserved section" + "name `%s'", name); + return NO_SEG; + } + } else { const struct elf_known_section *ks = 
elf_known_sections; while (ks->name) { @@ -521,11 +545,11 @@ static int32_t elf_section_names(char *name, int pass, int *bits) align = align ? align : ks->align; flags = (ks->flags & ~flags_and) | flags_or; - i = elf_make_section(name, type, flags, align); + s = elf_make_section(name, type, flags, align); + hash_add(&hi, s->name, s); + section_by_index = raa_write_ptr(section_by_index, s->index >> 1, s); } - s = sects[i]; - if (pass == 1) { if ((type && s->type != type) || ((s->flags & flags_and) != flags_or) @@ -628,7 +652,7 @@ static void elf_deflabel(char *name, int32_t segment, int64_t offset, if (segment == NO_SEG) sym->section = SHN_ABS; else { - int i; + const struct elf_section *s; sym->section = SHN_UNDEF; if (segment == def_seg) { /* we have to be sure at least text section is there */ @@ -636,12 +660,9 @@ static void elf_deflabel(char *name, int32_t segment, int64_t offset, if (segment != elf_section_names(".text", 2, &tempint)) nasm_panic(0, "strange segment conditions in ELF driver"); } - for (i = 0; i < nsects; i++) { - if (segment == sects[i]->index) { - sym->section = i + 1; - break; - } - } + s = raa_read_ptr(section_by_index, segment >> 1); + if (s) + sym->section = s->shndx; } if (is_global == 2) { diff --git a/output/outelf.h b/output/outelf.h index 59d8d929..3c4a40c0 100644 --- a/output/outelf.h +++ b/output/outelf.h @@ -137,7 +137,8 @@ struct elf_section { uint64_t len; uint64_t size; uint64_t nrelocs; - int32_t index; + int32_t index; /* NASM index */ + int shndx; /* ELF index */ int type; /* SHT_PROGBITS or SHT_NOBITS */ uint64_t align; /* alignment: power of two */ uint64_t flags; /* section flags */ diff --git a/test/manysecs.asm b/test/manysecs.asm new file mode 100644 index 00000000..c65c6091 --- /dev/null +++ b/test/manysecs.asm @@ -0,0 +1,6 @@ +%assign n 0 +%rep 10000 + section .text %+ n progbits exec + nop +%assign n n+1 +%endrep From efee3ea312358c16190ca5923c495ad8dc9a2547 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Mon, 25 Feb 2019 21:00:43 -0800 Subject: [PATCH 4/9] configure.ac: add --enable-profiling option Add option to enable profiling without having to modify Makefile. Signed-off-by: H. Peter Anvin --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 38b3b596..ad724797 100644 --- a/configure.ac +++ b/configure.ac @@ -43,6 +43,11 @@ AS_IF([test x"$pa_init_cflags" = x], AS_IF([test x"$pa_optimize" = "x-O0"], [PA_ADD_CFLAGS([-fno-omit-frame-pointer])]) +dnl Profiling +PA_ARG_ENABLED([profiling], + [compile with profiling (-pg option)], + [PA_ADD_CFLAGS([-pg])]) + dnl Abort on panic PA_ARG_ENABLED([panic-abort], [call abort() on panic to trap in the debugger], From 437e0ffa01505d173a8b9cfe2decf74f2e9795a5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 25 Feb 2019 21:02:18 -0800 Subject: [PATCH 5/9] SAA: allow seeking beyond the end of the array If putting the file pointer past the end of the array, expand the array with zeroes. Signed-off-by: H. Peter Anvin --- nasmlib/saa.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/nasmlib/saa.c b/nasmlib/saa.c index fe7741a4..dcc2c019 100644 --- a/nasmlib/saa.c +++ b/nasmlib/saa.c @@ -1,5 +1,5 @@ /* ----------------------------------------------------------------------- * - * + * * Copyright 1996-2017 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. @@ -14,7 +14,7 @@ * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. 
- + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF @@ -262,9 +262,12 @@ void saa_fread(struct SAA *s, size_t posn, void *data, size_t len) void saa_fwrite(struct SAA *s, size_t posn, const void *data, size_t len) { size_t ix; + size_t padding = 0; - /* Seek beyond the end of the existing array not supported */ - nasm_assert(posn <= s->datalen); + if (posn > s->datalen) { + padding = posn - s->datalen; + posn = s->datalen; + } if (likely(s->blk_len == SAA_BLKLEN)) { ix = posn >> SAA_BLKSHIFT; @@ -281,6 +284,9 @@ void saa_fwrite(struct SAA *s, size_t posn, const void *data, size_t len) s->wblk--; } + if (padding) + saa_wbytes(s, NULL, padding); + saa_wbytes(s, data, len); } From b2004511dddeefd7c0866a33ceaa5fa1a6ee0510 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 26 Feb 2019 00:02:35 -0800 Subject: [PATCH 6/9] ELF: handle more than 32,633 sections Dead code elimination in ELF uses separate ELF sections for every function or data item that may be garbage collected. This can end up being more than 32,633 sections which, when the ELF internal and relocation sections are added in, can exceed the legacy ELF maximum of 65,279 sections. Newer versions of the ELF specification have added support for a much larger number of sections by putting a placeholder value (usually SHN_XINDEX == 0xffff, but 0 in some cases) into fields where the section index is a 16-bit value, and storing the full value in a different place: the ELF header uses entries in section header 0, the symbol table uses an auxiliary section with the additional indices; the section header did not need it as the sh_link field is already 32 (or 64) bits long. Signed-off-by: H. 
Peter Anvin --- doc/changes.src | 2 + output/elf.h | 19 +- output/outelf.c | 1064 ++++++++++++++++++++++----------------------- output/outelf.h | 43 +- test/fewsecs.asm | 2 + test/manysecs.asm | 13 +- test/moresecs.asm | 3 + test/mostsecs.asm | 3 + 8 files changed, 583 insertions(+), 566 deletions(-) create mode 100644 test/fewsecs.asm create mode 100644 test/moresecs.asm create mode 100644 test/mostsecs.asm diff --git a/doc/changes.src b/doc/changes.src index 6fd19943..1e67bec5 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -15,6 +15,8 @@ after a real error. \b Add support for the \c{merge} and \c{strings} attributes on ELF sections. See \k{elfsect}. +\b Handle more than 32,633 sections in ELF. + \S{cl-2.14.02} Version 2.14.02 \b Fix crash due to multiple errors or warnings during the code diff --git a/output/elf.h b/output/elf.h index 32f5b47a..72b43073 100644 --- a/output/elf.h +++ b/output/elf.h @@ -160,7 +160,11 @@ #define SHT_REL 9 #define SHT_SHLIB 10 #define SHT_DYNSYM 11 -#define SHT_NUM 12 +#define SHT_INIT_ARRAY 14 +#define SHT_FINI_ARRAY 15 +#define SHT_PREINIT_ARRAY 16 +#define SHT_GROUP 17 +#define SHT_SYMTAB_SHNDX 18 #define SHT_LOPROC 0x70000000 #define SHT_HIPROC 0x7fffffff #define SHT_LOUSER 0x80000000 @@ -179,14 +183,25 @@ #define SHF_TLS (1 << 10) /* Section hold thread-local data. 
*/ /* Special section numbers */ -#define SHN_UNDEF 0 +#define SHN_UNDEF 0x0000 #define SHN_LORESERVE 0xff00 #define SHN_LOPROC 0xff00 #define SHN_HIPROC 0xff1f #define SHN_ABS 0xfff1 #define SHN_COMMON 0xfff2 +#define SHN_XINDEX 0xffff #define SHN_HIRESERVE 0xffff +/* Same, but signed/sign-extended */ +#define XSHN_UNDEF ((int16_t)SHN_UNDEF) +#define XSHN_LORESERVE ((int16_t)SHN_LORESERVE) +#define XSHN_LOPROC ((int16_t)SHN_LOPROC) +#define XSHN_HIPROC ((int16_t)SHN_HIPROC) +#define XSHN_ABS ((int16_t)SHN_ABS) +#define XSHN_COMMON ((int16_t)SHN_COMMON) +#define XSHN_XINDEX ((int16_t)SHN_XINDEX) +#define XSHN_HIRESERVE ((int16_t)SHN_HIRESERVE) + /* Section align flag */ #define SHA_ANY 1 /* No alignment constraint */ diff --git a/output/outelf.c b/output/outelf.c index bd5a3e6d..4db7fbde 100644 --- a/output/outelf.c +++ b/output/outelf.c @@ -75,6 +75,8 @@ static int32_t def_seg; static struct RAA *bsym; +static struct SAA *symtab, *symtab_shndx; + static struct SAA *strs; static uint32_t strslen; @@ -106,9 +108,11 @@ static void elf_section_header(int name, int type, uint64_t flags, int link, int info, uint64_t align, uint64_t entsize); static void elf_write_sections(void); -static struct SAA *elf_build_symtab(int32_t *, int32_t *); -static struct SAA *elf_build_reltab(uint64_t *, struct elf_reloc *); -static void add_sectname(const char *, const char *); +static size_t elf_build_symtab(void); +static int add_sectname(const char *, const char *); + +/* First debugging section index */ +static int sec_debug; struct erel { int offset; @@ -182,9 +186,33 @@ static void dwarf_cleanup(void); static void dwarf_findfile(const char *); static void dwarf_findsect(const int); -static bool is_elf64(void); -static bool is_elf32(void); -static bool is_elfx32(void); +struct elf_format_info { + size_t word; /* Word size (4 or 8) */ + size_t ehdr_size; /* Size of the ELF header */ + size_t shdr_size; /* Size of a section header */ + size_t sym_size; /* Size of a symbol */ + size_t 
rel_size; /* Size of a reltype relocation */ + size_t rela_size; /* Size of a RELA relocation */ + char relpfx[8]; /* Relocation section prefix */ + uint32_t reltype; /* Relocation section type */ + uint16_t e_machine; /* Header e_machine field */ + uint8_t ei_class; /* ELFCLASS32 or ELFCLASS64 */ + bool elf64; /* 64-bit ELF */ + + /* Write a symbol */ + void (*elf_sym)(const struct elf_symbol *); + + /* Build a relocation table */ + struct SAA *(*elf_build_reltab)(const struct elf_reloc *); +}; +static const struct elf_format_info *efmt; + +static void elf32_sym(const struct elf_symbol *sym); +static void elf64_sym(const struct elf_symbol *sym); + +static struct SAA *elf32_build_reltab(const struct elf_reloc *r); +static struct SAA *elfx32_build_reltab(const struct elf_reloc *r); +static struct SAA *elf64_build_reltab(const struct elf_reloc *r); static bool dfmt_is_stabs(void); static bool dfmt_is_dwarf(void); @@ -377,6 +405,74 @@ elf_directive(enum directive directive, char *value, int pass) } } +static void elf_init(void); + +static void elf32_init(void) +{ + static const struct elf_format_info ef_elf32 = { + 4, + sizeof(Elf32_Ehdr), + sizeof(Elf32_Shdr), + sizeof(Elf32_Sym), + sizeof(Elf32_Rel), + sizeof(Elf32_Rela), + ".rel", + SHT_REL, + EM_386, + ELFCLASS32, + false, + + elf32_sym, + elf32_build_reltab + }; + efmt = &ef_elf32; + elf_init(); +} + +static void elfx32_init(void) +{ + static const struct elf_format_info ef_elfx32 = { + 4, + sizeof(Elf32_Ehdr), + sizeof(Elf32_Shdr), + sizeof(Elf32_Sym), + sizeof(Elf32_Rela), + sizeof(Elf32_Rela), + ".rela", + SHT_RELA, + EM_X86_64, + ELFCLASS32, + false, + + elf32_sym, + elfx32_build_reltab + }; + efmt = &ef_elfx32; + elf_init(); +} + +static void elf64_init(void) +{ + static const struct elf_format_info ef_elf64 = { + 8, + sizeof(Elf64_Ehdr), + sizeof(Elf64_Shdr), + sizeof(Elf64_Sym), + sizeof(Elf64_Rela), + sizeof(Elf64_Rela), + ".rela", + SHT_RELA, + EM_X86_64, + ELFCLASS64, + true, + + elf64_sym, + 
elf64_build_reltab + }; + efmt = &ef_elf64; + elf_init(); +} + static void elf_init(void) { static const char * const reserved_sections[] = { @@ -396,7 +492,7 @@ static void elf_init(void) strslen = 2 + strlen(elf_module); shstrtab = NULL; shstrtablen = shstrtabsize = 0;; - add_sectname("", ""); + add_sectname("", ""); /* SHN_UNDEF */ fwds = NULL; @@ -444,7 +540,7 @@ static void elf_cleanup(void) for (i = 0; i < nsects; i++) { if (sects[i]->type != SHT_NOBITS) saa_free(sects[i]->data); - if (sects[i]->head) + if (sects[i]->rel) saa_free(sects[i]->rel); while (sects[i]->head) { r = sects[i]->head; @@ -461,15 +557,27 @@ static void elf_cleanup(void) dfmt->cleanup(); } -/* add entry to the elf .shstrtab section */ -static void add_sectname(const char *firsthalf, const char *secondhalf) +/* + * Add entry to the elf .shstrtab section and increment nsections. + * Returns the section index for this new section. + * + * IMPORTANT: this needs to match the order the section headers are + * emitted. 
+ */ +static int add_sectname(const char *firsthalf, const char *secondhalf) { - int len = strlen(firsthalf) + strlen(secondhalf); - while (shstrtablen + len + 1 > shstrtabsize) + int l1 = strlen(firsthalf); + int l2 = strlen(secondhalf); + + while (shstrtablen + l1 + l2 + 1 > shstrtabsize) shstrtab = nasm_realloc(shstrtab, (shstrtabsize += SHSTR_DELTA)); - strcpy(shstrtab + shstrtablen, firsthalf); - strcat(shstrtab + shstrtablen, secondhalf); - shstrtablen += len + 1; + + memcpy(shstrtab + shstrtablen, firsthalf, l1); + shstrtablen += l1; + memcpy(shstrtab + shstrtablen, secondhalf, l2+1); + shstrtablen += l2 + 1; + + return nsections++; } static struct elf_section * @@ -486,13 +594,12 @@ elf_make_section(char *name, int type, int flags, uint64_t align) s->index = def_seg; else s->index = seg_alloc(); - add_sectname("", name); s->name = nasm_strdup(name); s->type = type; s->flags = flags; s->align = align; - s->shndx = nsects + 1; + s->shndx = add_sectname("", name); if (nsects >= sectlen) sects = nasm_realloc(sects, (sectlen += SECT_DELTA) * sizeof(*sects)); @@ -650,10 +757,10 @@ static void elf_deflabel(char *name, int32_t segment, int64_t offset, sym->other = STV_DEFAULT; sym->size = 0; if (segment == NO_SEG) - sym->section = SHN_ABS; + sym->section = XSHN_ABS; else { const struct elf_section *s; - sym->section = SHN_UNDEF; + sym->section = XSHN_UNDEF; if (segment == def_seg) { /* we have to be sure at least text section is there */ int tempint; @@ -668,7 +775,7 @@ static void elf_deflabel(char *name, int32_t segment, int64_t offset, if (is_global == 2) { sym->size = offset; sym->symv.key = 0; - sym->section = SHN_COMMON; + sym->section = XSHN_COMMON; /* * We have a common variable. Check the special text to see * if it's a valid number and power of two; if so, store it @@ -686,7 +793,7 @@ static void elf_deflabel(char *name, int32_t segment, int64_t offset, } special_used = true; } else - sym->symv.key = (sym->section == SHN_UNDEF ? 
0 : offset); + sym->symv.key = (sym->section == XSHN_UNDEF ? 0 : offset); if (sym->type == SYM_GLOBAL) { /* @@ -699,9 +806,9 @@ static void elf_deflabel(char *name, int32_t segment, int64_t offset, * To avoid such a crash, such requests are silently discarded. * This may not be the best solution. */ - if (sym->section == SHN_UNDEF || sym->section == SHN_COMMON) { + if (sym->section == XSHN_UNDEF || sym->section == XSHN_COMMON) { bsym = raa_write(bsym, segment, nglobs); - } else if (sym->section != SHN_ABS) { + } else if (sym->section != XSHN_ABS) { /* * This is a global symbol; so we must add it to the rbtree * of global symbols in its section. @@ -803,11 +910,11 @@ static void elf_add_reloc(struct elf_section *sect, int32_t segment, r->offset = offset; if (segment != NO_SEG) { - int i; - for (i = 0; i < nsects; i++) - if (segment == sects[i]->index) - r->symbol = i + 2; - if (!r->symbol) + const struct elf_section *s; + s = raa_read_ptr(section_by_index, segment >> 1); + if (s) + r->symbol = s->shndx + 1; + else r->symbol = GLOBAL_TEMP_BASE + raa_read(bsym, segment); } r->type = type; @@ -845,7 +952,6 @@ static int64_t elf_add_gsym_reloc(struct elf_section *sect, struct elf_section *s; struct elf_symbol *sym; struct rbtree *srb; - int i; /* * First look up the segment/offset pair and find a global @@ -854,13 +960,7 @@ static int64_t elf_add_gsym_reloc(struct elf_section *sect, * doing a normal elf_add_reloc after first sanity-checking * that the offset from the symbol is zero. 
*/ - s = NULL; - for (i = 0; i < nsects; i++) - if (segment == sects[i]->index) { - s = sects[i]; - break; - } - + s = raa_read_ptr(section_by_index, segment >> 1); if (!s) { if (exact && offset) nasm_error(ERR_NONFATAL, "invalid access to an external symbol"); @@ -897,7 +997,6 @@ static void elf32_out(int32_t segto, const void *data, struct elf_section *s; int64_t addr; int reltype, bytes; - int i; static struct symlininfo sinfo; /* @@ -910,25 +1009,18 @@ static void elf32_out(int32_t segto, const void *data, return; } - s = NULL; - for (i = 0; i < nsects; i++) - if (segto == sects[i]->index) { - s = sects[i]; - break; - } + s = raa_read_ptr(section_by_index, segto >> 1); if (!s) { int tempint; /* ignored */ if (segto != elf_section_names(".text", 2, &tempint)) nasm_panic(0, "strange segment conditions in ELF driver"); - else { + else s = sects[nsects - 1]; - i = nsects - 1; - } } /* again some stabs debugging stuff */ sinfo.offset = s->len; - sinfo.section = i; + sinfo.section = s->shndx; sinfo.segto = segto; sinfo.name = s->name; dfmt->debug_output(TY_DEBUGSYMLIN, &sinfo); @@ -1116,7 +1208,6 @@ static void elf64_out(int32_t segto, const void *data, struct elf_section *s; int64_t addr; int reltype, bytes; - int i; static struct symlininfo sinfo; /* @@ -1129,25 +1220,18 @@ static void elf64_out(int32_t segto, const void *data, return; } - s = NULL; - for (i = 0; i < nsects; i++) - if (segto == sects[i]->index) { - s = sects[i]; - break; - } + s = raa_read_ptr(section_by_index, segto >> 1); if (!s) { int tempint; /* ignored */ if (segto != elf_section_names(".text", 2, &tempint)) nasm_panic(0, "strange segment conditions in ELF driver"); - else { + else s = sects[nsects - 1]; - i = nsects - 1; - } } /* again some stabs debugging stuff */ sinfo.offset = s->len; - sinfo.section = i; + sinfo.section = s->shndx; sinfo.segto = segto; sinfo.name = s->name; dfmt->debug_output(TY_DEBUGSYMLIN, &sinfo); @@ -1406,7 +1490,6 @@ static void elfx32_out(int32_t segto, const void 
*data, struct elf_section *s; int64_t addr; int reltype, bytes; - int i; static struct symlininfo sinfo; /* @@ -1419,25 +1502,18 @@ static void elfx32_out(int32_t segto, const void *data, return; } - s = NULL; - for (i = 0; i < nsects; i++) - if (segto == sects[i]->index) { - s = sects[i]; - break; - } + s = raa_read_ptr(section_by_index, segto >> 1); if (!s) { int tempint; /* ignored */ if (segto != elf_section_names(".text", 2, &tempint)) nasm_panic(0, "strange segment conditions in ELF driver"); - else { + else s = sects[nsects - 1]; - i = nsects - 1; - } } /* again some stabs debugging stuff */ sinfo.offset = s->len; - sinfo.section = i; + sinfo.section = s->shndx; sinfo.segto = segto; sinfo.name = s->name; dfmt->debug_output(TY_DEBUGSYMLIN, &sinfo); @@ -1651,51 +1727,53 @@ rel12adr: } } +/* + * Section index/count with a specified overflow value (usually SHN_INDEX, + * but 0 for e_shnum. + */ +static inline uint16_t elf_shndx(int section, uint16_t overflow) +{ + return cpu_to_le16(section < (int)SHN_LORESERVE ? section : overflow); +} + +struct ehdr_common { + uint8_t e_ident[EI_NIDENT]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; +}; + +union ehdr { + Elf32_Ehdr ehdr32; + Elf64_Ehdr ehdr64; + struct ehdr_common com; +}; + static void elf_write(void) { int align; char *p; int i; - - struct SAA *symtab; - int32_t symtablen, symtablocal; + size_t symtablocal; + int sec_shstrtab, sec_symtab, sec_strtab; + union ehdr ehdr; /* - * Work out how many sections we will have. We have SHN_UNDEF, - * then the flexible user sections, then the fixed sections - * `.shstrtab', `.symtab' and `.strtab', then optionally - * relocation sections for the user sections. + * Add any sections we don't already have: + * rel/rela sections for the user sections, debug sections, and + * the ELF special sections. 
*/ - nsections = sec_numspecial + 1; - if (dfmt_is_stabs()) - nsections += 3; - else if (dfmt_is_dwarf()) - nsections += 10; - - add_sectname("", ".shstrtab"); - add_sectname("", ".symtab"); - add_sectname("", ".strtab"); - for (i = 0; i < nsects; i++) { - nsections++; /* for the section itself */ - if (sects[i]->head) { - nsections++; /* for its relocations */ - add_sectname(is_elf32() ? ".rel" : ".rela", sects[i]->name); - } - } + sec_debug = nsections; if (dfmt_is_stabs()) { /* in case the debug information is wanted, just add these three sections... */ add_sectname("", ".stab"); add_sectname("", ".stabstr"); - add_sectname(is_elf32() ? ".rel" : ".rela", ".stab"); + add_sectname(efmt->relpfx, ".stab"); } else if (dfmt_is_dwarf()) { /* the dwarf debug standard specifies the following ten sections, not all of which are currently implemented, although all of them are defined. */ -#define debug_aranges (int64_t) (nsections-10) -#define debug_info (int64_t) (nsections-7) -#define debug_abbrev (int64_t) (nsections-5) -#define debug_line (int64_t) (nsections-4) add_sectname("", ".debug_aranges"); add_sectname(".rela", ".debug_aranges"); add_sectname("", ".debug_pubnames"); @@ -1708,87 +1786,72 @@ static void elf_write(void) add_sectname("", ".debug_loc"); } - /* - * Output the ELF header. - */ - if (is_elf32() || is_elfx32()) { - Elf32_Ehdr ehdr; - - nasm_zero(ehdr.e_ident); - memcpy(ehdr.e_ident, ELFMAG, SELFMAG); - ehdr.e_ident[EI_CLASS] = ELFCLASS32; - ehdr.e_ident[EI_DATA] = ELFDATA2LSB; - ehdr.e_ident[EI_VERSION] = EV_CURRENT; - ehdr.e_ident[EI_OSABI] = elf_osabi; - ehdr.e_ident[EI_ABIVERSION] = elf_abiver; - - ehdr.e_type = cpu_to_le16(ET_REL); - ehdr.e_machine = cpu_to_le16(is_elf32() ? 
EM_386 : EM_X86_64); - ehdr.e_version = cpu_to_le16(EV_CURRENT); - ehdr.e_entry = 0; - ehdr.e_phoff = 0; - ehdr.e_shoff = sizeof(Elf64_Ehdr); - ehdr.e_flags = 0; - ehdr.e_ehsize = cpu_to_le16(sizeof(Elf32_Ehdr)); - ehdr.e_phentsize = 0; - ehdr.e_phnum = 0; - ehdr.e_shentsize = cpu_to_le16(sizeof(Elf32_Shdr)); - ehdr.e_shnum = cpu_to_le16(nsections); - ehdr.e_shstrndx = cpu_to_le16(sec_shstrtab); - - nasm_write(&ehdr, sizeof(ehdr), ofile); - fwritezero(sizeof(Elf64_Ehdr) - sizeof(Elf32_Ehdr), ofile); - } else { - Elf64_Ehdr ehdr; - - nasm_assert(is_elf64()); - - nasm_zero(ehdr.e_ident); - memcpy(ehdr.e_ident, ELFMAG, SELFMAG); - ehdr.e_ident[EI_CLASS] = ELFCLASS64; - ehdr.e_ident[EI_DATA] = ELFDATA2LSB; - ehdr.e_ident[EI_VERSION] = EV_CURRENT; - ehdr.e_ident[EI_OSABI] = elf_osabi; - ehdr.e_ident[EI_ABIVERSION] = elf_abiver; - - ehdr.e_type = cpu_to_le16(ET_REL); - ehdr.e_machine = cpu_to_le16(EM_X86_64); - ehdr.e_version = cpu_to_le16(EV_CURRENT); - ehdr.e_entry = 0; - ehdr.e_phoff = 0; - ehdr.e_shoff = sizeof(Elf64_Ehdr); - ehdr.e_flags = 0; - ehdr.e_ehsize = cpu_to_le16(sizeof(Elf64_Ehdr)); - ehdr.e_phentsize = 0; - ehdr.e_phnum = 0; - ehdr.e_shentsize = cpu_to_le16(sizeof(Elf64_Shdr)); - ehdr.e_shnum = cpu_to_le16(nsections); - ehdr.e_shstrndx = cpu_to_le16(sec_shstrtab); - - nasm_write(&ehdr, sizeof(ehdr), ofile); - } + sec_shstrtab = add_sectname("", ".shstrtab"); + sec_symtab = add_sectname("", ".symtab"); + sec_strtab = add_sectname("", ".strtab"); /* * Build the symbol table and relocation tables. */ - symtab = elf_build_symtab(&symtablen, &symtablocal); - for (i = 0; i < nsects; i++) - if (sects[i]->head) - sects[i]->rel = elf_build_reltab(&sects[i]->rellen, - sects[i]->head); + symtablocal = elf_build_symtab(); + + /* Do we need an .symtab_shndx section? 
*/ + if (symtab_shndx) + add_sectname("", ".symtab_shndx"); + + for (i = 0; i < nsects; i++) { + if (sects[i]->head) { + add_sectname(efmt->relpfx, sects[i]->name); + sects[i]->rel = efmt->elf_build_reltab(sects[i]->head); + } + } + + /* + * Output the ELF header. + */ + nasm_zero(ehdr); + + /* These fields are in the same place for 32 and 64 bits */ + memcpy(&ehdr.com.e_ident[EI_MAG0], ELFMAG, SELFMAG); + ehdr.com.e_ident[EI_CLASS] = efmt->ei_class; + ehdr.com.e_ident[EI_DATA] = ELFDATA2LSB; + ehdr.com.e_ident[EI_VERSION] = EV_CURRENT; + ehdr.com.e_ident[EI_OSABI] = elf_osabi; + ehdr.com.e_ident[EI_ABIVERSION] = elf_abiver; + ehdr.com.e_type = cpu_to_le16(ET_REL); + ehdr.com.e_machine = cpu_to_le16(efmt->e_machine); + ehdr.com.e_version = cpu_to_le16(EV_CURRENT); + + if (!efmt->elf64) { + ehdr.ehdr32.e_shoff = cpu_to_le32(sizeof ehdr); + ehdr.ehdr32.e_ehsize = cpu_to_le16(sizeof(Elf32_Ehdr)); + ehdr.ehdr32.e_shentsize = cpu_to_le16(sizeof(Elf32_Shdr)); + ehdr.ehdr32.e_shnum = elf_shndx(nsections, 0); + ehdr.ehdr32.e_shstrndx = elf_shndx(sec_shstrtab, SHN_XINDEX); + } else { + ehdr.ehdr64.e_shoff = cpu_to_le64(sizeof ehdr); + ehdr.ehdr64.e_ehsize = cpu_to_le16(sizeof(Elf64_Ehdr)); + ehdr.ehdr64.e_shentsize = cpu_to_le16(sizeof(Elf64_Shdr)); + ehdr.ehdr64.e_shnum = elf_shndx(nsections, 0); + ehdr.ehdr64.e_shstrndx = elf_shndx(sec_shstrtab, SHN_XINDEX); + } + + nasm_write(&ehdr, sizeof(ehdr), ofile); + elf_foffs = sizeof ehdr + efmt->shdr_size * nsections; /* * Now output the section header table. */ - - elf_foffs = sizeof(Elf64_Ehdr) + (is_elf64() ? sizeof(Elf64_Shdr): sizeof(Elf32_Shdr)) * nsections; align = ALIGN(elf_foffs, SEC_FILEALIGN) - elf_foffs; elf_foffs += align; elf_nsect = 0; elf_sects = nasm_malloc(sizeof(*elf_sects) * nsections); /* SHN_UNDEF */ - elf_section_header(0, SHT_NULL, 0, NULL, false, 0, SHN_UNDEF, 0, 0, 0); + elf_section_header(0, SHT_NULL, 0, NULL, false, + nsections > (int)SHN_LORESERVE ? 
nsections : 0, + sec_shstrtab >= (int)SHN_LORESERVE ? sec_shstrtab : 0, + 0, 0, 0); p = shstrtab + 1; /* The normal sections */ @@ -1800,53 +1863,7 @@ static void elf_write(void) p += strlen(p) + 1; } - /* .shstrtab */ - elf_section_header(p - shstrtab, SHT_STRTAB, 0, shstrtab, false, - shstrtablen, 0, 0, 1, 0); - p += strlen(p) + 1; - - /* .symtab */ - if (is_elf64()) - elf_section_header(p - shstrtab, SHT_SYMTAB, 0, symtab, true, - symtablen, sec_strtab, symtablocal, 8, 24); - else - elf_section_header(p - shstrtab, SHT_SYMTAB, 0, symtab, true, - symtablen, sec_strtab, symtablocal, 4, 16); - p += strlen(p) + 1; - - /* .strtab */ - elf_section_header(p - shstrtab, SHT_STRTAB, 0, strs, true, - strslen, 0, 0, 1, 0); - p += strlen(p) + 1; - - /* The relocation sections */ - if (is_elf32()) { - for (i = 0; i < nsects; i++) { - if (sects[i]->head) { - elf_section_header(p - shstrtab, SHT_REL, 0, sects[i]->rel, true, - sects[i]->rellen, sec_symtab, i + 1, 4, 8); - p += strlen(p) + 1; - } - } - } else if (is_elfx32()) { - for (i = 0; i < nsects; i++) { - if (sects[i]->head) { - elf_section_header(p - shstrtab, SHT_RELA, 0, sects[i]->rel, true, - sects[i]->rellen, sec_symtab, i + 1, 4, 12); - p += strlen(p) + 1; - } - } - } else { - nasm_assert(is_elf64()); - for (i = 0; i < nsects; i++) { - if (sects[i]->head) { - elf_section_header(p - shstrtab, SHT_RELA, 0, sects[i]->rel, true, - sects[i]->rellen, sec_symtab, i + 1, 8, 24); - p += strlen(p) + 1; - } - } - } - + /* The debugging sections */ if (dfmt_is_stabs()) { /* for debugging information, create the last three sections which are the .stab , .stabstr and .rel.stab sections respectively */ @@ -1856,7 +1873,7 @@ static void elf_write(void) if (stabbuf && stabstrbuf && stabrelbuf) { elf_section_header(p - shstrtab, SHT_PROGBITS, 0, stabbuf, false, - stablen, sec_stabstr, 0, 4, 12); + stablen, sec_stabstr, 0, 4, 12); p += strlen(p) + 1; elf_section_header(p - shstrtab, SHT_STRTAB, 0, stabstrbuf, false, @@ -1864,67 
+1881,99 @@ static void elf_write(void) p += strlen(p) + 1; /* link -> symtable info -> section to refer to */ - if (is_elf32()) { - elf_section_header(p - shstrtab, SHT_REL, 0, stabrelbuf, false, - stabrellen, sec_symtab, sec_stab, 4, 8); - } else { - elf_section_header(p - shstrtab, SHT_RELA, 0, stabrelbuf, false, - stabrellen, sec_symtab, sec_stab, 4, is_elf64() ? 24 : 12); - } + elf_section_header(p - shstrtab, efmt->reltype, 0, + stabrelbuf, false, stabrellen, + sec_symtab, sec_stab, + efmt->word, efmt->rel_size); p += strlen(p) + 1; } } else if (dfmt_is_dwarf()) { - /* for dwarf debugging information, create the ten dwarf sections */ + /* for dwarf debugging information, create the ten dwarf sections */ - /* this function call creates the dwarf sections in memory */ - if (dwarf_fsect) - dwarf_generate(); + /* this function call creates the dwarf sections in memory */ + if (dwarf_fsect) + dwarf_generate(); - elf_section_header(p - shstrtab, SHT_PROGBITS, 0, arangesbuf, false, - arangeslen, 0, 0, 1, 0); - p += strlen(p) + 1; - - elf_section_header(p - shstrtab, SHT_RELA, 0, arangesrelbuf, false, - arangesrellen, sec_symtab, - is_elf64() ? debug_aranges : sec_debug_aranges, - 1, is_elf64() ? 24 : 12); - p += strlen(p) + 1; - - elf_section_header(p - shstrtab, SHT_PROGBITS, 0, pubnamesbuf, - false, pubnameslen, 0, 0, 1, 0); - p += strlen(p) + 1; - - elf_section_header(p - shstrtab, SHT_PROGBITS, 0, infobuf, false, - infolen, 0, 0, 1, 0); - p += strlen(p) + 1; - - elf_section_header(p - shstrtab, SHT_RELA, 0, inforelbuf, false, - inforellen, sec_symtab, - is_elf64() ? debug_info : sec_debug_info, - 1, is_elf64() ? 
24 : 12); - p += strlen(p) + 1; - - elf_section_header(p - shstrtab, SHT_PROGBITS, 0, abbrevbuf, false, - abbrevlen, 0, 0, 1, 0); - p += strlen(p) + 1; - - elf_section_header(p - shstrtab, SHT_PROGBITS, 0, linebuf, false, - linelen, 0, 0, 1, 0); - p += strlen(p) + 1; - - elf_section_header(p - shstrtab, SHT_RELA, 0, linerelbuf, false, - linerellen, sec_symtab, - is_elf64() ? debug_line : sec_debug_line, - 1, is_elf64() ? 24 : 12); - p += strlen(p) + 1; - - elf_section_header(p - shstrtab, SHT_PROGBITS, 0, framebuf, false, - framelen, 0, 0, 8, 0); - p += strlen(p) + 1; - - elf_section_header(p - shstrtab, SHT_PROGBITS, 0, locbuf, false, - loclen, 0, 0, 1, 0); + elf_section_header(p - shstrtab, SHT_PROGBITS, 0, arangesbuf, false, + arangeslen, 0, 0, 1, 0); + p += strlen(p) + 1; + + elf_section_header(p - shstrtab, SHT_RELA, 0, arangesrelbuf, false, + arangesrellen, sec_symtab, + sec_debug_aranges, + efmt->word, efmt->rela_size); + p += strlen(p) + 1; + + elf_section_header(p - shstrtab, SHT_PROGBITS, 0, pubnamesbuf, + false, pubnameslen, 0, 0, 1, 0); + p += strlen(p) + 1; + + elf_section_header(p - shstrtab, SHT_PROGBITS, 0, infobuf, false, + infolen, 0, 0, 1, 0); + p += strlen(p) + 1; + + elf_section_header(p - shstrtab, SHT_RELA, 0, inforelbuf, false, + inforellen, sec_symtab, + sec_debug_info, + efmt->word, efmt->rela_size); + p += strlen(p) + 1; + + elf_section_header(p - shstrtab, SHT_PROGBITS, 0, abbrevbuf, false, + abbrevlen, 0, 0, 1, 0); + p += strlen(p) + 1; + + elf_section_header(p - shstrtab, SHT_PROGBITS, 0, linebuf, false, + linelen, 0, 0, 1, 0); + p += strlen(p) + 1; + + elf_section_header(p - shstrtab, SHT_RELA, 0, linerelbuf, false, + linerellen, sec_symtab, + sec_debug_line, + efmt->word, efmt->rela_size); + p += strlen(p) + 1; + + elf_section_header(p - shstrtab, SHT_PROGBITS, 0, framebuf, false, + framelen, 0, 0, 8, 0); + p += strlen(p) + 1; + + elf_section_header(p - shstrtab, SHT_PROGBITS, 0, locbuf, false, + loclen, 0, 0, 1, 0); + p += strlen(p) 
+ 1; + } + + /* .shstrtab */ + elf_section_header(p - shstrtab, SHT_STRTAB, 0, shstrtab, false, + shstrtablen, 0, 0, 1, 0); + p += strlen(p) + 1; + + /* .symtab */ + elf_section_header(p - shstrtab, SHT_SYMTAB, 0, symtab, true, + symtab->datalen, sec_strtab, symtablocal, + efmt->word, efmt->sym_size); + p += strlen(p) + 1; + + /* .strtab */ + elf_section_header(p - shstrtab, SHT_STRTAB, 0, strs, true, + strslen, 0, 0, 1, 0); + p += strlen(p) + 1 +; + /* .symtab_shndx */ + if (symtab_shndx) { + elf_section_header(p - shstrtab, SHT_SYMTAB_SHNDX, 0, + symtab_shndx, true, symtab_shndx->datalen, + sec_symtab, 0, 1, 0); + p += strlen(p) + 1; + } + + /* The relocation sections */ + for (i = 0; i < nsects; i++) { + if (sects[i]->rel) { + elf_section_header(p - shstrtab, efmt->reltype, 0, + sects[i]->rel, true, sects[i]->rel->datalen, + sec_symtab, sects[i]->shndx, + efmt->word, efmt->rel_size); p += strlen(p) + 1; + } } fwritezero(align, ofile); @@ -1935,241 +1984,155 @@ static void elf_write(void) nasm_free(elf_sects); saa_free(symtab); + if (symtab_shndx) + saa_free(symtab_shndx); } -static struct SAA *elf_build_symtab(int32_t *len, int32_t *local) +static size_t nsyms; + +static void elf_sym(const struct elf_symbol *sym) { - struct SAA *s = saa_init(1L); - struct elf_symbol *sym; + int shndx = sym->section; + + /* + * Careful here. This relies on sym->section being signed; for + * special section indicies this value needs to be cast to + * (int16_t) so that it sign-extends, however, here SHN_LORESERVE + * is used as an unsigned constant. 
+ */ + if (shndx >= (int)SHN_LORESERVE) { + if (unlikely(!symtab_shndx)) { + /* Create symtab_shndx and fill previous entries with zero */ + symtab_shndx = saa_init(1); + saa_wbytes(symtab_shndx, NULL, nsyms << 2); + } + } else { + shndx = 0; /* Section index table always write zero */ + } + + if (symtab_shndx) + saa_write32(symtab_shndx, shndx); + + efmt->elf_sym(sym); + nsyms++; +} + +static void elf32_sym(const struct elf_symbol *sym) +{ + Elf32_Sym sym32; + + sym32.st_name = cpu_to_le32(sym->strpos); + sym32.st_value = cpu_to_le32(sym->symv.key); + sym32.st_size = cpu_to_le32(sym->size); + sym32.st_info = sym->type; + sym32.st_other = sym->other; + sym32.st_shndx = elf_shndx(sym->section, SHN_XINDEX); + saa_wbytes(symtab, &sym32, sizeof sym32); +} + +static void elf64_sym(const struct elf_symbol *sym) +{ + Elf64_Sym sym64; + + sym64.st_name = cpu_to_le32(sym->strpos); + sym64.st_value = cpu_to_le64(sym->symv.key); + sym64.st_size = cpu_to_le64(sym->size); + sym64.st_info = sym->type; + sym64.st_other = sym->other; + sym64.st_shndx = elf_shndx(sym->section, SHN_XINDEX); + saa_wbytes(symtab, &sym64, sizeof sym64); +} + +static size_t elf_build_symtab(void) +{ + struct elf_symbol *sym, xsym; + size_t nlocal; int i; - size_t usize = is_elf64() ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym); - union { - Elf32_Sym sym32; - Elf64_Sym sym64; - } u; - - *len = *local = 0; + symtab = saa_init(1); + symtab_shndx = NULL; /* * Zero symbol first as required by spec. */ - saa_wbytes(s, NULL, usize); - *len += usize; - (*local)++; + nasm_zero(xsym); + elf_sym(&xsym); /* * Next, an entry for the file name. 
*/ - if (is_elf64()) { - u.sym64.st_name = cpu_to_le32(1); - u.sym64.st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE); - u.sym64.st_other = 0; - u.sym64.st_shndx = cpu_to_le16(SHN_ABS); - u.sym64.st_value = 0; - u.sym64.st_size = 0; - } else { - u.sym32.st_name = cpu_to_le32(1); - u.sym32.st_value = 0; - u.sym32.st_size = 0; - u.sym32.st_info = ELF32_ST_INFO(STB_LOCAL, STT_FILE); - u.sym32.st_other = 0; - u.sym32.st_shndx = cpu_to_le16(SHN_ABS); - } - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - + nasm_zero(xsym); + xsym.strpos = 1; + xsym.type = ELF32_ST_INFO(STB_LOCAL, STT_FILE); + xsym.section = XSHN_ABS; + elf_sym(&xsym); /* * Now some standard symbols defining the segments, for relocation * purposes. */ - if (is_elf64()) { - u.sym64.st_name = 0; - u.sym64.st_other = 0; - u.sym64.st_value = 0; - u.sym64.st_size = 0; - for (i = 1; i <= nsects; i++) { - u.sym64.st_info = ELF64_ST_INFO(STB_LOCAL, STT_SECTION); - u.sym64.st_shndx = cpu_to_le16(i); - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - } - } else { - u.sym32.st_name = 0; - u.sym32.st_value = 0; - u.sym32.st_size = 0; - u.sym32.st_other = 0; - for (i = 1; i <= nsects; i++) { - u.sym32.st_info = ELF32_ST_INFO(STB_LOCAL, STT_SECTION); - u.sym32.st_shndx = cpu_to_le16(i); - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - } + nasm_zero(xsym); + for (i = 1; i <= nsects; i++) { + xsym.type = ELF64_ST_INFO(STB_LOCAL, STT_SECTION); + xsym.section = i; + elf_sym(&xsym); + } + + /* + * dwarf needs symbols for debug sections + * which are relocation targets. + */ + if (dfmt_is_dwarf()) { + dwarf_infosym = nsyms; + xsym.section = sec_debug_info; + elf_sym(&xsym); + + dwarf_abbrevsym = nsyms; + xsym.section = sec_debug_abbrev; + elf_sym(&xsym); + + dwarf_linesym = nsyms; + xsym.section = sec_debug_line; + elf_sym(&xsym); } /* * Now the other local symbols. 
*/ saa_rewind(syms); - if (is_elf64()) { - while ((sym = saa_rstruct(syms))) { - if (sym->type & SYM_GLOBAL) - continue; - u.sym64.st_name = cpu_to_le32(sym->strpos); - u.sym64.st_info = sym->type; - u.sym64.st_other = sym->other; - u.sym64.st_shndx = cpu_to_le16(sym->section); - u.sym64.st_value = cpu_to_le64(sym->symv.key); - u.sym64.st_size = cpu_to_le64(sym->size); - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - } - /* - * dwarf needs symbols for debug sections - * which are relocation targets. - */ - if (dfmt_is_dwarf()) { - dwarf_infosym = *local; - u.sym64.st_name = 0; - u.sym64.st_info = ELF64_ST_INFO(STB_LOCAL, STT_SECTION); - u.sym64.st_other = 0; - u.sym64.st_shndx = cpu_to_le16(debug_info); - u.sym64.st_value = 0; - u.sym64.st_size = 0; - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - dwarf_abbrevsym = *local; - u.sym64.st_name = 0; - u.sym64.st_info = ELF64_ST_INFO(STB_LOCAL, STT_SECTION); - u.sym64.st_other = 0; - u.sym64.st_shndx = cpu_to_le16(debug_abbrev); - u.sym64.st_value = 0; - u.sym64.st_size = 0; - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - dwarf_linesym = *local; - u.sym64.st_name = 0; - u.sym64.st_info = ELF64_ST_INFO(STB_LOCAL, STT_SECTION); - u.sym64.st_other = 0; - u.sym64.st_shndx = cpu_to_le16(debug_line); - u.sym64.st_value = 0; - u.sym64.st_size = 0; - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - } - } else { - while ((sym = saa_rstruct(syms))) { - if (sym->type & SYM_GLOBAL) - continue; - u.sym32.st_name = cpu_to_le32(sym->strpos); - u.sym32.st_value = cpu_to_le32(sym->symv.key); - u.sym32.st_size = cpu_to_le32(sym->size); - u.sym32.st_info = sym->type; - u.sym32.st_other = sym->other; - u.sym32.st_shndx = cpu_to_le16(sym->section); - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - } - /* - * dwarf needs symbols for debug sections - * which are relocation targets. 
- */ - if (dfmt_is_dwarf()) { - dwarf_infosym = *local; - u.sym32.st_name = 0; - u.sym32.st_value = 0; - u.sym32.st_size = 0; - u.sym32.st_info = ELF32_ST_INFO(STB_LOCAL, STT_SECTION); - u.sym32.st_other = 0; - u.sym32.st_shndx = cpu_to_le16(sec_debug_info); - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - dwarf_abbrevsym = *local; - u.sym32.st_name = 0; - u.sym32.st_value = 0; - u.sym32.st_size = 0; - u.sym32.st_info = ELF32_ST_INFO(STB_LOCAL, STT_SECTION); - u.sym32.st_other = 0; - u.sym32.st_shndx = cpu_to_le16(sec_debug_abbrev); - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - dwarf_linesym = *local; - u.sym32.st_name = 0; - u.sym32.st_value = 0; - u.sym32.st_size = 0; - u.sym32.st_info = ELF32_ST_INFO(STB_LOCAL, STT_SECTION); - u.sym32.st_other = 0; - u.sym32.st_shndx = cpu_to_le16(sec_debug_line); - saa_wbytes(s, &u, usize); - *len += usize; - (*local)++; - } + while ((sym = saa_rstruct(syms))) { + if (sym->type & SYM_GLOBAL) + continue; + + elf_sym(sym); } + nlocal = nsyms; + /* * Now the global symbols. 
*/ saa_rewind(syms); - if (is_elf64()) { - while ((sym = saa_rstruct(syms))) { - if (!(sym->type & SYM_GLOBAL)) - continue; - u.sym64.st_name = cpu_to_le32(sym->strpos); - u.sym64.st_info = sym->type; - u.sym64.st_other = sym->other; - u.sym64.st_shndx = cpu_to_le16(sym->section); - u.sym64.st_value = cpu_to_le64(sym->symv.key); - u.sym64.st_size = cpu_to_le64(sym->size); - saa_wbytes(s, &u, usize); - *len += usize; - } - } else { - while ((sym = saa_rstruct(syms))) { - if (!(sym->type & SYM_GLOBAL)) - continue; - u.sym32.st_name = cpu_to_le32(sym->strpos); - u.sym32.st_value = cpu_to_le32(sym->symv.key); - u.sym32.st_size = cpu_to_le32(sym->size); - u.sym32.st_info = sym->type; - u.sym32.st_other = sym->other; - u.sym32.st_shndx = cpu_to_le16(sym->section); - saa_wbytes(s, &u, usize); - *len += usize; - } + while ((sym = saa_rstruct(syms))) { + if (!(sym->type & SYM_GLOBAL)) + continue; + + elf_sym(sym); } - return s; + return nlocal; } -static struct SAA *elf_build_reltab(uint64_t *len, struct elf_reloc *r) +static struct SAA *elf32_build_reltab(const struct elf_reloc *r) { struct SAA *s; int32_t global_offset; - - size_t usize = is_elf64() ? sizeof(Elf64_Rela) : - (is_elfx32() ? 
sizeof(Elf32_Rela) : sizeof(Elf32_Rel)); - union { - Elf32_Rel rel32; - Elf32_Rela rela32; - Elf64_Rela rela64; - } u; + Elf32_Rel rel32; if (!r) return NULL; s = saa_init(1L); - *len = 0; /* * How to onvert from a global placeholder to a real symbol index; @@ -2178,51 +2141,87 @@ static struct SAA *elf_build_reltab(uint64_t *len, struct elf_reloc *r) */ global_offset = -GLOBAL_TEMP_BASE + nsects + nlocals + ndebugs + 2; - if (is_elf32()) { - while (r) { - int32_t sym = r->symbol; + while (r) { + int32_t sym = r->symbol; - if (sym >= GLOBAL_TEMP_BASE) - sym += global_offset; + if (sym >= GLOBAL_TEMP_BASE) + sym += global_offset; - u.rel32.r_offset = cpu_to_le32(r->address); - u.rel32.r_info = cpu_to_le32(ELF32_R_INFO(sym, r->type)); - saa_wbytes(s, &u, usize); - *len += usize; + rel32.r_offset = cpu_to_le32(r->address); + rel32.r_info = cpu_to_le32(ELF32_R_INFO(sym, r->type)); + saa_wbytes(s, &rel32, sizeof rel32); - r = r->next; - } - } else if (is_elfx32()) { - while (r) { - int32_t sym = r->symbol; + r = r->next; + } - if (sym >= GLOBAL_TEMP_BASE) - sym += global_offset; + return s; +} - u.rela32.r_offset = cpu_to_le32(r->address); - u.rela32.r_info = cpu_to_le32(ELF32_R_INFO(sym, r->type)); - u.rela32.r_addend = cpu_to_le32(r->offset); - saa_wbytes(s, &u, usize); - *len += usize; +static struct SAA *elfx32_build_reltab(const struct elf_reloc *r) +{ + struct SAA *s; + int32_t global_offset; + Elf32_Rela rela32; - r = r->next; - } - } else { - nasm_assert(is_elf64()); - while (r) { - int32_t sym = r->symbol; + if (!r) + return NULL; - if (sym >= GLOBAL_TEMP_BASE) - sym += global_offset; + s = saa_init(1L); - u.rela64.r_offset = cpu_to_le64(r->address); - u.rela64.r_info = cpu_to_le64(ELF64_R_INFO(sym, r->type)); - u.rela64.r_addend = cpu_to_le64(r->offset); - saa_wbytes(s, &u, usize); - *len += usize; + /* + * How to convert from a global placeholder to a real symbol index; + * the +2 refers to the two special entries, the null entry and + * the filename entry. 
+ */ + global_offset = -GLOBAL_TEMP_BASE + nsects + nlocals + ndebugs + 2; - r = r->next; - } + while (r) { + int32_t sym = r->symbol; + + if (sym >= GLOBAL_TEMP_BASE) + sym += global_offset; + + rela32.r_offset = cpu_to_le32(r->address); + rela32.r_info = cpu_to_le32(ELF32_R_INFO(sym, r->type)); + rela32.r_addend = cpu_to_le32(r->offset); + saa_wbytes(s, &rela32, sizeof rela32); + + r = r->next; + } + + return s; +} + +static struct SAA *elf64_build_reltab(const struct elf_reloc *r) +{ + struct SAA *s; + int32_t global_offset; + Elf64_Rela rela64; + + if (!r) + return NULL; + + s = saa_init(1L); + + /* + * How to convert from a global placeholder to a real symbol index; + * the +2 refers to the two special entries, the null entry and + * the filename entry. + */ + global_offset = -GLOBAL_TEMP_BASE + nsects + nlocals + ndebugs + 2; + + while (r) { + int32_t sym = r->symbol; + + if (sym >= GLOBAL_TEMP_BASE) + sym += global_offset; + + rela64.r_offset = cpu_to_le64(r->address); + rela64.r_info = cpu_to_le64(ELF64_R_INFO(sym, r->type)); + rela64.r_addend = cpu_to_le64(r->offset); + saa_wbytes(s, &rela64, sizeof rela64); + + r = r->next; } return s; @@ -2233,47 +2232,46 @@ static void elf_section_header(int name, int type, uint64_t flags, int link, int info, uint64_t align, uint64_t entsize) { - union { - Elf32_Shdr shdr32; - Elf64_Shdr shdr64; - } shdr; - elf_sects[elf_nsect].data = data; elf_sects[elf_nsect].len = datalen; elf_sects[elf_nsect].is_saa = is_saa; elf_nsect++; - if (is_elf32() || is_elfx32()) { - shdr.shdr32.sh_name = cpu_to_le32(name); - shdr.shdr32.sh_type = cpu_to_le32(type); - shdr.shdr32.sh_flags = cpu_to_le32(flags); - shdr.shdr32.sh_addr = 0; - shdr.shdr32.sh_offset = cpu_to_le32(type == SHT_NULL ? 
0 : elf_foffs); - shdr.shdr32.sh_size = cpu_to_le32(datalen); + if (!efmt->elf64) { + Elf32_Shdr shdr; + + shdr.sh_name = cpu_to_le32(name); + shdr.sh_type = cpu_to_le32(type); + shdr.sh_flags = cpu_to_le32(flags); + shdr.sh_addr = 0; + shdr.sh_offset = cpu_to_le32(type == SHT_NULL ? 0 : elf_foffs); + shdr.sh_size = cpu_to_le32(datalen); if (data) elf_foffs += ALIGN(datalen, SEC_FILEALIGN); - shdr.shdr32.sh_link = cpu_to_le32(link); - shdr.shdr32.sh_info = cpu_to_le32(info); - shdr.shdr32.sh_addralign = cpu_to_le32(align); - shdr.shdr32.sh_entsize = cpu_to_le32(entsize); + shdr.sh_link = cpu_to_le32(link); + shdr.sh_info = cpu_to_le32(info); + shdr.sh_addralign = cpu_to_le32(align); + shdr.sh_entsize = cpu_to_le32(entsize); + + nasm_write(&shdr, sizeof shdr, ofile); } else { - nasm_assert(is_elf64()); + Elf64_Shdr shdr; - shdr.shdr64.sh_name = cpu_to_le32(name); - shdr.shdr64.sh_type = cpu_to_le32(type); - shdr.shdr64.sh_flags = cpu_to_le64(flags); - shdr.shdr64.sh_addr = 0; - shdr.shdr64.sh_offset = cpu_to_le64(type == SHT_NULL ? 0 : elf_foffs); - shdr.shdr64.sh_size = cpu_to_le32(datalen); + shdr.sh_name = cpu_to_le32(name); + shdr.sh_type = cpu_to_le32(type); + shdr.sh_flags = cpu_to_le64(flags); + shdr.sh_addr = 0; + shdr.sh_offset = cpu_to_le64(type == SHT_NULL ? 0 : elf_foffs); + shdr.sh_size = cpu_to_le64(datalen); if (data) elf_foffs += ALIGN(datalen, SEC_FILEALIGN); - shdr.shdr64.sh_link = cpu_to_le32(link); - shdr.shdr64.sh_info = cpu_to_le32(info); - shdr.shdr64.sh_addralign = cpu_to_le64(align); - shdr.shdr64.sh_entsize = cpu_to_le64(entsize); - } + shdr.sh_link = cpu_to_le32(link); + shdr.sh_info = cpu_to_le32(info); + shdr.sh_addralign = cpu_to_le64(align); + shdr.sh_entsize = cpu_to_le64(entsize); - nasm_write(&shdr, is_elf64() ? 
sizeof(shdr.shdr64) : sizeof(shdr.shdr32), ofile); + nasm_write(&shdr, sizeof shdr, ofile); + } } static void elf_write_sections(void) @@ -2306,15 +2304,9 @@ static void elf_sect_writeaddr(struct elf_section *sect, int64_t data, size_t le static void elf_sectalign(int32_t seg, unsigned int value) { - struct elf_section *s = NULL; - int i; + struct elf_section *s; - for (i = 0; i < nsects; i++) { - if (sects[i]->index == seg) { - s = sects[i]; - break; - } - } + s = raa_read_ptr(section_by_index, seg >> 1); if (!s || !is_power2(value)) return; @@ -2375,7 +2367,7 @@ const struct ofmt of_elf32 = { elf32_debugs_arr, &elf32_df_stabs, elf_stdmac, - elf_init, + elf32_init, null_reset, nasm_do_legacy_output, elf32_out, @@ -2427,7 +2419,7 @@ const struct ofmt of_elf64 = { elf64_debugs_arr, &elf64_df_stabs, elf_stdmac, - elf_init, + elf64_init, null_reset, nasm_do_legacy_output, elf64_out, @@ -2479,7 +2471,7 @@ const struct ofmt of_elfx32 = { elfx32_debugs_arr, &elfx32_df_stabs, elf_stdmac, - elf_init, + elfx32_init, null_reset, nasm_do_legacy_output, elfx32_out, diff --git a/output/outelf.h b/output/outelf.h index 3c4a40c0..d499117c 100644 --- a/output/outelf.h +++ b/output/outelf.h @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- * * - * Copyright 1996-2009 The NASM Authors - All Rights Reserved + * Copyright 1996-2019 The NASM Authors - All Rights Reserved * See the file AUTHORS included with the NASM distribution for * the specific copyright holders. 
* @@ -63,21 +63,13 @@ struct elf_known_section { extern const struct elf_known_section elf_known_sections[]; /* - * Special ELF sections (after the real sections but before debugging ones) - */ -#define sec_shstrtab (nsects + 1) -#define sec_symtab (nsects + 2) -#define sec_strtab (nsects + 3) -#define sec_numspecial 3 - -/* - * Debugging ELF sections (last in the file) + * Debugging ELF sections (section indices starting with sec_debug) */ /* stabs */ -#define sec_stab (nsections-3) -#define sec_stabstr (nsections-2) -#define sec_rel_stab (nsections-1) +#define sec_stab (sec_debug + 0) +#define sec_stabstr (sec_debug + 1) +#define sec_rel_stab (sec_debug + 2) /* stabs symbol table format */ struct stabentry { @@ -89,16 +81,16 @@ struct stabentry { }; /* dwarf */ -#define sec_debug_aranges (nsections-10) -#define sec_rela_debug_aranges (nsections-9) -#define sec_debug_pubnames (nsections-8) -#define sec_debug_info (nsections-7) -#define sec_rela_debug_info (nsections-6) -#define sec_debug_abbrev (nsections-5) -#define sec_debug_line (nsections-4) -#define sec_rela_debug_line (nsections-3) -#define sec_debug_frame (nsections-2) -#define sec_debug_loc (nsections-1) +#define sec_debug_aranges (sec_debug + 0) +#define sec_rela_debug_aranges (sec_debug + 1) +#define sec_debug_pubnames (sec_debug + 2) +#define sec_debug_info (sec_debug + 3) +#define sec_rela_debug_info (sec_debug + 4) +#define sec_debug_abbrev (sec_debug + 5) +#define sec_debug_line (sec_debug + 6) +#define sec_rela_debug_line (sec_debug + 7) +#define sec_debug_frame (sec_debug + 8) +#define sec_debug_loc (sec_debug + 9) extern uint8_t elf_osabi; extern uint8_t elf_abiver; @@ -137,15 +129,14 @@ struct elf_section { uint64_t len; uint64_t size; uint64_t nrelocs; - int32_t index; /* NASM index */ + int32_t index; /* NASM index or NO_SEG if internal */ int shndx; /* ELF index */ - int type; /* SHT_PROGBITS or SHT_NOBITS */ + int type; /* SHT_* */ uint64_t align; /* alignment: power of two */ uint64_t 
flags; /* section flags */ uint64_t entsize; /* entry size */ char *name; struct SAA *rel; - uint64_t rellen; struct elf_reloc *head; struct elf_reloc **tail; struct rbtree *gsyms; /* global symbols in section */ diff --git a/test/fewsecs.asm b/test/fewsecs.asm new file mode 100644 index 00000000..85731acf --- /dev/null +++ b/test/fewsecs.asm @@ -0,0 +1,2 @@ +%assign NSECS 64 +%include "manysecs.asm" diff --git a/test/manysecs.asm b/test/manysecs.asm index c65c6091..49799453 100644 --- a/test/manysecs.asm +++ b/test/manysecs.asm @@ -1,6 +1,15 @@ +%ifndef NSECS + %assign NSECS 16384 +%endif + +%assign NSECS ((NSECS+3) & ~3) + %assign n 0 -%rep 10000 +%rep NSECS + %assign gcom (n & ~3) + 2 section .text %+ n progbits exec +start_ %+ n: nop -%assign n n+1 + jmp start_ %+ gcom + %assign n n+1 %endrep diff --git a/test/moresecs.asm b/test/moresecs.asm new file mode 100644 index 00000000..78d9887b --- /dev/null +++ b/test/moresecs.asm @@ -0,0 +1,3 @@ +; Less than 65,279 data sections, but more total sections +%assign NSECS 37600 +%include "manysecs.asm" diff --git a/test/mostsecs.asm b/test/mostsecs.asm new file mode 100644 index 00000000..0b91816a --- /dev/null +++ b/test/mostsecs.asm @@ -0,0 +1,3 @@ +; More than 65,279 data sections +%assign NSECS 131072 +%include "manysecs.asm" From dc5939b4960e169e19c536e5503ec4487cff550d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 26 Feb 2019 01:44:55 -0800 Subject: [PATCH 7/9] Handle more ELF section types note, preinit_array, init_array, and fini_array are ELF section types that can matter to the assembly programmer. Signed-off-by: H. 
Peter Anvin --- doc/changes.src | 3 + doc/nasmdoc.src | 131 ++++++++++++++++++++--------------- output/outelf.c | 177 +++++++++++++++++++++++++++++++----------------- output/outelf.h | 9 --- 4 files changed, 195 insertions(+), 125 deletions(-) diff --git a/doc/changes.src b/doc/changes.src index 1e67bec5..d1181971 100644 --- a/doc/changes.src +++ b/doc/changes.src @@ -15,6 +15,9 @@ after a real error. \b Add support for the \c{merge} and \c{strings} attributes on ELF sections. See \k{elfsect}. +\b Add support for the \c{note}, \c{preinit_array}, \c{init_array}, +and \c{fini_array} sections type in ELF. See \k{elfsect}. + \b Handle more than 32,633 sections in ELF. \S{cl-2.14.02} Version 2.14.02 diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index bcfcad90..cb58045a 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -122,15 +122,14 @@ \IR{- opunary} \c{-} operator, unary \IR{! opunary} \c{!} operator, unary \IR{alignment, in bin sections} alignment, in \c{bin} sections -\IR{alignment, in elf sections} alignment, in \c{elf} sections +\IR{alignment, in elf sections} alignment, in ELF sections \IR{alignment, in win32 sections} alignment, in \c{win32} sections -\IR{alignment, of elf common variables} alignment, of \c{elf} common +\IR{alignment, of elf common variables} alignment, of ELF common variables \IR{alignment, in obj sections} alignment, in \c{obj} sections \IR{a.out, bsd version} \c{a.out}, BSD version \IR{a.out, linux version} \c{a.out}, Linux version -\IR{autoconf} Autoconf -\IR{bin} bin +\IR{bin} \c{bin} output format \IR{bitwise and} bitwise AND \IR{bitwise or} bitwise OR \IR{bitwise xor} bitwise XOR @@ -150,8 +149,8 @@ variables \IR{codeview} CodeView debugging format \IR{common object file format} Common Object File Format \IR{common variables, alignment in elf} common variables, alignment -in \c{elf} -\IR{common, elf extensions to} \c{COMMON}, \c{elf} extensions to +in ELF +\IR{common, elf extensions to} \c{COMMON}, ELF extensions to \IR{common, obj 
extensions to} \c{COMMON}, \c{obj} extensions to \IR{declaring structure} declaring structures \IR{default-wrt mechanism} default-\c{WRT} mechanism @@ -165,7 +164,8 @@ in \c{elf} \IA{effective address}{effective addresses} \IA{effective-address}{effective addresses} \IR{elf} ELF -\IR{elf, 16-bit code and} ELF, 16-bit code and +\IR{elf, 16-bit code} ELF, 16-bit code +\IR{elf, debug formats} ELF, debug formats \IR{elf shared libraries} ELF, shared libraries \IR{elf32} \c{elf32} \IR{elf64} \c{elf64} @@ -181,7 +181,7 @@ in \c{elf} \IR{functions, pascal calling convention} functions, Pascal calling convention \IR{global, aoutb extensions to} \c{GLOBAL}, \c{aoutb} extensions to -\IR{global, elf extensions to} \c{GLOBAL}, \c{elf} extensions to +\IR{global, elf extensions to} \c{GLOBAL}, ELF extensions to \IR{global, rdf extensions to} \c{GLOBAL}, \c{rdf} extensions to \IR{got} GOT \IR{got relocations} \c{GOT} relocations @@ -238,16 +238,16 @@ convention Object File Format \IR{relocations, pic-specific} relocations, PIC-specific \IA{repeating}{repeating code} -\IR{section alignment, in elf} section alignment, in \c{elf} +\IR{section alignment, in elf} section alignment, in ELF \IR{section alignment, in bin} section alignment, in \c{bin} \IR{section alignment, in obj} section alignment, in \c{obj} \IR{section alignment, in win32} section alignment, in \c{win32} -\IR{section, elf extensions to} \c{SECTION}, \c{elf} extensions to +\IR{section, elf extensions to} \c{SECTION}, ELF extensions to \IR{section, macho extensions to} \c{SECTION}, \c{macho} extensions to \IR{section, win32 extensions to} \c{SECTION}, \c{win32} extensions to \IR{segment alignment, in bin} segment alignment, in \c{bin} \IR{segment alignment, in obj} segment alignment, in \c{obj} -\IR{segment, obj extensions to} \c{SEGMENT}, \c{elf} extensions to +\IR{segment, obj extensions to} \c{SEGMENT}, ELF extensions to \IR{segment names, borland pascal} segment names, Borland Pascal \IR{shift command} \c{shift} 
command \IA{sib}{sib byte} @@ -256,11 +256,10 @@ Object File Format \IA{sectalign}{sectalign} \IR{solaris x86} Solaris x86 \IA{standard section names}{standardized section names} -\IR{strings, elf attribute} \c{strings} \IR{symbols, exporting from dlls} symbols, exporting from DLLs \IR{symbols, importing from dlls} symbols, importing from DLLs \IR{test subdirectory} \c{test} subdirectory -\IR{thread local storage in elf} thread local storage, in \c{elf} +\IR{thread local storage in elf} thread local storage, in ELF \IR{thread local storage in mach-o} thread local storage, in \c{macho} \IR{tlink} \c{TLINK} \IR{underscore, in c symbols} underscore, in C symbols @@ -298,16 +297,16 @@ Object File Format The Netwide Assembler, NASM, is an 80x86 and x86-64 assembler designed for portability and modularity. It supports a range of object file -formats, including Linux and \c{*BSD} \c{a.out}, \c{ELF}, \c{COFF}, -\c{Mach-O}, 16-bit and 32-bit \c{OBJ} (OMF) format, \c{Win32} and -\c{Win64}. It will also output plain binary files, Intel hex and +formats, including Linux and *BSD \c{a.out}, ELF, Mach-O, 16-bit and +32-bit \c{.obj} (OMF) format, COFF (including its Win32 and Win64 +variants.) It can also output plain binary files, Intel hex and Motorola S-Record formats. Its syntax is designed to be simple and easy to understand, similar to the syntax in the Intel Software Developer Manual with minimal complexity. It supports all currently known x86 architectural extensions, and has strong support for macros. -NASM also comes with a set of utilities for handling the \c{RDOFF} -custom object-file format. +NASM also comes with a set of utilities for handling its own RDOFF2 +object-file format. \S{legal} \i{License} Conditions @@ -355,7 +354,7 @@ For example, \c nasm -f elf myfile.asm -will assemble \c{myfile.asm} into an \c{ELF} object file \c{myfile.o}. And +will assemble \c{myfile.asm} into an ELF object file \c{myfile.o}. 
And \c nasm -f bin myfile.asm -o myfile.com @@ -377,7 +376,7 @@ The option \c{-hf} will also list the available output file formats, and what they are. If you use Linux but aren't sure whether your system is \c{a.out} -or \c{ELF}, type +or ELF, type \c file nasm @@ -4376,7 +4375,7 @@ operating in 16-bit mode, 32-bit mode or 64-bit mode. The syntax is \c{BITS XX}, where XX is 16, 32 or 64. In most cases, you should not need to use \c{BITS} explicitly. The -\c{aout}, \c{coff}, \c{elf}, \c{macho}, \c{win32} and \c{win64} +\c{aout}, \c{coff}, \c{elf*}, \c{macho}, \c{win32} and \c{win64} object formats, which are designed for use in 32-bit or 64-bit operating systems, all cause NASM to select 32-bit or 64-bit mode, respectively, by default. The \c{obj} object format allows you @@ -4653,9 +4652,8 @@ refer to symbols which \e{are} defined in the same module as the \c ; some code \c{GLOBAL}, like \c{EXTERN}, allows object formats to define private -extensions by means of a colon. The \c{elf} object format, for -example, lets you specify whether global data items are functions or -data: +extensions by means of a colon. The ELF object format, for example, +lets you specify whether global data items are functions or data: \c global hashlookup:function, hashtable:data @@ -4686,8 +4684,8 @@ at the same piece of memory. Like \c{GLOBAL} and \c{EXTERN}, \c{COMMON} supports object-format specific extensions. For example, the \c{obj} format allows common -variables to be NEAR or FAR, and the \c{elf} format allows you to -specify the alignment requirements of a common variable: +variables to be NEAR or FAR, and the ELF format allows you to specify +the alignment requirements of a common variable: \c common commvar 4:near ; works in OBJ \c common intarray 100:4 ; works in ELF: 4 byte aligned @@ -4759,7 +4757,7 @@ For example, when mangling local symbols via the generic namespace: This is useful when the directive is needed to be output format agnostic. 
-The example is also euquivalent to this, when the output format is \c{elf}: +The example is also euquivalent to this, when the output format is ELF: \c %pragma elf gprefix _ @@ -5907,8 +5905,8 @@ Format} Object Files The \c{elf32}, \c{elf64} and \c{elfx32} output formats generate \c{ELF32 and ELF64} (Executable and Linkable Format) object files, as used by Linux as well as \i{Unix System V}, including \i{Solaris x86}, -\i{UnixWare} and \i{SCO Unix}. \c{elf} provides a default output -file-name extension of \c{.o}. \c{elf} is a synonym for \c{elf32}. +\i{UnixWare} and \i{SCO Unix}. ELF provides a default output +file-name extension of \c{.o}. \c{elf} is a synonym for \c{elf32}. The \c{elfx32} format is used for the \i{x32} ABI, which is a 32-bit ABI with the CPU in 64-bit mode. @@ -5921,8 +5919,8 @@ target operating system (OSABI). This field can be set by using the system. If this directive is not used, the default value will be "UNIX System V ABI" (0) which will work on most systems which support ELF. -\S{elfsect} \c{elf} extensions to the \c{SECTION} Directive -\I{SECTION, elf extensions to} +\S{elfsect} ELF extensions to the \c{SECTION} Directive +\I{SECTION, ELF extensions to} Like the \c{obj} format, \c{elf} allows you to specify additional information on the \c{SECTION} directive line, to control the type @@ -5947,23 +5945,42 @@ not. \b \i\c{progbits} defines the section to be one with explicit contents stored in the object file: an ordinary code or data section, for -example, \i\c{nobits} defines the section to be one with no explicit +example. + +\b \i\c{nobits} defines the section to be one with no explicit contents given, such as a BSS section. -\b \c{align=}, used with a trailing number as in \c{obj}, gives the +\b \i\c{note} indicates that this section contains ELF notes. The +content of ELF notes are specified using normal assembly instructions; +it is up to the programmer to ensure these are valid ELF notes. 
+ +\b \i\c{preinit_array} indicates that this section contains function +addresses to be called before any other initialization has happened. + +\b \i\c{init_array} indicates that this section contains function +addresses to be called during initialization. + +\b \i\c{fini_array} indicates that this section contains function +pointers to be called during termination. + +\b \I{align, ELF attribute}\c{align=}, used with a trailing number as in \c{obj}, gives the \I{section alignment, in elf}\I{alignment, in elf sections}alignment requirements of the section. -\b \c{ent=} or \c{entsize=} specifies the fundamental data item size -for a section which contains either fixed-sized data structures or -strings; this is generally used with the \c{merge} attribute (see -below.) - \b \c{byte}, \c{word}, \c{dword}, \c{qword}, \c{tword}, \c{oword}, -\c{yword}, or \c{zword} are both shorthand for \c{entsize=}, but also -sets the default alignment. +\c{yword}, or \c{zword} with an optional \c{*}\i{multiplier} specify +the fundamental data item size for a section which contains either +fixed-sized data structures or strings; it also sets a default +alignment. This is generally used with the \c{strings} and \c{merge} +attributes (see below.) For example \c{byte*4} defines a unit size of +4 bytes, with a default alignment of 1; \c{dword} also defines a unit +size of 4 bytes, but with a default alignment of 4. The \c{align=} +attribute, if specified, overrides this default alignment. -\b \i{strings, ELF attribute}\c{strings} indicate that this section +\b \I{pointer, ELF attribute}\c{pointer} is equivalent to \c{dword} +for \c{elf32} or \c{elfx32}, and \c{qword} for \c{elf64}. + +\b \I{strings, ELF attribute}\c{strings} indicate that this section contains exclusively null-terminated strings. By default these are assumed to be byte strings, but a size specifier can be used to override that. 
@@ -5983,24 +6000,28 @@ qualifiers are: \I\c{.text} \I\c{.rodata} \I\c{.lrodata} \I\c{.data} \I\c{.ldata} \I\c{.bss} \I\c{.lbss} \I\c{.tdata} \I\c{.tbss} \I\c\{.comment} -\c section .text progbits alloc exec nowrite align=16 -\c section .rodata progbits alloc noexec nowrite align=4 -\c section .lrodata progbits alloc noexec nowrite align=4 -\c section .data progbits alloc noexec write align=4 -\c section .ldata progbits alloc noexec write align=4 -\c section .bss nobits alloc noexec write align=4 -\c section .lbss nobits alloc noexec write align=4 -\c section .tdata progbits alloc noexec write align=4 tls -\c section .tbss nobits alloc noexec write align=4 tls -\c section .comment progbits noalloc noexec nowrite align=1 -\c section other progbits alloc noexec nowrite align=1 +\c section .text progbits alloc exec nowrite align=16 +\c section .rodata progbits alloc noexec nowrite align=4 +\c section .lrodata progbits alloc noexec nowrite align=4 +\c section .data progbits alloc noexec write align=4 +\c section .ldata progbits alloc noexec write align=4 +\c section .bss nobits alloc noexec write align=4 +\c section .lbss nobits alloc noexec write align=4 +\c section .tdata progbits alloc noexec write align=4 tls +\c section .tbss nobits alloc noexec write align=4 tls +\c section .comment progbits noalloc noexec nowrite align=1 +\c section .preinit_array preinit_array alloc noexec nowrite pointer +\c section .init_array init_array alloc noexec nowrite pointer +\c section .fini_array fini_array alloc noexec nowrite pointer +\c section .note note noalloc noexec nowrite align=1 +\c section other progbits alloc noexec nowrite align=1 (Any section name other than those in the above table is treated by default like \c{other} in the above table. Please note that section names are case sensitive.) 
-\S{elfwrt} \i{Position-Independent Code}\I{PIC}: \c{macho} Special +\S{elfwrt} \i{Position-Independent Code}\I{PIC}: ELF Special Symbols and \i\c{WRT} Since \c{ELF} does not support segment-base references, the \c{WRT} @@ -6138,7 +6159,7 @@ requires that it be aligned on a 4-byte boundary. \S{elf16} 16-bit code and ELF -\I{ELF, 16-bit code and} +\I{ELF, 16-bit code} The \c{ELF32} specification doesn't provide relocations for 8- and 16-bit values, but the GNU \c{ld} linker adds these as an extension. @@ -6148,7 +6169,7 @@ be linked as ELF using GNU \c{ld}. If NASM is used with the these relocations is generated. \S{elfdbg} Debug formats and ELF -\I{ELF, Debug formats and} +\I{ELF, debug formats} ELF provides debug information in \c{STABS} and \c{DWARF} formats. Line number information is generated for all executable sections, but please diff --git a/output/outelf.c b/output/outelf.c index 4db7fbde..f0641fd7 100644 --- a/output/outelf.c +++ b/output/outelf.c @@ -228,23 +228,61 @@ static int32_t elf_sym_sect, elf_gottpoff_sect, elf_tlsie_sect; uint8_t elf_osabi = 0; /* Default OSABI = 0 (System V or Linux) */ uint8_t elf_abiver = 0; /* Current ABI version */ -const struct elf_known_section elf_known_sections[] = { - { ".text", SHT_PROGBITS, SHF_ALLOC|SHF_EXECINSTR, 16 }, - { ".rodata", SHT_PROGBITS, SHF_ALLOC, 4 }, - { ".lrodata", SHT_PROGBITS, SHF_ALLOC, 4 }, - { ".data", SHT_PROGBITS, SHF_ALLOC|SHF_WRITE, 4 }, - { ".ldata", SHT_PROGBITS, SHF_ALLOC|SHF_WRITE, 4 }, - { ".bss", SHT_NOBITS, SHF_ALLOC|SHF_WRITE, 4 }, - { ".lbss", SHT_NOBITS, SHF_ALLOC|SHF_WRITE, 4 }, - { ".tdata", SHT_PROGBITS, SHF_ALLOC|SHF_WRITE|SHF_TLS, 4 }, - { ".tbss", SHT_NOBITS, SHF_ALLOC|SHF_WRITE|SHF_TLS, 4 }, - { ".comment", SHT_PROGBITS, 0, 1 }, - { NULL, SHT_PROGBITS, SHF_ALLOC, 1 } /* default */ +/* Known sections with nonstandard defaults. -n means n*pointer size. 
*/ +struct elf_known_section { + const char *name; /* Name of section */ + int type; /* Section type (SHT_) */ + uint32_t flags; /* Section flags (SHF_) */ + int align; /* Section alignment */ + int entsize; /* Entry size, if applicable */ }; +static const struct elf_known_section elf_known_sections[] = { + { ".text", SHT_PROGBITS, SHF_ALLOC|SHF_EXECINSTR, 16, 0 }, + { ".rodata", SHT_PROGBITS, SHF_ALLOC, 4, 0 }, + { ".lrodata", SHT_PROGBITS, SHF_ALLOC, 4, 0 }, + { ".data", SHT_PROGBITS, SHF_ALLOC|SHF_WRITE, 4, 0 }, + { ".ldata", SHT_PROGBITS, SHF_ALLOC|SHF_WRITE, 4, 0 }, + { ".bss", SHT_NOBITS, SHF_ALLOC|SHF_WRITE, 4, 0 }, + { ".lbss", SHT_NOBITS, SHF_ALLOC|SHF_WRITE, 4, 0 }, + { ".tdata", SHT_PROGBITS, SHF_ALLOC|SHF_WRITE|SHF_TLS, 4, 0 }, + { ".tbss", SHT_NOBITS, SHF_ALLOC|SHF_WRITE|SHF_TLS, 4, 0 }, + { ".comment", SHT_PROGBITS, 0, 1, 0 }, + { ".preinit_array", SHT_PREINIT_ARRAY, SHF_ALLOC, -1, -1 }, + { ".init_array", SHT_INIT_ARRAY, SHF_ALLOC, -1, -1 }, + { ".fini_array", SHT_FINI_ARRAY, SHF_ALLOC, -1, -1 }, + { ".note", SHT_NOTE, 0, 1, 0 }, + { NULL /*default*/, SHT_PROGBITS, SHF_ALLOC, 1, 0 } +}; + +struct size_unit { + char name[8]; + int bytes; + int align; +}; +static const struct size_unit size_units[] = +{ + { "byte", 1, 1 }, + { "word", 2, 2 }, + { "dword", 4, 4 }, + { "qword", 8, 8 }, + { "tword", 10, 2 }, + { "tbyte", 10, 2 }, + { "oword", 16, 16 }, + { "xword", 16, 16 }, + { "yword", 32, 32 }, + { "zword", 64, 64 }, + { "pointer", -1, -1 }, + { "", 0, 0 } +}; + +static inline size_t to_bytes(int val) +{ + return (val >= 0) ? 
(size_t)val : -val * efmt->word; +} + /* parse section attributes */ -static void elf_section_attrib(char *name, char *attr, int pass, - uint32_t *flags_and, uint32_t *flags_or, +static void elf_section_attrib(char *name, char *attr, uint32_t *flags_and, uint32_t *flags_or, uint64_t *alignp, uint64_t *entsize, int *type) { char *opt, *val, *next; @@ -286,7 +324,8 @@ static void elf_section_attrib(char *name, char *attr, int pass, } else if (!nasm_stricmp(opt, "write")) { *flags_and |= SHF_WRITE; *flags_or |= SHF_WRITE; - } else if (!nasm_stricmp(opt, "nowrite")) { + } else if (!nasm_stricmp(opt, "nowrite") || + !nasm_stricmp(opt, "readonly")) { *flags_and |= SHF_WRITE; *flags_or &= ~SHF_WRITE; } else if (!nasm_stricmp(opt, "tls")) { @@ -311,52 +350,64 @@ static void elf_section_attrib(char *name, char *attr, int pass, *type = SHT_PROGBITS; } else if (!nasm_stricmp(opt, "nobits")) { *type = SHT_NOBITS; - } else if (!nasm_stricmp(opt, "ent") || !nasm_stricmp(opt,"entsize")) { - bool err; - uint64_t es; - if (!val) { - nasm_error(ERR_NONFATAL, - "section attribute %s without value specified", opt); - } else { - es = readnum(val, &err); - if (err) { - nasm_error(ERR_NONFATAL, - "invalid value %s for section attribute %s", - val, opt); - } else { - *entsize = es; - } - } - } else if (!nasm_stricmp(opt, "byte")) { - xalign = *entsize = 1; - } else if (!nasm_stricmp(opt, "word")) { - xalign = *entsize = 2; - } else if (!nasm_stricmp(opt, "dword")) { - xalign = *entsize = 4; - } else if (!nasm_stricmp(opt, "qword")) { - xalign = *entsize = 8; - } else if (!nasm_stricmp(opt, "tword")) { - *entsize = 10; - xalign = 2; - } else if (!nasm_stricmp(opt, "oword")) { - xalign = *entsize = 16; - } else if (!nasm_stricmp(opt, "yword")) { - xalign = *entsize = 32; - } else if (!nasm_stricmp(opt, "zword")) { - xalign = *entsize = 64; - } else if (pass == 1) { - nasm_error(ERR_WARNING, - "Unknown section attribute '%s' ignored on" - " declaration of section `%s'", opt, name); + } else 
if (!nasm_stricmp(opt, "note")) { + *type = SHT_NOTE; + } else if (!nasm_stricmp(opt, "preinit_array")) { + *type = SHT_PREINIT_ARRAY; + } else if (!nasm_stricmp(opt, "init_array")) { + *type = SHT_INIT_ARRAY; + } else if (!nasm_stricmp(opt, "fini_array")) { + *type = SHT_FINI_ARRAY; + } else { + uint64_t mult; + size_t l; + const char *a = strchr(opt, '*'); + bool err; + const struct size_unit *su; + + if (a) { + l = a - opt - 1; + mult = readnum(a+1, &err); + } else { + l = strlen(opt); + mult = 1; + } + + for (su = size_units; su->bytes; su++) { + if (!nasm_strnicmp(opt, su->name, l)) + break; + } + + if (su->bytes) { + *entsize = to_bytes(su->bytes) * mult; + xalign = to_bytes(su->align); + } else { + /* Unknown attribute */ + nasm_error(ERR_WARNING|ERR_PASS1, + "unknown section attribute '%s' ignored on" + " declaration of section `%s'", opt, name); + } } opt = next; } - if (!align) - align = xalign; + switch (*type) { + case SHT_PREINIT_ARRAY: + case SHT_INIT_ARRAY: + case SHT_FINI_ARRAY: + if (!xalign) + xalign = efmt->word; + if (!*entsize) + *entsize = efmt->word; + break; + default: + break; + } if (!align) - align = SHA_ANY; + align = xalign; + if (!align) + align = SHA_ANY; *alignp = align; } @@ -618,6 +669,8 @@ static int32_t elf_section_names(char *name, int pass, int *bits) struct hash_insert hi; int type; + (void)pass; + if (!name) { *bits = ofmt->maxbits; return def_seg; @@ -628,8 +681,7 @@ static int32_t elf_section_names(char *name, int pass, int *bits) *p++ = '\0'; flags_and = flags_or = type = align = entsize = 0; - elf_section_attrib(name, p, pass, &flags_and, - &flags_or, &align, &entsize, &type); + elf_section_attrib(name, p, &flags_and, &flags_or, &align, &entsize, &type); hp = hash_find(&section_by_name, name, &hi); if (hp) { @@ -649,7 +701,10 @@ static int32_t elf_section_names(char *name, int pass, int *bits) } type = type ? type : ks->type; - align = align ? 
align : ks->align; + if (!align) + align = to_bytes(ks->align); + if (!entsize) + entsize = to_bytes(ks->entsize); flags = (ks->flags & ~flags_and) | flags_or; s = elf_make_section(name, type, flags, align); @@ -1035,7 +1090,7 @@ static void elf32_out(int32_t segto, const void *data, switch (type) { case OUT_RESERVE: - if (s->type == SHT_PROGBITS) { + if (s->type != SHT_NOBITS) { nasm_error(ERR_WARNING, "uninitialized space declared in" " non-BSS section `%s': zeroing", s->name); elf_sect_write(s, NULL, size); @@ -1246,7 +1301,7 @@ static void elf64_out(int32_t segto, const void *data, switch (type) { case OUT_RESERVE: - if (s->type == SHT_PROGBITS) { + if (s->type != SHT_NOBITS) { nasm_error(ERR_WARNING, "uninitialized space declared in" " non-BSS section `%s': zeroing", s->name); elf_sect_write(s, NULL, size); @@ -1528,7 +1583,7 @@ static void elfx32_out(int32_t segto, const void *data, switch (type) { case OUT_RESERVE: - if (s->type == SHT_PROGBITS) { + if (s->type != SHT_NOBITS) { nasm_error(ERR_WARNING, "uninitialized space declared in" " non-BSS section `%s': zeroing", s->name); elf_sect_write(s, NULL, size); diff --git a/output/outelf.h b/output/outelf.h index d499117c..af6af070 100644 --- a/output/outelf.h +++ b/output/outelf.h @@ -53,15 +53,6 @@ /* this stuff is needed for the dwarf/stabs debugging format */ #define TY_DEBUGSYMLIN 0x40 /* internal call to debug_out */ -/* Known sections with nonstandard defaults */ -struct elf_known_section { - const char *name; /* Name of section */ - int type; /* Section type (SHT_) */ - uint32_t flags; /* Section flags (SHF_) */ - uint32_t align; /* Section alignment */ -}; -extern const struct elf_known_section elf_known_sections[]; - /* * Debugging ELF sections (section indicies starting with sec_debug) */ From a8604c83fa8ece9859fb76b328b8753f549b8863 Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Tue, 26 Feb 2019 02:36:15 -0800 Subject: [PATCH 8/9] ELF: the .note section should be 4-byte aligned The ELF .note section consists of 4-byte words and should be aligned accordingly. Signed-off-by: H. Peter Anvin --- doc/nasmdoc.src | 2 +- output/outelf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src index cb58045a..8310faac 100644 --- a/doc/nasmdoc.src +++ b/doc/nasmdoc.src @@ -6013,7 +6013,7 @@ qualifiers are: \c section .preinit_array preinit_array alloc noexec nowrite pointer \c section .init_array init_array alloc noexec nowrite pointer \c section .fini_array fini_array alloc noexec nowrite pointer -\c section .note note noalloc noexec nowrite align=1 +\c section .note note noalloc noexec nowrite align=4 \c section other progbits alloc noexec nowrite align=1 (Any section name other than those in the above table diff --git a/output/outelf.c b/output/outelf.c index f0641fd7..36531d3a 100644 --- a/output/outelf.c +++ b/output/outelf.c @@ -251,7 +251,7 @@ static const struct elf_known_section elf_known_sections[] = { { ".preinit_array", SHT_PREINIT_ARRAY, SHF_ALLOC, -1, -1 }, { ".init_array", SHT_INIT_ARRAY, SHF_ALLOC, -1, -1 }, { ".fini_array", SHT_FINI_ARRAY, SHF_ALLOC, -1, -1 }, - { ".note", SHT_NOTE, 0, 1, 0 }, + { ".note", SHT_NOTE, 0, 4, 0 }, { NULL /*default*/, SHT_PROGBITS, SHF_ALLOC, 1, 0 } }; From 8b262474424c0f6912b22bbf7452f26bfa4d1235 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 26 Feb 2019 14:00:54 -0800 Subject: [PATCH 9/9] preproc: add %i... variants, evaluated macro parameters, cleanups All directives which create single-line macros now have %i... variants to define case-insensitive versions. Case insensitive rather sucks, but at least this way it is consistent. Single-line macro parameters can now be evaluated as a number, as done by %assign. To do so, declare a parameter starting with =, for example: %define foo(x,=y) mov [x],macro_array_y ... 
would evaluate y as a number but leave x as a string. NOTE: it would arguably be better to have this on a per-instance basis, but it is easily handled by having a secondary macro called with the same argument twice. Finally, add a more consistent method for defining "magic" macros, which need to be evaluated at runtime. For now, it is only used by the special macros __FILE__, __LINE__, __BITS__, __PTR__, and __PASS__. __PTR__ is a new macro which evaluates to word, dword or qword matching the value of __BITS__. The magic macro framework, however, provides a natural hook for a future plug-in infrastructure to hook into a scripting language. Signed-off-by: H. Peter Anvin --- asm/pptok.dat | 8 +- asm/preproc.c | 622 +++++++++++++++++++++++++++----------------- asm/quote.c | 8 + asm/quote.h | 1 + macros/standard.mac | 8 +- test/evalmacro.asm | 4 + 6 files changed, 407 insertions(+), 244 deletions(-) create mode 100644 test/evalmacro.asm diff --git a/asm/pptok.dat b/asm/pptok.dat index a2c64d0a..a5a6e590 100644 --- a/asm/pptok.dat +++ b/asm/pptok.dat @@ -1,6 +1,6 @@ ## -------------------------------------------------------------------------- ## -## Copyright 1996-2016 The NASM Authors - All Rights Reserved +## Copyright 1996-2019 The NASM Authors - All Rights Reserved ## See the file AUTHORS included with the NASM distribution for ## the specific copyright holders. ## @@ -70,8 +70,12 @@ %ideftok %if* %imacro -%irmacro %include +%ipathsearch +%irmacro +%istrcat +%istrlen +%isubstr %ixdefine %line %local diff --git a/asm/preproc.c b/asm/preproc.c index 95ca56fc..5ae2587d 100644 --- a/asm/preproc.c +++ b/asm/preproc.c @@ -106,12 +106,17 @@ typedef struct IncPath IncPath; * Store the definition of a single-line macro. 
*/ struct SMacro { - SMacro *next; + SMacro *next; /* MUST BE FIRST - see free_smacro() */ char *name; - bool casesense; - bool in_progress; + union { + Token *expansion; + Token *(*magic)(const SMacro *s, Token **params, int *paramsize); + } e; + bool *eval_param; unsigned int nparam; - Token *expansion; + bool casesense; + bool magic; + bool in_progress; }; /* @@ -189,15 +194,15 @@ struct Context { * This is the internal form which we break input lines up into. * Typically stored in linked lists. * - * Note that `type' serves a double meaning: TOK_SMAC_PARAM is not - * necessarily used as-is, but is intended to denote the number of - * the substituted parameter. So in the definition + * Note that `type' serves a double meaning: TOK_SMAC_START_PARAMS is + * not necessarily used as-is, but is also used to encode the number + * and expansion type of substituted parameter. So in the definition * - * %define a(x,y) ( (x) & ~(y) ) + * %define a(x,=y) ( (x) & ~(y) ) * * the token representing `x' will have its type changed to - * TOK_SMAC_PARAM, but the one representing `y' will be - * TOK_SMAC_PARAM+1. + * tok_smac_param(0) but the one representing `y' will be + * tok_smac_param(1); see the accessor functions below. * * TOK_INTERNAL_STRING is a dirty hack: it's a single string token * which doesn't need quotes around it. Used in the pre-include @@ -207,15 +212,29 @@ struct Context { enum pp_token_type { TOK_NONE = 0, TOK_WHITESPACE, TOK_COMMENT, TOK_ID, TOK_PREPROC_ID, TOK_STRING, - TOK_NUMBER, TOK_FLOAT, TOK_SMAC_END, TOK_OTHER, + TOK_NUMBER, TOK_FLOAT, TOK_OTHER, TOK_INTERNAL_STRING, TOK_PREPROC_Q, TOK_PREPROC_QQ, TOK_PASTE, /* %+ */ TOK_INDIRECT, /* %[...] */ - TOK_SMAC_PARAM, /* MUST BE LAST IN THE LIST!!! */ + TOK_SMAC_END, /* Marker for the end of smacro expansion */ + TOK_SMAC_START_PARAMS, /* MUST BE LAST IN THE LIST!!! 
*/ TOK_MAX = INT_MAX /* Keep compiler from reducing the range */ }; +static inline enum pp_token_type tok_smac_param(int param) +{ + return TOK_SMAC_START_PARAMS + param; +} +static int smac_nparam(enum pp_token_type toktype) +{ + return toktype - TOK_SMAC_START_PARAMS; +} +static bool is_smac_param(enum pp_token_type toktype) +{ + return toktype >= TOK_SMAC_START_PARAMS; +} + #define PP_CONCAT_MASK(x) (1 << (x)) #define PP_CONCAT_MATCH(t, mask) (PP_CONCAT_MASK((t)->type) & mask) @@ -461,7 +480,8 @@ static Token *expand_mmac_params(Token * tline); static Token *expand_smacro(Token * tline); static Token *expand_id(Token * tline); static Context *get_ctx(const char *name, const char **namep); -static void make_tok_num(Token * tok, int64_t val); +static Token *make_tok_num(int64_t val); +static Token *make_tok_qstr(const char *str); static void pp_verror(int severity, const char *fmt, va_list ap); static vefunc real_verror; static void *new_Block(size_t size); @@ -630,6 +650,23 @@ static void free_mmacro(MMacro * m) nasm_free(m); } +/* + * Free an SMacro + */ +static void free_smacro(SMacro *s, bool really) +{ + nasm_free(s->name); + if (!s->magic) + free_tlist(s->e.expansion); + nasm_free(s->eval_param); + if (really) { + nasm_free(s); + } else { + /* Wipe everything except the next pointer */ + memset(&s->next + 1, 0, sizeof *s - sizeof s->next); + } +} + /* * Free all currently defined macros, and free the hash tables */ @@ -641,11 +678,8 @@ static void free_smacro_table(struct hash_table *smt) while ((s = hash_iterate(smt, &it, &key)) != NULL) { nasm_free((void *)key); - list_for_each_safe(s, tmp, s) { - nasm_free(s->name); - free_tlist(s->expansion); - nasm_free(s); - } + list_for_each_safe(s, tmp, s) + free_smacro(s, true); } hash_free(smt); } @@ -1326,20 +1360,27 @@ static char *detoken(Token * tlist, bool expand_locals) * * FIX: This really needs to be unified with stdscan. 
*/ +struct ppscan { + Token *tptr; + int ntokens; +}; + static int ppscan(void *private_data, struct tokenval *tokval) { - Token **tlineptr = private_data; + struct ppscan *pps = private_data; Token *tline; char ourcopy[MAX_KEYWORD+1], *p, *r, *s; do { - tline = *tlineptr; - *tlineptr = tline ? tline->next : NULL; - } while (tline && (tline->type == TOK_WHITESPACE || - tline->type == TOK_COMMENT)); - - if (!tline) - return tokval->t_type = TOKEN_EOS; + if (pps->ntokens && (tline = pps->tptr)) { + pps->ntokens--; + pps->tptr = tline->next; + } else { + pps->tptr = NULL; + pps->ntokens = 0; + return tokval->t_type = TOKEN_EOS; + } + } while (tline->type == TOK_WHITESPACE || tline->type == TOK_COMMENT); tokval->t_charptr = tline->text; @@ -1755,7 +1796,8 @@ static bool if_condition(Token * tline, enum preproc_token ct) { enum pp_conditional i = PP_COND(ct); bool j; - Token *t, *tt, **tptr, *origline; + Token *t, *tt, *origline; + struct ppscan pps; struct tokenval tokval; expr *evalresult; enum pp_token_type needtype; @@ -2008,10 +2050,10 @@ iftype: break; case PPC_IF: - t = tline = expand_smacro(tline); - tptr = &t; + pps.tptr = tline = expand_smacro(tline); + pps.ntokens = -1; tokval.t_type = TOKEN_INVALID; - evalresult = evaluate(ppscan, tptr, &tokval, + evalresult = evaluate(ppscan, &pps, &tokval, NULL, pass | CRITICAL, NULL); if (!evalresult) return -1; @@ -2044,8 +2086,8 @@ fail: /* * Common code for defining an smacro */ -static bool define_smacro(Context *ctx, const char *mname, bool casesense, - int nparam, Token *expansion) +static SMacro *define_smacro(Context *ctx, const char *mname, + bool casesense, int nparam, Token *expansion) { SMacro *smac, **smhead; struct hash_table *smtbl; @@ -2060,29 +2102,27 @@ static bool define_smacro(Context *ctx, const char *mname, bool casesense, * some others didn't. What is the right thing to do here? 
*/ free_tlist(expansion); - return false; /* Failure */ + return NULL; /* Failure */ } else { /* * We're redefining, so we have to take over an * existing SMacro structure. This means freeing - * what was already in it. + * what was already in it, but not the structure itself. */ - nasm_free(smac->name); - free_tlist(smac->expansion); + free_smacro(smac, false); } } else { smtbl = ctx ? &ctx->localmac : &smacros; smhead = (SMacro **) hash_findi_add(smtbl, mname); - smac = nasm_malloc(sizeof(SMacro)); + nasm_new(smac); smac->next = *smhead; *smhead = smac; } smac->name = nasm_strdup(mname); smac->casesense = casesense; smac->nparam = nparam; - smac->expansion = expansion; - smac->in_progress = false; - return true; /* Success */ + smac->e.expansion = expansion; + return smac; } /* @@ -2104,9 +2144,7 @@ static void undef_smacro(Context *ctx, const char *mname) while ((s = *sp) != NULL) { if (!mstrcmp(s->name, mname, s->casesense)) { *sp = s->next; - nasm_free(s->name); - free_tlist(s->expansion); - nasm_free(s); + free_smacro(s, true); } else { sp = &s->next; } @@ -2253,11 +2291,12 @@ static int do_directive(Token *tline, char **output) char *p, *pp; const char *found_path; const char *mname; + struct ppscan pps; Include *inc; Context *ctx; Cond *cond; MMacro *mmac, **mmhead; - Token *t = NULL, *tt, *param_start, *macro_start, *last, **tptr, *origline; + Token *t = NULL, *tt, *param_start, *macro_start, *last, *origline; Line *l; struct tokenval tokval; expr *evalresult; @@ -2265,6 +2304,7 @@ static int do_directive(Token *tline, char **output) int64_t count; size_t len; int severity; + const char *dname; /* Name of directive, for messages */ *output = NULL; /* No output generated */ origline = tline; @@ -2337,6 +2377,8 @@ static int do_directive(Token *tline, char **output) } } + dname = pp_directives[i]; /* Directive name, for error messages */ + casesense = true; /* Default to case sensitive */ switch (i) { case PP_INVALID: nasm_error(ERR_NONFATAL, "unknown 
preprocessor directive `%s'", @@ -2385,7 +2427,7 @@ static int do_directive(Token *tline, char **output) if (tline && tline->type == TOK_WHITESPACE) tline = tline->next; if (!tline || tline->type != TOK_ID) { - nasm_error(ERR_NONFATAL, "`%%stacksize' missing size parameter"); + nasm_error(ERR_NONFATAL, "`%s' missing size parameter", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -2418,7 +2460,7 @@ static int do_directive(Token *tline, char **output) ArgOffset = 6; LocalOffset = 0; } else { - nasm_error(ERR_NONFATAL, "`%%stacksize' invalid size type"); + nasm_error(ERR_NONFATAL, "`%s' invalid size type", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -2441,7 +2483,7 @@ static int do_directive(Token *tline, char **output) if (tline && tline->type == TOK_WHITESPACE) tline = tline->next; if (!tline || tline->type != TOK_ID) { - nasm_error(ERR_NONFATAL, "`%%arg' missing argument parameter"); + nasm_error(ERR_NONFATAL, "`%s' missing argument parameter", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -2452,13 +2494,13 @@ static int do_directive(Token *tline, char **output) if (!tline || tline->type != TOK_OTHER || tline->text[0] != ':') { nasm_error(ERR_NONFATAL, - "Syntax error processing `%%arg' directive"); + "syntax error processing `%s' directive", dname); free_tlist(origline); return DIRECTIVE_FOUND; } tline = tline->next; if (!tline || tline->type != TOK_ID) { - nasm_error(ERR_NONFATAL, "`%%arg' missing size type parameter"); + nasm_error(ERR_NONFATAL, "`%s' missing size type parameter", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -2469,7 +2511,7 @@ static int do_directive(Token *tline, char **output) size = parse_size(tt->text); if (!size) { nasm_error(ERR_NONFATAL, - "Invalid size type for `%%arg' missing directive"); + "invalid size type for `%s' missing directive", dname); free_tlist(tt); free_tlist(origline); return DIRECTIVE_FOUND; @@ -2515,7 +2557,7 @@ static int do_directive(Token *tline, char **output) 
tline = tline->next; if (!tline || tline->type != TOK_ID) { nasm_error(ERR_NONFATAL, - "`%%local' missing argument parameter"); + "`%s' missing argument parameter", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -2526,14 +2568,14 @@ static int do_directive(Token *tline, char **output) if (!tline || tline->type != TOK_OTHER || tline->text[0] != ':') { nasm_error(ERR_NONFATAL, - "Syntax error processing `%%local' directive"); + "syntax error processing `%s' directive", dname); free_tlist(origline); return DIRECTIVE_FOUND; } tline = tline->next; if (!tline || tline->type != TOK_ID) { nasm_error(ERR_NONFATAL, - "`%%local' missing size type parameter"); + "`%s' missing size type parameter", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -2544,7 +2586,7 @@ static int do_directive(Token *tline, char **output) size = parse_size(tt->text); if (!size) { nasm_error(ERR_NONFATAL, - "Invalid size type for `%%local' missing directive"); + "invalid size type for `%s' missing directive", dname); free_tlist(tt); free_tlist(origline); return DIRECTIVE_FOUND; @@ -2578,7 +2620,7 @@ static int do_directive(Token *tline, char **output) case PP_CLEAR: if (tline->next) nasm_error(ERR_WARNING|ERR_PASS1, - "trailing garbage after `%%clear' ignored"); + "trailing garbage after `%s' ignored", dname); free_macros(); init_macros(); free_tlist(origline); @@ -2589,13 +2631,13 @@ static int do_directive(Token *tline, char **output) skip_white_(t); if (!t || (t->type != TOK_STRING && t->type != TOK_INTERNAL_STRING)) { - nasm_error(ERR_NONFATAL, "`%%depend' expects a file name"); + nasm_error(ERR_NONFATAL, "`%s' expects a file name", dname); free_tlist(origline); return DIRECTIVE_FOUND; /* but we did _something_ */ } if (t->next) nasm_error(ERR_WARNING|ERR_PASS1, - "trailing garbage after `%%depend' ignored"); + "trailing garbage after `%s' ignored", dname); p = t->text; if (t->type != TOK_INTERNAL_STRING) nasm_unquote_cstr(p, i); @@ -2609,13 +2651,13 @@ static int 
do_directive(Token *tline, char **output) if (!t || (t->type != TOK_STRING && t->type != TOK_INTERNAL_STRING)) { - nasm_error(ERR_NONFATAL, "`%%include' expects a file name"); + nasm_error(ERR_NONFATAL, "`%s' expects a file name", dname); free_tlist(origline); return DIRECTIVE_FOUND; /* but we did _something_ */ } if (t->next) nasm_error(ERR_WARNING|ERR_PASS1, - "trailing garbage after `%%include' ignored"); + "trailing garbage after `%s' ignored", dname); p = t->text; if (t->type != TOK_INTERNAL_STRING) nasm_unquote_cstr(p, i); @@ -2652,18 +2694,18 @@ static int do_directive(Token *tline, char **output) if (!tline || (tline->type != TOK_STRING && tline->type != TOK_INTERNAL_STRING && tline->type != TOK_ID)) { - nasm_error(ERR_NONFATAL, "`%%use' expects a package name"); + nasm_error(ERR_NONFATAL, "`%s' expects a package name", dname); free_tlist(origline); return DIRECTIVE_FOUND; /* but we did _something_ */ } if (tline->next) nasm_error(ERR_WARNING|ERR_PASS1, - "trailing garbage after `%%use' ignored"); + "trailing garbage after `%s' ignored", dname); if (tline->type == TOK_STRING) nasm_unquote_cstr(tline->text, i); use_pkg = nasm_stdmac_find_package(tline->text); if (!use_pkg) - nasm_error(ERR_NONFATAL, "unknown `%%use' package: %s", tline->text); + nasm_error(ERR_NONFATAL, "unknown `%s' package: %s", dname, tline->text); else pkg_macro = (char *)use_pkg + 1; /* The first string will be <%define>__USE_*__ */ if (use_pkg && ! smacro_defined(NULL, pkg_macro, 0, NULL, true)) { @@ -2709,9 +2751,9 @@ static int do_directive(Token *tline, char **output) pp_directives[i]); } else if (i == PP_POP) { if (p && (!cstk->name || nasm_stricmp(p, cstk->name))) - nasm_error(ERR_NONFATAL, "`%%pop' in wrong context: %s, " - "expected %s", - cstk->name ? cstk->name : "anonymous", p); + nasm_error(ERR_NONFATAL, "`%s' in wrong context: %s, " + "expected %s", + dname, cstk->name ? 
cstk->name : "anonymous", p); else ctx_pop(); } else { @@ -2779,7 +2821,7 @@ issue_error: CASE_PP_ELIF: if (!istk->conds) - nasm_error(ERR_FATAL, "`%s': no matching `%%if'", pp_directives[i]); + nasm_error(ERR_FATAL, "`%s': no matching `%%if'", dname); switch(istk->conds->state) { case COND_IF_TRUE: istk->conds->state = COND_DONE; @@ -2857,20 +2899,22 @@ issue_error: free_tlist(origline); return DIRECTIVE_FOUND; - case PP_RMACRO: case PP_IRMACRO: - case PP_MACRO: case PP_IMACRO: + casesense = false; + /* fall through */ + case PP_RMACRO: + case PP_MACRO: if (defining) { nasm_error(ERR_FATAL, "`%s': already defining a macro", - pp_directives[i]); + dname); return DIRECTIVE_FOUND; } defining = nasm_zalloc(sizeof(MMacro)); defining->max_depth = ((i == PP_RMACRO) || (i == PP_IRMACRO)) ? nasm_limit[LIMIT_MACROS] : 0; - defining->casesense = (i == PP_MACRO) || (i == PP_RMACRO); - if (!parse_mmacro_spec(tline, defining, pp_directives[i])) { + defining->casesense = casesense; + if (!parse_mmacro_spec(tline, defining, dname)) { nasm_free(defining); defining = NULL; return DIRECTIVE_FOUND; @@ -2934,14 +2978,16 @@ issue_error: free_tlist(origline); return DIRECTIVE_FOUND; - case PP_UNMACRO: case PP_UNIMACRO: + casesense = false; + /* fall through */ + case PP_UNMACRO: { MMacro **mmac_p; MMacro spec; - spec.casesense = (i == PP_UNMACRO); - if (!parse_mmacro_spec(tline, &spec, pp_directives[i])) { + spec.casesense = casesense; + if (!parse_mmacro_spec(tline, &spec, dname)) { return DIRECTIVE_FOUND; } mmac_p = (MMacro **) hash_findi(&mmacros, spec.name, NULL); @@ -2974,11 +3020,11 @@ issue_error: t = expand_smacro(tline->next); tline->next = NULL; free_tlist(origline); - tline = t; - tptr = &t; + pps.tptr = tline = t; + pps.ntokens = -1; tokval.t_type = TOKEN_INVALID; evalresult = - evaluate(ppscan, tptr, &tokval, NULL, pass, NULL); + evaluate(ppscan, &pps, &tokval, NULL, pass, NULL); free_tlist(tline); if (!evalresult) return DIRECTIVE_FOUND; @@ -3023,11 +3069,11 @@ 
issue_error: } if (tline) { - t = expand_smacro(tline); - tptr = &t; + pps.tptr = expand_smacro(tline); + pps.ntokens = -1; tokval.t_type = TOKEN_INVALID; evalresult = - evaluate(ppscan, tptr, &tokval, NULL, pass, NULL); + evaluate(ppscan, &pps, &tokval, NULL, pass, NULL); if (!evalresult) { free_tlist(origline); return DIRECTIVE_FOUND; @@ -3123,11 +3169,15 @@ issue_error: free_tlist(origline); return DIRECTIVE_FOUND; - case PP_XDEFINE: - case PP_IXDEFINE: - case PP_DEFINE: case PP_IDEFINE: - casesense = (i == PP_DEFINE || i == PP_XDEFINE); + case PP_IXDEFINE: + casesense = false; + /* fall through */ + case PP_DEFINE: + case PP_XDEFINE: + { + SMacro *s; + bool have_eval_params = false; tline = tline->next; skip_white_(tline); @@ -3136,7 +3186,7 @@ issue_error: (tline->type != TOK_PREPROC_ID || tline->text[1] != '$'))) { nasm_error(ERR_NONFATAL, "`%s' expects a macro identifier", - pp_directives[i]); + dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3146,10 +3196,6 @@ issue_error: param_start = tline = tline->next; nparam = 0; - /* Expand the macro definition now for %xdefine and %ixdefine */ - if ((i == PP_XDEFINE) || (i == PP_IXDEFINE)) - tline = expand_smacro(tline); - if (tok_is_(tline, "(")) { /* * This macro has parameters. 
@@ -3163,6 +3209,10 @@ issue_error: free_tlist(origline); return DIRECTIVE_FOUND; } + if (tok_is_(tline, "=")) { + have_eval_params = true; + tline = tline->next; + } if (tline->type != TOK_ID) { nasm_error(ERR_NONFATAL, "`%s': parameter identifier expected", @@ -3170,7 +3220,7 @@ issue_error: free_tlist(origline); return DIRECTIVE_FOUND; } - tline->type = TOK_SMAC_PARAM + nparam++; + tline->type = tok_smac_param(nparam++); tline = tline->next; skip_white_(tline); if (tok_is_(tline, ",")) { @@ -3190,14 +3240,18 @@ issue_error: } if (tok_type_(tline, TOK_WHITESPACE)) last = tline, tline = tline->next; - macro_start = NULL; last->next = NULL; + + /* Expand the macro definition now for %xdefine and %ixdefine */ + if ((i == PP_XDEFINE) || (i == PP_IXDEFINE)) + tline = expand_smacro(tline); + + macro_start = NULL; t = tline; while (t) { if (t->type == TOK_ID) { list_for_each(tt, param_start) - if (tt->type >= TOK_SMAC_PARAM && - !strcmp(tt->text, t->text)) + if (is_smac_param(tt->type) && !strcmp(tt->text, t->text)) t->type = tt->type; } tt = t->next; @@ -3213,9 +3267,24 @@ issue_error: * carefully re-terminated after chopping off the expansion * from the end). 
*/ - define_smacro(ctx, mname, casesense, nparam, macro_start); + s = define_smacro(ctx, mname, casesense, nparam, macro_start); + + if (have_eval_params) { + /* Create evaluated parameters table */ + bool is_eval = false; + + nasm_newn(s->eval_param, nparam); + list_for_each(tt, param_start) { + if (is_smac_param(tt->type)) + s->eval_param[smac_nparam(tt->type)] = is_eval; + is_eval = tok_is_(tt, "="); + } + } + + free_tlist(origline); return DIRECTIVE_FOUND; + } case PP_UNDEF: tline = tline->next; @@ -3239,10 +3308,10 @@ issue_error: free_tlist(origline); return DIRECTIVE_FOUND; - case PP_DEFSTR: case PP_IDEFSTR: - casesense = (i == PP_DEFSTR); - + casesense = false; + /* fall through */ + case PP_DEFSTR: tline = tline->next; skip_white_(tline); tline = expand_id(tline); @@ -3250,7 +3319,7 @@ issue_error: (tline->type != TOK_PREPROC_ID || tline->text[1] != '$'))) { nasm_error(ERR_NONFATAL, "`%s' expects a macro identifier", - pp_directives[i]); + dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3264,11 +3333,7 @@ issue_error: tline = delete_Token(tline); p = detoken(tline, false); - macro_start = nasm_malloc(sizeof(*macro_start)); - macro_start->next = NULL; - macro_start->text = nasm_quote(p, strlen(p)); - macro_start->type = TOK_STRING; - macro_start->a.mac = NULL; + macro_start = make_tok_qstr(p); nasm_free(p); /* @@ -3280,10 +3345,10 @@ issue_error: free_tlist(origline); return DIRECTIVE_FOUND; - case PP_DEFTOK: case PP_IDEFTOK: - casesense = (i == PP_DEFTOK); - + casesense = false; + /* fall through */ + case PP_DEFTOK: tline = tline->next; skip_white_(tline); tline = expand_id(tline); @@ -3292,7 +3357,7 @@ issue_error: tline->text[1] != '$'))) { nasm_error(ERR_NONFATAL, "`%s' expects a macro identifier as first parameter", - pp_directives[i]); + dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3308,7 +3373,7 @@ issue_error: if (!tok_type_(t, TOK_STRING)) { nasm_error(ERR_NONFATAL, "`%s` requires string as second parameter", - 
pp_directives[i]); + dname); free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; @@ -3332,12 +3397,13 @@ issue_error: free_tlist(origline); return DIRECTIVE_FOUND; + case PP_IPATHSEARCH: + casesense = false; + /* fall through */ case PP_PATHSEARCH: { const char *found_path; - casesense = true; - tline = tline->next; skip_white_(tline); tline = expand_id(tline); @@ -3345,7 +3411,7 @@ issue_error: (tline->type != TOK_PREPROC_ID || tline->text[1] != '$'))) { nasm_error(ERR_NONFATAL, - "`%%pathsearch' expects a macro identifier as first parameter"); + "`%s' expects a macro identifier as first parameter", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3360,14 +3426,14 @@ issue_error: if (!t || (t->type != TOK_STRING && t->type != TOK_INTERNAL_STRING)) { - nasm_error(ERR_NONFATAL, "`%%pathsearch' expects a file name"); + nasm_error(ERR_NONFATAL, "`%s' expects a file name", dname); free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; /* but we did _something_ */ } if (t->next) nasm_error(ERR_WARNING|ERR_PASS1, - "trailing garbage after `%%pathsearch' ignored"); + "trailing garbage after `%s' ignored", dname); p = t->text; if (t->type != TOK_INTERNAL_STRING) nasm_unquote(p, NULL); @@ -3375,11 +3441,7 @@ issue_error: inc_fopen(p, NULL, &found_path, INC_PROBE, NF_BINARY); if (!found_path) found_path = p; - macro_start = nasm_malloc(sizeof(*macro_start)); - macro_start->next = NULL; - macro_start->text = nasm_quote(found_path, strlen(found_path)); - macro_start->type = TOK_STRING; - macro_start->a.mac = NULL; + macro_start = make_tok_qstr(found_path); /* * We now have a macro name, an implicit parameter count of @@ -3392,9 +3454,10 @@ issue_error: return DIRECTIVE_FOUND; } + case PP_ISTRLEN: + casesense = false; + /* fall through */ case PP_STRLEN: - casesense = true; - tline = tline->next; skip_white_(tline); tline = expand_id(tline); @@ -3402,7 +3465,7 @@ issue_error: (tline->type != TOK_PREPROC_ID || tline->text[1] != '$'))) { 
nasm_error(ERR_NONFATAL, - "`%%strlen' expects a macro identifier as first parameter"); + "`%s' expects a macro identifier as first parameter", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3417,16 +3480,13 @@ issue_error: /* t should now point to the string */ if (!tok_type_(t, TOK_STRING)) { nasm_error(ERR_NONFATAL, - "`%%strlen` requires string as second parameter"); + "`%s` requires string as second parameter", dname); free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; } - macro_start = nasm_malloc(sizeof(*macro_start)); - macro_start->next = NULL; - make_tok_num(macro_start, nasm_unquote(t->text, NULL)); - macro_start->a.mac = NULL; + macro_start = make_tok_num(nasm_unquote(t->text, NULL)); /* * We now have a macro name, an implicit parameter count of @@ -3438,9 +3498,10 @@ issue_error: free_tlist(origline); return DIRECTIVE_FOUND; + case PP_ISTRCAT: + casesense = false; + /* fall through */ case PP_STRCAT: - casesense = true; - tline = tline->next; skip_white_(tline); tline = expand_id(tline); @@ -3448,7 +3509,7 @@ issue_error: (tline->type != TOK_PREPROC_ID || tline->text[1] != '$'))) { nasm_error(ERR_NONFATAL, - "`%%strcat' expects a macro identifier as first parameter"); + "`%s' expects a macro identifier as first parameter", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3471,7 +3532,7 @@ issue_error: /* else fall through */ default: nasm_error(ERR_NONFATAL, - "non-string passed to `%%strcat' (%d)", t->type); + "non-string passed to `%s': %s", dname, t->text); free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; @@ -3491,21 +3552,21 @@ issue_error: * zero, and a numeric token to use as an expansion. Create * and store an SMacro. 
*/ - macro_start = new_Token(NULL, TOK_STRING, NULL, 0); - macro_start->text = nasm_quote(pp, len); + macro_start = make_tok_qstr(pp); nasm_free(pp); define_smacro(ctx, mname, casesense, 0, macro_start); free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; + case PP_ISUBSTR: + casesense = false; + /* fall through */ case PP_SUBSTR: { int64_t start, count; size_t len; - casesense = true; - tline = tline->next; skip_white_(tline); tline = expand_id(tline); @@ -3513,7 +3574,7 @@ issue_error: (tline->type != TOK_PREPROC_ID || tline->text[1] != '$'))) { nasm_error(ERR_NONFATAL, - "`%%substr' expects a macro identifier as first parameter"); + "`%s' expects a macro identifier as first parameter", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3530,41 +3591,41 @@ issue_error: /* t should now point to the string */ if (!tok_type_(t, TOK_STRING)) { nasm_error(ERR_NONFATAL, - "`%%substr` requires string as second parameter"); + "`%s' requires string as second parameter", dname); free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; } - tt = t->next; - tptr = &tt; + pps.tptr = t->next; + pps.ntokens = -1; tokval.t_type = TOKEN_INVALID; - evalresult = evaluate(ppscan, tptr, &tokval, NULL, pass, NULL); + evalresult = evaluate(ppscan, &pps, &tokval, NULL, pass, NULL); if (!evalresult) { free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; } else if (!is_simple(evalresult)) { - nasm_error(ERR_NONFATAL, "non-constant value given to `%%substr`"); + nasm_error(ERR_NONFATAL, "non-constant value given to `%s'", dname); free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; } start = evalresult->value - 1; - while (tok_type_(tt, TOK_WHITESPACE)) - tt = tt->next; - if (!tt) { + while (tok_type_(pps.tptr, TOK_WHITESPACE)) + pps.tptr = pps.tptr->next; + if (!pps.tptr) { count = 1; /* Backwards compatibility: one character */ } else { tokval.t_type = TOKEN_INVALID; - evalresult = evaluate(ppscan, tptr, &tokval, NULL, pass, NULL); + 
evalresult = evaluate(ppscan, &pps, &tokval, NULL, pass, NULL); if (!evalresult) { free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; } else if (!is_simple(evalresult)) { - nasm_error(ERR_NONFATAL, "non-constant value given to `%%substr`"); + nasm_error(ERR_NONFATAL, "non-constant value given to `%s'", dname); free_tlist(tline); free_tlist(origline); return DIRECTIVE_FOUND; @@ -3584,11 +3645,8 @@ issue_error: if (!len || count < 0 || start >=(int64_t)len) start = -1, count = 0; /* empty string */ - macro_start = nasm_malloc(sizeof(*macro_start)); - macro_start->next = NULL; + macro_start = new_Token(NULL, TOK_STRING, NULL, 0); macro_start->text = nasm_quote((start < 0) ? "" : t->text + start, count); - macro_start->type = TOK_STRING; - macro_start->a.mac = NULL; /* * We now have a macro name, an implicit parameter count of @@ -3601,10 +3659,10 @@ issue_error: return DIRECTIVE_FOUND; } - case PP_ASSIGN: case PP_IASSIGN: - casesense = (i == PP_ASSIGN); - + casesense = false; + /* fall through */ + case PP_ASSIGN: tline = tline->next; skip_white_(tline); tline = expand_id(tline); @@ -3612,8 +3670,7 @@ issue_error: (tline->type != TOK_PREPROC_ID || tline->text[1] != '$'))) { nasm_error(ERR_NONFATAL, - "`%%%sassign' expects a macro identifier", - (i == PP_IASSIGN ? "i" : "")); + "`%s' expects a macro identifier", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3622,10 +3679,10 @@ issue_error: tline = expand_smacro(tline->next); last->next = NULL; - t = tline; - tptr = &t; + pps.tptr = tline; + pps.ntokens = -1; tokval.t_type = TOKEN_INVALID; - evalresult = evaluate(ppscan, tptr, &tokval, NULL, pass, NULL); + evalresult = evaluate(ppscan, &pps, &tokval, NULL, pass, NULL); free_tlist(tline); if (!evalresult) { free_tlist(origline); @@ -3638,16 +3695,12 @@ issue_error: if (!is_simple(evalresult)) { nasm_error(ERR_NONFATAL, - "non-constant value given to `%%%sassign'", - (i == PP_IASSIGN ? 
"i" : "")); - free_tlist(origline); + "non-constant value given to `%s'", dname); + free_tlist(origline); return DIRECTIVE_FOUND; - } + } - macro_start = nasm_malloc(sizeof(*macro_start)); - macro_start->next = NULL; - make_tok_num(macro_start, reloc_value(evalresult)); - macro_start->a.mac = NULL; + macro_start = make_tok_num(reloc_value(evalresult)); /* * We now have a macro name, an implicit parameter count of @@ -3669,7 +3722,7 @@ issue_error: tline = tline->next; skip_white_(tline); if (!tok_type_(tline, TOK_NUMBER)) { - nasm_error(ERR_NONFATAL, "`%%line' expects line number"); + nasm_error(ERR_NONFATAL, "`%s' expects line number", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3679,7 +3732,7 @@ issue_error: if (tok_is_(tline, "+")) { tline = tline->next; if (!tok_type_(tline, TOK_NUMBER)) { - nasm_error(ERR_NONFATAL, "`%%line' expects line increment"); + nasm_error(ERR_NONFATAL, "`%s' expects line increment", dname); free_tlist(origline); return DIRECTIVE_FOUND; } @@ -3700,7 +3753,7 @@ issue_error: default: nasm_error(ERR_FATAL, "preprocessor directive `%s' not yet implemented", - pp_directives[i]); + dname); return DIRECTIVE_FOUND; } } @@ -4194,7 +4247,8 @@ static Token *expand_smacro(Token * tline) SMacro *head = NULL, *m; Token **params; int *paramsize; - unsigned int nparam, sparam; + Token *eparams; + unsigned int nparam, sparam, i; int brackets; Token *org_tline = tline; Context *ctx; @@ -4244,40 +4298,22 @@ again: * all, then think about checking for parameters if * necessary. */ - list_for_each(m, head) + list_for_each(m, head) { if (!mstrcmp(m->name, mname, m->casesense)) break; + } if (m) { mstart = tline; params = NULL; paramsize = NULL; + eparams = NULL; + if (m->nparam == 0) { /* - * Simple case: the macro is parameterless. Discard the - * one token that the macro call took, and push the - * expansion back on the to-do stack. + * Simple case: the macro is parameterless. + * Nothing to parse; just drop the macro token itself. 
*/ - if (!m->expansion) { - if (!strcmp("__FILE__", m->name)) { - const char *file = src_get_fname(); - /* nasm_free(tline->text); here? */ - tline->text = nasm_quote(file, strlen(file)); - tline->type = TOK_STRING; - continue; - } - if (!strcmp("__LINE__", m->name)) { - nasm_free(tline->text); - make_tok_num(tline, src_get_linnum()); - continue; - } - if (!strcmp("__BITS__", m->name)) { - nasm_free(tline->text); - make_tok_num(tline, globalbits); - continue; - } - tline = delete_Token(tline); - continue; - } + tline = tline->next; } else { /* * Complicated case: at least one macro with this name @@ -4386,26 +4422,63 @@ again: white = 0; } /* parameter loop */ nparam++; + while (m && (m->nparam != nparam || - mstrcmp(m->name, mname, - m->casesense))) + mstrcmp(m->name, mname, m->casesense))) m = m->next; - if (!m) + if (!m) { nasm_error(ERR_WARNING|ERR_PASS1|WARN_MNP, "macro `%s' exists, " "but not taking %d parameters", mstart->text, nparam); + } else if (m->eval_param) { + struct ppscan pps; + struct tokenval tokval; + expr *evalresult; + + /* Evaluate parameters if applicable */ + for (i = 0; i < nparam; i++) { + if (!m->eval_param[i]) + continue; + + pps.tptr = params[i]; + pps.ntokens = paramsize[i]; + tokval.t_type = TOKEN_INVALID; + evalresult = evaluate(ppscan, &pps, &tokval, + NULL, pass, NULL); + if (!evalresult) + continue; + + if (tokval.t_type) { + nasm_error(ERR_NONFATAL, + "invalid expression in parameter %d of macro `%s'", i, m->name); + continue; + } + + if (!is_simple(evalresult)) { + nasm_error(ERR_NONFATAL, + "non-constant expression in parameter %d of macro `%s'", i, m->name); + continue; + } + params[i] = make_tok_num(reloc_value(evalresult)); + params[i]->next = eparams; + eparams = params[i]; + paramsize[i] = 1; + } + } } } if (m && m->in_progress) m = NULL; - if (!m) { /* in progess or didn't find '(' or wrong nparam */ + if (!m) { + /* in progress or didn't find '(' or wrong nparam */ /* * Design question: should we handle !tline, which * 
indicates missing ')' here, or expand those * macros anyway, which requires the (t) test a few * lines down? */ + free_tlist(eparams); nasm_free(params); nasm_free(paramsize); tline = mstart; @@ -4416,6 +4489,8 @@ again: * following tokens. We also start by pushing an SMAC_END * token for the cycle removal. */ + Token *expansion; + t = tline; if (t) { tline = t->next; @@ -4424,30 +4499,26 @@ again: tt = new_Token(tline, TOK_SMAC_END, NULL, 0); tt->a.mac = m; m->in_progress = true; + if (unlikely(m->magic)) + expansion = m->e.magic(m, params, paramsize); + else + expansion = m->e.expansion; + tline = tt; - list_for_each(t, m->expansion) { - if (t->type >= TOK_SMAC_PARAM) { + list_for_each(t, expansion) { + if (is_smac_param(t->type)) { Token *pcopy = tline, **ptail = &pcopy; Token *ttt, *pt; int i; - ttt = params[t->type - TOK_SMAC_PARAM]; - i = paramsize[t->type - TOK_SMAC_PARAM]; + ttt = params[smac_nparam(t->type)]; + i = paramsize[smac_nparam(t->type)]; while (--i >= 0) { + nasm_assert(ttt); pt = *ptail = new_Token(tline, ttt->type, ttt->text, 0); ptail = &pt->next; ttt = ttt->next; - if (!ttt && i > 0) { - /* - * FIXME: Need to handle more gracefully, - * exiting early on agruments analysis. 
- */ - nasm_error(ERR_FATAL, - "macro `%s' expects %d args", - mstart->text, - (int)paramsize[t->type - TOK_SMAC_PARAM]); - } } tline = pcopy; } else if (t->type == TOK_PREPROC_Q) { @@ -4469,6 +4540,9 @@ again: nasm_free(params); nasm_free(paramsize); free_tlist(mstart); + free_tlist(eparams); + if (m->magic) + free_tlist(expansion); expanded = true; continue; /* main token loop */ } @@ -4976,19 +5050,100 @@ static void pp_verror(int severity, const char *fmt, va_list arg) } } +static Token *stdmac_file(const SMacro *s, Token **params, int *paramsize) +{ + (void)s; + (void)params; + (void)paramsize; + + return make_tok_qstr(src_get_fname()); +} + +static Token *stdmac_line(const SMacro *s, Token **params, int *paramsize) +{ + (void)s; + (void)params; + (void)paramsize; + + return make_tok_num(src_get_linnum()); +} + +static Token *stdmac_bits(const SMacro *s, Token **params, int *paramsize) +{ + (void)s; + (void)params; + (void)paramsize; + + return make_tok_num(globalbits); +} + +static Token *stdmac_ptr(const SMacro *s, Token **params, int *paramsize) +{ + const char *name; + + (void)s; + (void)params; + (void)paramsize; + + switch (globalbits) { + case 16: + name = "word"; + break; + case 32: + name = "dword"; + break; + case 64: + name = "qword"; + break; + default: + panic(); + } + return new_Token(NULL, TOK_ID, name, 0); +} + +static Token *stdmac_pass(const SMacro *s, Token **params, int *paramsize) +{ + (void)s; + (void)params; + (void)paramsize; + + return make_tok_num(pass); +} + +/* Add magic standard macros */ +struct magic_macros { + const char *name; + int nparams; + Token *(*func)(const SMacro *s, Token **params, int *paramsize); +}; +static const struct magic_macros magic_macros[] = +{ + { "__FILE__", 0, stdmac_file }, + { "__LINE__", 0, stdmac_line }, + { "__BITS__", 0, stdmac_bits }, + { "__PTR__", 0, stdmac_ptr }, + { "__PASS__", 0, stdmac_pass }, + { NULL, 0, NULL } +}; + +static void pp_add_magic_stdmac(void) +{ + const struct magic_macros *m; 
+ SMacro *s; + + for (m = magic_macros; m->name; m++) { + s = define_smacro(NULL, m->name, true, m->nparams, NULL); + s->magic = true; + s->e.magic = m->func; + } +} + static void pp_reset(const char *file, int apass, StrList **deplist) { - Token *t; - cstk = NULL; - istk = nasm_malloc(sizeof(Include)); - istk->next = NULL; - istk->conds = NULL; - istk->expansion = NULL; - istk->mstk = NULL; + nasm_new(istk); istk->fp = nasm_open_read(file, NF_TEXT); - istk->fname = NULL; src_set(0, file); istk->lineinc = 1; if (!istk->fp) @@ -4999,6 +5154,8 @@ pp_reset(const char *file, int apass, StrList **deplist) init_macros(); unique = 0; + pp_add_magic_stdmac(); + if (tasm_compatible_mode) pp_add_stdmac(nasm_stdmac_tasm); @@ -5022,17 +5179,6 @@ pp_reset(const char *file, int apass, StrList **deplist) dephead = deplist; nasm_add_string_to_strlist(dephead, file); - - /* - * Define the __PASS__ macro. This is defined here unlike - * all the other builtins, because it is special -- it varies between - * passes. 
- */ - t = nasm_malloc(sizeof(*t)); - t->next = NULL; - make_tok_num(t, apass); - t->a.mac = NULL; - define_smacro(NULL, "__PASS__", true, 0, t); } static void pp_init(void) @@ -5437,12 +5583,18 @@ static void pp_extra_stdmac(macros_t *macros) extrastdmac = macros; } -static void make_tok_num(Token * tok, int64_t val) +static Token *make_tok_num(int64_t val) { char numbuf[32]; - snprintf(numbuf, sizeof(numbuf), "%"PRId64"", val); - tok->text = nasm_strdup(numbuf); - tok->type = TOK_NUMBER; + int len = snprintf(numbuf, sizeof(numbuf), "%"PRId64"", val); + return new_Token(NULL, TOK_NUMBER, numbuf, len); +} + +static Token *make_tok_qstr(const char *str) +{ + Token *t = new_Token(NULL, TOK_STRING, NULL, 0); + t->text = nasm_quote_cstr(str); + return t; } static void pp_list_one_macro(MMacro *m, int severity) diff --git a/asm/quote.c b/asm/quote.c index 75a93726..813141b3 100644 --- a/asm/quote.c +++ b/asm/quote.c @@ -216,6 +216,14 @@ static char *emit_utf8(char *q, int32_t v) return q; } +/* + * Quote a C string + */ +char *nasm_quote_cstr(const char *str) +{ + return nasm_quote(str, strlen(str)); +} + /* * Do an *in-place* dequoting of the specified string, returning the * resulting length (which may be containing embedded nulls.) 
diff --git a/asm/quote.h b/asm/quote.h index 2d8ce87b..ed934f25 100644 --- a/asm/quote.h +++ b/asm/quote.h @@ -37,6 +37,7 @@ #include "compiler.h" char *nasm_quote(const char *str, size_t len); +char *nasm_quote_cstr(const char *str); size_t nasm_unquote(char *str, char **endptr); char *nasm_skip_string(char *str); diff --git a/macros/standard.mac b/macros/standard.mac index a6c50b68..7bc5af67 100644 --- a/macros/standard.mac +++ b/macros/standard.mac @@ -1,6 +1,6 @@ ;; -------------------------------------------------------------------------- ;; -;; Copyright 1996-2016 The NASM Authors - All Rights Reserved +;; Copyright 1996-2019 The NASM Authors - All Rights Reserved ;; See the file AUTHORS included with the NASM distribution for ;; the specific copyright holders. ;; @@ -49,12 +49,6 @@ STD: nasm ; here, not all of them are: the user-level form of a format-specific ; directive should be defined in the module for that directive. -; These three need to be defined, though the actual definitions will -; be constantly updated during preprocessing. -%define __FILE__ -%define __LINE__ -%define __BITS__ - %define __SECT__ ; it ought to be defined, even if as nothing %imacro section 1+.nolist diff --git a/test/evalmacro.asm b/test/evalmacro.asm new file mode 100644 index 00000000..0dd668dd --- /dev/null +++ b/test/evalmacro.asm @@ -0,0 +1,4 @@ +%define tonum(=x) x + + dd tonum(1+3) + dd tonum(5*7)