nasm/macros/macros.pl

#!/usr/bin/perl
## --------------------------------------------------------------------------
##
##   Copyright 1996-2025 The NASM Authors - All Rights Reserved
##   See the file AUTHORS included with the NASM distribution for
##   the specific copyright holders.
##
##   Redistribution and use in source and binary forms, with or without
##   modification, are permitted provided that the following
##   conditions are met:
##
##   * Redistributions of source code must retain the above copyright
##     notice, this list of conditions and the following disclaimer.
##   * Redistributions in binary form must reproduce the above
##     copyright notice, this list of conditions and the following
##     disclaimer in the documentation and/or other materials provided
##     with the distribution.
##
##     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
##     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
##     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
##     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
##     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
##     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
##     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
##     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
##     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
##     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
##     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
##     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
##     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
##
## --------------------------------------------------------------------------

#
# macros.pl   produce macros.c from standard.mac
#

use strict;
use integer;
use bytes;
use Compress::Zlib;

require 'phash.ph';

my $fname;
my $line = 0;
my @pname;

my $dump_text = 1;

#
# Print out a string as a byte array
#
sub print_data($$) {
    my($o, $s) = @_;
    my $perline = 8;

    for (my $ix = 0; $ix < length($s); $ix += $perline) {
	my $ss = substr($s, $ix, $perline);
	print $o '    ';
	foreach my $b (unpack('C*', $ss)) {
	    printf $o '0x%02x,', $b;
	}
	print $o "\n";
    }
    print $o "};\n";
}

#
# Prefix a string with its length in uleb128 encoding
#
sub uleb128($)
{
    my($n) = @_;
    my $o = '';

    do {
	my $nn = $n >> 7;
	$o .= pack('C', ($n & 127) | ($nn ? 128 : 0));
	$n = $nn;
    } while ($n);

    return $o;
}

sub addstringlen($)
{
    my($s) = @_;

    my $l = length($s);
    return $l ? uleb128($l).$s : '';
}

sub init_mac() {
    return {
	'name'   => undef,
        'lines'  => [],
	'ifdefs' => [],
	'static' => 0
    };
}

#
# Output a data blob and a data structure
#
sub flush_mac($$)
{
    my($out, $mac) = @_;

    my $init = init_mac();

    return $init if (!defined($mac));

    my $name = $mac->{'name'};
    return $init if (!$name);

    printf $out "\n\n/* --- from %s --- */\n\n", $mac->{'fname'};

    my $ifdefs = $mac->{'ifdefs'};
    if (scalar(@$ifdefs)) {
	print $out '#if', join(' ||', map { " defined($_)" } @$ifdefs), "\n";
    }

    if ($dump_text) {
	print $out "/\*\n";
	print $out map { " * $_\n" } @{$mac->{'lines'}};
	print $out " \*/\n\n";
    }

    my $data;
    foreach my $l (@{$mac->{'lines'}}) {
	$data .= addstringlen($l);
    }
    $data .= pack('C', 0);	# End of blob marker

    my $dlen = length($data);
    my $zblob = Compress::Zlib::compress($data, 9);
    my $zlen = length($zblob);

    if ($zlen >= $dlen) {
	$zblob = $data;
	$zlen = $dlen;
    }

    printf $out "static const unsigned char %s_blob[%d] = {\n", $name, $zlen;
    print_data($out, $zblob);

    printf $out "\n%sconst macros_t %s = {\n    %d, %d, %s_blob\n};\n",
	$mac->{'static'} ? 'static ' : '',
	$name, $dlen, $zlen, $name;

    print $out "#endif\n" if (scalar(@$ifdefs));
    return $init;
}

#
# Generate macros.c
#
my $out;

open($out, '>', 'macros/macros.c') or die "unable to open macros.c\n";

print $out "/*\n";
print $out " * Do not edit - this file auto-generated by macros.pl from:\n";
print $out " *   ", join("\n *   ", @ARGV), "\n";
print $out " */\n";
print $out "\n";
print $out "#include \"macros.h\"\n";
print $out "#include \"nasmlib.h\"\n";
print $out "#include \"hashtbl.h\"\n";
print $out "#include \"outform.h\"\n";

my $mac = undef;
my $npkg = 0;
my @pkg_list   = ();
my %pkg_number = ();
my $pkg;

foreach my $args ( @ARGV ) {
    my @file_list = glob ( $args );
    foreach $fname ( @file_list ) {
        open(INPUT,'<', $fname) or die "$0: $fname: $!\n";
	while (<INPUT>) {
	    $line++;
	    chomp;
	    while (/^(.*)\\$/) {
		$_ = $1;
		$_ .= <INPUT>;
		chomp;
		$line++;
	    }

	    s/^\s*(([^\'\"\;]|\"[^\"]*\"|\'[^\']*\')*?)\s*(\;.*)?$/$1/;
	    s/\s+/ /g;		# XXX: wrong if strings have whitespace
	    next if ($_ eq '');

	    if (m/^OUT:\s*(\S.*)$/) {
		my @out_alias = split(/\s+/, $1);
		undef $pkg;
		$mac = flush_mac($out, $mac);
		push(@{$mac->{'ifdefs'}}, map { "OF_\U$_\E" } @out_alias);
		$mac->{'name'} = $out_alias[0].'_stdmac';
		$mac->{'fname'} = $fname;
	    } elsif (m/^STD:\s*(\S+)$/) {
		undef $pkg;
		my $std = $1;
		$mac = flush_mac($out, $mac);
		$mac->{'name'} = 'nasm_stdmac_' . $std;
		$mac->{'fname'} = $fname;
	    } elsif (m/^USE:\s*(\S+)$/) {
		$pkg = $1;
		if (defined($pkg_number{$pkg})) {
		    die "$0: $fname: duplicate package: $pkg\n";
		}
		$mac = flush_mac($out, $mac);
		$mac->{'name'} = 'nasm_usemac_' . $pkg;
		$mac->{'static'} = 1;
		$mac->{'fname'} = $fname;
		push(@pkg_list, $pkg);
		$pkg_number{$pkg} = $npkg++;
		push(@{$mac->{'lines'}},
		     "\%define __?USE_\U$pkg\E?__",
		     "\%defalias __USE_\U$pkg\E__ __?USE\U$pkg\E?__");
	    } else {
		if (!defined($mac)) {
		    die "$0: $fname: macro declarations outside a known block\n";
		}

		push(@{$mac->{'lines'}}, $_);
	    }
	}
        close(INPUT);
    }
}


$mac = flush_mac($out, $mac);

my @hashinfo = gen_perfect_hash(\%pkg_number);
if (!@hashinfo) {
    die "$0: no hash found\n";
}
# Paranoia...
verify_hash_table(\%pkg_number, \@hashinfo);
my ($n, $sv, $g) = @hashinfo;
die if ($n & ($n-1));
$n <<= 1;

printf $out "\n\nconst unsigned int use_package_count = %d;\n\n", $npkg;

print $out "const struct use_package *nasm_find_use_package(const char *name)\n";
print $out "{\n";
print $out "    static const struct use_package packages[$npkg] = {\n";
my $ix = 0;
foreach $pkg (@pkg_list) {
    printf $out "        { \"%s\", \&nasm_usemac_%s, %d },\n",
	$pkg, $pkg, $ix++;
}
print $out "    };\n";

# Put a large value in unused slots.  This makes it extremely unlikely
# that any combination that involves unused slot will pass the range test.
# This speeds up rejection of unrecognized tokens, i.e. identifiers.
print $out "#define INVALID_HASH_ENTRY (65535/3)\n";

print $out "    static const int16_t hashdata[$n] = {\n";
for (my $i = 0; $i < $n; $i++) {
    my $h = ${$g}[$i];
    print $out "        ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
}
print $out "    };\n";

print $out  "    uint32_t k1, k2;\n";
print $out  "    uint64_t crc;\n";
# For correct overflow behavior, "ix" should be unsigned of the same
# width as the hash arrays.
print $out  "    uint16_t ix;\n";
print $out  "\n";

printf $out "    crc = crc64i(UINT64_C(0x%08x%08x), name);\n",
    $$sv[0], $$sv[1];
printf $out "    k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2;
printf $out "    k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2;
print  $out "\n";
printf $out "    ix = hashdata[k1] + hashdata[k2];\n";
printf $out "    if (ix >= %d)\n", scalar(@pkg_list);
print $out  "        return NULL;\n";
print $out  "\n";
print $out  "    if (nasm_stricmp(packages[ix].package, name))\n";
print $out  "        return NULL;\n";
print $out  "\n";
print $out  "    return &packages[ix];\n";
print $out  "}\n";

close($out);