0
0
mirror of https://github.com/netwide-assembler/nasm.git synced 2025-10-10 00:25:06 -04:00
Files
nasm/editors/nasmtok.pl
H. Peter Anvin 534edcba04 editors: add JSON output
Add JSON formatted editor help output as a way to help more general
editor support code.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
2025-10-02 14:18:57 -07:00

410 lines
9.7 KiB
Perl
Executable File

#!/usr/bin/perl
#
# Automatically produce some tables useful for a NASM major mode
#
use integer;
use strict;
use File::Spec;
use File::Find;
my $format = 'el';
if ($ARGV[0] =~ /^-(\S+)$/) {
$format = $1;
shift @ARGV;
}
my($outfile, $srcdir, $objdir) = @ARGV;
if (!defined($outfile)) {
die "Usage: $0 [-format] outfile srcdir objdir\n";
}
my @vpath;
$srcdir = $srcdir || File::Spec->curdir();
$objdir = $objdir || $srcdir;
push(@vpath, $objdir) if ($objdir ne $srcdir);
push(@vpath, $srcdir);
my %tokens = (); # Token lists per category
my %token_category = (); # Tokens to category map
sub xpush($@) {
my $ref = shift @_;
$$ref = [] unless (defined($$ref));
return push(@$$ref, @_);
}
# Search for a file, and return a file handle if successfully opened
sub open_vpath($$) {
my($mode, $file) = @_;
my %tried;
# For simplicity, allow filenames to be specified
# with Unix / syntax internally
$file = File::Spec->catfile(split(/\//, $file));
foreach my $d (@vpath) {
my $fn = File::Spec->catfile($d, $file);
next if ($tried{$fn});
$tried{$fn}++;
my $fh;
return $fh if (open($fh, $mode, $fn));
}
return undef;
}
sub must_open($) {
my($file) = @_;
my $fh = open_vpath('<', $file);
return $fh if (defined($fh));
die "$0:$file: $!\n";
}
# Combine some specific token types
my %override = (
'brcconst' => 'special-constant',
'id' => 'special',
'float' => 'function',
'floatize' => 'function',
'strfunc' => 'function',
'ifunc' => 'function',
'insn' => 'instruction',
'reg' => 'register',
'seg' => 'special',
'wrt' => 'special',
'times' => 'special');
sub addtoken($@) {
my $type = shift @_;
foreach my $token (@_) {
unless (defined($token_category{$token})) {
$type = $override{$type} if (defined($override{$type}));
xpush(\$tokens{$type}, $token);
$token_category{$token} = $type;
}
}
}
sub read_tokhash_c($) {
my($tokhash_c) = @_;
my $th = must_open($tokhash_c);
my $l;
my $tokendata = 0;
while (defined($l = <$th>)) {
if ($l =~ /\bstruct tokendata tokendata\[/) {
$tokendata = 1;
next;
} elsif (!$tokendata) {
next;
}
last if ($l =~ /\}\;/);
if ($l =~ /^\s*\{\s*\"(.*?)\",.*?,\s*TOKEN_(\w+),(.*)\}/) {
my $token = $1;
my $type = lc($2);
my $flags = $3;
$token = "{${token}}" if ($flags =~ /\bTFLAG_BRC\b/);
# Parametric token: omit the actual parameter(s)
$token =~ s/^(\{[\w-]+=).+(\})$/$1$2/;
if ($token !~ /^(\{[\w-]+=?\}|\w+)$/) {
$type = 'operator';
} elsif ($token =~ /^__\?masm_.*\?__$/) {
next;
}
addtoken($type, $token);
if ($token =~ /^__\?(.*)\?__$/) {
# Also encode the "user" (macro) form without __?...?__
addtoken($type, $1);
}
}
}
close($th);
}
sub read_pptok_c($) {
my($pptok_c) = @_;
my $pt = must_open($pptok_c);
my $l;
my $pp_dir = 0;
while (defined($l = <$pt>)) {
if ($l =~ /\bpp_directives\[/) {
$pp_dir = 1;
next;
} elsif (!$pp_dir) {
next;
}
last if ($l =~ /\}\;/);
if ($l =~ /^\s*\"(.*?)\"/) {
addtoken('pp-directive', $1);
}
}
close($pt);
}
sub read_directiv_dat($) {
my($directiv_dat) = @_;
my $dd = must_open($directiv_dat);
my $l;
my $directiv = 0;
while (defined($l = <$dd>)) {
if ($l =~ /^\; ---.*?(pragma)?/) {
$directiv = ($1 ne 'pragma');
next;
} elsif (!$directiv) {
next;
}
if ($l =~ /^\s*(\w+)/) {
addtoken('directive', $1);
}
}
close($dd);
}
my %version;
sub read_version($) {
my($vfile) = @_;
my $v = must_open($vfile);
while (defined(my $vl = <$v>)) {
if ($vl =~ /^NASM_(\w+)=(\S+)\s*$/) {
$version{lc($1)} = $2;
}
}
close($v);
}
# This is called from the directory search in read_macros(), so
# don't use must_open() here.
sub read_macro_file($) {
my($file) = @_;
open(my $fh, '<', $file) or die "$0:$file: $!\n";
while (defined(my $l = <$fh>)) {
next unless ($l =~ /^\s*\%/);
my @f = split(/\s+/, $l);
next unless (scalar(@f) >= 2);
next if ($f[1] =~ /^[\%\$][^\(]+$/); # Internal use only
$f[1] =~ s/\(.*$//; # Strip argument list if any
$f[1] = lc($f[1]) if ($f[0] =~ /^\%i/);
if ($f[0] =~ /^\%(i)?(assign|defalias|define|defstr|substr|xdefine)\b/) {
addtoken('smacro', $f[1]);
} elsif ($f[0] =~ /^\%i?macro$/) {
addtoken('mmacro', $f[1]);
}
}
close($fh);
}
sub read_macros(@) {
my %visited;
my @dirs = (File::Spec->curdir(), qw(macros output editors));
@dirs = map { my $od = $_; map { File::Spec->catdir($od, $_) } @dirs } @_;
foreach my $dir (@dirs) {
next if ($visited{$dir});
$visited{$dir}++;
next unless opendir(my $dh, $dir);
while (defined(my $fn = readdir($dh))) {
next unless ($fn =~ /\.mac$/i);
read_macro_file(File::Spec->catfile($dir, $fn));
}
closedir($dh);
}
}
# Handle special tokens which may not have been picked up by the automatic
# process, because they depend on the build parameters, or are buried
# deep in C code...
sub add_special_cases() {
# Not defined in non-snapshot builds
addtoken('smacro', '__NASM_SNAPSHOT__', '__?NASM_SNAPSHOT?__');
}
sub make_lines($$@) {
my $maxline = shift @_;
my $indent = shift @_;
# The first line isn't explicitly indented and the last line
# doesn't end in "\n"; assumed the surrounding formatter wants
# do control that
my $linepos = 0;
my $linewidth = $maxline - $indent;
my $line = '';
my @lines = ();
foreach my $w (@_) {
my $l = length($w);
if ($linepos > 0 && $linepos+$l+1 >= $linewidth) {
$line .= "\n" . (' ' x $indent);
push(@lines, $line);
$linepos = 0;
$line = '';
}
if ($linepos > 0) {
$line .= ' ';
$linepos++;
}
$line .= $w;
$linepos += $l;
}
if ($linepos > 0) {
push(@lines, $line);
}
return @lines;
}
sub quote_for_emacs(@) {
return map { s/[\\\"\']/\\$1/g; '"'.$_.'"' } @_;
}
# Emacs LISP
sub write_output_el {
my($out, $outfile, $file) = @_;
my $whoami = 'NASM '.$version{'ver'};
print $out ";;; ${file} --- lists of NASM assembler tokens\n\n";
print $out ";;; Commentary:\n\n";
print $out ";; This file contains list of tokens from the NASM x86\n";
print $out ";; assembler, automatically extracted from ${whoami}.\n";
print $out ";;\n";
print $out ";; This file is intended to be (require)d from a `nasm-mode\'\n";
print $out ";; major mode definition.\n";
print $out ";;\n";
print $out ";; Tokens that are only recognized inside curly braces are\n";
print $out ";; noted as such. Tokens of the form {xxx=} are parametric\n";
print $out ";; tokens, where the token may contain additional text on\n";
print $out ";; the right side of the = sign. For example,\n";
print $out ";; {dfv=} should be matched by {dfv=cf,zf}.\n";
print $out "\n";
print $out ";;; Code:\n";
my @types = sort keys(%tokens);
# Write the individual token type lists
foreach my $type (sort keys(%tokens)) {
print $out "\n(defconst nasm-${type}\n";
print $out " \'(";
print $out make_lines(78, 4, quote_for_emacs(sort @{$tokens{$type}}));
print $out ")\n";
print $out " \"${whoami} ${type} tokens for `nasm-mode\'.\")\n";
}
# Generate a list of all the token type lists.
print $out "\n(defconst nasm-token-lists\n";
print $out " \'(";
print $out make_lines(78, 4, map { "'nasm-$_" } sort keys(%tokens));
print $out ")\n";
print $out " \"List of all ${whoami} token type lists.\")\n";
# The NASM token extracted version
printf $out "\n(defconst nasm-token-version %s\n",
quote_for_emacs($version{'ver'});
print $out " \"Version of NASM from which tokens were extracted,\n";
print $out "as a human-readable string.\")\n";
printf $out "\n(defconst nasm-token-version-id #x%08x\n",
$version{'version_id'};
print $out " \"Version of NASM from which tokens were extracted,\n";
print $out "as numeric identifier, for comparisons. Equivalent to the\n";
print $out "__?NASM_VERSION_ID?__ NASM macro value.\")\n";
printf $out "\n(defconst nasm-token-version-snapshot %s\n",
$version{'snapshot'} || 'nil';
print $out " \"Daily NASM snapshot build from which tokens were extracted,\n";
print $out "as a decimal number in YYYYMMDD format, or nil if not a\n";
print $out "daily snapshot build.\")\n";
# Footer
print $out "\n(provide 'nasmtok)\n";
print $out ";;; ${file} ends here\n";
return 0;
}
# JSON
sub write_output_json {
use JSON;
my($out, $outfile, $file) = @_;
my $whoami = 'NASM '.$version{'ver'};
my $json = JSON->new;
$json = $json->ascii(1)->canonical(1);
my %ver;
foreach my $vn (keys(%version)) {
my $vv = $version{$vn};
next if ($vn eq 'version_xid');
$vn =~ s/_ver$//;
$vn =~ s/^version_//;
$vv = $vv + 0 if ($vn ne 'ver');
$ver{$vn} = $vv;
}
print $out $json->encode({
'$comment' => "NASM syntax information extracted from ${whoami}",
'tokens' => \%tokens, 'version' => \%ver});
print $out "\n";
return 0;
}
sub write_output($$) {
my($format, $outfile) = @_;
my %formats = (
'el' => \&write_output_el,
'json' => \&write_output_json
);
my $outfunc = $formats{$format};
if (!defined($outfunc)) {
die "$0: unknown output format: $format\n";
}
open(my $out, '>', $outfile)
or die "$0:$outfile: $!\n";
my($vol,$dir,$file) = File::Spec->splitpath($outfile);
my $err = $outfunc->($out, $outfile, $file);
close($out);
if ($err) {
unlink($outfile);
die "$0:$outfile: error writing output\n";
}
}
add_special_cases();
read_tokhash_c('asm/tokhash.c');
read_pptok_c('asm/pptok.c');
read_directiv_dat('asm/directiv.dat');
read_version('version.mak');
read_macros(@vpath);
write_output($format, $outfile);