#!/usr/bin/perl # # Automatically produce some tables useful for a NASM major mode # use integer; use strict; use File::Spec; use File::Find; my $format = 'el'; if ($ARGV[0] =~ /^-(\S+)$/) { $format = $1; shift @ARGV; } my($outfile, $srcdir, $objdir) = @ARGV; if (!defined($outfile)) { die "Usage: $0 [-format] outfile srcdir objdir\n"; } my @vpath; $srcdir = $srcdir || File::Spec->curdir(); $objdir = $objdir || $srcdir; push(@vpath, $objdir) if ($objdir ne $srcdir); push(@vpath, $srcdir); my %tokens = (); # Token lists per category my %token_category = (); # Tokens to category map sub xpush($@) { my $ref = shift @_; $$ref = [] unless (defined($$ref)); return push(@$$ref, @_); } # Search for a file, and return a file handle if successfully opened sub open_vpath($$) { my($mode, $file) = @_; my %tried; # For simplicity, allow filenames to be specified # with Unix / syntax internally $file = File::Spec->catfile(split(/\//, $file)); foreach my $d (@vpath) { my $fn = File::Spec->catfile($d, $file); next if ($tried{$fn}); $tried{$fn}++; my $fh; return $fh if (open($fh, $mode, $fn)); } return undef; } sub must_open($) { my($file) = @_; my $fh = open_vpath('<', $file); return $fh if (defined($fh)); die "$0:$file: $!\n"; } # Combine some specific token types my %override = ( 'brcconst' => 'special-constant', 'id' => 'special', 'float' => 'function', 'floatize' => 'function', 'strfunc' => 'function', 'ifunc' => 'function', 'insn' => 'instruction', 'reg' => 'register', 'seg' => 'special', 'wrt' => 'special', 'times' => 'special'); sub addtoken($@) { my $type = shift @_; foreach my $token (@_) { unless (defined($token_category{$token})) { $type = $override{$type} if (defined($override{$type})); xpush(\$tokens{$type}, $token); $token_category{$token} = $type; } } } sub read_tokhash_c($) { my($tokhash_c) = @_; my $th = must_open($tokhash_c); my $l; my $tokendata = 0; while (defined($l = <$th>)) { if ($l =~ /\bstruct tokendata tokendata\[/) { $tokendata = 1; next; } elsif (!$tokendata) { next; } last if ($l =~ /\}\;/); if ($l =~ /^\s*\{\s*\"(.*?)\",.*?,\s*TOKEN_(\w+),(.*)\}/) { my $token = $1; my $type = lc($2); my $flags = $3; $token = "{${token}}" if ($flags =~ /\bTFLAG_BRC\b/); # Parametric token: omit the actual parameter(s) $token =~ s/^(\{[\w-]+=).+(\})$/$1$2/; if ($token !~ /^(\{[\w-]+=?\}|\w+)$/) { $type = 'operator'; } elsif ($token =~ /^__\?masm_.*\?__$/) { next; } addtoken($type, $token); if ($token =~ /^__\?(.*)\?__$/) { # Also encode the "user" (macro) form without __?...?__ addtoken($type, $1); } } } close($th); } sub read_pptok_c($) { my($pptok_c) = @_; my $pt = must_open($pptok_c); my $l; my $pp_dir = 0; while (defined($l = <$pt>)) { if ($l =~ /\bpp_directives\[/) { $pp_dir = 1; next; } elsif (!$pp_dir) { next; } last if ($l =~ /\}\;/); if ($l =~ /^\s*\"(.*?)\"/) { addtoken('pp-directive', $1); } } close($pt); } sub read_directiv_dat($) { my($directiv_dat) = @_; my $dd = must_open($directiv_dat); my $l; my $directiv = 0; while (defined($l = <$dd>)) { if ($l =~ /^\; ---.*?(pragma)?/) { $directiv = ($1 ne 'pragma'); next; } elsif (!$directiv) { next; } if ($l =~ /^\s*(\w+)/) { addtoken('directive', $1); } } close($dd); } my %version; sub read_version($) { my($vfile) = @_; my $v = must_open($vfile); while (defined(my $vl = <$v>)) { if ($vl =~ /^NASM_(\w+)=(\S+)\s*$/) { $version{lc($1)} = $2; } } close($v); } # This is called from the directory search in read_macros(), so # don't use must_open() here. sub read_macro_file($) { my($file) = @_; open(my $fh, '<', $file) or die "$0:$file: $!\n"; while (defined(my $l = <$fh>)) { next unless ($l =~ /^\s*\%/); my @f = split(/\s+/, $l); next unless (scalar(@f) >= 2); next if ($f[1] =~ /^[\%\$][^\(]+$/); # Internal use only $f[1] =~ s/\(.*$//; # Strip argument list if any $f[1] = lc($f[1]) if ($f[0] =~ /^\%i/); if ($f[0] =~ /^\%(i)?(assign|defalias|define|defstr|substr|xdefine)\b/) { addtoken('smacro', $f[1]); } elsif ($f[0] =~ /^\%i?macro$/) { addtoken('mmacro', $f[1]); } } close($fh); } sub read_macros(@) { my %visited; my @dirs = (File::Spec->curdir(), qw(macros output editors)); @dirs = map { my $od = $_; map { File::Spec->catdir($od, $_) } @dirs } @_; foreach my $dir (@dirs) { next if ($visited{$dir}); $visited{$dir}++; next unless opendir(my $dh, $dir); while (defined(my $fn = readdir($dh))) { next unless ($fn =~ /\.mac$/i); read_macro_file(File::Spec->catfile($dir, $fn)); } closedir($dh); } } # Handle special tokens which may not have been picked up by the automatic # process, because they depend on the build parameters, or are buried # deep in C code... sub add_special_cases() { # Not defined in non-snapshot builds addtoken('smacro', '__NASM_SNAPSHOT__', '__?NASM_SNAPSHOT?__'); } sub make_lines($$@) { my $maxline = shift @_; my $indent = shift @_; # The first line isn't explicitly indented and the last line # doesn't end in "\n"; assumed the surrounding formatter wants # do control that my $linepos = 0; my $linewidth = $maxline - $indent; my $line = ''; my @lines = (); foreach my $w (@_) { my $l = length($w); if ($linepos > 0 && $linepos+$l+1 >= $linewidth) { $line .= "\n" . (' ' x $indent); push(@lines, $line); $linepos = 0; $line = ''; } if ($linepos > 0) { $line .= ' '; $linepos++; } $line .= $w; $linepos += $l; } if ($linepos > 0) { push(@lines, $line); } return @lines; } sub quote_for_emacs(@) { return map { s/[\\\"\']/\\$1/g; '"'.$_.'"' } @_; } # Emacs LISP sub write_output_el { my($out, $outfile, $file) = @_; my $whoami = 'NASM '.$version{'ver'}; print $out ";;; ${file} --- lists of NASM assembler tokens\n\n"; print $out ";;; Commentary:\n\n"; print $out ";; This file contains list of tokens from the NASM x86\n"; print $out ";; assembler, automatically extracted from ${whoami}.\n"; print $out ";;\n"; print $out ";; This file is intended to be (require)d from a `nasm-mode\'\n"; print $out ";; major mode definition.\n"; print $out ";;\n"; print $out ";; Tokens that are only recognized inside curly braces are\n"; print $out ";; noted as such. Tokens of the form {xxx=} are parametric\n"; print $out ";; tokens, where the token may contain additional text on\n"; print $out ";; the right side of the = sign. For example,\n"; print $out ";; {dfv=} should be matched by {dfv=cf,zf}.\n"; print $out "\n"; print $out ";;; Code:\n"; my @types = sort keys(%tokens); # Write the individual token type lists foreach my $type (sort keys(%tokens)) { print $out "\n(defconst nasm-${type}\n"; print $out " \'("; print $out make_lines(78, 4, quote_for_emacs(sort @{$tokens{$type}})); print $out ")\n"; print $out " \"${whoami} ${type} tokens for `nasm-mode\'.\")\n"; } # Generate a list of all the token type lists. print $out "\n(defconst nasm-token-lists\n"; print $out " \'("; print $out make_lines(78, 4, map { "'nasm-$_" } sort keys(%tokens)); print $out ")\n"; print $out " \"List of all ${whoami} token type lists.\")\n"; # The NASM token extracted version printf $out "\n(defconst nasm-token-version %s\n", quote_for_emacs($version{'ver'}); print $out " \"Version of NASM from which tokens were extracted,\n"; print $out "as a human-readable string.\")\n"; printf $out "\n(defconst nasm-token-version-id #x%08x\n", $version{'version_id'}; print $out " \"Version of NASM from which tokens were extracted,\n"; print $out "as numeric identifier, for comparisons. Equivalent to the\n"; print $out "__?NASM_VERSION_ID?__ NASM macro value.\")\n"; printf $out "\n(defconst nasm-token-version-snapshot %s\n", $version{'snapshot'} || 'nil'; print $out " \"Daily NASM snapshot build from which tokens were extracted,\n"; print $out "as a decimal number in YYYYMMDD format, or nil if not a\n"; print $out "daily snapshot build.\")\n"; # Footer print $out "\n(provide 'nasmtok)\n"; print $out ";;; ${file} ends here\n"; return 0; } # JSON sub write_output_json { use JSON; my($out, $outfile, $file) = @_; my $whoami = 'NASM '.$version{'ver'}; my $json = JSON->new; $json = $json->ascii(1)->canonical(1); my %ver; foreach my $vn (keys(%version)) { my $vv = $version{$vn}; next if ($vn eq 'version_xid'); $vn =~ s/_ver$//; $vn =~ s/^version_//; $vv = $vv + 0 if ($vn ne 'ver'); $ver{$vn} = $vv; } print $out $json->encode({ '$comment' => "NASM syntax information extracted from ${whoami}", 'tokens' => \%tokens, 'version' => \%ver}); print $out "\n"; return 0; } sub write_output($$) { my($format, $outfile) = @_; my %formats = ( 'el' => \&write_output_el, 'json' => \&write_output_json ); my $outfunc = $formats{$format}; if (!defined($outfunc)) { die "$0: unknown output format: $format\n"; } open(my $out, '>', $outfile) or die "$0:$outfile: $!\n"; my($vol,$dir,$file) = File::Spec->splitpath($outfile); my $err = $outfunc->($out, $outfile, $file); close($out); if ($err) { unlink($outfile); die "$0:$outfile: error writing output\n"; } } add_special_cases(); read_tokhash_c('asm/tokhash.c'); read_pptok_c('asm/pptok.c'); read_directiv_dat('asm/directiv.dat'); read_version('version.mak'); read_macros(@vpath); write_output($format, $outfile);