freebsd-ports/Tools/make_index
Kris Kennaway 54e565eedc Major optimizations for 'make index' and other recursive traversal
targets.

* Use /rescue/sh for index builds instead of /bin/sh, when it exists.
  The former is statically linked and faster to execute, which becomes
  significant when executing it tens of thousands of times.  This
  trick can be used with other recursive targets by passing in
  __MAKE_SHELL.

* Get rid of make variable assignments that use != command invocations
  in the critical path, using several methods:

  - rewriting logic to use shell or make builtins instead of external command executions
  - macroizing commands and executing them in the targets where they
    are needed instead of with every invocation of make
  - precomputing the results of invariant commands in
    bsd.port.subdir.mk and passing them in explicitly to child makes,
    and using this to avoid recalculation in all the children. NB: the
    commands are still run one per top-level subdirectory but this
    does not currently seem to be a major issue.  They could be moved
    further up into the top-level Makefile at the cost of some
    cleanliness.
  - Committers are strongly discouraged from adding further "bare" !=
    assignments to the ports tree, even in their own ports.  One of
    the above strategies should be used to avoid future bloat.

* Rewrite the core 'describe' target to work entirely within a single
  shell process using only builtin commands.  The old version is
  retained as a backup for use on systems older than 603104, which
  do not have the make :u modifier.  This cuts down the number of
  processes executed during the course of a 'make index' by an order
  of magnitude, and we are essentially now amortized to the minimum of
  a single make + sh instance per port, plus whatever commands the
  port makefile itself executes (which are usually unnecessary and
  bogus).

* Less validation of the WWW: target is performed; this can be
  policed at a port level by portlint.  Specifically we look at the
  second word of the first line beginning with "WWW:" in pkg-descr,
  and prepend "http://" to it unless it already begins with "http://",
  "https://" or "ftp://".  Thanks to dougb for the idea of how to
  extract WWW: using shell builtins.

* Use the "true" shell builtin instead of echo > /dev/null for a
  measurable decrease in CPU use.

* Add a note about dubious escaping strategy in bsd.port.subdir.mk

* Minor change in output of 'make describe': it no longer strips
  trailing CR characters from pkg-descr files with MSDOS CR/LF
  termination.  Instead the make_index Perl script that post-processes
  make describe into the INDEX is tweaked to strip on input.

The bottom line is that on my test hardware INDEX builds are now
faster by more than a factor of 2 and with a reduction in system time
by a factor of 4-8 depending on configuration.
2008-07-19 17:59:41 +00:00

181 lines
5.1 KiB
Perl

#!/usr/bin/perl
#
# $FreeBSD$
#
# INDEX builds visit each port once and write out each port's
# *-depends as a list of directories, using 'make describe'. This
# script goes back in and maps the directories back to pkgnames,
# fixes up the *-depends list, and writes out the new INDEX file.
require 5.002;
# Helper function to map a directory to a pkgname.
#
# Arguments:
#   $name - directory of the port to look up
#   $port - pkgname of the port that referenced $name; used only in
#           the diagnostic printed when the lookup fails
#
# Returns the pkgname stored in %by_path for the directory.  Three
# spellings are tried in turn: the directory exactly as given, the
# directory with a leading $pwd rewritten to /usr/ports, and that
# result with its '..' components collapsed.  If none of them is
# known, a message is written to STDERR and undef is returned.
sub by_path {
    my ($name, $port) = @_;

    # If a direct mapping exists, then use it.
    return $by_path{$name} if (defined $by_path{$name});

    # Make sure we have /usr/ports at the beginning.  This has to stay
    # in step with the identical substitution applied to the directory
    # field when %by_path is filled in, otherwise keys and lookups
    # would be canonicalized differently.
    $name =~ s!^$pwd!/usr/ports!o;
    return $by_path{$name} if (defined $by_path{$name});

    # Collapse all the '..' sequences.  Only a component that is
    # exactly '..' names the parent directory; a component that merely
    # contains two dots (e.g. 'foo..bar') is an ordinary name and must
    # be kept.
    my @p = ();
    foreach my $part (split('/', $name)) {
        if ($part eq '..') {
            pop(@p);
        } else {
            push(@p, $part);
        }
    }
    $name = join('/', @p);
    return $by_path{$name} if (defined $by_path{$name});

    print STDERR "make_index: $port: no entry for $name\n";

    # Deliberately 'return undef' rather than a bare 'return': the
    # callers invoke this inside map and rely on getting exactly one
    # value back per directory.
    return undef;
}
# This routine replaces what used to be the time-consuming
# recursive 'depends-list' and 'package-depends' targets.
#
# Argument:
#   $pkg - reference to one port's record from %index, holding the
#          array references 'edep', 'pdep', 'fdep', 'bdep' and 'rdep'
#          (lists of pkgnames)
#
# For each of the five lists, in that order, every listed dependency is
# expanded first and its run-depends are then appended to the list,
# with duplicates removed by uniqify().  'rdep' is handled last so that
# a port's own run-depends are still unexpanded while its other lists
# are being built.  The record is updated in place and flagged with
# 'checked' so that it is only expanded once.
sub recurse {
    my $pkg = shift(@_);
    return if $pkg->{checked};

    # Flag the record before descending.  Setting it only after some of
    # the lists had been walked allowed a dependency cycle through
    # extract-, patch- or fetch-depends to recurse without bound.
    $pkg->{checked} = 1;

    foreach my $kind (qw(edep pdep fdep bdep rdep)) {
        # <kind>-depends = <kind>-depends + recursive list of
        # run-depends for each <kind>-depends
        my @deps = ();
        foreach my $dep (@{$pkg->{$kind}}) {
            recurse($index{$dep});
            push(@deps, @{$index{$dep}->{rdep}});
        }
        $pkg->{$kind} = uniqify(@{$pkg->{$kind}}, @deps);
    }
    return;
}
# Flatten the argument list into its distinct elements, keeping the
# order in which each element first appears.  Returns a reference to a
# newly built array.
sub uniqify {
    my %seen;
    my @unique;
    foreach my $item (@_) {
        # Anything counted before is a repeat; skip it.
        next if $seen{$item}++;
        push(@unique, $item);
    }
    return \@unique;
}
# Remember the directory we were started in, so that paths beginning
# with it can be rewritten to the canonical '/usr/ports/...' form.
$pwd = `pwd`;
chomp($pwd);

# Read each line of output generated by the 'index' target.
while (<>) {
    chomp;
    # Drop a trailing carriage return, if there is one.
    s/\015$//;
    my @field = split(/\|/);

    # Force fields 1 and 4 to canonical form.
    $field[1] =~ s!^$pwd!/usr/ports!o;
    $field[4] =~ s!^$pwd!/usr/ports!o;

    # Save the directory -> pkgname relationship.  The pkgname is
    # copied into $name first because the splices below remove the
    # leading fields from @field.
    $name = $field[0];
    $by_path{$field[1]} = $name;

    # Fields 7 through 11 are space-separated dependency lists.
    my @edep = split(/ /, $field[7]);
    my @pdep = split(/ /, $field[8]);
    my @fdep = split(/ /, $field[9]);
    my @bdep = split(/ /, $field[10]);
    my @rdep = split(/ /, $field[11]);

    # Take the tail (field 12 onwards) before the head (fields 0-6),
    # so that both offsets still refer to the original positions.
    my $rest = join('|', splice(@field, 12));
    my $text = join('|', splice(@field, 0, 7));

    # Create a hash table of the information we need about this port.
    $index{$name} = {
        'edep' => \@edep,
        'pdep' => \@pdep,
        'fdep' => \@fdep,
        'bdep' => \@bdep,
        'rdep' => \@rdep,
        'rest' => $rest,
        'text' => $text,
    };

    # This is a cheap way of preserving the order of the entries.
    push(@names, $name);
}
# For each port, translate every dependency list from directories to
# pkgnames.  The lists are handled in the order extract, patch, fetch,
# build, run; a list that is empty is left untouched.
foreach $name (keys %index) {
    my $pkg = $index{$name};
    foreach my $kind (qw(edep pdep fdep bdep rdep)) {
        next unless @{$pkg->{$kind}};
        # by_path() reports any directory it cannot resolve, naming
        # this port in the message.
        my @mapped = map { by_path($_, $name) } @{$pkg->{$kind}};
        $pkg->{$kind} = \@mapped;
    }
}
# With all that done we're finally ready to write out the new
# INDEX file one port at a time, in the order the ports were read.
foreach $name (@names) {
    my $pkg = $index{$name};

    # A name that occurs more than once in @names shares one record;
    # it is written the first time and only warned about afterwards.
    if (exists $pkg->{'PRINTED'}) {
        print STDERR "Warning: Duplicate INDEX entry: $name\n";
        next;
    }

    # Expand the dependency lists of this port.
    recurse($pkg);

    # Each list becomes a sorted, space-separated string (empty when
    # the list is empty).
    my ($bdep, $rdep, $edep, $pdep, $fdep) =
        map { join(' ', sort(@{$pkg->{$_}})) } qw(bdep rdep edep pdep fdep);

    # Output layout: text|build|run|rest|extract|patch|fetch
    print join('|', $pkg->{text}, $bdep, $rdep, $pkg->{rest},
        $edep, $pdep, $fdep), "\n";

    ++$pkg->{'PRINTED'};
}