2006-08-05 12:45:53 -04:00
|
|
|
#! /usr/bin/perl
|
|
|
|
use strict;
|
|
|
|
use warnings;
|
|
|
|
|
|
|
|
# Folding table built from the input: $trans[$code] holds the code point
# that $code maps to.  Code points without a usable mapping stay undefined.
my @trans;

# Begin the output with a C comment recording how it was generated.
print "\t/* -*- c -*- source code generated by ", join(" ", $0, @ARGV), " */\n";

# Read all input files line by line.  A useful line has three
# semicolon-terminated fields: hexadecimal code, one-letter status
# (C, F, S or T) and one or more hexadecimal code points it maps to.
while (<>) {
	s/#.*$//;		# strip the trailing comment, if any
	next if /^\s*$/;	# nothing but whitespace is left

	my @fields = /^([[:xdigit:]]+);\s*([CFST]);\s*([[:xdigit:]]+(?:\s+[[:xdigit:]]+)*);\s*$/;
	unless (@fields) {
		warn("$ARGV:$.: weird line\n");
		next;
	}
	my($code, $status, $mapping) = @fields;

	# Only lines with status "C" or "S" contribute to the table.
	next if $status ne "C" and $status ne "S";

	# Such a mapping must be a single code point; report and skip others.
	if ($mapping =~ /\s/) {
		warn("$ARGV:$.: multi-char simple mapping\n");
		next;
	}

	$trans[hex($code)] = hex($mapping);
} continue {
	# Closing ARGV at the end of each file makes $. restart from 1 for
	# the next file, so the line numbers in warnings are per file.
	close ARGV if eof;
}
|
|
|
|
|
|
|
|
# gobble($begin, $step)
#
# Try to cover a whole series of code points with a single C "if"
# statement.  The series starts at $begin, advances by $step, and consists
# of code points whose mapping differs from the code point itself by the
# same amount as at $begin.  Single non-matching positions ("holes") are
# tolerated inside the series as long as at least two matching code points
# follow each of them.
#
# On success the statement is printed to STDOUT, the covered entries of
# @trans are reset to undef, and 1 is returned.  If fewer than two code
# points qualify, nothing is printed and 0 is returned.
sub gobble {
	my($begin, $step) = @_;
	my $delta = $trans[$begin] - $begin;
	my(@covered, @gaps);
	my $pending_gap;
	my $pos = $begin;

	for (;;) {
		# Collect the uninterrupted run of matching code points at $pos.
		my @run;
		for (; defined($trans[$pos]) && $trans[$pos] == $pos + $delta; $pos += $step) {
			push @run, $pos;
		}

		# Fewer than two matches: stop, leaving any pending hole out.
		last if @run < 2;

		# The position skipped before this run is now inside the series.
		push @gaps, $pending_gap if defined $pending_gap;
		push @covered, @run;

		# Skip the position that ended the run and look beyond it.
		$pending_gap = $pos;
		$pos += $step;
	}
	return 0 unless @covered;

	# The following formula was tuned for i486-linux-gnu-gcc-4.0 -O1.
	if (@covered > 2 + @gaps) {
		# Range test, minus the holes, plus a stride test if needed.
		my $cond = sprintf("if (c >= 0x%X && c <= 0x%X", $covered[0], $covered[-1]);
		$cond .= sprintf(" && c != 0x%X", $_) foreach @gaps;
		if ($step == 2) {
			$cond .= sprintf(" && (c & 1) == %d", $begin & 1);
		} elsif ($step != 1) {
			$cond .= sprintf(" && c %% %d == %d", $step, $begin % $step);
		}
		print $cond, ")\n";
	} else {
		# Few enough code points: compare against each one explicitly.
		my @tests = map { sprintf("c == 0x%X", $_) } @covered;
		print "if (" . join(" || ", @tests) . ")\n";
	}

	# Body of the "if": adjust c by the common offset.  Small amounts are
	# printed in decimal, larger ones in hexadecimal.  A zero offset
	# leaves just an empty statement.
	if ($delta != 0) {
		my $operator = $delta < 0 ? "-=" : "+=";
		my $amount = abs($delta);
		my $format = $amount < 10 ? "%d" : "0x%X";
		print "\t\tc $operator ", sprintf($format, $amount);
	}
	print ";\n";

	# Mark the covered code points as handled.
	@trans[@covered] = ();
	return 1;
}
|
|
|
|
|
|
|
|
# Walk the table in ascending order and emit an "if ... else if ..." chain.
# The first statement is introduced by a tab alone, every later one by
# "else" as well.
my $lead = "\t";
foreach my $code (0 .. $#trans) {
	# Skip code points without a mapping, including those that an
	# earlier gobble() call has already covered and cleared.
	next unless defined $trans[$code];

	print $lead;

	# Prefer a series with the smallest stride that works.
	my $covered = 0;
	foreach my $step (1 .. 4) {
		next unless gobble($code, $step);
		$covered = 1;
		last;
	}

	# No series starts here: emit a test for this single code point.
	printf "if (c == 0x%X)\n\t\tc = 0x%X;\n", $code, $trans[$code]
		unless $covered;

	$lead = "\telse ";
}

# Buffered write errors only surface when the handle is closed.
unless (close STDOUT) {
	die "$0: -: $!\n";
}
|
|
|
|
|
|
|
|
__END__
|
|
|
|
|
|
|
|
=head1 NAME
|
|
|
|
|
|
|
|
gen-case - Generate C source code for folding the case of a Unicode character.
|
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
|
|
|
|
B<gen-case> CaseFolding.txt > ../src/intl/casefold.inc
|
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
|
|
|
|
B<gen-case> reads F<CaseFolding.txt> of the Unicode Character Database
and generates C source code that implements the I<simple case folding>
as defined in that file.
|
|
|
|
|
|
|
|
The generated source code can then be used like this:

  unicode_val_T
  unicode_simple_case_fold(unicode_val_T c)
  {
  #include "casefold.inc"
  	return c;
  }
|
|
|
|
|
|
|
|
=head1 BUGS
|
|
|
|
|
|
|
|
Does not support B<--help> nor B<--version>.
|
|
|
|
|
|
|
|
=head1 AUTHOR
|
|
|
|
|
|
|
|
Kalle Olavi Niemitalo <kon@iki.fi>
|
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE
|
|
|
|
|
|
|
|
Copyright (c) 2006 Kalle Olavi Niemitalo.

Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|