mirror of
https://github.com/rkd77/elinks.git
synced 2024-12-04 14:46:47 -05:00
UTF-8: New function unicode_fold_label_case and a related script.
This commit is contained in:
parent
8a1d7e2fa3
commit
f7fd49cf28
137
Unicode/gen-case
Executable file
137
Unicode/gen-case
Executable file
@ -0,0 +1,137 @@
|
||||
#! /usr/bin/perl
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
# Simple case-folding table: $trans[CODE] is the code point that CODE
# folds to, or undef if CODE has no simple folding (or has already been
# emitted by gobble).
my @trans;

# Identify the generated output as C and record how it was produced.
print "\t/* -*- c -*- source code generated by ", join(" ", $0, @ARGV), " */\n";

# Read every input file named on the command line (or standard input).
# Each data line has the form "CODE; STATUS; MAPPING;" where STATUS is
# one of C, F, S or T.  Only the C and S entries are kept.
while (defined(my $line = <>)) {
    # Drop comments, then ignore lines that are empty afterwards.
    $line =~ s/#.*$//;
    next if $line =~ /^\s*$/;

    my ($code, $status, $mapping) =
        $line =~ /^([[:xdigit:]]+);\s*([CFST]);\s*([[:xdigit:]]+(?:\s+[[:xdigit:]]+)*);\s*$/;
    unless (defined $code) {
        warn("$ARGV:$.: weird line\n");
        next;
    }

    # F and T entries are not used for simple case folding.
    next if $status ne "C" && $status ne "S";

    # A C or S entry is expected to map to exactly one code point.
    if ($mapping =~ /\s/) {
        warn("$ARGV:$.: multi-char simple mapping\n");
        next;
    }

    $trans[hex($code)] = hex($mapping);
} continue {
    # Closing ARGV at the end of each file restarts $. so that the line
    # numbers in warnings are relative to the current file.
    close ARGV if eof;
}
|
||||
|
||||
# gobble($begin, $step)
#
# Try to cover a series of code points with a single C "if" statement.
# Starting at $begin and advancing by $step, collect code points whose
# folding differs from the code point itself by the same offset as
# $trans[$begin] does.  A series consists of runs of at least two such
# code points; consecutive runs may be separated by exactly one
# non-matching position (a "hole"), which the generated condition
# excludes explicitly.
#
# On success, prints the "if (...)" line followed by the adjustment of
# c, clears the covered entries of @trans and returns 1.  Returns 0
# without printing anything if not even one run of two was found.
sub gobble {
    my ($begin, $step) = @_;
    my $diff = $trans[$begin] - $begin;
    my (@codes, @holes);
    my $pending_hole;
    my $probe = $begin;

    for (;;) {
        # Collect the run of matching code points that starts at $probe.
        my @run;
        for (; defined($trans[$probe]) && $trans[$probe] == $probe + $diff;
             $probe += $step) {
            push @run, $probe;
        }
        last if @run < 2;

        # The position skipped before this run is now known to lie
        # between two accepted runs, so it really is a hole.
        push @holes, $pending_hole if defined $pending_hole;
        push @codes, @run;

        # $probe is the first position that did not match; step over it
        # and see whether another run follows.
        $pending_hole = $probe;
        $probe += $step;
    }
    return 0 if !@codes;

    # The following formula was tuned for i486-linux-gnu-gcc-4.0 -O1.
    my $condition;
    if (@codes > 2 + @holes) {
        # Range test, minus the holes, plus a congruence test when the
        # series does not include every code point in the range.
        $condition = sprintf("c >= 0x%X && c <= 0x%X", $codes[0], $codes[-1]);
        $condition .= sprintf(" && c != 0x%X", $_) for @holes;
        if ($step == 2) {
            $condition .= sprintf(" && (c & 1) == %d", $begin & 1);
        }
        elsif ($step != 1) {
            $condition .= sprintf(" && c %% %d == %d", $step, $begin % $step);
        }
    }
    else {
        # Few enough code points that listing them one by one is used.
        $condition = join(" || ", map { sprintf("c == 0x%X", $_) } @codes);
    }
    print "if ($condition)\n";

    # Emit the adjustment; small offsets in decimal, larger ones in hex.
    # A zero offset yields just an empty statement.
    my $statement = "";
    if ($diff != 0) {
        my $operator = $diff < 0 ? "-=" : "+=";
        my $amount = abs($diff);
        $statement = "\t\tc $operator "
            . sprintf($amount < 10 ? "%d" : "0x%X", $amount);
    }
    print "$statement;\n";

    # These code points are done; later passes must not see them again.
    @trans[@codes] = ();
    return 1;
}
|
||||
|
||||
# Walk the table in ascending order and emit an if / else if chain.
# For each code point that still has a folding, try to cover a whole
# series with gobble using strides 1 through 4; if none of them
# applies, emit a test for that single code point.
my $emitted_any = 0;
foreach my $code (0 .. $#trans) {
    next if !defined $trans[$code];

    # Every statement after the first one continues the else chain.
    print $emitted_any ? "\telse " : "\t";

    my $covered = 0;
    foreach my $step (1 .. 4) {
        if (gobble($code, $step)) {
            $covered = 1;
            last;
        }
    }
    printf "if (c == 0x%X)\n\t\tc = 0x%X;\n", $code, $trans[$code]
        unless $covered;

    $emitted_any = 1;
}

# Errors from buffered writes may only show up when the stream is closed.
close STDOUT or die "$0: -: $!\n";
|
||||
|
||||
__END__
|
||||
|
||||
=head1 NAME
|
||||
|
||||
gen-case - Generate C source code for folding the case of a Unicode character.
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
B<gen-case> CaseFolding.txt > ../src/intl/casefold.inc
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
B<gen-case> reads F<CaseFolding.txt> of the Unicode Character Database
|
||||
and generates C source code that implements the I<simple case folding>
|
||||
as defined in that file.
|
||||
|
||||
The generated source code can then be used like this:
|
||||
|
||||
	unicode_val_T
	unicode_simple_case_fold(unicode_val_T c)
	{
	#include "casefold.inc"
		return c;
	}
|
||||
|
||||
=head1 BUGS
|
||||
|
||||
Does not support B<--help> nor B<--version>.
|
||||
|
||||
=head1 AUTHOR
|
||||
|
||||
Kalle Olavi Niemitalo <kon@iki.fi>
|
||||
|
||||
=head1 COPYRIGHT AND LICENSE
|
||||
|
||||
Copyright (c) 2006 Kalle Olavi Niemitalo.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the same terms as Perl itself. In addition:
|
||||
|
||||
=over
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a
|
||||
copy of this software and associated documentation files (the "Software"),
|
||||
to deal in the Software without restriction, including without limitation
|
||||
the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
and/or sell copies of the Software, and to permit persons to whom the
|
||||
Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
=back
|
@ -10,6 +10,9 @@
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdlib.h>
|
||||
#if HAVE_WCTYPE_H
|
||||
#include <wctype.h>
|
||||
#endif
|
||||
|
||||
#include "elinks.h"
|
||||
|
||||
@ -404,6 +407,27 @@ unicode_to_cell(unicode_val_T c)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Fold the case of a Unicode character, so that hotkeys in labels can
|
||||
* be compared case-insensitively. This should be called only if
|
||||
* check_kbd_label_key(c) is true. It is unspecified whether the
|
||||
* result will be in upper or lower case. */
|
||||
unicode_val_T
|
||||
unicode_fold_label_case(unicode_val_T c)
|
||||
{
|
||||
#if __STDC_ISO_10646__ && HAVE_WCTYPE_H
|
||||
return towlower(c);
|
||||
#else /* !(__STDC_ISO_10646__ && HAVE_WCTYPE_H) */
|
||||
/* For now, this supports only ASCII. It would be possible to
|
||||
* use code generated from CaseFolding.txt of Unicode if the
|
||||
* acknowledgements required by http://www.unicode.org/copyright.html
|
||||
* were added to associated documentation of ELinks. */
|
||||
if (c >= 0x41 && c <= 0x5A)
|
||||
return c + 0x20;
|
||||
else
|
||||
return c;
|
||||
#endif /* !(__STDC_ISO_10646__ && HAVE_WCTYPE_H) */
|
||||
}
|
||||
|
||||
inline unicode_val_T
|
||||
utf_8_to_unicode(unsigned char **string, unsigned char *end)
|
||||
{
|
||||
|
@ -62,6 +62,7 @@ int utf8_ptr2cells(unsigned char *, unsigned char *);
|
||||
int utf8_ptr2chars(unsigned char *, unsigned char *);
|
||||
int utf8_cells2bytes(unsigned char *, int, unsigned char *);
|
||||
inline int unicode_to_cell(unicode_val_T);
|
||||
unicode_val_T unicode_fold_label_case(unicode_val_T);
|
||||
inline int strlen_utf8(unsigned char **);
|
||||
inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
|
||||
unicode_val_T cp2u(int, unsigned char);
|
||||
|
Loading…
Reference in New Issue
Block a user