1
0
mirror of https://github.com/rkd77/elinks.git synced 2024-06-21 00:25:37 +00:00

UTF-8: New function unicode_fold_label_case and a related script.

This commit is contained in:
Kalle Olavi Niemitalo 2006-08-05 19:45:53 +03:00 committed by Miciah Dashiel Butler Masters
parent 8a1d7e2fa3
commit f7fd49cf28
3 changed files with 162 additions and 0 deletions

137
Unicode/gen-case Executable file
View File

@ -0,0 +1,137 @@
#! /usr/bin/perl
use strict;
use warnings;
# Simple case-folding table: $trans[CODE] holds the code point that CODE
# folds to.  Code points without a simple mapping stay undefined.
my @trans;

# Banner comment recording how the generated code was produced.
my $invocation = join(" ", $0, @ARGV);
print "\t/* -*- c -*- source code generated by $invocation */\n";

LINE: while (defined(my $line = <>)) {
    # Strip comments, then skip lines that have nothing left.
    $line =~ s/#.*$//;
    next LINE if $line =~ /^\s*$/;

    # Expected shape: "<hex code>; <status>; <hex mapping> [<hex> ...];"
    my ($code, $status, $mapping) =
        $line =~ /^([[:xdigit:]]+);\s*([CFST]);\s*([[:xdigit:]]+(?:\s+[[:xdigit:]]+)*);\s*$/;
    unless (defined $code) {
        warn("$ARGV:$.: weird line\n");
        next LINE;
    }

    # Only the common (C) and simple (S) statuses are used; the others
    # that the pattern accepts (F and T) are ignored.
    next LINE if $status ne "C" and $status ne "S";

    if ($mapping =~ /\s/) {
        warn("$ARGV:$.: multi-char simple mapping\n");
        next LINE;
    }

    $trans[hex($code)] = hex($mapping);
}
continue {
    # Closing ARGV at the end of each input file restarts $. so that the
    # line numbers in warnings are relative to the current file.
    close ARGV if eof;
}
# gobble($begin, $step)
#
# Try to cover several entries of @trans with a single generated C "if"
# statement.  Starting at $begin and advancing by $step, collect code
# points whose mapping differs from the code point by the same offset as
# $begin's does.  Code points are gathered in runs of at least two; a
# single non-matching position between two such runs is tolerated and is
# excluded from the generated condition as a "hole".
#
# On success, prints the condition and the adjustment of "c", clears the
# covered entries of @trans, and returns 1.  Returns 0 without printing
# anything when no run of at least two code points starts at $begin.
sub gobble {
    my ($begin, $step) = @_;
    my $offset = $trans[$begin] - $begin;

    my (@codes, @holes);
    my $cursor = $begin;
    my $pending_hole;

    RUN: while (1) {
        my @run;
        while (defined($trans[$cursor]) && $trans[$cursor] == $cursor + $offset) {
            push @run, $cursor;
            $cursor += $step;
        }
        last RUN if @run < 2;

        # The position that ended the previous run only counts as a hole
        # once another acceptable run has been found behind it.
        push @holes, $pending_hole if defined $pending_hole;
        push @codes, @run;
        $pending_hole = $cursor;
        $cursor += $step;
    }
    return 0 unless @codes;

    my $condition;
    # The following formula was tuned for i486-linux-gnu-gcc-4.0 -O1.
    if (@codes > 2 + @holes) {
        # Range test, minus the holes, restricted to the right residue
        # class when stepping by more than one.
        $condition = sprintf("c >= 0x%X && c <= 0x%X", $codes[0], $codes[-1]);
        $condition .= sprintf(" && c != 0x%X", $_) for @holes;
        if ($step == 2) {
            $condition .= sprintf(" && (c & 1) == %d", $begin & 1);
        }
        elsif ($step != 1) {
            $condition .= sprintf(" && c %% %d == %d", $step, $begin % $step);
        }
    }
    else {
        # Too few code points for a range test to pay off: list them.
        $condition = join(" || ", map { sprintf("c == 0x%X", $_) } @codes);
    }
    print "if (", $condition, ")\n";

    # A zero offset produces just an empty statement.
    my $statement = "";
    if ($offset != 0) {
        my $operator  = $offset < 0 ? "-=" : "+=";
        my $magnitude = abs($offset);
        my $amount    = $magnitude < 10
                      ? sprintf("%d", $magnitude)
                      : sprintf("0x%X", $magnitude);
        $statement = "\t\tc $operator $amount";
    }
    print "$statement;\n";

    # Mark the covered code points as done so later passes skip them.
    for my $code (@codes) {
        $trans[$code] = undef;
    }
    return 1;
}
# Walk the table in ascending order and emit one "if" / "else if" branch
# per group of code points.  For each still-unhandled code point, try to
# cover a whole group with gobble() using steps 1 through 4; when none of
# them succeeds, fall back to a branch for that single code point.
my $emitted_any = 0;
CODE: for my $code (0 .. $#trans) {
    next CODE unless defined $trans[$code];

    my $lead = $emitted_any ? "\telse " : "\t";
    print $lead;
    $emitted_any = 1;

    my $covered = 0;
    STEP: for my $step (1 .. 4) {
        if (gobble($code, $step)) {
            $covered = 1;
            last STEP;
        }
    }
    next CODE if $covered;

    printf "if (c == 0x%X)\n\t\tc = 0x%X;\n", $code, $trans[$code];
}

# Buffered write errors only surface when the handle is closed.
close STDOUT or die "$0: -: $!\n";
__END__
=head1 NAME

gen-case - Generate C source code for folding the case of a Unicode character.

=head1 SYNOPSIS

B<gen-case> CaseFolding.txt > ../src/intl/casefold.inc

=head1 DESCRIPTION

B<gen-case> reads F<CaseFolding.txt> of the Unicode Character Database
and generates C source code that implements the I<simple case folding>
as defined in that file.

The generated source code can then be used like this:

    unicode_val_T
    unicode_simple_case_fold(unicode_val_T c)
    {
    #include "casefold.inc"
            return c;
    }

=head1 BUGS

Does not support B<--help> nor B<--version>.

=head1 AUTHOR

Kalle Olavi Niemitalo <kon@iki.fi>

=head1 COPYRIGHT AND LICENSE

Copyright (c) 2006 Kalle Olavi Niemitalo.

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself. In addition:

=over

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

=back

View File

@ -10,6 +10,9 @@
#include <ctype.h>
#include <stdlib.h>
#if HAVE_WCTYPE_H
#include <wctype.h>
#endif
#include "elinks.h"
@ -404,6 +407,27 @@ unicode_to_cell(unicode_val_T c)
return 1;
}
/* Case-fold a Unicode character so that hotkeys in labels can be
 * matched without regard to case.  This should be called only if
 * check_kbd_label_key(c) is true.  Whether the folded result is in
 * upper or lower case is unspecified. */
unicode_val_T
unicode_fold_label_case(unicode_val_T c)
{
#if !(__STDC_ISO_10646__ && HAVE_WCTYPE_H)
	/* ASCII-only fallback: shift 0x41..0x5A ('A'..'Z') up by 0x20
	 * and leave everything else untouched.  It would be possible
	 * to use code generated from CaseFolding.txt of Unicode if the
	 * acknowledgements required by
	 * http://www.unicode.org/copyright.html were added to
	 * associated documentation of ELinks. */
	return (c >= 0x41 && c <= 0x5A) ? c + 0x20 : c;
#else /* __STDC_ISO_10646__ && HAVE_WCTYPE_H */
	/* Delegate to the C library's towlower(). */
	return towlower(c);
#endif /* !(__STDC_ISO_10646__ && HAVE_WCTYPE_H) */
}
inline unicode_val_T
utf_8_to_unicode(unsigned char **string, unsigned char *end)
{

View File

@ -62,6 +62,7 @@ int utf8_ptr2cells(unsigned char *, unsigned char *);
int utf8_ptr2chars(unsigned char *, unsigned char *);
int utf8_cells2bytes(unsigned char *, int, unsigned char *);
inline int unicode_to_cell(unicode_val_T);
unicode_val_T unicode_fold_label_case(unicode_val_T);
inline int strlen_utf8(unsigned char **);
inline unicode_val_T utf_8_to_unicode(unsigned char **, unsigned char *);
unicode_val_T cp2u(int, unsigned char);