mirror of
https://github.com/rkd77/elinks.git
synced 2025-02-02 15:09:23 -05:00
cp1250, cp1257: Don't map undefined bytes to U+0000.
This commit is contained in:
parent
5882ec0ce7
commit
65645624b4
@ -147,14 +147,14 @@ Window$ 1250
|
||||
0x7E 0x007E #TILDE
|
||||
0x7F 0x007F #DELETE
|
||||
0x80 0x20AC #EURO SIGN
|
||||
0x81 0x0000 #UNDEFINED
|
||||
0x81 #UNDEFINED
|
||||
0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
|
||||
0x83 0x0000 #UNDEFINED
|
||||
0x83 #UNDEFINED
|
||||
0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
|
||||
0x85 0x2026 #HORIZONTAL ELLIPSIS
|
||||
0x86 0x2020 #DAGGER
|
||||
0x87 0x2021 #DOUBLE DAGGER
|
||||
0x88 0x0000 #UNDEFINED
|
||||
0x88 #UNDEFINED
|
||||
0x89 0x2030 #PER MILLE SIGN
|
||||
0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON
|
||||
0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
@ -162,7 +162,7 @@ Window$ 1250
|
||||
0x8D 0x0164 #LATIN CAPITAL LETTER T WITH CARON
|
||||
0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON
|
||||
0x8F 0x0179 #LATIN CAPITAL LETTER Z WITH ACUTE
|
||||
0x90 0x0000 #UNDEFINED
|
||||
0x90 #UNDEFINED
|
||||
0x91 0x2018 #LEFT SINGLE QUOTATION MARK
|
||||
0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
|
||||
0x93 0x201C #LEFT DOUBLE QUOTATION MARK
|
||||
@ -170,7 +170,7 @@ Window$ 1250
|
||||
0x95 0x2022 #BULLET
|
||||
0x96 0x2013 #EN DASH
|
||||
0x97 0x2014 #EM DASH
|
||||
0x98 0x0000 #UNDEFINED
|
||||
0x98 #UNDEFINED
|
||||
0x99 0x2122 #TRADE MARK SIGN
|
||||
0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON
|
||||
0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
|
@ -23,22 +23,22 @@ Window$ 1257
|
||||
0x20-0x7e idem
|
||||
#
|
||||
0x80 0x20AC #EURO SIGN
|
||||
0x81 0x0000 #UNDEFINED
|
||||
0x81 #UNDEFINED
|
||||
0x82 0x201A #SINGLE LOW-9 QUOTATION MARK
|
||||
0x83 0x0000 #UNDEFINED
|
||||
0x83 #UNDEFINED
|
||||
0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK
|
||||
0x85 0x2026 #HORIZONTAL ELLIPSIS
|
||||
0x86 0x2020 #DAGGER
|
||||
0x87 0x2021 #DOUBLE DAGGER
|
||||
0x88 0x0000 #UNDEFINED
|
||||
0x88 #UNDEFINED
|
||||
0x89 0x2030 #PER MILLE SIGN
|
||||
0x8A 0x0000 #UNDEFINED
|
||||
0x8A #UNDEFINED
|
||||
0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
0x8C 0x0000 #UNDEFINED
|
||||
0x8C #UNDEFINED
|
||||
0x8D 0x00A8 #DIAERESIS
|
||||
0x8E 0x02C7 #CARON
|
||||
0x8F 0x00B8 #CEDILLA
|
||||
0x90 0x0000 #UNDEFINED
|
||||
0x90 #UNDEFINED
|
||||
0x91 0x2018 #LEFT SINGLE QUOTATION MARK
|
||||
0x92 0x2019 #RIGHT SINGLE QUOTATION MARK
|
||||
0x93 0x201C #LEFT DOUBLE QUOTATION MARK
|
||||
@ -46,20 +46,20 @@ Window$ 1257
|
||||
0x95 0x2022 #BULLET
|
||||
0x96 0x2013 #EN DASH
|
||||
0x97 0x2014 #EM DASH
|
||||
0x98 0x0000 #UNDEFINED
|
||||
0x98 #UNDEFINED
|
||||
0x99 0x2122 #TRADE MARK SIGN
|
||||
0x9A 0x0000 #UNDEFINED
|
||||
0x9A #UNDEFINED
|
||||
0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
0x9C 0x0000 #UNDEFINED
|
||||
0x9C #UNDEFINED
|
||||
0x9D 0x00AF #MACRON
|
||||
0x9E 0x02DB #OGONEK
|
||||
0x9F 0x0000 #UNDEFINED
|
||||
0x9F #UNDEFINED
|
||||
0xA0 0x00A0 #NO-BREAK SPACE
|
||||
0xA1 0x0000 #UNDEFINED
|
||||
0xA1 #UNDEFINED
|
||||
0xA2 0x00A2 #CENT SIGN
|
||||
0xA3 0x00A3 #POUND SIGN
|
||||
0xA4 0x00A4 #CURRENCY SIGN
|
||||
0xA5 0x0000 #UNDEFINED
|
||||
0xA5 #UNDEFINED
|
||||
0xA6 0x00A6 #BROKEN BAR
|
||||
0xA7 0x00A7 #SECTION SIGN
|
||||
0xA8 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE
|
||||
|
@ -23,17 +23,29 @@ for i in $codepages; do
|
||||
echo "/*** $i ***/"
|
||||
echo
|
||||
|
||||
sed '1,2d
|
||||
sed ' # Delete the name and aliases lines.
|
||||
1,2d
|
||||
# Delete comment-only and blank lines.
|
||||
/^[ ]*\(#.*\)\{,1\}$/d
|
||||
# Copy to the hold space.
|
||||
h
|
||||
# Delete everything except the comment.
|
||||
s/^[^#]*//
|
||||
# If there is a comment, change it to use /* */ delimiters.
|
||||
s!#[ ]*\(.*\)!/* \1 */!
|
||||
# Exchange spaces; now hold space = comment and pattern space = all.
|
||||
x
|
||||
# Delete the comment.
|
||||
s/#.*//
|
||||
# Canonicalize case so the strings can be used as lookup keys.
|
||||
y/Xabcdef/xABCDEF/
|
||||
# Delete mappings of bytes 0x00...0x7F. ELinks assumes those match ASCII.
|
||||
/^0x[01234567]/d
|
||||
/[^0x0123456789ABCDEF ]/d
|
||||
# Delete lines that do not map the byte to exactly one character.
|
||||
/^[ ]*0x[0123456789ABCDEF]\{2\}[ ]\{1,\}0x[0123456789ABCDEF]\{1,\}[ ]*$/!d
|
||||
# Append a newline and the comment from the hold space.
|
||||
G
|
||||
# Delete the newline added by the previous command.
|
||||
s/\n//' "$i.cp" | {
|
||||
for left in 8 9 A B C D E F; do
|
||||
for right in 0 1 2 3 4 5 6 7 8 9 A B C D E F; do
|
||||
|
@ -2134,14 +2134,14 @@ unsigned char *const aliases_8859_16 [] = {
|
||||
|
||||
const uint16_t highhalf_cp1250 [] = {
|
||||
/* 0x80 */ 0x20AC, /* EURO SIGN */
|
||||
/* 0x81 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x81 */ 0xFFFF,
|
||||
/* 0x82 */ 0x201A, /* SINGLE LOW-9 QUOTATION MARK */
|
||||
/* 0x83 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x83 */ 0xFFFF,
|
||||
/* 0x84 */ 0x201E, /* DOUBLE LOW-9 QUOTATION MARK */
|
||||
/* 0x85 */ 0x2026, /* HORIZONTAL ELLIPSIS */
|
||||
/* 0x86 */ 0x2020, /* DAGGER */
|
||||
/* 0x87 */ 0x2021, /* DOUBLE DAGGER */
|
||||
/* 0x88 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x88 */ 0xFFFF,
|
||||
/* 0x89 */ 0x2030, /* PER MILLE SIGN */
|
||||
/* 0x8A */ 0x0160, /* LATIN CAPITAL LETTER S WITH CARON */
|
||||
/* 0x8B */ 0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
|
||||
@ -2149,7 +2149,7 @@ const uint16_t highhalf_cp1250 [] = {
|
||||
/* 0x8D */ 0x0164, /* LATIN CAPITAL LETTER T WITH CARON */
|
||||
/* 0x8E */ 0x017D, /* LATIN CAPITAL LETTER Z WITH CARON */
|
||||
/* 0x8F */ 0x0179, /* LATIN CAPITAL LETTER Z WITH ACUTE */
|
||||
/* 0x90 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x90 */ 0xFFFF,
|
||||
/* 0x91 */ 0x2018, /* LEFT SINGLE QUOTATION MARK */
|
||||
/* 0x92 */ 0x2019, /* RIGHT SINGLE QUOTATION MARK */
|
||||
/* 0x93 */ 0x201C, /* LEFT DOUBLE QUOTATION MARK */
|
||||
@ -2157,7 +2157,7 @@ const uint16_t highhalf_cp1250 [] = {
|
||||
/* 0x95 */ 0x2022, /* BULLET */
|
||||
/* 0x96 */ 0x2013, /* EN DASH */
|
||||
/* 0x97 */ 0x2014, /* EM DASH */
|
||||
/* 0x98 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x98 */ 0xFFFF,
|
||||
/* 0x99 */ 0x2122, /* TRADE MARK SIGN */
|
||||
/* 0x9A */ 0x0161, /* LATIN SMALL LETTER S WITH CARON */
|
||||
/* 0x9B */ 0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
|
||||
@ -2710,22 +2710,22 @@ unsigned char *const aliases_cp1256 [] = {
|
||||
|
||||
const uint16_t highhalf_cp1257 [] = {
|
||||
/* 0x80 */ 0x20AC, /* EURO SIGN */
|
||||
/* 0x81 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x81 */ 0xFFFF,
|
||||
/* 0x82 */ 0x201A, /* SINGLE LOW-9 QUOTATION MARK */
|
||||
/* 0x83 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x83 */ 0xFFFF,
|
||||
/* 0x84 */ 0x201E, /* DOUBLE LOW-9 QUOTATION MARK */
|
||||
/* 0x85 */ 0x2026, /* HORIZONTAL ELLIPSIS */
|
||||
/* 0x86 */ 0x2020, /* DAGGER */
|
||||
/* 0x87 */ 0x2021, /* DOUBLE DAGGER */
|
||||
/* 0x88 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x88 */ 0xFFFF,
|
||||
/* 0x89 */ 0x2030, /* PER MILLE SIGN */
|
||||
/* 0x8A */ 0x0000, /* UNDEFINED */
|
||||
/* 0x8A */ 0xFFFF,
|
||||
/* 0x8B */ 0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
|
||||
/* 0x8C */ 0x0000, /* UNDEFINED */
|
||||
/* 0x8C */ 0xFFFF,
|
||||
/* 0x8D */ 0x00A8, /* DIAERESIS */
|
||||
/* 0x8E */ 0x02C7, /* CARON */
|
||||
/* 0x8F */ 0x00B8, /* CEDILLA */
|
||||
/* 0x90 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x90 */ 0xFFFF,
|
||||
/* 0x91 */ 0x2018, /* LEFT SINGLE QUOTATION MARK */
|
||||
/* 0x92 */ 0x2019, /* RIGHT SINGLE QUOTATION MARK */
|
||||
/* 0x93 */ 0x201C, /* LEFT DOUBLE QUOTATION MARK */
|
||||
@ -2733,20 +2733,20 @@ const uint16_t highhalf_cp1257 [] = {
|
||||
/* 0x95 */ 0x2022, /* BULLET */
|
||||
/* 0x96 */ 0x2013, /* EN DASH */
|
||||
/* 0x97 */ 0x2014, /* EM DASH */
|
||||
/* 0x98 */ 0x0000, /* UNDEFINED */
|
||||
/* 0x98 */ 0xFFFF,
|
||||
/* 0x99 */ 0x2122, /* TRADE MARK SIGN */
|
||||
/* 0x9A */ 0x0000, /* UNDEFINED */
|
||||
/* 0x9A */ 0xFFFF,
|
||||
/* 0x9B */ 0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
|
||||
/* 0x9C */ 0x0000, /* UNDEFINED */
|
||||
/* 0x9C */ 0xFFFF,
|
||||
/* 0x9D */ 0x00AF, /* MACRON */
|
||||
/* 0x9E */ 0x02DB, /* OGONEK */
|
||||
/* 0x9F */ 0x0000, /* UNDEFINED */
|
||||
/* 0x9F */ 0xFFFF,
|
||||
/* 0xA0 */ 0x00A0, /* NO-BREAK SPACE */
|
||||
/* 0xA1 */ 0x0000, /* UNDEFINED */
|
||||
/* 0xA1 */ 0xFFFF,
|
||||
/* 0xA2 */ 0x00A2, /* CENT SIGN */
|
||||
/* 0xA3 */ 0x00A3, /* POUND SIGN */
|
||||
/* 0xA4 */ 0x00A4, /* CURRENCY SIGN */
|
||||
/* 0xA5 */ 0x0000, /* UNDEFINED */
|
||||
/* 0xA5 */ 0xFFFF,
|
||||
/* 0xA6 */ 0x00A6, /* BROKEN BAR */
|
||||
/* 0xA7 */ 0x00A7, /* SECTION SIGN */
|
||||
/* 0xA8 */ 0x00D8, /* LATIN CAPITAL LETTER O WITH STROKE */
|
||||
|
Loading…
x
Reference in New Issue
Block a user