diff --git a/Unicode/cp1250.cp b/Unicode/cp1250.cp index 9c77c7bd..dee63471 100644 --- a/Unicode/cp1250.cp +++ b/Unicode/cp1250.cp @@ -147,14 +147,14 @@ Window$ 1250 0x7E 0x007E #TILDE 0x7F 0x007F #DELETE 0x80 0x20AC #EURO SIGN -0x81 0x0000 #UNDEFINED +0x81 #UNDEFINED 0x82 0x201A #SINGLE LOW-9 QUOTATION MARK -0x83 0x0000 #UNDEFINED +0x83 #UNDEFINED 0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK 0x85 0x2026 #HORIZONTAL ELLIPSIS 0x86 0x2020 #DAGGER 0x87 0x2021 #DOUBLE DAGGER -0x88 0x0000 #UNDEFINED +0x88 #UNDEFINED 0x89 0x2030 #PER MILLE SIGN 0x8A 0x0160 #LATIN CAPITAL LETTER S WITH CARON 0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK @@ -162,7 +162,7 @@ Window$ 1250 0x8D 0x0164 #LATIN CAPITAL LETTER T WITH CARON 0x8E 0x017D #LATIN CAPITAL LETTER Z WITH CARON 0x8F 0x0179 #LATIN CAPITAL LETTER Z WITH ACUTE -0x90 0x0000 #UNDEFINED +0x90 #UNDEFINED 0x91 0x2018 #LEFT SINGLE QUOTATION MARK 0x92 0x2019 #RIGHT SINGLE QUOTATION MARK 0x93 0x201C #LEFT DOUBLE QUOTATION MARK @@ -170,7 +170,7 @@ Window$ 1250 0x95 0x2022 #BULLET 0x96 0x2013 #EN DASH 0x97 0x2014 #EM DASH -0x98 0x0000 #UNDEFINED +0x98 #UNDEFINED 0x99 0x2122 #TRADE MARK SIGN 0x9A 0x0161 #LATIN SMALL LETTER S WITH CARON 0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK diff --git a/Unicode/cp1257.cp b/Unicode/cp1257.cp index 39a5e162..266f221e 100644 --- a/Unicode/cp1257.cp +++ b/Unicode/cp1257.cp @@ -23,22 +23,22 @@ Window$ 1257 0x20-0x7e idem # 0x80 0x20AC #EURO SIGN -0x81 0x0000 #UNDEFINED +0x81 #UNDEFINED 0x82 0x201A #SINGLE LOW-9 QUOTATION MARK -0x83 0x0000 #UNDEFINED +0x83 #UNDEFINED 0x84 0x201E #DOUBLE LOW-9 QUOTATION MARK 0x85 0x2026 #HORIZONTAL ELLIPSIS 0x86 0x2020 #DAGGER 0x87 0x2021 #DOUBLE DAGGER -0x88 0x0000 #UNDEFINED +0x88 #UNDEFINED 0x89 0x2030 #PER MILLE SIGN -0x8A 0x0000 #UNDEFINED +0x8A #UNDEFINED 0x8B 0x2039 #SINGLE LEFT-POINTING ANGLE QUOTATION MARK -0x8C 0x0000 #UNDEFINED +0x8C #UNDEFINED 0x8D 0x00A8 #DIAERESIS 0x8E 0x02C7 #CARON 0x8F 0x00B8 #CEDILLA -0x90 0x0000 #UNDEFINED +0x90 #UNDEFINED 0x91 0x2018 #LEFT SINGLE QUOTATION MARK 0x92 0x2019 #RIGHT SINGLE QUOTATION MARK 0x93 0x201C #LEFT DOUBLE QUOTATION MARK @@ -46,20 +46,20 @@ Window$ 1257 0x95 0x2022 #BULLET 0x96 0x2013 #EN DASH 0x97 0x2014 #EM DASH -0x98 0x0000 #UNDEFINED +0x98 #UNDEFINED 0x99 0x2122 #TRADE MARK SIGN -0x9A 0x0000 #UNDEFINED +0x9A #UNDEFINED 0x9B 0x203A #SINGLE RIGHT-POINTING ANGLE QUOTATION MARK -0x9C 0x0000 #UNDEFINED +0x9C #UNDEFINED 0x9D 0x00AF #MACRON 0x9E 0x02DB #OGONEK -0x9F 0x0000 #UNDEFINED +0x9F #UNDEFINED 0xA0 0x00A0 #NO-BREAK SPACE -0xA1 0x0000 #UNDEFINED +0xA1 #UNDEFINED 0xA2 0x00A2 #CENT SIGN 0xA3 0x00A3 #POUND SIGN 0xA4 0x00A4 #CURRENCY SIGN -0xA5 0x0000 #UNDEFINED +0xA5 #UNDEFINED 0xA6 0x00A6 #BROKEN BAR 0xA7 0x00A7 #SECTION SIGN 0xA8 0x00D8 #LATIN CAPITAL LETTER O WITH STROKE diff --git a/Unicode/gen-cp b/Unicode/gen-cp index b1e289b2..83f8090d 100755 --- a/Unicode/gen-cp +++ b/Unicode/gen-cp @@ -23,17 +23,29 @@ for i in $codepages; do echo "/*** $i ***/" echo - sed '1,2d + sed ' # Delete the name and aliases lines. + 1,2d + # Delete comment-only and blank lines. /^[ ]*\(#.*\)\{,1\}$/d + # Copy to the hold space. h + # Delete everything except the comment. s/^[^#]*// + # If there is a comment, change it to use /* */ delimiters. s!#[ ]*\(.*\)!/* \1 */! + # Exchange spaces; now hold space = comment and pattern space = all. x + # Delete the comment. s/#.*// + # Canonicalize case so the strings can be used as lookup keys. y/Xabcdef/xABCDEF/ + # Delete mappings of bytes 0x00...0x7F. ELinks assumes those match ASCII. /^0x[01234567]/d - /[^0x0123456789ABCDEF ]/d + # Delete lines that do not map the byte to exactly one character. + /^[ ]*0x[0123456789ABCDEF]\{2\}[ ]\{1,\}0x[0123456789ABCDEF]\{1,\}[ ]*$/!d + # Append a newline and the comment from the hold space. G + # Delete the newline added by the previous command. s/\n//' "$i.cp" | { for left in 8 9 A B C D E F; do for right in 0 1 2 3 4 5 6 7 8 9 A B C D E F; do diff --git a/src/intl/codepage.inc b/src/intl/codepage.inc index afe53df4..026d6042 100644 --- a/src/intl/codepage.inc +++ b/src/intl/codepage.inc @@ -2134,14 +2134,14 @@ unsigned char *const aliases_8859_16 [] = { const uint16_t highhalf_cp1250 [] = { /* 0x80 */ 0x20AC, /* EURO SIGN */ - /* 0x81 */ 0x0000, /* UNDEFINED */ + /* 0x81 */ 0xFFFF, /* 0x82 */ 0x201A, /* SINGLE LOW-9 QUOTATION MARK */ - /* 0x83 */ 0x0000, /* UNDEFINED */ + /* 0x83 */ 0xFFFF, /* 0x84 */ 0x201E, /* DOUBLE LOW-9 QUOTATION MARK */ /* 0x85 */ 0x2026, /* HORIZONTAL ELLIPSIS */ /* 0x86 */ 0x2020, /* DAGGER */ /* 0x87 */ 0x2021, /* DOUBLE DAGGER */ - /* 0x88 */ 0x0000, /* UNDEFINED */ + /* 0x88 */ 0xFFFF, /* 0x89 */ 0x2030, /* PER MILLE SIGN */ /* 0x8A */ 0x0160, /* LATIN CAPITAL LETTER S WITH CARON */ /* 0x8B */ 0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ @@ -2149,7 +2149,7 @@ const uint16_t highhalf_cp1250 [] = { /* 0x8D */ 0x0164, /* LATIN CAPITAL LETTER T WITH CARON */ /* 0x8E */ 0x017D, /* LATIN CAPITAL LETTER Z WITH CARON */ /* 0x8F */ 0x0179, /* LATIN CAPITAL LETTER Z WITH ACUTE */ - /* 0x90 */ 0x0000, /* UNDEFINED */ + /* 0x90 */ 0xFFFF, /* 0x91 */ 0x2018, /* LEFT SINGLE QUOTATION MARK */ /* 0x92 */ 0x2019, /* RIGHT SINGLE QUOTATION MARK */ /* 0x93 */ 0x201C, /* LEFT DOUBLE QUOTATION MARK */ @@ -2157,7 +2157,7 @@ const uint16_t highhalf_cp1250 [] = { /* 0x95 */ 0x2022, /* BULLET */ /* 0x96 */ 0x2013, /* EN DASH */ /* 0x97 */ 0x2014, /* EM DASH */ - /* 0x98 */ 0x0000, /* UNDEFINED */ + /* 0x98 */ 0xFFFF, /* 0x99 */ 0x2122, /* TRADE MARK SIGN */ /* 0x9A */ 0x0161, /* LATIN SMALL LETTER S WITH CARON */ /* 0x9B */ 0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ @@ -2710,22 +2710,22 @@ unsigned char *const aliases_cp1256 [] = { const uint16_t highhalf_cp1257 [] = { /* 0x80 */ 0x20AC, /* EURO SIGN */ - /* 0x81 */ 0x0000, /* UNDEFINED */ + /* 0x81 */ 0xFFFF, /* 0x82 */ 0x201A, /* SINGLE LOW-9 QUOTATION MARK */ - /* 0x83 */ 0x0000, /* UNDEFINED */ + /* 0x83 */ 0xFFFF, /* 0x84 */ 0x201E, /* DOUBLE LOW-9 QUOTATION MARK */ /* 0x85 */ 0x2026, /* HORIZONTAL ELLIPSIS */ /* 0x86 */ 0x2020, /* DAGGER */ /* 0x87 */ 0x2021, /* DOUBLE DAGGER */ - /* 0x88 */ 0x0000, /* UNDEFINED */ + /* 0x88 */ 0xFFFF, /* 0x89 */ 0x2030, /* PER MILLE SIGN */ - /* 0x8A */ 0x0000, /* UNDEFINED */ + /* 0x8A */ 0xFFFF, /* 0x8B */ 0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ - /* 0x8C */ 0x0000, /* UNDEFINED */ + /* 0x8C */ 0xFFFF, /* 0x8D */ 0x00A8, /* DIAERESIS */ /* 0x8E */ 0x02C7, /* CARON */ /* 0x8F */ 0x00B8, /* CEDILLA */ - /* 0x90 */ 0x0000, /* UNDEFINED */ + /* 0x90 */ 0xFFFF, /* 0x91 */ 0x2018, /* LEFT SINGLE QUOTATION MARK */ /* 0x92 */ 0x2019, /* RIGHT SINGLE QUOTATION MARK */ /* 0x93 */ 0x201C, /* LEFT DOUBLE QUOTATION MARK */ @@ -2733,20 +2733,20 @@ const uint16_t highhalf_cp1257 [] = { /* 0x95 */ 0x2022, /* BULLET */ /* 0x96 */ 0x2013, /* EN DASH */ /* 0x97 */ 0x2014, /* EM DASH */ - /* 0x98 */ 0x0000, /* UNDEFINED */ + /* 0x98 */ 0xFFFF, /* 0x99 */ 0x2122, /* TRADE MARK SIGN */ - /* 0x9A */ 0x0000, /* UNDEFINED */ + /* 0x9A */ 0xFFFF, /* 0x9B */ 0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ - /* 0x9C */ 0x0000, /* UNDEFINED */ + /* 0x9C */ 0xFFFF, /* 0x9D */ 0x00AF, /* MACRON */ /* 0x9E */ 0x02DB, /* OGONEK */ - /* 0x9F */ 0x0000, /* UNDEFINED */ + /* 0x9F */ 0xFFFF, /* 0xA0 */ 0x00A0, /* NO-BREAK SPACE */ - /* 0xA1 */ 0x0000, /* UNDEFINED */ + /* 0xA1 */ 0xFFFF, /* 0xA2 */ 0x00A2, /* CENT SIGN */ /* 0xA3 */ 0x00A3, /* POUND SIGN */ /* 0xA4 */ 0x00A4, /* CURRENCY SIGN */ - /* 0xA5 */ 0x0000, /* UNDEFINED */ + /* 0xA5 */ 0xFFFF, /* 0xA6 */ 0x00A6, /* BROKEN BAR */ /* 0xA7 */ 0x00A7, /* SECTION SIGN */ /* 0xA8 */ 0x00D8, /* LATIN CAPITAL LETTER O WITH STROKE */