diff --git a/Unicode/entities.txt b/Unicode/entities.txt index 32b7efd0..b3eb3dd1 100644 --- a/Unicode/entities.txt +++ b/Unicode/entities.txt @@ -108,7 +108,7 @@ Imacr ? 0x012A # LATIN CAPITAL LETTER I WITH MACRON Iogon ? 0x012E # LATIN CAPITAL LETTER I WITH OGONEK Iota ? 0x0399 # GREEK CAPITAL LETTER IOTA Itilde ? 0x0128 # LATIN CAPITAL LETTER I WITH TILDE -Iukcy ? 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN +Iukcy ? 0x0406 # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I Iuml ? 0x00CF # LATIN CAPITAL LETTER I WITH DIAERESIS Jcirc ? 0x0134 # LATIN CAPITAL LETTER J WITH CIRCUMFLEX Jcy ? 0x0419 # CYRILLIC CAPITAL LETTER SHORT I @@ -189,7 +189,7 @@ Sigma ? 0x03A3 # GREEK CAPITAL LETTER SIGMA Sub ? 0x22D0 # DOUBLE SUBSET Sup ? 0x22D1 # DOUBLE SUPERSET THORN ? 0x00DE # LATIN CAPITAL LETTER THORN -THgr ? 0x0398 # GREEK CAPITAL LETTER THETA +THgr ? 0x0398 # GREEK CAPITAL LETTER THETA TSHcy ? 0x040B # CYRILLIC CAPITAL LETTER TSHE TScy ? 0x0426 # CYRILLIC CAPITAL LETTER TSE Tau ? 0x03A4 # GREEK CAPITAL LETTER TAU @@ -345,7 +345,7 @@ boxV ? 0x2551 # BOX DRAWINGS DOUBLE VERTICAL boxVH ? 0x256C # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL boxVL ? 0x2563 # BOX DRAWINGS DOUBLE VERTICAL AND LEFT boxVR ? 0x2560 # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT -boxVh ? 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SI +boxVh ? 0x256B # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE boxVl ? 0x2562 # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE boxVr ? 0x255F # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE boxdL ? 0x2555 # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE @@ -362,7 +362,7 @@ boxuR ? 0x2558 # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE boxul ? 0x2518 # BOX DRAWINGS LIGHT UP AND LEFT boxur ? 0x2514 # BOX DRAWINGS LIGHT UP AND RIGHT boxv ? 0x2502 # BOX DRAWINGS LIGHT VERTICAL -boxvH ? 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DO +boxvH ? 0x256A # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE boxvL ? 
0x2561 # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE boxvR ? 0x255E # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE boxvh ? 0x253C # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL @@ -481,6 +481,7 @@ eogon ? 0x0119 # LATIN SMALL LETTER E WITH OGONEK epsi ? 0x03B5 # GREEK SMALL LETTER EPSILON epsilon ? 0x03B5 # GREEK SMALL LETTER EPSILON epsis ? 0x220A # SMALL ELEMENT OF +epsiv ? 0x???? # variant epsilon equals ? 0x003D # EQUALS SIGN equiv ? 0x2261 # IDENTICAL TO erDot ? 0x2253 # IMAGE OF OR APPROXIMATELY EQUAL TO @@ -497,6 +498,7 @@ ffilig ? 0xFB03 # LATIN SMALL LIGATURE FFI fflig ? 0xFB00 # LATIN SMALL LIGATURE FF ffllig ? 0xFB04 # LATIN SMALL LIGATURE FFL filig ? 0xFB01 # LATIN SMALL LIGATURE FI +fjlig ? 0x???? # fj ligature flat ? 0x266D # MUSIC FLAT SIGN fllig ? 0xFB02 # LATIN SMALL LIGATURE FL fnof ? 0x0192 # LATIN SMALL LETTER F WITH HOOK @@ -520,9 +522,11 @@ frac78 ? 0x215E # VULGAR FRACTION SEVEN EIGHTHS frasl ? 0x2044 # FRACTION SLASH frown ? 0x2322 # FROWN gE ? 0x2267 # GREATER-THAN OVER EQUAL TO +gEl ? 0x???? # greater-than, double equals, less-than gacute ? 0x01F5 # LATIN SMALL LETTER G WITH ACUTE gamma ? 0x03B3 # GREEK SMALL LETTER GAMMA gammad ? 0x03DC # GREEK LETTER DIGAMMA +gap ? 0x???? # greater-than, approximately equal to gbreve ? 0x011F # LATIN SMALL LETTER G WITH BREVE gcedil ? 0x0123 # LATIN SMALL LETTER G WITH CEDILLA gcirc ? 0x011D # LATIN SMALL LETTER G WITH CIRCUMFLEX @@ -536,6 +540,7 @@ gimel ? 0x2137 # GIMEL SYMBOL gjcy ? 0x0453 # CYRILLIC SMALL LETTER GJE gl ? 0x2277 # GREATER-THAN OR LESS-THAN gnE ? 0x2269 # GREATER-THAN BUT NOT EQUAL TO +gnap ? 0x???? # greater-than, not approximately equal to gne ? 0x2269 # GREATER-THAN BUT NOT EQUAL TO gnsim ? 0x22E7 # GREATER-THAN BUT NOT EQUIVALENT TO grave ? 0x0060 # GRAVE ACCENT @@ -562,7 +567,7 @@ iacgr ? 0x03AF # GREEK SMALL LETTER IOTA WITH TONOS iacute ? 0x00ED # LATIN SMALL LETTER I WITH ACUTE icirc ? 0x00EE # LATIN SMALL LETTER I WITH CIRCUMFLEX icy ? 
0x0438 # CYRILLIC SMALL LETTER I -idiagr ? 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TON +idiagr ? 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS idigr ? 0x03CA # GREEK SMALL LETTER IOTA WITH DIALYTIKA iecy ? 0x0435 # CYRILLIC SMALL LETTER IE iexcl ? 0x00A1 # INVERTED EXCLAMATION MARK @@ -587,6 +592,7 @@ iukcy ? 0x0456 # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I iuml ? 0x00EF # LATIN SMALL LETTER I WITH DIAERESIS jcirc ? 0x0135 # LATIN SMALL LETTER J WITH CIRCUMFLEX jcy ? 0x0439 # CYRILLIC SMALL LETTER SHORT I +jnodot ? 0x???? # latin small letter dotless j jsercy ? 0x0458 # CYRILLIC SMALL LETTER JE jukcy ? 0x0454 # CYRILLIC SMALL LETTER UKRAINIAN IE kappa ? 0x03BA # GREEK SMALL LETTER KAPPA @@ -601,10 +607,12 @@ kjcy ? 0x045C # CYRILLIC SMALL LETTER KJE lAarr ? 0x21DA # LEFTWARDS TRIPLE ARROW lArr ? 0x21D0 # LEFTWARDS DOUBLE ARROW lE ? 0x2266 # LESS-THAN OVER EQUAL TO +lEg ? 0x???? # less-than, double equals, greater-than lacute ? 0x013A # LATIN SMALL LETTER L WITH ACUTE lagran ? 0x2112 # SCRIPT CAPITAL L lambda ? 0x03BB # GREEK SMALL LETTER LAMDA lang ? 0x2329 # LEFT-POINTING ANGLE BRACKET +lap ? 0x???? # less-than, approximately equal to laquo ? 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK larr ? 0x2190 # LEFTWARDS ARROW larr2 ? 0x21C7 # LEFTWARDS PAIRED ARROWS @@ -631,6 +639,7 @@ lhblk ? 0x2584 # LOWER HALF BLOCK ljcy ? 0x0459 # CYRILLIC SMALL LETTER LJE lmidot ? 0x0140 # LATIN SMALL LETTER L WITH MIDDLE DOT lnE ? 0x2268 # LESS-THAN BUT NOT EQUAL TO +lnap ? 0x???? # less-than, not approximately equal to lne ? 0x2268 # LESS-THAN BUT NOT EQUAL TO lnsim ? 0x22E6 # LESS-THAN BUT NOT EQUIVALENT TO lowast ? 0x2217 # ASTERISK OPERATOR @@ -639,6 +648,7 @@ loz ? 0x25CA # LOZENGE loz ? 0x2727 # WHITE FOUR POINTED STAR lozf ? 0x2726 # BLACK FOUR POINTED STAR lpar ? 0x0028 # LEFT PARENTHESIS +lpargt ? 0x???? # left parenthesis, greater-than lrarr2 ? 0x21C6 # LEFTWARDS ARROW OVER RIGHTWARDS ARROW lrhar2 ? 
0x21CB # LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON lrm ? 0x200E # LEFT-TO-RIGHT MARK @@ -674,7 +684,7 @@ mnplus ? 0x2213 # MINUS-OR-PLUS SIGN models ? 0x22A7 # MODELS mu ? 0x03BC # GREEK SMALL LETTER MU mumap ? 0x22B8 # MULTIMAP -nVDash ? 0x22AF # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNS +nVDash ? 0x22AF # NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE nVdash ? 0x22AE # DOES NOT FORCE nabla ? 0x2207 # NABLA nacute ? 0x0144 # LATIN SMALL LETTER N WITH ACUTE @@ -691,6 +701,7 @@ ne ? 0x2260 # NOT EQUAL TO nearr ? 0x2197 # NORTH EAST ARROW nequiv ? 0x2262 # NOT IDENTICAL TO nexist ? 0x2204 # THERE DOES NOT EXIST +ngE ? 0x???? # not greater-than, double equals nge ? 0x2271 # NEITHER GREATER-THAN NOR EQUAL TO nges ? 0x2271 # NEITHER GREATER-THAN NOR EQUAL TO ngr ? 0x03BD # GREEK SMALL LETTER NU @@ -700,6 +711,7 @@ nharr ? 0x21AE # LEFT RIGHT ARROW WITH STROKE ni ? 0x220B # CONTAINS AS MEMBER njcy ? 0x045A # CYRILLIC SMALL LETTER NJE nlArr ? 0x21CD # LEFTWARDS DOUBLE ARROW WITH STROKE +nlE ? 0x???? # not less-than, double equals nlarr ? 0x219A # LEFTWARDS ARROW WITH STROKE nldr ? 0x2025 # TWO DOT LEADER nle ? 0x2270 # NEITHER LESS-THAN NOR EQUAL TO @@ -721,6 +733,7 @@ nsc ? 0x2281 # DOES NOT SUCCEED nsce ? 0x22E1 # DOES NOT SUCCEED OR EQUAL nsim ? 0x2241 # NOT TILDE nsime ? 0x2244 # NOT ASYMPTOTICALLY EQUAL TO +nsmid ? 0x???? # nshortmid nspar ? 0x2226 # NOT PARALLEL TO nsub ? 0x2284 # NOT A SUBSET OF nsubE ? 0x2288 # NEITHER A SUBSET OF NOR EQUAL TO @@ -794,8 +807,11 @@ plusdo ? 0x2214 # DOT PLUS plusmn ? 0x00B1 # PLUS-MINUS SIGN pound ? 0x00A3 # POUND SIGN pr ? 0x227A # PRECEDES +prap ? 0x???? # precedes, approximately equal to pre ? 0x227C # PRECEDES OR EQUAL TO prime ? 0x2032 # PRIME +prnE ? 0x???? # precedes, not double equals +prnap ? 0x???? # precedes, not approximately equal to prnsim ? 0x22E8 # PRECEDES BUT NOT EQUIVALENT TO prod ? 0x220F # N-ARY PRODUCT prop ? 0x221D # PROPORTIONAL TO @@ -838,6 +854,7 @@ rlarr2 ? 
0x21C4 # RIGHTWARDS ARROW OVER LEFTWARDS ARROW rlhar2 ? 0x21CC # RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON rlm ? 0x200F # RIGHT-TO-LEFT MARK rpar ? 0x0029 # RIGHT PARENTHESIS +rpargt ? 0x???? # right parenthesis, greater-than rsaquo ? 0x203A # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK rsh ? 0x21B1 # UPWARDS ARROW WITH TIP RIGHTWARDS rsqb ? 0x005D # RIGHT SQUARE BRACKET @@ -854,11 +871,14 @@ samalg ? 0x2210 # N-ARY COPRODUCT sbquo ? 0x201A # SINGLE LOW-9 QUOTATION MARK sbsol ? 0x005C # REVERSE SOLIDUS sc ? 0x227B # SUCCEEDS +scap ? 0x???? # succeeds, approximately equal to scaron ? 0x0161 # LATIN SMALL LETTER S WITH CARON sccue ? 0x227D # SUCCEEDS OR EQUAL TO sce ? 0x227D # SUCCEEDS OR EQUAL TO scedil ? 0x015F # LATIN SMALL LETTER S WITH CEDILLA scirc ? 0x015D # LATIN SMALL LETTER S WITH CIRCUMFLEX +scnE ? 0x???? # succeeds, not double equals +scnap ? 0x???? # succeeds, not approximately equal to scnsim ? 0x22E9 # SUCCEEDS BUT NOT EQUIVALENT TO scomma ? 0x0219 # LATIN SMALL LETTER S WITH COMMA BELOW scsim ? 0x227F # SUCCEEDS OR EQUIVALENT TO @@ -881,6 +901,7 @@ sigmaf ? 0x03C2 # GREEK SMALL LETTER FINAL SIGMA sigmav ? 0x03C2 # GREEK SMALL LETTER FINAL SIGMA sim ? 0x223C # TILDE OPERATOR sime ? 0x2243 # ASYMPTOTICALLY EQUAL TO +smid ? 0x???? # shortmid smile ? 0x2323 # SMILE softcy ? 0x044C # CYRILLIC SMALL LETTER SOFT SIGN sol ? 0x002F # SOLIDUS diff --git a/Unicode/gen-ent b/Unicode/gen-ent index ae715b23..1fdf1ffe 100755 --- a/Unicode/gen-ent +++ b/Unicode/gen-ent @@ -7,6 +7,7 @@ sed -n '/^[^#]/,$p' < entities.txt | while read line; do \ name=$(echo "$line" | cut -f 1); \ code=$(echo "$line" | cut -f 3); \ desc=$(echo "$line" | cut -f 4 | sed 's/# //'); \ + test "$code" = "0x????" 
&& continue printf "\t{ %-12s %s }, /* %-46s */\n" \ "\"$name\"," "$code" "$desc"; \ done | LC_ALL=C sort > tmp diff --git a/src/intl/entity.inc b/src/intl/entity.inc index b7c8be3a..724064e2 100644 --- a/src/intl/entity.inc +++ b/src/intl/entity.inc @@ -1005,4 +1005,4 @@ struct entity { char *s; unicode_val_T c; } entities [1002] = { {NULL, 0} }; -#define N_ENTITIES 1001 +#define N_ENTITIES 1001