From 9394961244eafa228a488fb03ca71f33fdfe041b Mon Sep 17 00:00:00 2001 From: Takashi Yano Date: Sat, 23 Jun 2018 17:04:43 +0900 Subject: [PATCH] Fix Unicode table. * (mkcategories): Fix a bug that outputs incorrect Unicode category table. * (categories.t): Rebuild it using the bug-fixed mkcategories and the latest UnicodeData.txt. * (caseconv.t): Rebuild it using the latest UnicodeData.txt. This fixes the problem reported in the following post. https://cygwin.com/ml/cygwin/2018-06/msg00248.html --- newlib/libc/ctype/caseconv.t | 8 +- newlib/libc/ctype/categories.t | 153 +++++++++++++++++++++------------ newlib/libc/ctype/mkcategories | 9 +- 3 files changed, 110 insertions(+), 60 deletions(-) diff --git a/newlib/libc/ctype/caseconv.t b/newlib/libc/ctype/caseconv.t index 5e132d3e1..31eb503fa 100644 --- a/newlib/libc/ctype/caseconv.t +++ b/newlib/libc/ctype/caseconv.t @@ -165,6 +165,8 @@ {0x10A0, 37, TOLO, 7264}, {0x10C7, 0, TOLO, 7264}, {0x10CD, 0, TOLO, 7264}, + {0x10D0, 42, TOUP, 3008}, + {0x10FD, 2, TOUP, 3008}, {0x13A0, 79, TOLO, 38864}, {0x13F0, 5, TOLO, 8}, {0x13F8, 5, TOUP, -8}, @@ -176,6 +178,8 @@ {0x1C86, 0, TOUP, -6236}, {0x1C87, 0, TOUP, -6181}, {0x1C88, 0, TOUP, 35266}, + {0x1C90, 42, TOLO, -3008}, + {0x1CBD, 2, TOLO, -3008}, {0x1D79, 0, TOUP, 35332}, {0x1D7D, 0, TOUP, 3814}, {0x1E00, 149, TO1, EVENCAP}, @@ -287,7 +291,7 @@ {0xA7B1, 0, TOLO, -42282}, {0xA7B2, 0, TOLO, -42261}, {0xA7B3, 0, TOLO, 928}, - {0xA7B4, 3, TO1, EVENCAP}, + {0xA7B4, 5, TO1, EVENCAP}, {0xAB53, 0, TOUP, -928}, {0xAB70, 79, TOUP, -38864}, {0xFF21, 25, TOLO, 32}, @@ -300,5 +304,7 @@ {0x10CC0, 50, TOUP, -64}, {0x118A0, 31, TOLO, 32}, {0x118C0, 31, TOUP, -32}, + {0x16E40, 31, TOLO, 32}, + {0x16E60, 31, TOUP, -32}, {0x1E900, 33, TOLO, 34}, {0x1E922, 33, TOUP, -34}, diff --git a/newlib/libc/ctype/categories.t b/newlib/libc/ctype/categories.t index dd5cf7d08..ca7671051 100644 --- a/newlib/libc/ctype/categories.t +++ b/newlib/libc/ctype/categories.t @@ -168,8 +168,9 @@ {CAT_LC, 0x0531, 37}, {CAT_Lm, 0x0559, 0}, {CAT_Po, 0x055A, 5}, + {CAT_Ll, 0x0560, 0}, {CAT_LC, 0x0561, 37}, - {CAT_Ll, 0x0587, 0}, + {CAT_Ll, 0x0587, 1}, {CAT_Po, 0x0589, 0}, {CAT_Pd, 0x058A, 0}, {CAT_So, 0x058D, 1}, @@ -184,7 +185,7 @@ {CAT_Po, 0x05C6, 0}, {CAT_Mn, 0x05C7, 0}, {CAT_Lo, 0x05D0, 26}, - {CAT_Lo, 0x05F0, 2}, + {CAT_Lo, 0x05EF, 3}, {CAT_Po, 0x05F3, 1}, {CAT_Cf, 0x0600, 5}, {CAT_Sm, 0x0606, 2}, @@ -236,6 +237,8 @@ {CAT_So, 0x07F6, 0}, {CAT_Po, 0x07F7, 2}, {CAT_Lm, 0x07FA, 0}, + {CAT_Mn, 0x07FD, 0}, + {CAT_Sc, 0x07FE, 1}, {CAT_Lo, 0x0800, 21}, {CAT_Mn, 0x0816, 3}, {CAT_Lm, 0x081A, 0}, @@ -251,7 +254,7 @@ {CAT_Lo, 0x0860, 10}, {CAT_Lo, 0x08A0, 20}, {CAT_Lo, 0x08B6, 7}, - {CAT_Mn, 0x08D4, 13}, + {CAT_Mn, 0x08D3, 14}, {CAT_Cf, 0x08E2, 0}, {CAT_Mn, 0x08E3, 31}, {CAT_Mc, 0x0903, 0}, @@ -302,6 +305,7 @@ {CAT_Sc, 0x09FB, 0}, {CAT_Lo, 0x09FC, 0}, {CAT_Po, 0x09FD, 0}, + {CAT_Mn, 0x09FE, 0}, {CAT_Mn, 0x0A01, 1}, {CAT_Mc, 0x0A03, 0}, {CAT_Lo, 0x0A05, 5}, @@ -323,6 +327,7 @@ {CAT_Mn, 0x0A70, 1}, {CAT_Lo, 0x0A72, 2}, {CAT_Mn, 0x0A75, 0}, + {CAT_Po, 0x0A76, 0}, {CAT_Mn, 0x0A81, 1}, {CAT_Mc, 0x0A83, 0}, {CAT_Lo, 0x0A85, 8}, @@ -399,6 +404,7 @@ {CAT_So, 0x0BFA, 0}, {CAT_Mn, 0x0C00, 0}, {CAT_Mc, 0x0C01, 2}, + {CAT_Mn, 0x0C04, 0}, {CAT_Lo, 0x0C05, 7}, {CAT_Lo, 0x0C0E, 2}, {CAT_Lo, 0x0C12, 22}, @@ -418,6 +424,7 @@ {CAT_Lo, 0x0C80, 0}, {CAT_Mn, 0x0C81, 0}, {CAT_Mc, 0x0C82, 1}, + {CAT_Po, 0x0C84, 0}, {CAT_Lo, 0x0C85, 7}, {CAT_Lo, 0x0C8E, 2}, {CAT_Lo, 0x0C92, 22}, @@ -584,10 +591,11 @@ {CAT_LC, 0x10A0, 37}, {CAT_LC, 0x10C7, 0}, {CAT_LC, 0x10CD, 0}, - {CAT_Lo, 0x10D0, 42}, + {CAT_LC, 0x10D0, 42}, {CAT_Po, 0x10FB, 0}, {CAT_Lm, 0x10FC, 0}, - {CAT_Lo, 0x10FD, 331}, + {CAT_LC, 0x10FD, 2}, + {CAT_Lo, 0x1100, 328}, {CAT_Lo, 0x124A, 3}, {CAT_Lo, 0x1250, 6}, {CAT_Lo, 0x1258, 0}, @@ -657,7 +665,7 @@ {CAT_Nd, 0x1810, 9}, {CAT_Lo, 0x1820, 34}, {CAT_Lm, 0x1843, 0}, - {CAT_Lo, 0x1844, 51}, + {CAT_Lo, 0x1844, 52}, {CAT_Lo, 0x1880, 4}, {CAT_Mn, 0x1885, 1}, {CAT_Lo, 0x1887, 33}, @@ -759,6 +767,8 @@ {CAT_Lm, 0x1C78, 5}, {CAT_Po, 0x1C7E, 1}, {CAT_LC, 0x1C80, 8}, + {CAT_LC, 0x1C90, 42}, + {CAT_LC, 0x1CBD, 2}, {CAT_Po, 0x1CC0, 7}, {CAT_Mn, 0x1CD0, 2}, {CAT_Po, 0x1CD3, 0}, @@ -1066,10 +1076,8 @@ {CAT_Sm, 0x2B47, 5}, {CAT_So, 0x2B4D, 38}, {CAT_So, 0x2B76, 31}, - {CAT_So, 0x2B98, 33}, - {CAT_So, 0x2BBD, 11}, - {CAT_So, 0x2BCA, 8}, - {CAT_So, 0x2BEC, 3}, + {CAT_So, 0x2B98, 48}, + {CAT_So, 0x2BCA, 52}, {CAT_LC, 0x2C00, 46}, {CAT_LC, 0x2C30, 46}, {CAT_LC, 0x2C60, 16}, @@ -1142,7 +1150,7 @@ {CAT_Pd, 0x2E40, 0}, {CAT_Po, 0x2E41, 0}, {CAT_Ps, 0x2E42, 0}, - {CAT_Po, 0x2E43, 6}, + {CAT_Po, 0x2E43, 11}, {CAT_So, 0x2E80, 25}, {CAT_So, 0x2E9B, 88}, {CAT_So, 0x2F00, 213}, @@ -1197,7 +1205,7 @@ {CAT_Po, 0x30FB, 0}, {CAT_Lm, 0x30FC, 2}, {CAT_Lo, 0x30FF, 0}, - {CAT_Lo, 0x3105, 41}, + {CAT_Lo, 0x3105, 42}, {CAT_Lo, 0x3131, 93}, {CAT_So, 0x3190, 1}, {CAT_No, 0x3192, 3}, @@ -1217,11 +1225,9 @@ {CAT_No, 0x32B1, 14}, {CAT_So, 0x32C0, 62}, {CAT_So, 0x3300, 255}, - {CAT_Lo, 0x3400, 0}, - {CAT_Lo, 0x4DB5, 0}, + {CAT_Lo, 0x3400, 6581}, {CAT_So, 0x4DC0, 63}, - {CAT_Lo, 0x4E00, 0}, - {CAT_Lo, 0x9FEA, 0}, + {CAT_Lo, 0x4E00, 20975}, {CAT_Lo, 0xA000, 20}, {CAT_Lm, 0xA015, 0}, {CAT_Lo, 0xA016, 1142}, @@ -1267,7 +1273,8 @@ {CAT_LC, 0xA790, 3}, {CAT_Ll, 0xA794, 1}, {CAT_LC, 0xA796, 24}, - {CAT_LC, 0xA7B0, 7}, + {CAT_Ll, 0xA7AF, 0}, + {CAT_LC, 0xA7B0, 9}, {CAT_Lo, 0xA7F7, 0}, {CAT_Lm, 0xA7F8, 1}, {CAT_Ll, 0xA7FA, 0}, @@ -1299,7 +1306,8 @@ {CAT_Po, 0xA8F8, 2}, {CAT_Lo, 0xA8FB, 0}, {CAT_Po, 0xA8FC, 0}, - {CAT_Lo, 0xA8FD, 0}, + {CAT_Lo, 0xA8FD, 1}, + {CAT_Mn, 0xA8FF, 0}, {CAT_Nd, 0xA900, 9}, {CAT_Lo, 0xA90A, 27}, {CAT_Mn, 0xA926, 7}, @@ -1394,14 +1402,10 @@ {CAT_Mc, 0xABEC, 0}, {CAT_Mn, 0xABED, 0}, {CAT_Nd, 0xABF0, 9}, - {CAT_Lo, 0xAC00, 0}, - {CAT_Lo, 0xD7A3, 0}, + {CAT_Lo, 0xAC00, 11171}, {CAT_Lo, 0xD7B0, 22}, {CAT_Lo, 0xD7CB, 48}, - {CAT_Cs, 0xD800, 0}, - {CAT_Cs, 0xDB7F, 1}, - {CAT_Cs, 0xDBFF, 1}, - {CAT_Cs, 0xDFFF, 0}, + {CAT_Cs, 0xD800, 2047}, {CAT_Lo, 0xF900, 365}, {CAT_Lo, 0xFA70, 105}, {CAT_Ll, 0xFB00, 6}, @@ -1605,10 +1609,10 @@ {CAT_Mn, 0x10A0C, 3}, {CAT_Lo, 0x10A10, 3}, {CAT_Lo, 0x10A15, 2}, - {CAT_Lo, 0x10A19, 26}, + {CAT_Lo, 0x10A19, 28}, {CAT_Mn, 0x10A38, 2}, {CAT_Mn, 0x10A3F, 0}, - {CAT_No, 0x10A40, 7}, + {CAT_No, 0x10A40, 8}, {CAT_Po, 0x10A50, 8}, {CAT_Lo, 0x10A60, 28}, {CAT_No, 0x10A7D, 1}, @@ -1634,7 +1638,17 @@ {CAT_LC, 0x10C80, 50}, {CAT_LC, 0x10CC0, 50}, {CAT_No, 0x10CFA, 5}, + {CAT_Lo, 0x10D00, 35}, + {CAT_Mn, 0x10D24, 3}, + {CAT_Nd, 0x10D30, 9}, {CAT_No, 0x10E60, 30}, + {CAT_Lo, 0x10F00, 28}, + {CAT_No, 0x10F1D, 9}, + {CAT_Lo, 0x10F27, 0}, + {CAT_Lo, 0x10F30, 21}, + {CAT_Mn, 0x10F46, 10}, + {CAT_No, 0x10F51, 3}, + {CAT_Po, 0x10F55, 4}, {CAT_Mc, 0x11000, 0}, {CAT_Mn, 0x11001, 0}, {CAT_Mc, 0x11002, 0}, @@ -1653,6 +1667,7 @@ {CAT_Po, 0x110BB, 1}, {CAT_Cf, 0x110BD, 0}, {CAT_Po, 0x110BE, 3}, + {CAT_Cf, 0x110CD, 0}, {CAT_Lo, 0x110D0, 24}, {CAT_Nd, 0x110F0, 9}, {CAT_Mn, 0x11100, 2}, @@ -1662,6 +1677,8 @@ {CAT_Mn, 0x1112D, 7}, {CAT_Nd, 0x11136, 9}, {CAT_Po, 0x11140, 3}, + {CAT_Lo, 0x11144, 0}, + {CAT_Mc, 0x11145, 1}, {CAT_Lo, 0x11150, 34}, {CAT_Mn, 0x11173, 0}, {CAT_Po, 0x11174, 1}, @@ -1673,8 +1690,8 @@ {CAT_Mn, 0x111B6, 8}, {CAT_Mc, 0x111BF, 1}, {CAT_Lo, 0x111C1, 3}, - {CAT_Po, 0x111C5, 4}, - {CAT_Mn, 0x111CA, 2}, + {CAT_Po, 0x111C5, 3}, + {CAT_Mn, 0x111C9, 3}, {CAT_Po, 0x111CD, 0}, {CAT_Nd, 0x111D0, 9}, {CAT_Lo, 0x111DA, 0}, @@ -1711,7 +1728,7 @@ {CAT_Lo, 0x1132A, 6}, {CAT_Lo, 0x11332, 1}, {CAT_Lo, 0x11335, 4}, - {CAT_Mn, 0x1133C, 0}, + {CAT_Mn, 0x1133B, 1}, {CAT_Lo, 0x1133D, 0}, {CAT_Mc, 0x1133E, 1}, {CAT_Mn, 0x11340, 0}, @@ -1736,6 +1753,7 @@ {CAT_Nd, 0x11450, 9}, {CAT_Po, 0x1145B, 0}, {CAT_Po, 0x1145D, 0}, + {CAT_Mn, 0x1145E, 0}, {CAT_Lo, 0x11480, 47}, {CAT_Mc, 0x114B0, 2}, {CAT_Mn, 0x114B3, 5}, @@ -1779,7 +1797,7 @@ {CAT_Mc, 0x116B6, 0}, {CAT_Mn, 0x116B7, 0}, {CAT_Nd, 0x116C0, 9}, - {CAT_Lo, 0x11700, 25}, + {CAT_Lo, 0x11700, 26}, {CAT_Mn, 0x1171D, 2}, {CAT_Mc, 0x11720, 1}, {CAT_Mn, 0x11722, 3}, @@ -1789,14 +1807,18 @@ {CAT_No, 0x1173A, 1}, {CAT_Po, 0x1173C, 2}, {CAT_So, 0x1173F, 0}, + {CAT_Lo, 0x11800, 43}, + {CAT_Mc, 0x1182C, 2}, + {CAT_Mn, 0x1182F, 8}, + {CAT_Mc, 0x11838, 0}, + {CAT_Mn, 0x11839, 1}, + {CAT_Po, 0x1183B, 0}, {CAT_LC, 0x118A0, 63}, {CAT_Nd, 0x118E0, 9}, {CAT_No, 0x118EA, 8}, {CAT_Lo, 0x118FF, 0}, {CAT_Lo, 0x11A00, 0}, - {CAT_Mn, 0x11A01, 5}, - {CAT_Mc, 0x11A07, 1}, - {CAT_Mn, 0x11A09, 1}, + {CAT_Mn, 0x11A01, 9}, {CAT_Lo, 0x11A0B, 39}, {CAT_Mn, 0x11A33, 5}, {CAT_Mc, 0x11A39, 0}, @@ -1814,6 +1836,7 @@ {CAT_Mc, 0x11A97, 0}, {CAT_Mn, 0x11A98, 1}, {CAT_Po, 0x11A9A, 2}, + {CAT_Lo, 0x11A9D, 0}, {CAT_Po, 0x11A9E, 4}, {CAT_Lo, 0x11AC0, 56}, {CAT_Lo, 0x11C00, 8}, @@ -1846,6 +1869,21 @@ {CAT_Lo, 0x11D46, 0}, {CAT_Mn, 0x11D47, 0}, {CAT_Nd, 0x11D50, 9}, + {CAT_Lo, 0x11D60, 5}, + {CAT_Lo, 0x11D67, 1}, + {CAT_Lo, 0x11D6A, 31}, + {CAT_Mc, 0x11D8A, 4}, + {CAT_Mn, 0x11D90, 1}, + {CAT_Mc, 0x11D93, 1}, + {CAT_Mn, 0x11D95, 0}, + {CAT_Mc, 0x11D96, 0}, + {CAT_Mn, 0x11D97, 0}, + {CAT_Lo, 0x11D98, 0}, + {CAT_Nd, 0x11DA0, 9}, + {CAT_Lo, 0x11EE0, 18}, + {CAT_Mn, 0x11EF3, 1}, + {CAT_Mc, 0x11EF5, 1}, + {CAT_Po, 0x11EF7, 1}, {CAT_Lo, 0x12000, 921}, {CAT_Nl, 0x12400, 110}, {CAT_Po, 0x12470, 4}, @@ -1870,14 +1908,16 @@ {CAT_No, 0x16B5B, 6}, {CAT_Lo, 0x16B63, 20}, {CAT_Lo, 0x16B7D, 18}, + {CAT_LC, 0x16E40, 63}, + {CAT_No, 0x16E80, 22}, + {CAT_Po, 0x16E97, 3}, {CAT_Lo, 0x16F00, 68}, {CAT_Lo, 0x16F50, 0}, {CAT_Mc, 0x16F51, 45}, {CAT_Mn, 0x16F8F, 3}, {CAT_Lm, 0x16F93, 12}, {CAT_Lm, 0x16FE0, 1}, - {CAT_Lo, 0x17000, 0}, - {CAT_Lo, 0x187EC, 0}, + {CAT_Lo, 0x17000, 6129}, {CAT_Lo, 0x18800, 754}, {CAT_Lo, 0x1B000, 286}, {CAT_Lo, 0x1B170, 395}, @@ -1906,8 +1946,9 @@ {CAT_So, 0x1D200, 65}, {CAT_Mn, 0x1D242, 2}, {CAT_So, 0x1D245, 0}, + {CAT_No, 0x1D2E0, 19}, {CAT_So, 0x1D300, 86}, - {CAT_No, 0x1D360, 17}, + {CAT_No, 0x1D360, 24}, {CAT_Lu, 0x1D400, 25}, {CAT_Ll, 0x1D41A, 25}, {CAT_Lu, 0x1D434, 25}, @@ -2002,6 +2043,11 @@ {CAT_Mn, 0x1E944, 6}, {CAT_Nd, 0x1E950, 9}, {CAT_Po, 0x1E95E, 1}, + {CAT_No, 0x1EC71, 58}, + {CAT_So, 0x1ECAC, 0}, + {CAT_No, 0x1ECAD, 2}, + {CAT_Sc, 0x1ECB0, 0}, + {CAT_No, 0x1ECB1, 3}, {CAT_Lo, 0x1EE00, 3}, {CAT_Lo, 0x1EE05, 26}, {CAT_Lo, 0x1EE21, 1}, @@ -2043,8 +2089,7 @@ {CAT_So, 0x1F0C1, 14}, {CAT_So, 0x1F0D1, 36}, {CAT_No, 0x1F100, 12}, - {CAT_So, 0x1F110, 30}, - {CAT_So, 0x1F130, 59}, + {CAT_So, 0x1F110, 91}, {CAT_So, 0x1F170, 60}, {CAT_So, 0x1F1E6, 28}, {CAT_So, 0x1F210, 43}, @@ -2055,9 +2100,9 @@ {CAT_Sk, 0x1F3FB, 4}, {CAT_So, 0x1F400, 724}, {CAT_So, 0x1F6E0, 12}, - {CAT_So, 0x1F6F0, 8}, + {CAT_So, 0x1F6F0, 9}, {CAT_So, 0x1F700, 115}, - {CAT_So, 0x1F780, 84}, + {CAT_So, 0x1F780, 88}, {CAT_So, 0x1F800, 11}, {CAT_So, 0x1F810, 55}, {CAT_So, 0x1F850, 9}, @@ -2065,21 +2110,19 @@ {CAT_So, 0x1F890, 29}, {CAT_So, 0x1F900, 11}, {CAT_So, 0x1F910, 46}, - {CAT_So, 0x1F940, 12}, - {CAT_So, 0x1F950, 27}, - {CAT_So, 0x1F980, 23}, - {CAT_So, 0x1F9C0, 0}, - {CAT_So, 0x1F9D0, 22}, - {CAT_Lo, 0x20000, 0}, - {CAT_Lo, 0x2A6D6, 0}, - {CAT_Lo, 0x2A700, 0}, - {CAT_Lo, 0x2B734, 0}, - {CAT_Lo, 0x2B740, 0}, - {CAT_Lo, 0x2B81D, 0}, - {CAT_Lo, 0x2B820, 0}, - {CAT_Lo, 0x2CEA1, 0}, - {CAT_Lo, 0x2CEB0, 0}, - {CAT_Lo, 0x2EBE0, 0}, + {CAT_So, 0x1F940, 48}, + {CAT_So, 0x1F973, 3}, + {CAT_So, 0x1F97A, 0}, + {CAT_So, 0x1F97C, 38}, + {CAT_So, 0x1F9B0, 9}, + {CAT_So, 0x1F9C0, 2}, + {CAT_So, 0x1F9D0, 47}, + {CAT_So, 0x1FA60, 13}, + {CAT_Lo, 0x20000, 42710}, + {CAT_Lo, 0x2A700, 4148}, + {CAT_Lo, 0x2B740, 221}, + {CAT_Lo, 0x2B820, 5761}, + {CAT_Lo, 0x2CEB0, 7472}, {CAT_Lo, 0x2F800, 541}, {CAT_Cf, 0xE0001, 0}, {CAT_Cf, 0xE0020, 95}, diff --git a/newlib/libc/ctype/mkcategories b/newlib/libc/ctype/mkcategories index 24dd93ad0..582bebe3b 100755 --- a/newlib/libc/ctype/mkcategories +++ b/newlib/libc/ctype/mkcategories @@ -24,14 +24,14 @@ cat <<\/EOS first= item () { if [ -n "$first" ] - then if [ $(( 0x$1 )) -ne $(( 0x${last-0} + 1 )) ] + then if [ $2 = 0 -a $(( 0x$1 )) -ne $(( 0x${last-0} + 1 )) ] then range fi fi if [ -z "$first" ] then first=$1 - val=$2 + val=$3 fi last=$1 @@ -60,8 +60,9 @@ then -e '/;Co;/ d' else cat fi | -sed -e "s,^\([^;]*\);[^;]*;\([^;]*\);.*,\1 \2," | -uniq -f1 --group=append | sed -e "s,^$,range," -e t -e "s,^,item ," +sed -e "s,^\([^;]*\);<[^;]*\, Last>;\([^;]*\);.*,\1 1 \2," \ + -e "s,^\([^;]*\);[^;]*;\([^;]*\);.*,\1 0 \2," | +uniq -f2 --group=append | sed -e "s,^$,range," -e t -e "s,^,item ," ) | sh > categories.t sed -e "s/.*\(CAT_[A-Za-z]*\).*/ \1,/" categories.t | -- 2.17.0