newlib locale patches

Corinna Vinschen corinna-cygwin@cygwin.com
Tue Sep 29 07:51:00 GMT 2009


On Sep 28 19:50, Andy Koppe wrote:
> 2009/9/28 Corinna Vinschen:
> > Of course, you still have to
> > apply the newlib stuff which hasn't been approved yet.
> 
> Could you post a rollup diff with all the newlib patches still
> outstanding? I'm getting a bit lost among them.

Sure.


Corinna


Index: libc/locale/locale.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/locale/locale.c,v
retrieving revision 1.27
diff -u -p -r1.27 locale.c
--- libc/locale/locale.c	27 Sep 2009 12:21:16 -0000	1.27
+++ libc/locale/locale.c	29 Sep 2009 07:50:47 -0000
@@ -61,6 +61,11 @@ backward compatibility with older implem
 xxx in [437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866, 874, 1125,
 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258].
 
+Instead of <<"C-">>, you can also specify <<"C.">>.  Both variations allow
+specifying language-neutral locales while using charsets other than ASCII,
+for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
+but uses the UTF-8 charset.
+
 Even when using POSIX locale strings, the only charsets allowed are
 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<KOI8-R>>, <<KOI8-U>>,
 <<"ISO-8859-x">> with 1 <= x <= 15, or <<"CPxxx">> with xxx in
@@ -438,8 +443,14 @@ loadlocale(struct _reent *p, int categor
 #else
     strcpy (charset, "ASCII");
 #endif
-  else if (locale[0] == 'C' && locale[1] == '-')	/* Old newlib style */
-	strcpy (charset, locale + 2);
+  else if (locale[0] == 'C'
+	   && (locale[1] == '-'		/* Old newlib style */
+	       || locale[1] == '.'))	/* Extension for the C locale to allow
+					   specifying different charsets while
+					   sticking to the C locale in terms
+					   of sort order, etc.  Proposed in
+					   the Debian project. */
+    strcpy (charset, locale + 2);
   else							/* POSIX style */
     {
       char *c = locale;
Index: libc/stdlib/mbtowc_r.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/mbtowc_r.c,v
retrieving revision 1.16
diff -u -p -r1.16 mbtowc_r.c
--- libc/stdlib/mbtowc_r.c	27 Sep 2009 12:21:16 -0000	1.16
+++ libc/stdlib/mbtowc_r.c	29 Sep 2009 07:50:47 -0000
@@ -295,12 +295,6 @@ _DEFUN (__utf8_mbtowc, (r, pwc, s, n, ch
       tmp = (wchar_t)((state->__value.__wchb[0] & 0x0f) << 12)
 	|    (wchar_t)((state->__value.__wchb[1] & 0x3f) << 6)
 	|     (wchar_t)(ch & 0x3f);
-      /* Check for invalid CESU-8 encoding of UTF-16 surrogate values. */
-      if (tmp >= 0xd800 && tmp <= 0xdfff)
-	{
-	  r->_errno = EILSEQ;
-	  return -1;
-	}
       *pwc = tmp;
       return i;
     }
Index: libc/stdlib/sb_charsets.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/sb_charsets.c,v
retrieving revision 1.3
diff -u -p -r1.3 sb_charsets.c
--- libc/stdlib/sb_charsets.c	25 Aug 2009 18:47:24 -0000	1.3
+++ libc/stdlib/sb_charsets.c	29 Sep 2009 07:50:47 -0000
@@ -24,17 +24,17 @@ wchar_t __iso_8859_conv[14][0x60] = {
     0x111, 0x144, 0x148, 0xf3, 0xf4, 0x151, 0xf6, 0xf7,
     0x159, 0x16f, 0xfa, 0x171, 0xfc, 0xfd, 0x163, 0x2d9 },
   /* ISO-8859-3 */
-  { 0xa0, 0x126, 0x2d8, 0xa3, 0xa4, 0x0, 0x124, 0xa7,
-    0xa8, 0x130, 0x15e, 0x11e, 0x134, 0xad, 0x0, 0x17b,
+  { 0xa0, 0x126, 0x2d8, 0xa3, 0xa4, 0xf7f5, 0x124, 0xa7,
+    0xa8, 0x130, 0x15e, 0x11e, 0x134, 0xad, 0xf7f6, 0x17b,
     0xb0, 0x127, 0xb2, 0xb3, 0xb4, 0xb5, 0x125, 0xb7,
-    0xb8, 0x131, 0x15f, 0x11f, 0x135, 0xbd, 0x0, 0x17c,
-    0xc0, 0xc1, 0xc2, 0x0, 0xc4, 0x10a, 0x108, 0xc7,
+    0xb8, 0x131, 0x15f, 0x11f, 0x135, 0xbd, 0xf7f7, 0x17c,
+    0xc0, 0xc1, 0xc2, 0xf7f8, 0xc4, 0x10a, 0x108, 0xc7,
     0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
-    0x0, 0xd1, 0xd2, 0xd3, 0xd4, 0x120, 0xd6, 0xd7,
+    0xf7f9, 0xd1, 0xd2, 0xd3, 0xd4, 0x120, 0xd6, 0xd7,
     0x11c, 0xd9, 0xda, 0xdb, 0xdc, 0x16c, 0x15c, 0xdf,
-    0xe0, 0xe1, 0xe2, 0x0, 0xe4, 0x10b, 0x109, 0xe7,
+    0xe0, 0xe1, 0xe2, 0xf7fa, 0xe4, 0x10b, 0x109, 0xe7,
     0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
-    0x0, 0xf1, 0xf2, 0xf3, 0xf4, 0x121, 0xf6, 0xf7,
+    0xf7fb, 0xf1, 0xf2, 0xf3, 0xf4, 0x121, 0xf6, 0xf7,
     0x11d, 0xf9, 0xfa, 0xfb, 0xfc, 0x16d, 0x15d, 0x2d9 },
   /* ISO-8859-4 */
   { 0xa0, 0x104, 0x138, 0x156, 0xa4, 0x128, 0x13b, 0xa7,
@@ -63,44 +63,44 @@ wchar_t __iso_8859_conv[14][0x60] = {
     0x2116, 0x451, 0x452, 0x453, 0x454, 0x455, 0x456, 0x457,
     0x458, 0x459, 0x45a, 0x45b, 0x45c, 0xa7, 0x45e, 0x45f },
   /* ISO-8859-6 */
-  { 0xa0, 0x0, 0x0, 0x0, 0xa4, 0x0, 0x0, 0x0,
-    0x0, 0x0, 0x0, 0x0, 0x60c, 0xad, 0x0, 0x0,
-    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
-    0x0, 0x0, 0x0, 0x61b, 0x0, 0x0, 0x0, 0x61f,
-    0x0, 0x621, 0x622, 0x623, 0x624, 0x625, 0x626, 0x627,
+  { 0xa0, 0xf7c8, 0xf7c9, 0xf7ca, 0xa4, 0xf7cb, 0xf7cc, 0xf7cd,
+    0xf7ce, 0xf7cf, 0xf7d0, 0xf7d1, 0x60c, 0xad, 0xf7d2, 0xf7d3,
+    0xf7d4, 0xf7d5, 0xf7d6, 0xf7d7, 0xf7d8, 0xf7d9, 0xf7da, 0xf7db,
+    0xf7dc, 0xf7dd, 0xf7de, 0x61b, 0xf7df, 0xf7e0, 0xf7e1, 0x61f,
+    0xf7e2, 0x621, 0x622, 0x623, 0x624, 0x625, 0x626, 0x627,
     0x628, 0x629, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x62f,
     0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0x636, 0x637,
-    0x638, 0x639, 0x63a, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x638, 0x639, 0x63a, 0xf7e3, 0xf7e4, 0xf7e5, 0xf7e6, 0xf7e7,
     0x640, 0x641, 0x642, 0x643, 0x644, 0x645, 0x646, 0x647,
     0x648, 0x649, 0x64a, 0x64b, 0x64c, 0x64d, 0x64e, 0x64f,
-    0x650, 0x651, 0x652, 0x64b, 0xf4, 0xf5, 0xf6, 0xf7,
-    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff },
+    0x650, 0x651, 0x652, 0xf7e8, 0xf7e9, 0xf7ea, 0xf7eb, 0xf7ec,
+    0xf7ed, 0xf7ee, 0xf7ef, 0xf7f0, 0xf7f1, 0xf7f2, 0xf7f3, 0xf7f4 },
   /* ISO-8859-7 */
-  { 0xa0, 0x2018, 0x2019, 0xa3, 0x20ac, 0x20af, 0xa6, 0xa7,
-    0xa8, 0xa9, 0x37a, 0xab, 0xac, 0xad, 0x0, 0x2015,
+  { 0xa0, 0x2bd, 0x2bc, 0xa3, 0xf7c2, 0xf7c3, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xf7c4, 0xab, 0xac, 0xad, 0xf7c5, 0x2015,
     0xb0, 0xb1, 0xb2, 0xb3, 0x384, 0x385, 0x386, 0xb7,
     0x388, 0x389, 0x38a, 0xbb, 0x38c, 0xbd, 0x38e, 0x38f,
     0x390, 0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397,
     0x398, 0x399, 0x39a, 0x39b, 0x39c, 0x39d, 0x39e, 0x39f,
-    0x3a0, 0x3a1, 0x0, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7,
+    0x3a0, 0x3a1, 0xf7c6, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7,
     0x3a8, 0x3a9, 0x3aa, 0x3ab, 0x3ac, 0x3ad, 0x3ae, 0x3af,
     0x3b0, 0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7,
     0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3be, 0x3bf,
     0x3c0, 0x3c1, 0x3c2, 0x3c3, 0x3c4, 0x3c5, 0x3c6, 0x3c7,
-    0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xff },
+    0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xf7c7 },
   /* ISO-8859-8 */
-  { 0xa0, 0x0, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-    0xa8, 0xa9, 0xd7, 0xab, 0xac, 0xad, 0xae, 0xaf,
+  { 0xa0, 0xf79c, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xd7, 0xab, 0xac, 0xad, 0xae, 0x203e,
     0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
-    0xb8, 0xb9, 0xf7, 0xbb, 0xbc, 0xbd, 0xbe, 0x0,
-    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
-    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
-    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
-    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2017,
+    0xb8, 0xb9, 0xf7, 0xbb, 0xbc, 0xbd, 0xbe, 0xf79d,
+    0xf79e, 0xf79f, 0xf7a0, 0xf7a1, 0xf7a2, 0xf7a3, 0xf7a4, 0xf7a5,
+    0xf7a6, 0xf7a7, 0xf7a8, 0xf7a9, 0xf7aa, 0xf7ab, 0xf7ac, 0xf7ad,
+    0xf7ae, 0xf7af, 0xf7b0, 0xf7b1, 0xf7b2, 0xf7b3, 0xf7b4, 0xf7b5,
+    0xf7b6, 0xf7b7, 0xf7b8, 0xf7b9, 0xf7ba, 0xf7bb, 0xf7bc, 0x2017,
     0x5d0, 0x5d1, 0x5d2, 0x5d3, 0x5d4, 0x5d5, 0x5d6, 0x5d7,
     0x5d8, 0x5d9, 0x5da, 0x5db, 0x5dc, 0x5dd, 0x5de, 0x5df,
     0x5e0, 0x5e1, 0x5e2, 0x5e3, 0x5e4, 0x5e5, 0x5e6, 0x5e7,
-    0x5e8, 0x5e9, 0x5ea, 0x0, 0x0, 0x200e, 0x200f, 0x200e },
+    0x5e8, 0x5e9, 0x5ea, 0xf7bd, 0xf7be, 0xf7bf, 0xf7c0, 0xf7c1 },
   /* ISO-8859-9 */
   { 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
     0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
@@ -135,7 +135,7 @@ wchar_t __iso_8859_conv[14][0x60] = {
     0xe20, 0xe21, 0xe22, 0xe23, 0xe24, 0xe25, 0xe26, 0xe27,
     0xe28, 0xe29, 0xe2a, 0xe2b, 0xe2c, 0xe2d, 0xe2e, 0xe2f,
     0xe30, 0xe31, 0xe32, 0xe33, 0xe34, 0xe35, 0xe36, 0xe37,
-    0xe38, 0xe39, 0xe3a, 0x0, 0x0, 0x0, 0x0, 0xe3f,
+    0xe38, 0xe39, 0xe3a, 0xdb, 0xdc, 0xdd, 0xde, 0xe3f,
     0xe40, 0xe41, 0xe42, 0xe43, 0xe44, 0xe45, 0xe46, 0xe47,
     0xe48, 0xe49, 0xe4a, 0xe4b, 0xe4c, 0xe4d, 0xe4e, 0xe4f,
     0xe50, 0xe51, 0xe52, 0xe53, 0xe54, 0xe55, 0xe56, 0xe57,
@@ -222,9 +222,9 @@ wchar_t __cp_conv[24][0x80] = {
     0x2261, 0xb1, 0x2265, 0x2264, 0x2320, 0x2321, 0xf7, 0x2248,
     0xb0, 0x2219, 0xb7, 0x221a, 0x207f, 0xb2, 0x25a0, 0xa0 },
   /* CP720 */
-  { 0x0, 0x0, 0xe9, 0xe2, 0x0, 0xe0, 0x0, 0xe7,
-    0xea, 0xeb, 0xe8, 0xef, 0xee, 0x0, 0x0, 0x0,
-    0x0, 0x651, 0x652, 0xf4, 0xa4, 0x640, 0xfb, 0xf9,
+  { 0x80, 0x81, 0xe9, 0xe2, 0x84, 0xe0, 0x86, 0xe7,
+    0xea, 0xeb, 0xe8, 0xef, 0xee, 0x8d, 0x8e, 0x8f,
+    0x90, 0x651, 0x652, 0xf4, 0xa4, 0x640, 0xfb, 0xf9,
     0x621, 0x622, 0x623, 0x624, 0xa3, 0x625, 0x626, 0x627,
     0x628, 0x629, 0x62a, 0x62b, 0x62c, 0x62d, 0x62e, 0x62f,
     0x630, 0x631, 0x632, 0x633, 0x634, 0x635, 0xab, 0xbb,
@@ -334,11 +334,11 @@ wchar_t __cp_conv[24][0x80] = {
     0xa9, 0x2563, 0x2551, 0x2557, 0x255d, 0xa2, 0xa5, 0x2510,
     0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0xe3, 0xc3,
     0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0xa4,
-    0xba, 0xaa, 0xca, 0xcb, 0xc8, 0x0, 0xcd, 0xce,
+    0xba, 0xaa, 0xca, 0xcb, 0xc8, 0xf8bb, 0xcd, 0xce,
     0xcf, 0x2518, 0x250c, 0x2588, 0x2584, 0xa6, 0xcc, 0x2580,
-    0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0x0,
+    0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xf8bc,
     0xd7, 0xda, 0xdb, 0xd9, 0xec, 0xff, 0xaf, 0xb4,
-    0xad, 0xb1, 0x0, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8,
+    0xad, 0xb1, 0xf8bd, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8,
     0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x25a0, 0xa0 },
   /* CP858 */
   { 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7,
@@ -392,10 +392,10 @@ wchar_t __cp_conv[24][0x80] = {
     0x401, 0x451, 0x404, 0x454, 0x407, 0x457, 0x40e, 0x45e,
     0xb0, 0x2219, 0xb7, 0x221a, 0x2116, 0xa4, 0x25a0, 0xa0 },
   /* CP874 */
-  { 0x20ac, 0x0, 0x0, 0x0, 0x0, 0x2026, 0x0, 0x0,
-    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
-    0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+  { 0x20ac, 0x81, 0x82, 0x83, 0x84, 0x2026, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
     0xa0, 0xe01, 0xe02, 0xe03, 0xe04, 0xe05, 0xe06, 0xe07,
     0xe08, 0xe09, 0xe0a, 0xe0b, 0xe0c, 0xe0d, 0xe0e, 0xe0f,
     0xe10, 0xe11, 0xe12, 0xe13, 0xe14, 0xe15, 0xe16, 0xe17,
@@ -403,11 +403,11 @@ wchar_t __cp_conv[24][0x80] = {
     0xe20, 0xe21, 0xe22, 0xe23, 0xe24, 0xe25, 0xe26, 0xe27,
     0xe28, 0xe29, 0xe2a, 0xe2b, 0xe2c, 0xe2d, 0xe2e, 0xe2f,
     0xe30, 0xe31, 0xe32, 0xe33, 0xe34, 0xe35, 0xe36, 0xe37,
-    0xe38, 0xe39, 0xe3a, 0x0, 0x0, 0x0, 0x0, 0xe3f,
+    0xe38, 0xe39, 0xe3a, 0xf8c1, 0xf8c2, 0xf8c3, 0xf8c4, 0xe3f,
     0xe40, 0xe41, 0xe42, 0xe43, 0xe44, 0xe45, 0xe46, 0xe47,
     0xe48, 0xe49, 0xe4a, 0xe4b, 0xe4c, 0xe4d, 0xe4e, 0xe4f,
     0xe50, 0xe51, 0xe52, 0xe53, 0xe54, 0xe55, 0xe56, 0xe57,
-    0xe58, 0xe59, 0xe5a, 0xe5b, 0xfc, 0xfd, 0xfe, 0xff },
+    0xe58, 0xe59, 0xe5a, 0xe5b, 0xf8c5, 0xf8c6, 0xf8c7, 0xf8c8 },
   /* CP1125 */
   { 0x410, 0x411, 0x412, 0x413, 0x414, 0x415, 0x416, 0x417,
     0x418, 0x419, 0x41a, 0x41b, 0x41c, 0x41d, 0x41e, 0x41f,
@@ -426,10 +426,10 @@ wchar_t __cp_conv[24][0x80] = {
     0x401, 0x451, 0x490, 0x491, 0x404, 0x454, 0x406, 0x456,
     0x407, 0x457, 0xb7, 0x221a, 0x2116, 0xa4, 0x25a0, 0xa0 },
   /* CP1250 */
-  { 0x20ac, 0x0, 0x201a, 0x0, 0x201e, 0x2026, 0x2020, 0x2021,
-    0x0, 0x2030, 0x160, 0x2039, 0x15a, 0x164, 0x17d, 0x179,
-    0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x0, 0x2122, 0x161, 0x203a, 0x15b, 0x165, 0x17e, 0x17a,
+  { 0x20ac, 0x81, 0x201a, 0x83, 0x201e, 0x2026, 0x2020, 0x2021,
+    0x88, 0x2030, 0x160, 0x2039, 0x15a, 0x164, 0x17d, 0x179,
+    0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x98, 0x2122, 0x161, 0x203a, 0x15b, 0x165, 0x17e, 0x17a,
     0xa0, 0x2c7, 0x2d8, 0x141, 0xa4, 0x104, 0xa6, 0xa7,
     0xa8, 0xa9, 0x15e, 0xab, 0xac, 0xad, 0xae, 0x17b,
     0xb0, 0xb1, 0x2db, 0x142, 0xb4, 0xb5, 0xb6, 0xb7,
@@ -446,7 +446,7 @@ wchar_t __cp_conv[24][0x80] = {
   { 0x402, 0x403, 0x201a, 0x453, 0x201e, 0x2026, 0x2020, 0x2021,
     0x20ac, 0x2030, 0x409, 0x2039, 0x40a, 0x40c, 0x40b, 0x40f,
     0x452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x0, 0x2122, 0x459, 0x203a, 0x45a, 0x45c, 0x45b, 0x45f,
+    0x98, 0x2122, 0x459, 0x203a, 0x45a, 0x45c, 0x45b, 0x45f,
     0xa0, 0x40e, 0x45e, 0x408, 0xa4, 0x490, 0xa6, 0xa7,
     0x401, 0xa9, 0x404, 0xab, 0xac, 0xad, 0xae, 0x407,
     0xb0, 0xb1, 0x406, 0x456, 0x491, 0xb5, 0xb6, 0xb7,
@@ -460,10 +460,10 @@ wchar_t __cp_conv[24][0x80] = {
     0x440, 0x441, 0x442, 0x443, 0x444, 0x445, 0x446, 0x447,
     0x448, 0x449, 0x44a, 0x44b, 0x44c, 0x44d, 0x44e, 0x44f },
   /* CP1252 */
-  { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
-    0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x0, 0x17d, 0x0,
-    0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x0, 0x17e, 0x178,
+  { 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
+    0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x17d, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x17e, 0x178,
     0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
     0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
     0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
@@ -477,27 +477,27 @@ wchar_t __cp_conv[24][0x80] = {
     0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
     0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff },
   /* CP1253 */
-  { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
-    0x0, 0x2030, 0x0, 0x2039, 0x0, 0x0, 0x0, 0x0,
-    0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x0, 0x2122, 0x0, 0x203a, 0x0, 0x0, 0x0, 0x0,
+  { 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
+    0x88, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
     0xa0, 0x385, 0x386, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
-    0xa8, 0xa9, 0x0, 0xab, 0xac, 0xad, 0xae, 0x2015,
+    0xa8, 0xa9, 0xf8f9, 0xab, 0xac, 0xad, 0xae, 0x2015,
     0xb0, 0xb1, 0xb2, 0xb3, 0x384, 0xb5, 0xb6, 0xb7,
     0x388, 0x389, 0x38a, 0xbb, 0x38c, 0xbd, 0x38e, 0x38f,
     0x390, 0x391, 0x392, 0x393, 0x394, 0x395, 0x396, 0x397,
     0x398, 0x399, 0x39a, 0x39b, 0x39c, 0x39d, 0x39e, 0x39f,
-    0x3a0, 0x3a1, 0x0, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7,
+    0x3a0, 0x3a1, 0xf8fa, 0x3a3, 0x3a4, 0x3a5, 0x3a6, 0x3a7,
     0x3a8, 0x3a9, 0x3aa, 0x3ab, 0x3ac, 0x3ad, 0x3ae, 0x3af,
     0x3b0, 0x3b1, 0x3b2, 0x3b3, 0x3b4, 0x3b5, 0x3b6, 0x3b7,
     0x3b8, 0x3b9, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3be, 0x3bf,
     0x3c0, 0x3c1, 0x3c2, 0x3c3, 0x3c4, 0x3c5, 0x3c6, 0x3c7,
-    0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xff },
+    0x3c8, 0x3c9, 0x3ca, 0x3cb, 0x3cc, 0x3cd, 0x3ce, 0xf8fb },
   /* CP1254 */
-  { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
-    0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x0, 0x0, 0x0,
-    0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x0, 0x0, 0x178,
+  { 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
+    0x2c6, 0x2030, 0x160, 0x2039, 0x152, 0x8d, 0x8e, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x2dc, 0x2122, 0x161, 0x203a, 0x153, 0x9d, 0x9e, 0x178,
     0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
     0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
     0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
@@ -511,22 +511,22 @@ wchar_t __cp_conv[24][0x80] = {
     0x11f, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
     0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x131, 0x15f, 0xff },
   /* CP1255 */
-  { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
-    0x2c6, 0x2030, 0x0, 0x2039, 0x0, 0x0, 0x0, 0x0,
-    0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x2dc, 0x2122, 0x0, 0x203a, 0x0, 0x0, 0x0, 0x0,
+  { 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
+    0x2c6, 0x2030, 0x8a, 0x2039, 0x8c, 0x8d, 0x8e, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x2dc, 0x2122, 0x9a, 0x203a, 0x9c, 0x9d, 0x9e, 0x9f,
     0xa0, 0xa1, 0xa2, 0xa3, 0x20aa, 0xa5, 0xa6, 0xa7,
     0xa8, 0xa9, 0xd7, 0xab, 0xac, 0xad, 0xae, 0xaf,
     0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
     0xb8, 0xb9, 0xf7, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
     0x5b0, 0x5b1, 0x5b2, 0x5b3, 0x5b4, 0x5b5, 0x5b6, 0x5b7,
-    0x5b8, 0x5b9, 0x0, 0x5bb, 0x5bc, 0x5bd, 0x5be, 0x5bf,
+    0x5b8, 0x5b9, 0x5ba, 0x5bb, 0x5bc, 0x5bd, 0x5be, 0x5bf,
     0x5c0, 0x5c1, 0x5c2, 0x5c3, 0x5f0, 0x5f1, 0x5f2, 0x5f3,
-    0x5f4, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+    0x5f4, 0xf88d, 0xf88e, 0xf88f, 0xf890, 0xf891, 0xf892, 0xf893,
     0x5d0, 0x5d1, 0x5d2, 0x5d3, 0x5d4, 0x5d5, 0x5d6, 0x5d7,
     0x5d8, 0x5d9, 0x5da, 0x5db, 0x5dc, 0x5dd, 0x5de, 0x5df,
     0x5e0, 0x5e1, 0x5e2, 0x5e3, 0x5e4, 0x5e5, 0x5e6, 0x5e7,
-    0x5e8, 0x5e9, 0x5ea, 0x0, 0x0, 0x200e, 0x200f, 0xff },
+    0x5e8, 0x5e9, 0x5ea, 0xf894, 0xf895, 0x200e, 0x200f, 0xf896 },
   /* CP1256 */
   { 0x20ac, 0x67e, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
     0x2c6, 0x2030, 0x679, 0x2039, 0x152, 0x686, 0x698, 0x688,
@@ -545,11 +545,11 @@ wchar_t __cp_conv[24][0x80] = {
     0x64b, 0x64c, 0x64d, 0x64e, 0xf4, 0x64f, 0x650, 0xf7,
     0x651, 0xf9, 0x652, 0xfb, 0xfc, 0x200e, 0x200f, 0x6d2 },
   /* CP1257 */
-  { 0x20ac, 0x0, 0x201a, 0x0, 0x201e, 0x2026, 0x2020, 0x2021,
-    0x0, 0x2030, 0x0, 0x2039, 0x0, 0xa8, 0x2c7, 0xb8,
-    0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x0, 0x2122, 0x0, 0x203a, 0x0, 0xaf, 0x2db, 0x0,
-    0xa0, 0x0, 0xa2, 0xa3, 0xa4, 0x0, 0xa6, 0xa7,
+  { 0x20ac, 0x81, 0x201a, 0x83, 0x201e, 0x2026, 0x2020, 0x2021,
+    0x88, 0x2030, 0x8a, 0x2039, 0x8c, 0xa8, 0x2c7, 0xb8,
+    0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x98, 0x2122, 0x9a, 0x203a, 0x9c, 0xaf, 0x2db, 0x9f,
+    0xa0, 0xf8fc, 0xa2, 0xa3, 0xa4, 0xf8fd, 0xa6, 0xa7,
     0xd8, 0xa9, 0x156, 0xab, 0xac, 0xad, 0xae, 0xc6,
     0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
     0xf8, 0xb9, 0x157, 0xbb, 0xbc, 0xbd, 0xbe, 0xe6,
@@ -562,10 +562,10 @@ wchar_t __cp_conv[24][0x80] = {
     0x161, 0x144, 0x146, 0xf3, 0x14d, 0xf5, 0xf6, 0xf7,
     0x173, 0x142, 0x15b, 0x16b, 0xfc, 0x17c, 0x17e, 0x2d9 },
   /* CP1258 */
-  { 0x20ac, 0x0, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
-    0x2c6, 0x2030, 0x0, 0x2039, 0x152, 0x0, 0x0, 0x0,
-    0x0, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-    0x2dc, 0x2122, 0x0, 0x203a, 0x153, 0x0, 0x0, 0x178,
+  { 0x20ac, 0x81, 0x201a, 0x192, 0x201e, 0x2026, 0x2020, 0x2021,
+    0x2c6, 0x2030, 0x8a, 0x2039, 0x152, 0x8d, 0x8e, 0x8f,
+    0x90, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+    0x2dc, 0x2122, 0x9a, 0x203a, 0x153, 0x9d, 0x9e, 0x178,
     0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
     0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
     0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
@@ -601,9 +601,9 @@ wchar_t __cp_conv[24][0x80] = {
     0x2591, 0x2592, 0x2593, 0x2320, 0x25a0, 0x2219, 0x221a, 0x2248,
     0x2264, 0x2265, 0xa0, 0x2321, 0xb0, 0xb2, 0xb7, 0xf7,
     0x2550, 0x2551, 0x2552, 0x451, 0x454, 0x2554, 0x456, 0x457,
-    0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x491, 0x255d, 0x255e,
+    0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x491, 0x45e, 0x255e,
     0x255f, 0x2560, 0x2561, 0x401, 0x404, 0x2563, 0x406, 0x407,
-    0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x490, 0x256c, 0xa9,
+    0x2566, 0x2567, 0x2568, 0x2569, 0x256a, 0x490, 0x40e, 0xa9,
     0x44e, 0x430, 0x431, 0x446, 0x434, 0x435, 0x444, 0x433,
     0x445, 0x438, 0x439, 0x43a, 0x43b, 0x43c, 0x43d, 0x43e,
     0x43f, 0x44f, 0x440, 0x441, 0x442, 0x443, 0x436, 0x432,
Index: libc/stdlib/wctomb_r.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/wctomb_r.c,v
retrieving revision 1.15
diff -u -p -r1.15 wctomb_r.c
--- libc/stdlib/wctomb_r.c	27 Sep 2009 12:21:16 -0000	1.15
+++ libc/stdlib/wctomb_r.c	29 Sep 2009 07:50:47 -0000
@@ -63,72 +63,75 @@ _DEFUN (__utf8_wctomb, (r, s, wchar, cha
         mbstate_t     *state)
 {
   wint_t wchar = _wchar;
+  int ret = 0;
 
   if (s == NULL)
     return 0; /* UTF-8 encoding is not state-dependent */
 
-  if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff))
+  if (sizeof (wchar_t) == 2 && state->__count == -4
+      && (wchar < 0xdc00 || wchar >= 0xdfff))
     {
-      /* At this point only the second half of a surrogate pair is valid. */
-      r->_errno = EILSEQ;
-      return -1;
+      /* There's a leftover lone high surrogate.  Write out the CESU-8 value
+	 of the surrogate and proceed to convert the given character.  Note
+	 to return extra 3 bytes. */
+      wchar_t tmp;
+      tmp = (state->__value.__wchb[0] << 16 | state->__value.__wchb[1] << 8)
+	    - 0x10000 >> 10 | 0xd80d;
+      *s++ = 0xe0 | ((tmp & 0xf000) >> 12);
+      *s++ = 0x80 | ((tmp &  0xfc0) >> 6);
+      *s++ = 0x80 |  (tmp &   0x3f);
+      state->__count = 0;
+      ret = 3;
     }
   if (wchar <= 0x7f)
     {
       *s = wchar;
-      return 1;
+      return ret + 1;
     }
   if (wchar >= 0x80 && wchar <= 0x7ff)
     {
       *s++ = 0xc0 | ((wchar & 0x7c0) >> 6);
       *s   = 0x80 |  (wchar &  0x3f);
-      return 2;
+      return ret + 2;
     }
   if (wchar >= 0x800 && wchar <= 0xffff)
     {
-      if (wchar >= 0xd800 && wchar <= 0xdfff)
+      /* No UTF-16 surrogate handling in UCS-4 */
+      if (sizeof (wchar_t) == 2 && wchar >= 0xd800 && wchar <= 0xdfff)
 	{
 	  wint_t tmp;
-	  /* UTF-16 surrogates -- must not occur in normal UCS-4 data */
-	  if (sizeof (wchar_t) != 2)
+	  if (wchar <= 0xdbff)
 	    {
-	      r->_errno = EILSEQ;
-	      return -1;
+	      /* First half of a surrogate pair.  Store the state and
+	         return ret + 0. */
+	      tmp = ((wchar & 0x3ff) << 10) + 0x10000;
+	      state->__value.__wchb[0] = (tmp >> 16) & 0xff;
+	      state->__value.__wchb[1] = (tmp >> 8) & 0xff;
+	      state->__count = -4;
+	      *s = (0xf0 | ((tmp & 0x1c0000) >> 18));
+	      return ret;
 	    }
-	  if (wchar >= 0xdc00)
+	  if (state->__count == -4)
 	    {
-	      /* Second half of a surrogate pair. It's not valid if
-		 we don't have already read a first half of a surrogate
-		 before. */
-	      if (state->__count != -4)
-		{
-		  r->_errno = EILSEQ;
-		  return -1;
-		}
-	      /* If it's valid, reconstruct the full Unicode value and
-		 return the trailing three bytes of the UTF-8 char. */
+	      /* Second half of a surrogate pair.  Reconstruct the full
+		 Unicode value and return the trailing three bytes of the
+		 UTF-8 character. */
 	      tmp = (state->__value.__wchb[0] << 16)
 		    | (state->__value.__wchb[1] << 8)
 		    | (wchar & 0x3ff);
 	      state->__count = 0;
+	      *s++ = 0xf0 | ((tmp & 0x1c0000) >> 18);
 	      *s++ = 0x80 | ((tmp &  0x3f000) >> 12);
 	      *s++ = 0x80 | ((tmp &    0xfc0) >> 6);
 	      *s   = 0x80 |  (tmp &     0x3f);
-	      return 3;
+	      return 4;
 	    }
-	  /* First half of a surrogate pair.  Store the state and return
-	     the first byte of the UTF-8 char. */
-	  tmp = ((wchar & 0x3ff) << 10) + 0x10000;
-	  state->__value.__wchb[0] = (tmp >> 16) & 0xff;
-	  state->__value.__wchb[1] = (tmp >> 8) & 0xff;
-	  state->__count = -4;
-	  *s = (0xf0 | ((tmp & 0x1c0000) >> 18));
-	  return 1;
+	  /* Otherwise translate into CESU-8 value. */
 	}
       *s++ = 0xe0 | ((wchar & 0xf000) >> 12);
       *s++ = 0x80 | ((wchar &  0xfc0) >> 6);
       *s   = 0x80 |  (wchar &   0x3f);
-      return 3;
+      return ret + 3;
     }
   if (wchar >= 0x10000 && wchar <= 0x10ffff)
     {

-- 
Corinna Vinschen                  Please, send mails regarding Cygwin to
Cygwin Project Co-Leader          cygwin AT cygwin DOT com
Red Hat



More information about the Cygwin-developers mailing list