This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] _wctomb_r mishandles invalid wchars in single byte mode


Hi,

Right now the _wctomb_r function simply assumes that the incoming wchar
is a single-byte value if the current multibyte charset is a single-byte
charset.  This is wrong.  Per POSIX, wctomb "shall return -1 if the
value of wchar does not correspond to a valid character".  Per
POSIX.1-2008, wctomb should also set errno to EILSEQ.  The patch below
fixes that.


Corinna


	* libc/stdlib/wctomb_r.c (_wctomb_r): Set errno to EILSEQ in case of
	an invalid wchar.  Return -1 if wchar doesn't fit into a single-byte
	value when a single-byte charset is in use.


Index: libc/stdlib/wctomb_r.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/wctomb_r.c,v
retrieving revision 1.8
diff -u -p -r1.8 wctomb_r.c
--- libc/stdlib/wctomb_r.c	25 Feb 2009 09:10:09 -0000	1.8
+++ libc/stdlib/wctomb_r.c	27 Feb 2009 10:06:15 -0000
@@ -1,3 +1,4 @@
+#include <errno.h>
 #include <stdlib.h>
 #include <string.h>
 #include <wchar.h>
@@ -31,6 +32,7 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
       if (state->__count == -4 && (wchar < 0xdc00 || wchar >= 0xdfff))
 	{
 	  /* At this point only the second half of a surrogate pair is valid. */
+	  r->_errno = EILSEQ;
 	  return -1;
 	}
       if (wchar <= 0x7f)
@@ -51,14 +53,20 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
 	      wint_t tmp;
 	      /* UTF-16 surrogates -- must not occur in normal UCS-4 data */
 	      if (sizeof (wchar_t) != 2)
-		return -1;
+		{
+		  r->_errno = EILSEQ;
+		  return -1;
+		}
 	      if (wchar >= 0xdc00)
 		{
 		  /* Second half of a surrogate pair. It's not valid if
 		     we don't have already read a first half of a surrogate
 		     before. */
 		  if (state->__count != -4)
-		    return -1;
+		    {
+		      r->_errno = EILSEQ;
+		      return -1;
+		    }
 		  /* If it's valid, reconstruct the full Unicode value and
 		     return the trailing three bytes of the UTF-8 char. */
 		  tmp = (state->__value.__wchb[0] << 16)
@@ -93,7 +101,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
           return 4;
         }
       else
-        return -1;
+	{
+	  r->_errno = EILSEQ;
+	  return -1;
+	}
     }
   else if (!strcmp (__lc_ctype, "C-SJIS"))
     {
@@ -113,7 +124,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
               return 2;
             }
           else
-            return -1;
+	    {
+	      r->_errno = EILSEQ;
+	      return -1;
+	    }
         }
     }
   else if (!strcmp (__lc_ctype, "C-EUCJP"))
@@ -134,7 +148,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
               return 2;
             }
           else
-            return -1;
+	    {
+	      r->_errno = EILSEQ;
+	      return -1;
+	    }
         }
     }
   else if (!strcmp (__lc_ctype, "C-JIS"))
@@ -165,7 +182,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
               return cnt + 2;
             }
           else
-            return -1;
+	    {
+	      r->_errno = EILSEQ;
+	      return -1;
+	    }
         }
       else
         {
@@ -187,6 +207,12 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
     return 0;
  
   /* otherwise we are dealing with a single byte character */
+  if (wchar >= 0x100)
+    {
+      r->_errno = EILSEQ;
+      return -1;
+    }
+
   *s = (char) wchar;
   return 1;
 }


-- 
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]