This is the mail archive of the
newlib@sourceware.org
mailing list for the newlib project.
Re: ctype macros broken on 64-bits builds?
- From: Jeff Johnston <jjohnstn at redhat dot com>
- To: Martijn van Buul <pino+gmane_comp_lib_newlib at dohd dot org>
- Cc: newlib at sources dot redhat dot com
- Date: Mon, 21 Jul 2008 17:28:24 -0400
- Subject: Re: ctype macros broken on 64-bits builds?
- References: <slrng88rdm.1f2h.pino@mud.stack.nl>
Martijn van Buul wrote:
Hello,
I'm using newlib on x86_64-elf, and I've ran into problems with the various
is...() macros in ctype.h. According to C90 (and C99, and possibly earlier
standards before that..) these macros/functions are required to accept an
integer input with range [-1 .. 255]. It appears they are currently broken
for 64-bits targets. As an example, I used isalpha(), but the others have
exactly the same problem:
isalpha() is defined in ctype as:
#define isalpha(c) ((__ctype_ptr)[(unsigned)(c)]&(_U|_L))
where __ctype_ptr points to element 1 of a 257-entry array, so
__ctype_ptr[-1] is actually valid.
This works for targets with 32-bit pointers and 32-bits integers,
as accessing element [-1] from an array will access exactly the same memory
as accessing element[(unsigned)(-1)], as there will be an implicit
overflow:
Assuming a char foo[10], I'd get:
&foo[0]: 0xbd4de86
&foo[-1]: 0xbd4de85
&foo[(unsigned)(-1)]: 0xbd4de85
However, this no longer works on a platform with 32 bits integers and 64-bits
pointers (like x86_64..), since the implicit overflow will not occur:
&foo[0]: 0x28000109c30
&foo[-1]: 0x28000109c2f
&foo[(unsigned)(-1)]: 0x28100109c2f
Note how the [(unsigned) (-1)] address ended up 4GB -1 beyond the first
element, instead of just before it.
All in all, this means that using any of the ctype(3) macros with -1
as an argument will cause a segmentation fault, where it should have been
defined behaviour.
It is the explicit cast to unsigned that's causing the problem here, as
using (signed) would've yielded the expected result:
&foo[(signed)(-1)]: 0x28000109c2f
I rewrote all appropriate macros in ctype.h to cast to (signed) instead of
(unsigned), with no adverse affects. My code no longer crashes now, but my
testbed is limited so I don't know if this might break other targets.
The alternative option would be to do what the rest of the world has been
doing for a while (Including the BSDs, from which this ctype.* seems to
have borrowed quite a bit), and rewriting isalpha and friends to
#define isalpha(c) ((__ctype_ptr)[(unsigned)(c + 1)]&(_U|_L))
with __ctype_ptr pointing at element 0 of the array in ctype/ctype_.c,
instead of at element 1.
Hi Martin,
Thanks for catching this.
I have checked in the accompanying patch which implements the
alternative you mention above. To prevent breakage in existing code, I
have created a new pointer: __ctype_ptr__ and changed the ctype
macros/functions to use it.
Cygwin folks will probably need to add __ctype_ptr__ to the list of
library globals.
If anybody finds any problems, just let me know.
-- Jeff J.
Index: libc/include/ctype.h
===================================================================
RCS file: /cvs/src/src/newlib/libc/include/ctype.h,v
retrieving revision 1.9
diff -u -p -r1.9 ctype.h
--- libc/include/ctype.h 18 Mar 2005 17:18:59 -0000 1.9
+++ libc/include/ctype.h 21 Jul 2008 21:16:37 -0000
@@ -36,21 +36,20 @@ int _EXFUN(_toupper, (int __c));
#define _X 0100
#define _B 0200
-extern __IMPORT _CONST char *__ctype_ptr;
-extern __IMPORT _CONST char _ctype_[]; /* For backward compatibility. */
+extern __IMPORT _CONST char *__ctype_ptr__;
#ifndef __cplusplus
-#define isalpha(c) ((__ctype_ptr)[(unsigned)(c)]&(_U|_L))
-#define isupper(c) ((__ctype_ptr)[(unsigned)(c)]&_U)
-#define islower(c) ((__ctype_ptr)[(unsigned)(c)]&_L)
-#define isdigit(c) ((__ctype_ptr)[(unsigned)(c)]&_N)
-#define isxdigit(c) ((__ctype_ptr)[(unsigned)(c)]&(_X|_N))
-#define isspace(c) ((__ctype_ptr)[(unsigned)(c)]&_S)
-#define ispunct(c) ((__ctype_ptr)[(unsigned)(c)]&_P)
-#define isalnum(c) ((__ctype_ptr)[(unsigned)(c)]&(_U|_L|_N))
-#define isprint(c) ((__ctype_ptr)[(unsigned)(c)]&(_P|_U|_L|_N|_B))
-#define isgraph(c) ((__ctype_ptr)[(unsigned)(c)]&(_P|_U|_L|_N))
-#define iscntrl(c) ((__ctype_ptr)[(unsigned)(c)]&_C)
+#define isalpha(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&(_U|_L))
+#define isupper(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&_U)
+#define islower(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&_L)
+#define isdigit(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&_N)
+#define isxdigit(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&(_X|_N))
+#define isspace(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&_S)
+#define ispunct(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&_P)
+#define isalnum(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&(_U|_L|_N))
+#define isprint(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&(_P|_U|_L|_N|_B))
+#define isgraph(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&(_P|_U|_L|_N))
+#define iscntrl(c) ((__ctype_ptr__)[(unsigned)((c)+1)]&_C)
/* Non-gcc versions will get the library versions, and will be
Index: libc/ctype/ctype_.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/ctype_.c,v
retrieving revision 1.4
diff -u -p -r1.4 ctype_.c
--- libc/ctype/ctype_.c 17 Mar 2005 20:11:22 -0000 1.4
+++ libc/ctype/ctype_.c 21 Jul 2008 21:16:37 -0000
@@ -86,8 +86,10 @@ static _CONST char _ctype_b[128 + 256] =
# if defined(__CYGWIN__)
_CONST char __declspec(dllexport) *__ctype_ptr = _ctype_b + 128;
+_CONST char __declspec(dllexport) *__ctype_ptr__ = _ctype_b + 127;
# else
_CONST char *__ctype_ptr = _ctype_b + 128;
+_CONST char *__ctype_ptr__ = _ctype_b + 127;
# endif
# if defined(_HAVE_ARRAY_ALIASING)
@@ -124,4 +126,5 @@ _CONST char _ctype_[1 + 256] = {
};
_CONST char *__ctype_ptr = _ctype_ + 1;
+_CONST char *__ctype_ptr__ = _ctype_;
#endif
Index: libc/ctype/isalnum.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/isalnum.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 isalnum.c
--- libc/ctype/isalnum.c 17 Feb 2000 19:39:46 -0000 1.1.1.1
+++ libc/ctype/isalnum.c 21 Jul 2008 21:16:37 -0000
@@ -41,6 +41,6 @@ No OS subroutines are required.
int
_DEFUN(isalnum,(c),int c)
{
- return((_ctype_ + 1)[c] & (_U|_L|_N));
+ return(__ctype_ptr__[c+1] & (_U|_L|_N));
}
Index: libc/ctype/isalpha.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/isalpha.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 isalpha.c
--- libc/ctype/isalpha.c 17 Feb 2000 19:39:46 -0000 1.1.1.1
+++ libc/ctype/isalpha.c 21 Jul 2008 21:16:37 -0000
@@ -39,6 +39,6 @@ No supporting OS subroutines are require
int
_DEFUN(isalpha,(c),int c)
{
- return((_ctype_ + 1)[c] & (_U|_L));
+ return(__ctype_ptr__[c+1] & (_U|_L));
}
Index: libc/ctype/iscntrl.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/iscntrl.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 iscntrl.c
--- libc/ctype/iscntrl.c 17 Feb 2000 19:39:46 -0000 1.1.1.1
+++ libc/ctype/iscntrl.c 21 Jul 2008 21:16:37 -0000
@@ -42,7 +42,7 @@ No supporting OS subroutines are require
int
_DEFUN(iscntrl,(c),int c)
{
- return((_ctype_ + 1)[c] & _C);
+ return(__ctype_ptr__[c+1] & _C);
}
Index: libc/ctype/isdigit.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/isdigit.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 isdigit.c
--- libc/ctype/isdigit.c 17 Feb 2000 19:39:46 -0000 1.1.1.1
+++ libc/ctype/isdigit.c 21 Jul 2008 21:16:37 -0000
@@ -39,5 +39,5 @@ No supporting OS subroutines are require
int
_DEFUN(isdigit,(c),int c)
{
- return((_ctype_ + 1)[c] & _N);
+ return(__ctype_ptr__[c+1] & _N);
}
Index: libc/ctype/islower.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/islower.c,v
retrieving revision 1.2
diff -u -p -r1.2 islower.c
--- libc/ctype/islower.c 28 Oct 2005 21:33:22 -0000 1.2
+++ libc/ctype/islower.c 21 Jul 2008 21:16:37 -0000
@@ -38,6 +38,6 @@ No supporting OS subroutines are require
int
_DEFUN(islower,(c),int c)
{
- return((_ctype_ + 1)[c] & _L);
+ return(__ctype_ptr__[c+1] & _L);
}
Index: libc/ctype/isprint.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/isprint.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 isprint.c
--- libc/ctype/isprint.c 17 Feb 2000 19:39:46 -0000 1.1.1.1
+++ libc/ctype/isprint.c 21 Jul 2008 21:16:37 -0000
@@ -47,7 +47,7 @@ No supporting OS subroutines are require
int
_DEFUN(isgraph,(c),int c)
{
- return((_ctype_ + 1)[c] & (_P|_U|_L|_N));
+ return(__ctype_ptr__[c+1] & (_P|_U|_L|_N));
}
@@ -55,6 +55,6 @@ _DEFUN(isgraph,(c),int c)
int
_DEFUN(isprint,(c),int c)
{
- return((_ctype_ + 1)[c] & (_P|_U|_L|_N|_B));
+ return(__ctype_ptr__[c+1] & (_P|_U|_L|_N|_B));
}
Index: libc/ctype/ispunct.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/ispunct.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 ispunct.c
--- libc/ctype/ispunct.c 17 Feb 2000 19:39:46 -0000 1.1.1.1
+++ libc/ctype/ispunct.c 21 Jul 2008 21:16:37 -0000
@@ -41,6 +41,6 @@ No supporting OS subroutines are require
int
_DEFUN(ispunct,(c),int c)
{
- return((_ctype_ + 1)[c] & _P);
+ return(__ctype_ptr__[c+1] & _P);
}
Index: libc/ctype/isspace.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/isspace.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 isspace.c
--- libc/ctype/isspace.c 17 Feb 2000 19:39:46 -0000 1.1.1.1
+++ libc/ctype/isspace.c 21 Jul 2008 21:16:37 -0000
@@ -39,6 +39,6 @@ No supporting OS subroutines are require
int
_DEFUN(isspace,(c),int c)
{
- return((_ctype_ + 1)[c] & _S);
+ return(__ctype_ptr__[c+1] & _S);
}
Index: libc/ctype/isupper.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/isupper.c,v
retrieving revision 1.2
diff -u -p -r1.2 isupper.c
--- libc/ctype/isupper.c 28 Oct 2005 21:33:22 -0000 1.2
+++ libc/ctype/isupper.c 21 Jul 2008 21:16:37 -0000
@@ -38,6 +38,6 @@ No supporting OS subroutines are require
int
_DEFUN(isupper,(c),int c)
{
- return((_ctype_ + 1)[c] & _U);
+ return(__ctype_ptr__[c+1] & _U);
}
Index: libc/ctype/isxdigit.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/isxdigit.c,v
retrieving revision 1.1.1.1
diff -u -p -r1.1.1.1 isxdigit.c
--- libc/ctype/isxdigit.c 17 Feb 2000 19:39:46 -0000 1.1.1.1
+++ libc/ctype/isxdigit.c 21 Jul 2008 21:16:37 -0000
@@ -40,6 +40,6 @@ No supporting OS subroutines are require
int
_DEFUN(isxdigit,(c),int c)
{
- return((_ctype_ + 1)[c] & ((_X)|(_N)));
+ return(__ctype_ptr__[c+1] & ((_X)|(_N)));
}