[PATCH] Fix IBM{932,943} iconv modules

Jakub Jelinek jakub@redhat.com
Mon Aug 2 12:27:00 GMT 2004


Hi!

There are two bugs in these converters.
One is that they don't immediately fail for characters >= L'\xffff'
when converting UCS4 to those charsets (pccode is uint16_t, so e.g.
L'\x100a6' is treated as L'\xa6'); the other bug is that the binary
search is not coded correctly.  Initially, high is the first index
above the range (the size of the table), but later on it is treated as the
highest index in the range.  i/low/high are all unsigned variables, so,
together with the previous bug, a ch >= L'\x10000' and <= L'\x100a6', for
example, results in a segfault (i - 1 wraps around).
Tested by feeding iconv all the __ucs4_to_ibm932db (resp.
__ucs4_to_ibm943db) FROM values and checking the results against TO,
plus a few other values (like L'\xffe6', L'\x10000', L'\x100a6' etc.).

2004-08-02  Jakub Jelinek  <jakub@redhat.com>

	* iconvdata/ibm932.c (BODY): Avoid binary search for ch >= 0xffff.
	Always treat high as highest number in range + 1.
	* iconvdata/ibm943.c (BODY): Likewise.

--- libc/iconvdata/ibm932.c.jj	2002-12-02 23:20:56.000000000 +0100
+++ libc/iconvdata/ibm932.c	2004-08-02 13:55:28.621726700 +0200
@@ -1,5 +1,5 @@
 /* Conversion from and to IBM932.
-   Copyright (C) 2000-2002 Free Software Foundation, Inc.
+   Copyright (C) 2000-2002, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Masahide Washizawa <washi@jp.ibm.com>, 2000.
 
@@ -169,20 +169,21 @@
 	high = (sizeof (__ucs4_to_ibm932db) >> 1)			      \
 		/ sizeof (__ucs4_to_ibm932db[0][FROM]);			      \
 	pccode = ch;							      \
-	while (low <= high)						      \
-	  {								      \
-	    i = (low + high) >> 1;					      \
-	    if (pccode < __ucs4_to_ibm932db[i][FROM])			      \
-	      high = i - 1;						      \
-	    else if (pccode > __ucs4_to_ibm932db[i][FROM])		      \
-	      low = i + 1;						      \
-	    else 							      \
-	      {								      \
-		pccode = __ucs4_to_ibm932db[i][TO];			      \
-		found = true;						      \
-		break;							      \
-	      }								      \
-	  }								      \
+	if (__builtin_expect (rp != NULL, 1))				      \
+	  while (low < high)						      \
+	    {								      \
+	      i = (low + high) >> 1;					      \
+	      if (pccode < __ucs4_to_ibm932db[i][FROM])			      \
+		high = i;						      \
+	      else if (pccode > __ucs4_to_ibm932db[i][FROM])		      \
+		low = i + 1;						      \
+	      else 							      \
+		{							      \
+		  pccode = __ucs4_to_ibm932db[i][TO];			      \
+		  found = true;						      \
+		  break;						      \
+		}							      \
+	    }								      \
 	if (found) 							      \
 	  {								      \
 	    if (__builtin_expect (outptr + 2 > outend, 0))		      \
--- libc/iconvdata/ibm943.c.jj	2002-12-02 23:22:52.000000000 +0100
+++ libc/iconvdata/ibm943.c	2004-08-02 13:35:29.969089593 +0200
@@ -1,5 +1,5 @@
 /* Conversion from and to IBM943.
-   Copyright (C) 2000-2002 Free Software Foundation, Inc.
+   Copyright (C) 2000-2002, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Masahide Washizawa <washi@jp.ibm.com>, 2000.
 
@@ -170,20 +170,21 @@
 	high = (sizeof (__ucs4_to_ibm943db) >> 1)			      \
 		/ sizeof (__ucs4_to_ibm943db[0][FROM]);			      \
 	pccode = ch;							      \
-	while (low <= high)						      \
-	  {								      \
-	    i = (low + high) >> 1;					      \
-	    if (pccode < __ucs4_to_ibm943db[i][FROM])			      \
-	      high = i - 1;						      \
-	    else if (pccode > __ucs4_to_ibm943db[i][FROM])		      \
-	      low = i + 1;						      \
-	    else 							      \
-	      {								      \
-		pccode = __ucs4_to_ibm943db[i][TO];			      \
-		found = true;						      \
-		break;							      \
-	      }								      \
-	  }								      \
+	if (__builtin_expect (rp != NULL, 1))				      \
+	  while (low < high)						      \
+	    {								      \
+	      i = (low + high) >> 1;					      \
+	      if (pccode < __ucs4_to_ibm943db[i][FROM])			      \
+		high = i;						      \
+	      else if (pccode > __ucs4_to_ibm943db[i][FROM])		      \
+		low = i + 1;						      \
+	      else 							      \
+		{							      \
+		  pccode = __ucs4_to_ibm943db[i][TO];			      \
+		  found = true;						      \
+		  break;						      \
+		}							      \
+	    }								      \
 	if (found) 							      \
 	  {								      \
 	    if (__builtin_expect (outptr + 2 > outend, 0))		      \

	Jakub



More information about the Libc-hacker mailing list