This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
bugs in JOHAB converter
- To: libc-alpha at sources dot redhat dot com
- Subject: bugs in JOHAB converter
- From: Bruno Haible <haible at ilog dot fr>
- Date: Mon, 25 Sep 2000 14:46:09 +0200 (CEST)
- Cc: Jungshik Shin <jshin at pantheon dot yale dot edu>
Hi Ulrich,
Now that you have added a charmap for JOHAB, a few more small bugs in the
JOHAB converter have become apparent.
In the charmap:
- The mapping for 0x7f is missing. (Some Unicode.org tables omit the
control characters 0x00..0x1f and 0x7f.)
In the JOHAB -> Unicode direction:
- mapping for 0x5c wrong
- mapping for 0x7f missing
- mapping for 0x844c wrong
- mapping for 0x8454 swapped with 0x8455
- extra mappings for 0xd9e6..0xd9e7
- extra mappings for 0xdef9..0xdefe (out-of-range array access)
In the Unicode -> JOHAB direction:
- mapping for 0x5c wrong
- mapping for 0x7f missing
- mappings for U+313E, U+313F, U+314D wrong
- extra mappings for 0xd9e6..0xd9e7
Here is a fix.
2000-09-23 Bruno Haible <haible@clisp.cons.org>
* charmaps/JOHAB: Add identity mapping for 0x7f.
2000-09-23 Bruno Haible <haible@clisp.cons.org>
* iconvdata/johab.c (final_to_ucs): Fix typos.
(jamo_from_ucs_table): Likewise.
(BODY for FROM_LOOP): Map 0x5c to U+20A9. Reject ranges
0xD9E6..0xD9FE and 0xDEF2..0xDEFE.
(BODY for TO_LOOP): Map U+20A9 to 0x5c. Don't produce values in
the range 0xD9E6..0xD9FE.
*** glibc-20000914/localedata/charmaps/JOHAB.bak Thu Sep 14 22:36:46 2000
--- glibc-20000914/localedata/charmaps/JOHAB Sat Sep 23 18:05:42 2000
***************
*** 133,138 ****
--- 133,139 ----
<U007C> /x7c VERTICAL LINE
<U007D> /x7d RIGHT CURLY BRACKET
<U007E> /x7e TILDE
+ <U007F> /x7f DELETE (DEL)
<U3133> /x84/x44 HANGUL LETTER KIYEOK-SIOS
<U3135> /x84/x46 HANGUL LETTER NIEUN-CIEUC
<U3136> /x84/x47 HANGUL LETTER NIEUN-HIEUH
*** glibc-20000914/iconvdata/johab.c.bak Tue Aug 1 13:52:59 2000
--- glibc-20000914/iconvdata/johab.c Sat Sep 23 19:08:36 2000
***************
*** 68,75 ****
static const uint32_t final_to_ucs[31] =
{
L'\0', L'\0', 0x3133, L'\0', 0x3135, 0x3136, L'\0', L'\0',
! 0x313a, 0x313b, 0x314c, 0x313d, 0x313e, 0x313f,
! 0x3140, L'\0', L'\0', L'\0', 0x3144, L'\0', L'\0', L'\0',
L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'
};
--- 68,75 ----
static const uint32_t final_to_ucs[31] =
{
L'\0', L'\0', 0x3133, L'\0', 0x3135, 0x3136, L'\0', L'\0',
! 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
! 0x3140, L'\0', L'\0', 0x3144, L'\0', L'\0', L'\0', L'\0',
L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'
};
***************
*** 120,130 ****
0x9041,
0x8446, 0x8447,
0x9441, 0x9841, 0x9c41,
! 0x844a, 0x844b, 0x844c, 0x844d, 0x884e, 0x884f, 0x8450,
0xa041, 0xa441, 0xa841,
0x8454,
0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
! 0xc041, 0xc441, 0xc841, 0xca41, 0xd041,
0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
--- 120,130 ----
0x9041,
0x8446, 0x8447,
0x9441, 0x9841, 0x9c41,
! 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
0xa041, 0xa441, 0xa841,
0x8454,
0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
! 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
***************
*** 162,176 ****
{ \
uint32_t ch = *inptr; \
\
! /* half-width Korean Currency WON sign \
! if (ch == 0x5c) \
! ch = 0x20a9; \
! else if (ch < 0x7f) \
! ch = (uint32_t) ch; \
! */ \
! if (ch < 0x7f) \
! /* Plain ASCII. */ \
! ++inptr; \
/* Johab : 1. Hangul \
1st byte : 0x84-0xd3 \
2nd byte : 0x41-0x7e, 0x81-0xfe \
--- 162,174 ----
{ \
uint32_t ch = *inptr; \
\
! if (ch <= 0x7f) \
! { \
! /* Plain ISO646-KR. */ \
! if (ch == 0x5c) \
! ch = 0x20a9; /* half-width Korean Currency WON sign */ \
! ++inptr; \
! } \
/* Johab : 1. Hangul \
1st byte : 0x84-0xd3 \
2nd byte : 0x41-0x7e, 0x81-0xfe \
***************
*** 268,275 ****
if (__builtin_expect (ch2, 0x31) < 0x31 \
|| (__builtin_expect (ch2, 0x7e) > 0x7e && ch2 < 0x91) \
|| __builtin_expect (ch2, 0) == 0xff \
|| (__builtin_expect (ch, 0) == 0xda \
! && ch2 > 0xa0 && ch2 < 0xd4)) \
{ \
/* This is illegal. */ \
if (! ignore_errors_p ()) \
--- 266,275 ----
if (__builtin_expect (ch2, 0x31) < 0x31 \
|| (__builtin_expect (ch2, 0x7e) > 0x7e && ch2 < 0x91) \
|| __builtin_expect (ch2, 0) == 0xff \
+ || (__builtin_expect (ch, 0) == 0xd9 && ch2 > 0xe5) \
|| (__builtin_expect (ch, 0) == 0xda \
! && ch2 > 0xa0 && ch2 < 0xd4) \
! || (__builtin_expect (ch, 0) == 0xde && ch2 > 0xf1)) \
{ \
/* This is illegal. */ \
if (! ignore_errors_p ()) \
***************
*** 346,352 ****
cp = from_ucs4_lat1[ch]; \
*/ \
\
! if (ch < 0x7f) \
*outptr++ = ch; \
else \
{ \
--- 346,352 ----
cp = from_ucs4_lat1[ch]; \
*/ \
\
! if (ch <= 0x7f && ch != 0x5c) \
*outptr++ = ch; \
else \
{ \
***************
*** 410,415 ****
--- 410,417 ----
\
outptr += 2; \
} \
+ else if (ch == 0x20a9) \
+ *outptr++ = 0x5c; \
else \
{ \
size_t written; \
***************
*** 421,427 ****
result = __GCONV_FULL_OUTPUT; \
break; \
} \
! if (__builtin_expect (written, 1) == __UNKNOWN_10646_CHAR) \
{ \
STANDARD_ERR_HANDLER (4); \
} \
--- 423,430 ----
result = __GCONV_FULL_OUTPUT; \
break; \
} \
! if (__builtin_expect (written, 1) == __UNKNOWN_10646_CHAR \
! || (outptr[0] == 0x22 && outptr[1] > 0x65)) \
{ \
STANDARD_ERR_HANDLER (4); \
} \
*** glibc-20000914/iconvdata/tst-tables.sh.bak Thu Sep 14 17:23:16 2000
--- glibc-20000914/iconvdata/tst-tables.sh Sat Sep 23 18:19:47 2000
***************
*** 186,192 ****
SJIS
EUC-KR
CP949
! #JOHAB No charmap exists
BIG5
BIG5HKSCS
EUC-JP
--- 186,192 ----
SJIS
EUC-KR
CP949
! JOHAB
BIG5
BIG5HKSCS
EUC-JP