This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
Re: making the wide character properties Unicode compatible
- To: libc-alpha at sources dot redhat dot com
- Subject: Re: making the wide character properties Unicode compatible
- From: Bruno Haible <haible at ilog dot fr>
- Date: Sun, 1 Oct 2000 14:45:09 +0200 (CEST)
- References: <14799.18936.302663.551703@honolulu.ilog.fr><m3vgvjixo5.fsf@otr.mynet.cygnus.com>
Ulrich Drepper writes:
> > - xdigit: is lacking all the foreign digit characters, but SUSV2 says
> > "The definition of character class xdigit requires that the characters
> > included in character class digit be included here also."
>
> I challenge this one. It does not make any sense since then you would
> also have to add all forms of the letters. This is simply unreasonable.
Actually, ISO C 99 *mandates* that the 'xdigit' class contain only ASCII
characters, and likewise the 'digit' class! Therefore here is a small adjustment
to the i18n file; the patch also keeps gen-unicode-ctype.c consistent with it
(and brings the code in line with the GNU coding style).
2000-09-30 Bruno Haible <haible@clisp.cons.org>
* gen-unicode-ctype.c (is_digit, is_xdigit): Remove all non-ASCII
digits.
(is_alpha): Add them here.
* locales/i18n (digit): Remove all non-ASCII digits.
(alpha): Add them here.
*** glibc-20000928/localedata/gen-unicode-ctype.c.bak Tue Sep 26 01:50:03 2000
--- glibc-20000928/localedata/gen-unicode-ctype.c Fri Sep 29 01:23:19 2000
***************
*** 166,186 ****
int n;
lineno++;
! n = getfield(stream, field0, ';');
! n += getfield(stream, field1, ';');
! n += getfield(stream, field2, ';');
! n += getfield(stream, field3, ';');
! n += getfield(stream, field4, ';');
! n += getfield(stream, field5, ';');
! n += getfield(stream, field6, ';');
! n += getfield(stream, field7, ';');
! n += getfield(stream, field8, ';');
! n += getfield(stream, field9, ';');
! n += getfield(stream, field10, ';');
! n += getfield(stream, field11, ';');
! n += getfield(stream, field12, ';');
! n += getfield(stream, field13, ';');
! n += getfield(stream, field14, '\n');
if (n == 0)
break;
if (n != 15)
--- 166,186 ----
int n;
lineno++;
! n = getfield (stream, field0, ';');
! n += getfield (stream, field1, ';');
! n += getfield (stream, field2, ';');
! n += getfield (stream, field3, ';');
! n += getfield (stream, field4, ';');
! n += getfield (stream, field5, ';');
! n += getfield (stream, field6, ';');
! n += getfield (stream, field7, ';');
! n += getfield (stream, field8, ';');
! n += getfield (stream, field9, ';');
! n += getfield (stream, field10, ';');
! n += getfield (stream, field11, ';');
! n += getfield (stream, field12, ';');
! n += getfield (stream, field13, ';');
! n += getfield (stream, field14, '\n');
if (n == 0)
break;
if (n != 15)
***************
*** 196,216 ****
{
/* Deal with a range. */
lineno++;
! n = getfield(stream, field0, ';');
! n += getfield(stream, field1, ';');
! n += getfield(stream, field2, ';');
! n += getfield(stream, field3, ';');
! n += getfield(stream, field4, ';');
! n += getfield(stream, field5, ';');
! n += getfield(stream, field6, ';');
! n += getfield(stream, field7, ';');
! n += getfield(stream, field8, ';');
! n += getfield(stream, field9, ';');
! n += getfield(stream, field10, ';');
! n += getfield(stream, field11, ';');
! n += getfield(stream, field12, ';');
! n += getfield(stream, field13, ';');
! n += getfield(stream, field14, '\n');
if (n != 15)
{
fprintf (stderr, "missing end range in '%s':%d\n",
--- 196,216 ----
{
/* Deal with a range. */
lineno++;
! n = getfield (stream, field0, ';');
! n += getfield (stream, field1, ';');
! n += getfield (stream, field2, ';');
! n += getfield (stream, field3, ';');
! n += getfield (stream, field4, ';');
! n += getfield (stream, field5, ';');
! n += getfield (stream, field6, ';');
! n += getfield (stream, field7, ';');
! n += getfield (stream, field8, ';');
! n += getfield (stream, field9, ';');
! n += getfield (stream, field10, ';');
! n += getfield (stream, field11, ';');
! n += getfield (stream, field12, ';');
! n += getfield (stream, field13, ';');
! n += getfield (stream, field14, '\n');
if (n != 15)
{
fprintf (stderr, "missing end range in '%s':%d\n",
***************
*** 390,406 ****
|| (unicode_attributes[ch].category[0] == 'S'
&& unicode_attributes[ch].category[1] == 'o'
&& strstr (unicode_attributes[ch].name, " LETTER ")
! != NULL)));
}
static bool
is_digit (unsigned int ch)
{
return (unicode_attributes[ch].name != NULL
&& unicode_attributes[ch].category[0] == 'N'
&& unicode_attributes[ch].category[1] == 'd');
/* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
a zero. Must add <0> in front of them by hand. */
}
static bool
--- 390,424 ----
|| (unicode_attributes[ch].category[0] == 'S'
&& unicode_attributes[ch].category[1] == 'o'
&& strstr (unicode_attributes[ch].name, " LETTER ")
! != NULL)
! /* Consider all the non-ASCII digits as alphabetic.
! ISO C 99 forbids us to have them in category "digit",
! but we want iswalnum to return true on them. */
! || (unicode_attributes[ch].category[0] == 'N'
! && unicode_attributes[ch].category[1] == 'd'
! && !(ch >= 0x0030 && ch <= 0x0039))));
}
static bool
is_digit (unsigned int ch)
{
+ #if 0
return (unicode_attributes[ch].name != NULL
&& unicode_attributes[ch].category[0] == 'N'
&& unicode_attributes[ch].category[1] == 'd');
/* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
a zero. Must add <0> in front of them by hand. */
+ #else
+ /* SUSV2 gives us some freedom for the "digit" category, but ISO C 99
+ takes it away:
+ 7.25.2.1.5:
+ The iswdigit function tests for any wide character that corresponds
+ to a decimal-digit character (as defined in 5.2.1).
+ 5.2.1:
+ the 10 decimal digits 0 1 2 3 4 5 6 7 8 9
+ */
+ return (ch >= 0x0030 && ch <= 0x0039);
+ #endif
}
static bool
***************
*** 455,463 ****
--- 473,495 ----
static bool
is_xdigit (unsigned int ch)
{
+ #if 0
return is_digit (ch)
|| (ch >= 0x0041 && ch <= 0x0046)
|| (ch >= 0x0061 && ch <= 0x0066);
+ #else
+ /* SUSV2 gives us some freedom for the "xdigit" category, but ISO C 99
+ takes it away:
+ 7.25.2.1.12:
+ The iswxdigit function tests for any wide character that corresponds
+ to a hexadecimal-digit character (as defined in 6.4.4.1).
+ 6.4.4.1:
+ hexadecimal-digit: one of 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
+ */
+ return (ch >= 0x0030 && ch <= 0x0039)
+ || (ch >= 0x0041 && ch <= 0x0046)
+ || (ch >= 0x0061 && ch <= 0x0066);
+ #endif
}
static bool
*** glibc-20000928/localedata/dump-ctype.c.bak Tue Sep 26 01:50:48 2000
--- glibc-20000928/localedata/dump-ctype.c Fri Sep 29 01:22:32 2000
***************
*** 113,119 ****
}
}
! int main (int argc, char *argv[])
{
size_t i;
--- 113,120 ----
}
}
! int
! main (int argc, char *argv[])
{
size_t i;
*** glibc-20000928/localedata/locales/i18n.bak Tue Sep 26 14:41:43 2000
--- glibc-20000928/localedata/locales/i18n Sat Sep 30 02:36:34 2000
***************
*** 305,317 ****
<UFE70>..<UFE72>;<UFE74>;<UFE76>..<UFEFC>;/
% HALFWIDTH AND FULLWIDTH FORMS/
<UFF21>..<UFF3A>;<UFF41>..<UFF5A>;<UFF66>..<UFFBE>;<UFFC2>..<UFFC7>;/
! <UFFCA>..<UFFCF>;<UFFD2>..<UFFD7>;<UFFDA>..<UFFDC>
!
! % The "digit" class of the "i18n" FDCC-set is reflecting
! % the recommendations in TR 10176 annex A
! digit /
! % TABLE 1 BASIC LATIN/
! <U0030>..<U0039>;/
% TABLE 15 and 16 ARABIC/
<U0660>..<U0669>;<U06F0>..<U06F9>;/
% TABLE 17 DEVANAGARI/
--- 305,314 ----
<UFE70>..<UFE72>;<UFE74>;<UFE76>..<UFEFC>;/
% HALFWIDTH AND FULLWIDTH FORMS/
<UFF21>..<UFF3A>;<UFF41>..<UFF5A>;<UFF66>..<UFFBE>;<UFFC2>..<UFFC7>;/
! <UFFCA>..<UFFCF>;<UFFD2>..<UFFD7>;<UFFDA>..<UFFDC>;/
! % The non-ASCII number characters are included here because ISO C 99 /
! % forbids us to classify them as digits; however, they behave more like /
! % alphanumeric than like punctuation. /
% TABLE 15 and 16 ARABIC/
<U0660>..<U0669>;<U06F0>..<U06F9>;/
% TABLE 17 DEVANAGARI/
***************
*** 349,354 ****
--- 346,356 ----
% HALFWIDTH AND FULLWIDTH FORMS/
<UFF10>..<UFF19>
+ % The "digit" class must only contain the BASIC LATIN digits, says ISO C 99
+ % (sections 7.25.2.1.5 and 5.2.1).
+ digit /
+ <U0030>..<U0039>
+
outdigit <U0030>..<U0039>
space /
***************
*** 602,607 ****
--- 604,611 ----
<UFFD2>..<UFFD7>;<UFFDA>..<UFFDC>;<UFFE0>..<UFFE6>;<UFFE8>..<UFFEE>;/
<UFFF9>..<UFFFD>
+ % The "xdigit" class must only contain the BASIC LATIN digits and A-F, a-f,
+ % says ISO C 99 (sections 7.25.2.1.12 and 6.4.4.1).
xdigit /
<U0030>..<U0039>;<U0041>..<U0046>;<U0061>..<U0066>