This is the mail archive of the
newlib@sourceware.org
mailing list for the newlib project.
Re: [PATCH] Read locale settings from environment
On Feb 20 11:14, Corinna Vinschen wrote:
> > There is a problem with this patch. The code path you have made changes
> > to
> > applies when locale is set to "C" or "". In the case of "C" the old code
> > should still be in place (i.e. if !strcmp(locale, "C")). A check is needed
> > for when !strcmp(locale, ""). If you make that fix, it should be fine.
> >
> > Another problem exists with the current code. The return value from
> > LC_ALL should be a concatenation of the various locale settings separated
> > by a special character (e.g. ':'). The LC_ALL category needs to check if
> > that is the form of the input string given and separate them out and call
> > for each category. This way, the original settings can be restored on a
> > subsequent call to setlocale() with the string given back from LC_ALL.
> > This form only applies to LC_ALL and is not valid input for any other
> > category.
>
> There's more broken in setlocale. For instance, if locale is "C" or
> "", the variable locale_name is set to "C". But afterwards, the tests
> are still using locale instead of locale_name. And worse, locale[1]
> is tested, even though locale could be "" at this point. It also
> just occurred to me that the current code disallows *any* other setting
> of LC_ALL except for "C" or "". I'll rework the function a bit. Stay
> tuned.
Ok, here's my new setlocale implementation. It fixes the following
problems:
- Make the static locale buffers bigger (16 instead of 12 bytes). The
reason is that the longest currently supported locale, "C-ISO-8859-1",
has a strlen of 12 bytes. Uh oh...
- Fix the potential access of a byte beyond the incoming locale string
in case the locale string is "".
- Don't return the *previous* locale setting of the category, rather
return the *current* locale setting, as per POSIX. Consequently
remove the last_lc_ctype and last_lc_messages variables.
- Per POSIX allow the required "POSIX" locale. Map it to the "C" locale
as on Linux.
- If locale is "", honor the environment in the order required by POSIX
for all supported categories.
- If category is LC_ALL, return a colon separated list of the current
settings of all supported categories.
- If category is LC_ALL, check if the incoming locale contains a colon.
If so, use the input to set all supported categories accordingly.
Corinna
* libc/locale/locale.c: Fix documentation.
(__lc_ctype): Raise size to 16 bytes.
(_setlocale_r): Allow "POSIX" locale and map to "C" locale.
Raise size of lc_messages to 16 bytes. Add static lc_all
string array. Handle LC_ALL string according to POSIX.
If locale is the empty string, read the locale settings from
the environment using POSIX rules.
Index: libc/locale/locale.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/locale/locale.c,v
retrieving revision 1.8
diff -u -p -r1.8 locale.c
--- libc/locale/locale.c 23 Apr 2004 21:44:21 -0000 1.8
+++ libc/locale/locale.c 20 Feb 2009 12:07:41 -0000
@@ -42,13 +42,13 @@ execution environment for international
information; <<localeconv>> reports on the settings of the current
locale.
-This is a minimal implementation, supporting only the required <<"C">>
-value for <[locale]>; strings representing other locales are not
-honored unless _MB_CAPABLE is defined in which case three new
-extensions are allowed for LC_CTYPE or LC_MESSAGES only: <<"C-JIS">>,
-<<"C-EUCJP">>, <<"C-SJIS">>, or <<"C-ISO-8859-1">>. (<<"">> is
-also accepted; it represents the default locale
-for an implementation, here equivalent to <<"C">>.)
+This is a minimal implementation, supporting only the required <<"POSIX">>
+and <<"C">> values for <[locale]>; strings representing other locales are not
+honored unless _MB_CAPABLE is defined in which case five extensions
+are allowed for LC_ALL, LC_CTYPE or LC_MESSAGES only: <<"C-UTF-8">>,
+<<"C-JIS">>, <<"C-EUCJP">>, <<"C-SJIS">>, or <<"C-ISO-8859-1">>. (<<"">> is
+also accepted; if given, the settings are read from the corresponding
+LC_* environment variables and $LANG.)
If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns
a pointer to the string representing the current locale (always
@@ -66,9 +66,11 @@ in effect.
<[reent]> is a pointer to a reentrancy structure.
RETURNS
-<<setlocale>> returns either a pointer to a string naming the locale
-currently in effect (always <<"C">> for this implementation, or, if
-the locale request cannot be honored, <<NULL>>.
+A successful call to <<setlocale>> returns a pointer to a string
+naming the locale currently in effect. The string returned by
+<<setlocale>> is such that a subsequent call using that string will
+restore that category (or all categories in case of LC_ALL), to that
+state. On error, <<setlocale>> returns <<NULL>>.
<<localeconv>> returns a pointer to a structure of type <<lconv>>,
which describes the formatting and collating conventions in effect (in
@@ -91,6 +93,7 @@ No supporting OS subroutines are require
#include <string.h>
#include <limits.h>
#include <reent.h>
+#include <stdlib.h>
#ifdef __CYGWIN__
int __declspec(dllexport) __mb_cur_max = 1;
@@ -113,7 +116,7 @@ static _CONST struct lconv lconv =
char * _EXFUN(__locale_charset,(_VOID));
static char *charset = "ISO-8859-1";
-char __lc_ctype[12] = "C";
+char __lc_ctype[16] = "C";
char *
_DEFUN(_setlocale_r, (p, category, locale),
@@ -124,33 +127,57 @@ _DEFUN(_setlocale_r, (p, category, local
#ifndef _MB_CAPABLE
if (locale)
{
- if (strcmp (locale, "C") && strcmp (locale, ""))
- return 0;
+ if (strcmp (locale, "POSIX") && strcmp (locale, "C")
+ && strcmp (locale, ""))
+ return NULL;
p->_current_category = category;
p->_current_locale = locale;
}
return "C";
#else
- static char last_lc_ctype[12] = "C";
- static char lc_messages[12] = "C";
- static char last_lc_messages[12] = "C";
+ static char lc_messages[16] = "C";
+ static char lc_all[32] = "C:C";
if (locale)
{
char *locale_name = (char *)locale;
if (category != LC_CTYPE && category != LC_MESSAGES)
- {
- if (strcmp (locale, "C") && strcmp (locale, ""))
- return 0;
- if (category == LC_ALL)
- {
- strcpy (last_lc_ctype, __lc_ctype);
- strcpy (__lc_ctype, "C");
- strcpy (last_lc_messages, lc_messages);
- strcpy (lc_messages, "C");
- __mb_cur_max = 1;
- }
- }
+ {
+ if (category != LC_ALL)
+ {
+ if (strcmp (locale, "POSIX") && strcmp (locale, "C")
+ && strcmp (locale, ""))
+ return NULL;
+ }
+ else
+ {
+ char *colon, *ret;
+ if ((colon = strchr (locale_name, ':')))
+ {
+ /* Too long, probably invalid anyway. */
+ if (strlen (locale_name) > 31)
+ return NULL;
+ /* Use lc_all as temporary storage, if locale
+ isn't a pointer to lc_all anyway. */
+ if (locale_name != lc_all)
+ strcpy (lc_all, locale_name);
+ colon = strchr (lc_all, ':');
+ *colon++ = '\0';
+ ret = _setlocale_r (p, LC_CTYPE, lc_all);
+ if (ret)
+ _setlocale_r (p, LC_MESSAGES, colon);
+ }
+ else
+ {
+ ret = _setlocale_r (p, LC_CTYPE, locale_name);
+ if (ret)
+ _setlocale_r (p, LC_MESSAGES, locale_name);
+ }
+ stpcpy (stpcpy (stpcpy (lc_all, __lc_ctype), ":"),
+ lc_messages);
+ return lc_all;
+ }
+ }
else
{
if (locale[0] == 'C' && locale[1] == '-')
@@ -181,22 +208,36 @@ _DEFUN(_setlocale_r, (p, category, local
return 0;
}
}
- else
- {
- if (strcmp (locale, "C") && strcmp (locale, ""))
- return 0;
- locale_name = "C"; /* C is always the default locale */
- }
-
+ else if (!locale[0])
+ {
+ /* Per POSIX always check LC_ALL first, then the actual
+ locale category, then LANG. */
+ if ((locale_name = _getenv_r (p, "LC_ALL")))
+ ;
+ else if (category == LC_CTYPE
+ && (locale_name = _getenv_r (p, "LC_CTYPE")))
+ ;
+ else if (category == LC_MESSAGES
+ && (locale_name = _getenv_r (p, "LC_MESSAGES")))
+ ;
+ else if ((locale_name = _getenv_r (p, "LANG"))
+ && (locale_name = strchr (locale_name, '.')))
+ ;
+ else
+ locale_name = "C";
+ }
+ else if (!strcmp (locale, "POSIX"))
+ locale_name = "C";
+ else if (strcmp (locale, "C"))
+ return 0;
if (category == LC_CTYPE)
{
- strcpy (last_lc_ctype, __lc_ctype);
strcpy (__lc_ctype, locale_name);
__mb_cur_max = 1;
- if (locale[1] == '-')
+ if (locale_name[1] == '-')
{
- switch (locale[2])
+ switch (locale_name[2])
{
case 'U':
__mb_cur_max = 6;
@@ -218,13 +259,12 @@ _DEFUN(_setlocale_r, (p, category, local
}
else
{
- strcpy (last_lc_messages, lc_messages);
strcpy (lc_messages, locale_name);
charset = "ISO-8859-1";
- if (locale[1] == '-')
+ if (locale_name[1] == '-')
{
- switch (locale[2])
+ switch (locale_name[2])
{
case 'U':
charset = "UTF-8";
@@ -248,12 +288,12 @@ _DEFUN(_setlocale_r, (p, category, local
}
}
p->_current_category = category;
- p->_current_locale = locale;
+ p->_current_locale = locale_name;
if (category == LC_CTYPE)
- return last_lc_ctype;
+ return __lc_ctype;
else if (category == LC_MESSAGES)
- return last_lc_messages;
+ return lc_messages;
}
else
{
--
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat