This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH] setlocale: New implementation

From: Corinna Vinschen <vinschen at redhat dot com>
To: newlib at sourceware dot org
Date: Fri, 27 Feb 2009 11:47:28 +0100
Subject: [PATCH] setlocale: New implementation
Reply-to: newlib at sourceware dot org
Hi,

as promised in http://sourceware.org/ml/newlib/2009/msg00194.html here's
my new setlocale implementation.  It's now based on the FreeBSD
implementation.

The handling of the locale string is encapsulated in its own function
called loadlocale.  Originally this function is the one which actually
loads the locale information from the locale-specific file.
This adaptation for newlib only checks the incoming string for correctness
and stores the information.  External functions formerly accessing
__lc_ctype are changed to use the function call __locale_charset.  I
renamed the former __locale_charset function to __locale_msgcharset.
This seemed more logical to me.  This implementation of loadlocale now
accepts all incoming locale strings of the style

  "C" or "POSIX",

  language[_territory][.charset][@modifier]
    with language being two lowercase ASCII letters,
    territory being two uppercase ASCII letters,
    charset being one of UTF-8, JIS, SJIS, EUCJP, ISO-8859-[1..15]

  "C-UTF-8", "C-JIS", "C-SJIS", "C-EUCJP", "C-ISO-8859-[1..15]"
    for backward compatibility.

So far only the charset information is used in other parts of newlib.

The idea behind using the FreeBSD function is that subsequent work can
support real locale information from locale files by just changing the
loadlocale() function on a per-target basis.  I'm planning to do that for
Cygwin in the long run to support real locales.  Help would be greatly
appreciated.


Corinna


	* libc/locale/locale.c (_setlocale_r): New implementation
	based on FreeBSD's setlocale.
	(currentlocale): New helper function.
	(loadlocale): Ditto.
	(__locale_charset): New function.
	(__locale_msgcharset): Rename from __locale_charset.
	* libc/ctype/local.h (__lc_ctype): Remove declaration.
	(__locale_charset): Declare.
	* libc/ctype/iswalpha.c (iswalpha): Call __locale_charset instead
	of using __lc_ctype directly.  Only compare against the charset
	alone.
	* libc/ctype/iswblank.c (iswblank): Ditto.
	* libc/ctype/iswcntrl.c (iswcntrl): Ditto.
	* libc/ctype/iswprint.c (iswprint): Ditto.
	* libc/ctype/iswpunct.c (iswpunct): Ditto.
	* libc/ctype/iswspace.c (iswspace): Ditto.
	* libc/ctype/towlower.c (towlower): Ditto.
	* libc/ctype/towupper.c (towupper): Ditto.
	* libc/stdlib/mbtowc_r.c (_mbtowc_r): Ditto.
	* libc/stdlib/wctomb_r.c (_wctomb_r): Ditto.
	* libc/sys/linux/intl/loadmsgcat.c (_nl_init_domain_conv): Call
	__locale_msgcharset instead of __locale_charset.


Index: libc/ctype/iswalpha.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/iswalpha.c,v
retrieving revision 1.4
diff -u -p -r1.4 iswalpha.c
--- libc/ctype/iswalpha.c	28 Oct 2005 21:33:22 -0000	1.4
+++ libc/ctype/iswalpha.c	27 Feb 2009 10:40:19 -0000
@@ -69,29 +69,25 @@ No supporting OS subroutines are require
 int
 _DEFUN(iswalpha,(c), wint_t c)
 {
-  int unicode = 0;
-  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
-    {
-      unicode = 0;
-      /* fall-through */ 
-    }
 #ifdef _MB_CAPABLE
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  int unicode = 0;
+
+  if (!strcmp (__locale_charset (), "JIS"))
     {
       c = __jp2uc (c, JP_JIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       c = __jp2uc (c, JP_SJIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       c = __jp2uc (c, JP_EUCJP);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       unicode = 1;
     }
Index: libc/ctype/iswblank.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/iswblank.c,v
retrieving revision 1.4
diff -u -p -r1.4 iswblank.c
--- libc/ctype/iswblank.c	28 Oct 2005 21:33:22 -0000	1.4
+++ libc/ctype/iswblank.c	27 Feb 2009 10:40:19 -0000
@@ -65,29 +65,25 @@ No supporting OS subroutines are require
 int
 _DEFUN(iswblank,(c), wint_t c)
 {
-  int unicode = 0;
-  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
-    {
-      unicode = 0;
-      /* fall-through */ 
-    }
 #ifdef _MB_CAPABLE
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  int unicode = 0;
+
+  if (!strcmp (__locale_charset (), "JIS"))
     {
       c = __jp2uc (c, JP_JIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       c = __jp2uc (c, JP_SJIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       c = __jp2uc (c, JP_EUCJP);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       unicode = 1;
     }
Index: libc/ctype/iswcntrl.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/iswcntrl.c,v
retrieving revision 1.4
diff -u -p -r1.4 iswcntrl.c
--- libc/ctype/iswcntrl.c	28 Oct 2005 21:33:22 -0000	1.4
+++ libc/ctype/iswcntrl.c	27 Feb 2009 10:40:19 -0000
@@ -65,29 +65,25 @@ No supporting OS subroutines are require
 int
 _DEFUN(iswcntrl,(c), wint_t c)
 {
-  int unicode = 0;
-  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
-    {
-      unicode = 0;
-      /* fall-through */ 
-    }
 #ifdef _MB_CAPABLE
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  int unicode = 0;
+
+  if (!strcmp (__locale_charset (), "JIS"))
     {
       c = __jp2uc (c, JP_JIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       c = __jp2uc (c, JP_SJIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       c = __jp2uc (c, JP_EUCJP);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       unicode = 1;
     }
Index: libc/ctype/iswprint.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/iswprint.c,v
retrieving revision 1.4
diff -u -p -r1.4 iswprint.c
--- libc/ctype/iswprint.c	28 Oct 2005 21:33:22 -0000	1.4
+++ libc/ctype/iswprint.c	27 Feb 2009 10:40:19 -0000
@@ -69,29 +69,25 @@ No supporting OS subroutines are require
 int
 _DEFUN(iswprint,(c), wint_t c)
 {
-  int unicode = 0;
-  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
-    {
-      unicode = 0;
-      /* fall-through */ 
-    }
 #ifdef _MB_CAPABLE
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  int unicode = 0;
+
+  if (!strcmp (__locale_charset (), "JIS"))
     {
       c = __jp2uc (c, JP_JIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       c = __jp2uc (c, JP_SJIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       c = __jp2uc (c, JP_EUCJP);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       unicode = 1;
     }
Index: libc/ctype/iswpunct.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/iswpunct.c,v
retrieving revision 1.4
diff -u -p -r1.4 iswpunct.c
--- libc/ctype/iswpunct.c	28 Oct 2005 21:33:22 -0000	1.4
+++ libc/ctype/iswpunct.c	27 Feb 2009 10:40:19 -0000
@@ -69,29 +69,25 @@ No supporting OS subroutines are require
 int
 _DEFUN(iswpunct,(c), wint_t c)
 {
-  int unicode = 0;
-  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
-    {
-      unicode = 0;
-      /* fall-through */ 
-    }
 #ifdef _MB_CAPABLE
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  int unicode = 0;
+
+  if (!strcmp (__locale_charset (), "JIS"))
     {
       c = __jp2uc (c, JP_JIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       c = __jp2uc (c, JP_SJIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       c = __jp2uc (c, JP_EUCJP);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       unicode = 1;
     }
Index: libc/ctype/iswspace.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/iswspace.c,v
retrieving revision 1.4
diff -u -p -r1.4 iswspace.c
--- libc/ctype/iswspace.c	28 Oct 2005 21:33:22 -0000	1.4
+++ libc/ctype/iswspace.c	27 Feb 2009 10:40:19 -0000
@@ -65,29 +65,25 @@ No supporting OS subroutines are require
 int
 _DEFUN(iswspace,(c), wint_t c)
 {
-  int unicode = 0;
-  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
-    {
-      unicode = 0;
-      /* fall-through */ 
-    }
 #ifdef _MB_CAPABLE
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  int unicode = 0;
+
+  if (!strcmp (__locale_charset (), "JIS"))
     {
       c = __jp2uc (c, JP_JIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       c = __jp2uc (c, JP_SJIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       c = __jp2uc (c, JP_EUCJP);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       unicode = 1;
     }
Index: libc/ctype/local.h
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/local.h,v
retrieving revision 1.1
diff -u -p -r1.1 local.h
--- libc/ctype/local.h	20 Sep 2002 20:13:10 -0000	1.1
+++ libc/ctype/local.h	27 Feb 2009 10:40:19 -0000
@@ -20,7 +20,7 @@
 #define WC_UPPER	11
 #define WC_XDIGIT	12
 
-extern char __lc_ctype[12];
+extern char *__locale_charset ();
 
 /* Japanese encoding types supported */
 #define JP_JIS		1
Index: libc/ctype/towlower.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/towlower.c,v
retrieving revision 1.4
diff -u -p -r1.4 towlower.c
--- libc/ctype/towlower.c	28 Oct 2005 21:33:22 -0000	1.4
+++ libc/ctype/towlower.c	27 Feb 2009 10:40:19 -0000
@@ -69,30 +69,25 @@ No supporting OS subroutines are require
 wint_t
 _DEFUN(towlower,(c), wint_t c)
 {
+#ifdef _MB_CAPABLE
   int unicode = 0;
 
-  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
-    {
-      unicode = 0;
-      /* fall-through */ 
-    }
-#ifdef _MB_CAPABLE
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  if (!strcmp (__locale_charset (), "JIS"))
     {
       c = __jp2uc (c, JP_JIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       c = __jp2uc (c, JP_SJIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       c = __jp2uc (c, JP_EUCJP);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       unicode = 1;
     }
Index: libc/ctype/towupper.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/ctype/towupper.c,v
retrieving revision 1.4
diff -u -p -r1.4 towupper.c
--- libc/ctype/towupper.c	28 Oct 2005 21:33:22 -0000	1.4
+++ libc/ctype/towupper.c	27 Feb 2009 10:40:19 -0000
@@ -69,30 +69,25 @@ No supporting OS subroutines are require
 wint_t
 _DEFUN(towupper,(c), wint_t c)
 {
+#ifdef _MB_CAPABLE
   int unicode = 0;
 
-  if (__lc_ctype[0] == 'C' && __lc_ctype[1] == '\0')
-    {
-      unicode = 0;
-      /* fall-through */ 
-    }
-#ifdef _MB_CAPABLE
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  if (!strcmp (__locale_charset (), "JIS"))
     {
       c = __jp2uc (c, JP_JIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       c = __jp2uc (c, JP_SJIS);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       c = __jp2uc (c, JP_EUCJP);
       unicode = 1;
     }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       unicode = 1;
     }
Index: libc/locale/locale.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/locale/locale.c,v
retrieving revision 1.8
diff -u -p -r1.8 locale.c
--- libc/locale/locale.c	23 Apr 2004 21:44:21 -0000	1.8
+++ libc/locale/locale.c	27 Feb 2009 10:40:19 -0000
@@ -42,13 +42,16 @@ execution environment for international 
 information; <<localeconv>> reports on the settings of the current
 locale.
 
-This is a minimal implementation, supporting only the required <<"C">>
-value for <[locale]>; strings representing other locales are not
-honored unless _MB_CAPABLE is defined in which case three new
-extensions are allowed for LC_CTYPE or LC_MESSAGES only: <<"C-JIS">>, 
-<<"C-EUCJP">>, <<"C-SJIS">>, or <<"C-ISO-8859-1">>.  (<<"">> is 
-also accepted; it represents the default locale
-for an implementation, here equivalent to <<"C">>.)
+This is a minimal implementation, supporting only the required <<"POSIX">>
+and <<"C">> values for <[locale]>; strings representing other locales are not
+honored unless _MB_CAPABLE is defined in which case POSIX locale strings
+are allowed, plus five extensions supported for backward compatibility with
+older implementations using newlib: <<"C-UTF-8">>, <<"C-JIS">>, <<"C-EUCJP">>,
+<<"C-SJIS">>, or <<"C-ISO-8859-x">> with 1 <= x <= 15.  Even when using
+POSIX locale strings, the only charsets allowed are <<"UTF-8">>, <<"JIS">>,
+<<"EUCJP">>, <<"SJIS">>, or <<"ISO-8859-x">> with 1 <= x <= 15.  (<<"">> is 
+also accepted; if given, the settings are read from the corresponding
+LC_* environment variables and $LANG according to POSIX rules.)
 
 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns
 a pointer to the string representing the current locale (always
@@ -66,9 +69,13 @@ in effect.  
 <[reent]> is a pointer to a reentrancy structure.
 
 RETURNS
-<<setlocale>> returns either a pointer to a string naming the locale
-currently in effect (always <<"C">> for this implementation, or, if
-the locale request cannot be honored, <<NULL>>.
+A successful call to <<setlocale>> returns a pointer to a string
+associated with the specified category for the new locale.  The string
+returned by <<setlocale>> is such that a subsequent call using that
+string will restore that category (or all categories in case of LC_ALL),
+to that state.  The application shall not modify the string returned
+which may be overwritten by a subsequent call to <<setlocale>>.
+On error, <<setlocale>> returns <<NULL>>.
 
 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
 which describes the formatting and collating conventions in effect (in
@@ -81,16 +88,50 @@ implementations is the C locale.
 No supporting OS subroutines are required.
 */
 
+/* Parts of this code are originally taken from FreeBSD. */
 /*
- * setlocale, localeconv : internationalize your locale.
- *                         (Only "C" or null supported).
+ * Copyright (c) 1996 - 2002 FreeBSD Project
+ * Copyright (c) 1991, 1993
+ *      The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Paul Borman at Krystal Technologies.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
  */
 
 #include <newlib.h>
+#include <errno.h>
 #include <locale.h>
 #include <string.h>
 #include <limits.h>
 #include <reent.h>
+#include <stdlib.h>
+
+#define _LC_LAST      7
+#define ENCODING_LEN 31
 
 #ifdef __CYGWIN__
 int __declspec(dllexport) __mb_cur_max = 1;
@@ -109,11 +150,48 @@ static _CONST struct lconv lconv = 
   CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
 };
 
+#ifdef _MB_CAPABLE
+/*
+ * Category names for getenv()
+ */
+static char *categories[_LC_LAST] = {
+  "LC_ALL",
+  "LC_COLLATE",
+  "LC_CTYPE",
+  "LC_MONETARY",
+  "LC_NUMERIC",
+  "LC_TIME",
+  "LC_MESSAGES",
+};
 
-char * _EXFUN(__locale_charset,(_VOID));
+/*
+ * Current locales for each category
+ */
+static char current_categories[_LC_LAST][ENCODING_LEN + 1] = {
+    "C",
+    "C",
+    "C",
+    "C",
+    "C",
+    "C",
+    "C",
+};
 
-static char *charset = "ISO-8859-1";
-char __lc_ctype[12] = "C";
+/*
+ * The locales we are going to try and load
+ */
+static char new_categories[_LC_LAST][ENCODING_LEN + 1];
+static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
+
+static char current_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)];
+static char *currentlocale(void);
+static char *loadlocale(struct _reent *, int);
+static const char *__get_locale_env(struct _reent *, int);
+
+#endif
+
+static char lc_ctype_charset[ENCODING_LEN + 1] = "ISO-8859-1";
+static char lc_message_charset[ENCODING_LEN + 1] = "ISO-8859-1";
 
 char *
 _DEFUN(_setlocale_r, (p, category, locale),
@@ -124,154 +202,303 @@ _DEFUN(_setlocale_r, (p, category, local
 #ifndef _MB_CAPABLE
   if (locale)
     { 
-      if (strcmp (locale, "C") && strcmp (locale, ""))
-        return 0;
+      if (strcmp (locale, "POSIX") && strcmp (locale, "C")
+	  && strcmp (locale, ""))
+        return NULL;
       p->_current_category = category;  
       p->_current_locale = locale;
     }
   return "C";
 #else
-  static char last_lc_ctype[12] = "C";
-  static char lc_messages[12] = "C";
-  static char last_lc_messages[12] = "C";
+  int i, j, len, saverr;
+  const char *env, *r;
 
-  if (locale)
+  if (category < LC_ALL || category >= _LC_LAST)
     {
-      char *locale_name = (char *)locale;
-      if (category != LC_CTYPE && category != LC_MESSAGES) 
-        { 
-          if (strcmp (locale, "C") && strcmp (locale, ""))
-            return 0;
-          if (category == LC_ALL)
-            {
-              strcpy (last_lc_ctype, __lc_ctype);
-              strcpy (__lc_ctype, "C");
-              strcpy (last_lc_messages, lc_messages);
-              strcpy (lc_messages, "C");
-              __mb_cur_max = 1;
-            }
-        }
-      else
-        {
-          if (locale[0] == 'C' && locale[1] == '-')
-            {
-              switch (locale[2])
-                {
-                case 'U':
-                  if (strcmp (locale, "C-UTF-8"))
-                    return 0;
-                break;
-                case 'J':
-                  if (strcmp (locale, "C-JIS"))
-                    return 0;
-                break;
-                case 'E':
-                  if (strcmp (locale, "C-EUCJP"))
-                    return 0;
-                break;
-                case 'S':
-                  if (strcmp (locale, "C-SJIS"))
-                    return 0;
-                break;
-                case 'I':
-                  if (strcmp (locale, "C-ISO-8859-1"))
-                    return 0;
-                break;
-                default:
-                  return 0;
-                }
-            }
-          else 
-            {
-              if (strcmp (locale, "C") && strcmp (locale, ""))
-                return 0;
-              locale_name = "C"; /* C is always the default locale */
-            }
-
-          if (category == LC_CTYPE)
-            {
-              strcpy (last_lc_ctype, __lc_ctype);
-              strcpy (__lc_ctype, locale_name);
-
-              __mb_cur_max = 1;
-              if (locale[1] == '-')
-                {
-                  switch (locale[2])
-                    {
-                    case 'U':
-                      __mb_cur_max = 6;
-                    break;
-                    case 'J':
-                      __mb_cur_max = 8;
-                    break;
-                    case 'E':
-                      __mb_cur_max = 2;
-                    break;
-                    case 'S':
-                      __mb_cur_max = 2;
-                    break;
-                    case 'I':
-                    default:
-                      __mb_cur_max = 1;
-                    }
-                }
-            }
-          else
-            {
-              strcpy (last_lc_messages, lc_messages);
-              strcpy (lc_messages, locale_name);
-
-              charset = "ISO-8859-1";
-              if (locale[1] == '-')
-                {
-                  switch (locale[2])
-                    {
-                    case 'U':
-                      charset = "UTF-8";
-                    break;
-                    case 'J':
-                      charset = "JIS";
-                    break;
-                    case 'E':
-                      charset = "EUCJP";
-                    break;
-                    case 'S':
-                      charset = "SJIS";
-                    break;
-                    case 'I':
-                      charset = "ISO-8859-1";
-                    break;
-                    default:
-                      return 0;
-                    }
-                }
-            }
-        }
-      p->_current_category = category;  
-      p->_current_locale = locale;
+      p->_errno = EINVAL;
+      return NULL;
+    }
+
+  if (locale == NULL)
+    return category != LC_ALL ? current_categories[category] : currentlocale();
 
-      if (category == LC_CTYPE)
-        return last_lc_ctype;
-      else if (category == LC_MESSAGES)
-        return last_lc_messages;
+  /*
+   * Default to the current locale for everything.
+   */
+  for (i = 1; i < _LC_LAST; ++i)
+    strcpy (new_categories[i], current_categories[i]);
+
+  /*
+   * Now go fill up new_categories from the locale argument
+   */
+  if (!*locale)
+    {
+      if (category == LC_ALL)
+	{
+	  for (i = 1; i < _LC_LAST; ++i)
+	    {
+	      env = __get_locale_env (p, i);
+	      if (strlen (env) > ENCODING_LEN)
+		{
+		  p->_errno = EINVAL;
+		  return NULL;
+		}
+	      strcpy (new_categories[i], env);
+	    }
+	}
+      else
+	{
+	  env = __get_locale_env (p, category);
+	  if (strlen (env) > ENCODING_LEN)
+	    {
+	      p->_errno = EINVAL;
+	      return NULL;
+	    }
+	  strcpy (new_categories[category], env);
+	}
+    }
+  else if (category != LC_ALL)
+    {
+      if (strlen (locale) > ENCODING_LEN)
+	{
+	  p->_errno = EINVAL;
+	  return NULL;
+	}
+      strcpy (new_categories[category], locale);
     }
   else
     {
-      if (category == LC_CTYPE)
-        return __lc_ctype;
-      else if (category == LC_MESSAGES)
-        return lc_messages;
+      if ((r = strchr (locale, '/')) == NULL)
+	{
+	  if (strlen (locale) > ENCODING_LEN)
+	    {
+	      p->_errno = EINVAL;
+	      return NULL;
+	    }
+	  for (i = 1; i < _LC_LAST; ++i)
+	    strcpy (new_categories[i], locale);
+	}
+      else
+	{
+	  for (i = 1; r[1] == '/'; ++r)
+	    ;
+	  if (!r[1])
+	    {
+	      p->_errno = EINVAL;
+	      return NULL;  /* Hmm, just slashes... */
+	    }
+	  do
+	    {
+	      if (i == _LC_LAST)
+		break;  /* Too many slashes... */
+	      if ((len = r - locale) > ENCODING_LEN)
+		{
+		  p->_errno = EINVAL;
+		  return NULL;
+		}
+	      strlcpy (new_categories[i], locale, len + 1);
+	      i++;
+	      while (*r == '/')
+		r++;
+	      locale = r;
+	      while (*r && *r != '/')
+		r++;
+	    }
+	  while (*locale);
+	  while (i < _LC_LAST)
+	    {
+	      strcpy (new_categories[i], new_categories[i-1]);
+	      i++;
+	    }
+	}
     }
- 
-  return "C";
+
+  if (category != LC_ALL)
+    return loadlocale (p, category);
+
+  for (i = 1; i < _LC_LAST; ++i)
+    {
+      strcpy (saved_categories[i], current_categories[i]);
+      if (loadlocale (p, i) == NULL)
+	{
+	  saverr = p->_errno;
+	  for (j = 1; j < i; j++)
+	    {
+	      strcpy (new_categories[j], saved_categories[j]);
+	      if (loadlocale (p, j) == NULL)
+		{
+		  strcpy (new_categories[j], "C");
+		  loadlocale (p, j);
+		}
+	    }
+	  p->_errno = saverr;
+	  return NULL;
+	}
+    }
+  return currentlocale ();
 #endif
+}
+
+#ifdef _MB_CAPABLE
+static char *
+currentlocale()
+{
+        int i;
+
+        (void)strcpy(current_locale_string, current_categories[1]);
+
+        for (i = 2; i < _LC_LAST; ++i)
+                if (strcmp(current_categories[1], current_categories[i])) {
+                        for (i = 2; i < _LC_LAST; ++i) {
+                                (void)strcat(current_locale_string, "/");
+                                (void)strcat(current_locale_string,
+                                             current_categories[i]);
+                        }
+                        break;
+                }
+        return (current_locale_string);
+}
+#endif
+
+#ifdef _MB_CAPABLE
+static char *
+loadlocale(struct _reent *p, int category)
+{
+  /* At this point a full-featured system would just load the locale
+     specific data from the locale files.
+     What we do here for now is to check the incoming string for correctness.
+     The string must be in one of the allowed locale strings, either
+     one in POSIX-style, or one in the old newlib style to maintain
+     backward compatibility.  If the locale string is correct, the charset
+     is extracted and stored in lc_ctype_charset or lc_message_charset
+     dependent on the category. */
+  char *locale = new_categories[category];
+  char charset[ENCODING_LEN + 1];
+  unsigned long val;
+  char *end;
+  int mbc_max;
   
+  /* "POSIX" is translated to "C", as on Linux. */
+  if (!strcmp (locale, "POSIX"))
+    strcpy (locale, "C");
+  if (!strcmp (locale, "C"))				/* Default "C" locale */
+    strcpy (charset, "ISO-8859-1");
+  else if (locale[0] == 'C' && locale[1] == '-')	/* Old newlib style */
+	strcpy (charset, locale + 2);
+  else							/* POSIX style */
+    {
+      char *c = locale;
+
+      /* Don't use ctype macros here, they might be localized. */
+      /* Language */
+      if (c[0] <= 'a' || c[0] >= 'z'
+	  || c[1] <= 'a' || c[1] >= 'z')
+	return NULL;
+      c += 2;
+      if (c[0] == '_')
+        {
+	  /* Territory */
+	  ++c;
+	  if (c[0] <= 'A' || c[0] >= 'Z'
+	      || c[1] <= 'A' || c[1] >= 'Z')
+	    return NULL;
+	  c += 2;
+	}
+      if (c[0] == '.')
+	{
+	  /* Charset */
+	  strcpy (charset, c + 1);
+	  if ((c = strchr (charset, '@')))
+	    /* Strip off modifier */
+	    *c = '\0';
+	}
+      else if (c[0] == '\0' || c[0] == '@')
+	/* End of string or just a modifier */
+	strcpy (charset, "ISO-8859-1");
+      else
+	/* Invalid string */
+      	return NULL;
+    }
+  /* We only support this subset of charsets. */
+  switch (charset[0])
+    {
+    case 'U':
+      if (strcmp (charset, "UTF-8"))
+	return NULL;
+      mbc_max = 6;
+    break;
+    case 'J':
+      if (strcmp (charset, "JIS"))
+	return NULL;
+      mbc_max = 8;
+    break;
+    case 'E':
+      if (strcmp (charset, "EUCJP"))
+	return NULL;
+      mbc_max = 2;
+    break;
+    case 'S':
+      if (strcmp (charset, "SJIS"))
+	return NULL;
+      mbc_max = 2;
+    break;
+    case 'I':
+    default:
+      /* Must be exactly one of ISO-8859-1, [...] ISO-8859-15. */
+      if (strncmp (charset, "ISO-8859-", 9))
+	return NULL;
+      val = strtol (charset + 9, &end, 10);
+      if (val < 1 || val > 15 || *end)
+	return NULL;
+      mbc_max = 1;
+      break;
+    }
+  if (category == LC_CTYPE)
+    {
+      strcpy (lc_ctype_charset, charset);
+      __mb_cur_max = mbc_max;
+    }
+  else if (category == LC_MESSAGES)
+    strcpy (lc_message_charset, charset);
+  p->_current_category = category;  
+  p->_current_locale = locale;
+  return strcpy(current_categories[category], new_categories[category]);
+}
+
+static const char *
+__get_locale_env(struct _reent *p, int category)
+{
+  const char *env;
+
+  /* 1. check LC_ALL. */
+  env = _getenv_r (p, categories[0]);
+
+  /* 2. check LC_* */
+  if (env == NULL || !*env)
+    env = _getenv_r (p, categories[category]);
+
+  /* 3. check LANG */
+  if (env == NULL || !*env)
+    env = _getenv_r (p, "LANG");
+
+  /* 4. if none is set, fall to "C" */
+  if (env == NULL || !*env)
+    env = "C";
+
+  return env;
 }
+#endif
 
 char *
 _DEFUN_VOID(__locale_charset)
 {
-  return charset;
+  return lc_ctype_charset;
+}
+
+char *
+_DEFUN_VOID(__locale_msgcharset)
+{
+  return lc_message_charset;
 }
 
 struct lconv *
Index: libc/stdlib/mbtowc_r.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/mbtowc_r.c,v
retrieving revision 1.9
diff -u -p -r1.9 mbtowc_r.c
--- libc/stdlib/mbtowc_r.c	25 Feb 2009 09:10:09 -0000	1.9
+++ libc/stdlib/mbtowc_r.c	27 Feb 2009 10:40:20 -0000
@@ -45,8 +45,6 @@ static JIS_ACTION JIS_action_table[JIS_S
 /* we override the mbstate_t __count field for more complex encodings and use it store a state value */
 #define __state __count
 
-extern char __lc_ctype[12];
-
 int
 _DEFUN (_mbtowc_r, (r, pwc, s, n, state),
         struct _reent *r   _AND
@@ -65,9 +63,9 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
     return -2;
 
 #ifdef _MB_CAPABLE
-  if (strlen (__lc_ctype) <= 1)
+  if (strlen (__locale_charset ()) <= 1)
     { /* fall-through */ }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       int ch;
       int i = 0;
@@ -221,7 +219,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
       else
 	return -1;
     }      
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       int ch;
       int i = 0;
@@ -251,7 +249,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
 	    return -1;
 	}
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       int ch;
       int i = 0;
@@ -281,7 +279,7 @@ _DEFUN (_mbtowc_r, (r, pwc, s, n, state)
 	    return -1;
 	}
     }
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  else if (!strcmp (__locale_charset (), "JIS"))
     {
       JIS_STATE curr_state;
       JIS_ACTION action;
Index: libc/stdlib/wctomb_r.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/stdlib/wctomb_r.c,v
retrieving revision 1.8
diff -u -p -r1.8 wctomb_r.c
--- libc/stdlib/wctomb_r.c	25 Feb 2009 09:10:09 -0000	1.8
+++ libc/stdlib/wctomb_r.c	27 Feb 2009 10:40:20 -0000
@@ -7,8 +7,6 @@
 /* for some conversions, we use the __count field as a place to store a state value */
 #define __state __count
 
-extern char __lc_ctype[12];
-
 int
 _DEFUN (_wctomb_r, (r, s, wchar, state),
         struct _reent *r     _AND 
@@ -21,9 +19,9 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
      is 4, as is the case on cygwin.  */
   wint_t wchar = _wchar;
 
-  if (strlen (__lc_ctype) <= 1)
+  if (strlen (__locale_charset ()) <= 1)
     { /* fall-through */ }
-  else if (!strcmp (__lc_ctype, "C-UTF-8"))
+  else if (!strcmp (__locale_charset (), "UTF-8"))
     {
       if (s == NULL)
         return 0; /* UTF-8 encoding is not state-dependent */
@@ -93,9 +91,9 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
           return 4;
         }
       else
-        return -1;
+	return -1;
     }
-  else if (!strcmp (__lc_ctype, "C-SJIS"))
+  else if (!strcmp (__locale_charset (), "SJIS"))
     {
       unsigned char char2 = (unsigned char)wchar;
       unsigned char char1 = (unsigned char)(wchar >> 8);
@@ -113,10 +111,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
               return 2;
             }
           else
-            return -1;
+	    return -1;
         }
     }
-  else if (!strcmp (__lc_ctype, "C-EUCJP"))
+  else if (!strcmp (__locale_charset (), "EUCJP"))
     {
       unsigned char char2 = (unsigned char)wchar;
       unsigned char char1 = (unsigned char)(wchar >> 8);
@@ -134,10 +132,10 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
               return 2;
             }
           else
-            return -1;
+	    return -1;
         }
     }
-  else if (!strcmp (__lc_ctype, "C-JIS"))
+  else if (!strcmp (__locale_charset (), "JIS"))
     {
       int cnt = 0; 
       unsigned char char2 = (unsigned char)wchar;
@@ -165,7 +163,7 @@ _DEFUN (_wctomb_r, (r, s, wchar, state),
               return cnt + 2;
             }
           else
-            return -1;
+	    return -1;
         }
       else
         {
Index: libc/sys/linux/intl/loadmsgcat.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/sys/linux/intl/loadmsgcat.c,v
retrieving revision 1.2
diff -u -p -r1.2 loadmsgcat.c
--- libc/sys/linux/intl/loadmsgcat.c	23 Apr 2004 21:44:22 -0000	1.2
+++ libc/sys/linux/intl/loadmsgcat.c	27 Feb 2009 10:40:20 -0000
@@ -266,8 +266,8 @@ _nl_init_domain_conv (domain_file, domai
 		  outcharset = (*_nl_current[LC_CTYPE])->values[_NL_ITEM_INDEX (CODESET)].string;
 # else
 #  if HAVE_ICONV
-		  extern const char *__locale_charset (void);
-		  outcharset = __locale_charset ();
+		  extern const char *__locale_msgcharset (void);
+		  outcharset = __locale_msgcharset ();
 #  endif
 # endif
 		}


-- 
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat
Follow-Ups:
- Re: [PATCH] setlocale: New implementation
  - From: Jeff Johnston
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]