This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] setlocale: Allow caseinsensitive charsets


Ok.  Please go ahead.

-- Jeff J.

----- Original Message -----
From: "Corinna Vinschen" <vinschen@redhat.com>
To: newlib@sourceware.org
Sent: Thursday, August 20, 2009 9:36:40 AM GMT -05:00 US/Canada Eastern
Subject: [PATCH] setlocale: Allow caseinsensitive charsets

Hi,

for compatibility with other implementations, the patch below allows the
charset to be specified case-insensitively.  This permits settings such
as, for instance, LC_ALL=en_US.utf-8, instead of enforcing "UTF-8" in all
uppercase.  The charset is always stored internally in uppercase, so
internal functions checking the string returned by __locale_charset ()
will always see the same uppercased string as before.

Additionally, the patch allows "UTF-8" to be specified as "UTF8" or
"utf8" as well, without the dash, just like on Linux.

Documentation is changed accordingly.

Tested on Cygwin.

Ok to apply?


Thanks,
Corinna


	* libc/locale/locale.c: Add this change to documentation.
	Throughout, check charset string case-insensitively and store
	internal charset string uppercased.  Allow "UTF8" in addition
	to "UTF-8".


Index: libc/locale/locale.c
===================================================================
RCS file: /cvs/src/src/newlib/libc/locale/locale.c,v
retrieving revision 1.22
diff -u -p -r1.22 locale.c
--- libc/locale/locale.c	18 Jun 2009 09:13:39 -0000	1.22
+++ libc/locale/locale.c	20 Aug 2009 13:32:21 -0000
@@ -65,7 +65,10 @@ Even when using POSIX locale strings, th
 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>/<<"eucJP">>, <<"SJIS">>, <<"ISO-8859-x">>
 with 1 <= x <= 15, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850,
 852, 855, 857, 858, 862, 866, 874, 1125, 1250, 1251, 1252, 1253, 1254,
-1255, 1256, 1257, 1258]. 
+1255, 1256, 1257, 1258].  Charsets are case insensitive.  For instance,
+<<"UTF-8">> and <<"utf-8">> are equivalent.  <<"UTF-8">> can also be
+written without dash, as in <<"UTF8">> or <<"utf8">>.
+
 (<<"">> is also accepted; if given, the settings are read from the
 corresponding LC_* environment variables and $LANG according to POSIX rules.
 
@@ -487,8 +490,10 @@ loadlocale(struct _reent *p, int categor
   switch (charset[0])
     {
     case 'U':
-      if (strcmp (charset, "UTF-8"))
+    case 'u':
+      if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
 	return NULL;
+      strcpy (charset, "UTF-8");
       mbc_max = 6;
 #ifdef _MB_CAPABLE
       l_wctomb = __utf8_wctomb;
@@ -496,8 +501,10 @@ loadlocale(struct _reent *p, int categor
 #endif
     break;
     case 'J':
-      if (strcmp (charset, "JIS"))
+    case 'j':
+      if (strcasecmp (charset, "JIS"))
 	return NULL;
+      strcpy (charset, "JIS");
       mbc_max = 8;
 #ifdef _MB_CAPABLE
       l_wctomb = __jis_wctomb;
@@ -506,7 +513,7 @@ loadlocale(struct _reent *p, int categor
     break;
     case 'E':
     case 'e':
-      if (!strcmp (charset, "EUCJP") || !strcmp (charset, "eucJP"))
+      if (!strcasecmp (charset, "EUCJP"))
 	{
 	  strcpy (charset, "EUCJP");
 	  mbc_max = 3;
@@ -516,7 +523,7 @@ loadlocale(struct _reent *p, int categor
 #endif
 	}
 #ifdef __CYGWIN__
-      else if (!strcmp (charset, "EUCKR") || !strcmp (charset, "eucKR"))
+      else if (!strcasecmp (charset, "EUCKR"))
 	{
 	  strcpy (charset, "EUCKR");
 	  mbc_max = 2;
@@ -530,8 +537,10 @@ loadlocale(struct _reent *p, int categor
 	return NULL;
     break;
     case 'S':
-      if (strcmp (charset, "SJIS"))
+    case 's':
+      if (strcasecmp (charset, "SJIS"))
 	return NULL;
+      strcpy (charset, "SJIS");
       mbc_max = 2;
 #ifdef _MB_CAPABLE
       l_wctomb = __sjis_wctomb;
@@ -539,10 +548,12 @@ loadlocale(struct _reent *p, int categor
 #endif
     break;
     case 'I':
+    case 'i':
       /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
          ISO-8859-12. */
-      if (strncmp (charset, "ISO-8859-", 9))
+      if (strncasecmp (charset, "ISO-8859-", 9))
 	return NULL;
+      strncpy (charset, "ISO", 3);
       val = _strtol_r (p, charset + 9, &end, 10);
       if (val < 1 || val > 16 || val == 12 || *end)
 	return NULL;
@@ -558,8 +569,10 @@ loadlocale(struct _reent *p, int categor
 #endif
     break;
     case 'C':
-      if (charset[1] != 'P')
+    case 'c':
+      if (charset[1] != 'P' && charset[1] != 'p')
 	return NULL;
+      strncpy (charset, "CP", 2);
       val = _strtol_r (p, charset + 2, &end, 10);
       if (*end)
 	return NULL;
@@ -603,8 +616,10 @@ loadlocale(struct _reent *p, int categor
 	}
     break;
     case 'A':
-      if (strcmp (charset, "ASCII"))
+    case 'a':
+      if (strcasecmp (charset, "ASCII"))
 	return NULL;
+      strcpy (charset, "ASCII");
       mbc_max = 1;
 #ifdef _MB_CAPABLE
       l_wctomb = __ascii_wctomb;
@@ -613,8 +628,10 @@ loadlocale(struct _reent *p, int categor
       break;
 #ifdef __CYGWIN__
     case 'G':
-      if (strcmp (charset, "GBK"))
+    case 'g':
+      if (strcasecmp (charset, "GBK"))
       	return NULL;
+      strcpy (charset, "GBK");
       mbc_max = 2;
 #ifdef _MB_CAPABLE
       l_wctomb = __gbk_wctomb;
@@ -622,7 +639,8 @@ loadlocale(struct _reent *p, int categor
 #endif
       break;
     case 'B':
-      if (strcmp (charset, "BIG5") && strcmp (charset, "Big5"))
+    case 'b':
+      if (strcasecmp (charset, "BIG5"))
       	return NULL;
       strcpy (charset, "BIG5");
       mbc_max = 2;

-- 
Corinna Vinschen
Cygwin Project Co-Leader
Red Hat


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]