[newlib-cygwin] Do not treat the command line or environment like paths

Corinna Vinschen corinna@sourceware.org
Fri Jan 8 14:20:00 GMT 2016


https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=e0d4e3fec7793c1bf41ca707156af8413fb03455

commit e0d4e3fec7793c1bf41ca707156af8413fb03455
Author: Johannes Schindelin <johannes.schindelin@gmx.de>
Date:   Thu Dec 17 16:22:49 2015 +0100

    Do not treat the command line or environment like paths
    
    	* dcrt0.cc (dll_crt0_1), environ.cc (environ_init, getwinenveq,
    	build_env), strfuncs.cc (sys_wcstombs, sys_wcstombs_alloc),
    	wchar.h (sys_wcstombs_no_path, sys_wcstombs_alloc_no_path): Avoid
    	mis-conversions of text that does not actually refer to a path or
    	file name.
    
    Detailed explanation:
    
    Our WCS -> UTF conversion handles characters in the Unicode private
    use area (U+F0xx) specially to allow for otherwise invalid file names.
    However, this handling makes no sense for command lines or environment
    variables, which we would rather convert verbatim.
    
    As a stop-gap solution, let's just introduce sys_wcstombs_no_path(),
    a version of the sys_wcstombs() function that specifically excludes
    that file name conversion magic.
    
    The proper solution is to change sys_wcstombs() to assume that its
    input is not a path, and to introduce sys_wcstombs_path() for input
    that is, but that is a bigger task which we leave for another patch.
    
    Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>

Diff:
---
 winsup/cygwin/dcrt0.cc    |  4 ++--
 winsup/cygwin/environ.cc  |  8 ++++----
 winsup/cygwin/strfuncs.cc | 41 ++++++++++++++++++++++++++++++++++-------
 winsup/cygwin/wchar.h     |  4 ++++
 4 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/winsup/cygwin/dcrt0.cc b/winsup/cygwin/dcrt0.cc
index acba8a4..94f7bf8 100644
--- a/winsup/cygwin/dcrt0.cc
+++ b/winsup/cygwin/dcrt0.cc
@@ -953,9 +953,9 @@ dll_crt0_1 (void *)
   if (!__argc)
     {
       PWCHAR wline = GetCommandLineW ();
-      size_t size = sys_wcstombs (NULL, 0, wline) + 1;
+      size_t size = sys_wcstombs_no_path (NULL, 0, wline) + 1;
       char *line = (char *) alloca (size);
-      sys_wcstombs (line, size, wline);
+      sys_wcstombs_no_path (line, size, wline);
 
       /* Scan the command line and build argv.  Expand wildcards if not
 	 called from another cygwin process. */
diff --git a/winsup/cygwin/environ.cc b/winsup/cygwin/environ.cc
index ab6511b..227f559 100644
--- a/winsup/cygwin/environ.cc
+++ b/winsup/cygwin/environ.cc
@@ -833,7 +833,7 @@ environ_init (char **envp, int envc)
 	 eventually want to use them).  */
       for (i = 0, w = rawenv; *w != L'\0'; w = wcschr (w, L'\0') + 1, i++)
 	{
-	  sys_wcstombs_alloc (&newp, HEAP_NOTHEAP, w);
+	  sys_wcstombs_alloc_no_path (&newp, HEAP_NOTHEAP, w);
 	  if (i >= envc)
 	    envp = (char **) realloc (envp, (4 + (envc += 100)) * sizeof (char *));
 	  envp[i] = newp;
@@ -895,7 +895,7 @@ getwinenveq (const char *name, size_t namelen, int x)
   int totlen = GetEnvironmentVariableW (name0, valbuf, 32768);
   if (totlen > 0)
     {
-      totlen = sys_wcstombs (NULL, 0, valbuf) + 1;
+      totlen = sys_wcstombs_no_path (NULL, 0, valbuf) + 1;
       if (x == HEAP_1_STR)
 	totlen += namelen;
       else
@@ -903,7 +903,7 @@ getwinenveq (const char *name, size_t namelen, int x)
       char *p = (char *) cmalloc_abort ((cygheap_types) x, totlen);
       if (namelen)
 	strcpy (p, name);
-      sys_wcstombs (p + namelen, totlen, valbuf);
+      sys_wcstombs_no_path (p + namelen, totlen, valbuf);
       debug_printf ("using value from GetEnvironmentVariable for '%W'", name0);
       return p;
     }
@@ -1055,7 +1055,7 @@ build_env (const char * const *envp, PWCHAR &envblock, int &envc,
 	  for (winnum = 0, var = cwinenv;
 	       *var;
 	       ++winnum, var = wcschr (var, L'\0') + 1)
-	    sys_wcstombs_alloc (&winenv[winnum], HEAP_NOTHEAP, var);
+	    sys_wcstombs_alloc_no_path (&winenv[winnum], HEAP_NOTHEAP, var);
 	}
       DestroyEnvironmentBlock (cwinenv);
       /* Eliminate variables which are already available in envp, as well as
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index e73cd63..ad67738 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -409,8 +409,9 @@ __big5_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n,
        to buffer size, it's a bug in Cygwin and the buffer in the calling
        function should be raised.
 */
-size_t __reg3
-sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
+static size_t __reg3
+sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc,
+		bool is_path)
 {
   char buf[10];
   char *ptr = dst;
@@ -434,7 +435,7 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
 	 ASCII area <= 0x7f (only for path names) is transform_chars above.
 	 Reverse functionality for invalid bytes in a multibyte sequence is
 	 in sys_cp_mbstowcs below. */
-      if ((pw & 0xff00) == 0xf000
+      if (is_path && (pw & 0xff00) == 0xf000
 	  && (((cwc = (pw & 0xff)) <= 0x7f && tfx_rev_chars[cwc] >= 0xf000)
 	      || (cwc >= 0x80 && MB_CUR_MAX > 1)))
 	{
@@ -496,6 +497,18 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
   return n;
 }
 
+size_t __reg3
+sys_wcstombs (char *dst, size_t len, const wchar_t * src, size_t nwc)
+{
+  return sys_wcstombs (dst, len, src, nwc, true);
+}
+
+size_t __reg3
+sys_wcstombs_no_path (char *dst, size_t len, const wchar_t * src, size_t nwc)
+{
+  return sys_wcstombs (dst, len, src, nwc, false);
+}
+
 /* Allocate a buffer big enough for the string, always including the
    terminating '\0'.  The buffer pointer is returned in *dst_p, the return
    value is the number of bytes written to the buffer, as usual.
@@ -506,12 +519,13 @@ sys_wcstombs (char *dst, size_t len, const wchar_t *src, size_t nwc)
    Note that this code is shared by cygserver (which requires it via
    __small_vsprintf) and so when built there plain calloc is the
    only choice.  */
-size_t __reg3
-sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc)
+static size_t __reg3
+sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc,
+		bool is_path)
 {
   size_t ret;
 
-  ret = sys_wcstombs (NULL, (size_t) -1, src, nwc);
+  ret = sys_wcstombs (NULL, (size_t) -1, src, nwc, is_path);
   if (ret > 0)
     {
       size_t dlen = ret + 1;
@@ -522,11 +536,24 @@ sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc)
 	*dst_p = (char *) ccalloc ((cygheap_types) type, dlen, sizeof (char));
       if (!*dst_p)
 	return 0;
-      ret = sys_wcstombs (*dst_p, dlen, src, nwc);
+      ret = sys_wcstombs (*dst_p, dlen, src, nwc, is_path);
     }
   return ret;
 }
 
+size_t __reg3
+sys_wcstombs_alloc (char **dst_p, int type, const wchar_t *src, size_t nwc)
+{
+  return sys_wcstombs_alloc (dst_p, type, src, nwc, true);
+}
+
+size_t __reg3
+sys_wcstombs_alloc_no_path (char **dst_p, int type, const wchar_t *src,
+		size_t nwc)
+{
+  return sys_wcstombs_alloc (dst_p, type, src, nwc, false);
+}
+
 /* sys_cp_mbstowcs is actually most of the time called as sys_mbstowcs with
    a 0 codepage.  If cp is not 0, the codepage is evaluated and used for the
    conversion.  This is so that fhandler_console can switch to an alternate
diff --git a/winsup/cygwin/wchar.h b/winsup/cygwin/wchar.h
index 0abece2..bb325d4 100644
--- a/winsup/cygwin/wchar.h
+++ b/winsup/cygwin/wchar.h
@@ -52,8 +52,12 @@ extern char *__locale_charset ();
 #ifdef __cplusplus
 size_t __reg3 sys_wcstombs (char *dst, size_t len, const wchar_t * src,
 			       size_t nwc = (size_t) -1);
+size_t __reg3 sys_wcstombs_no_path (char *dst, size_t len,
+			       const wchar_t * src, size_t nwc = (size_t) -1);
 size_t __reg3 sys_wcstombs_alloc (char **, int, const wchar_t *,
 				     size_t = (size_t) -1);
+size_t __reg3 sys_wcstombs_alloc_no_path (char **, int, const wchar_t *,
+				     size_t = (size_t) -1);
 
 size_t __reg3 sys_cp_mbstowcs (mbtowc_p, const char *, wchar_t *, size_t,
 				  const char *, size_t = (size_t) -1);



More information about the Cygwin-cvs mailing list