This is the mail archive of the newlib@sourceware.org mailing list for the newlib project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

scanf and "-0x", "-nan", "inf"


A couple more scanf bugs.  OK to apply this?  (Also, ping on my other scanf
patch for positional args.)

sscanf("-nan", "%e", &float1) was failing, instead of setting float1 to NaN

sscanf("-0x", "%i%c", &int1, &ch) was failing, instead of setting int1 to 0 and 
ch to 'x'

sscanf("infinity", "%e", &float1) was failing, instead of setting float1 to 
infinity

This patch also fixes a bug triggered only by %S in multibyte locales where
0xff is considered part of an incomplete multibyte whitespace sequence and
where char is signed (is there such a locale?).  The code lacked a cast to
unsigned char, so it was calling the equivalent of ungetc(EOF) instead of the
intended ungetc(0xff).  I audited all the other uses of ungetc to ensure that
no cast is necessary there, because the only characters we could possibly be
pushing back in those places are 7-bit ASCII characters.

Bugs that I am still aware of:

sscanf("nan():", "%e%c", &float1, &ch) populates ch with '(' instead of ':'
(i.e., NaNs written with an n-char-sequence, "nan(...)", are not parsed)

sscanf("33554430.999999999999", "%e", &float1) populates float1 with 33554432
instead of 33554430 (i.e., double rounding occurs because strtod is used; even
changing scanf to use strtof would not help until strtof is fixed to do the
parse itself rather than wrapping strtod)

sscanf("0x1p0", "%e", &float1) populates float1 with 0 instead of 1 (i.e., hex
floats are not parsed) - could be made conditional on
--enable-newlib-io-c99-formats

2007-05-22  Eric Blake  <ebb9@byu.net>

	* libc/stdio/vfscanf.c (__SVFSCANF_R): Fix %i scanning of "-0x".
	Support "-nan" and "inf" for %e.  Audit usage of ungetc to fix bug
	in %S in multibyte locales.

--- libc/stdio/vfscanf.c	21 May 2007 12:39:03 -0000
+++ libc/stdio/vfscanf.c	22 May 2007 14:46:02 -0000
@@ -825,10 +825,10 @@
                     *wcp = L'\0';
                   if (mbslen != (size_t)-2) /* Incomplete sequence */
                     {
-                      if (iswspace(*wcp)) 
+                      if (iswspace(*wcp))
                         {
                           while (n != 0)
-                            ungetc (buf[--n], fp);
+                            ungetc ((unsigned char) buf[--n], fp);
                           break;
                         }
                       nread += n;
@@ -987,15 +987,15 @@
 		    }
 		  break;
 
-		  /* x ok iff flag still set & 2nd char */
+		  /* x ok iff flag still set & single 0 seen */
 		case 'x':
 		case 'X':
-		  if (flags & PFXOK && p == buf + 1)
+		  if ((flags & (PFXOK | NZDIGITS)) == PFXOK)
 		    {
 		      base = 16;/* if %i */
 		      flags &= ~PFXOK;
 		      /* We must reset the NZDIGITS and NDIGITS
-		         flags that would have been unset by seeing
+			 flags that would have been unset by seeing
 			 the zero that preceded the X or x.  */
 		      flags |= NZDIGITS | NDIGITS;
 		      goto ok;
@@ -1024,18 +1024,16 @@
 	   * If we had only a sign, it is no good; push back the sign.
 	   * If the number ends in `x', it was [sign] '0' 'x', so push back
 	   * the x and treat it as [sign] '0'.
+	   * Use of ungetc here and below assumes ASCII encoding; we are only
+	   * pushing back 7-bit characters, so casting to unsigned char is
+	   * not necessary.
 	   */
 	  if (flags & NDIGITS)
 	    {
 	      if (p > buf)
-		_CAST_VOID ungetc (*(u_char *)-- p, fp);
-	      goto match_failure;
-	    }
-	  c = ((u_char *) p)[-1];
-	  if (c == 'x' || c == 'X')
-	    {
-	      --p;
-	      /*(void)*/ ungetc (c, fp);
+		ungetc (*--p, fp); /* [-+xX] */
+	      if (p == buf)
+		goto match_failure;
 	    }
 	  if ((flags & SUPPRESS) == 0)
 	    {
@@ -1096,7 +1094,8 @@
 	  long zeroes, exp_adjust;
 	  char *exp_start = NULL;
 	  unsigned width_left = 0;
-	  int nancount = 0;
+	  unsigned char nancount = 0;
+	  unsigned char infcount = 0;
 #ifdef hardway
 	  if (width == 0 || width > sizeof (buf) - 1)
 #else
@@ -1141,7 +1140,7 @@
 		case '7':
 		case '8':
 		case '9':
-		  if (nancount == 0)
+		  if (nancount + infcount == 0)
 		    {
 		      flags &= ~(SIGNOK | NDIGITS);
 		      goto fok;
@@ -1159,18 +1158,23 @@
 		case 'n':
 		case 'N':
 		  if (nancount == 0
-		      && (flags & (SIGNOK | NDIGITS | DPTOK | EXPOK)) ==
-				  (SIGNOK | NDIGITS | DPTOK | EXPOK))
+		      && (flags & (NDIGITS | DPTOK | EXPOK)) ==
+				  (NDIGITS | DPTOK | EXPOK))
 		    {
 		      flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
 		      nancount = 1;
 		      goto fok;
 		    }
-		  else if (nancount == 2)
+		  if (nancount == 2)
 		    {
 		      nancount = 3;
 		      goto fok;
 		    }
+		  if (infcount == 1 || infcount == 4)
+		    {
+		      infcount++;
+		      goto fok;
+		    }
 		  break;
 		case 'a':
 		case 'A':
@@ -1180,6 +1184,46 @@
 		      goto fok;
 		    }
 		  break;
+		case 'i':
+		case 'I':
+		  if (infcount == 0
+		      && (flags & (NDIGITS | DPTOK | EXPOK)) ==
+				  (NDIGITS | DPTOK | EXPOK))
+		    {
+		      flags &= ~(SIGNOK | DPTOK | EXPOK | NDIGITS);
+		      infcount = 1;
+		      goto fok;
+		    }
+		  if (infcount == 3 || infcount == 5)
+		    {
+		      infcount++;
+		      goto fok;
+		    }
+		  break;
+		case 'f':
+		case 'F':
+		  if (infcount == 2)
+		    {
+		      infcount = 3;
+		      goto fok;
+		    }
+		  break;
+		case 't':
+		case 'T':
+		  if (infcount == 6)
+		    {
+		      infcount = 7;
+		      goto fok;
+		    }
+		  break;
+		case 'y':
+		case 'Y':
+		  if (infcount == 7)
+		    {
+		      infcount = 8;
+		      goto fok;
+		    }
+		  break;
 		case '.':
 		  if (flags & DPTOK)
 		    {
@@ -1212,7 +1256,7 @@
 	      *p++ = c;
 	    fskip:
 	      width--;
-              ++nread;
+	      ++nread;
 	      if (--fp->_r > 0)
 		fp->_p++;
 	      else
@@ -1221,24 +1265,48 @@
 	    }
 	  if (zeroes)
 	    flags &= ~NDIGITS;
-          /* We may have a 'N' or possibly even a 'Na' as the start of 'NaN', 
-	     only to run out of chars before it was complete (or having 
-	     encountered a non- matching char).  So check here if we have an 
-	     outstanding nancount, and if so put back the chars we did 
-	     swallow and treat as a failed match. */
-          if (nancount && nancount != 3)
-            {
-              /* Ok... what are we supposed to do in the event that the
-              __srefill call above was triggered in the middle of the partial
-              'NaN' and so we can't put it all back? */
-              while (nancount-- && (p > buf))
-                {
-                  ungetc (*(u_char *)--p, fp);
-                  --nread;
-                }
-              goto match_failure;
-            }
-          /*
+	  /* We may have 'N' or possibly even [sign] 'N' 'a' as the
+	     start of 'NaN', only to run out of chars before it was
+	     complete (or having encountered a non-matching char).  So
+	     check here if we have an outstanding nancount, and if so
+	     put back the chars we did swallow and treat as a failed
+	     match.
+
+	     FIXME - we still don't handle NAN([0xdigits]).  */
+	  if (nancount - 1 < 2) /* nancount != 0 && nancount < 3 */
+	    {
+	      /* Newlib's ungetc works even if we called __srefill in
+		 the middle of a partial parse, but POSIX does not
+		 guarantee that in all implementations of ungetc.  */
+	      while (p > buf)
+		{
+		  ungetc (*--p, fp); /* [-+nNaA] */
+		  --nread;
+		}
+	      goto match_failure;
+	    }
+	  /* Likewise for 'inf' and 'infinity'.	 But be careful that
+	     'infinite' consumes only 3 characters, leaving the stream
+	     at the second 'i'.	 */
+	  if (infcount - 1 < 7) /* infcount != 0 && infcount < 8 */
+	    {
+	      if (infcount >= 3) /* valid 'inf', but short of 'infinity' */
+		while (infcount-- > 3)
+		  {
+		    ungetc (*--p, fp); /* [iInNtT] */
+		    --nread;
+		  }
+	      else
+		{
+		  while (p > buf)
+		    {
+		      ungetc (*--p, fp); /* [-+iInN] */
+		      --nread;
+		    }
+		  goto match_failure;
+		}
+	    }
+	  /*
 	   * If no digits, might be missing exponent digits
 	   * (just give back the exponent) or might be missing
 	   * regular digits, but had sign and/or decimal point.
@@ -1249,22 +1317,22 @@
 		{
 		  /* no digits at all */
 		  while (p > buf)
-                    {
-		      ungetc (*(u_char *)--p, fp);
-                      --nread;
-                    }
+		    {
+		      ungetc (*--p, fp); /* [-+.] */
+		      --nread;
+		    }
 		  goto match_failure;
 		}
 	      /* just a bad exponent (e and maybe sign) */
-	      c = *(u_char *)-- p;
-              --nread;
+	      c = *--p;
+	      --nread;
 	      if (c != 'e' && c != 'E')
 		{
-		  _CAST_VOID ungetc (c, fp);	/* sign */
-		  c = *(u_char *)-- p;
-                  --nread;
+		  ungetc (c, fp); /* [-+] */
+		  c = *--p;
+		  --nread;
 		}
-	      _CAST_VOID ungetc (c, fp);
+	      ungetc (c, fp); /* [eE] */
 	    }
 	  if ((flags & SUPPRESS) == 0)
 	    {



Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]