This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH v2.0] Use saturated arithmetic for overflow detection.


This version adds saturated arithmetic support with optimized x86_64
version.

This improves a calloc a bit and we use trick from calloc to avoid
division in generic multiplications.

I implemented these as inline function instead of macro except for
generic MUL_S where I need to use __builtin_constant_p for constant
division.

If I made header more techincal I could extract a bit of performance but
I do not know if it is worthwhile.
When multiplying x by power of two 2^^i we could use following code

#define MUL_POW_S(x, i) x >> (8 * sizeof(size_t) - i) == 0 ? x << i : SIZE_MAX


 	* sysdeps/unix/sysv/linux/x86_64/saturated.h: New file.
	* sysdeps/generic/saturated.h: Likewise.
	* malloc/malloc.c (__libc_calloc): Use saturated arithmetic for
	overflow detection.
 	* posix/fnmatch.c (fnmatch): Likewise.
 	* posix/regcomp.c (init_dfa): Likewise.
 	* posix/regexec.c (build_trtable, prune_impossible_nodes,
 	re_search_internal, set_regs): Likewise.
 	* posix/regex_internal.c (re_dfa_add_node,
 	re_string_realloc_buffers): Likewise.
 	* posix/regex_internal.h (static): Likewise.
 	* stdio-common/vfprintf.c (vfprintf): Likewise.
 	* string/strcoll_l.c (STRCOLL): Likewise.
	* sysdeps/posix/getaddrinfo.c (gaih_inet, getaddrinfo): Likewise.

---
 malloc/malloc.c                            |   14 ++++-------
 posix/fnmatch.c                            |   17 ++++---------
 posix/regcomp.c                            |    6 +----
 posix/regex_internal.c                     |   12 ----------
 posix/regex_internal.h                     |    5 ++--
 posix/regexec.c                            |   33 ++++++-------------------
 stdio-common/vfprintf.c                    |    9 +++----
 string/strcoll_l.c                         |   14 ++++-------
 sysdeps/generic/saturated.h                |   36 ++++++++++++++++++++++++++++
 sysdeps/posix/getaddrinfo.c                |   25 +++++++++----------
 sysdeps/unix/sysv/linux/x86_64/saturated.h |   35 +++++++++++++++++++++++++++
 11 files changed, 112 insertions(+), 94 deletions(-)
 create mode 100644 sysdeps/generic/saturated.h
 create mode 100644 sysdeps/unix/sysv/linux/x86_64/saturated.h

diff --git a/malloc/malloc.c b/malloc/malloc.c
index 29796fe..ac1ea73 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -238,6 +238,7 @@
 /* For va_arg, va_start, va_end.  */
 #include <stdarg.h>
 
+#include <saturated.h>
 
 /*
   Debugging:
@@ -3147,15 +3148,10 @@ __libc_calloc(size_t n, size_t elem_size)
   unsigned long nclears;
   INTERNAL_SIZE_T* d;
 
-  /* size_t is unsigned so the behavior on overflow is defined.  */
-  bytes = n * elem_size;
-#define HALF_INTERNAL_SIZE_T \
-  (((INTERNAL_SIZE_T) 1) << (8 * sizeof (INTERNAL_SIZE_T) / 2))
-  if (__builtin_expect ((n | elem_size) >= HALF_INTERNAL_SIZE_T, 0)) {
-    if (elem_size != 0 && bytes / elem_size != n) {
-      __set_errno (ENOMEM);
-      return 0;
-    }
+  bytes = MUL_S (n, elem_size);
+  if (__builtin_expect (bytes == SIZE_MAX, 0)) {
+    __set_errno (ENOMEM);
+    return 0;
   }
 
   void *(*hook) (size_t, const void *) =
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 0f26a2e..79d3e7d 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -28,6 +28,7 @@
 #include <errno.h>
 #include <fnmatch.h>
 #include <ctype.h>
+#include <saturated.h>
 
 #if HAVE_STRING_H || defined _LIBC
 # include <string.h>
@@ -373,13 +374,9 @@ fnmatch (pattern, string, flags)
 	       XXX Do we have to set `errno' to something which mbsrtows hasn't
 	       already done?  */
 	    return -1;
-	  if (__builtin_expect (n >= (size_t) -1 / sizeof (wchar_t), 0))
-	    {
-	      __set_errno (ENOMEM);
-	      return -2;
-	    }
+
 	  wpattern_malloc = wpattern
-	    = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
+	    = (wchar_t *) malloc (MUL_S (n + 1, sizeof (wchar_t)));
 	  assert (mbsinit (&ps));
 	  if (wpattern == NULL)
 	    return -2;
@@ -422,15 +419,9 @@ fnmatch (pattern, string, flags)
 	       XXX Do we have to set `errno' to something which mbsrtows hasn't
 	       already done?  */
 	    goto free_return;
-	  if (__builtin_expect (n >= (size_t) -1 / sizeof (wchar_t), 0))
-	    {
-	      free (wpattern_malloc);
-	      __set_errno (ENOMEM);
-	      return -2;
-	    }
 
 	  wstring_malloc = wstring
-	    = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
+	    = (wchar_t *) malloc (MUL_S (n + 1, sizeof (wchar_t)));
 	  if (wstring == NULL)
 	    {
 	      free (wpattern_malloc);
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 0ffc2fa..c1b302d 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -839,11 +839,7 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
   /* Force allocation of str_tree_storage the first time.  */
   dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
 
-  /* Avoid overflows.  */
-  if (pat_len == SIZE_MAX)
-    return REG_ESPACE;
-
-  dfa->nodes_alloc = pat_len + 1;
+  dfa->nodes_alloc = ADD_S (pat_len, 1);
   dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
 
   /*  table_size = 2 ^ ceil(log pat_len) */
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 0a3830b..a0c1f77 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -134,11 +134,6 @@ re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
     {
       wint_t *new_wcs;
 
-      /* Avoid overflow in realloc.  */
-      const size_t max_object_size = MAX (sizeof (wint_t), sizeof (int));
-      if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
-	return REG_ESPACE;
-
       new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
       if (BE (new_wcs == NULL, 0))
 	return REG_ESPACE;
@@ -1416,13 +1411,6 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
       re_node_set *new_edests, *new_eclosures;
       re_token_t *new_nodes;
 
-      /* Avoid overflows in realloc.  */
-      const size_t max_object_size = MAX (sizeof (re_token_t),
-					  MAX (sizeof (re_node_set),
-					       sizeof (int)));
-      if (BE (SIZE_MAX / max_object_size < new_nodes_alloc, 0))
-	return -1;
-
       new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
       if (BE (new_nodes == NULL, 0))
 	return -1;
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 3c94fbe..c6888ce 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -25,6 +25,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <saturated.h>
 
 #if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
 # include <langinfo.h>
@@ -429,8 +430,8 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx,
 # endif
 #endif
 
-#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
-#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
+#define re_malloc(t,n) ((t *) malloc (MUL_S (n, sizeof (t))))
+#define re_realloc(p,t,n) ((t *) realloc (p, MUL_S (n, sizeof (t))))
 #define re_free(p) free (p)
 
 struct bin_tree_t
diff --git a/posix/regexec.c b/posix/regexec.c
index f85c5e8..23b4e9e 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -703,13 +703,6 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
      multi character collating element.  */
   if (nmatch > 1 || dfa->has_mb_node)
     {
-      /* Avoid overflow.  */
-      if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
-	{
-	  err = REG_ESPACE;
-	  goto free_return;
-	}
-
       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
       if (BE (mctx.state_log == NULL, 0))
 	{
@@ -969,10 +962,6 @@ prune_impossible_nodes (mctx)
   match_last = mctx->match_last;
   halt_node = mctx->last_node;
 
-  /* Avoid overflow.  */
-  if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
-    return REG_ESPACE;
-
   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
   if (BE (sifted_states == NULL, 0))
     {
@@ -3387,21 +3376,16 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
   if (BE (err != REG_NOERROR, 0))
     goto out_free;
 
-  /* Avoid arithmetic overflow in size calculation.  */
-  if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
-	    / (3 * sizeof (re_dfastate_t *)))
-	   < ndests),
-	  0))
-    goto out_free;
+  size_t used = (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX;
+  size_t needed = MUL_S (ndests, 3 * sizeof (re_dfastate_t *));
 
-  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
-			 + ndests * 3 * sizeof (re_dfastate_t *)))
+  if (__libc_use_alloca (ADD_S (used, needed)))
     dest_states = (re_dfastate_t **)
-      alloca (ndests * 3 * sizeof (re_dfastate_t *));
+      alloca (needed);
   else
     {
       dest_states = (re_dfastate_t **)
-	malloc (ndests * 3 * sizeof (re_dfastate_t *));
+	malloc (needed);
       if (BE (dest_states == NULL, 0))
 	{
 out_free:
@@ -4109,14 +4093,11 @@ extend_buffers (re_match_context_t *mctx, int min_len)
   reg_errcode_t ret;
   re_string_t *pstr = &mctx->input;
 
-  /* Avoid overflow.  */
-  if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
-    return REG_ESPACE;
-
   /* Double the lengthes of the buffers, but allocate at least MIN_LEN.  */
   ret = re_string_realloc_buffers (pstr,
 				   MAX (min_len,
-					MIN (pstr->len, pstr->bufs_len * 2)));
+					MIN (pstr->len,
+					     MUL_S (pstr->bufs_len, 2))));
   if (BE (ret != REG_NOERROR, 0))
     return ret;
 
diff --git a/stdio-common/vfprintf.c b/stdio-common/vfprintf.c
index 8cd7a85..789103a 100644
--- a/stdio-common/vfprintf.c
+++ b/stdio-common/vfprintf.c
@@ -25,6 +25,7 @@
 #include <errno.h>
 #include <wchar.h>
 #include <bits/libc-lock.h>
+#include <saturated.h>
 #include <sys/param.h>
 #include <_itoa.h>
 #include <locale/localeinfo.h>
@@ -1067,10 +1068,10 @@ vfprintf (FILE *s, const CHAR_T *format, va_list ap)
 	    /* Allocate dynamically an array which definitely is long	      \
 	       enough for the wide character version.  Each byte in the	      \
 	       multi-byte string can produce at most one wide character.  */  \
-	    if (__libc_use_alloca (len * sizeof (wchar_t)))		      \
-	      string = (CHAR_T *) alloca (len * sizeof (wchar_t));	      \
-	    else if ((string = (CHAR_T *) malloc (len * sizeof (wchar_t)))    \
-		     == NULL)						      \
+	    size_t needed = MUL_S (len, sizeof (wchar_t));		      \
+	    if (__libc_use_alloca (needed))				      \
+	      string = (CHAR_T *) alloca (needed);			      \
+	    else if ((string = (CHAR_T *) malloc (needed)) == NULL)	      \
 	      {								      \
 		done = -1;						      \
 		goto all_done;						      \
diff --git a/string/strcoll_l.c b/string/strcoll_l.c
index 4ee101a..c0cb3fa 100644
--- a/string/strcoll_l.c
+++ b/string/strcoll_l.c
@@ -24,6 +24,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <saturated.h>
 
 #ifndef STRING_TYPE
 # define STRING_TYPE char
@@ -524,17 +525,12 @@ STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
   memset (&seq1, 0, sizeof (seq1));
   seq2 = seq1;
 
-  size_t size_max = SIZE_MAX / (sizeof (int32_t) + 1);
+  size_t size_needed = MUL_S (s1len + s2len, sizeof (int32_t) + 1);
 
-  if (MIN (s1len, s2len) > size_max
-      || MAX (s1len, s2len) > size_max - MIN (s1len, s2len))
-    {
-      /* If the strings are long enough to cause overflow in the size request,
-         then skip the allocation and proceed with the non-cached routines.  */
-    }
-  else if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
+
+  if (!__libc_use_alloca (size_needed))
     {
-      seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
+      seq1.idxarr = (int32_t *) malloc (size_needed);
 
       /* If we failed to allocate memory, we leave everything as NULL so that
 	 we use the nocache version of traversal and comparison functions.  */
diff --git a/sysdeps/generic/saturated.h b/sysdeps/generic/saturated.h
new file mode 100644
index 0000000..0557146
--- /dev/null
+++ b/sysdeps/generic/saturated.h
@@ -0,0 +1,36 @@
+/* Saturated unsigned arithmetic for size calculations - generic version.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _SATURATED_H
+#define _SATURATED_H
+
+static inline size_t
+ADD_S (size_t x, size_t y)
+{
+  return x + y >= x ? x + y : SIZE_MAX;
+}
+
+#define MUL_S(__x, __y)({ \
+  size_t x = (__x), y = (__y);						\
+  __builtin_constant_p (y)						\
+  ? (x <= SIZE_MAX / (y == 0 ? 1 : y) ? x * y : SIZE_MAX)		\
+  : ((((x | y) >> (8 * sizeof (size_t) / 2) == 0)			\
+      || y <= SIZE_MAX / (x == 0 ? 1 : x)) ? x * y : SIZE_MAX);		\
+})
+
+#endif
diff --git a/sysdeps/posix/getaddrinfo.c b/sysdeps/posix/getaddrinfo.c
index 8ff74b4..df3121d 100644
--- a/sysdeps/posix/getaddrinfo.c
+++ b/sysdeps/posix/getaddrinfo.c
@@ -63,6 +63,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <nscd/nscd-client.h>
 #include <nscd/nscd_proto.h>
 #include <resolv/res_hconf.h>
+#include <saturated.h>
 
 #ifdef HAVE_LIBIDN
 extern int __idna_to_ascii_lz (const char *input, char **output, int flags);
@@ -634,14 +635,12 @@ gaih_inet (const char *name, const struct gaih_service *service,
 		      if (i > 0 && *pat != NULL)
 			--i;
 
-		      if (__libc_use_alloca (alloca_used
-					     + i * sizeof (struct gaih_addrtuple)))
-			addrmem = alloca_account (i * sizeof (struct gaih_addrtuple),
-						  alloca_used);
+		      size_t needed = MUL_S (i,  sizeof (struct gaih_addrtuple));
+		      if (__libc_use_alloca (alloca_used + needed))
+			addrmem = alloca_account (needed, alloca_used);
 		      else
 			{
-			  addrmem = malloc (i
-					    * sizeof (struct gaih_addrtuple));
+			  addrmem = malloc (needed);
 			  if (addrmem == NULL)
 			    {
 			      result = -EAI_MEMORY;
@@ -705,15 +704,13 @@ gaih_inet (const char *name, const struct gaih_service *service,
 		  bool added_canon = (req->ai_flags & AI_CANONNAME) == 0;
 		  char *addrs = air->addrs;
 
-		  if (__libc_use_alloca (alloca_used
-					 + air->naddrs * sizeof (struct gaih_addrtuple)))
-		    addrmem = alloca_account (air->naddrs
-					      * sizeof (struct gaih_addrtuple),
-					      alloca_used);
+		  size_t needed = MUL_S (air->naddrs, sizeof (struct gaih_addrtuple));
+
+		  if (__libc_use_alloca (alloca_used + needed))
+		    addrmem = alloca_account (needed, alloca_used);
 		  else
 		    {
-		      addrmem = malloc (air->naddrs
-					* sizeof (struct gaih_addrtuple));
+		      addrmem = malloc (needed);
 		      if (addrmem == NULL)
 			{
 			  result = -EAI_MEMORY;
@@ -2451,7 +2448,7 @@ getaddrinfo (const char *name, const char *service,
       struct addrinfo *last = NULL;
       char *canonname = NULL;
       bool malloc_results;
-      size_t alloc_size = nresults * (sizeof (*results) + sizeof (size_t));
+      size_t alloc_size = MUL_S (nresults, (sizeof (*results) + sizeof (size_t)));
 
       malloc_results
 	= !__libc_use_alloca (alloc_size);
diff --git a/sysdeps/unix/sysv/linux/x86_64/saturated.h b/sysdeps/unix/sysv/linux/x86_64/saturated.h
new file mode 100644
index 0000000..97e3a37
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/saturated.h
@@ -0,0 +1,35 @@
+/* Saturated unsigned arithmetic for size calculations - generic version.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _SATURATED_H
+#define _SATURATED_H
+
+static inline size_t ADD_S(size_t x, size_t y)
+{
+  size_t ret;
+  asm ("add %%rdx, %%rax; jno 1f; xor %%rax, %%rax; not %%rax; 1:" : "=a" (ret) : "a" (x) , "d" (y));
+  return ret;
+}
+
+static inline size_t MUL_S(size_t x, size_t y)
+{
+  size_t ret;
+  asm ("mul %%rdx; jno 1f; xor %%rax, %%rax; not %%rax; 1:" : "=a" (ret) : "a" (x) , "d" (y));
+  return ret;
+}
+#endif
-- 
1.7.10.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]