This is the mail archive of the
libc-ports@sources.redhat.com
mailing list for the libc-ports project.
[PATCH 09/14] tilegx: small performance fix for string routines
- From: Chris Metcalf <cmetcalf at tilera dot com>
- To: <libc-ports at sourceware dot org>
- Date: Fri, 11 May 2012 18:03:58 -0400
- Subject: [PATCH 09/14] tilegx: small performance fix for string routines
- References: <4FB161CE.6070905@tilera.com>
We were multiplying a byte by 0x0101010101010101ULL to create a
constant for SIMD ops, but the compiler isn't good at optimizing
this case (the fact that one operand is a byte is lost by the time
it would be possible to do the optimization). So instead we add
a helper routine, copy_byte(), that explicitly uses bit-field insert
(bfins) ops to replicate the byte and create the constant for SIMD ops.
---
ChangeLog.tile | 11 +++++++++++
sysdeps/tile/tilegx/memchr.c | 4 ++--
sysdeps/tile/tilegx/memset.c | 5 +++--
sysdeps/tile/tilegx/rawmemchr.c | 4 ++--
sysdeps/tile/tilegx/strchr.c | 4 ++--
sysdeps/tile/tilegx/strchrnul.c | 4 ++--
sysdeps/tile/tilegx/string-endian.h | 12 +++++++++++-
sysdeps/tile/tilegx/strrchr.c | 4 ++--
8 files changed, 35 insertions(+), 13 deletions(-)
diff --git a/ChangeLog.tile b/ChangeLog.tile
index 83136aa..3c192e9 100644
--- a/ChangeLog.tile
+++ b/ChangeLog.tile
@@ -1,5 +1,16 @@
2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
+ * sysdeps/tile/tilegx/memchr.c: Use new copy_byte() function
+ to efficiently generate a large constant for masking.
+ * sysdeps/tile/tilegx/memset.c: Likewise.
+ * sysdeps/tile/tilegx/rawmemchr.c: Likewise.
+ * sysdeps/tile/tilegx/strchr.c: Likewise.
+ * sysdeps/tile/tilegx/strchrnul.c: Likewise.
+ * sysdeps/tile/tilegx/strrchr.c: Likewise.
+ * sysdeps/tile/tilegx/string-endian.h (copy_byte): New function.
+
+2012-05-12 Chris Metcalf <cmetcalf@tilera.com>
+
* sysdeps/tile/tilegx/memcpy.c: Allow memcpy(p, p, n)
without corrupting memory at "p".
diff --git a/sysdeps/tile/tilegx/memchr.c b/sysdeps/tile/tilegx/memchr.c
index aea25ff..32f24ec 100644
--- a/sysdeps/tile/tilegx/memchr.c
+++ b/sysdeps/tile/tilegx/memchr.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@@ -41,7 +41,7 @@ __memchr (const void *s, int c, size_t n)
p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
- goal = 0x0101010101010101ULL * (uint8_t) c;
+ goal = copy_byte(c);
/* Read the first word, but munge it so that bytes before the array
will not match goal. */
diff --git a/sysdeps/tile/tilegx/memset.c b/sysdeps/tile/tilegx/memset.c
index 8083abf..dad3ead 100644
--- a/sysdeps/tile/tilegx/memset.c
+++ b/sysdeps/tile/tilegx/memset.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@@ -19,6 +19,7 @@
#include <arch/chip.h>
#include <string.h>
#include <stdint.h>
+#include "string-endian.h"
void *
__memset (void *s, int c, size_t n)
@@ -71,7 +72,7 @@ __memset (void *s, int c, size_t n)
n64 = n >> 3;
/* Tile input byte out to 64 bits. */
- v64 = 0x0101010101010101ULL * (uint8_t) c;
+ v64 = copy_byte(c);
/* This must be at least 8 or the following loop doesn't work. */
#define CACHE_LINE_SIZE_IN_DOUBLEWORDS (CHIP_L2_LINE_SIZE() / 8)
diff --git a/sysdeps/tile/tilegx/rawmemchr.c b/sysdeps/tile/tilegx/rawmemchr.c
index ed9162d..70b5928 100644
--- a/sysdeps/tile/tilegx/rawmemchr.c
+++ b/sysdeps/tile/tilegx/rawmemchr.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@@ -28,7 +28,7 @@ __rawmemchr (const void *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
- const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
+ const uint64_t goal = copy_byte(c);
/* Read the first word, but munge it so that bytes before the array
will not match goal. */
diff --git a/sysdeps/tile/tilegx/strchr.c b/sysdeps/tile/tilegx/strchr.c
index 8ef4fdc..c6a741b 100644
--- a/sysdeps/tile/tilegx/strchr.c
+++ b/sysdeps/tile/tilegx/strchr.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@@ -32,7 +32,7 @@ strchr (const char *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
- const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
+ const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to
match neither zero nor goal (we make sure the high bit of each byte
diff --git a/sysdeps/tile/tilegx/strchrnul.c b/sysdeps/tile/tilegx/strchrnul.c
index 1181443..4251598 100644
--- a/sysdeps/tile/tilegx/strchrnul.c
+++ b/sysdeps/tile/tilegx/strchrnul.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@@ -30,7 +30,7 @@ __strchrnul (const char *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
- const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
+ const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to
match neither zero nor goal (we make sure the high bit of each byte
diff --git a/sysdeps/tile/tilegx/string-endian.h b/sysdeps/tile/tilegx/string-endian.h
index 280efd3..c2e40ec 100644
--- a/sysdeps/tile/tilegx/string-endian.h
+++ b/sysdeps/tile/tilegx/string-endian.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@@ -33,3 +33,13 @@
#define CFZ(x) __insn_clz(x)
#define REVCZ(x) __insn_ctz(x)
#endif
+
+/* Create eight copies of the byte in a uint64_t. */
+static inline uint64_t copy_byte(uint8_t byte)
+{
+ uint64_t word = byte;
+ word = __insn_bfins(word, word, 8, 15);
+ word = __insn_bfins(word, word, 16, 31);
+ word = __insn_bfins(word, word, 32, 63);
+ return word;
+}
diff --git a/sysdeps/tile/tilegx/strrchr.c b/sysdeps/tile/tilegx/strrchr.c
index 223c59d..b6e43a6 100644
--- a/sysdeps/tile/tilegx/strrchr.c
+++ b/sysdeps/tile/tilegx/strrchr.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2011-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
@@ -28,7 +28,7 @@ strrchr (const char *s, int c)
const uint64_t *p = (const uint64_t *) (s_int & -8);
/* Create eight copies of the byte for which we are looking. */
- const uint64_t goal = 0x0101010101010101ULL * (uint8_t) c;
+ const uint64_t goal = copy_byte(c);
/* Read the first aligned word, but force bytes before the string to
match neither zero nor goal (we make sure the high bit of each byte
--
1.6.5.2