This is the mail archive of the
libc-ports@sources.redhat.com
mailing list for the libc-ports project.
[PATCH] mips: work-around R10k ll/sc errata
- From: Matt Turner <mattst88 at gmail dot com>
- To: libc-ports at sourceware dot org
- Cc: Ralf Baechle <ralf at linux-mips dot org>, Joshua Kinard <kumba at gentoo dot org>, Matt Turner <mattst88 at gmail dot com>
- Date: Tue, 28 Jun 2011 18:03:45 -0400
- Subject: [PATCH] mips: work-around R10k ll/sc errata
The R10000 needs an LL/SC workaround. If it is not applied, all R10k revisions
before 3.0 misbehave on atomic ops, and revisions 2.6 and lower (e.g. SGI IP28)
die after a very few seconds with a deadlock due to further errata.
---
This was sent back in Nov 2008 by Joshua Kinard, but seems to have
fallen through the cracks after a lot of bikeshedding.
I don't particularly care about implementing alternative work-arounds,
such as padding ll/sc loops with 28 nops. I only intend for this patch to be useful when the builder
is using -march=r10000 and/or -mfix-r10000. All other situations are
unaffected.
Please commit.
ChangeLog.mips | 4 ++++
sysdeps/mips/bits/atomic.h | 38 ++++++++++++++++++++++++++++++++------
2 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/ChangeLog.mips b/ChangeLog.mips
index 9abc018..f159d0e 100644
--- a/ChangeLog.mips
+++ b/ChangeLog.mips
@@ -1,3 +1,7 @@
+2011-06-28 Matt Turner <mattst88@gmail.com>
+
+ * sysdeps/mips/bits/atomic.h: Work around R10K ll/sc errata.
+
2011-06-20 Joseph Myers <joseph@codesourcery.com>
* sysdeps/unix/sysv/linux/mips/mips32/sendmmsg.c,
diff --git a/sysdeps/mips/bits/atomic.h b/sysdeps/mips/bits/atomic.h
index 11b3467..2bd723d 100644
--- a/sysdeps/mips/bits/atomic.h
+++ b/sysdeps/mips/bits/atomic.h
@@ -49,6 +49,32 @@ typedef uintmax_t uatomic_max_t;
# define MIPS_SYNC sync
#endif
+/* Certain revisions of the R10000 Processor need an LL/SC Workaround
+ enabled. Revisions before 3.0 misbehave on atomic operations, and
+ Revs 2.6 and lower deadlock after several seconds due to other errata.
+
+ To quote the R10K Errata:
+ Workaround: The basic idea is to inhibit the four instructions
+ from simultaneously becoming active in R10000. Padding all
+ ll/sc sequences with nops or changing the looping branch in the
+ routines to a branch likely (which is always predicted taken
+ by R10000) will work. The nops should go after the loop, and the
+ number of them should be 28. This number could be decremented for
+ each additional instruction in the ll/sc loop such as the lock
+ modifier(s) between the ll and sc, the looping branch and its
+ delay slot. For typical short routines with one ll/sc loop, any
+ instructions after the loop could also count as a decrement. The
+ nop workaround pollutes the cache more but would be a few cycles
+ faster if all the code is in the cache and the looping branch
+ is predicted not taken. */
+
+
+#ifdef _MIPS_ARCH_R10000
+#define R10K_BEQZ_INSN "beqzl"
+#else
+#define R10K_BEQZ_INSN "beqz"
+#endif
+
#define MIPS_SYNC_STR_2(X) #X
#define MIPS_SYNC_STR_1(X) MIPS_SYNC_STR_2(X)
#define MIPS_SYNC_STR MIPS_SYNC_STR_1(MIPS_SYNC)
@@ -74,7 +100,7 @@ typedef uintmax_t uatomic_max_t;
"bne %0,%3,2f\n\t" \
"move %1,%4\n\t" \
"sc %1,%2\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN" %1,1b\n" \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -98,7 +124,7 @@ typedef uintmax_t uatomic_max_t;
"bne %0,%3,2f\n\t" \
"move %1,%4\n\t" \
"scd %1,%2\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN" %1,1b\n" \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -192,7 +218,7 @@ typedef uintmax_t uatomic_max_t;
"ll %0,%4\n\t" \
"move %1,%3\n\t" \
"sc %1,%2\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN" %1,1b\n" \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -216,7 +242,7 @@ typedef uintmax_t uatomic_max_t;
"lld %0,%4\n\t" \
"move %1,%3\n\t" \
"scd %1,%2\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN" %1,1b\n" \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -251,7 +277,7 @@ typedef uintmax_t uatomic_max_t;
"ll %0,%4\n\t" \
"addu %1,%0,%3\n\t" \
"sc %1,%2\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN" %1,1b\n" \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
@@ -275,7 +301,7 @@ typedef uintmax_t uatomic_max_t;
"lld %0,%4\n\t" \
"daddu %1,%0,%3\n\t" \
"scd %1,%2\n\t" \
- "beqz %1,1b\n" \
+ R10K_BEQZ_INSN" %1,1b\n" \
acq "\n\t" \
".set pop\n" \
"2:\n\t" \
--
1.7.3.4