This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
Re: [PATCH] NPTL cancellation optimization - please review
- From: Matt Turner <mattst88 at gmail dot com>
- To: Alexander Fyodorov <alexvf at bk dot ru>
- Cc: libc-alpha at sourceware dot org
- Date: Fri, 24 Dec 2010 04:23:23 +0000
- Subject: Re: [PATCH] NPTL cancellation optimization - please review
- References: <E1Np6Eu-0002z4-00.alexvf-bk-ru@f298.mail.ru>
2010/3/9 Alexander Fyodorov <alexvf@bk.ru>:
> Hi
>
> This patch improves performance of functions changing thread cancellation state and type by removing atomic operations from them. Their speed roughly doubled on my Core Quad.
>
> Main idea is simple: since state and type are changed only by the thread itself, they should not require such rigorous synchronization. The 'cancelhandling' word takes 4 bytes, so we can put type and state in different bytes within the word and access them directly. Specific position of a given flag will then depend on endianness.
>
> Checking whether the thread was canceled must be done after it enables cancellation or sets it to asynchronous mode. To enforce this order, I put a barrier between the respective store and load. Since the read is data-dependent on the corresponding write and many architectures do not reorder such accesses, putting atomic_full_barrier() there would be overkill. So I added a new type of barrier which defaults to a full barrier.
>
> On my computer 'make check' fails some tests even without the patch, so I am not sure about me being able to test it. At least it broke nothing that was working before.
>
>
>
> 2010-03-09 Fyodorov V. Alexander <alexvf@bk.ru>
>
> * nptl/descr.h: Change bits position in the 'cancelhandling' field.
> * include/atomic.h: Define atomic_full_barrier
> to __sync_synchronize. Add default
> atomic_read_after_write_dependent_barrier definition.
> * sysdeps/ia64/bits/atomic.h: Remove atomic_full_barrier
> definition.
> * nptl/cancellation.c: Replace atomic operation with a barrier.
> * nptl/cleanup_defer_compat.c: Likewise.
> * nptl/pthread_setcancelstate.c: Likewise.
> * nptl/pthread_setcanceltype.c: Likewise.
> * nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S: Delete.
> * nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S: Delete.
> * nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S: Delete.
> * sysdeps/i386/i486/bits/atomic.h: Define
> atomic_read_after_write_dependent_barrier.
> * sysdeps/x86_64/bits/atomic.h: Likewise.
> * sysdeps/sparc/sparc32/bits/atomic.h: Likewise.
> * sysdeps/sparc/sparc64/bits/atomic.h: Likewise.
>
>
> diff --git a/include/atomic.h b/include/atomic.h
> index 37d0111..2f708f1 100644
> --- a/include/atomic.h
> +++ b/include/atomic.h
> @@ -510,7 +510,7 @@
> #endif
>
> #ifndef atomic_full_barrier
> -# define atomic_full_barrier() __asm ("" ::: "memory")
> +# define atomic_full_barrier() __sync_synchronize ()
> #endif
>
>
> @@ -534,4 +534,9 @@
> # define atomic_delay() do { /* nothing */ } while (0)
> #endif
>
> +
> +#ifndef atomic_read_after_write_dependent_barrier()
> +# define atomic_read_after_write_dependent_barrier() atomic_full_barrier ()
> +#endif
> +
> #endif /* atomic.h */
> diff --git a/nptl/cancellation.c b/nptl/cancellation.c
> index eac7973..b6ed39a 100644
> --- a/nptl/cancellation.c
> +++ b/nptl/cancellation.c
> @@ -32,29 +32,13 @@ __pthread_enable_asynccancel (void)
> struct pthread *self = THREAD_SELF;
> int oldval = THREAD_GETMEM (self, cancelhandling);
>
> - while (1)
> - {
> - int newval = oldval | CANCELTYPE_BITMASK;
> -
> - if (newval == oldval)
> - break;
> -
> - int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling, newval,
> - oldval);
> - if (__builtin_expect (curval == oldval, 1))
> - {
> - if (CANCEL_ENABLED_AND_CANCELED_AND_ASYNCHRONOUS (newval))
> - {
> - THREAD_SETMEM (self, result, PTHREAD_CANCELED);
> - __do_cancel ();
> - }
> -
> - break;
> - }
> -
> - /* Prepare the next round. */
> - oldval = curval;
> - }
> + /* Set the new value. */
> + THREAD_SETMEM (self, cancel.type, (char) PTHREAD_CANCEL_ASYNCHRONOUS);
> +
> + /* See comment in pthread_setcancelstate (). */
> + atomic_read_after_write_dependent_barrier();
> +
> + CANCELLATION_P (self);
>
> return oldval;
> }
> @@ -70,22 +54,14 @@ __pthread_disable_asynccancel (int oldtype)
> return;
>
> struct pthread *self = THREAD_SELF;
> - int newval;
>
> - int oldval = THREAD_GETMEM (self, cancelhandling);
> -
> - while (1)
> - {
> - newval = oldval & ~CANCELTYPE_BITMASK;
> + /* Set the new value. */
> + THREAD_SETMEM (self, cancel.type, (char) PTHREAD_CANCEL_DEFERRED);
>
> - int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling, newval,
> - oldval);
> - if (__builtin_expect (curval == oldval, 1))
> - break;
> + /* See comment in pthread_setcancelstate (). */
> + atomic_read_after_write_dependent_barrier();
>
> - /* Prepare the next round. */
> - oldval = curval;
> - }
> + int newval = THREAD_GETMEM (self, cancelhandling);
>
> /* We cannot return when we are being canceled. Upon return the
> thread might be things which would have to be undone. The
> diff --git a/nptl/cleanup_defer_compat.c b/nptl/cleanup_defer_compat.c
> index a0ed6da..51078d8 100644
> --- a/nptl/cleanup_defer_compat.c
> +++ b/nptl/cleanup_defer_compat.c
> @@ -35,20 +35,7 @@ _pthread_cleanup_push_defer (buffer, routine, arg)
> int cancelhandling = THREAD_GETMEM (self, cancelhandling);
>
> /* Disable asynchronous cancellation for now. */
> - if (__builtin_expect (cancelhandling & CANCELTYPE_BITMASK, 0))
> - while (1)
> - {
> - int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling,
> - cancelhandling
> - & ~CANCELTYPE_BITMASK,
> - cancelhandling);
> - if (__builtin_expect (curval == cancelhandling, 1))
> - /* Successfully replaced the value. */
> - break;
> -
> - /* Prepare for the next round. */
> - cancelhandling = curval;
> - }
> + THREAD_SETMEM (self, cancel.type, (char) PTHREAD_CANCEL_DEFERRED);
>
> buffer->__canceltype = (cancelhandling & CANCELTYPE_BITMASK
> ? PTHREAD_CANCEL_ASYNCHRONOUS
> @@ -68,24 +55,12 @@ _pthread_cleanup_pop_restore (buffer, execute)
>
> THREAD_SETMEM (self, cleanup, buffer->__prev);
>
> - int cancelhandling;
> - if (__builtin_expect (buffer->__canceltype != PTHREAD_CANCEL_DEFERRED, 0)
> - && ((cancelhandling = THREAD_GETMEM (self, cancelhandling))
> - & CANCELTYPE_BITMASK) == 0)
> + THREAD_SETMEM (self, cancel.type, (char) buffer->__canceltype);
> +
> + if (buffer->__canceltype == PTHREAD_CANCEL_ASYNCHRONOUS)
> {
> - while (1)
> - {
> - int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling,
> - cancelhandling
> - | CANCELTYPE_BITMASK,
> - cancelhandling);
> - if (__builtin_expect (curval == cancelhandling, 1))
> - /* Successfully replaced the value. */
> - break;
> -
> - /* Prepare for the next round. */
> - cancelhandling = curval;
> - }
> + /* See comment in pthread_setcancelstate (). */
> + atomic_read_after_write_dependent_barrier();
>
> CANCELLATION_P (self);
> }
> diff --git a/nptl/descr.h b/nptl/descr.h
> index 9c366e7..dfb350f 100644
> --- a/nptl/descr.h
> +++ b/nptl/descr.h
> @@ -20,6 +20,7 @@
> #ifndef _DESCR_H
> #define _DESCR_H 1
>
> +#include <endian.h>
> #include <limits.h>
> #include <sched.h>
> #include <setjmp.h>
> @@ -240,30 +241,62 @@ struct pthread
> #define HAVE_CLEANUP_JMP_BUF
>
> /* Flags determining processing of cancellation. */
> - int cancelhandling;
> + union {
> + int cancelhandling;
> + struct {
> + char state;
> + char type;
> + } cancel;
> + };
> +#if BYTE_ORDER == LITTLE_ENDIAN
> /* Bit set if cancellation is disabled. */
> #define CANCELSTATE_BIT 0
> -#define CANCELSTATE_BITMASK (0x01 << CANCELSTATE_BIT)
> /* Bit set if asynchronous cancellation mode is selected. */
> -#define CANCELTYPE_BIT 1
> -#define CANCELTYPE_BITMASK (0x01 << CANCELTYPE_BIT)
> +#define CANCELTYPE_BIT 8
> /* Bit set if canceling has been initiated. */
> -#define CANCELING_BIT 2
> -#define CANCELING_BITMASK (0x01 << CANCELING_BIT)
> +#define CANCELING_BIT 16
> /* Bit set if canceled. */
> -#define CANCELED_BIT 3
> -#define CANCELED_BITMASK (0x01 << CANCELED_BIT)
> +#define CANCELED_BIT 17
> /* Bit set if thread is exiting. */
> -#define EXITING_BIT 4
> -#define EXITING_BITMASK (0x01 << EXITING_BIT)
> +#define EXITING_BIT 18
> /* Bit set if thread terminated and TCB is freed. */
> -#define TERMINATED_BIT 5
> -#define TERMINATED_BITMASK (0x01 << TERMINATED_BIT)
> +#define TERMINATED_BIT 19
> /* Bit set if thread is supposed to change XID. */
> -#define SETXID_BIT 6
> +#define SETXID_BIT 20
> +#else
> +#if BYTE_ORDER == BIG_ENDIAN
> + /* Bit set if cancellation is disabled. */
> +#define CANCELSTATE_BIT 24
> + /* Bit set if asynchronous cancellation mode is selected. */
> +#define CANCELTYPE_BIT 16
> +#else /* BYTE_ORDER == PDP_ENDIAN */
> + /* Bit set if cancellation is disabled. */
> +#define CANCELSTATE_BIT 16
> + /* Bit set if asynchronous cancellation mode is selected. */
> +#define CANCELTYPE_BIT 24
> +#endif
> + /* Bit set if canceling has been initiated. */
> +#define CANCELING_BIT 0
> + /* Bit set if canceled. */
> +#define CANCELED_BIT 1
> + /* Bit set if thread is exiting. */
> +#define EXITING_BIT 2
> + /* Bit set if thread terminated and TCB is freed. */
> +#define TERMINATED_BIT 3
> + /* Bit set if thread is supposed to change XID. */
> +#define SETXID_BIT 4
> +#endif
> +#define CANCELSTATE_BITMASK (0x01 << CANCELSTATE_BIT)
> +#define CANCELTYPE_BITMASK (0x01 << CANCELTYPE_BIT)
> +#define CANCELING_BITMASK (0x01 << CANCELING_BIT)
> +#define CANCELED_BITMASK (0x01 << CANCELED_BIT)
> +#define EXITING_BITMASK (0x01 << EXITING_BIT)
> +#define TERMINATED_BITMASK (0x01 << TERMINATED_BIT)
> #define SETXID_BITMASK (0x01 << SETXID_BIT)
> /* Mask for the rest. Helps the compiler to optimize. */
> -#define CANCEL_RESTMASK 0xffffff80
> +#define CANCEL_RESTMASK \
> + ( ~((int) (CANCELSTATE_BITMASK | CANCELTYPE_BITMASK | CANCELING_BITMASK | CANCELED_BITMASK \
> + | EXITING_BITMASK | TERMINATED_BITMASK | SETXID_BITMASK)) )
>
> #define CANCEL_ENABLED_AND_CANCELED(value) \
> (((value) & (CANCELSTATE_BITMASK | CANCELED_BITMASK | EXITING_BITMASK \
> diff --git a/nptl/pthread_setcancelstate.c b/nptl/pthread_setcancelstate.c
> index a452c2e..580fd15 100644
> --- a/nptl/pthread_setcancelstate.c
> +++ b/nptl/pthread_setcancelstate.c
> @@ -34,38 +34,37 @@ __pthread_setcancelstate (state, oldstate)
>
> self = THREAD_SELF;
>
> - int oldval = THREAD_GETMEM (self, cancelhandling);
> - while (1)
> + /* Store the old value. */
> + if (oldstate != NULL)
> {
> - int newval = (state == PTHREAD_CANCEL_DISABLE
> - ? oldval | CANCELSTATE_BITMASK
> - : oldval & ~CANCELSTATE_BITMASK);
> + int oldval = THREAD_GETMEM (self, cancelhandling);
>
> - /* Store the old value. */
> - if (oldstate != NULL)
> - *oldstate = ((oldval & CANCELSTATE_BITMASK)
> - ? PTHREAD_CANCEL_DISABLE : PTHREAD_CANCEL_ENABLE);
> + *oldstate = ((oldval & CANCELSTATE_BITMASK)
> + ? PTHREAD_CANCEL_DISABLE : PTHREAD_CANCEL_ENABLE);
> + }
>
> - /* Avoid doing unnecessary work. The atomic operation can
> - potentially be expensive if the memory has to be locked and
> - remote cache lines have to be invalidated. */
> - if (oldval == newval)
> - break;
> + /* Set the new value. */
> + THREAD_SETMEM (self, cancel.state, (char) state);
>
> - /* Update the cancel handling word. This has to be done
> - atomically since other bits could be modified as well. */
> - int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling, newval,
> - oldval);
> - if (__builtin_expect (curval == oldval, 1))
> - {
> - if (CANCEL_ENABLED_AND_CANCELED_AND_ASYNCHRONOUS (newval))
> - __do_cancel ();
> + if (state == PTHREAD_CANCEL_ENABLE)
> + {
> + /* This (and similar) barrier makes sure that we check whether the thread
> + * was canceled only _after_ we enable cancellation. Otherwise there is a chance
> + * that pthread_cancel() will not send SIGCANCEL and pthread_setcancelstate()
> + * will not notice that CANCELED_BIT was set by pthread_cancel().
> + * Note that the read and the write accesses refer to the overlapping memory
> + * locations. Some architectures (Itanium) can reorder a data-dependent read
> + * before an older write while some (i386, Sparc) can not. So it is possible to
> + * remove this barrier in some cases. */
> + atomic_read_after_write_dependent_barrier();
>
> - break;
> - }
> + int newval = THREAD_GETMEM (self, cancelhandling);
>
> - /* Prepare for the next round. */
> - oldval = curval;
> + if (CANCEL_ENABLED_AND_CANCELED_AND_ASYNCHRONOUS(newval))
> + {
> + THREAD_SETMEM (self, result, PTHREAD_CANCELED);
> + __do_cancel ();
> + }
> }
>
> return 0;
> diff --git a/nptl/pthread_setcanceltype.c b/nptl/pthread_setcanceltype.c
> index bbe87ba..43d3dab 100644
> --- a/nptl/pthread_setcanceltype.c
> +++ b/nptl/pthread_setcanceltype.c
> @@ -34,43 +34,31 @@ __pthread_setcanceltype (type, oldtype)
>
> self = THREAD_SELF;
>
> - int oldval = THREAD_GETMEM (self, cancelhandling);
> - while (1)
> + /* Store the old value. */
> + if (oldtype != NULL)
> {
> - int newval = (type == PTHREAD_CANCEL_ASYNCHRONOUS
> - ? oldval | CANCELTYPE_BITMASK
> - : oldval & ~CANCELTYPE_BITMASK);
> + int oldval = THREAD_GETMEM (self, cancelhandling);
>
> - /* Store the old value. */
> - if (oldtype != NULL)
> - *oldtype = ((oldval & CANCELTYPE_BITMASK)
> - ? PTHREAD_CANCEL_ASYNCHRONOUS : PTHREAD_CANCEL_DEFERRED);
> + *oldtype = ((oldval & CANCELTYPE_BITMASK)
> + ? PTHREAD_CANCEL_ASYNCHRONOUS : PTHREAD_CANCEL_DEFERRED);
> + }
>
> - /* Avoid doing unnecessary work. The atomic operation can
> - potentially be expensive if the memory has to be locked and
> - remote cache lines have to be invalidated. */
> - if (oldval == newval)
> - break;
> + /* Set the new value. */
> + THREAD_SETMEM (self, cancel.type, (char) type);
>
> - /* Update the cancel handling word. This has to be done
> - atomically since other bits could be modified as well. */
> - int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling, newval,
> - oldval);
> - if (__builtin_expect (curval == oldval, 1))
> - {
> - if (CANCEL_ENABLED_AND_CANCELED_AND_ASYNCHRONOUS (newval))
> - {
> - THREAD_SETMEM (self, result, PTHREAD_CANCELED);
> - __do_cancel ();
> - }
> + if (type == PTHREAD_CANCEL_ASYNCHRONOUS)
> + {
> + /* See comment in pthread_setcancelstate (). */
> + atomic_read_after_write_dependent_barrier();
>
> - break;
> - }
> + int newval = THREAD_GETMEM (self, cancelhandling);
>
> - /* Prepare for the next round. */
> - oldval = curval;
> + if (CANCEL_ENABLED_AND_CANCELED_AND_ASYNCHRONOUS(newval))
> + {
> + THREAD_SETMEM (self, result, PTHREAD_CANCELED);
> + __do_cancel ();
> + }
> }
> -
> return 0;
> }
> strong_alias (__pthread_setcanceltype, pthread_setcanceltype)
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
> index 6806962..e69de29 100644
> --- a/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
> +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/cancellation.S
> @@ -1,116 +0,0 @@
> -/* Copyright (C) 2009 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> - Contributed by Ulrich Drepper <drepper@redhat.com>, 2009.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, write to the Free
> - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> - 02111-1307 USA. */
> -
> -#include <sysdep.h>
> -#include <tcb-offsets.h>
> -#include <kernel-features.h>
> -#include "lowlevellock.h"
> -
> -#ifdef IS_IN_libpthread
> -# ifdef SHARED
> -# define __pthread_unwind __GI___pthread_unwind
> -# endif
> -#else
> -# ifndef SHARED
> - .weak __pthread_unwind
> -# endif
> -#endif
> -
> -
> -#ifdef __ASSUME_PRIVATE_FUTEX
> -# define LOAD_PRIVATE_FUTEX_WAIT(reg) \
> - movl $(FUTEX_WAIT | FUTEX_PRIVATE_FLAG), reg
> -#else
> -# if FUTEX_WAIT == 0
> -# define LOAD_PRIVATE_FUTEX_WAIT(reg) \
> - movl %fs:PRIVATE_FUTEX, reg
> -# else
> -# define LOAD_PRIVATE_FUTEX_WAIT(reg) \
> - movl %fs:PRIVATE_FUTEX, reg ; \
> - orl $FUTEX_WAIT, reg
> -# endif
> -#endif
> -
> -/* It is crucial that the functions in this file don't modify registers
> - other than %rax and %r11. The syscall wrapper code depends on this
> - because it doesn't explicitly save the other registers which hold
> - relevant values. */
> - .text
> -
> - .hidden __pthread_enable_asynccancel
> -ENTRY(__pthread_enable_asynccancel)
> - movl %fs:CANCELHANDLING, %eax
> -2: movl %eax, %r11d
> - orl $TCB_CANCELTYPE_BITMASK, %r11d
> - cmpl %eax, %r11d
> - je 1f
> -
> - lock
> - cmpxchgl %r11d, %fs:CANCELHANDLING
> - jnz 2b
> -
> - andl $(TCB_CANCELSTATE_BITMASK|TCB_CANCELTYPE_BITMASK|TCB_CANCELED_BITMASK|TCB_EXITING_BITMASK|TCB_CANCEL_RESTMASK|TCB_TERMINATED_BITMASK), %r11d
> - cmpl $(TCB_CANCELTYPE_BITMASK|TCB_CANCELED_BITMASK), %r11d
> - je 3f
> -
> -1: ret
> -
> -3: movq $TCB_PTHREAD_CANCELED, %fs:RESULT
> - lock
> - orl $TCB_EXITING_BITMASK, %fs:CANCELHANDLING
> - movq %fs:CLEANUP_JMP_BUF, %rdi
> -#ifdef SHARED
> - call __pthread_unwind@PLT
> -#else
> - call __pthread_unwind
> -#endif
> - hlt
> -END(__pthread_enable_asynccancel)
> -
> -
> - .hidden __pthread_disable_asynccancel
> -ENTRY(__pthread_disable_asynccancel)
> - testl $TCB_CANCELTYPE_BITMASK, %edi
> - jnz 1f
> -
> - movl %fs:CANCELHANDLING, %eax
> -2: movl %eax, %r11d
> - andl $~TCB_CANCELTYPE_BITMASK, %r11d
> - lock
> - cmpxchgl %r11d, %fs:CANCELHANDLING
> - jnz 2b
> -
> - movl %r11d, %eax
> -3: andl $(TCB_CANCELING_BITMASK|TCB_CANCELED_BITMASK), %eax
> - cmpl $TCB_CANCELING_BITMASK, %eax
> - je 4f
> -1: ret
> -
> - /* Performance doesn't matter in this loop. We will
> - delay until the thread is canceled. And we will unlikely
> - enter the loop twice. */
> -4: movq %fs:0, %rdi
> - movl $__NR_futex, %eax
> - xorq %r10, %r10
> - addq $CANCELHANDLING, %rdi
> - LOAD_PRIVATE_FUTEX_WAIT (%esi)
> - syscall
> - movl %fs:CANCELHANDLING, %eax
> - jmp 3b
> -END(__pthread_disable_asynccancel)
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S
> index 1100588..e69de29 100644
> --- a/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S
> +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/libc-cancellation.S
> @@ -1,22 +0,0 @@
> -/* Copyright (C) 2009 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> - Contributed by Ulrich Drepper <drepper@redhat.com>, 2009.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, write to the Free
> - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> - 02111-1307 USA. */
> -
> -#define __pthread_enable_asynccancel __libc_enable_asynccancel
> -#define __pthread_disable_asynccancel __libc_disable_asynccancel
> -#include "cancellation.S"
> diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S b/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S
> index ce4192b..e69de29 100644
> --- a/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S
> +++ b/nptl/sysdeps/unix/sysv/linux/x86_64/librt-cancellation.S
> @@ -1,22 +0,0 @@
> -/* Copyright (C) 2009 Free Software Foundation, Inc.
> - This file is part of the GNU C Library.
> - Contributed by Ulrich Drepper <drepper@redhat.com>, 2009.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, write to the Free
> - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
> - 02111-1307 USA. */
> -
> -#define __pthread_enable_asynccancel __librt_enable_asynccancel
> -#define __pthread_disable_asynccancel __librt_disable_asynccancel
> -#include "cancellation.S"
> diff --git a/sysdeps/i386/i486/bits/atomic.h b/sysdeps/i386/i486/bits/atomic.h
> index 4ee6fef..55fe2a0 100644
> --- a/sysdeps/i386/i486/bits/atomic.h
> +++ b/sysdeps/i386/i486/bits/atomic.h
> @@ -553,3 +553,5 @@ typedef uintmax_t uatomic_max_t;
> #define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask)
>
> #define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask)
> +
> +#define atomic_read_after_write_dependent_barrier() __asm __volatile("" ::: "memory")
> diff --git a/sysdeps/ia64/bits/atomic.h b/sysdeps/ia64/bits/atomic.h
> index 1020c2f..8b4d6bf 100644
> --- a/sysdeps/ia64/bits/atomic.h
> +++ b/sysdeps/ia64/bits/atomic.h
> @@ -115,5 +115,3 @@ typedef uintmax_t uatomic_max_t;
> } \
> while (__builtin_expect (__val != __oldval, 0)); \
> __oldval & __mask; })
> -
> -#define atomic_full_barrier() __sync_synchronize ()
> diff --git a/sysdeps/sparc/sparc32/bits/atomic.h b/sysdeps/sparc/sparc32/bits/atomic.h
> index ef553f7..0143793 100644
> --- a/sysdeps/sparc/sparc32/bits/atomic.h
> +++ b/sysdeps/sparc/sparc32/bits/atomic.h
> @@ -324,4 +324,6 @@ extern uint64_t _dl_hwcap __attribute__((weak));
>
> #endif
>
> +#define atomic_read_after_write_dependent_barrier() __asm __volatile("" ::: "memory")
> +
> #endif /* bits/atomic.h */
> diff --git a/sysdeps/sparc/sparc64/bits/atomic.h b/sysdeps/sparc/sparc64/bits/atomic.h
> index d0a64af..859adc8 100644
> --- a/sysdeps/sparc/sparc64/bits/atomic.h
> +++ b/sysdeps/sparc/sparc64/bits/atomic.h
> @@ -107,3 +107,5 @@ typedef uintmax_t uatomic_max_t;
> __asm __volatile ("membar #LoadLoad | #LoadStore" : : : "memory")
> #define atomic_write_barrier() \
> __asm __volatile ("membar #StoreLoad | #StoreStore" : : : "memory")
> +#define atomic_read_after_write_dependent_barrier() \
> + __asm __volatile("" ::: "memory")
> diff --git a/sysdeps/x86_64/bits/atomic.h b/sysdeps/x86_64/bits/atomic.h
> index 7c138eb..9c895cd 100644
> --- a/sysdeps/x86_64/bits/atomic.h
> +++ b/sysdeps/x86_64/bits/atomic.h
> @@ -496,3 +496,5 @@ typedef uintmax_t uatomic_max_t;
> #define atomic_or(mem, mask) __arch_or_body (LOCK_PREFIX, mem, mask)
>
> #define catomic_or(mem, mask) __arch_or_body (__arch_cprefix, mem, mask)
> +
> +#define atomic_read_after_write_dependent_barrier() __asm __volatile("" ::: "memory")
Did anyone ever review this?