This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] BZ #14059 - HAS_FMA4 check needs to also check for AVX


On 05/11/2012 12:18 AM, Carlos O'Donell wrote:
> On Thu, May 10, 2012 at 6:08 PM, Carlos O'Donell
> <carlos@systemhalted.org>  wrote:
>> On Tue, May 8, 2012 at 4:23 AM, Andreas Jaeger<aj@suse.de>  wrote:
>>>
>>> There are two things broken in checking for HAS_FMA4/AVX on x86-64 in
>>> the multiarch code:
>>>
>>> 1. We should disable FAM4 if AVX is not available (due to disabled
>>>    OSXSAVE)
>>>
>>> 2. commit 56f6f6a2403cfa7267cad722597113be35ecf70d reverted some changes
>>>    from commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf but forgot to
>>>    revert the change for AVX. We really have to disable AVX if it's
>>>
>>> The issue is easy reproduceable under Xen for the reporter.
>>>
>>> Also, the two commits introduced YMM_Usable and removed its usage but
>>> did not remove the code from the headers, I'm cleaning this up as well.
>>>
>>> I'm appending a patch that I tested on Linux/x86-64 (without Xen).
>>>
>>> Ok to commit?
>>
>> No.
>>
>> I actually think the code Ulrich wrote originally was almost correct,
>> but missing this:
> 
> If I had to fix it it would look like this:

Thanks, this looks great - but misses the check for FMA4.

Here's a combined patch that compiled fine for me on
Linux/x86-64. Jeff, could you test it under Xen, please?

Andreas

2012-05-11  Andreas Jaeger  <aj@suse.de>
	    Carlos O'Donell  <carlos_odonell@mentor.com>

	[BZ #14059]
	* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
	Fix check for AVX, enable FMA4 only if it exists and if AVX is
	usable.

	* sysdeps/x86_64/multiarch/init-arch.h (bit_AVX_Usable): Renamed
	from bit_YMM_usable.
	(bit_FMA4_Usable): New macro.
	(bit_XMM_state): New macro.
	(bit_YMM_state): New macro.
	(index_AVX_Usable): Renamed from index_YMM_Usable.
	(index_FMA4_Usable): New macro.
	(HAS_YMM_USABLE): Delete.
	(HAS_AVX): Use HAS_ARCH_FEATURE.
	(HAS_FMA4): Likewise.

diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index 80527ec..bc8e1df 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -145,16 +145,24 @@ __init_cpu_features (void)
 
   if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
     {
-      /* Reset the AVX bit in case OSXSAVE is disabled.  */
+      /* Determine if AVX is usable.  */
       if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
 	  && ({ unsigned int xcrlow;
 		unsigned int xcrhigh;
 		asm ("xgetbv"
 		     : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
-		(xcrlow & 6) == 6; }))
-	__cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+		(xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+		 (bit_YMM_state | bit_XMM_state); }))
+	__cpu_features.feature[index_AVX_Usable] = 1;
     }
 
+  /* FMA4 depends on AVX support.  */
+  if (__cpu_features.cpuid[COMMON_CPUID_INDEX_80000001].ecx & bit_FMA4
+      && HAS_AVX)
+	__cpu_features.feature[index_AVX_Usable] = 1;
+  else
+	__cpu_features.feature[index_AVX_Usable] = 0;
+
   __cpu_features.family = family;
   __cpu_features.model = model;
   atomic_write_barrier ();
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 5054e46..2d02e2b 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -21,8 +21,10 @@
 #define bit_Prefer_SSE_for_memop	(1 << 3)
 #define bit_Fast_Unaligned_Load		(1 << 4)
 #define bit_Prefer_PMINUB_for_stringop	(1 << 5)
-#define bit_YMM_Usable			(1 << 6)
+#define bit_AVX_Usable			(1 << 6)
+#define bit_FMA4_Usable			(1 << 7)
 
+/* CPUID Feature flags.  */
 #define bit_SSE2	(1 << 26)
 #define bit_SSSE3	(1 << 9)
 #define bit_SSE4_1	(1 << 19)
@@ -33,6 +35,10 @@
 #define bit_FMA		(1 << 12)
 #define bit_FMA4	(1 << 16)
 
+/* XCR0 Feature flags.  */
+#define bit_XMM_state  (1 << 1)
+#define bit_YMM_state  (2 << 1)
+
 #ifdef	__ASSEMBLER__
 
 # include <ifunc-defines.h>
@@ -49,7 +55,8 @@
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_YMM_Usable		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Usable		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -119,15 +126,14 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
 # define HAS_FMA	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define HAS_AVX	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
-# define HAS_FMA4	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
 # define index_Slow_BSF			FEATURE_INDEX_1
 # define index_Prefer_SSE_for_memop	FEATURE_INDEX_1
 # define index_Fast_Unaligned_Load	FEATURE_INDEX_1
-# define index_YMM_Usable		FEATURE_INDEX_1
+# define index_AVX_Usable		FEATURE_INDEX_1
+# define index_FMA4_Usable		FEATURE_INDEX_1
 
 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
@@ -142,6 +148,8 @@ extern const struct cpu_features *__get_cpu_features (void)
 
 # define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 
-# define HAS_YMM_USABLE		HAS_ARCH_FEATURE (YMM_Usable)
+# define HAS_AVX		HAS_ARCH_FEATURE (AVX_Usable)
+
+# define HAS_FMA4		HAS_ARCH_FEATURE (FMA4_Usable)
 
 #endif	/* __ASSEMBLER__ */

-- 
 Andreas Jaeger aj@{suse.com,opensuse.org} Twitter/Identica: jaegerandi
  SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
   GF: Jeff Hawn,Jennifer Guild,Felix Imendörffer,HRB16746 (AG Nürnberg)
    GPG fingerprint = 93A3 365E CE47 B889 DF7F  FED1 389A 563C C272 A126


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]