This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] BZ #14059 - HAS_FMA4 check needs to also check for AVX


On Thu, May 10, 2012 at 6:08 PM, Carlos O'Donell
<carlos@systemhalted.org> wrote:
> On Tue, May 8, 2012 at 4:23 AM, Andreas Jaeger <aj@suse.de> wrote:
>>
>> There are two things broken in checking for HAS_FMA4/AVX on x86-64 in
>> the multiarch code:
>>
>> 1. We should disable FAM4 if AVX is not available (due to disabled
>> ? OSXSAVE)
>>
>> 2. commit 56f6f6a2403cfa7267cad722597113be35ecf70d reverted some changes
>> ? from commit 08cf777f9e7f6d826658a99c7d77a359f73a45bf but forgot to
>> ? revert the change for AVX. We really have to disable AVX if it's
>>
>> The issue is easy reproduceable under Xen for the reporter.
>>
>> Also, the two commits introduced YMM_Usable and removed its usage but
>> did not remove the code from the headers, I'm cleaning this up as well.
>>
>> I'm appending a patch that I tested on Linux/x86-64 (without Xen).
>>
>> Ok to commit?
>
> No.
>
> I actually think the code Ulrich wrote originally was almost correct,
> but missing this:

If I had to fix it it would look like this:

diff --git a/sysdeps/x86_64/multiarch/init-arch.c
b/sysdeps/x86_64/multiarch/init-arch.c
index 80527ec..0f0efb7 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -145,14 +145,15 @@ __init_cpu_features (void)

   if (__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_AVX)
     {
-      /* Reset the AVX bit in case OSXSAVE is disabled.  */
+      /* Determine if AVX is usable.  */
       if ((__cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx & bit_OSXSAVE) != 0
          && ({ unsigned int xcrlow;
                unsigned int xcrhigh;
                asm ("xgetbv"
                     : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
-               (xcrlow & 6) == 6; }))
-       __cpu_features.feature[index_YMM_Usable] |= bit_YMM_Usable;
+               (xcrlow & (bit_YMM_state | bit_XMM_state)) ==
+                (bit_YMM_state | bit_XMM_state); }))
+       __cpu_features.feature[index_AVX_Usable] = 1;
     }

   __cpu_features.family = family;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h
b/sysdeps/x86_64/multiarch/init-arch.h
index 5054e46..e70c4fe 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -21,8 +21,9 @@
 #define bit_Prefer_SSE_for_memop       (1 << 3)
 #define bit_Fast_Unaligned_Load                (1 << 4)
 #define bit_Prefer_PMINUB_for_stringop (1 << 5)
-#define bit_YMM_Usable                 (1 << 6)
+#define bit_AVX_Usable                 (1 << 6)

+/* CPUID Feature flags.  */
 #define bit_SSE2       (1 << 26)
 #define bit_SSSE3      (1 << 9)
 #define bit_SSE4_1     (1 << 19)
@@ -33,6 +34,10 @@
 #define bit_FMA                (1 << 12)
 #define bit_FMA4       (1 << 16)

+/* XCR0 Feature flags.  */
+#define bit_XMM_state  (1 << 1)
+#define bit_YMM_state  (2 << 1)
+
 #ifdef __ASSEMBLER__

 # include <ifunc-defines.h>
@@ -49,7 +54,7 @@
 # define index_Prefer_SSE_for_memop    FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Unaligned_Load     FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
-# define index_YMM_Usable              FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX_Usable              FEATURE_INDEX_1*FEATURE_SIZE

 #else  /* __ASSEMBLER__ */

@@ -119,7 +124,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSE4_1    HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
 # define HAS_SSE4_2    HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
 # define HAS_FMA       HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_FMA)
-# define HAS_AVX       HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_AVX)
+# define HAS_AVX       HAS_ARCH_FEATURE (AVX_Usable)
 # define HAS_FMA4      HAS_CPU_FEATURE (COMMON_CPUID_INDEX_80000001,
ecx, bit_FMA4)

 # define index_Fast_Rep_String         FEATURE_INDEX_1
@@ -127,7 +132,7 @@ extern const struct cpu_features *__get_cpu_features (void)
 # define index_Slow_BSF                        FEATURE_INDEX_1
 # define index_Prefer_SSE_for_memop    FEATURE_INDEX_1
 # define index_Fast_Unaligned_Load     FEATURE_INDEX_1
-# define index_YMM_Usable              FEATURE_INDEX_1
+# define index_AVX_Usable              FEATURE_INDEX_1

 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
@@ -142,6 +147,6 @@ extern const struct cpu_features *__get_cpu_features (void)

 # define HAS_FAST_UNALIGNED_LOAD HAS_ARCH_FEATURE (Fast_Unaligned_Load)

-# define HAS_YMM_USABLE                HAS_ARCH_FEATURE (YMM_Usable)
+# define HAS_AVX_USABLE                HAS_ARCH_FEATURE (AVX_Usable)

 #endif /* __ASSEMBLER__ */
===

Cheers,
Carlos.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]