This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [x86-64 psABI]: Extend x86-64 psABI to support AVX-512


On 30 Jul 17:55, Kirill Yukhin wrote:
> On Wed, Jul 24, 2013 at 08:25:14AM -1000, Richard Henderson wrote:
> > On 07/24/2013 05:23 AM, Richard Biener wrote:
> > > "H.J. Lu" <hjl.tools@gmail.com> wrote:
> > > 
> > >> Hi,
> > >>
> > >> Here is a patch to extend x86-64 psABI to support AVX-512:
> > > 
> > > Afaik avx 512 doubles the amount of xmm registers. Can we get them callee saved please?

Hello,
I've implemented a tiny patch on top of `avx512' branch.
It makes the low 128-bit parts of the first 8 new AVX-512 registers callee-saved: xmm16 through xmm23.

Here is performance data. It seems we have a little degradation in GEOMEAN.

Workload: Spec2006
Dataset: test
Options experiment: -m64 -fstrict-aliasing -fno-prefetch-loop-arrays -Ofast -funroll-loops -flto -fwhole-program -mavx512f
Options reference : -m64 -fstrict-aliasing -fno-prefetch-loop-arrays -Ofast -funroll-loops -flto -fwhole-program

		"8 callee-"	"icount, all"	icount
		"save icount"	"call-clobber"	decrease
--------------------------------------------------------
400.perlbench	1686198567	1682320942	-0.23%
401.bzip2	18983033855	18983033907	0.00%
403.gcc		3999481141	3999095681	-0.01%
410.bwaves	13736672428	13736640026	0.00%
416.gamess	1531782811	1531350122	-0.03%
429.mcf		3079764286	3080957858	0.04%
433.milc	14628097067	14628175244	0.00%
434.zeusmp	21336261982	21359384879	0.11%
435.gromacs	3593653152	3588581849	-0.14%
436.cactusADM	2822346689	2828797842	0.23%
437.leslie3d	15903712760	15975143040	0.45%
444.namd	42446067469	43607637322	2.74%
445.gobmk	35272482208	35268743690	-0.01%
447.dealII	42476324881	42507009849	0.07%
450.soplex	45943150	45652666	-0.63%
453.povray	2314481169	2222157619	-3.99%
454.calculix	131024939	131078501	0.04%
456.hmmer	13853478444	13853306947	0.00%
458.sjeng	14173066874	14173066909	0.00%
459.GemsFDTD	2437559044	2437819638	0.01%
462.libquantum	175827242	175657854	-0.10%
464.h264ref	75718510217	75711714226	-0.01%
465.tonto	2505737844	2511457541	0.23%
470.lbm		4799298802	4812180033	0.27%
473.astar	17435751523	17435498947	0.00%
481.wrf		7144685575	7170593748	0.36%
482.sphinx3	6000198462	5984438416	-0.26%
483.xalancbmk	273958223	273638145	-0.12%
--------------------------------------------------------
GEOMEAN		4678862313	4677012093	-0.04%

A bigger % is better; a negative value means that the icount
increased in the experiment.


It seems to me that LRA is not always optimal, e.g. if you compile the attached testcase
with: ./build-x86_64-linux/gcc/xgcc -B./build-x86_64-linux/gcc repro.c -S -Ofast -mavx512f

Assembler for main looks like:
main:
.LFB2331:
        vcvtsi2ss       %edi, %xmm1, %xmm1
        subq    $24, %rsp
        vextractf32x4   $0x0, %zmm16, (%rsp)
        vmovaps %zmm1, %zmm16
        call    test
        vfmadd132ss     .LC1(%rip), %xmm16, %xmm16
        vmovaps %zmm16, %zmm2
        movl    $.LC2, %edi
        movl    $1, %eax
        vunpcklps       %xmm2, %xmm2, %xmm2
        vcvtps2pd       %xmm2, %xmm0
        call    printf
        vmovaps %zmm16, %zmm3
        vinsertf32x4    $0x0, (%rsp), %zmm16, %zmm16
        addq    $24, %rsp
        vcvttss2si      %xmm3, %eax
        ret
I have no idea why we are doing the conversion to %xmm1 and then saving it to %xmm16.
However, it may be a non-LRA issue.

Thanks, K


---
 gcc/config/i386/i386.c | 2 +-
 gcc/config/i386/i386.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6b13ac9..d6d8040 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -9125,7 +9125,7 @@ ix86_nsaved_sseregs (void)
   int nregs = 0;
   int regno;
 
-  if (!TARGET_64BIT_MS_ABI)
+  if (!(TARGET_64BIT_MS_ABI || TARGET_AVX512F))
     return 0;
   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index d7a934d..9faab8b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1026,9 +1026,9 @@ enum target_cpu_default
 /*xmm8,xmm9,xmm10,xmm11,xmm12,xmm13,xmm14,xmm15*/		\
      6,   6,    6,    6,    6,    6,    6,    6,		\
 /*xmm16,xmm17,xmm18,xmm19,xmm20,xmm21,xmm22,xmm23*/		\
-     6,    6,     6,    6,    6,    6,    6,    6,		\
+     0,    0,     0,    0,    0,    0,    0,    0,		\
 /*xmm24,xmm25,xmm26,xmm27,xmm28,xmm29,xmm30,xmm31*/		\
-     6,    6,     6,    6,    6,    6,    6,    6,		\
+     1,    1,     1,    1,    1,    1,    1,    1,		\
  /* k0,  k1,  k2,  k3,  k4,  k5,  k6,  k7*/			\
      1,   1,   1,   1,   1,   1,   1,   1 }
 
-- 
1.7.11.7


Attachment: repro.c
Description: Text document


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]