This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Reorder MSA branches


Hi All,

  I forgot to handle the reordering of MSA branches.
Here are two alternative versions of a patch to fix that.

Version 1. We conservatively disable the reordering of MSA branches.

2013-10-17  Chao-ying Fu  <Chao-ying.Fu@imgtec.com>

	* config/tc-mips.c (can_swap_branch_p): Don't swap MSA branches.

Index: gas/config/tc-mips.c
===================================================================
RCS file: /cvs/src/src/gas/config/tc-mips.c,v
retrieving revision 1.594
diff -u -p -r1.594 tc-mips.c
--- gas/config/tc-mips.c	14 Oct 2013 18:50:54 -0000	1.594
+++ gas/config/tc-mips.c	17 Oct 2013 23:43:58 -0000
@@ -6144,6 +6144,10 @@ can_swap_branch_p (struct mips_cl_insn *
   if (gpr_read & prev_gpr_write)
     return FALSE;
 
+  /* If the branch reads MSA registers, we won't swap conservatively.  */
+  if (insn_reg_mask (ip, 1 << OP_REG_MSA, insn_read_mask (ip->insn_mo)))
+    return FALSE;
+
   /* If the branch writes a register that the previous
      instruction sets, we can not swap.  */
   gpr_write = gpr_write_mask (ip);

Version 2.
Because the MSA registers are shared with the FP registers (if an FPU is present),
we check FP and MSA register dependences together.

2013-10-17  Chao-ying Fu  <Chao-ying.Fu@imgtec.com>

	* config/tc-mips.c (fpr_read_mask): Test MSA registers.
	(fpr_write_mask): Test MSA registers.
	(can_swap_branch_p): Check fpr write followed by fpr read.

Index: gas/config/tc-mips.c
===================================================================
RCS file: /cvs/src/src/gas/config/tc-mips.c,v
retrieving revision 1.594
diff -u -p -r1.594 tc-mips.c
--- gas/config/tc-mips.c	14 Oct 2013 18:50:54 -0000	1.594
+++ gas/config/tc-mips.c	17 Oct 2013 23:38:59 -0000
@@ -4161,7 +4161,8 @@ fpr_read_mask (const struct mips_cl_insn
   unsigned long pinfo;
   unsigned int mask;
 
-  mask = insn_reg_mask (ip, (1 << OP_REG_FP) | (1 << OP_REG_VEC),
+  mask = insn_reg_mask (ip, (1 << OP_REG_FP) | (1 << OP_REG_VEC)
+			    | (1 << OP_REG_MSA),
 			insn_read_mask (ip->insn_mo));
   pinfo = ip->insn_mo->pinfo;
   /* Conservatively treat all operands to an FP_D instruction are doubles.
@@ -4179,7 +4180,8 @@ fpr_write_mask (const struct mips_cl_ins
   unsigned long pinfo;
   unsigned int mask;
 
-  mask = insn_reg_mask (ip, (1 << OP_REG_FP) | (1 << OP_REG_VEC),
+  mask = insn_reg_mask (ip, (1 << OP_REG_FP) | (1 << OP_REG_VEC)
+			    | (1 << OP_REG_MSA),
 			insn_write_mask (ip->insn_mo));
   pinfo = ip->insn_mo->pinfo;
   /* Conservatively treat all operands to an FP_D instruction are doubles.
@@ -6070,6 +6072,7 @@ can_swap_branch_p (struct mips_cl_insn *
 {
   unsigned long pinfo, pinfo2, prev_pinfo, prev_pinfo2;
   unsigned int gpr_read, gpr_write, prev_gpr_read, prev_gpr_write;
+  unsigned int fpr_read, prev_fpr_write;
 
   /* -O2 and above is required for this optimization.  */
   if (mips_optimize < 2)
@@ -6144,6 +6147,11 @@ can_swap_branch_p (struct mips_cl_insn *
   if (gpr_read & prev_gpr_write)
     return FALSE;
 
+  fpr_read = fpr_read_mask (ip);
+  prev_fpr_write = fpr_write_mask (&history[0]);
+  if (fpr_read & prev_fpr_write)
+    return FALSE;
+
   /* If the branch writes a register that the previous
      instruction sets, we can not swap.  */
   gpr_write = gpr_write_mask (ip);

# Testing
# cat r.s
        .set    reorder
test:
        fsune.d $w0,$w1,$w2
        bz.d    $w0, test
        fsune.d $w0,$w1,$w2
        bz.d    $w1, test
        fsune.d $w0,$w1,$w2
        bz.d    $w2, test
        add.s   $f0,$f1,$f2
        bz.d    $w0, test
        add.s   $f0,$f1,$f2
        bz.d    $w1, test
        add.s   $f0,$f1,$f2
        bz.d    $w2, test
        add.d   $f0,$f2,$f4
        bz.d    $w0, test
        add.d   $f0,$f2,$f4
        bz.d    $w1, test
        add.d   $f0,$f2,$f4
        bz.d    $w2, test

# as-new r.s -o r.o -mmsa -mips32r2 -mfp64
# objdump -d r.o
00000000 <test>:
   0:   7aa2081c        fsune.d $w0,$w1,$w2
   4:   4760fffe        bz.d    $w0,0 <test>
   8:   00000000        nop
   c:   4761fffc        bz.d    $w1,0 <test>
  10:   7aa2081c        fsune.d $w0,$w1,$w2
  14:   4762fffa        bz.d    $w2,0 <test>
  18:   7aa2081c        fsune.d $w0,$w1,$w2
  1c:   46020800        add.s   $f0,$f1,$f2
  20:   4760fff7        bz.d    $w0,0 <test>
  24:   00000000        nop
  28:   4761fff5        bz.d    $w1,0 <test>
  2c:   46020800        add.s   $f0,$f1,$f2
  30:   4762fff3        bz.d    $w2,0 <test>
  34:   46020800        add.s   $f0,$f1,$f2
  38:   46241000        add.d   $f0,$f2,$f4
  3c:   4760fff0        bz.d    $w0,0 <test>
  40:   00000000        nop
  44:   4761ffee        bz.d    $w1,0 <test>
  48:   46241000        add.d   $f0,$f2,$f4
  4c:   4762ffec        bz.d    $w2,0 <test>
  50:   46241000        add.d   $f0,$f2,$f4

  Any feedback?  Which version is better?  Thanks a lot!

Regards,
Chao-ying


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]