[Valgrind-developers] Fixes for regression failures, part 2 PPC64 specific

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

Julian:

The second issue that is causing regression errors on PPC64 has to do
with the grail changes as mentioned in some private emails. Currently
there is no bugzilla for this issue.

Specifically the commit in question is:

   commit 076a79a48e251067758e1e9d8e50681450ed3889
   Author: Julian Seward <js...@ac...>
   Date:   Wed Nov 27 08:52:45 2019 +0100

       'grail' fixes for ppc32 and ppc64:

       * do_minimal_initial_iropt_BB: for ppc64, flatten rather than
   assert flatness.
         (Kludge. Sigh.)
   etc.

The patch adds the following code in ir_opt.c 

      // FIXME2 The TOC-redirect-hacks generators in m_translate.c -- gen_PUSH()
      //        and gen_PO() -- don't generate flat IR, and so cause this assertion
      //        to fail.  For the time being, hack around this by flattening,
      //        rather than asserting for flatness, on the afflicted platforms.
      //        This is a kludge, yes.
      if (guest_arch == VexArchPPC64) {
         bb0 = flatten_BB(bb0); // Kludge!
      } else {
         vassert(isFlatIRSB(bb0)); // How it Really Should Be (tm).
      }

The issue comes from the new expressions generated by flatten_BB(bb0). 
As mentioned in previous private emails, the flatten_BB() generates
V128 expressions for Iex_ITE which is not supported.

The following patch adds the needed support for Iex_ITE for V128
expressions.  I kinda get what the Iex_ITE needs to do but don't claim
to completely understand it all or why the kludge calls flatten_BB()
only for the PPC64 architecture.  It appears you are planning to remove
the hack once things were "properly" fixed.  Not sure if this fix will
be needed for the "proper" fix or not.  But either way, it might be
nice to have this additional functionality available.  

I need some additional review of this code as I don't claim to
completely understand the grail changes that were being done or if my
fix is OK.  With this fix, the PPC64 regression test failures is
reduced to:

   == 649 tests, 3 stderr failures, 0 stdout failures, 0 stderrB failures, 1 stdoutB failure, 2 post failures ==                                                                              
   gdbserver_tests/nlcontrolc               (stdoutB)                                            
   memcheck/tests/bug340392                 (stderr)                                             
   memcheck/tests/leak_cpp_interior         (stderr)                                             
   memcheck/tests/linux/rfcomm              (stderr)                                             
   massif/tests/new-cpp                     (post)                                               
   massif/tests/overloaded-new              (post)

As expected.

Thanks for the help on the review.  

                     Carl Love
---------------------------------------------

additional grail' fixes for ppc32 and ppc64

The grail changes introduce a kludge call for ppc64.  The call fails
on some tests as the flatten call generates adds

addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));

for V128 expressions.  Iex_ITE isn't supported for V128 type.  This patch
adds the needed V128 support for the Iex_ITE expressions.
---
 VEX/priv/host_ppc_defs.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++
 VEX/priv/host_ppc_defs.h |  8 +++++++
 VEX/priv/host_ppc_isel.c | 12 ++++++++++
 3 files changed, 79 insertions(+)

diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
index 6c298fa..a58ceb9 100644
--- a/VEX/priv/host_ppc_defs.c
+++ b/VEX/priv/host_ppc_defs.c
@@ -1526,6 +1526,15 @@ PPCInstr* PPCInstr_AvBCDV128Binary ( PPCAvOp op, HReg dst,
    i->Pin.AvBCDV128Binary.src2 = src2;
    return i;
 }
+PPCInstr* PPCInstr_V128CMov ( PPCCondCode cond, HReg dst, HReg src ) {
+   PPCInstr* i        = LibVEX_Alloc_inline(sizeof(PPCInstr));
+   i->tag             = Pin_V128CMov;
+   i->Pin.FpCMov.cond = cond;
+   i->Pin.FpCMov.dst  = dst;
+   i->Pin.FpCMov.src  = src;
+   vassert(cond.test != Pct_ALWAYS);
+   return i;
+}
 
 
 /* Pretty Print instructions */
@@ -2177,6 +2186,27 @@ void ppPPCInstr ( const PPCInstr* i, Bool mode64 )
       ppHRegPPC(i->Pin.AvBCDV128Binary.src2);
       return;
 
+   case Pin_V128CMov:
+      vex_printf("v128cmov (%s) ", showPPCCondCode(i->Pin.FpCMov.cond));
+      ppHRegPPC(i->Pin.V128CMov.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.V128CMov.src);
+      vex_printf(": ");
+      vex_printf("if (v128_dst != v128_src) { ");
+      if (i->Pin.FpCMov.cond.test != Pct_ALWAYS) {
+         vex_printf("if (%s) { ", showPPCCondCode(i->Pin.FpCMov.cond));
+      }
+      vex_printf("vor ");
+      ppHRegPPC(i->Pin.V128CMov.dst);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.V128CMov.src);
+      vex_printf(",");
+      ppHRegPPC(i->Pin.V128CMov.src);
+      if (i->Pin.FpCMov.cond.test != Pct_ALWAYS)
+         vex_printf(" }");
+      vex_printf(" }");
+      return;
+
    case Pin_Dfp64Unary:
       vex_printf("%s ", showPPCFpOp(i->Pin.Dfp64Unary.op));
       ppHRegPPC(i->Pin.Dfp64Unary.dst);
@@ -2767,6 +2797,10 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
       addHRegUse(u, HRmRead,  i->Pin.Dfp128Cmp.srcR_hi);
       addHRegUse(u, HRmRead,  i->Pin.Dfp128Cmp.srcR_lo);
       return;                                           
+   case Pin_V128CMov:
+      addHRegUse(u, HRmModify, i->Pin.V128CMov.dst);
+      addHRegUse(u, HRmRead,   i->Pin.V128CMov.src);
+      return;
    case Pin_EvCheck:
       /* We expect both amodes only to mention the GSP (r31), so this
          is in fact pointless, since GSP isn't allocatable, but
@@ -3118,6 +3152,10 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
       mapReg(m, &i->Pin.Dfp128Cmp.srcR_hi);
       mapReg(m, &i->Pin.Dfp128Cmp.srcR_lo);
       return;
+   case Pin_V128CMov:
+      mapReg(m, &i->Pin.V128CMov.dst);
+      mapReg(m, &i->Pin.V128CMov.src);
+      return;
    case Pin_EvCheck:
       /* We expect both amodes only to mention the GSP (r31), so this
          is in fact pointless, since GSP isn't allocatable, but
@@ -6302,6 +6340,27 @@ Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
       goto done;
    }
 
+   case Pin_V128CMov: {
+      UInt v_dst = vregEnc(i->Pin.V128CMov.dst);
+      UInt v_src = vregEnc(i->Pin.V128CMov.src);
+      PPCCondCode cc = i->Pin.V128CMov.cond;
+
+      if (v_dst == v_src) goto done;
+
+      vassert(cc.test != Pct_ALWAYS);
+
+      /* jmp fwds if !condition */
+      if (cc.test != Pct_ALWAYS) {
+         /* bc !ct,cf,n_bytes>>2 */
+         p = mkFormB(p, invertCondTest(cc.test), cc.flag, 8>>2, 0, 0,
+                     endness_host);
+      }
+
+      // move register, use vor dst, src, src  op1 = 4, opc2 = 1156
+      p = mkFormVX( p, 4, v_dst, v_src, v_src, 1156, endness_host );
+      goto done;
+   }
+
    case Pin_EvCheck: {
       /* This requires a 32-bit dec/test in both 32- and 64-bit
          modes. */
diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h
index 70c3b6c..f1a97fd 100644
--- a/VEX/priv/host_ppc_defs.h
+++ b/VEX/priv/host_ppc_defs.h
@@ -584,6 +584,7 @@ typedef
                            * round */
       Pin_DfpQuantize128, /* D128 quantize using register value, significance
                            * round */
+      Pin_V128CMov,       /* Vector 128-bit conditional move */
       Pin_EvCheck,    /* Event check */
       Pin_ProfInc     /* 64-bit profile counter increment */
    }
@@ -1068,6 +1069,12 @@ typedef
             HReg  srcR_hi;
             HReg  srcR_lo;
          } Dfp128Cmp;     
+         /* V128 mov src to dst on the given condition. */
+         struct {
+            PPCCondCode cond;
+            HReg        dst;
+            HReg        src;
+         } V128CMov;
          struct {
             PPCAMode* amCounter;
             PPCAMode* amFailAddr;
@@ -1188,6 +1195,7 @@ extern PPCInstr* PPCInstr_InsertExpD128  ( PPCFpOp op,   HReg dst_hi,
 extern PPCInstr* PPCInstr_Dfp64Cmp       ( HReg dst, HReg srcL, HReg srcR );
 extern PPCInstr* PPCInstr_Dfp128Cmp      ( HReg dst, HReg srcL_hi, HReg srcL_lo,
                                            HReg srcR_hi, HReg srcR_lo );
+extern PPCInstr* PPCInstr_V128CMov       ( PPCCondCode, HReg dst, HReg src );
 extern PPCInstr* PPCInstr_EvCheck     ( PPCAMode* amCounter,
                                         PPCAMode* amFailAddr );
 extern PPCInstr* PPCInstr_ProfInc     ( void );
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 9c954da..25ab559 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -5587,6 +5587,18 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, const IRExpr* e,
    vassert(e);
    vassert(ty == Ity_V128);
 
+   if (e->tag == Iex_ITE) {
+      HReg r1 = iselVecExpr( env, e->Iex.ITE.iftrue, IEndianess );
+      HReg r0 = iselVecExpr( env, e->Iex.ITE.iffalse, IEndianess );
+      HReg r_dst = newVRegV(env);
+
+      // Use OR operator to do move r1 to r_dst
+      addInstr(env, PPCInstr_AvBinary( Pav_OR, r_dst, r0, r0));
+      PPCCondCode cc = iselCondCode(env, e->Iex.ITE.cond, IEndianess);
+      addInstr(env, PPCInstr_V128CMov(cc, r_dst, r1));
+      return r_dst;
+   }
+
    if (e->tag == Iex_RdTmp) {
       return lookupIRTemp(env, e->Iex.RdTmp.tmp);
    }
-- 
2.7.4