Index: VEX/priv/ir_opt.c =================================================================== --- VEX/priv/ir_opt.c (revision 2245) +++ VEX/priv/ir_opt.c (working copy) @@ -886,36 +886,73 @@ /* The env in this section is a map from IRTemp to IRExpr*, that is, an array indexed by IRTemp. */ -/* Are both expressions simply the same IRTemp ? */ -static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 ) +/* Do both expressions compute the same value? The answer is generally + conservative, i.e. it will report that the expressions do not compute + the same value when in fact they do. The reason is that we do not + keep track of changes in the guest state and memory. Thusly, two + Get's or Load's, even when accessing the same location, will be + assumed to compute different values. After all the accesses may happen + at different times and the guest state / memory can have changed in + the meantime. +*/ +static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 ) { - return toBool( e1->tag == Iex_RdTmp - && e2->tag == Iex_RdTmp - && e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp ); -} + if (e1->tag != e2->tag) return False; -static Bool sameIcoU32s ( IRExpr* e1, IRExpr* e2 ) -{ - return toBool( e1->tag == Iex_Const - && e2->tag == Iex_Const - && e1->Iex.Const.con->tag == Ico_U32 - && e2->Iex.Const.con->tag == Ico_U32 - && e1->Iex.Const.con->Ico.U32 - == e2->Iex.Const.con->Ico.U32 ); -} - -/* Are both expressions either the same IRTemp or IRConst-U32s ? If - in doubt, say No. */ -static Bool sameIRTempsOrIcoU32s ( IRExpr* e1, IRExpr* e2 ) -{ switch (e1->tag) { case Iex_RdTmp: - return sameIRTemps(e1, e2); - case Iex_Const: - return sameIcoU32s(e1, e2); + if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True; + if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) { + return sameIRExprs(env, env[e1->Iex.RdTmp.tmp], env[e2->Iex.RdTmp.tmp]); + } + return False; + + case Iex_Get: + case Iex_Load: + /* Guest state / memory could have changed in the meantime. */ + return False; + + case Iex_Binop: + return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op + && sameIRExprs( env, e1->Iex.Binop.arg1, e2->Iex.Binop.arg1 ) + && sameIRExprs( env, e1->Iex.Binop.arg2, e2->Iex.Binop.arg2 )); + + case Iex_Unop: + return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op + && sameIRExprs( env, e1->Iex.Unop.arg, e2->Iex.Unop.arg )); + + case Iex_Const: { + IRConst *c1 = e1->Iex.Const.con; + IRConst *c2 = e2->Iex.Const.con; + vassert(c1->tag == c2->tag); + switch (c1->tag) { + case Ico_U1: return toBool( c1->Ico.U1 == c2->Ico.U1 ); + case Ico_U8: return toBool( c1->Ico.U8 == c2->Ico.U8 ); + case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 ); + case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 ); + case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 ); + default: break; + } + return False; + } + + case Iex_Triop: + return toBool( e1->Iex.Triop.op == e2->Iex.Triop.op + && sameIRExprs( env, e1->Iex.Triop.arg1, e2->Iex.Triop.arg1 ) + && sameIRExprs( env, e1->Iex.Triop.arg2, e2->Iex.Triop.arg2 ) + && sameIRExprs( env, e1->Iex.Triop.arg3, e2->Iex.Triop.arg3 )); + + case Iex_Mux0X: + return toBool( sameIRExprs( env, e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond ) + && sameIRExprs( env, e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 ) + && sameIRExprs( env, e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX )); + default: - return False; + // Not very likely to be "same". + break; } + + return False; } /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */ @@ -1014,7 +1051,7 @@ } -static IRExpr* fold_Expr ( IRExpr* e ) +static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e ) { Int shift; IRExpr* e2 = e; /* e2 is the result of folding e, if possible */ @@ -1638,7 +1675,7 @@ break; } /* Or8/Or16/Or32/Max32U(t,t) ==> t, for some IRTemp t */ - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = e->Iex.Binop.arg1; break; } @@ -1650,7 +1687,7 @@ x+x produces a defined least significant bit, and it seems simplest just to get rid of the problem by rewriting it out, since the opportunity to do so exists. */ - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1))); break; @@ -1672,7 +1709,7 @@ break; } /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */ - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = IRExpr_Binop(e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64, e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1))); break; @@ -1686,7 +1723,7 @@ break; } /* Sub64(t,t) ==> 0, for some IRTemp t */ - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); break; } @@ -1710,7 +1747,7 @@ break; } /* And32(t,t) ==> t, for some IRTemp t */ - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = e->Iex.Binop.arg1; break; } @@ -1720,7 +1757,7 @@ case Iop_And16: case Iop_And64: /* And8/And16/And64(t,t) ==> t, for some IRTemp t */ - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = e->Iex.Binop.arg1; break; } @@ -1728,7 +1765,7 @@ case Iop_OrV128: /* V128(t,t) ==> t, for some IRTemp t */ - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = e->Iex.Binop.arg1; break; } @@ -1740,7 +1777,7 @@ case Iop_Xor64: case Iop_XorV128: /* Xor8/16/32/64/V128(t,t) ==> 0, for some IRTemp t */ - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); break; } @@ -1748,7 +1785,7 @@ case Iop_Sub32: /* Sub32(t,t) ==> 0, for some IRTemp t */ - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op); break; } @@ -1757,7 +1794,7 @@ case Iop_CmpEQ64: case Iop_CmpEQ8x8: case Iop_CmpEQ8x16: - if (sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op); break; } @@ -1782,15 +1819,15 @@ } else /* are the arms identical? (pretty weedy test) */ - if (sameIRTempsOrIcoU32s(e->Iex.Mux0X.expr0, - e->Iex.Mux0X.exprX)) { + if (sameIRExprs(env, e->Iex.Mux0X.expr0, + e->Iex.Mux0X.exprX)) { e2 = e->Iex.Mux0X.expr0; } } /* Show cases where we've found but not folded 'op(t,t)'. */ if (0 && e == e2 && e->tag == Iex_Binop - && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { + && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) { vex_printf("IDENT: "); ppIRExpr(e); vex_printf("\n"); } @@ -1828,11 +1865,15 @@ switch (ex->tag) { case Iex_RdTmp: if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) { - return env[(Int)ex->Iex.RdTmp.tmp]; - } else { - /* not bound in env */ - return ex; + IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp]; + if (rhs->tag == Iex_RdTmp) + return rhs; + if (rhs->tag == Iex_Const + && rhs->Iex.Const.con->tag != Ico_F64i) + return rhs; } + /* not bound in env */ + return ex; case Iex_Const: case Iex_Get: @@ -1943,15 +1984,15 @@ vassert(isIRAtom(st->Ist.AbiHint.base)); vassert(isIRAtom(st->Ist.AbiHint.nia)); return IRStmt_AbiHint( - fold_Expr(subst_Expr(env, st->Ist.AbiHint.base)), + fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.base)), st->Ist.AbiHint.len, - fold_Expr(subst_Expr(env, st->Ist.AbiHint.nia)) + fold_Expr(env, subst_Expr(env, st->Ist.AbiHint.nia)) ); case Ist_Put: vassert(isIRAtom(st->Ist.Put.data)); return IRStmt_Put( st->Ist.Put.offset, - fold_Expr(subst_Expr(env, st->Ist.Put.data)) + fold_Expr(env, subst_Expr(env, st->Ist.Put.data)) ); case Ist_PutI: @@ -1959,9 +2000,9 @@ vassert(isIRAtom(st->Ist.PutI.data)); return IRStmt_PutI( st->Ist.PutI.descr, - fold_Expr(subst_Expr(env, st->Ist.PutI.ix)), + fold_Expr(env, subst_Expr(env, st->Ist.PutI.ix)), st->Ist.PutI.bias, - fold_Expr(subst_Expr(env, st->Ist.PutI.data)) + fold_Expr(env, subst_Expr(env, st->Ist.PutI.data)) ); case Ist_WrTmp: @@ -1969,7 +2010,7 @@ allowed to be more than just a constant or a tmp. */ return IRStmt_WrTmp( st->Ist.WrTmp.tmp, - fold_Expr(subst_Expr(env, st->Ist.WrTmp.data)) + fold_Expr(env, subst_Expr(env, st->Ist.WrTmp.data)) ); case Ist_Store: @@ -1977,8 +2018,8 @@ vassert(isIRAtom(st->Ist.Store.data)); return IRStmt_Store( st->Ist.Store.end, - fold_Expr(subst_Expr(env, st->Ist.Store.addr)), - fold_Expr(subst_Expr(env, st->Ist.Store.data)) + fold_Expr(env, subst_Expr(env, st->Ist.Store.addr)), + fold_Expr(env, subst_Expr(env, st->Ist.Store.data)) ); case Ist_CAS: { @@ -1991,11 +2032,11 @@ vassert(isIRAtom(cas->dataLo)); cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end, - fold_Expr(subst_Expr(env, cas->addr)), - cas->expdHi ? fold_Expr(subst_Expr(env, cas->expdHi)) : NULL, - fold_Expr(subst_Expr(env, cas->expdLo)), - cas->dataHi ? fold_Expr(subst_Expr(env, cas->dataHi)) : NULL, - fold_Expr(subst_Expr(env, cas->dataLo)) + fold_Expr(env, subst_Expr(env, cas->addr)), + cas->expdHi ? fold_Expr(env, subst_Expr(env, cas->expdHi)) : NULL, + fold_Expr(env, subst_Expr(env, cas->expdLo)), + cas->dataHi ? fold_Expr(env, subst_Expr(env, cas->dataHi)) : NULL, + fold_Expr(env, subst_Expr(env, cas->dataLo)) ); return IRStmt_CAS(cas2); } @@ -2007,9 +2048,9 @@ return IRStmt_LLSC( st->Ist.LLSC.end, st->Ist.LLSC.result, - fold_Expr(subst_Expr(env, st->Ist.LLSC.addr)), + fold_Expr(env, subst_Expr(env, st->Ist.LLSC.addr)), st->Ist.LLSC.storedata - ? fold_Expr(subst_Expr(env, st->Ist.LLSC.storedata)) + ? fold_Expr(env, subst_Expr(env, st->Ist.LLSC.storedata)) : NULL ); @@ -2022,13 +2063,13 @@ d2->args = shallowCopyIRExprVec(d2->args); if (d2->mFx != Ifx_None) { vassert(isIRAtom(d2->mAddr)); - d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr)); + d2->mAddr = fold_Expr(env, subst_Expr(env, d2->mAddr)); } vassert(isIRAtom(d2->guard)); - d2->guard = fold_Expr(subst_Expr(env, d2->guard)); + d2->guard = fold_Expr(env, subst_Expr(env, d2->guard)); for (i = 0; d2->args[i]; i++) { vassert(isIRAtom(d2->args[i])); - d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i])); + d2->args[i] = fold_Expr(env, subst_Expr(env, d2->args[i])); } return IRStmt_Dirty(d2); } @@ -2047,7 +2088,7 @@ case Ist_Exit: { IRExpr* fcond; vassert(isIRAtom(st->Ist.Exit.guard)); - fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard)); + fcond = fold_Expr(env, subst_Expr(env, st->Ist.Exit.guard)); if (fcond->tag == Iex_Const) { /* Interesting. The condition on this exit has folded down to a constant. */ @@ -2091,10 +2132,9 @@ out->tyenv = deepCopyIRTypeEnv( in->tyenv ); /* Set up the env with which travels forward. This holds a - substitution, mapping IRTemps to atoms, that is, IRExprs which - are either IRTemps or IRConsts. Thus, copy and constant - propagation is done. The environment is to be applied as we - move along. Keys are IRTemps. Values are IRExpr*s. + substitution, mapping IRTemps to IRExprs. The environment + is to be applied as we move along. Keys are IRTemps. + Values are IRExpr*s. */ for (i = 0; i < n_tmps; i++) env[i] = NULL; @@ -2117,30 +2157,26 @@ /* If the statement has been folded into a no-op, forget it. */ if (st2->tag == Ist_NoOp) continue; - /* Now consider what the stmt looks like. If it's of the form - 't = const' or 't1 = t2', add it to the running environment - and not to the output BB. Otherwise, add it to the output - BB. Note, we choose not to propagate const when const is an - F64i, so that F64i literals can be CSE'd later. This helps - x86 floating point code generation. */ + /* If the statement assigns to an IRTemp add it to the running + environment. This is for the benefit of copy propagation + and to allow sameIRExpr look through IRTemps. */ + if (st2->tag == Ist_WrTmp) { + vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL); + env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data; - if (st2->tag == Ist_WrTmp - && st2->Ist.WrTmp.data->tag == Iex_Const - && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) { - /* 't = const' -- add to env. - The pair (IRTemp, IRExpr*) is added. */ - env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data; + /* 't1 = t2' -- don't add to BB; will be optimized out */ + if (st2->Ist.WrTmp.data->tag == Iex_RdTmp) continue; + + /* 't = const' && 'const != F64i' -- don't add to BB + Note, we choose not to propagate const when const is an + F64i, so that F64i literals can be CSE'd later. This helps + x86 floating point code generation. */ + if (st2->Ist.WrTmp.data->tag == Iex_Const + && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) continue; } - else - if (st2->tag == Ist_WrTmp && st2->Ist.WrTmp.data->tag == Iex_RdTmp) { - /* 't1 = t2' -- add to env. - The pair (IRTemp, IRExpr*) is added. */ - env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data; - } - else { - /* Not interesting, copy st2 into the output block. */ - addStmtToIRSB( out, st2 ); - } + + /* Not interesting, copy st2 into the output block. */ + addStmtToIRSB( out, st2 ); } out->next = subst_Expr( env, in->next );