|
From: <sv...@va...> - 2006-02-04 15:24:06
|
Author: sewardj
Date: 2006-02-04 15:24:00 +0000 (Sat, 04 Feb 2006)
New Revision: 1566
Log:
Make the CSE pass more aggressive. It now commons up Mux0X and GetI
expressions too. This generates somewhat better FP code on x86 since
it removes more redundant artefacts from the x87 FP stack simulation.
Unfortunately commoning up GetIs complicates CSEs, since it is now
possible that "available expressions" collected by the CSEr will
become invalidated by writes to the guest state as we work through the
block. So there is additional code to check for this case.
Some supporting functions (getAliasingRelation_IC and
getAliasingRelation_II) have been moved earlier in the file.
Modified:
trunk/priv/ir/iropt.c
trunk/priv/ir/iropt.h
Modified: trunk/priv/ir/iropt.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/ir/iropt.c 2006-02-04 15:20:13 UTC (rev 1565)
+++ trunk/priv/ir/iropt.c 2006-02-04 15:24:00 UTC (rev 1566)
@@ -2030,6 +2030,119 @@
=20
=20
/*---------------------------------------------------------------*/
+/*--- Determination of guest state aliasing relationships ---*/
+/*---------------------------------------------------------------*/
+
+/* These are helper functions for CSE and GetI/PutI transformations.
+
+ Determine, to the extent possible, the relationship between two
+ guest state accesses. The possible outcomes are:
+
+ * Exact alias. These two accesses denote precisely the same
+ piece of the guest state.
+
+ * Definitely no alias. These two accesses are guaranteed not to
+ overlap any part of the guest state.
+
+ * Unknown -- if neither of the above can be established.
+
+ If in doubt, return Unknown. */
+
+typedef
+ enum { ExactAlias, NoAlias, UnknownAlias }
+ GSAliasing;
+
+
+/* Produces the alias relation between an indexed guest
+ state access and a non-indexed access. */
+
+static
+GSAliasing getAliasingRelation_IC ( IRArray* descr1, IRExpr* ix1,
+ Int offset2, IRType ty2 )
+{
+ UInt minoff1, maxoff1, minoff2, maxoff2;
+
+ getArrayBounds( descr1, &minoff1, &maxoff1 );
+ minoff2 =3D offset2;
+ maxoff2 =3D minoff2 + sizeofIRType(ty2) - 1;
+
+ if (maxoff1 < minoff2 || maxoff2 < minoff1)
+ return NoAlias;
+
+ /* Could probably do better here if required. For the moment
+ however just claim not to know anything more. */
+ return UnknownAlias;
+}
+
+
+/* Produces the alias relation between two indexed guest state
+ accesses. */
+
+static
+GSAliasing getAliasingRelation_II (=20
+ IRArray* descr1, IRExpr* ix1, Int bias1,
+ IRArray* descr2, IRExpr* ix2, Int bias2
+ )
+{
+ UInt minoff1, maxoff1, minoff2, maxoff2;
+ Int iters;
+
+ /* First try hard to show they don't alias. */
+ getArrayBounds( descr1, &minoff1, &maxoff1 );
+ getArrayBounds( descr2, &minoff2, &maxoff2 );
+ if (maxoff1 < minoff2 || maxoff2 < minoff1)
+ return NoAlias;
+
+ /* So the two arrays at least partially overlap. To get any
+ further we'll have to be sure that the descriptors are
+ identical. */
+ if (!eqIRArray(descr1, descr2))
+ return UnknownAlias;
+
+ /* The descriptors are identical. Now the only difference can be
+ in the index expressions. If they cannot be shown to be
+ identical, we have to say we don't know what the aliasing
+ relation will be. Now, since the IR is flattened, the index
+ expressions should be atoms -- either consts or tmps. So that
+ makes the comparison simple. */
+ vassert(isIRAtom(ix1));
+ vassert(isIRAtom(ix2));
+ if (!eqIRAtom(ix1,ix2))
+ return UnknownAlias;
+
+ /* Ok, the index expressions are identical. So now the only way
+ they can be different is in the bias. Normalise this
+ paranoidly, to reliably establish equality/non-equality. */
+
+ /* So now we know that the GetI and PutI index the same array
+ with the same base. Are the offsets the same, modulo the
+ array size? Do this paranoidly. */
+ vassert(descr1->nElems =3D=3D descr2->nElems);
+ vassert(descr1->elemTy =3D=3D descr2->elemTy);
+ vassert(descr1->base =3D=3D descr2->base);
+ iters =3D 0;
+ while (bias1 < 0 || bias2 < 0) {
+ bias1 +=3D descr1->nElems;
+ bias2 +=3D descr1->nElems;
+ iters++;
+ if (iters > 10)
+ vpanic("getAliasingRelation: iters");
+ }
+ vassert(bias1 >=3D 0 && bias2 >=3D 0);
+ bias1 %=3D descr1->nElems;
+ bias2 %=3D descr1->nElems;
+ vassert(bias1 >=3D 0 && bias1 < descr1->nElems);
+ vassert(bias2 >=3D 0 && bias2 < descr1->nElems);
+
+ /* Finally, biasP and biasG are normalised into the range=20
+ 0 .. descrP/G->nElems - 1. And so we can establish
+ equality/non-equality. */
+
+ return bias1=3D=3Dbias2 ? ExactAlias : NoAlias;
+}
+
+
+/*---------------------------------------------------------------*/
/*--- Common Subexpression Elimination ---*/
/*---------------------------------------------------------------*/
=20
@@ -2042,7 +2155,7 @@
=20
typedef
struct {
- enum { Ut, Btt, Btc, Bct, Cf64i } tag;
+ enum { Ut, Btt, Btc, Bct, Cf64i, Mttt, GetIt } tag;
union {
/* unop(tmp) */
struct {
@@ -2071,6 +2184,18 @@
struct {
ULong f64i;
} Cf64i;
+ /* Mux0X(tmp,tmp,tmp) */
+ struct {
+ IRTemp co;
+ IRTemp e0;
+ IRTemp eX;
+ } Mttt;
+ /* GetI(descr,tmp,bias)*/
+ struct {
+ IRArray* descr;
+ IRTemp ix;
+ Int bias;
+ } GetIt;
} u;
}
AvailExpr;
@@ -2101,6 +2226,14 @@
&& eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
case Cf64i:=20
return toBool(a1->u.Cf64i.f64i =3D=3D a2->u.Cf64i.f64i);
+ case Mttt:
+ return toBool(a1->u.Mttt.co =3D=3D a2->u.Mttt.co
+ && a1->u.Mttt.e0 =3D=3D a2->u.Mttt.e0
+ && a1->u.Mttt.eX =3D=3D a2->u.Mttt.eX);
+ case GetIt:
+ return toBool(eqIRArray(a1->u.GetIt.descr, a2->u.GetIt.descr)=20
+ && a1->u.GetIt.ix =3D=3D a2->u.GetIt.ix
+ && a1->u.GetIt.bias =3D=3D a2->u.GetIt.bias);
default: vpanic("eq_AvailExpr");
}
}
@@ -2127,6 +2260,14 @@
IRExpr_Const(con), IRExpr_Tmp(ae->u.Bct.ar=
g2) );
case Cf64i:
return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
+ case Mttt:
+ return IRExpr_Mux0X(IRExpr_Tmp(ae->u.Mttt.co),=20
+ IRExpr_Tmp(ae->u.Mttt.e0),=20
+ IRExpr_Tmp(ae->u.Mttt.eX));
+ case GetIt:
+ return IRExpr_GetI(ae->u.GetIt.descr,
+ IRExpr_Tmp(ae->u.GetIt.ix),
+ ae->u.GetIt.bias);
default:
vpanic("availExpr_to_IRExpr");
}
@@ -2162,6 +2303,14 @@
break;
case Cf64i:
break;
+ case Mttt:
+ ae->u.Mttt.co =3D subst_AvailExpr_Temp( env, ae->u.Mttt.co );
+ ae->u.Mttt.e0 =3D subst_AvailExpr_Temp( env, ae->u.Mttt.e0 );
+ ae->u.Mttt.eX =3D subst_AvailExpr_Temp( env, ae->u.Mttt.eX );
+ break;
+ case GetIt:
+ ae->u.GetIt.ix =3D subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
+ break;
default:=20
vpanic("subst_AvailExpr");
}
@@ -2221,18 +2370,44 @@
return ae;
}
=20
+ if (e->tag =3D=3D Iex_Mux0X
+ && e->Iex.Mux0X.cond->tag =3D=3D Iex_Tmp
+ && e->Iex.Mux0X.expr0->tag =3D=3D Iex_Tmp
+ && e->Iex.Mux0X.exprX->tag =3D=3D Iex_Tmp) {
+ ae =3D LibVEX_Alloc(sizeof(AvailExpr));
+ ae->tag =3D Mttt;
+ ae->u.Mttt.co =3D e->Iex.Mux0X.cond->Iex.Tmp.tmp;
+ ae->u.Mttt.e0 =3D e->Iex.Mux0X.expr0->Iex.Tmp.tmp;
+ ae->u.Mttt.eX =3D e->Iex.Mux0X.exprX->Iex.Tmp.tmp;
+ return ae;
+ }
+
+ if (e->tag =3D=3D Iex_GetI
+ && e->Iex.GetI.ix->tag =3D=3D Iex_Tmp) {
+ ae =3D LibVEX_Alloc(sizeof(AvailExpr));
+ ae->tag =3D GetIt;
+ ae->u.GetIt.descr =3D e->Iex.GetI.descr;
+ ae->u.GetIt.ix =3D e->Iex.GetI.ix->Iex.Tmp.tmp;
+ ae->u.GetIt.bias =3D e->Iex.GetI.bias;
+ return ae;
+ }
+
return NULL;
}
=20
=20
-/* The BB is modified in-place. */
+/* The BB is modified in-place. Returns True if any changes were
+ made. */
=20
-void do_cse_BB ( IRBB* bb )
+static Bool do_cse_BB ( IRBB* bb )
{
- Int i, j;
+ Int i, j, paranoia;
IRTemp t, q;
IRStmt* st;
AvailExpr* eprime;
+ AvailExpr* ae;
+ Bool invalidate;
+ Bool anyDone =3D False;
=20
HashHW* tenv =3D newHHW(); /* :: IRTemp -> IRTemp */
HashHW* aenv =3D newHHW(); /* :: AvailExpr* -> IRTemp */
@@ -2251,11 +2426,82 @@
else
add binding E' -> t to aenv
replace this stmt by "t =3D E'"
- Ignore any other kind of stmt.
+
+ Other statements are only interesting to the extent that they
+ might invalidate some of the expressions in aenv. So there is
+ an invalidate-bindings check for each statement seen.
*/
for (i =3D 0; i < bb->stmts_used; i++) {
st =3D bb->stmts[i];
=20
+ /* ------ BEGIN invalidate aenv bindings ------ */
+ /* This is critical: remove from aenv any E' -> .. bindings
+ which might be invalidated by this statement. The only
+ vulnerable kind of bindings are the GetIt kind.
+ Dirty call - dump (paranoia level -> 2)=20
+ Store - dump (ditto)=20
+ Put, PutI - dump unless no-overlap is proven (.. -> 1)
+ Uses getAliasingRelation_IC and getAliasingRelation_II
+ to do the no-overlap assessments needed for Put/PutI.
+ */
+ switch (st->tag) {
+ case Ist_Dirty: case Ist_Store:=20
+ paranoia =3D 2; break;
+ case Ist_Put: case Ist_PutI:=20
+ paranoia =3D 1; break;
+ case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:=20
+ case Ist_Tmp: case Ist_MFence: case Ist_Exit:=20
+ paranoia =3D 0; break;
+ default:=20
+ vpanic("do_cse_BB(1)");
+ }
+
+ if (paranoia > 0) {
+ for (j =3D 0; j < aenv->used; j++) {
+ if (!aenv->inuse[j])
+ continue;
+ ae =3D (AvailExpr*)aenv->key[j];
+ if (ae->tag !=3D GetIt)=20
+ continue;
+ invalidate =3D False;
+ if (paranoia >=3D 2) {
+ invalidate =3D True;
+ } else {
+ vassert(paranoia =3D=3D 1);
+ if (st->tag =3D=3D Ist_Put) {
+ if (getAliasingRelation_IC(
+ ae->u.GetIt.descr,=20
+ IRExpr_Tmp(ae->u.GetIt.ix),=20
+ st->Ist.Put.offset,=20
+ typeOfIRExpr(bb->tyenv,st->Ist.Put.data)=20
+ ) !=3D NoAlias)=20
+ invalidate =3D True;
+ }
+ else=20
+ if (st->tag =3D=3D Ist_PutI) {
+ if (getAliasingRelation_II(
+ ae->u.GetIt.descr,=20
+ IRExpr_Tmp(ae->u.GetIt.ix),=20
+ ae->u.GetIt.bias,
+ st->Ist.PutI.descr,
+ st->Ist.PutI.ix,
+ st->Ist.PutI.bias
+ ) !=3D NoAlias)
+ invalidate =3D True;
+ }
+ else=20
+ vpanic("do_cse_BB(2)");
+ }
+
+ if (invalidate) {
+ aenv->inuse[j] =3D False;
+ aenv->key[j] =3D (HWord)NULL; /* be sure */
+ }
+ } /* for j */
+ } /* paranoia > 0 */
+
+ /* ------ ENV invalidate aenv bindings ------ */
+
/* ignore not-interestings */
if (st->tag !=3D Ist_Tmp)
continue;
@@ -2283,6 +2529,7 @@
q =3D (IRTemp)aenv->val[j];
bb->stmts[i] =3D IRStmt_Tmp( t, IRExpr_Tmp(q) );
addToHHW( tenv, (HWord)t, (HWord)q );
+ anyDone =3D True;
} else {
/* No binding was found, so instead we add E' -> t to our
collection of available expressions, replace this stmt
@@ -2297,6 +2544,7 @@
sanityCheckIRBB(bb, Ity_I32);
vex_printf("\n\n");
*/
+ return anyDone;
}
=20
=20
@@ -2479,115 +2727,6 @@
/*--- PutI/GetI transformations ---*/
/*---------------------------------------------------------------*/
=20
-/* ------- Helper functions for PutI/GetI transformations ------ */
-
-/* Determine, to the extent possible, the relationship between two
- guest state accesses. The possible outcomes are:
-
- * Exact alias. These two accesses denote precisely the same
- piece of the guest state.
-
- * Definitely no alias. These two accesses are guaranteed not to
- overlap any part of the guest state.
-
- * Unknown -- if neither of the above can be established.
-
- If in doubt, return Unknown. */
-
-typedef
- enum { ExactAlias, NoAlias, UnknownAlias }
- GSAliasing;
-
-
-/* Produces the alias relation between an indexed guest
- state access and a non-indexed access. */
-
-static
-GSAliasing getAliasingRelation_IC ( IRArray* descr1, IRExpr* ix1,
- Int offset2, IRType ty2 )
-{
- UInt minoff1, maxoff1, minoff2, maxoff2;
-
- getArrayBounds( descr1, &minoff1, &maxoff1 );
- minoff2 =3D offset2;
- maxoff2 =3D minoff2 + sizeofIRType(ty2) - 1;
-
- if (maxoff1 < minoff2 || maxoff2 < minoff1)
- return NoAlias;
-
- /* Could probably do better here if required. For the moment
- however just claim not to know anything more. */
- return UnknownAlias;
-}
-
-
-/* Produces the alias relation between two indexed guest state
- accesses. */
-
-static
-GSAliasing getAliasingRelation_II (=20
- IRArray* descr1, IRExpr* ix1, Int bias1,
- IRArray* descr2, IRExpr* ix2, Int bias2
- )
-{
- UInt minoff1, maxoff1, minoff2, maxoff2;
- Int iters;
-
- /* First try hard to show they don't alias. */
- getArrayBounds( descr1, &minoff1, &maxoff1 );
- getArrayBounds( descr2, &minoff2, &maxoff2 );
- if (maxoff1 < minoff2 || maxoff2 < minoff1)
- return NoAlias;
-
- /* So the two arrays at least partially overlap. To get any
- further we'll have to be sure that the descriptors are
- identical. */
- if (!eqIRArray(descr1, descr2))
- return UnknownAlias;
-
- /* The descriptors are identical. Now the only difference can be
- in the index expressions. If they cannot be shown to be
- identical, we have to say we don't know what the aliasing
- relation will be. Now, since the IR is flattened, the index
- expressions should be atoms -- either consts or tmps. So that
- makes the comparison simple. */
- vassert(isIRAtom(ix1));
- vassert(isIRAtom(ix2));
- if (!eqIRAtom(ix1,ix2))
- return UnknownAlias;
-
- /* Ok, the index expressions are identical. So now the only way
- they can be different is in the bias. Normalise this
- paranoidly, to reliably establish equality/non-equality. */
-
- /* So now we know that the GetI and PutI index the same array
- with the same base. Are the offsets the same, modulo the
- array size? Do this paranoidly. */
- vassert(descr1->nElems =3D=3D descr2->nElems);
- vassert(descr1->elemTy =3D=3D descr2->elemTy);
- vassert(descr1->base =3D=3D descr2->base);
- iters =3D 0;
- while (bias1 < 0 || bias2 < 0) {
- bias1 +=3D descr1->nElems;
- bias2 +=3D descr1->nElems;
- iters++;
- if (iters > 10)
- vpanic("getAliasingRelation: iters");
- }
- vassert(bias1 >=3D 0 && bias2 >=3D 0);
- bias1 %=3D descr1->nElems;
- bias2 %=3D descr1->nElems;
- vassert(bias1 >=3D 0 && bias1 < descr1->nElems);
- vassert(bias2 >=3D 0 && bias2 < descr1->nElems);
-
- /* Finally, biasP and biasG are normalised into the range=20
- 0 .. descrP/G->nElems - 1. And so we can establish
- equality/non-equality. */
-
- return bias1=3D=3Dbias2 ? ExactAlias : NoAlias;
-}
-
-
/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
the given starting point to find, if any, a PutI which writes
exactly the same piece of guest state, and so return the expression
@@ -3846,7 +3985,7 @@
static
IRBB* expensive_transformations( IRBB* bb )
{
- do_cse_BB( bb );
+ (void)do_cse_BB( bb );
collapse_AddSub_chains_BB( bb );
do_redundant_GetI_elimination( bb );
do_redundant_PutI_elimination( bb );
@@ -3977,20 +4116,27 @@
at it. */
considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
=20
- if (hasVorFtemps) {
+ if (hasVorFtemps && !hasGetIorPutI) {
/* If any evidence of FP or Vector activity, CSE, as that
tends to mop up all manner of lardy code to do with
- rounding modes. */
- do_cse_BB( bb );
+ rounding modes. Don't bother if hasGetIorPutI since that
+ case leads into the expensive transformations, which do
+ CSE anyway. */
+ (void)do_cse_BB( bb );
do_deadcode_BB( bb );
}
=20
if (hasGetIorPutI) {
+ Bool cses;
n_expensive++;
if (DEBUG_IROPT)
vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
bb =3D expensive_transformations( bb );
bb =3D cheap_transformations( bb, specHelper, preciseMemExnsFn =
);
+ /* Potentially common up GetIs */
+ cses =3D do_cse_BB( bb );
+ if (cses)
+ bb =3D cheap_transformations( bb, specHelper, preciseMemExns=
Fn );
}
=20
/* Now have a go at unrolling simple (single-BB) loops. If
Modified: trunk/priv/ir/iropt.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/ir/iropt.h 2006-02-04 15:20:13 UTC (rev 1565)
+++ trunk/priv/ir/iropt.h 2006-02-04 15:24:00 UTC (rev 1566)
@@ -67,10 +67,6 @@
extern
void do_deadcode_BB ( IRBB* bb );
=20
-/* Do a CSE pass. bb is destructively modified. */
-extern
-void do_cse_BB ( IRBB* bb );
-
/* The tree-builder. Make (approximately) maximal safe trees. bb is
destructively modified. */
extern
|