|
From: <sv...@va...> - 2005-11-28 13:39:42
|
Author: sewardj
Date: 2005-11-28 13:39:37 +0000 (Mon, 28 Nov 2005)
New Revision: 1474
Log:
Modify the tree builder to use a fixed-size binding environment rather
than one that is potentially proportional to the length of the input
BB. This changes its complexity from quadratic to linear (in the
length of the BB) and gives a noticable increase in the overall speed
of vex. The tradeoff is that it can no longer guarantee to build
maximal trees, but in practice in only rarely fails to do so (about 1
in 100 bbs) and so the resulting degradation in code quality is
completely insignificant (unmeasurable).
Modified:
trunk/priv/ir/iropt.c
trunk/priv/ir/iropt.h
trunk/priv/main/vex_main.c
Modified: trunk/priv/ir/iropt.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/ir/iropt.c 2005-11-28 13:34:19 UTC (rev 1473)
+++ trunk/priv/ir/iropt.c 2005-11-28 13:39:37 UTC (rev 1474)
@@ -3194,75 +3194,142 @@
to instruction selection, which improves the code that the
instruction selector can produce. */
=20
+/* --- The 'tmp' environment is the central data structure here --- */
+
+/* The number of outstanding bindings we're prepared to track.
+ The number of times the env becomes full and we have to dump
+ the oldest binding (hence reducing code quality) falls very
+ rapidly as the env size increases. 8 gives reasonable performance=20
+ under most circumstances. */
+#define A_NENV 10
+
+/* bindee =3D=3D NULL =3D=3D=3D slot is not in use
+ bindee !=3D NULL =3D=3D=3D slot is in use
+*/
typedef
- struct {=20
- Int occ; /* occurrence count for this tmp */
- IRExpr* expr; /* expr it is bound to,=20
- or NULL if already 'used' */
- Bool eDoesLoad; /* True <=3D> expr reads mem */
- Bool eDoesGet; /* True <=3D> expr reads guest state */
- Bool invalidateMe; /* used when dumping bindings */
- Int origPos; /* posn of the binder in the original bb */
+ struct {
+ IRTemp binder;
+ IRExpr* bindee;
+ Bool doesLoad;
+ Bool doesGet;
}
- TmpInfo;
+ ATmpInfo;
=20
-/* Given env :: IRTemp -> TmpInfo*
- Add the use-occurrences of temps in this expression=20
- to the environment.=20
+__attribute__((unused))
+static void ppAEnv ( ATmpInfo* env )
+{
+ Int i;
+ for (i =3D 0; i < A_NENV; i++) {
+ vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
+ if (env[i].bindee)=20
+ ppIRExpr(env[i].bindee);
+ else=20
+ vex_printf("(null)");
+ vex_printf("\n");
+ }
+}
+
+/* --- Tree-traversal fns --- */
+
+/* Traverse an expr, and detect if any part of it reads memory or does
+ a Get. Be careful ... this really controls how much the
+ tree-builder can reorder the code, so getting it right is critical.
*/
-static void occCount_Temp ( TmpInfo** env, IRTemp tmp )
+static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
{
- TmpInfo* ti =3D env[(Int)tmp];
- if (ti) {
- ti->occ++;
- } else {
- ti =3D LibVEX_Alloc(sizeof(TmpInfo));
- ti->occ =3D 1;
- ti->expr =3D NULL;
- ti->eDoesLoad =3D False;
- ti->eDoesGet =3D False;
- ti->invalidateMe =3D False;
- ti->origPos =3D -1; /* filed in properly later */
- env[(Int)tmp] =3D ti;
+ Int i;
+ switch (e->tag) {
+ case Iex_CCall:
+ for (i =3D 0; e->Iex.CCall.args[i]; i++)
+ setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
+ return;
+ case Iex_Mux0X:
+ setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
+ return;
+ case Iex_Binop:
+ setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
+ setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
+ return;
+ case Iex_Unop:
+ setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
+ return;
+ case Iex_Load:
+ *doesLoad =3D True;
+ setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
+ return;
+ case Iex_Get:
+ *doesGet =3D True;
+ return;
+ case Iex_GetI:
+ *doesGet =3D True;
+ setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
+ return;
+ case Iex_Tmp:
+ case Iex_Const:
+ return;
+ default:=20
+ vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
+ vpanic("setHints_Expr");
}
}
=20
-static void occCount_Expr ( TmpInfo** env, IRExpr* e )
+
+/* Add a binding to the front of the env and slide all the rest
+ backwards. It should be the case that the last slot is free. */
+static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee=
)
{
Int i;
+ vassert(env[A_NENV-1].bindee =3D=3D NULL);
+ for (i =3D A_NENV-1; i >=3D 1; i--)
+ env[i] =3D env[i-1];
+ env[0].binder =3D binder;
+ env[0].bindee =3D bindee;
+ env[0].doesLoad =3D False; /* filled in later */
+ env[0].doesGet =3D False; /* filled in later */
+}
=20
+/* Given uses :: array of UShort, indexed by IRTemp
+ Add the use-occurrences of temps in this expression=20
+ to the env.=20
+*/
+static void aoccCount_Expr ( UShort* uses, IRExpr* e )
+{
+ Int i;
+
switch (e->tag) {
=20
case Iex_Tmp: /* the only interesting case */
- occCount_Temp(env, e->Iex.Tmp.tmp);
+ uses[e->Iex.Tmp.tmp]++;
return;
=20
case Iex_Mux0X:
- occCount_Expr(env, e->Iex.Mux0X.cond);
- occCount_Expr(env, e->Iex.Mux0X.expr0);
- occCount_Expr(env, e->Iex.Mux0X.exprX);
+ aoccCount_Expr(uses, e->Iex.Mux0X.cond);
+ aoccCount_Expr(uses, e->Iex.Mux0X.expr0);
+ aoccCount_Expr(uses, e->Iex.Mux0X.exprX);
return;
=20
case Iex_Binop:=20
- occCount_Expr(env, e->Iex.Binop.arg1);
- occCount_Expr(env, e->Iex.Binop.arg2);
+ aoccCount_Expr(uses, e->Iex.Binop.arg1);
+ aoccCount_Expr(uses, e->Iex.Binop.arg2);
return;
=20
case Iex_Unop:=20
- occCount_Expr(env, e->Iex.Unop.arg);
+ aoccCount_Expr(uses, e->Iex.Unop.arg);
return;
=20
case Iex_Load:
- occCount_Expr(env, e->Iex.Load.addr);
+ aoccCount_Expr(uses, e->Iex.Load.addr);
return;
=20
case Iex_CCall:
for (i =3D 0; e->Iex.CCall.args[i]; i++)
- occCount_Expr(env, e->Iex.CCall.args[i]);
+ aoccCount_Expr(uses, e->Iex.CCall.args[i]);
return;
=20
case Iex_GetI:
- occCount_Expr(env, e->Iex.GetI.ix);
+ aoccCount_Expr(uses, e->Iex.GetI.ix);
return;
=20
case Iex_Const:
@@ -3271,55 +3338,55 @@
=20
default:=20
vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
- vpanic("occCount_Expr");
+ vpanic("aoccCount_Expr");
}
}
=20
=20
-/* Given env :: IRTemp -> TmpInfo*
- Add the use-occurrences of temps in this expression=20
- to the environment.=20
+/* Given uses :: array of UShort, indexed by IRTemp
+ Add the use-occurrences of temps in this statement=20
+ to the env.=20
*/
-static void occCount_Stmt ( TmpInfo** env, IRStmt* st )
+static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
{
Int i;
IRDirty* d;
switch (st->tag) {
case Ist_AbiHint:
- occCount_Expr(env, st->Ist.AbiHint.base);
+ aoccCount_Expr(uses, st->Ist.AbiHint.base);
return;
case Ist_Tmp:=20
- occCount_Expr(env, st->Ist.Tmp.data);=20
+ aoccCount_Expr(uses, st->Ist.Tmp.data);=20
return;=20
case Ist_Put:=20
- occCount_Expr(env, st->Ist.Put.data);
+ aoccCount_Expr(uses, st->Ist.Put.data);
return;
case Ist_PutI:
- occCount_Expr(env, st->Ist.PutI.ix);
- occCount_Expr(env, st->Ist.PutI.data);
+ aoccCount_Expr(uses, st->Ist.PutI.ix);
+ aoccCount_Expr(uses, st->Ist.PutI.data);
return;
case Ist_Store:
- occCount_Expr(env, st->Ist.Store.addr);
- occCount_Expr(env, st->Ist.Store.data);
+ aoccCount_Expr(uses, st->Ist.Store.addr);
+ aoccCount_Expr(uses, st->Ist.Store.data);
return;
case Ist_Dirty:
d =3D st->Ist.Dirty.details;
if (d->mFx !=3D Ifx_None)
- occCount_Expr(env, d->mAddr);
- occCount_Expr(env, d->guard);
+ aoccCount_Expr(uses, d->mAddr);
+ aoccCount_Expr(uses, d->guard);
for (i =3D 0; d->args[i]; i++)
- occCount_Expr(env, d->args[i]);
+ aoccCount_Expr(uses, d->args[i]);
return;
case Ist_NoOp:
case Ist_IMark:
case Ist_MFence:
return;
case Ist_Exit:
- occCount_Expr(env, st->Ist.Exit.guard);
+ aoccCount_Expr(uses, st->Ist.Exit.guard);
return;
default:=20
vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
- vpanic("occCount_Stmt");
+ vpanic("aoccCount_Stmt");
}
}
=20
@@ -3327,31 +3394,26 @@
expression, and set the env's binding to NULL so it is marked as
used. If not found, return NULL. */
=20
-static IRExpr* tbSubst_Temp ( TmpInfo** env, IRTemp tmp )
+static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
{
- TmpInfo* ti;
- IRExpr* e;
- ti =3D env[(Int)tmp];
- if (ti){
- e =3D ti->expr;
- if (e) {
- ti->expr =3D NULL;
- return e;
- } else {
- return NULL;
+ Int i;
+ for (i =3D 0; i < A_NENV; i++) {
+ if (env[i].binder =3D=3D tmp && env[i].bindee !=3D NULL) {
+ IRExpr* bindee =3D env[i].bindee;
+ env[i].bindee =3D NULL;
+ return bindee;
}
- } else {
- return NULL;
}
+ return NULL;
}
=20
/* Traverse e, looking for temps. For each observed temp, see if env
contains a binding for the temp, and if so return the bound value.
The env has the property that any binding it holds is
'single-shot', so once a binding is used, it is marked as no longer
- available, by setting its .expr field to NULL. */
+ available, by setting its .bindee field to NULL. */
=20
-static IRExpr* tbSubst_Expr ( TmpInfo** env, IRExpr* e )
+static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
{
IRExpr* e2;
IRExpr** args2;
@@ -3362,41 +3424,42 @@
case Iex_CCall:
args2 =3D sopyIRExprVec(e->Iex.CCall.args);
for (i =3D 0; args2[i]; i++)
- args2[i] =3D tbSubst_Expr(env,args2[i]);
- return IRExpr_CCall(e->Iex.CCall.cee,
+ args2[i] =3D atbSubst_Expr(env,args2[i]);
+ return IRExpr_CCall(
+ e->Iex.CCall.cee,
e->Iex.CCall.retty,
args2
);
case Iex_Tmp:
- e2 =3D tbSubst_Temp(env, e->Iex.Tmp.tmp);
+ e2 =3D atbSubst_Temp(env, e->Iex.Tmp.tmp);
return e2 ? e2 : e;
case Iex_Mux0X:
return IRExpr_Mux0X(
- tbSubst_Expr(env, e->Iex.Mux0X.cond),
- tbSubst_Expr(env, e->Iex.Mux0X.expr0),
- tbSubst_Expr(env, e->Iex.Mux0X.exprX)
+ atbSubst_Expr(env, e->Iex.Mux0X.cond),
+ atbSubst_Expr(env, e->Iex.Mux0X.expr0),
+ atbSubst_Expr(env, e->Iex.Mux0X.exprX)
);
case Iex_Binop:
return IRExpr_Binop(
e->Iex.Binop.op,
- tbSubst_Expr(env, e->Iex.Binop.arg1),
- tbSubst_Expr(env, e->Iex.Binop.arg2)
+ atbSubst_Expr(env, e->Iex.Binop.arg1),
+ atbSubst_Expr(env, e->Iex.Binop.arg2)
);
case Iex_Unop:
return IRExpr_Unop(
e->Iex.Unop.op,
- tbSubst_Expr(env, e->Iex.Unop.arg)
+ atbSubst_Expr(env, e->Iex.Unop.arg)
);
case Iex_Load:
return IRExpr_Load(
e->Iex.Load.end,
e->Iex.Load.ty,
- tbSubst_Expr(env, e->Iex.Load.addr)
+ atbSubst_Expr(env, e->Iex.Load.addr)
);
case Iex_GetI:
return IRExpr_GetI(
e->Iex.GetI.descr,
- tbSubst_Expr(env, e->Iex.GetI.ix),
+ atbSubst_Expr(env, e->Iex.GetI.ix),
e->Iex.GetI.bias
);
case Iex_Const:
@@ -3404,13 +3467,13 @@
return e;
default:=20
vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
- vpanic("tbSubst_Expr");
+ vpanic("atbSubst_Expr");
}
}
=20
-/* Same deal as tbSubst_Expr, except for stmts. */
+/* Same deal as atbSubst_Expr, except for stmts. */
=20
-static IRStmt* tbSubst_Stmt ( TmpInfo** env, IRStmt* st )
+static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
{
Int i;
IRDirty* d;
@@ -3418,36 +3481,36 @@
switch (st->tag) {
case Ist_AbiHint:
return IRStmt_AbiHint(
- tbSubst_Expr(env, st->Ist.AbiHint.base),
+ atbSubst_Expr(env, st->Ist.AbiHint.base),
st->Ist.AbiHint.len
);
case Ist_Store:
return IRStmt_Store(
st->Ist.Store.end,
- tbSubst_Expr(env, st->Ist.Store.addr),
- tbSubst_Expr(env, st->Ist.Store.data)
+ atbSubst_Expr(env, st->Ist.Store.addr),
+ atbSubst_Expr(env, st->Ist.Store.data)
);
case Ist_Tmp:
return IRStmt_Tmp(
st->Ist.Tmp.tmp,
- tbSubst_Expr(env, st->Ist.Tmp.data)
+ atbSubst_Expr(env, st->Ist.Tmp.data)
);
case Ist_Put:
return IRStmt_Put(
st->Ist.Put.offset,
- tbSubst_Expr(env, st->Ist.Put.data)
+ atbSubst_Expr(env, st->Ist.Put.data)
);
case Ist_PutI:
return IRStmt_PutI(
st->Ist.PutI.descr,
- tbSubst_Expr(env, st->Ist.PutI.ix),
+ atbSubst_Expr(env, st->Ist.PutI.ix),
st->Ist.PutI.bias,
- tbSubst_Expr(env, st->Ist.PutI.data)
+ atbSubst_Expr(env, st->Ist.PutI.data)
);
=20
case Ist_Exit:
return IRStmt_Exit(
- tbSubst_Expr(env, st->Ist.Exit.guard),
+ atbSubst_Expr(env, st->Ist.Exit.guard),
st->Ist.Exit.jk,
st->Ist.Exit.dst
);
@@ -3458,184 +3521,83 @@
case Ist_MFence:
return IRStmt_MFence();
case Ist_Dirty:
- d =3D st->Ist.Dirty.details;
+ d =3D st->Ist.Dirty.details;
d2 =3D emptyIRDirty();
*d2 =3D *d;
if (d2->mFx !=3D Ifx_None)
- d2->mAddr =3D tbSubst_Expr(env, d2->mAddr);
- d2->guard =3D tbSubst_Expr(env, d2->guard);
+ d2->mAddr =3D atbSubst_Expr(env, d2->mAddr);
+ d2->guard =3D atbSubst_Expr(env, d2->guard);
for (i =3D 0; d2->args[i]; i++)
- d2->args[i] =3D tbSubst_Expr(env, d2->args[i]);
+ d2->args[i] =3D atbSubst_Expr(env, d2->args[i]);
return IRStmt_Dirty(d2);
default:=20
vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
- vpanic("tbSubst_Stmt");
+ vpanic("atbSubst_Stmt");
}
}
=20
-
-/* Traverse an expr, and detect if any part of it reads memory or does
- a Get. Be careful ... this really controls how much the
- tree-builder can reorder the code, so getting it right is critical.
-*/
-static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
+/* notstatic */ void ado_treebuild_BB ( IRBB* bb )
{
- Int i;
- switch (e->tag) {
- case Iex_CCall:
- for (i =3D 0; e->Iex.CCall.args[i]; i++)
- setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
- return;
- case Iex_Mux0X:
- setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
- setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
- setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
- return;
- case Iex_Binop:
- setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
- setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
- return;
- case Iex_Unop:
- setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
- return;
- case Iex_Load:
- *doesLoad =3D True;
- setHints_Expr(doesLoad, doesGet, e->Iex.Load.addr);
- return;
- case Iex_Get:
- *doesGet =3D True;
- return;
- case Iex_GetI:
- *doesGet =3D True;
- setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
- return;
- case Iex_Tmp:
- case Iex_Const:
- return;
- default:=20
- vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
- vpanic("setHints_Expr");
- }
-}
-
-
-static void dumpInvalidated ( TmpInfo** env, IRBB* bb, /*INOUT*/Int* j )
-{
- Int k, oldest_op, oldest_k;
- TmpInfo* ti;
-
- /* Dump all the bindings to marked as invalidated, in order. */
- while (True) {
- =20
- /* find the oldest bind marked 'invalidateMe'. */
- oldest_op =3D 1<<30;
- oldest_k =3D 1<<30;
- for (k =3D 0; k < bb->tyenv->types_used; k++) {
- ti =3D env[k];
- if (!ti)
- continue;
- if (!ti->expr)
- continue;
- if (!ti->invalidateMe)
- continue;
- /* vex_printf("FOUND INVAL %d %d\n", ti->origPos, oldest_op); *=
/
- if (ti->origPos < oldest_op) {
- oldest_op =3D ti->origPos;
- oldest_k =3D k;
- }
- }
-
- /* No more binds to invalidate. */
- if (oldest_op =3D=3D 1<<30)
- return;
-
- /* the oldest bind to invalidate has been identified */
- vassert(oldest_op !=3D 1<<31 && oldest_k !=3D 1<<31);
- ti =3D env[oldest_k];
- vassert(ti->expr && ti->invalidateMe);
-
- /* and invalidate it ... */
- bb->stmts[*j] =3D IRStmt_Tmp( (IRTemp)oldest_k, ti->expr );
- /* vex_printf("**1 "); ppIRStmt(bb->stmts[*j]); vex_printf("\n")=
; */
- (*j)++;
- ti->invalidateMe =3D False;
- ti->expr =3D NULL; /* no longer available for substitution */
-
- } /* loop which dumps the binds marked for invalidation */
-}
-
-
-
-/* notstatic */ void do_treebuild_BB ( IRBB* bb )
-{
- Int i, j, k;
- Bool invPut, invStore;
+ Int i, j, k, m;
+ Bool stmtPuts, stmtStores, invalidateMe;
IRStmt* st;
IRStmt* st2;
- TmpInfo* ti;
- IRExpr* next2;
+ ATmpInfo env[A_NENV];
=20
- /* Mapping from IRTemp to TmpInfo*. */
Int n_tmps =3D bb->tyenv->types_used;
- TmpInfo** env =3D LibVEX_Alloc(n_tmps * sizeof(TmpInfo*));
+ UShort* uses =3D LibVEX_Alloc(n_tmps * sizeof(UShort));
=20
- for (i =3D 0; i < n_tmps; i++)
- env[i] =3D NULL;
-
/* Phase 1. Scan forwards in bb, counting use occurrences of each
temp. Also count occurrences in the bb->next field. */
=20
+ for (i =3D 0; i < n_tmps; i++)
+ uses[i] =3D 0;
+
for (i =3D 0; i < bb->stmts_used; i++) {
st =3D bb->stmts[i];
if (st->tag =3D=3D Ist_NoOp)
continue;
- occCount_Stmt( env, st );
+ aoccCount_Stmt( uses, st );
}
- occCount_Expr(env, bb->next );
+ aoccCount_Expr(uses, bb->next );
=20
# if 0
- for (i =3D 0; i < env->used; i++) {
- if (!env->inuse[i])
+ for (i =3D 0; i < n_tmps; i++) {
+ if (uses[i] =3D=3D 0)
continue;
- ppIRTemp( (IRTemp)(env->key[i]) );
- vex_printf(" used %d\n", ((TmpInfo*)env->val[i])->occ );
+ ppIRTemp( (IRTemp)i );
+ vex_printf(" used %d\n", (Int)uses[i] );
}
# endif
=20
- /* Phase 2. Fill in the origPos fields. */
+ /* Phase 2. Scan forwards in bb. For each statement in turn:
=20
- for (i =3D 0; i < bb->stmts_used; i++) {
- st =3D bb->stmts[i];
- if (st->tag !=3D Ist_Tmp)
- continue;
+ If the env is full, emit the end element. This guarantees
+ there is at least one free slot in the following.
=20
- ti =3D env[(Int)st->Ist.Tmp.tmp];
- if (!ti) {
- vex_printf("\n");
- ppIRTemp(st->Ist.Tmp.tmp);
- vex_printf("\n");
- vpanic("treebuild_BB (phase 2): unmapped IRTemp");
- }
- ti->origPos =3D i;
- }
-
- /* Phase 3. Scan forwards in bb. =20
-
- On seeing 't =3D E', occ(t)=3D=3D1, =20
- let E'=3Denv(E), set t's binding to be E', and
- delete this stmt.
- Also examine E' and set the hints for E' appropriately
+ On seeing 't =3D E', occ(t)=3D=3D1, =20
+ let E'=3Denv(E)
+ delete this stmt
+ add t -> E' to the front of the env
+ Examine E' and set the hints for E' appropriately
(doesLoad? doesGet?)
=20
- On seeing any other stmt,=20
+ On seeing any other stmt,=20
let stmt' =3D env(stmt)
remove from env any 't=3DE' binds invalidated by stmt
emit the invalidated stmts
emit stmt'
+ compact any holes in env=20
+ by sliding entries towards the front
=20
- Apply env to bb->next.
+ Finally, apply env to bb->next. =20
*/
=20
+ for (i =3D 0; i < A_NENV; i++) {
+ env[i].bindee =3D NULL;
+ env[i].binder =3D IRTemp_INVALID;
+ }
+
/* The stmts in bb are being reordered, and we are guaranteed to
end up with no more than the number we started with. Use i to
be the cursor of the current stmt examined and j <=3D i to be that
@@ -3643,67 +3605,77 @@
*/
j =3D 0;
for (i =3D 0; i < bb->stmts_used; i++) {
+
st =3D bb->stmts[i];
if (st->tag =3D=3D Ist_NoOp)
continue;
=20
- if (st->tag =3D=3D Ist_Tmp) {
- /* vex_printf("acquire binding\n"); */
- ti =3D env[st->Ist.Tmp.tmp];
- if (!ti) {
- vpanic("treebuild_BB (phase 2): unmapped IRTemp");
+ /* Ensure there's at least one space in the env, by emitting
+ the oldest binding if necessary. */
+ if (env[A_NENV-1].bindee !=3D NULL) {
+ bb->stmts[j] =3D IRStmt_Tmp( env[A_NENV-1].binder, env[A_NENV-1=
].bindee );
+ j++;
+ vassert(j <=3D i);
+ env[A_NENV-1].bindee =3D NULL;
+ }
+
+ /* Consider current stmt. */
+ if (st->tag =3D=3D Ist_Tmp && uses[st->Ist.Tmp.tmp] <=3D 1) {
+
+ /* optional extra: dump dead bindings as we find them.
+ Removes the need for a prior dead-code removal pass. */
+ if (uses[st->Ist.Tmp.tmp] =3D=3D 0) {
+ //vex_printf("DEAD binding\n");
+ continue; /* for (i =3D 0; i < bb->stmts_used; i++) loop */
}
- if (ti->occ =3D=3D 1) {
- /* ok, we have 't =3D E', occ(t)=3D=3D1. Do the abovementio=
ned actions. */
- IRExpr* e =3D st->Ist.Tmp.data;
- IRExpr* e2 =3D tbSubst_Expr(env, e);
- ti->expr =3D e2;
- ti->eDoesLoad =3D ti->eDoesGet =3D False;
- setHints_Expr(&ti->eDoesLoad, &ti->eDoesGet, e2);
- /* don't advance j, as we are deleting this stmt and instead
- holding it temporarily in the env. */
- continue; /* the for (i =3D 0; i < bb->stmts_used; i++) loop=
*/
- }
+ vassert(uses[st->Ist.Tmp.tmp] =3D=3D 1);
+
+ /* ok, we have 't =3D E', occ(t)=3D=3D1. Do the abovementioned
+ actions. */
+ IRExpr* e =3D st->Ist.Tmp.data;
+ IRExpr* e2 =3D atbSubst_Expr(env, e);
+ addToEnvFront(env, st->Ist.Tmp.tmp, e2);
+ setHints_Expr(&env[0].doesLoad, &env[0].doesGet, e2);
+ /* don't advance j, as we are deleting this stmt and instead
+ holding it temporarily in the env. */
+ continue; /* for (i =3D 0; i < bb->stmts_used; i++) loop */
}
=20
/* we get here for any other kind of statement. */
/* 'use up' any bindings required by the current statement. */
- st2 =3D tbSubst_Stmt(env, st);
+ st2 =3D atbSubst_Stmt(env, st);
=20
- /* Now, before this stmt, dump any bindings it invalidates.
- These need to be dumped in the order in which they originally
- appeared. (Stupid algorithm): first, mark all bindings which
- need to be dumped. Then, dump them in the order in which
- they were defined. */
+ /* Now, before this stmt, dump any bindings in env that it
+ invalidates. These need to be dumped in the order in which
+ they originally entered env -- that means from oldest to
+ youngest. */
=20
- invPut =3D toBool(st->tag =3D=3D Ist_Put=20
- || st->tag =3D=3D Ist_PutI=20
- || st->tag =3D=3D Ist_Dirty);
-
- invStore =3D toBool(st->tag =3D=3D Ist_Store
+ /* stmtPuts/stmtStores characterise what the stmt under
+ consideration does. */
+ stmtPuts =3D toBool(st->tag =3D=3D Ist_Put=20
+ || st->tag =3D=3D Ist_PutI=20
|| st->tag =3D=3D Ist_Dirty);
=20
- for (k =3D 0; k < n_tmps; k++) {
- ti =3D env[k];
- if (!ti)
- continue;
- if (!ti->expr)
- continue;
+ stmtStores =3D toBool(st->tag =3D=3D Ist_Store
+ || st->tag =3D=3D Ist_Dirty);
=20
- /* Do we have to invalidate this binding? */
-
- ti->invalidateMe=20
+ for (k =3D A_NENV-1; k >=3D 0; k--) {
+ if (env[k].bindee =3D=3D NULL)
+ continue;
+ /* Compare the actions of this stmt with the actions of
+ binding 'k', to see if they invalidate the binding. */
+ invalidateMe
=3D toBool(
/* a store invalidates loaded data */
- (ti->eDoesLoad && invStore)
+ (env[k].doesLoad && stmtStores)
/* a put invalidates get'd data */
- || (ti->eDoesGet && invPut)
+ || (env[k].doesGet && stmtPuts)
/* a put invalidates loaded data. Note, we could do
much better here in the sense that we only need to
invalidate trees containing loads if the Put in
question is marked as requiring precise
exceptions. */
- || (ti->eDoesLoad && invPut)
+ || (env[k].doesLoad && stmtPuts)
/* probably overly conservative: a memory fence
invalidates absolutely everything, so that all
computation prior to it is forced to complete before
@@ -3712,17 +3684,29 @@
/* also be (probably overly) paranoid re AbiHints */
|| st->tag =3D=3D Ist_AbiHint
);
- /*
- if (ti->invalidateMe)
- vex_printf("SET INVAL\n");
- */
+ if (invalidateMe) {
+ bb->stmts[j] =3D IRStmt_Tmp( env[k].binder, env[k].bindee );
+ j++;
+ vassert(j <=3D i);
+ env[k].bindee =3D NULL;
+ }
}
=20
- dumpInvalidated ( env, bb, &j );
+ /* Slide in-use entries in env up to the front */
+ m =3D 0;
+ for (k =3D 0; k < A_NENV; k++) {
+ if (env[k].bindee !=3D NULL) {
+ env[m] =3D env[k];
+ m++;
+ }
+ }
+ for (m =3D m; m < A_NENV; m++) {
+ env[m].bindee =3D NULL;
+ }
=20
/* finally, emit the substituted statement */
bb->stmts[j] =3D st2;
- /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n");=
*/
+ /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); =
*/
j++;
=20
vassert(j <=3D i+1);
@@ -3732,8 +3716,7 @@
dump any left-over bindings. Hmm. Perhaps there should be no
left over bindings? Or any left-over bindings are
by definition dead? */
- next2 =3D tbSubst_Expr(env, bb->next);
- bb->next =3D next2;
+ bb->next =3D atbSubst_Expr(env, bb->next);
bb->stmts_used =3D j;
}
=20
Modified: trunk/priv/ir/iropt.h
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/ir/iropt.h 2005-11-28 13:34:19 UTC (rev 1473)
+++ trunk/priv/ir/iropt.h 2005-11-28 13:39:37 UTC (rev 1474)
@@ -63,8 +63,7 @@
extern
IRBB* cprop_BB ( IRBB* );
=20
-/* Do a dead-code removal pass, which is generally needed to avoid
- crashing the tree-builder. bb is destructively modified. */
+/* Do a dead-code removal pass. bb is destructively modified. */
extern
void do_deadcode_BB ( IRBB* bb );
=20
@@ -72,10 +71,10 @@
extern
void do_cse_BB ( IRBB* bb );
=20
-/* The tree-builder. Make maximal safe trees. bb is destructively
- modified. */
+/* The tree-builder. Make (approximately) maximal safe trees. bb is
+ destructively modified. */
extern
-void do_treebuild_BB ( IRBB* bb );
+void ado_treebuild_BB ( IRBB* bb );
=20
#endif /* ndef __LIBVEX_IROPT_H */
=20
Modified: trunk/priv/main/vex_main.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/main/vex_main.c 2005-11-28 13:34:19 UTC (rev 1473)
+++ trunk/priv/main/vex_main.c 2005-11-28 13:39:37 UTC (rev 1474)
@@ -520,9 +520,9 @@
vex_printf("\n");
}
=20
- /* Turn it into virtual-registerised code. */
- do_deadcode_BB( irbb );
- do_treebuild_BB( irbb );
+ /* Turn it into virtual-registerised code. Build trees -- this
+ also throws away any dead bindings. */
+ ado_treebuild_BB( irbb );
=20
vexAllocSanityCheck();
=20
|
|
From: Nicholas N. <nj...@cs...> - 2005-11-28 16:52:25
|
On Mon, 28 Nov 2005, sv...@va... wrote: > Log: > Modify the tree builder to use a fixed-size binding environment rather > than one that is potentially proportional to the length of the input > BB. This changes its complexity from quadratic to linear (in the > length of the BB) and gives a noticable increase in the overall speed > of vex. The tradeoff is that it can no longer guarantee to build > maximal trees, but in practice in only rarely fails to do so (about 1 > in 100 bbs) and so the resulting degradation in code quality is > completely insignificant (unmeasurable). Here's the improvements on a few programs I've been performance testing with. == tsim_arch t.out 10 usertime: 15.40s usertime: 15.02s == latex pldi2006 usertime: 11.73s usertime: 11.10s == mybz2 usertime: 6.24s usertime: 5.64s == gcc -S concord.i usertime: 11.74s usertime: 10.23s That's improvements of 2%, 5%, 10% and 13%. And Konqueror startup+shutdown went from about 56s to 51s, 9% better. Great stuff. Any ideas how to improve the register allocator? :) N |