|
From: <sv...@va...> - 2006-12-26 02:37:40
|
Author: sewardj
Date: 2006-12-26 02:37:38 +0000 (Tue, 26 Dec 2006)
New Revision: 1701
Log:
Merge r1686 (speed up register allocator by using a less braindead
algorithm for maintenance of real register live ranges)
Modified:
branches/VEX_3_2_BRANCH/priv/host-generic/reg_alloc2.c
Modified: branches/VEX_3_2_BRANCH/priv/host-generic/reg_alloc2.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- branches/VEX_3_2_BRANCH/priv/host-generic/reg_alloc2.c 2006-12-26 02:=
34:31 UTC (rev 1700)
+++ branches/VEX_3_2_BRANCH/priv/host-generic/reg_alloc2.c 2006-12-26 02:=
37:38 UTC (rev 1701)
@@ -129,6 +129,7 @@
}
RRegState;
=20
+
/* The allocator also maintains a redundant array of indexes
(vreg_state) from vreg numbers back to entries in rreg_state. It
is redundant because iff vreg_state[i] =3D=3D j then
@@ -152,7 +153,6 @@
#define IS_VALID_RREGNO(_zz) ((_zz) >=3D 0 && (_zz) < n_rregs)
=20
=20
-
/* Does this instruction mention a particular reg? */
static Bool instrMentionsReg (=20
void (*getRegUsage) (HRegUsage*, HInstr*, Bool),
@@ -215,7 +215,6 @@
}
=20
=20
-
/* Double the size of the real-reg live-range array, if needed. */
static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
{
@@ -233,6 +232,62 @@
}
=20
=20
+/* Sort an array of RRegLR entries by either the .live_after or
+ .dead_before fields. This is performance-critical. */
+static void sortRRLRarray ( RRegLR* arr,=20
+ Int size, Bool by_live_after )
+{
+ Int incs[14] =3D { 1, 4, 13, 40, 121, 364, 1093, 3280,
+ 9841, 29524, 88573, 265720,
+ 797161, 2391484 };
+ Int lo =3D 0;
+ Int hi =3D size-1;
+ Int i, j, h, bigN, hp;
+ RRegLR v;
+
+ vassert(size >=3D 0);
+ if (size =3D=3D 0)
+ return;
+
+ bigN =3D hi - lo + 1; if (bigN < 2) return;
+ hp =3D 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
+
+ if (by_live_after) {
+
+ for ( ; hp >=3D 0; hp--) {
+ h =3D incs[hp];
+ for (i =3D lo + h; i <=3D hi; i++) {
+ v =3D arr[i];
+ j =3D i;
+ while (arr[j-h].live_after > v.live_after) {
+ arr[j] =3D arr[j-h];
+ j =3D j - h;
+ if (j <=3D (lo + h - 1)) break;
+ }
+ arr[j] =3D v;
+ }
+ }
+
+ } else {
+
+ for ( ; hp >=3D 0; hp--) {
+ h =3D incs[hp];
+ for (i =3D lo + h; i <=3D hi; i++) {
+ v =3D arr[i];
+ j =3D i;
+ while (arr[j-h].dead_before > v.dead_before) {
+ arr[j] =3D arr[j-h];
+ j =3D j - h;
+ if (j <=3D (lo + h - 1)) break;
+ }
+ arr[j] =3D v;
+ }
+ }
+
+ }
+}
+
+
/* A target-independent register allocator. Requires various
functions which it uses to deal abstractly with instructions and
registers, since it cannot have any target-specific knowledge.
@@ -294,9 +349,18 @@
Int n_vregs;
VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
=20
- RRegLR* rreg_lrs;
+ /* We keep two copies of the real-reg live range info, one sorted
+ by .live_after and the other by .dead_before. First the
+ unsorted info is created in the _la variant is copied into the
+ _db variant. Once that's done both of them are sorted.=20
+ We also need two integer cursors which record the next
+ location in the two arrays to consider. */
+ RRegLR* rreg_lrs_la;
+ RRegLR* rreg_lrs_db;
Int rreg_lrs_size;
Int rreg_lrs_used;
+ Int rreg_lrs_la_next;
+ Int rreg_lrs_db_next;
=20
/* Used when constructing vreg_lrs (for allocating stack
slots). */
@@ -437,7 +501,8 @@
=20
rreg_lrs_used =3D 0;
rreg_lrs_size =3D 4;
- rreg_lrs =3D LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
+ rreg_lrs_la =3D LibVEX_Alloc(rreg_lrs_size * sizeof(RRegLR));
+ rreg_lrs_db =3D NULL; /* we'll create this later */
=20
/* We'll need to track live range start/end points seperately for
each rreg. Sigh. */
@@ -593,12 +658,12 @@
if (flush) {
vassert(flush_la !=3D INVALID_INSTRNO);
vassert(flush_db !=3D INVALID_INSTRNO);
- ensureRRLRspace(&rreg_lrs, &rreg_lrs_size, rreg_lrs_used);
+ ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used)=
;
if (0)=20
vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
- rreg_lrs[rreg_lrs_used].rreg =3D rreg;
- rreg_lrs[rreg_lrs_used].live_after =3D toShort(flush_la);
- rreg_lrs[rreg_lrs_used].dead_before =3D toShort(flush_db);
+ rreg_lrs_la[rreg_lrs_used].rreg =3D rreg;
+ rreg_lrs_la[rreg_lrs_used].live_after =3D toShort(flush_la)=
;
+ rreg_lrs_la[rreg_lrs_used].dead_before =3D toShort(flush_db)=
;
rreg_lrs_used++;
}
=20
@@ -629,13 +694,13 @@
if (rreg_live_after[j] =3D=3D INVALID_INSTRNO)
continue;
=20
- ensureRRLRspace(&rreg_lrs, &rreg_lrs_size, rreg_lrs_used);
+ ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
if (0)
vex_printf("FLUSH 2 (%d,%d)\n",=20
rreg_live_after[j], rreg_dead_before[j]);
- rreg_lrs[rreg_lrs_used].rreg =3D available_real_regs[j];
- rreg_lrs[rreg_lrs_used].live_after =3D toShort(rreg_live_after[j]=
);
- rreg_lrs[rreg_lrs_used].dead_before =3D toShort(rreg_dead_before[j=
]);
+ rreg_lrs_la[rreg_lrs_used].rreg =3D available_real_regs[j];
+ rreg_lrs_la[rreg_lrs_used].live_after =3D toShort(rreg_live_after=
[j]);
+ rreg_lrs_la[rreg_lrs_used].dead_before =3D toShort(rreg_dead_befor=
e[j]);
rreg_lrs_used++;
}
=20
@@ -648,7 +713,7 @@
this mechanism -- it is only an optimisation. */
=20
for (j =3D 0; j < rreg_lrs_used; j++) {
- rreg =3D rreg_lrs[j].rreg;
+ rreg =3D rreg_lrs_la[j].rreg;
vassert(!hregIsVirtual(rreg));
/* rreg is involved in a HLR. Record this info in the array, if
there is space. */
@@ -667,6 +732,24 @@
}
}
=20
+ /* Finally, copy the _la variant into the _db variant and
+ sort both by their respective fields. */
+ rreg_lrs_db =3D LibVEX_Alloc(rreg_lrs_used * sizeof(RRegLR));
+ for (j =3D 0; j < rreg_lrs_used; j++)
+ rreg_lrs_db[j] =3D rreg_lrs_la[j];
+
+ sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ =
);
+ sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ =
);
+
+ /* And set up the cursors. */
+ rreg_lrs_la_next =3D 0;
+ rreg_lrs_db_next =3D 0;
+
+ for (j =3D 1; j < rreg_lrs_used; j++) {
+ vassert(rreg_lrs_la[j-1].live_after <=3D rreg_lrs_la[j].live_afte=
r);
+ vassert(rreg_lrs_db[j-1].dead_before <=3D rreg_lrs_db[j].dead_befo=
re);
+ }
+
/* ------ end of FINALISE RREG LIVE RANGES ------ */
=20
# if DEBUG_REGALLOC
@@ -677,11 +760,20 @@
# endif
=20
# if DEBUG_REGALLOC
+ vex_printf("RRegLRs by LA:\n");
for (j =3D 0; j < rreg_lrs_used; j++) {
- (*ppReg)(rreg_lrs[j].rreg);
+ vex_printf(" ");
+ (*ppReg)(rreg_lrs_la[j].rreg);
vex_printf(" la =3D %d, db =3D %d\n",
- rreg_lrs[j].live_after, rreg_lrs[j].dead_before );
+ rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before )=
;
}
+ vex_printf("RRegLRs by DB:\n");
+ for (j =3D 0; j < rreg_lrs_used; j++) {
+ vex_printf(" ");
+ (*ppReg)(rreg_lrs_db[j].rreg);
+ vex_printf(" la =3D %d, db =3D %d\n",
+ rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before )=
;
+ }
# endif
=20
/* --------- Stage 3: allocate spill slots. --------- */
@@ -815,8 +907,8 @@
this insn must be marked as unavailable in the running
state. */
for (j =3D 0; j < rreg_lrs_used; j++) {
- if (rreg_lrs[j].live_after < ii=20
- && ii < rreg_lrs[j].dead_before) {
+ if (rreg_lrs_la[j].live_after < ii=20
+ && ii < rreg_lrs_la[j].dead_before) {
/* ii is the middle of a hard live range for some real
reg. Check it's marked as such in the running
state. */
@@ -831,7 +923,7 @@
=20
/* find the state entry for this rreg */
for (k =3D 0; k < n_rregs; k++)
- if (rreg_state[k].rreg =3D=3D rreg_lrs[j].rreg)
+ if (rreg_state[k].rreg =3D=3D rreg_lrs_la[j].rreg)
break;
=20
/* and assert that this rreg is marked as unavailable */
@@ -850,9 +942,9 @@
if (rreg_state[j].disp !=3D Unavail)
continue;
for (k =3D 0; k < rreg_lrs_used; k++)=20
- if (rreg_lrs[k].rreg =3D=3D rreg_state[j].rreg
- && rreg_lrs[k].live_after < ii=20
- && ii < rreg_lrs[k].dead_before)=20
+ if (rreg_lrs_la[k].rreg =3D=3D rreg_state[j].rreg
+ && rreg_lrs_la[k].live_after < ii=20
+ && ii < rreg_lrs_la[k].dead_before)=20
break;
/* If this vassertion fails, we couldn't find a
corresponding HLR. */
@@ -980,49 +1072,60 @@
Note we could do better:
* Could move it into some other free rreg, if one is available=20
=20
- Simplest way to do this is to iterate over the collection
- of rreg live ranges.
+ Do this efficiently, by incrementally stepping along an array
+ of rreg HLRs that are known to be sorted by start point
+ (their .live_after field).
*/
- for (j =3D 0; j < rreg_lrs_used; j++) {
- if (rreg_lrs[j].live_after =3D=3D ii) {
- /* rreg_lrs[j].rreg needs to be freed up. Find=20
- the associated rreg_state entry. */
- /* Note, re rreg_lrs[j].live_after =3D=3D ii. Real register
- live ranges are guaranteed to be well-formed in that
- they start with a write to the register -- Stage 2
- rejects any code not satisfying this. So the correct
- question to ask is whether rreg_lrs[j].live_after =3D=3D
- ii, that is, whether the reg becomes live after this
- insn -- rather than before it. */
-# if DEBUG_REGALLOC
- vex_printf("need to free up rreg: ");
- (*ppReg)(rreg_lrs[j].rreg);
- vex_printf("\n\n");
-# endif
- for (k =3D 0; k < n_rregs; k++)
- if (rreg_state[k].rreg =3D=3D rreg_lrs[j].rreg)
- break;
- /* If this fails, we don't have an entry for this rreg.
- Which we should. */
- vassert(IS_VALID_RREGNO(k));
- m =3D hregNumber(rreg_state[k].vreg);
- if (rreg_state[k].disp =3D=3D Bound) {
- /* Yes, there is an associated vreg. Spill it if it's
- still live. */
- vassert(IS_VALID_VREGNO(m));
- vreg_state[m] =3D INVALID_RREG_NO;
- if (vreg_lrs[m].dead_before > ii) {
- vassert(vreg_lrs[m].reg_class !=3D HRcINVALID);
- EMIT_INSTR( (*genSpill)( rreg_state[k].rreg,
- vreg_lrs[m].spill_offset,
- mode64 ) );
- }
+ while (True) {
+ vassert(rreg_lrs_la_next >=3D 0);
+ vassert(rreg_lrs_la_next <=3D rreg_lrs_used);
+ if (rreg_lrs_la_next =3D=3D rreg_lrs_used)
+ break; /* no more real reg live ranges to consider */
+ if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
+ break; /* next live range does not yet start */
+ vassert(ii =3D=3D rreg_lrs_la[rreg_lrs_la_next].live_after);
+ /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
+ Find the associated rreg_state entry. */
+ /* Note, re ii =3D=3D rreg_lrs_la[rreg_lrs_la_next].live_after.
+ Real register live ranges are guaranteed to be well-formed
+ in that they start with a write to the register -- Stage 2
+ rejects any code not satisfying this. So the correct
+ question to ask is whether
+ rreg_lrs_la[rreg_lrs_la_next].live_after =3D=3D ii, that is,
+ whether the reg becomes live after this insn -- rather
+ than before it. */
+# if DEBUG_REGALLOC
+ vex_printf("need to free up rreg: ");
+ (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
+ vex_printf("\n\n");
+# endif
+ for (k =3D 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg =3D=3D rreg_lrs_la[rreg_lrs_la_next].=
rreg)
+ break;
+ /* If this fails, we don't have an entry for this rreg.
+ Which we should. */
+ vassert(IS_VALID_RREGNO(k));
+ m =3D hregNumber(rreg_state[k].vreg);
+ if (rreg_state[k].disp =3D=3D Bound) {
+ /* Yes, there is an associated vreg. Spill it if it's
+ still live. */
+ vassert(IS_VALID_VREGNO(m));
+ vreg_state[m] =3D INVALID_RREG_NO;
+ if (vreg_lrs[m].dead_before > ii) {
+ vassert(vreg_lrs[m].reg_class !=3D HRcINVALID);
+ EMIT_INSTR( (*genSpill)( rreg_state[k].rreg,
+ vreg_lrs[m].spill_offset,
+ mode64 ) );
}
- rreg_state[k].disp =3D Unavail;
- rreg_state[k].vreg =3D INVALID_HREG;
}
+ rreg_state[k].disp =3D Unavail;
+ rreg_state[k].vreg =3D INVALID_HREG;
+
+ /* check for further rregs entering HLRs at this point */
+ rreg_lrs_la_next++;
}
=20
+
# if DEBUG_REGALLOC
vex_printf("After pre-insn actions for fixed regs:\n");
PRINT_STATE;
@@ -1220,20 +1323,28 @@
=20
/* Now we need to check for rregs exiting fixed live ranges
after this instruction, and if so mark them as free. */
- for (j =3D 0; j < rreg_lrs_used; j++) {
- if (rreg_lrs[j].dead_before =3D=3D ii+1) {
- /* rreg_lrs[j].rreg is exiting a hard live range. Mark
- it as such in the main rreg_state array. */
- for (k =3D 0; k < n_rregs; k++)
- if (rreg_state[k].rreg =3D=3D rreg_lrs[j].rreg)
- break;
- /* If this vassertion fails, we don't have an entry for
- this rreg. Which we should. */
- vassert(k < n_rregs);
- vassert(rreg_state[k].disp =3D=3D Unavail);
- rreg_state[k].disp =3D Free;
- rreg_state[k].vreg =3D INVALID_HREG;
- }
+ while (True) {
+ vassert(rreg_lrs_db_next >=3D 0);
+ vassert(rreg_lrs_db_next <=3D rreg_lrs_used);
+ if (rreg_lrs_db_next =3D=3D rreg_lrs_used)
+ break; /* no more real reg live ranges to consider */
+ if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
+ break; /* next live range does not yet start */
+ vassert(ii+1 =3D=3D rreg_lrs_db[rreg_lrs_db_next].dead_before);
+ /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live
+ range. Mark it as such in the main rreg_state array. */
+ for (k =3D 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg =3D=3D rreg_lrs_db[rreg_lrs_db_next].=
rreg)
+ break;
+ /* If this vassertion fails, we don't have an entry for
+ this rreg. Which we should. */
+ vassert(k < n_rregs);
+ vassert(rreg_state[k].disp =3D=3D Unavail);
+ rreg_state[k].disp =3D Free;
+ rreg_state[k].vreg =3D INVALID_HREG;
+
+ /* check for further rregs leaving HLRs at this point */
+ rreg_lrs_db_next++;
}
=20
# if DEBUG_REGALLOC
@@ -1254,6 +1365,9 @@
for (j =3D 0; j < n_rregs; j++)
vassert(rreg_state[j].rreg =3D=3D available_real_regs[j]);
=20
+ vassert(rreg_lrs_la_next =3D=3D rreg_lrs_used);
+ vassert(rreg_lrs_db_next =3D=3D rreg_lrs_used);
+
return instrs_out;
=20
# undef INVALID_INSTRNO
|