|
From: <sv...@va...> - 2005-07-04 09:39:11
|
Author: sewardj
Date: 2005-07-04 10:38:58 +0100 (Mon, 04 Jul 2005)
New Revision: 1253
Log:
Fix (well, ameliorate, at least) some lurking performance problems
(time taken to do register allocation, not quality of result) which
were tolerable when allocating for x86/amd64 but got bad when dealing
with ppc-ish numbers of real registers (90 ish).
* Don't sanity-check the entire regalloc state after each insn
processed; this is total overkill. Instead do it every 7th insn
processed (somewhat arbitrarily) and just before the last insn.
* Reinstate an optimisation from the old UCode allocator: shadow
the primary state structure (rreg_state) with a redundant inverse
mapping (vreg_state) to remove the need to search
through rreg_state when looking for info about a given vreg, a
very common operation. Add logic to keep the two maps consistent.
Add a sanity check to ensure they really are consistent.
* Rename some variables and macros to make the code easier to
understand.
On x86->x86 (--tool=3Dnone), total Vex runtime is reduced by about 10%,
and amd64 is similar. For ppc32 the vex runtime is nearly halved. On
x86->x86 (--tool=3Dnone), register allocation now consumes only about
10% of the total Vex run time.
When hooked up to Valgrind, run time of short-running programs --
which is dominated by translation time -- is reduced by up to 10%.
Calltree/kcachegrind/cachegrind proved instrumental in tracking down
and quantifying these performance problems. Thanks, Josef & Nick.
Modified:
trunk/priv/host-generic/reg_alloc2.c
Modified: trunk/priv/host-generic/reg_alloc2.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- trunk/priv/host-generic/reg_alloc2.c 2005-07-04 09:21:19 UTC (rev 125=
2)
+++ trunk/priv/host-generic/reg_alloc2.c 2005-07-04 09:38:58 UTC (rev 125=
3)
@@ -10,7 +10,7 @@
This file is part of LibVEX, a library for dynamic binary
instrumentation and translation.
=20
- Copyright (C) 2004 OpenWorks, LLP.
+ Copyright (C) 2004-2005 OpenWorks, LLP.
=20
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -92,10 +92,10 @@
RRegLR;
=20
=20
-/* An array of the following structs comprises the running state of
- the allocator. It indicates what the current disposition of each
- allocatable real register is. The array gets updated as the
- allocator processes instructions. */
+/* An array of the following structs (rreg_state) comprises the
+ running state of the allocator. It indicates what the current
+ disposition of each allocatable real register is. The array gets
+ updated as the allocator processes instructions. */
typedef
struct {
/* FIELDS WHICH DO NOT CHANGE */
@@ -118,8 +118,30 @@
}
RRegState;
=20
+/* The allocator also maintains a redundant array of indexes
+ (vreg_state) from vreg numbers back to entries in rreg_state. It
+ is redundant because iff vreg_state[i] =3D=3D j then
+ hregNumber(rreg_state[j].vreg) =3D=3D i -- that is, the two entries
+ point at each other. The purpose of this is to speed up activities
+ which involve looking for a particular vreg: there is no need to
+ scan the rreg_state looking for it, just index directly into
+ vreg_state. The FAQ "does this vreg already have an associated
+ rreg" is the main beneficiary. =20
=20
+ To indicate, in vreg_state[i], that a given vreg is not currently
+ associated with any rreg, that entry can be set to INVALID_RREG_NO.
=20
+ Because the vreg_state entries are signed Shorts, the max number
+ of vregs that can be handed by regalloc is 32767.
+*/
+
+#define INVALID_RREG_NO ((Short)(-1))
+
+#define IS_VALID_VREGNO(_zz) ((_zz) >=3D 0 && (_zz) < n_vregs)
+#define IS_VALID_RREGNO(_zz) ((_zz) >=3D 0 && (_zz) < n_rregs)
+
+
+
/* Does this instruction mention a particular reg? */
static Bool instrMentionsReg (=20
void (*getRegUsage) (HRegUsage*, HInstr*),
@@ -253,8 +275,8 @@
=20
/* Info on vregs and rregs. Computed once and remains
unchanged. */
- Int n_vreg_lrs;
- VRegLR* vreg_lrs; /* [0 .. n_vreg_lrs-1] */
+ Int n_vregs;
+ VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
=20
RRegLR* rreg_lrs;
Int rreg_lrs_size;
@@ -269,9 +291,14 @@
Int* rreg_dead_before;
=20
/* Running state of the core allocation algorithm. */
- RRegState* state;
- Int n_state;
+ RRegState* rreg_state; /* [0 .. n_rregs-1] */
+ Int n_rregs;
=20
+ /* .. and the redundant backward map */
+ /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
+ This inplies n_rregs must be <=3D 32768. */
+ Short* vreg_state; /* [0 .. n_vregs-1] */
+
/* The vreg -> rreg map constructed and then applied to each
instr. */
HRegRemap remap;
@@ -279,6 +306,10 @@
/* The output array of instructions. */
HInstrArray* instrs_out;
=20
+ /* Sanity checks are expensive. They are only done periodically,
+ not at each insn processed. */
+ Bool do_sanity_check;
+
vassert(0 =3D=3D LibVEX_N_SPILL_BYTES % 16);
vassert(0 =3D=3D guest_sizeB % 8);
=20
@@ -300,40 +331,63 @@
addHInstr ( instrs_out, _tmp ); \
} while (0)
=20
-# define PRINT_STATE \
- do { \
- Int z; \
- for (z =3D 0; z < n_state; z++) { \
- vex_printf(" "); \
- (*ppReg)(state[z].rreg); \
- vex_printf("\t "); \
- switch (state[z].disp) { \
- case Free: vex_printf("Free\n"); break; \
- case Unavail: vex_printf("Unavail\n"); break; \
- case Bound: vex_printf("BoundTo "); \
- (*ppReg)(state[z].vreg); \
- vex_printf("\n"); break; \
- } \
- } \
+# define PRINT_STATE \
+ do { \
+ Int z, q; \
+ for (z =3D 0; z < n_rregs; z++) { \
+ vex_printf(" rreg_state[%2d] =3D ", z); \
+ (*ppReg)(rreg_state[z].rreg); \
+ vex_printf(" \t"); \
+ switch (rreg_state[z].disp) { \
+ case Free: vex_printf("Free\n"); break; \
+ case Unavail: vex_printf("Unavail\n"); break; \
+ case Bound: vex_printf("BoundTo "); \
+ (*ppReg)(rreg_state[z].vreg); \
+ vex_printf("\n"); break; \
+ } \
+ } \
+ vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \
+ q =3D 0; \
+ for (z =3D 0; z < n_vregs; z++) { \
+ if (vreg_state[z] =3D=3D INVALID_RREG_NO) \
+ continue; \
+ vex_printf("[%d] -> %d ", z, vreg_state[z]); \
+ q++; \
+ if (q > 0 && (q % 6) =3D=3D 0) \
+ vex_printf("\n "); \
+ } \
+ vex_printf("\n"); \
} while (0)
=20
=20
- /* --------- Stage 0: set up output array. --------- */
+ /* --------- Stage 0: set up output array --------- */
+ /* --------- and allocate/initialise running state. --------- */
+
instrs_out =3D newHInstrArray();
=20
/* ... and initialise running state. */
- /* n_state is no more than a short name for n_available_real_regs. */
- n_state =3D n_available_real_regs;
- state =3D LibVEX_Alloc(n_available_real_regs * sizeof(RRegState));
+ /* n_rregs is no more than a short name for n_available_real_regs. */
+ n_rregs =3D n_available_real_regs;
+ n_vregs =3D instrs_in->n_vregs;
=20
- for (j =3D 0; j < n_state; j++) {
- state[j].rreg =3D available_real_regs[j];
- state[j].has_hlrs =3D False;
- state[j].disp =3D Free;
- state[j].vreg =3D INVALID_HREG;
- state[j].is_spill_cand =3D False;
+ /* If this is not so, vreg_state entries will overflow. */
+ vassert(n_vregs < 32767);
+
+ rreg_state =3D LibVEX_Alloc(n_rregs * sizeof(RRegState));
+ vreg_state =3D LibVEX_Alloc(n_vregs * sizeof(Short));
+
+ for (j =3D 0; j < n_rregs; j++) {
+ rreg_state[j].rreg =3D available_real_regs[j];
+ rreg_state[j].has_hlrs =3D False;
+ rreg_state[j].disp =3D Free;
+ rreg_state[j].vreg =3D INVALID_HREG;
+ rreg_state[j].is_spill_cand =3D False;
}
=20
+ for (j =3D 0; j < n_vregs; j++)
+ vreg_state[j] =3D INVALID_RREG_NO;
+
+
/* --------- Stage 1: compute vreg live ranges. --------- */
/* --------- Stage 2: compute rreg live ranges. --------- */
=20
@@ -342,15 +396,14 @@
/* This is relatively simple, because (1) we only seek the complete
end-to-end live range of each vreg, and are not interested in
any holes in it, and (2) the vregs are conveniently numbered 0
- .. n_vreg_lrs-1, so we can just dump the results in a
+ .. n_vregs-1, so we can just dump the results in a
pre-allocated array. */
=20
- n_vreg_lrs =3D instrs_in->n_vregs;
vreg_lrs =3D NULL;
- if (n_vreg_lrs > 0)
- vreg_lrs =3D LibVEX_Alloc(sizeof(VRegLR) * n_vreg_lrs);
+ if (n_vregs > 0)
+ vreg_lrs =3D LibVEX_Alloc(sizeof(VRegLR) * n_vregs);
=20
- for (j =3D 0; j < n_vreg_lrs; j++) {
+ for (j =3D 0; j < n_vregs; j++) {
vreg_lrs[j].live_after =3D INVALID_INSTRNO;
vreg_lrs[j].dead_before =3D INVALID_INSTRNO;
vreg_lrs[j].spill_offset =3D 0;
@@ -406,11 +459,11 @@
if (!hregIsVirtual(vreg))
continue;
k =3D hregNumber(vreg);
- if (k < 0 || k >=3D n_vreg_lrs) {
+ if (k < 0 || k >=3D n_vregs) {
vex_printf("\n");
(*ppInstr)(instrs_in->arr[ii]);
vex_printf("\n");
- vex_printf("vreg %d, n_vreg_lrs %d\n", k, n_vreg_lrs);
+ vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
vpanic("doRegisterAllocation: out-of-range vreg");
}
=20
@@ -572,28 +625,28 @@
=20
/* Compute summary hints for choosing real regs. If a real reg is
involved in a hard live range, record that fact in the fixed
- part of the running state. Later, when offered a choice between
+ part of the running rreg_state. Later, when offered a choice betw=
een
rregs, it's better to choose one which is not marked as having
any HLRs, since ones with HLRs may need to be spilled around
their HLRs. Correctness of final assignment is unaffected by
- this mechanism -- it is an optimisation only. */
+ this mechanism -- it is only an optimisation. */
=20
for (j =3D 0; j < rreg_lrs_used; j++) {
rreg =3D rreg_lrs[j].rreg;
vassert(!hregIsVirtual(rreg));
/* rreg is involved in a HLR. Record this info in the array, if
there is space. */
- for (k =3D 0; k < n_state; k++)
- if (state[k].rreg =3D=3D rreg)
+ for (k =3D 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg =3D=3D rreg)
break;
- vassert(k < n_state); /* else rreg was not found in state?! */
- state[k].has_hlrs =3D True;
+ vassert(k < n_rregs); /* else rreg was not found in rreg_state?! *=
/
+ rreg_state[k].has_hlrs =3D True;
}
if (0) {
- for (j =3D 0; j < n_state; j++) {
- if (!state[j].has_hlrs)
+ for (j =3D 0; j < n_rregs; j++) {
+ if (!rreg_state[j].has_hlrs)
continue;
- ppReg(state[j].rreg);
+ ppReg(rreg_state[j].rreg);
vex_printf(" hinted\n");
}
}
@@ -601,7 +654,7 @@
/* ------ end of FINALISE RREG LIVE RANGES ------ */
=20
# if DEBUG_REGALLOC
- for (j =3D 0; j < n_vreg_lrs; j++) {
+ for (j =3D 0; j < n_vregs; j++) {
vex_printf("vreg %d: la =3D %d, db =3D %d\n",=20
j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
}
@@ -629,7 +682,7 @@
for (j =3D 0; j < N_SPILL64S; j++)
ss_busy_until_before[j] =3D 0;
=20
- for (j =3D 0; j < n_vreg_lrs; j++) {
+ for (j =3D 0; j < n_vregs; j++) {
=20
/* True iff this vreg is unused. In which case we also expect
that the reg_class field for it has not been set. */
@@ -689,7 +742,7 @@
=20
# if 0
vex_printf("\n\n");
- for (j =3D 0; j < n_vreg_lrs; j++)
+ for (j =3D 0; j < n_vregs; j++)
vex_printf("vreg %d --> spill offset %d\n",
j, vreg_lrs[j].spill_offset);
# endif
@@ -730,71 +783,100 @@
=20
/* ------------ Sanity checks ------------ */
=20
- /* Sanity check 1: all rregs with a hard live range crossing
- this insn must be marked as unavailable in the running
- state. */
- for (j =3D 0; j < rreg_lrs_used; j++) {
- if (rreg_lrs[j].live_after < ii=20
- && ii < rreg_lrs[j].dead_before) {
- /* ii is the middle of a hard live range for some real reg.
- Check it's marked as such in the running state. */
+ /* Sanity checks are expensive. So they are done only once
+ every 7 instructions, and just before the last
+ instruction. */
+ do_sanity_check
+ =3D toBool(
+ False /* Set to True for sanity checking of all insns. */
+ || ii =3D=3D instrs_in->arr_used-1
+ || (ii > 0 && (ii % 7) =3D=3D 0)
+ );
=20
-# if 0
- vex_printf("considering la %d .. db %d reg =3D ",=20
- rreg_lrs[j].live_after,=20
- rreg_lrs[j].dead_before);
- (*ppReg)(rreg_lrs[j].rreg);
- vex_printf("\n");
-# endif
+ if (do_sanity_check) {
=20
- /* find the state entry for this rreg */
- for (k =3D 0; k < n_state; k++)
- if (state[k].rreg =3D=3D rreg_lrs[j].rreg)
+ /* Sanity check 1: all rregs with a hard live range crossing
+ this insn must be marked as unavailable in the running
+ state. */
+ for (j =3D 0; j < rreg_lrs_used; j++) {
+ if (rreg_lrs[j].live_after < ii=20
+ && ii < rreg_lrs[j].dead_before) {
+ /* ii is the middle of a hard live range for some real
+ reg. Check it's marked as such in the running
+ state. */
+
+# if 0
+ vex_printf("considering la %d .. db %d reg =3D ",=20
+ rreg_lrs[j].live_after,=20
+ rreg_lrs[j].dead_before);
+ (*ppReg)(rreg_lrs[j].rreg);
+ vex_printf("\n");
+# endif
+
+ /* find the state entry for this rreg */
+ for (k =3D 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg =3D=3D rreg_lrs[j].rreg)
+ break;
+
+ /* and assert that this rreg is marked as unavailable */
+ vassert(rreg_state[k].disp =3D=3D Unavail);
+ }
+ }
+
+ /* Sanity check 2: conversely, all rregs marked as
+ unavailable in the running rreg_state must have a
+ corresponding hard live range entry in the rreg_lrs
+ array. */
+ for (j =3D 0; j < n_available_real_regs; j++) {
+ vassert(rreg_state[j].disp =3D=3D Bound
+ || rreg_state[j].disp =3D=3D Free
+ || rreg_state[j].disp =3D=3D Unavail);
+ if (rreg_state[j].disp !=3D Unavail)
+ continue;
+ for (k =3D 0; k < rreg_lrs_used; k++)=20
+ if (rreg_lrs[k].rreg =3D=3D rreg_state[j].rreg
+ && rreg_lrs[k].live_after < ii=20
+ && ii < rreg_lrs[k].dead_before)=20
break;
+ /* If this vassertion fails, we couldn't find a
+ corresponding HLR. */
+ vassert(k < rreg_lrs_used);
+ }
=20
- /* and assert that this rreg is marked as unavailable */
- vassert(state[k].disp =3D=3D Unavail);
+ /* Sanity check 3: all vreg-rreg bindings must bind registers
+ of the same class. */
+ for (j =3D 0; j < n_rregs; j++) {
+ if (rreg_state[j].disp !=3D Bound)
+ continue;
+ vassert(hregClass(rreg_state[j].rreg)=20
+ =3D=3D hregClass(rreg_state[j].vreg));
+ vassert( hregIsVirtual(rreg_state[j].vreg));
+ vassert(!hregIsVirtual(rreg_state[j].rreg));
}
- }
=20
- /* Sanity check 2: conversely, all rregs marked as unavailable in
- the running state must have a corresponding hard live range
- entry in the rreg_lrs array. */
- for (j =3D 0; j < n_available_real_regs; j++) {
- vassert(state[j].disp =3D=3D Bound
- || state[j].disp =3D=3D Free
- || state[j].disp =3D=3D Unavail);
- if (state[j].disp !=3D Unavail)
- continue;
- for (k =3D 0; k < rreg_lrs_used; k++)=20
- if (rreg_lrs[k].rreg =3D=3D state[j].rreg
- && rreg_lrs[k].live_after < ii=20
- && ii < rreg_lrs[k].dead_before)=20
- break;
- /* If this vassertion fails, we couldn't find a corresponding
- HLR. */
- vassert(k < rreg_lrs_used);
- }
+ /* Sanity check 4: the vreg_state and rreg_state
+ mutually-redundant mappings are consistent. If
+ rreg_state[j].vreg points at some vreg_state entry then
+ that vreg_state entry should point back at
+ rreg_state[j]. */
+ for (j =3D 0; j < n_rregs; j++) {
+ if (rreg_state[j].disp !=3D Bound)
+ continue;
+ k =3D hregNumber(rreg_state[j].vreg);
+ vassert(IS_VALID_VREGNO(k));
+ vassert(vreg_state[k] =3D=3D j);
+ }
+ for (j =3D 0; j < n_vregs; j++) {
+ k =3D vreg_state[j];
+ if (k =3D=3D INVALID_RREG_NO)
+ continue;
+ vassert(IS_VALID_RREGNO(k));
+ vassert(rreg_state[k].disp =3D=3D Bound);
+ vassert(hregNumber(rreg_state[k].vreg) =3D=3D j);
+ }
=20
- /* Sanity check 3: No vreg is bound to more than one rreg. */
- for (j =3D 0; j < n_state; j++) {
- if (state[j].disp !=3D Bound)
- continue;
- for (k =3D j+1; k < n_state; k++)
- if (state[k].disp =3D=3D Bound)
- vassert(state[k].vreg !=3D state[j].vreg);
- }
+ } /* if (do_sanity_check) */
=20
- /* Sanity check 4: all vreg-rreg bindings must bind registers of
- the same class. */
- for (j =3D 0; j < n_state; j++) {
- if (state[j].disp !=3D Bound)
- continue;
- vassert(hregClass(state[j].rreg) =3D=3D hregClass(state[j].vreg=
));
- vassert( hregIsVirtual(state[j].vreg));
- vassert(!hregIsVirtual(state[j].rreg));
- }
-
/* ------------ end of Sanity checks ------------ */
=20
/* Do various optimisations pertaining to register coalescing
@@ -812,8 +894,8 @@
vassert(hregClass(vregS) =3D=3D hregClass(vregD));
k =3D hregNumber(vregS);
m =3D hregNumber(vregD);
- vassert(k >=3D 0 && k < n_vreg_lrs);
- vassert(m >=3D 0 && m < n_vreg_lrs);
+ vassert(IS_VALID_VREGNO(k));
+ vassert(IS_VALID_VREGNO(m));
if (vreg_lrs[k].dead_before !=3D ii + 1) goto cannot_coalesce;
if (vreg_lrs[m].live_after !=3D ii) goto cannot_coalesce;
# if DEBUG_REGALLOC
@@ -824,10 +906,10 @@
vex_printf("\n\n");
# endif
/* Find the state entry for vregS. */
- for (m =3D 0; m < n_state; m++)
- if (state[m].disp =3D=3D Bound && state[m].vreg =3D=3D vregS=
)
+ for (m =3D 0; m < n_rregs; m++)
+ if (rreg_state[m].disp =3D=3D Bound && rreg_state[m].vreg =3D=
=3D vregS)
break;
- if (m =3D=3D n_state)
+ if (m =3D=3D n_rregs)
/* We failed to find a binding for vregS, which means it's
currently not in a register. So we can't do the
coalescing. Give up. */
@@ -835,7 +917,11 @@
=20
/* Finally, we can do the coalescing. It's trivial -- merely
claim vregS's register for vregD. */
- state[m].vreg =3D vregD;
+ rreg_state[m].vreg =3D vregD;
+ vassert(IS_VALID_VREGNO(hregNumber(vregD)));
+ vassert(IS_VALID_VREGNO(hregNumber(vregS)));
+ vreg_state[hregNumber(vregD)] =3D m;
+ vreg_state[hregNumber(vregS)] =3D INVALID_RREG_NO;
=20
/* Move on to the next insn. We skip the post-insn stuff for
fixed registers, since this move should not interact with
@@ -849,16 +935,19 @@
/* Look for vregs whose live range has just ended, and=20
mark the associated rreg as free. */
=20
- for (j =3D 0; j < n_state; j++) {
- if (state[j].disp !=3D Bound)
+ for (j =3D 0; j < n_rregs; j++) {
+ if (rreg_state[j].disp !=3D Bound)
continue;
- vreg =3D hregNumber(state[j].vreg);
- vassert(vreg >=3D 0 && vreg < n_vreg_lrs);
+ vreg =3D hregNumber(rreg_state[j].vreg);
+ vassert(IS_VALID_VREGNO(vreg));
if (vreg_lrs[vreg].dead_before <=3D ii) {
- state[j].disp =3D Free;
+ rreg_state[j].disp =3D Free;
+ m =3D hregNumber(rreg_state[j].vreg);
+ vassert(IS_VALID_VREGNO(m));
+ vreg_state[m] =3D INVALID_RREG_NO;
if (DEBUG_REGALLOC) {
vex_printf("free up ");=20
- (*ppReg)(state[j].rreg);=20
+ (*ppReg)(rreg_state[j].rreg);=20
vex_printf("\n");
}
}
@@ -881,7 +970,7 @@
for (j =3D 0; j < rreg_lrs_used; j++) {
if (rreg_lrs[j].live_after =3D=3D ii) {
/* rreg_lrs[j].rreg needs to be freed up. Find=20
- the associated state entry. */
+ the associated rreg_state entry. */
/* Note, re rreg_lrs[j].live_after =3D=3D ii. Real register
live ranges are guaranteed to be well-formed in that
they start with a write to the register -- Stage 2
@@ -894,25 +983,26 @@
(*ppReg)(rreg_lrs[j].rreg);
vex_printf("\n\n");
# endif
- for (k =3D 0; k < n_state; k++)
- if (state[k].rreg =3D=3D rreg_lrs[j].rreg)
+ for (k =3D 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg =3D=3D rreg_lrs[j].rreg)
break;
/* If this fails, we don't have an entry for this rreg.
Which we should. */
- vassert(k < n_state);
- if (state[k].disp =3D=3D Bound) {
+ vassert(IS_VALID_RREGNO(k));
+ m =3D hregNumber(rreg_state[k].vreg);
+ if (rreg_state[k].disp =3D=3D Bound) {
/* Yes, there is an associated vreg. Spill it if it's
still live. */
- m =3D hregNumber(state[k].vreg);
- vassert(m >=3D 0 && m < n_vreg_lrs);
+ vassert(IS_VALID_VREGNO(m));
+ vreg_state[m] =3D INVALID_RREG_NO;
if (vreg_lrs[m].dead_before > ii) {
vassert(vreg_lrs[m].reg_class !=3D HRcINVALID);
- EMIT_INSTR( (*genSpill)( state[k].rreg,
+ EMIT_INSTR( (*genSpill)( rreg_state[k].rreg,
vreg_lrs[m].spill_offset ) );
}
}
- state[k].disp =3D Unavail;
- state[k].vreg =3D INVALID_HREG;
+ rreg_state[k].disp =3D Unavail;
+ rreg_state[k].vreg =3D INVALID_HREG;
}
}
=20
@@ -952,12 +1042,15 @@
/* Now we're trying to find a rreg for "vreg". First of all,
if it already has an rreg assigned, we don't need to do
anything more. Search the current state to find out. */
- for (k =3D 0; k < n_state; k++)
- if (state[k].vreg =3D=3D vreg && state[k].disp =3D=3D Bound)
- break;
- if (k < n_state) {
- addToHRegRemap(&remap, vreg, state[k].rreg);
+ m =3D hregNumber(vreg);
+ vassert(IS_VALID_VREGNO(m));
+ k =3D vreg_state[m];
+ if (IS_VALID_RREGNO(k)) {
+ vassert(rreg_state[k].disp =3D=3D Bound);
+ addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
continue;
+ } else {
+ vassert(k =3D=3D INVALID_RREG_NO);
}
=20
/* No luck. The next thing to do is see if there is a
@@ -966,11 +1059,11 @@
rreg for which the next live-range event is as far ahead
as possible. */
k_suboptimal =3D -1;
- for (k =3D 0; k < n_state; k++) {
- if (state[k].disp !=3D Free
- || hregClass(state[k].rreg) !=3D hregClass(vreg))
+ for (k =3D 0; k < n_rregs; k++) {
+ if (rreg_state[k].disp !=3D Free
+ || hregClass(rreg_state[k].rreg) !=3D hregClass(vreg))
continue;
- if (state[k].has_hlrs) {
+ if (rreg_state[k].has_hlrs) {
/* Well, at least we can use k_suboptimal if we really
have to. Keep on looking for a better candidate. */
k_suboptimal =3D k;
@@ -983,16 +1076,17 @@
if (k_suboptimal >=3D 0)
k =3D k_suboptimal;
=20
- if (k < n_state) {
- state[k].disp =3D Bound;
- state[k].vreg =3D vreg;
- addToHRegRemap(&remap, vreg, state[k].rreg);
+ if (k < n_rregs) {
+ rreg_state[k].disp =3D Bound;
+ rreg_state[k].vreg =3D vreg;
+ m =3D hregNumber(vreg);
+ vassert(IS_VALID_VREGNO(m));
+ vreg_state[m] =3D k;
+ addToHRegRemap(&remap, vreg, rreg_state[k].rreg);
/* Generate a reload if needed. */
if (reg_usage.mode[j] !=3D HRmWrite) {
- m =3D hregNumber(vreg);
- vassert(m >=3D 0 && m < n_vreg_lrs);
vassert(vreg_lrs[m].reg_class !=3D HRcINVALID);
- EMIT_INSTR( (*genReload)( state[k].rreg,
+ EMIT_INSTR( (*genReload)( rreg_state[k].rreg,
vreg_lrs[m].spill_offset ) );
}
continue;
@@ -1003,32 +1097,32 @@
course we need to be careful not to spill a vreg which is
needed by this insn. */
=20
- /* First, mark in the state, those rregs which are not spill
+ /* First, mark in the rreg_state, those rregs which are not spi=
ll
candidates, due to holding a vreg mentioned by this
instruction. Or being of the wrong class. */
- for (k =3D 0; k < n_state; k++) {
- state[k].is_spill_cand =3D False;
- if (state[k].disp !=3D Bound)
+ for (k =3D 0; k < n_rregs; k++) {
+ rreg_state[k].is_spill_cand =3D False;
+ if (rreg_state[k].disp !=3D Bound)
continue;
- if (hregClass(state[k].rreg) !=3D hregClass(vreg))
+ if (hregClass(rreg_state[k].rreg) !=3D hregClass(vreg))
continue;
- state[k].is_spill_cand =3D True;
+ rreg_state[k].is_spill_cand =3D True;
for (m =3D 0; m < reg_usage.n_used; m++) {
- if (state[k].vreg =3D=3D reg_usage.hreg[m]) {
- state[k].is_spill_cand =3D False;
+ if (rreg_state[k].vreg =3D=3D reg_usage.hreg[m]) {
+ rreg_state[k].is_spill_cand =3D False;
break;
}
}
}
=20
/* We can choose to spill any rreg satisfying
- state[r].is_spill_cand (so to speak). Choose r so that
+ rreg_state[r].is_spill_cand (so to speak). Choose r so that
the next use of its associated vreg is as far ahead as
possible, in the hope that this will minimise the number
of consequent reloads required. */
spillee
=3D findMostDistantlyMentionedVReg (=20
- getRegUsage, instrs_in, ii+1, state, n_state );
+ getRegUsage, instrs_in, ii+1, rreg_state, n_rregs );
=20
if (spillee =3D=3D -1) {
/* Hmmmmm. There don't appear to be any spill candidates.
@@ -1039,48 +1133,51 @@
vpanic("reg_alloc: can't create a free register.");
}
=20
- /* Right. So we're going to spill state[spillee]. */
- vassert(spillee >=3D 0 && spillee < n_state);
- vassert(state[spillee].disp =3D=3D Bound);
+ /* Right. So we're going to spill rreg_state[spillee]. */
+ vassert(IS_VALID_RREGNO(spillee));
+ vassert(rreg_state[spillee].disp =3D=3D Bound);
/* check it's the right class */
- vassert(hregClass(state[spillee].rreg) =3D=3D hregClass(vreg));
+ vassert(hregClass(rreg_state[spillee].rreg) =3D=3D hregClass(vr=
eg));
/* check we're not ejecting the vreg for which we are trying
to free up a register. */
- vassert(state[spillee].vreg !=3D vreg);
+ vassert(rreg_state[spillee].vreg !=3D vreg);
=20
- m =3D hregNumber(state[spillee].vreg);
- vassert(m >=3D 0 && m < n_vreg_lrs);
+ m =3D hregNumber(rreg_state[spillee].vreg);
+ vassert(IS_VALID_VREGNO(m));
=20
/* So here's the spill store. Assert that we're spilling a
live vreg. */
vassert(vreg_lrs[m].dead_before > ii);
vassert(vreg_lrs[m].reg_class !=3D HRcINVALID);
- EMIT_INSTR( (*genSpill)( state[spillee].rreg,
+ EMIT_INSTR( (*genSpill)( rreg_state[spillee].rreg,
vreg_lrs[m].spill_offset ) );
=20
- /* Update the state to reflect the new assignment for this
+ /* Update the rreg_state to reflect the new assignment for this
rreg. */
- state[spillee].vreg =3D vreg;
+ rreg_state[spillee].vreg =3D vreg;
+ vreg_state[m] =3D INVALID_RREG_NO;
=20
+ m =3D hregNumber(vreg);
+ vassert(IS_VALID_VREGNO(m));
+ vreg_state[m] =3D spillee;
+
/* Now, if this vreg is being read or modified (as opposed to
written), we have to generate a reload for it. */
if (reg_usage.mode[j] !=3D HRmWrite) {
- m =3D hregNumber(vreg);
- vassert(m >=3D 0 && m < n_vreg_lrs);
vassert(vreg_lrs[m].reg_class !=3D HRcINVALID);
- EMIT_INSTR( (*genReload)( state[spillee].rreg,
+ EMIT_INSTR( (*genReload)( rreg_state[spillee].rreg,
vreg_lrs[m].spill_offset ) );
}
=20
/* So after much twisting and turning, we have vreg mapped to
- state[furthest_k].rreg. Note that in the map. */
- addToHRegRemap(&remap, vreg, state[spillee].rreg);
+ rreg_state[furthest_k].rreg. Note that in the map. */
+ addToHRegRemap(&remap, vreg, rreg_state[spillee].rreg);
=20
} /* iterate over registers in this instruction. */
=20
/* We've finished clowning around with registers in this instructi=
on.
Three results:
- - the running state[] has been updated
+ - the running rreg_state[] has been updated
- a suitable vreg->rreg mapping for this instruction has been=20
constructed
- spill and reload instructions may have been emitted.
@@ -1106,16 +1203,16 @@
for (j =3D 0; j < rreg_lrs_used; j++) {
if (rreg_lrs[j].dead_before =3D=3D ii+1) {
/* rreg_lrs[j].rreg is exiting a hard live range. Mark
- it as such in the main state array. */
- for (k =3D 0; k < n_state; k++)
- if (state[k].rreg =3D=3D rreg_lrs[j].rreg)
+ it as such in the main rreg_state array. */
+ for (k =3D 0; k < n_rregs; k++)
+ if (rreg_state[k].rreg =3D=3D rreg_lrs[j].rreg)
break;
/* If this vassertion fails, we don't have an entry for
this rreg. Which we should. */
- vassert(k < n_state);
- vassert(state[k].disp =3D=3D Unavail);
- state[k].disp =3D Free;
- state[k].vreg =3D INVALID_HREG;
+ vassert(k < n_rregs);
+ vassert(rreg_state[k].disp =3D=3D Unavail);
+ rreg_state[k].disp =3D Free;
+ rreg_state[k].vreg =3D INVALID_HREG;
}
}
=20
@@ -1129,13 +1226,13 @@
=20
/* ------ END: Process each insn in turn. ------ */
=20
- /* free(state); */
+ /* free(rreg_state); */
/* free(rreg_lrs); */
/* if (vreg_lrs) free(vreg_lrs); */
=20
/* Paranoia */
- for (j =3D 0; j < n_state; j++)
- vassert(state[j].rreg =3D=3D available_real_regs[j]);
+ for (j =3D 0; j < n_rregs; j++)
+ vassert(rreg_state[j].rreg =3D=3D available_real_regs[j]);
=20
return instrs_out;
=20
|