Author: sewardj
Date: Sun Apr 12 10:23:58 2015
New Revision: 3126
Log:
Tidyups, no functional change:
* Create RRegSets for caller-saved and callee-saved registers on
amd64, so as to create a single point of reference for that info.
Plumb to use sites.
* Pull out and abstractify logic to compute the set of registers
to spill around NCode calls (calcRegistersToPreserveAroundNCodeCall)
so it becomes arch neutral and move it to host_generic_regs.c.
* fix stupid error in RRegSet__fromVec
Modified:
branches/NCODE/priv/host_amd64_defs.c
branches/NCODE/priv/host_amd64_defs.h
branches/NCODE/priv/host_amd64_isel.c
branches/NCODE/priv/host_generic_regs.c
branches/NCODE/priv/host_generic_regs.h
branches/NCODE/priv/main_main.c
branches/NCODE/priv/main_util.h
Modified: branches/NCODE/priv/host_amd64_defs.c
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.c (original)
+++ branches/NCODE/priv/host_amd64_defs.c Sun Apr 12 10:23:58 2015
@@ -101,6 +101,75 @@
}
+/* Returns the registers in the AMD64 universe that are caller saved.
+ This is really ABI dependent, but we ignore that detail here. */
+static const RRegSet* getRRegsCallerSaved_AMD64 ( void )
+{
+ /* In theory gcc should be able to fold this into a single 64 bit
+ constant (bitset). But that's a bit risky, so instead do
+ thread-unsafe lazy initialisation (sigh). */
+ static RRegSet callerSavedRegs;
+ static Bool callerSavedRegs_initted = False;
+
+ if (LIKELY(callerSavedRegs_initted))
+ return &callerSavedRegs;
+
+ RRegSet__init(&callerSavedRegs, getRRegUniverse_AMD64());
+
+ RRegSet__add(&callerSavedRegs, hregAMD64_RAX());
+ RRegSet__add(&callerSavedRegs, hregAMD64_RCX());
+ RRegSet__add(&callerSavedRegs, hregAMD64_RDX());
+ RRegSet__add(&callerSavedRegs, hregAMD64_RSI());
+ RRegSet__add(&callerSavedRegs, hregAMD64_RDI());
+ RRegSet__add(&callerSavedRegs, hregAMD64_R8());
+ RRegSet__add(&callerSavedRegs, hregAMD64_R9());
+ RRegSet__add(&callerSavedRegs, hregAMD64_R10());
+ RRegSet__add(&callerSavedRegs, hregAMD64_R11());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM0());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM1());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM3());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM4());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM5());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM6());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM7());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM8());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM9());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM10());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM11());
+ RRegSet__add(&callerSavedRegs, hregAMD64_XMM12());
+
+ callerSavedRegs_initted = True;
+ return &callerSavedRegs;
+}
+
+
+/* Returns the registers in the AMD64 universe that are callee saved.
+ This is really ABI dependent, but we ignore that detail here. */
+static const RRegSet* getRRegsCalleeSaved_AMD64 ( void )
+{
+ /* In theory gcc should be able to fold this into a single 64 bit
+ constant (bitset). But that's a bit risky, so instead do
+ thread-unsafe lazy initialisation (sigh). */
+ static RRegSet calleeSavedRegs;
+ static Bool calleeSavedRegs_initted = False;
+
+ if (LIKELY(calleeSavedRegs_initted))
+ return &calleeSavedRegs;
+
+ RRegSet__init(&calleeSavedRegs, getRRegUniverse_AMD64());
+
+ RRegSet__add(&calleeSavedRegs, hregAMD64_RBX());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_RBP());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_R12());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_R13());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_R14());
+ RRegSet__add(&calleeSavedRegs, hregAMD64_R15());
+
+ calleeSavedRegs_initted = True;
+ return &calleeSavedRegs;
+}
+
+
void ppHRegAMD64 ( HReg reg )
{
Int r;
@@ -1548,31 +1617,9 @@
/* This is a bit subtle. */
/* First off, claim it trashes all the caller-saved regs
which fall within the register allocator's jurisdiction.
- These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11
- and all the xmm registers.
+ These I believe to be: rsi rdi r8 r9 r10 xmm3..xmm12.
*/
- addHRegUse(u, HRmWrite, hregAMD64_RAX());
- addHRegUse(u, HRmWrite, hregAMD64_RCX());
- addHRegUse(u, HRmWrite, hregAMD64_RDX());
- addHRegUse(u, HRmWrite, hregAMD64_RSI());
- addHRegUse(u, HRmWrite, hregAMD64_RDI());
- addHRegUse(u, HRmWrite, hregAMD64_R8());
- addHRegUse(u, HRmWrite, hregAMD64_R9());
- addHRegUse(u, HRmWrite, hregAMD64_R10());
- addHRegUse(u, HRmWrite, hregAMD64_R11());
- addHRegUse(u, HRmWrite, hregAMD64_XMM0());
- addHRegUse(u, HRmWrite, hregAMD64_XMM1());
- addHRegUse(u, HRmWrite, hregAMD64_XMM3());
- addHRegUse(u, HRmWrite, hregAMD64_XMM4());
- addHRegUse(u, HRmWrite, hregAMD64_XMM5());
- addHRegUse(u, HRmWrite, hregAMD64_XMM6());
- addHRegUse(u, HRmWrite, hregAMD64_XMM7());
- addHRegUse(u, HRmWrite, hregAMD64_XMM8());
- addHRegUse(u, HRmWrite, hregAMD64_XMM9());
- addHRegUse(u, HRmWrite, hregAMD64_XMM10());
- addHRegUse(u, HRmWrite, hregAMD64_XMM11());
- addHRegUse(u, HRmWrite, hregAMD64_XMM12());
-
+ addHRegUse_from_RRegSet(u, HRmWrite, getRRegsCallerSaved_AMD64());
/* Now we have to state any parameter-carrying registers
which might be read. This depends on the regparmness. */
switch (i->Ain.Call.regparms) {
@@ -3981,251 +4028,9 @@
so it's already out of commission as far as regalloc is concerned.
So we can safely use it here, when needed. */
-/* A handy structure to hold the register environment. */
-typedef
- struct {
- UInt nRegsR;
- const HReg* regsR;
- UInt nRegsA;
- const HReg* regsA;
- UInt nRegsS;
- const HReg* regsS;
- }
- NRegMap;
-
-/* fwds */
-static void emit_AMD64NInstr ( /*MOD*/AssemblyBuffer* ab,
- /*MOD*/RelocationBuffer* rb,
- const NInstr* ni,
- const NRegMap* nregMap,
- const RRegSet* rrLiveAfter,
- /* for debug printing only */
- Bool verbose, NLabel niLabel );
-
-static UInt hregVecLen ( const HReg* vec )
-{
- UInt i;
- for (i = 0; !hregIsInvalid(vec[i]); i++)
- ;
- return i;
-}
-
-/* Generate the AMD64 NCode instruction |hi| into |ab_hot| and
- |ab_cold|. This can only handle NCode blocks. All other AMD64
- instructions are to be handled by emit_AMD64Instr. This is
- required to generate <= 1024 bytes of code. Returns True if OK,
- False if not enough buffer space. */
-
-Bool emit_AMD64NCode ( /*MOD*/AssemblyBuffer* ab_hot,
- /*MOD*/AssemblyBuffer* ab_cold,
- /*MOD*/RelocationBuffer* rb,
- const AMD64Instr* hi,
- Bool mode64, VexEndness endness_host,
- Bool verbose )
-{
- vassert(mode64 == True);
- vassert(endness_host == VexEndnessLE);
- vassert(hi->tag == Ain_NCode);
-
- const AMD64InstrNCode* hi_details = hi->Ain.NCode.details;
- const NCodeTemplate* tmpl = hi_details->tmpl;
- const RRegSet* rregsLiveAfter = hi_details->rrLiveAfter;
- const RRegUniverse* univ = RRegSet__getUniverse(rregsLiveAfter);
-
- NRegMap nregMap;
- nregMap.regsR = hi_details->regsR;
- nregMap.regsA = hi_details->regsA;
- nregMap.regsS = hi_details->regsS;
- nregMap.nRegsR = tmpl->nres;
- nregMap.nRegsA = tmpl->narg;
- nregMap.nRegsS = tmpl->nscr;
-
- vassert(hregVecLen(nregMap.regsR) == nregMap.nRegsR);
- vassert(hregVecLen(nregMap.regsA) == nregMap.nRegsA);
- vassert(hregVecLen(nregMap.regsS) == nregMap.nRegsS);
-
- if (AssemblyBuffer__getRemainingSize(ab_hot) < 1024)
- return False;
- if (AssemblyBuffer__getRemainingSize(ab_cold) < 1024)
- return False;
- if (RelocationBuffer__getRemainingSize(rb) < 128)
- return False;
-
- /* Count how many hot and cold instructions (NInstrs) the template
- has, since we'll need to allocate temporary arrays to keep track
- of the label offsets. */
- UInt nHot, nCold;
- for (nHot = 0; tmpl->hot[nHot]; nHot++)
- ;
- for (nCold = 0; tmpl->cold[nCold]; nCold++)
- ;
-
- /* Here are our two arrays for tracking the AssemblyBuffer offsets
- of the NCode instructions. */
- UInt i;
- UInt offsetsHot[nHot];
- UInt offsetsCold[nCold];
- for (i = 0; i < nHot; i++) offsetsHot[i] = 0;
- for (i = 0; i < nCold; i++) offsetsCold[i] = 0;
-
- /* We'll be adding entries to the relocation buffer, |rb|, and will
- need to adjust their |dst| fields after generation of the hot
- and cold code. Record therefore where we are in the buffer now,
- so that we can iterate over the new entries later. */
- UInt rb_first = RelocationBuffer__getNext(rb);
-
- /* Generate the hot code */
- for (i = 0; i < nHot; i++) {
- offsetsHot[i] = AssemblyBuffer__getNext(ab_hot);
- NLabel lbl = mkNLabel(Nlz_Hot, i);
- emit_AMD64NInstr(ab_hot, rb, tmpl->hot[i], &nregMap,
- rregsLiveAfter, verbose, lbl);
- }
-
- /* And the cold code */
- for (i = 0; i < nCold; i++) {
- offsetsCold[i] = AssemblyBuffer__getNext(ab_cold);
- NLabel lbl = mkNLabel(Nlz_Cold, i);
- emit_AMD64NInstr(ab_cold, rb, tmpl->cold[i], &nregMap,
- rregsLiveAfter, verbose, lbl);
- }
-
- /* Now visit the new relocation entries. */
- UInt rb_last1 = RelocationBuffer__getNext(rb);
-
- for (i = rb_first; i < rb_last1; i++) {
- Relocation* reloc = &rb->buf[i];
-
- /* Show the reloc before the label-to-offset transformation. */
- if (verbose) {
- vex_printf(" reloc: ");
- ppRelocation(reloc);
- vex_printf("\n");
- }
-
- /* Transform the destination component of |reloc| so that it no
- longer refers to a label but rather to an offset in the hot
- or cold assembly buffer. */
- vassert(!reloc->dst.isOffset);
- reloc->dst.isOffset = True;
-
- if (reloc->dst.zone == Nlz_Hot) {
- vassert(reloc->dst.num < nHot);
- reloc->dst.num = offsetsHot[reloc->dst.num];
- } else {
- vassert(reloc->dst.zone == Nlz_Cold);
- vassert(reloc->dst.num < nCold);
- reloc->dst.num = offsetsCold[reloc->dst.num];
- }
-
- /* Show the reloc after the label-to-offset transformation. */
- if (verbose) {
- vex_printf(" reloc: ");
- ppRelocation(reloc);
- vex_printf("\n");
- }
- }
-
- if (0) {
- HReg r10 = hregAMD64_R10();
- HReg rax = hregAMD64_RAX();
- HReg rbx = hregAMD64_RBX();
- HReg rcx = hregAMD64_RCX();
- HReg rdx = hregAMD64_RDX();
-
- RRegSet* rs = RRegSet__new(univ);
- vex_printf("\n__new\n");
- vex_printf("1: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- vex_printf("\n__add\n");
- RRegSet__add(rs, rbx);
- vex_printf("2: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, rdx);
- vex_printf("3: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, rcx);
- vex_printf("4: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, rcx);
- vex_printf("5: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, r10);
- vex_printf("6: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__add(rs, rax);
- vex_printf("7: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- vex_printf("\n__fromVec\n");
- const HReg vec[4] = { rdx, rcx, rbx, rax };
- RRegSet__fromVec(rs, vec, 0);
- vex_printf("8: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__fromVec(rs, vec, 4);
- vex_printf("9: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- vex_printf("\n__del\n");
- RRegSet__del(rs, rcx);
- vex_printf("10: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rcx);
- vex_printf("11: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rbx);
- vex_printf("12: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rax);
- vex_printf("13: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rdx);
- vex_printf("14: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- RRegSet__del(rs, rdx);
- vex_printf("15: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
-
- vex_printf("\n__plus\n");
- RRegSet* rs2 = RRegSet__new(univ);
- RRegSet__add(rs, r10); RRegSet__add(rs, rax);
- RRegSet__add(rs2, rbx); RRegSet__add(rs2, rcx); RRegSet__add(rs2, rax);
-
- RRegSet__plus(rs2, rs);
- vex_printf("16a: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
- vex_printf("16b: "); RRegSet__pp(rs2, ppHRegAMD64); vex_printf("\n");
-
- vex_printf("\n__minus\n");
- RRegSet__minus(rs, rs2);
- vex_printf("17: "); RRegSet__pp(rs, ppHRegAMD64); vex_printf("\n");
-
- }
-
- return True;
-}
-
-/* Find the real (hard) register for |r| by looking up in |map|. */
-static HReg mapNReg ( const NRegMap* map, NReg r )
-{
- UInt limit = 0;
- const HReg* arr = NULL;
- switch (r.role) {
- case Nrr_Result: limit = map->nRegsR; arr = map->regsR; break;
- case Nrr_Argument: limit = map->nRegsA; arr = map->regsA; break;
- case Nrr_Scratch: limit = map->nRegsS; arr = map->regsS; break;
- default: vpanic("mapNReg: invalid reg role");
- }
- vassert(r.num < limit);
- return arr[r.num];
-}
-
-/* ***FIXME*** this is an exact copy of the same in host_amd64_isel.c. */
-static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
-{
- vassert(hregClass(src) == HRcInt64);
- vassert(hregClass(dst) == HRcInt64);
- return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
-}
-
-
+/* Emits AMD64 code for a single NInstr |ni| into |ab|, possibly
+ adding relocation information into |rb| too.
+*/
static
void emit_AMD64NInstr ( /*MOD*/AssemblyBuffer* ab,
/*MOD*/RelocationBuffer* rb,
@@ -4298,78 +4103,15 @@
}
case Nin_Call: {
- /* The main difficulty here is to figure out the minimal set
- of registers to save across the call. As far as I can see, the
- set is:
-
- (1) registers live after this NCode block
- (2) + the Arg, Res and Scratch registers for this block
- (3) - Abi_Callee_Saved registers
- (4) - the Arg/Res/Scratch register(s) into which this call
- will place its results
-
- (1) because that's the set of regs that reg-alloc expects to
- not be trashed by the NCode block
- (2) because Arg/Res/Scratch regs can be used freely within the
- NCode block, so we have to keep them alive
- (3) because preserving Callee saved regs is obviously pointless
- (4) because preserving the call's result reg(s) will result in
- the restore sequence overwriting the result of the call
-
- Figuring out (1) is tricky and is something that reg-alloc
- needs to tell us. I think it's safe to start with an
- overestimate of (1) -- for example, all regs available to
- reg-alloc -- and refine it later.
- */
- const RRegUniverse* univ = RRegSet__getUniverse(hregsLiveAfter);
- const RRegSet* set_1 = hregsLiveAfter;
-
- RRegSet* set_2 = RRegSet__new(univ);
- { UInt i;
- for (i = 0; i < nregMap->nRegsR; i++)
- RRegSet__add(set_2, nregMap->regsR[i]);
- for (i = 0; i < nregMap->nRegsA; i++)
- RRegSet__add(set_2, nregMap->regsA[i]);
- for (i = 0; i < nregMap->nRegsS; i++)
- RRegSet__add(set_2, nregMap->regsS[i]);
- }
-
- RRegSet* set_3 = RRegSet__new(univ);
- // callee-saves: rbx rbp r12 r13 r14 r15
- { HReg vec[6];
- vec[0] = hregAMD64_RBX(); vec[1] = hregAMD64_RBP();
- vec[2] = hregAMD64_R12(); vec[3] = hregAMD64_R13();
- vec[4] = hregAMD64_R14(); vec[5] = hregAMD64_R15();
- RRegSet__fromVec(set_3, vec, sizeof(vec)/sizeof(vec[0]));
- }
-
- RRegSet* set_4 = RRegSet__new(univ);
- if (!isNRegINVALID(ni->Nin.Call.resHi))
- RRegSet__add(set_4, mapNReg(nregMap, ni->Nin.Call.resHi));
- if (!isNRegINVALID(ni->Nin.Call.resLo))
- RRegSet__add(set_4, mapNReg(nregMap, ni->Nin.Call.resLo));
-
- RRegSet* to_preserve = RRegSet__new(univ);
- RRegSet__copy(to_preserve, set_1);
- RRegSet__plus(to_preserve, set_2);
- RRegSet__minus(to_preserve, set_3);
- RRegSet__minus(to_preserve, set_4);
-
- if (verbose) {
- vex_printf(" # set1: ");
- RRegSet__pp(set_1, ppHRegAMD64); vex_printf("\n");
- vex_printf(" # set2: ");
- RRegSet__pp(set_2, ppHRegAMD64); vex_printf("\n");
- vex_printf(" # set3: ");
- RRegSet__pp(set_3, ppHRegAMD64); vex_printf("\n");
- vex_printf(" # set4: ");
- RRegSet__pp(set_4, ppHRegAMD64); vex_printf("\n");
- vex_printf(" # pres: ");
- RRegSet__pp(to_preserve, ppHRegAMD64); vex_printf("\n");
- }
+ RRegSet to_preserve;
+ calcRegistersToPreserveAroundNCodeCall(
+ &to_preserve,
+ hregsLiveAfter, getRRegsCalleeSaved_AMD64(), nregMap,
+ ni->Nin.Call.resHi, ni->Nin.Call.resLo
+ );
/* Save live regs */
- UInt n_to_preserve = RRegSet__card(to_preserve);
+ UInt n_to_preserve = RRegSet__card(&to_preserve);
vassert(n_to_preserve < 25); /* stay sane */
/* Figure out how much to move the stack, ensuring any alignment up
@@ -4382,7 +4124,7 @@
}
RRegSetIterator* iter = RRegSetIterator__new();
- RRegSetIterator__init(iter, to_preserve);
+ RRegSetIterator__init(iter, &to_preserve);
UInt slotNo = 0;
while (True) {
HReg r = RRegSetIterator__next(iter);
@@ -4426,7 +4168,7 @@
}
/* Restore live regs */
- RRegSetIterator__init(iter, to_preserve);
+ RRegSetIterator__init(iter, &to_preserve);
slotNo = 0;
while (True) {
HReg r = RRegSetIterator__next(iter);
@@ -4582,6 +4324,127 @@
}
+/* Emits AMD64 code for the complete NCode block |hi| into |ab_hot|
+ and |ab_cold|, possibly adding relocation information to |rb| too.
+ This function can only handle NCode blocks. All other AMD64
+ instructions are to be handled by emit_AMD64Instr. This function
+ is required to generate <= 1024 bytes of code. Returns True if OK,
+ False if not enough buffer space.
+*/
+Bool emit_AMD64NCodeBlock ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const AMD64Instr* hi,
+ Bool mode64, VexEndness endness_host,
+ Bool verbose )
+{
+ vassert(mode64 == True);
+ vassert(endness_host == VexEndnessLE);
+ vassert(hi->tag == Ain_NCode);
+
+ const AMD64InstrNCode* hi_details = hi->Ain.NCode.details;
+ const NCodeTemplate* tmpl = hi_details->tmpl;
+ const RRegSet* rregsLiveAfter = hi_details->rrLiveAfter;
+ const RRegUniverse* univ = RRegSet__getUniverse(rregsLiveAfter);
+
+ NRegMap nregMap;
+ nregMap.regsR = hi_details->regsR;
+ nregMap.regsA = hi_details->regsA;
+ nregMap.regsS = hi_details->regsS;
+ nregMap.nRegsR = tmpl->nres;
+ nregMap.nRegsA = tmpl->narg;
+ nregMap.nRegsS = tmpl->nscr;
+
+ vassert(hregVecLen(nregMap.regsR) == nregMap.nRegsR);
+ vassert(hregVecLen(nregMap.regsA) == nregMap.nRegsA);
+ vassert(hregVecLen(nregMap.regsS) == nregMap.nRegsS);
+
+ if (AssemblyBuffer__getRemainingSize(ab_hot) < 1024)
+ return False;
+ if (AssemblyBuffer__getRemainingSize(ab_cold) < 1024)
+ return False;
+ if (RelocationBuffer__getRemainingSize(rb) < 128)
+ return False;
+
+ /* Count how many hot and cold instructions (NInstrs) the template
+ has, since we'll need to allocate temporary arrays to keep track
+ of the label offsets. */
+ UInt nHot, nCold;
+ for (nHot = 0; tmpl->hot[nHot]; nHot++)
+ ;
+ for (nCold = 0; tmpl->cold[nCold]; nCold++)
+ ;
+
+ /* Here are our two arrays for tracking the AssemblyBuffer offsets
+ of the NCode instructions. */
+ UInt i;
+ UInt offsetsHot[nHot];
+ UInt offsetsCold[nCold];
+ for (i = 0; i < nHot; i++) offsetsHot[i] = 0;
+ for (i = 0; i < nCold; i++) offsetsCold[i] = 0;
+
+ /* We'll be adding entries to the relocation buffer, |rb|, and will
+ need to adjust their |dst| fields after generation of the hot
+ and cold code. Record therefore where we are in the buffer now,
+ so that we can iterate over the new entries later. */
+ UInt rb_first = RelocationBuffer__getNext(rb);
+
+ /* Generate the hot code */
+ for (i = 0; i < nHot; i++) {
+ offsetsHot[i] = AssemblyBuffer__getNext(ab_hot);
+ NLabel lbl = mkNLabel(Nlz_Hot, i);
+ emit_AMD64NInstr(ab_hot, rb, tmpl->hot[i], &nregMap,
+ rregsLiveAfter, verbose, lbl);
+ }
+
+ /* And the cold code */
+ for (i = 0; i < nCold; i++) {
+ offsetsCold[i] = AssemblyBuffer__getNext(ab_cold);
+ NLabel lbl = mkNLabel(Nlz_Cold, i);
+ emit_AMD64NInstr(ab_cold, rb, tmpl->cold[i], &nregMap,
+ rregsLiveAfter, verbose, lbl);
+ }
+
+ /* Now visit the new relocation entries. */
+ UInt rb_last1 = RelocationBuffer__getNext(rb);
+
+ for (i = rb_first; i < rb_last1; i++) {
+ Relocation* reloc = &rb->buf[i];
+
+ /* Show the reloc before the label-to-offset transformation. */
+ if (verbose) {
+ vex_printf(" reloc: ");
+ ppRelocation(reloc);
+ vex_printf("\n");
+ }
+
+ /* Transform the destination component of |reloc| so that it no
+ longer refers to a label but rather to an offset in the hot
+ or cold assembly buffer. */
+ vassert(!reloc->dst.isOffset);
+ reloc->dst.isOffset = True;
+
+ if (reloc->dst.zone == Nlz_Hot) {
+ vassert(reloc->dst.num < nHot);
+ reloc->dst.num = offsetsHot[reloc->dst.num];
+ } else {
+ vassert(reloc->dst.zone == Nlz_Cold);
+ vassert(reloc->dst.num < nCold);
+ reloc->dst.num = offsetsCold[reloc->dst.num];
+ }
+
+ /* Show the reloc after the label-to-offset transformation. */
+ if (verbose) {
+ vex_printf(" reloc: ");
+ ppRelocation(reloc);
+ vex_printf("\n");
+ }
+ }
+
+ return True;
+}
+
+
/* --------- Helpers for translation chaining. --------- */
/* How big is an event check? See case for Ain_EvCheck in
Modified: branches/NCODE/priv/host_amd64_defs.h
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.h (original)
+++ branches/NCODE/priv/host_amd64_defs.h Sun Apr 12 10:23:58 2015
@@ -830,6 +830,9 @@
extern void ppAMD64Instr ( const AMD64Instr*, Bool );
+/* Handy helper for generating integer reg-reg moves. */
+extern AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst );
+
/* Some functions that insulate the register allocator from details
of the underlying instruction set. */
extern void getRegUsage_AMD64Instr ( HRegUsage*, const AMD64Instr*, Bool );
@@ -839,12 +842,12 @@
const AMD64Instr*, Bool, VexEndness,
const VexDispatcherAddresses* );
-extern Bool emit_AMD64NCode ( /*MOD*/AssemblyBuffer* ab_hot,
- /*MOD*/AssemblyBuffer* ab_cold,
- /*MOD*/RelocationBuffer* rb,
- const AMD64Instr* hi,
- Bool mode64, VexEndness endness_host,
- Bool verbose );
+extern Bool emit_AMD64NCodeBlock ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const AMD64Instr* hi,
+ Bool mode64, VexEndness endness_host,
+ Bool verbose );
extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Bool spRel, Int offset, Bool );
Modified: branches/NCODE/priv/host_amd64_isel.c
==============================================================================
--- branches/NCODE/priv/host_amd64_isel.c (original)
+++ branches/NCODE/priv/host_amd64_isel.c Sun Apr 12 10:23:58 2015
@@ -309,9 +309,9 @@
&& e->Iex.Const.con->Ico.U32 == 0;
}
-/* Make a int reg-reg move. */
+/* Make an int reg-reg move. */
-static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
+/*notstatic*/ AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
vassert(hregClass(src) == HRcInt64);
vassert(hregClass(dst) == HRcInt64);
Modified: branches/NCODE/priv/host_generic_regs.c
==============================================================================
--- branches/NCODE/priv/host_generic_regs.c (original)
+++ branches/NCODE/priv/host_generic_regs.c Sun Apr 12 10:23:58 2015
@@ -120,16 +120,6 @@
/*--- Real register sets ---*/
/*---------------------------------------------------------*/
-/* Represents sets of real registers. |bits| is interpreted in the
- context of |univ|. That is, each bit index |i| in |bits|
- corresponds to the register |univ->regs[i]|. This relies
- entirely on the fact that N_RREGUNIVERSE_REGS <= 64.
-*/
-struct _RRegSet {
- ULong bits;
- const RRegUniverse* univ;
-};
-
STATIC_ASSERT(N_RREGUNIVERSE_REGS <= 8 * sizeof(ULong));
/* Print a register set, using the arch-specific register printing
@@ -153,13 +143,19 @@
vex_printf("}");
}
-/* Create a new, empty, set. */
+/* Initialise an RRegSet, making it empty. */
+inline void RRegSet__init ( /*OUT*/RRegSet* set, const RRegUniverse* univ )
+{
+ set->bits = 0;
+ set->univ = univ;
+}
+
+/* Create a new, empty, set, in the normal (transient) heap. */
RRegSet* RRegSet__new ( const RRegUniverse* univ )
{
vassert(univ);
RRegSet* set = LibVEX_Alloc_inline(sizeof(RRegSet));
- set->bits = 0;
- set->univ = univ;
+ RRegSet__init(set, univ);
return set;
}
@@ -174,6 +170,7 @@
duplicates. */
void RRegSet__fromVec ( /*MOD*/RRegSet* dst, const HReg* vec, UInt nVec )
{
+ dst->bits = 0;
for (UInt i = 0; i < nVec; i++) {
HReg r = vec[i];
vassert(!hregIsInvalid(r) && !hregIsVirtual(r));
@@ -229,6 +226,22 @@
return __builtin_popcountll(set->bits);
}
+/* Remove non-allocatable registers from this set. Because the set
+ carries its register universe, we can consult that to find the
+ non-allocatable registers, so no other parameters are needed. */
+void RRegSet__deleteNonAllocatable ( /*MOD*/RRegSet* set )
+{
+ const RRegUniverse* univ = set->univ;
+ UInt allocable = univ->allocable;
+ if (UNLIKELY(allocable == N_RREGUNIVERSE_REGS)) {
+ return;
+ /* otherwise we'd get an out-of-range shift below */
+ }
+ vassert(allocable > 0 && allocable < N_RREGUNIVERSE_REGS);
+ ULong mask = (1ULL << allocable) - 1;
+ set->bits &= mask;
+}
+
struct _RRegSetIterator {
const RRegSet* set;
@@ -398,6 +411,20 @@
/*NOTREACHED*/
}
+void addHRegUse_from_RRegSet ( HRegUsage* tab,
+ HRegMode mode, const RRegSet* set )
+{
+ STATIC_ASSERT(sizeof(tab->rRead) == sizeof(tab->rWritten));
+ STATIC_ASSERT(sizeof(tab->rRead) == sizeof(set->bits));
+ switch (mode) {
+ case HRmRead: tab->rRead |= set->bits; break;
+ case HRmWrite: tab->rWritten |= set->bits; break;
+ case HRmModify: tab->rRead |= set->bits;
+ tab->rWritten |= set->bits; break;
+ default: vassert(0);
+ }
+}
+
/*---------------------------------------------------------*/
/*--- Indicating register remappings (for reg-alloc) ---*/
@@ -531,6 +558,128 @@
}
+/*---------------------------------------------------------*/
+/*--- NCode generation helpers ---*/
+/*---------------------------------------------------------*/
+
+/* Find the length of a vector of HRegs that is terminated by
+ an HReg_INVALID. */
+UInt hregVecLen ( const HReg* vec )
+{
+ UInt i;
+ for (i = 0; !hregIsInvalid(vec[i]); i++)
+ ;
+ return i;
+}
+
+
+/* Find the real (hard) register for |r| by looking up in |map|. */
+HReg mapNReg ( const NRegMap* map, NReg r )
+{
+ UInt limit = 0;
+ const HReg* arr = NULL;
+ switch (r.role) {
+ case Nrr_Result: limit = map->nRegsR; arr = map->regsR; break;
+ case Nrr_Argument: limit = map->nRegsA; arr = map->regsA; break;
+ case Nrr_Scratch: limit = map->nRegsS; arr = map->regsS; break;
+ default: vpanic("mapNReg: invalid reg role");
+ }
+ vassert(r.num < limit);
+ return arr[r.num];
+}
+
+
+/* Compute the minimal set of registers to preserve around calls embedded
+   within NCode blocks.  NB: |abiCallerSavedRegs| gets the CALLEE-saved set. */
+void calcRegistersToPreserveAroundNCodeCall (
+ /*OUT*/RRegSet* result,
+ const RRegSet* hregsLiveAfterTheNCodeBlock,
+ const RRegSet* abiCallerSavedRegs,
+ const NRegMap* nregMap,
+ NReg nregResHi,
+ NReg nregResLo
+ )
+{
+ /* This function deals with one of the main difficulties of NCode
+ templates, which is that of figuring out the minimal set of
+ registers to save across calls embedded inside NCode blocks. As
+ far as I can see, the set is:
+
+ (1) registers live after the NCode block
+ (2) + the Arg, Res and Scratch registers for the block
+ (3) - Abi_Callee_Saved registers
+ (4) - the Arg/Res/Scratch register(s) into which the call
+ will place its results
+
+ (1) because that's the set of regs that reg-alloc expects to
+ not be trashed by the NCode block
+ (2) because Arg/Res/Scratch regs can be used freely within the
+ NCode block, so we have to keep them alive
+ (3) because preserving Callee saved regs is obviously pointless
+ (4) because preserving the call's result reg(s) will result in
+ the restore sequence overwriting the result of the call
+
+ (2) (3) (4) are either constants or something we can find from
+ inspection of the relevant NInstr (call) alone. (1) is
+ something that depends on instructions after the NCode block
+ and so is something that the register allocator has to tell us.
+
+ Another detail is that we remove from the set, all registers not
+ available to the register allocator. That is, we save across
+ the call, only registers available to the allocator. That
+ assumes that all fixed-use or otherwise-not-allocatable
+ registers, that we care about, are callee-saved. AFAIK the only
+ important register is the baseblock register, and that is indeed
+ callee-saved on all targets.
+ */
+ const RRegUniverse* univ
+ = RRegSet__getUniverse(hregsLiveAfterTheNCodeBlock);
+
+ const RRegSet* set_1 = hregsLiveAfterTheNCodeBlock;
+
+ RRegSet set_2;
+ RRegSet__init(&set_2, univ);
+ for (UInt i = 0; i < nregMap->nRegsR; i++)
+ RRegSet__add(&set_2, nregMap->regsR[i]);
+ for (UInt i = 0; i < nregMap->nRegsA; i++)
+ RRegSet__add(&set_2, nregMap->regsA[i]);
+ for (UInt i = 0; i < nregMap->nRegsS; i++)
+ RRegSet__add(&set_2, nregMap->regsS[i]);
+
+ const RRegSet* set_3 = abiCallerSavedRegs;
+ vassert(univ == RRegSet__getUniverse(set_3));
+
+ RRegSet set_4;
+ RRegSet__init(&set_4, univ);
+ if (!isNRegINVALID(nregResHi))
+ RRegSet__add(&set_4, mapNReg(nregMap, nregResHi));
+ if (!isNRegINVALID(nregResLo))
+ RRegSet__add(&set_4, mapNReg(nregMap, nregResLo));
+
+ RRegSet__init(result, univ);
+ RRegSet__copy(result, set_1);
+ RRegSet__plus(result, &set_2);
+ RRegSet__minus(result, set_3);
+ RRegSet__minus(result, &set_4);
+
+ if (0) {
+ vex_printf(" # set1: ");
+ RRegSet__pp(set_1, ppHReg); vex_printf("\n");
+ vex_printf(" # set2: ");
+ RRegSet__pp(&set_2, ppHReg); vex_printf("\n");
+ vex_printf(" # set3: ");
+ RRegSet__pp(set_3, ppHReg); vex_printf("\n");
+ vex_printf(" # set4: ");
+ RRegSet__pp(&set_4, ppHReg); vex_printf("\n");
+ vex_printf(" # pres: ");
+ RRegSet__pp(result, ppHReg); vex_printf("\n");
+ }
+
+ /* Remove any non allocatable registers (see big comment above) */
+ RRegSet__deleteNonAllocatable(result);
+}
+
+
/*---------------------------------------------------------------*/
/*--- end host_generic_regs.c ---*/
/*---------------------------------------------------------------*/
Modified: branches/NCODE/priv/host_generic_regs.h
==============================================================================
--- branches/NCODE/priv/host_generic_regs.h (original)
+++ branches/NCODE/priv/host_generic_regs.h Sun Apr 12 10:23:58 2015
@@ -238,14 +238,34 @@
/*--- Real Register Sets ---*/
/*---------------------------------------------------------*/
-/* ABSTYPE */
-typedef struct _RRegSet RRegSet;
+/* Represents sets of real registers. |bits| is interpreted in the
+ context of |univ|. That is, each bit index |i| in |bits|
+ corresponds to the register |univ->regs[i]|. This relies
+ entirely on the fact that N_RREGUNIVERSE_REGS <= 64.
+
+ It would have been nice to have been able to make this abstract,
+ but it is necessary to declare globals of this type. Hence the
+ size has to be known to the users of the type and so it can't be
+ abstract.
+*/
+typedef
+ struct {
+ ULong bits;
+ const RRegUniverse* univ;
+ }
+ RRegSet;
+
+STATIC_ASSERT(N_RREGUNIVERSE_REGS <= 8 * sizeof(ULong));
+
/* Print a register set, using the arch-specific register printing
function |regPrinter| supplied. */
extern void RRegSet__pp ( const RRegSet* set, void (*regPrinter)(HReg) );
-/* Create a new, empty, set. */
+/* Initialise an RRegSet, making it empty. */
+extern void RRegSet__init ( /*OUT*/RRegSet* set, const RRegUniverse* univ );
+
+/* Create a new, empty, set, in the normal (transient) heap. */
extern RRegSet* RRegSet__new ( const RRegUniverse* univ );
/* Return the RRegUniverse for a given RRegSet. */
@@ -275,6 +295,11 @@
/* Returns the number of elements in |set|. */
extern UInt RRegSet__card ( const RRegSet* set );
+/* Remove non-allocatable registers from this set. Because the set
+ carries its register universe, we can consult that to find the
+ non-allocatable registers, so no other parameters are needed. */
+extern void RRegSet__deleteNonAllocatable ( /*MOD*/RRegSet* set );
+
/* Iterating over RRegSets. */
/* ABSTYPE */
@@ -344,6 +369,9 @@
extern Bool HRegUsage__contains ( const HRegUsage*, HReg );
+extern void addHRegUse_from_RRegSet ( HRegUsage*, HRegMode, const RRegSet* );
+
+
/*---------------------------------------------------------*/
/*--- Indicating register remappings (for reg-alloc) ---*/
/*---------------------------------------------------------*/
@@ -702,6 +730,46 @@
);
+/*---------------------------------------------------------*/
+/*--- NCode generation helpers ---*/
+/*---------------------------------------------------------*/
+
+/* Find the length of a vector of HRegs that is terminated by
+ an HReg_INVALID. */
+extern UInt hregVecLen ( const HReg* vec );
+
+
+/* A handy structure to hold the register environment for an NCode
+ block -- that is, the NReg to HReg mapping. */
+typedef
+ struct {
+ UInt nRegsR;
+ const HReg* regsR;
+ UInt nRegsA;
+ const HReg* regsA;
+ UInt nRegsS;
+ const HReg* regsS;
+ }
+ NRegMap;
+
+/* Find the real (hard) register for |r| by looking up in |map|. */
+extern HReg mapNReg ( const NRegMap* map, NReg r );
+
+
+/* Compute the minimal set of registers to preserve around calls
+   embedded within NCode blocks.  See implementation for a detailed
+   comment.  NB: despite its name, |abiCallerSavedRegs| is misnamed:
+extern
+void calcRegistersToPreserveAroundNCodeCall (
+ /*OUT*/RRegSet* result,
+ const RRegSet* hregsLiveAfterTheNCodeBlock,
+ const RRegSet* abiCallerSavedRegs,
+ const NRegMap* nregMap,
+ NReg nregResHi,
+ NReg nregResLo
+ );
+
+
#endif /* ndef __VEX_HOST_GENERIC_REGS_H */
/*---------------------------------------------------------------*/
Modified: branches/NCODE/priv/main_main.c
==============================================================================
--- branches/NCODE/priv/main_main.c (original)
+++ branches/NCODE/priv/main_main.c Sun Apr 12 10:23:58 2015
@@ -1128,9 +1128,9 @@
if (UNLIKELY( AssemblyBuffer__getRemainingSize(&ab_hot) < 1024 )
|| UNLIKELY( AssemblyBuffer__getRemainingSize(&ab_cold) < 1024 ))
goto outputBufferFull;
- Bool ok = emit_AMD64NCode ( &ab_hot, &ab_cold, &rb, hi,
- mode64, vta->archinfo_host.endness,
- !!(vex_traceflags & VEX_TRACE_ASM));
+ Bool ok = emit_AMD64NCodeBlock ( &ab_hot, &ab_cold, &rb, hi,
+ mode64, vta->archinfo_host.endness,
+ !!(vex_traceflags & VEX_TRACE_ASM));
if (!ok)
goto outputBufferFull;
}
Modified: branches/NCODE/priv/main_util.h
==============================================================================
--- branches/NCODE/priv/main_util.h (original)
+++ branches/NCODE/priv/main_util.h Sun Apr 12 10:23:58 2015
@@ -51,7 +51,8 @@
#endif
// Poor man's static assert
-#define STATIC_ASSERT(x) extern int vex__unused_array[(x) ? 1 : -1]
+#define STATIC_ASSERT(x) extern int vex__unused_array[(x) ? 1 : -1] \
+ __attribute__((unused))
/* Stuff for panicking and assertion. */
|