|
From: <sv...@va...> - 2015-04-16 21:10:53
|
Author: sewardj
Date: Thu Apr 16 22:10:42 2015
New Revision: 3135
Log:
Implement NCode generation for ARM32. Also, move a bunch of code in
the initial AMD64 NCode generator into target-independent routines so
as to avoid duplicating it in all backends in the future.
Modified:
branches/NCODE/priv/host_amd64_defs.c
branches/NCODE/priv/host_amd64_defs.h
branches/NCODE/priv/host_amd64_isel.c
branches/NCODE/priv/host_arm64_defs.c
branches/NCODE/priv/host_arm_defs.c
branches/NCODE/priv/host_arm_defs.h
branches/NCODE/priv/host_arm_isel.c
branches/NCODE/priv/host_generic_reg_alloc2.c
branches/NCODE/priv/host_generic_regs.c
branches/NCODE/priv/host_generic_regs.h
branches/NCODE/priv/host_mips_defs.c
branches/NCODE/priv/host_ppc_defs.c
branches/NCODE/priv/host_x86_defs.c
branches/NCODE/priv/ir_defs.c
branches/NCODE/priv/main_main.c
branches/NCODE/pub/libvex_ir.h
Modified: branches/NCODE/priv/host_amd64_defs.c
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.c (original)
+++ branches/NCODE/priv/host_amd64_defs.c Thu Apr 16 22:10:42 2015
@@ -178,7 +178,7 @@
"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
return;
}
/* But specific for real regs. */
@@ -207,7 +207,7 @@
"%r12d", "%r13d", "%r14d", "%r15d" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
vex_printf("d");
return;
}
@@ -232,7 +232,7 @@
"%r12w", "%r13w", "%r14w", "%r15w" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
vex_printf("w");
return;
}
@@ -1137,7 +1137,7 @@
}
AMD64Instr* AMD64Instr_NCode ( NCodeTemplate* tmpl, HReg* regsR,
HReg* regsA, HReg* regsS ) {
- AMD64InstrNCode* details = LibVEX_Alloc_inline(sizeof(AMD64InstrNCode));
+ HInstrNCode* details = LibVEX_Alloc_inline(sizeof(HInstrNCode));
details->tmpl = tmpl;
details->regsR = regsR;
details->regsA = regsA;
@@ -1504,28 +1504,9 @@
case Ain_ProfInc:
vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
return;
- case Ain_NCode: {
- UInt j;
- AMD64InstrNCode* details = i->Ain.NCode.details;
- NCodeTemplate* tmpl = details->tmpl;
- vex_printf("NCode-AMD64:%s [", tmpl->name);
- for (j = 0; j < tmpl->nres; j++) {
- ppHRegAMD64(details->regsR[j]);
- if (j != tmpl->nres-1) vex_printf(" ");
- }
- vex_printf("] <= [");
- for (j = 0; j < tmpl->narg; j++) {
- ppHRegAMD64(details->regsA[j]);
- if (j != tmpl->narg-1) vex_printf(" ");
- }
- vex_printf("] scratch [");
- for (j = 0; j < tmpl->nscr; j++) {
- ppHRegAMD64(details->regsS[j]);
- if (j != tmpl->nscr-1) vex_printf(" ");
- }
- vex_printf("]");
+ case Ain_NCode:
+ HInstrNCode__show(i->Ain.NCode.details, ppHRegAMD64, "AMD64");
return;
- }
case Ain_NC_Jmp32: {
vex_printf("j%s rel32",
i->Ain.NC_Jmp32.cc == Acc_ALWAYS
@@ -1838,25 +1819,9 @@
case Ain_ProfInc:
addHRegUse(u, HRmWrite, hregAMD64_R11());
return;
- case Ain_NCode: {
- AMD64InstrNCode* details = i->Ain.NCode.details;
- NCodeTemplate* tmpl = details->tmpl;
- // It writes the result and scratch registers.
- UInt j;
- for (j = 0; j < tmpl->nres; j++)
- addHRegUse(u, HRmWrite, details->regsR[j]);
- for (j = 0; j < tmpl->nscr; j++)
- addHRegUse(u, HRmWrite, details->regsS[j]);
- // It both reads and writes the arg regs. We have to say
- // they are written in order to force them to be allocated
- // different registers from the arg and scratch registers,
- // since we have no way to ensure that the NCode block
- // doesn't write its scratch and result registers and later
- // on read the argument registers.
- for (j = 0; j < tmpl->narg; j++)
- addHRegUse(u, HRmModify, details->regsA[j]);
+ case Ain_NCode:
+ HInstrNCode__getRegUsage(u, i->Ain.NCode.details);
return;
- }
default:
ppAMD64Instr(i, mode64);
vpanic("getRegUsage_AMD64Instr");
@@ -2052,18 +2017,9 @@
case Ain_ProfInc:
/* hardwires r11 -- nothing to modify. */
return;
- case Ain_NCode: {
- AMD64InstrNCode* details = i->Ain.NCode.details;
- NCodeTemplate* tmpl = details->tmpl;
- UInt j;
- for (j = 0; j < tmpl->nres; j++)
- mapReg(m, &details->regsR[j]);
- for (j = 0; j < tmpl->nscr; j++)
- mapReg(m, &details->regsS[j]);
- for (j = 0; j < tmpl->narg; j++)
- mapReg(m, &details->regsA[j]);
+ case Ain_NCode:
+ HInstrNCode__mapRegs(i->Ain.NCode.details, m);
return;
- }
default:
ppAMD64Instr(i, mode64);
vpanic("mapRegs_AMD64Instr");
@@ -2236,13 +2192,13 @@
emit32(ab, toUInt((w64 >> 32) & 0xFFFFFFFF));
}
-/* Does a sign-extend of the lowest 8 bits give
- the original number? */
+/* Does a sign-extend of the lowest 8 bits give the original number? */
static Bool fits8bits ( UInt w32 )
{
Int i32 = (Int)w32;
return toBool(i32 == ((Int)(w32 << 24) >> 24));
}
+
/* Can the lower 32 bits be signedly widened to produce the whole
64-bit value? In other words, are the top 33 bits either all 0 or
all 1 ? */
@@ -4142,9 +4098,7 @@
/* Marshall args for the call, do the call, marshal the result */
/* Case: 1 arg reg, 1 result reg */
- UInt nArgRegs = 0;
- while (!isNRegINVALID(ni->Nin.Call.argRegs[nArgRegs]))
- nArgRegs++;
+ UInt nArgRegs = nregVecLen(ni->Nin.Call.argRegs);
if (nArgRegs == 1
&& isNRegINVALID(ni->Nin.Call.resHi)
@@ -4155,14 +4109,14 @@
HReg rdi = hregAMD64_RDI();
HReg rax = hregAMD64_RAX();
if (!sameHReg(arg1, rdi))
- HI( mk_iMOVsd_RR(arg1, rdi) );
+ HI( mk_iMOVsd_RR_AMD64(arg1, rdi) );
HReg r11 = hregAMD64_R11();
HI( AMD64Instr_Imm64((ULong)(HWord)ni->Nin.Call.entry, r11) );
HI( AMD64Instr_NC_CallR11() );
if (!sameHReg(rax, res1))
- HI( mk_iMOVsd_RR(rax, res1) );
+ HI( mk_iMOVsd_RR_AMD64(rax, res1) );
} else {
goto unhandled;
}
@@ -4212,7 +4166,7 @@
vassert(shOp != Ash_INVALID);
if (!sameHReg(src, dst)) {
- HI( mk_iMOVsd_RR(src, dst) );
+ HI( mk_iMOVsd_RR_AMD64(src, dst) );
}
HI( AMD64Instr_Sh64(shOp, amt, dst) );
break;
@@ -4232,7 +4186,7 @@
vassert(shOp != Ash_INVALID);
if (!sameHReg(src, dst)) {
- HI( mk_iMOVsd_RR(src, dst) );
+ HI( mk_iMOVsd_RR_AMD64(src, dst) );
}
/* Now, we have the shift amount in register |amt|. Problem
is that it needs to be in %rcx, but we don't know whether
@@ -4243,10 +4197,10 @@
allocatable, since the insn selector uses it to put
variable shift amounts in. So we can't safely trash it
here. */
- HI( mk_iMOVsd_RR(hregAMD64_RCX(), hregAMD64_R11()) ); // save rcx
- HI( mk_iMOVsd_RR(amt, hregAMD64_RCX()) ); // amt->rcx
+ HI( mk_iMOVsd_RR_AMD64(hregAMD64_RCX(), hregAMD64_R11()) ); // save rcx
+ HI( mk_iMOVsd_RR_AMD64(amt, hregAMD64_RCX()) ); // amt->rcx
HI( AMD64Instr_Sh64(shOp, 0/*meaning %cl*/, dst) );
- HI( mk_iMOVsd_RR(hregAMD64_R11(), hregAMD64_RCX()) ); // restore rcx
+ HI( mk_iMOVsd_RR_AMD64(hregAMD64_R11(), hregAMD64_RCX()) ); // rest rcx
break;
}
@@ -4261,7 +4215,7 @@
}
if (how == Nalu_AND && fitsIn32Bits((ULong)imm)) {
if (!sameHReg(srcLR, dstR)) {
- HI( mk_iMOVsd_RR(srcLR, dstR) );
+ HI( mk_iMOVsd_RR_AMD64(srcLR, dstR) );
}
HI( AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(imm), dstR) );
break;
@@ -4276,7 +4230,7 @@
HReg srcRR = mapNReg(nregMap, ni->Nin.AluWrr.srcR);
if (how == Nalu_ADD) {
if (!sameHReg(srcLR, dstR)) {
- HI( mk_iMOVsd_RR(srcLR, dstR) );
+ HI( mk_iMOVsd_RR_AMD64(srcLR, dstR) );
}
HI( AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Reg(srcRR), dstR) );
break;
@@ -4303,7 +4257,7 @@
case Nin_MovW: {
HReg src = mapNReg(nregMap, ni->Nin.MovW.src);
HReg dst = mapNReg(nregMap, ni->Nin.MovW.dst);
- HI( mk_iMOVsd_RR(src, dst) );
+ HI( mk_iMOVsd_RR_AMD64(src, dst) );
break;
}
@@ -4325,9 +4279,9 @@
}
}
if (addr->tag == Nea_RRS) {
- HReg baseR = mapNReg(nregMap, addr->Nea.RRS.base);
- HReg indexR = mapNReg(nregMap, addr->Nea.RRS.index);
- UChar shift = addr->Nea.RRS.shift;
+ HReg baseR = mapNReg(nregMap, addr->Nea.RRS.base);
+ HReg indexR = mapNReg(nregMap, addr->Nea.RRS.index);
+ UChar shift = addr->Nea.RRS.shift;
if (shift <= 3) {
AMD64AMode* am = AMD64AMode_IRRS(0, baseR, indexR, shift);
if (szB == 2 || szB == 1) {
@@ -4346,8 +4300,7 @@
goto unhandled;
}
- UInt i;
- for (i = 0; i < hiBufUsed; i++) {
+ for (UInt i = 0; i < hiBufUsed; i++) {
if (verbose) {
vex_printf(" ");
ppAMD64Instr(hiBuf[i], True/*mode64*/);
@@ -4384,109 +4337,11 @@
Bool mode64, VexEndness endness_host,
Bool verbose )
{
- vassert(mode64 == True);
+ vassert(mode64 == True);
vassert(endness_host == VexEndnessLE);
- vassert(hi->tag == Ain_NCode);
-
- const AMD64InstrNCode* hi_details = hi->Ain.NCode.details;
- const NCodeTemplate* tmpl = hi_details->tmpl;
- const RRegSet* rregsLiveAfter = hi_details->rrLiveAfter;
-
- NRegMap nregMap;
- nregMap.regsR = hi_details->regsR;
- nregMap.regsA = hi_details->regsA;
- nregMap.regsS = hi_details->regsS;
- nregMap.nRegsR = tmpl->nres;
- nregMap.nRegsA = tmpl->narg;
- nregMap.nRegsS = tmpl->nscr;
-
- vassert(hregVecLen(nregMap.regsR) == nregMap.nRegsR);
- vassert(hregVecLen(nregMap.regsA) == nregMap.nRegsA);
- vassert(hregVecLen(nregMap.regsS) == nregMap.nRegsS);
-
- if (AssemblyBuffer__getRemainingSize(ab_hot) < 1024)
- return False;
- if (AssemblyBuffer__getRemainingSize(ab_cold) < 1024)
- return False;
- if (RelocationBuffer__getRemainingSize(rb) < 128)
- return False;
-
- /* Count how many hot and cold instructions (NInstrs) the template
- has, since we'll need to allocate temporary arrays to keep track
- of the label offsets. */
- UInt nHot, nCold;
- for (nHot = 0; tmpl->hot[nHot]; nHot++)
- ;
- for (nCold = 0; tmpl->cold[nCold]; nCold++)
- ;
-
- /* Here are our two arrays for tracking the AssemblyBuffer offsets
- of the NCode instructions. */
- UInt i;
- UInt offsetsHot[nHot];
- UInt offsetsCold[nCold];
- for (i = 0; i < nHot; i++) offsetsHot[i] = 0;
- for (i = 0; i < nCold; i++) offsetsCold[i] = 0;
-
- /* We'll be adding entries to the relocation buffer, |rb|, and will
- need to adjust their |dst| fields after generation of the hot
- and cold code. Record therefore where we are in the buffer now,
- so that we can iterate over the new entries later. */
- UInt rb_first = RelocationBuffer__getNext(rb);
-
- /* Generate the hot code */
- for (i = 0; i < nHot; i++) {
- offsetsHot[i] = AssemblyBuffer__getNext(ab_hot);
- NLabel lbl = mkNLabel(Nlz_Hot, i);
- emit_AMD64NInstr(ab_hot, rb, tmpl->hot[i], &nregMap,
- rregsLiveAfter, verbose, lbl);
- }
-
- /* And the cold code */
- for (i = 0; i < nCold; i++) {
- offsetsCold[i] = AssemblyBuffer__getNext(ab_cold);
- NLabel lbl = mkNLabel(Nlz_Cold, i);
- emit_AMD64NInstr(ab_cold, rb, tmpl->cold[i], &nregMap,
- rregsLiveAfter, verbose, lbl);
- }
-
- /* Now visit the new relocation entries. */
- UInt rb_last1 = RelocationBuffer__getNext(rb);
-
- for (i = rb_first; i < rb_last1; i++) {
- Relocation* reloc = &rb->buf[i];
-
- /* Show the reloc before the label-to-offset transformation. */
- if (verbose) {
- vex_printf(" reloc: ");
- ppRelocation(reloc);
- vex_printf("\n");
- }
-
- /* Transform the destination component of |reloc| so that it no
- longer refers to a label but rather to an offset in the hot
- or cold assembly buffer. */
- vassert(!reloc->dst.isOffset);
- reloc->dst.isOffset = True;
-
- if (reloc->dst.zone == Nlz_Hot) {
- vassert(reloc->dst.num < nHot);
- reloc->dst.num = offsetsHot[reloc->dst.num];
- } else {
- vassert(reloc->dst.zone == Nlz_Cold);
- vassert(reloc->dst.num < nCold);
- reloc->dst.num = offsetsCold[reloc->dst.num];
- }
-
- /* Show the reloc after the label-to-offset transformation. */
- if (verbose) {
- vex_printf(" reloc: ");
- ppRelocation(reloc);
- vex_printf("\n");
- }
- }
-
- return True;
+ vassert(hi->tag == Ain_NCode);
+ return HInstrNCode__emit ( ab_hot, ab_cold, rb, hi->Ain.NCode.details,
+ verbose, emit_AMD64NInstr );
}
Modified: branches/NCODE/priv/host_amd64_defs.h
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.h (original)
+++ branches/NCODE/priv/host_amd64_defs.h Thu Apr 16 22:10:42 2015
@@ -420,18 +420,6 @@
/* --------- */
-typedef
- struct {
- NCodeTemplate* tmpl;
- HReg* regsR; /* Result regs, INVALID_HREG terminated */
- HReg* regsA; /* Arg regs, ditto */
- HReg* regsS; /* Scratch regs, ditto */
- RRegSet* rrLiveAfter; /* initially NULL, filled in by RA */
- }
- AMD64InstrNCode;
-
-
-/* --------- */
/* Destinations are on the RIGHT (second operand) */
@@ -737,7 +725,7 @@
} ProfInc;
struct {
/* Out of line so as to keep sizeof(AMD64Instr) at 40. */
- AMD64InstrNCode* details;
+ HInstrNCode* details;
} NCode;
/* --- for NCode only --- */
@@ -831,7 +819,7 @@
extern void ppAMD64Instr ( const AMD64Instr*, Bool );
/* Handy helper, for generating integer reg-reg moves. */
-extern AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst );
+extern AMD64Instr* mk_iMOVsd_RR_AMD64 ( HReg src, HReg dst );
/* Some functions that insulate the register allocator from details
of the underlying instruction set. */
Modified: branches/NCODE/priv/host_amd64_isel.c
==============================================================================
--- branches/NCODE/priv/host_amd64_isel.c (original)
+++ branches/NCODE/priv/host_amd64_isel.c Thu Apr 16 22:10:42 2015
@@ -311,13 +311,20 @@
/* Make an int reg-reg move. */
-/*notstatic*/ AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
+static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
vassert(hregClass(src) == HRcInt64);
vassert(hregClass(dst) == HRcInt64);
return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}
+/* And a variant that is exported into the global namespace. */
+
+AMD64Instr* mk_iMOVsd_RR_AMD64 ( HReg src, HReg dst )
+{
+ return mk_iMOVsd_RR(src, dst);
+}
+
/* Make a vector (128 bit) reg-reg move. */
static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
@@ -4809,7 +4816,7 @@
// For the result values, find the vregs associated with the
// result IRTemps, and pin them on the NCode block.
- HReg* regsR = LibVEX_Alloc( (tmpl->nres+1) * sizeof(HReg) );
+ HReg* regsR = LibVEX_Alloc_inline( (tmpl->nres+1) * sizeof(HReg) );
for (i = 0; i < tmpl->nres; i++) {
IRTemp t = stmt->Ist.NCode.ress[i];
vassert(t != IRTemp_INVALID);
@@ -4823,17 +4830,16 @@
// registers returned from the isel*Expr functions may not be
// modified. As usual vreg-vreg move coalescing will remove
// those copies in the cases where they are not necessary.
- HReg* regsA = LibVEX_Alloc( (tmpl->narg+1) * sizeof(HReg) );
+ HReg* regsA = LibVEX_Alloc_inline( (tmpl->narg+1) * sizeof(HReg) );
for (i = 0; i < tmpl->narg; i++) {
HReg arg = iselIntExpr_R(env, stmt->Ist.NCode.args[i]);
regsA[i] = newVRegI(env);
addInstr(env, mk_iMOVsd_RR(arg, regsA[i]));
-
}
regsA[tmpl->narg] = HReg_INVALID;
// Allocate vregs for the scratch values.
- HReg* regsS = LibVEX_Alloc( (tmpl->nscr+1) * sizeof(HReg) );
+ HReg* regsS = LibVEX_Alloc_inline( (tmpl->nscr+1) * sizeof(HReg) );
for (i = 0; i < tmpl->nscr; i++) {
regsS[i] = newVRegI(env);
}
Modified: branches/NCODE/priv/host_arm64_defs.c
==============================================================================
--- branches/NCODE/priv/host_arm64_defs.c (original)
+++ branches/NCODE/priv/host_arm64_defs.c Thu Apr 16 22:10:42 2015
@@ -146,7 +146,7 @@
Int r;
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
return;
}
/* But specific for real regs. */
Modified: branches/NCODE/priv/host_arm_defs.c
==============================================================================
--- branches/NCODE/priv/host_arm_defs.c (original)
+++ branches/NCODE/priv/host_arm_defs.c Thu Apr 16 22:10:42 2015
@@ -120,18 +120,9 @@
// Note 9 is ambiguous: the base EABI does not give an e/r-saved
// designation for it, but the Linux instantiation of the ABI
// specifies it as callee-saved.
- //
- // If the set of available registers changes or if the e/r status
- // changes, be sure to re-check/sync the definition of
- // getHRegUsage for ARMInstr_Call too.
ru->regs[ru->size++] = hregARM_R8();
ru->regs[ru->size++] = hregARM_R12();
ru->regs[ru->size++] = hregARM_R13();
- ru->regs[ru->size++] = hregARM_R14();
- ru->regs[ru->size++] = hregARM_R15();
- ru->regs[ru->size++] = hregARM_Q13();
- ru->regs[ru->size++] = hregARM_Q14();
- ru->regs[ru->size++] = hregARM_Q15();
rRegUniverse_ARM_initted = True;
@@ -140,11 +131,82 @@
}
+/* Returns the registers in the ARM universe that are caller saved.
+ This is really ABI dependent, but we ignore that detail here. */
+static const RRegSet* getRRegsCallerSaved_ARM ( void )
+{
+ /* In theory gcc should be able to fold this into a single 64 bit
+ constant (bitset). But that's a bit risky, so instead do
+ thread-unsafe lazy initialisation (sigh). */
+ static RRegSet callerSavedRegs;
+ static Bool callerSavedRegs_initted = False;
+
+ if (LIKELY(callerSavedRegs_initted))
+ return &callerSavedRegs;
+
+ RRegSet__init(&callerSavedRegs, getRRegUniverse_ARM());
+
+ RRegSet__add(&callerSavedRegs, hregARM_R0());
+ RRegSet__add(&callerSavedRegs, hregARM_R1());
+ RRegSet__add(&callerSavedRegs, hregARM_R2());
+ RRegSet__add(&callerSavedRegs, hregARM_R3());
+ RRegSet__add(&callerSavedRegs, hregARM_Q8());
+ RRegSet__add(&callerSavedRegs, hregARM_Q9());
+ RRegSet__add(&callerSavedRegs, hregARM_Q10());
+ RRegSet__add(&callerSavedRegs, hregARM_Q11());
+ RRegSet__add(&callerSavedRegs, hregARM_Q12());
+ RRegSet__add(&callerSavedRegs, hregARM_R12());
+
+ callerSavedRegs_initted = True;
+ return &callerSavedRegs;
+}
+
+
+/* Returns the registers in the ARM universe that are callee saved.
+ This is really ABI dependent, but we ignore that detail here. */
+static const RRegSet* getRRegsCalleeSaved_ARM ( void )
+{
+ /* In theory gcc should be able to fold this into a single 64 bit
+ constant (bitset). But that's a bit risky, so instead do
+ thread-unsafe lazy initialisation (sigh). */
+ static RRegSet calleeSavedRegs;
+ static Bool calleeSavedRegs_initted = False;
+
+ if (LIKELY(calleeSavedRegs_initted))
+ return &calleeSavedRegs;
+
+ RRegSet__init(&calleeSavedRegs, getRRegUniverse_ARM());
+
+ RRegSet__add(&calleeSavedRegs, hregARM_R4());
+ RRegSet__add(&calleeSavedRegs, hregARM_R5());
+ RRegSet__add(&calleeSavedRegs, hregARM_R6());
+ RRegSet__add(&calleeSavedRegs, hregARM_R7());
+ RRegSet__add(&calleeSavedRegs, hregARM_R8());
+ RRegSet__add(&calleeSavedRegs, hregARM_R9());
+ RRegSet__add(&calleeSavedRegs, hregARM_R10());
+ RRegSet__add(&calleeSavedRegs, hregARM_R11());
+ RRegSet__add(&calleeSavedRegs, hregARM_D8());
+ RRegSet__add(&calleeSavedRegs, hregARM_D9());
+ RRegSet__add(&calleeSavedRegs, hregARM_D10());
+ RRegSet__add(&calleeSavedRegs, hregARM_D11());
+ RRegSet__add(&calleeSavedRegs, hregARM_D12());
+ RRegSet__add(&calleeSavedRegs, hregARM_S26());
+ RRegSet__add(&calleeSavedRegs, hregARM_S27());
+ RRegSet__add(&calleeSavedRegs, hregARM_S28());
+ RRegSet__add(&calleeSavedRegs, hregARM_S29());
+ RRegSet__add(&calleeSavedRegs, hregARM_S30());
+ RRegSet__add(&calleeSavedRegs, hregARM_R13());
+
+ calleeSavedRegs_initted = True;
+ return &calleeSavedRegs;
+}
+
+
void ppHRegARM ( HReg reg ) {
Int r;
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
return;
}
/* But specific for real regs. */
@@ -1501,6 +1563,13 @@
return False;
}
+/* Does a sign-extend of the lowest 12 bits give the original number? */
+static Bool fitsIn12bits ( UInt w32 )
+{
+ Int i32 = (Int)w32;
+ return toBool(i32 == ((Int)(w32 << 20) >> 20));
+}
+
ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
UInt u8, u4;
ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
@@ -1535,6 +1604,44 @@
return i;
}
+ARMInstr* ARMInstr_NCode ( NCodeTemplate* tmpl, HReg* regsR,
+ HReg* regsA, HReg* regsS ) {
+ HInstrNCode* details = LibVEX_Alloc_inline(sizeof(HInstrNCode));
+ details->tmpl = tmpl;
+ details->regsR = regsR;
+ details->regsA = regsA;
+ details->regsS = regsS;
+ details->rrLiveAfter = NULL;
+ ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
+ i->tag = ARMin_NCode;
+ i->ARMin.NCode.details = details;
+ return i;
+}
+
+ARMInstr* ARMInstr_NC_Branch ( ARMCondCode cc )
+{
+ ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
+ i->tag = ARMin_NC_Branch;
+ i->ARMin.NC_Branch.cc = cc;
+ return i;
+}
+
+ARMInstr* ARMInstr_NC_Uxth ( HReg dst, HReg src )
+{
+ ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
+ i->tag = ARMin_NC_Uxth;
+ i->ARMin.NC_Uxth.dst = dst;
+ i->ARMin.NC_Uxth.src = src;
+ return i;
+}
+
+ARMInstr* ARMInstr_NC_CallR12 ( void )
+{
+ ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
+ i->tag = ARMin_NC_CallR12;
+ return i;
+}
+
/* ... */
void ppARMInstr ( const ARMInstr* i ) {
@@ -1994,6 +2101,23 @@
"adc r11,r11,$0; "
"str r11,[r12+4]");
return;
+ case ARMin_NCode:
+ HInstrNCode__show(i->ARMin.NCode.details, ppHRegARM, "ARM");
+ return;
+ case ARMin_NC_Branch:
+ vex_printf("b%s simm24",
+ i->ARMin.NC_Branch.cc == ARMcc_AL
+ ? "" : showARMCondCode(i->ARMin.NC_Branch.cc));
+ return;
+ case ARMin_NC_Uxth:
+ vex_printf("uxth ");
+ ppHRegARM(i->ARMin.NC_Uxth.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NC_Uxth.src);
+ break;
+ case ARMin_NC_CallR12:
+ vex_printf("blx r12");
+ break;
default:
vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
vpanic("ppARMInstr(1)");
@@ -2096,13 +2220,8 @@
/* This is a bit subtle. */
/* First off, claim it trashes all the caller-saved regs
which fall within the register allocator's jurisdiction.
- These I believe to be r0,1,2,3. If it turns out that r9
- is also caller-saved, then we'll have to add that here
- too. */
- addHRegUse(u, HRmWrite, hregARM_R0());
- addHRegUse(u, HRmWrite, hregARM_R1());
- addHRegUse(u, HRmWrite, hregARM_R2());
- addHRegUse(u, HRmWrite, hregARM_R3());
+ These I believe to be r0,1,2,3 and q8,9,10,11,12. */
+ addHRegUse_from_RRegSet(u, HRmWrite, getRRegsCallerSaved_ARM());
/* Now we have to state any parameter-carrying registers
which might be read. This depends on nArgRegs. */
switch (i->ARMin.Call.nArgRegs) {
@@ -2303,6 +2422,9 @@
addHRegUse(u, HRmWrite, hregARM_R12());
addHRegUse(u, HRmWrite, hregARM_R11());
return;
+ case ARMin_NCode:
+ HInstrNCode__getRegUsage(u, i->ARMin.NCode.details);
+ return;
default:
ppARMInstr(i);
vpanic("getRegUsage_ARMInstr");
@@ -2499,6 +2621,9 @@
case ARMin_ProfInc:
/* hardwires r11 and r12 -- nothing to modify. */
return;
+ case ARMin_NCode:
+ HInstrNCode__mapRegs(i->ARMin.NCode.details, m);
+ return;
default:
ppARMInstr(i);
vpanic("mapRegs_ARMInstr");
@@ -2560,46 +2685,49 @@
vassert(offsetB >= 0);
vassert(!hregIsVirtual(rreg));
vassert(mode64 == False);
- vassert(!spRel);
*i1 = *i2 = NULL;
+
+ /* We're spilling/reloading either relative to the guest state
+ pointer (r8) when spRel == False, or relative to the stack
+ pointer (r13) when spRel == True. */
+ HReg base = spRel ? hregARM_R13() : hregARM_R8();
+
rclass = hregClass(rreg);
switch (rclass) {
case HRcInt32:
vassert(offsetB <= 4095);
*i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
rreg,
- ARMAMode1_RI(hregARM_R8(), offsetB) );
+ ARMAMode1_RI(base, offsetB) );
return;
case HRcFlt32:
case HRcFlt64: {
- HReg r8 = hregARM_R8(); /* baseblock */
- HReg r12 = hregARM_R12(); /* spill temp */
- HReg base = r8;
+ HReg curr = base;
vassert(0 == (offsetB & 3));
if (offsetB >= 1024) {
- Int offsetKB = offsetB / 1024;
- /* r12 = r8 + (1024 * offsetKB) */
- *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
+ Int offsetKB = offsetB / 1024;
+ HReg r12 = hregARM_R12(); /* spill temp */
+ /* r12 = base + (1024 * offsetKB) */
+ *i1 = ARMInstr_Alu(ARMalu_ADD, r12, base,
ARMRI84_I84(offsetKB, 11));
offsetB -= (1024 * offsetKB);
- base = r12;
+ curr = r12;
}
vassert(offsetB <= 1020);
if (rclass == HRcFlt32) {
*i2 = ARMInstr_VLdStS( False/*!isLoad*/,
rreg,
- mkARMAModeV(base, offsetB) );
+ mkARMAModeV(curr, offsetB) );
} else {
*i2 = ARMInstr_VLdStD( False/*!isLoad*/,
rreg,
- mkARMAModeV(base, offsetB) );
+ mkARMAModeV(curr, offsetB) );
}
return;
}
case HRcVec128: {
- HReg r8 = hregARM_R8();
HReg r12 = hregARM_R12();
- *i1 = ARMInstr_Add32(r12, r8, offsetB);
+ *i1 = ARMInstr_Add32(r12, base, offsetB);
*i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
return;
}
@@ -2616,46 +2744,47 @@
vassert(offsetB >= 0);
vassert(!hregIsVirtual(rreg));
vassert(mode64 == False);
- vassert(!spRel);
*i1 = *i2 = NULL;
+
+ /* Same comment as on genSpill_ARM. */
+ HReg base = spRel ? hregARM_R13() : hregARM_R8();
+
rclass = hregClass(rreg);
switch (rclass) {
case HRcInt32:
vassert(offsetB <= 4095);
*i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
rreg,
- ARMAMode1_RI(hregARM_R8(), offsetB) );
+ ARMAMode1_RI(base, offsetB) );
return;
case HRcFlt32:
case HRcFlt64: {
- HReg r8 = hregARM_R8(); /* baseblock */
- HReg r12 = hregARM_R12(); /* spill temp */
- HReg base = r8;
+ HReg curr = base;
vassert(0 == (offsetB & 3));
if (offsetB >= 1024) {
- Int offsetKB = offsetB / 1024;
- /* r12 = r8 + (1024 * offsetKB) */
- *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
+ Int offsetKB = offsetB / 1024;
+ HReg r12 = hregARM_R12(); /* spill temp */
+ /* r12 = base + (1024 * offsetKB) */
+ *i1 = ARMInstr_Alu(ARMalu_ADD, r12, base,
ARMRI84_I84(offsetKB, 11));
offsetB -= (1024 * offsetKB);
- base = r12;
+ curr = r12;
}
vassert(offsetB <= 1020);
if (rclass == HRcFlt32) {
*i2 = ARMInstr_VLdStS( True/*isLoad*/,
rreg,
- mkARMAModeV(base, offsetB) );
+ mkARMAModeV(curr, offsetB) );
} else {
*i2 = ARMInstr_VLdStD( True/*isLoad*/,
rreg,
- mkARMAModeV(base, offsetB) );
+ mkARMAModeV(curr, offsetB) );
}
return;
}
case HRcVec128: {
- HReg r8 = hregARM_R8();
HReg r12 = hregARM_R12();
- *i1 = ARMInstr_Add32(r12, r8, offsetB);
+ *i1 = ARMInstr_Add32(r12, base, offsetB);
*i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
return;
}
@@ -2668,13 +2797,15 @@
/* --------- The arm32 assembler (bleh.) --------- */
-#define PUT(_ab, _word32) \
- do { const UInt _off = (_ab)->bufUsed; \
- (_ab)->buf[_off + 0] = ((_word32) >> 0) & 0xFF; \
- (_ab)->buf[_off + 1] = ((_word32) >> 8) & 0xFF; \
- (_ab)->buf[_off + 2] = ((_word32) >> 16) & 0xFF; \
- (_ab)->buf[_off + 3] = ((_word32) >> 24) & 0xFF; \
- (_ab)->bufUsed = _off + 4; \
+/* Put a 32 bit word into the assembler buffer |_ab|. Uses a
+ potentially unaligned 32-bit store. That is OK because the
+ allocation point of the buffer should always be 32-bit aligned, and
+ that is asserted for at the top of emit_ARMInstr. */
+#define PUT(_ab, _word32) \
+ do { const UInt _off = (_ab)->bufUsed; \
+ UInt* _ptr = (UInt*)(&(_ab)->buf[_off]); \
+ *_ptr = (_word32); \
+ (_ab)->bufUsed = _off + 4; \
} while (0)
/* Emit an instruction into buf and return the number of bytes used.
@@ -3111,8 +3242,15 @@
PUT(ab, instr);
goto done;
} else {
- // RR case
- goto bad;
+ UInt sh5 = am->ARMam1.RRS.shift;
+ UInt nn = iregEnc(am->ARMam1.RRS.base);
+ UInt mm = iregEnc(am->ARMam1.RRS.index);
+ UInt dd = iregEnc(rD);
+ if (sh5 > 3) goto bad;
+ UInt instr = XXXXXXXX(cc, X0111, BITS4(1,bB,0,bL), nn, dd,
+ ((sh5 >> 1) & 0xF), ((sh5 & 1) << 3), mm);
+ PUT(ab, instr);
+ goto done;
}
}
case ARMin_LdSt16: {
@@ -4670,6 +4808,32 @@
goto done;
}
+ case ARMin_NC_Branch: {
+ /* Generating jumps within NCode blocks. We don't yet know what
+ the jump offset is, so just put in a 24-bit zero. The NCode
+ assembler (emit_ARMNInstr) should also have generated a
+ Relocation that describes how to fix up the offset, and that
+ will be applied after assembly is complete, at the point
+ where the hot and cold buffers are concatenated, to create one
+ single big code block for the containing IRSB. */
+ ARMCondCode cc = i->ARMin.NC_Branch.cc;
+ vassert(cc <= ARMcc_AL);
+ PUT(ab, XX______(cc, 0xA/*1010b*/));
+ goto done;
+ }
+
+ case ARMin_NC_Uxth: {
+ UInt dd = iregEnc(i->ARMin.NC_Uxth.dst);
+ UInt mm = iregEnc(i->ARMin.NC_Uxth.src);
+ PUT(ab, XXXXXXXX(ARMcc_AL, X0110, X1111, X1111, dd, X0000, X0111, mm));
+ goto done;
+ }
+
+ case ARMin_NC_CallR12: {
+ PUT(ab, 0xE12FFF3C);
+ goto done;
+ }
+
/* ... */
default:
goto bad;
@@ -4686,6 +4850,330 @@
}
+/* --------- The arm NCode assembler. --------- */
+
+/* Emits ARM code for a single NInstr |ni| into |ab|, possibly
+ adding relocation information into |rb| too.
+*/
+static
+void emit_ARMNInstr ( /*MOD*/AssemblyBuffer* ab,
+ /*MOD*/RelocationBuffer* rb,
+ const NInstr* ni,
+ const NRegMap* nregMap,
+ const RRegSet* hregsLiveAfter,
+ /* the next 2 are for debug printing only */
+ Bool verbose, NLabel niLabel )
+{
+ ARMInstr* hiBuf[100];
+ UInt hiBufUsed = 0;
+
+# define HI(_insnE) \
+ do { \
+ ARMInstr* _insn = (_insnE); \
+ vassert(hiBufUsed < sizeof(hiBuf)/sizeof(hiBuf[0])); \
+ hiBuf[hiBufUsed++] = _insn; \
+ } while (0)
+
+ if (verbose) {
+ vex_printf(" ");
+ ppNLabel(niLabel);
+ vex_printf(": ");
+ ppNInstr(ni);
+ vex_printf("\n");
+ }
+
+ switch (ni->tag) {
+
+ case Nin_Nop:
+ break;
+
+ case Nin_Branch: {
+         /* We are going to generate an ARM branch insn, which naturally
+            can be conditional if needed.  It will be of the form
+            cond:4 1010 simm:24
+            We need to generate both the instruction and a relocation
+            record that describes how to fix up the offset (simm:24)
+            once the relative offset between this instruction and the
+            destination is known, which it isn't currently. */
+ ARMCondCode cc = 16; /* invalid */
+ switch (ni->Nin.Branch.cc) {
+ case Ncc_ALWAYS: cc = ARMcc_AL; break;
+ case Ncc_Z: cc = ARMcc_EQ; break;
+ case Ncc_NZ: cc = ARMcc_NE; break;
+ default: vassert(0); /* no other cases possible */
+ }
+ vassert(cc < 16);
+ /* First do the relocation, as it's the more complex part.
+ The insns are little-endian, and the offset is the least
+ significant 3 bytes of the insn, so its "where" starts
+ exactly where the current |ab| cursor is. Hence the "+0"
+ below. */
+ RelocWhere where
+ = mkRelocWhere(niLabel.zone, AssemblyBuffer__getNext(ab)+0);
+ RelocDst dst
+ = mkRelocDst_from_NLabel(ni->Nin.Branch.dst);
+         /* Bias is -8 because we've set |where| to be the start of the
+            branch insn.  The processor however expects the offset to
+            be relative to the start of 8 bytes past the insn (ARM
+            ancient history) which means that a naive "dst - where"
+            value will give an offset that is 8 too large.  Hence the
+            bias of -8. */
+ Relocation reloc
+ = mkRelocation(where, 0, 23, dst, /*bias*/-8, /*rshift*/2);
+ vassert(RelocationBuffer__getRemainingSize(rb) > 0);
+ rb->buf[rb->bufUsed++] = reloc;
+ /* And finally the instruction. Note that we don't specify
+ an offset here since we don't yet know what it is. */
+ HI( ARMInstr_NC_Branch(cc) );
+ break;
+ }
+
+ case Nin_Call: {
+ RRegSet to_preserve;
+ calcRegistersToPreserveAroundNCodeCall(
+ &to_preserve,
+ hregsLiveAfter, getRRegsCalleeSaved_ARM(), nregMap,
+ ni->Nin.Call.resHi, ni->Nin.Call.resLo
+ );
+
+ /* Save live regs */
+ UInt n_to_preserve = RRegSet__card(&to_preserve);
+ vassert(n_to_preserve < 25); /* stay sane */
+
+ /* Figure out how much to move the stack, ensuring any alignment up
+ to 32 is preserved. */
+ UInt stackMove = n_to_preserve * 16;
+ stackMove = (stackMove + 31) & ~31;
+ if (stackMove > 0) {
+ /* This is a bit tricky. We need to encode the offset in
+ an RI84, but it might be moderately large-ish.
+ Fortunately we can take advantage of the fact that
+ |stackMove| is 0 % 16 and so encode just bits 11:4 of
+ it. */
+ vassert((stackMove & 15) == 0);
+ if ((stackMove >> 4) > 0xFF) goto unhandled;
+ HReg sp = hregARM_R13();
+ ARMRI84* dist = ARMRI84_I84(stackMove >> 4, 14/*means "<< 4"*/);
+ HI( ARMInstr_Alu(ARMalu_SUB, sp, sp, dist) );
+ }
+
+ RRegSetIterator* iter = RRegSetIterator__new();
+ RRegSetIterator__init(iter, &to_preserve);
+ UInt slotNo = 0;
+ while (True) {
+ HReg r = RRegSetIterator__next(iter);
+ if (hregIsInvalid(r)) break;
+ ARMInstr* i1 = NULL;
+ ARMInstr* i2 = NULL;
+ genSpill_ARM( (HInstr**)&i1, (HInstr**)&i2,
+ r, True/*spRel*/, 16 * slotNo, False/*!mode64*/ );
+ if (i1) HI(i1);
+ if (i2) HI(i2);
+ slotNo++;
+ }
+ vassert(slotNo == n_to_preserve);
+
+ /* Marshall args for the call, do the call, marshal the result */
+ /* Case: 1 arg reg, 1 result reg */
+
+ UInt nArgRegs = nregVecLen(ni->Nin.Call.argRegs);
+
+ if (nArgRegs == 1
+ && isNRegINVALID(ni->Nin.Call.resHi)
+ && !isNRegINVALID(ni->Nin.Call.resLo)) {
+
+ HReg arg1 = mapNReg(nregMap, ni->Nin.Call.argRegs[0]);
+ HReg res1 = mapNReg(nregMap, ni->Nin.Call.resLo);
+ HReg r0 = hregARM_R0();
+ if (!sameHReg(r0, arg1))
+ HI( mk_iMOVds_RR_ARM(r0, arg1) );
+
+ HReg r12 = hregARM_R12();
+ HI( ARMInstr_Imm32(r12, (UInt)(HWord)ni->Nin.Call.entry) );
+ HI( ARMInstr_NC_CallR12() );
+
+ if (!sameHReg(res1, r0))
+ HI( mk_iMOVds_RR_ARM(res1, r0) );
+ } else {
+ goto unhandled;
+ }
+
+ /* Restore live regs */
+ RRegSetIterator__init(iter, &to_preserve);
+ slotNo = 0;
+ while (True) {
+ HReg r = RRegSetIterator__next(iter);
+ if (hregIsInvalid(r)) break;
+ ARMInstr* i1 = NULL;
+ ARMInstr* i2 = NULL;
+ genReload_ARM( (HInstr**)&i1, (HInstr**)&i2,
+ r, True/*spRel*/, 16 * slotNo, False/*!mode64*/ );
+ if (i1) HI(i1);
+ if (i2) HI(i2);
+ slotNo++;
+ }
+ vassert(slotNo == n_to_preserve);
+ if (stackMove > 0) {
+ /* Same deal as the code for moving SP down, just above
+ .. see comments there. */
+ HReg sp = hregARM_R13();
+ ARMRI84* dist = ARMRI84_I84(stackMove >> 4, 14/*means "<< 4"*/);
+ HI( ARMInstr_Alu(ARMalu_ADD, sp, sp, dist) );
+ }
+ break;
+ }
+
+ case Nin_ImmW: {
+ HReg reg = mapNReg(nregMap, ni->Nin.ImmW.dst);
+ HWord imm = ni->Nin.ImmW.imm;
+ HI( ARMInstr_Imm32(reg, (UInt)imm) );
+ break;
+ }
+
+ case Nin_ShiftWri: {
+ NShift how = ni->Nin.ShiftWri.how;
+ UInt amt = ni->Nin.ShiftWri.amt;
+ HReg src = mapNReg(nregMap, ni->Nin.ShiftWri.srcL);
+ HReg dst = mapNReg(nregMap, ni->Nin.ShiftWri.dst);
+ vassert(amt >= 1 && amt <= 31);
+
+ ARMShiftOp shOp = 0;
+ switch (how) {
+ //case Nsh_SHL: shOp = ARMsh_SHL; break;
+ case Nsh_SHR: shOp = ARMsh_SHR; break;
+ default: break;
+ }
+ vassert(shOp != 0);
+
+ HI( ARMInstr_Shift(shOp, dst, src, ARMRI5_I5(amt)) );
+ break;
+ }
+
+ case Nin_ShiftWrr:
+ goto unhandled;
+
+ case Nin_AluWri: {
+ NAlu how = ni->Nin.AluWri.how;
+ HReg dstR = mapNReg(nregMap, ni->Nin.AluWri.dst);
+ HReg srcLR = mapNReg(nregMap, ni->Nin.AluWri.srcL);
+ HWord imm = ni->Nin.AluWri.srcR;
+ if (how == Nalu_AND && imm == 0xFFFFULL) {
+ HI( ARMInstr_NC_Uxth(dstR, srcLR) );
+ break;
+ }
+ goto unhandled;
+ }
+
+ case Nin_AluWrr:
+ goto unhandled;
+
+ case Nin_SetFlagsWri: {
+ HReg reg = mapNReg(nregMap, ni->Nin.SetFlagsWri.srcL);
+ HWord imm = ni->Nin.SetFlagsWri.srcR;
+ if (ni->Nin.SetFlagsWri.how == Nsf_TEST && imm <= 0xFF) {
+ HI( ARMInstr_CmpOrTst(False/*!isCmp*/, reg, ARMRI84_I84(imm,0)) );
+ break;
+ }
+ if (ni->Nin.SetFlagsWri.how == Nsf_CMP && imm <= 0xFF) {
+ HI( ARMInstr_CmpOrTst(True/*isCmp*/, reg, ARMRI84_I84(imm,0)) );
+ break;
+ }
+ goto unhandled;
+ }
+
+ case Nin_MovW: {
+ HReg src = mapNReg(nregMap, ni->Nin.MovW.src);
+ HReg dst = mapNReg(nregMap, ni->Nin.MovW.dst);
+ HI( mk_iMOVds_RR_ARM(dst, src) );
+ break;
+ }
+
+ case Nin_LoadU: {
+ HReg dstR = mapNReg(nregMap, ni->Nin.LoadU.dst);
+ NEA* addr = ni->Nin.LoadU.addr;
+ UChar szB = ni->Nin.LoadU.szB;
+         /* The Nea_IRS case is a kludge.  It would be better to
+            generate a single instruction, but that requires a new
+            ARMAMode1_IRS, which doesn't currently exist. */
+ if (addr->tag == Nea_IRS && !fitsIn12bits((UInt)addr->Nea.IRS.base)) {
+ UInt imm = (UInt)addr->Nea.IRS.base;
+ HReg indexR = mapNReg(nregMap, addr->Nea.IRS.index);
+ UChar shift = addr->Nea.IRS.shift;
+ if (szB == 4 && shift <= 3) {
+ /* Put the immediate value in r12, since that's
+ reserved as very-short-term scratch. */
+ HReg r12 = hregARM_R12();
+ HI( ARMInstr_Imm32(r12, imm) );
+ HI( ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dstR,
+ ARMAMode1_RRS(r12, indexR, shift)) );
+ break;
+ }
+ }
+ if (addr->tag == Nea_RRS) {
+ HReg baseR = mapNReg(nregMap, addr->Nea.RRS.base);
+ HReg indexR = mapNReg(nregMap, addr->Nea.RRS.index);
+ UChar shift = addr->Nea.RRS.shift;
+ if (szB == 1 && shift <= 3) {
+ HI( ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dstR,
+ ARMAMode1_RRS(baseR, indexR, shift)) );
+ break;
+ }
+ }
+ goto unhandled;
+ }
+
+ case Nin_Store:
+ goto unhandled;
+
+ default:
+ goto unhandled;
+ }
+
+ for (UInt i = 0; i < hiBufUsed; i++) {
+ if (verbose) {
+ vex_printf(" ");
+ ppARMInstr(hiBuf[i]);
+ vex_printf("\n");
+ }
+ Bool isProfInc
+ = emit_ARMInstr(ab, hiBuf[i],
+ False/*!mode64*/, VexEndnessLE, NULL/*vda*/);
+ vassert(!isProfInc);
+ }
+
+ return;
+
+ unhandled:
+ ppNInstr(ni);
+ vpanic("emit_ARMNInstr: unhandled NInstr");
+ /*NOTREACHED*/
+
+# undef HI
+}
+
+
+/* Emits ARM code for the complete NCode block |hi| into |ab_hot|
+ and |ab_cold|, possibly adding relocation information to |rb| too.
+ This function can only handle NCode blocks. All other ARM
+ instructions are to be handled by emit_ARMInstr. This function
+ is required to generate <= 1024 bytes of code. Returns True if OK,
+ False if not enough buffer space.
+*/
+Bool emit_ARMNCodeBlock ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const ARMInstr* hi,
+ Bool mode64, VexEndness endness_host,
+ Bool verbose )
+{
+ vassert(mode64 == False);
+ vassert(endness_host == VexEndnessLE);
+ vassert(hi->tag == ARMin_NCode);
+ return HInstrNCode__emit ( ab_hot, ab_cold, rb, hi->ARMin.NCode.details,
+ verbose, emit_ARMNInstr );
+}
+
+
/* --------- Helpers for translation chaining. --------- */
/* How big is an event check? See case for ARMin_EvCheck in
Modified: branches/NCODE/priv/host_arm_defs.h
==============================================================================
--- branches/NCODE/priv/host_arm_defs.h (original)
+++ branches/NCODE/priv/host_arm_defs.h Thu Apr 16 22:10:42 2015
@@ -74,11 +74,6 @@
ST_IN HReg hregARM_R8 ( void ) { return mkHReg(False, HRcInt32, 8, 26); }
ST_IN HReg hregARM_R12 ( void ) { return mkHReg(False, HRcInt32, 12, 27); }
ST_IN HReg hregARM_R13 ( void ) { return mkHReg(False, HRcInt32, 13, 28); }
-ST_IN HReg hregARM_R14 ( void ) { return mkHReg(False, HRcInt32, 14, 29); }
-ST_IN HReg hregARM_R15 ( void ) { return mkHReg(False, HRcInt32, 15, 30); }
-ST_IN HReg hregARM_Q13 ( void ) { return mkHReg(False, HRcVec128, 13, 31); }
-ST_IN HReg hregARM_Q14 ( void ) { return mkHReg(False, HRcVec128, 14, 32); }
-ST_IN HReg hregARM_Q15 ( void ) { return mkHReg(False, HRcVec128, 15, 33); }
#undef ST_IN
extern void ppHRegARM ( HReg );
@@ -618,7 +613,12 @@
NOTE: source and destination registers should be different! */
ARMin_Add32,
ARMin_EvCheck, /* Event check */
- ARMin_ProfInc /* 64-bit profile counter increment */
+ ARMin_ProfInc, /* 64-bit profile counter increment */
+ ARMin_NCode, /* NCode template and registers */
+ // The following for NCode only
+ ARMin_NC_Branch, /* Conditional or unconditional branch, imm offset */
+ ARMin_NC_Uxth, /* extend u16 to u32 */
ARMin_NC_CallR12 /* Literally "blx r12" */
}
ARMInstrTag;
@@ -953,6 +953,23 @@
installed later, post-translation, by patching it in,
as it is not known at translation time. */
} ProfInc;
+ struct {
+ /* Out of line so as to keep this ARMInstr small. */
+ HInstrNCode* details;
+ } NCode;
+ /* --- for NCode only --- */
+ struct {
+ /* cond. br. w/ 24-bit offset, cond:4 1010 imm:24 */
+ /* imm24 is unspecified and so assumed to be zero. */
+ ARMCondCode cc;
+ } NC_Branch;
+ struct {
+ HReg dst;
+ HReg src;
+ } NC_Uxth;
+      struct {
+         /* Literally "blx r12" */
+      } NC_CallR12;
} ARMin;
}
ARMInstr;
@@ -1018,9 +1035,16 @@
extern ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
ARMAMode1* amFailAddr );
extern ARMInstr* ARMInstr_ProfInc ( void );
+extern ARMInstr* ARMInstr_NCode ( NCodeTemplate* tmpl, HReg* regsR,
+ HReg* regsA, HReg* regsS );
+extern ARMInstr* ARMInstr_NC_Branch ( ARMCondCode cc );
+extern ARMInstr* ARMInstr_NC_Uxth ( HReg dst, HReg src );
+extern ARMInstr* ARMInstr_NC_CallR12 ( void );
extern void ppARMInstr ( const ARMInstr* );
+/* Handy helper, for generating integer reg-reg moves. */
+extern ARMInstr* mk_iMOVds_RR_ARM ( HReg dst, HReg src );
/* Some functions that insulate the register allocator from details
of the underlying instruction set. */
@@ -1033,6 +1057,13 @@
Bool mode64, VexEndness endness_host,
const VexDispatcherAddresses* vda );
+extern Bool emit_ARMNCodeBlock ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const ARMInstr* hi,
+ Bool mode64, VexEndness endness_host,
+ Bool verbose );
+
extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Bool spRel, Int offset, Bool );
extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
Modified: branches/NCODE/priv/host_arm_isel.c
==============================================================================
--- branches/NCODE/priv/host_arm_isel.c (original)
+++ branches/NCODE/priv/host_arm_isel.c Thu Apr 16 22:10:42 2015
@@ -286,6 +286,12 @@
return ARMInstr_Mov(dst, ARMRI84_R(src));
}
+/* And a variant that is exported into the global namespace. */
+ARMInstr* mk_iMOVds_RR_ARM ( HReg dst, HReg src )
+{
+ return mk_iMOVds_RR(dst, src);
+}
+
/* Set the VFP unit's rounding mode to default (round to nearest). */
static void set_VFP_rounding_default ( ISelEnv* env )
{
@@ -6225,6 +6231,48 @@
goto stmt_fail;
}
+ /* --------- NCODE --------- */
+ case Ist_NCode: {
+ UInt i;
+ NCodeTemplate* tmpl = stmt->Ist.NCode.tmpl;
+
+ // For the result values, find the vregs associated with the
+ // result IRTemps, and pin them on the NCode block.
+ HReg* regsR = LibVEX_Alloc_inline( (tmpl->nres+1) * sizeof(HReg) );
+ for (i = 0; i < tmpl->nres; i++) {
+ IRTemp t = stmt->Ist.NCode.ress[i];
+ vassert(t != IRTemp_INVALID);
+ regsR[i] = lookupIRTemp(env, t);
+ }
+ regsR[tmpl->nres] = HReg_INVALID;
+
+ // Compute each arg into a new vreg. It's important to move
+ // them into new vregs because the NCode block may modify its
+ // argument registers, but the Rules Of The Game stipulate that
+ // registers returned from the isel*Expr functions may not be
+ // modified. As usual vreg-vreg move coalescing will remove
+ // those copies in the cases where they are not necessary.
+ HReg* regsA = LibVEX_Alloc_inline( (tmpl->narg+1) * sizeof(HReg) );
+ for (i = 0; i < tmpl->narg; i++) {
+ HReg arg = iselIntExpr_R(env, stmt->Ist.NCode.args[i]);
+ regsA[i] = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(regsA[i], arg));
+ }
+ regsA[tmpl->narg] = HReg_INVALID;
+
+ // Allocate vregs for the scratch values.
+ HReg* regsS = LibVEX_Alloc_inline( (tmpl->nscr+1) * sizeof(HReg) );
+ for (i = 0; i < tmpl->nscr; i++) {
+ regsS[i] = newVRegI(env);
+ }
+ regsS[tmpl->nscr] = HReg_INVALID;
+
+ // Hand the template and 3 reg sets on through the pipeline.
+ addInstr(env, ARMInstr_NCode(tmpl, regsR, regsA, regsS));
+
+ return;
+ }
+
default: break;
}
stmt_fail:
Modified: branches/NCODE/priv/host_generic_reg_alloc2.c
==============================================================================
--- branches/NCODE/priv/host_generic_reg_alloc2.c (original)
+++ branches/NCODE/priv/host_generic_reg_alloc2.c Thu Apr 16 22:10:42 2015
@@ -40,7 +40,7 @@
#include "host_generic_regs.h"
// ******** WARNING KLUDGE DO NOT COMMIT
-#include "host_amd64_defs.h"
+#include "host_arm_defs.h"
// ******** WARNING KLUDGE DO NOT COMMIT
/* Set to 1 for lots of debugging output. */
@@ -1309,7 +1309,8 @@
reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
if (debug_direct_reload && !reloaded) {
- vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
+ vex_printf("[%3d] ", spilloff);
+ ppHRegGENERIC(cand); vex_printf(" ");
ppInstr(instrs_in->arr[ii], mode64);
}
if (reloaded) {
@@ -1596,9 +1597,9 @@
/* If this instruction is an NCode block, annotate it with the
set of registers that are live after it. */
- { AMD64Instr* ai = instrs_in->arr[ii];
- if (ai->tag == Ain_NCode) {
- AMD64InstrNCode* details = ai->Ain.NCode.details;
+ { ARMInstr* ai = instrs_in->arr[ii];
+ if (ai->tag == ARMin_NCode) {
+ HInstrNCode* details = ai->ARMin.NCode.details;
//vex_printf("RA: after NCode: ");
vassert(details->rrLiveAfter == NULL);
RRegSet* rrLive_after_NCode = RRegSet__new(univ);
Modified: branches/NCODE/priv/host_generic_regs.c
==============================================================================
--- branches/NCODE/priv/host_generic_regs.c (original)
+++ branches/NCODE/priv/host_generic_regs.c Thu Apr 16 22:10:42 2015
@@ -58,7 +58,7 @@
}
/* Generic printing for registers. */
-void ppHReg ( HReg r )
+void ppHRegGENERIC ( HReg r )
{
if (hregIsInvalid(r)) {
vex_printf("HReg_INVALID");
@@ -320,7 +320,7 @@
else if (!rRd && rWr) { str = "Write "; }
/* else "Modify" is correct */
vex_printf(" %s ", str);
- ppHReg(univ->regs[i]);
+ ppHRegGENERIC(univ->regs[i]);
vex_printf("\n");
}
/* and now the virtual registers */
@@ -333,7 +333,7 @@
default: vpanic("ppHRegUsage");
}
vex_printf(" %s ", str);
- ppHReg(tab->vRegs[i]);
+ ppHRegGENERIC(tab->vRegs[i]);
vex_printf("\n");
}
vex_printf("}\n");
@@ -430,15 +430,15 @@
/*--- Indicating register remappings (for reg-alloc) ---*/
/*---------------------------------------------------------*/
-void ppHRegRemap ( HRegRemap* map )
+void ppHRegRemap ( const HRegRemap* map )
{
Int i;
vex_printf("HRegRemap {\n");
for (i = 0; i < map->n_used; i++) {
vex_printf(" ");
- ppHReg(map->orig[i]);
+ ppHRegGENERIC(map->orig[i]);
vex_printf(" --> ");
- ppHReg(map->replacement[i]);
+ ppHRegGENERIC(map->replacement[i]);
vex_printf("\n");
}
vex_printf("}\n");
@@ -463,7 +463,7 @@
}
-HReg lookupHRegRemap ( HRegRemap* map, HReg orig )
+HReg lookupHRegRemap ( const HRegRemap* map, HReg orig )
{
Int i;
if (!hregIsVirtual(orig))
@@ -573,6 +573,91 @@
}
+/* Find the length of a vector of NRegs that is terminated by
+ an NReg_INVALID. */
+UInt nregVecLen ( const NReg* vec )
+{
+ UInt i;
+ for (i = 0; !isNRegINVALID(vec[i]); i++)
+ ;
+ return i;
+}
+
+
+/* Find the length of a vector of NInstr*s that is terminated by
+ NULL. */
+UInt ninstrVecLen ( NInstr** const vec )
+{
+ UInt i;
+ for (i = 0; vec[i]; i++)
+ ;
+ return i;
+}
+
+
+/* Print a HInstrNCode. Caller must supply a register-printing
+ routine and a bit of text identifying the host architecture. */
+void HInstrNCode__show ( const HInstrNCode* details,
+ void (*ppHReg)(HReg), const HChar* hostName )
+{
+ NCodeTemplate* tmpl = details->tmpl;
+ vex_printf("NCode-%s:%s [", hostName, tmpl->name);
+ UInt j;
+ for (j = 0; j < tmpl->nres; j++) {
+ ppHReg(details->regsR[j]);
+ if (j != tmpl->nres-1) vex_printf(" ");
+ }
+ vex_printf("] <= [");
+ for (j = 0; j < tmpl->narg; j++) {
+ ppHReg(details->regsA[j]);
+ if (j != tmpl->narg-1) vex_printf(" ");
+ }
+ vex_printf("] scratch [");
+ for (j = 0; j < tmpl->nscr; j++) {
+ ppHReg(details->regsS[j]);
+ if (j != tmpl->nscr-1) vex_printf(" ");
+ }
+ vex_printf("]");
+}
+
+
+/* Update |u| with the register usages of |details|. */
+void HInstrNCode__getRegUsage ( /*MOD*/HRegUsage* u,
+ const HInstrNCode* details )
+{
+ NCodeTemplate* tmpl = details->tmpl;
+ // It writes the result and scratch registers.
+ UInt j;
+ for (j = 0; j < tmpl->nres; j++)
+ addHRegUse(u, HRmWrite, details->regsR[j]);
+ for (j = 0; j < tmpl->nscr; j++)
+ addHRegUse(u, HRmWrite, details->regsS[j]);
+   // It both reads and writes the arg regs.  We have to say
+   // they are written in order to force them to be allocated
+   // different registers from the result and scratch registers,
+   // since we have no way to ensure that the NCode block
+   // doesn't write its scratch and result registers and later
+   // on read the argument registers.
+ for (j = 0; j < tmpl->narg; j++)
+ addHRegUse(u, HRmModify, details->regsA[j]);
+}
+
+
+/* Apply |map| to the registers in |details|. */
+void HInstrNCode__mapRegs ( /*MOD*/HInstrNCode* details,
+ const HRegRemap* map )
+{
+ NCodeTemplate* tmpl = details->tmpl;
+ UInt j;
+ for (j = 0; j < tmpl->nres; j++)
+ details->regsR[j] = lookupHRegRemap(map, details->regsR[j]);
+ for (j = 0; j < tmpl->nscr; j++)
+ details->regsS[j] = lookupHRegRemap(map, details->regsS[j]);
+ for (j = 0; j < tmpl->narg; j++)
+ details->regsA[j] = lookupHRegRemap(map, details->regsA[j]);
+}
+
+
/* Find the real (hard) register for |r| by looking up in |map|. */
HReg mapNReg ( const NRegMap* map, NReg r )
{
@@ -664,15 +749,15 @@
if (0) {
vex_printf(" # set1: ");
- RRegSet__pp(set_1, ppHReg); vex_printf("\n");
+ RRegSet__pp(set_1, ppHRegGENERIC); vex_printf("\n");
vex_printf(" # set2: ");
- RRegSet__pp(&set_2, ppHReg); vex_printf("\n");
+ RRegSet__pp(&set_2, ppHRegGENERIC); vex_printf("\n");
vex_printf(" # set3: ");
- RRegSet__pp(set_3, ppHReg); vex_printf("\n");
+ RRegSet__pp(set_3, ppHRegGENERIC); vex_printf("\n");
vex_printf(" # set4: ");
- RRegSet__pp(&set_4, ppHReg); vex_printf("\n");
+ RRegSet__pp(&set_4, ppHRegGENERIC); vex_printf("\n");
vex_printf(" # pres: ");
- RRegSet__pp(result, ppHReg); vex_printf("\n");
+ RRegSet__pp(result, ppHRegGENERIC); vex_printf("\n");
}
/* Remove any non allocatable registers (see big comment above) */
@@ -680,6 +765,126 @@
}
+/* Emits host code for the complete NCode block |details| into
+ |ab_hot| and |ab_cold|, possibly adding relocation information to
+ |rb| too. The caller must supply a host-dependent function
+ |emit_OneNInstr| which generates host code for a single NInstr.
+ This function is required to generate <= 1024 bytes of code.
+ Returns True if OK, False if not enough buffer space.
+*/
+Bool HInstrNCode__emit ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const HInstrNCode* details,
+ Bool verbose,
+ void (*emit_OneNInstr) (
+ /*MOD*/AssemblyBuffer* ab,
+ /*MOD*/RelocationBuffer* rb,
+ const NInstr* ni,
+ const NRegMap* nregMap,
+ const RRegSet* hregsLiveAfter,
+ /* the next 2 are for debug printing only */
+ Bool verbose, NLabel niLabel
+ )
+ )
+ {
+ const NCodeTemplate* tmpl = details->tmpl;
+ const RRegSet* rregsLiveAfter = details->rrLiveAfter;
+
+ NRegMap nregMap;
+ nregMap.regsR = details->regsR;
+ nregMap.regsA = details->regsA;
+ nregMap.regsS = details->regsS;
+ nregMap.nRegsR = tmpl->nres;
+ nregMap.nRegsA = tmpl->narg;
+ nregMap.nRegsS = tmpl->nscr;
+
+ vassert(hregVecLen(nregMap.regsR) == nregMap.nRegsR);
+ vassert(hregVecL...
[truncated message content] |