|
From: Ivo R. <ir...@so...> - 2017-08-15 09:55:15
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=ff2a4925d28bc28c606253381a5bfed9fb73681b commit ff2a4925d28bc28c606253381a5bfed9fb73681b Author: Ivo Raisr <iv...@iv...> Date: Tue Aug 8 20:52:10 2017 +0200 Introduce VEX register allocator v3. This is VEX register allocator implementation as found in patch 013 with the following modifications: - VEX register allocator v2 has been removed altogether - command line option --vex-regalloc-version is not recognized - declaration of doRegisterAllocation() has been adjusted to take HInstrSB - file VEX/priv/host_generic_reg_alloc3.c is unmodified Diff: --- Makefile.vex.am | 2 +- NEWS | 1 + VEX/priv/host_amd64_defs.c | 43 +- VEX/priv/host_amd64_defs.h | 29 +- VEX/priv/host_arm64_defs.c | 52 +- VEX/priv/host_arm64_defs.h | 3 +- VEX/priv/host_arm_defs.c | 44 +- VEX/priv/host_arm_defs.h | 3 +- VEX/priv/host_generic_reg_alloc2.c | 1613 ------------------------------------ VEX/priv/host_generic_reg_alloc3.c | 1143 +++++++++++++++++++++++++ VEX/priv/host_generic_regs.c | 49 +- VEX/priv/host_generic_regs.h | 116 ++- VEX/priv/host_mips_defs.c | 39 +- VEX/priv/host_mips_defs.h | 3 +- VEX/priv/host_ppc_defs.c | 37 +- VEX/priv/host_ppc_defs.h | 3 +- VEX/priv/host_s390_defs.c | 25 +- VEX/priv/host_s390_defs.h | 3 +- VEX/priv/host_x86_defs.c | 36 +- VEX/priv/host_x86_defs.h | 4 +- VEX/priv/main_main.c | 26 +- VEX/priv/main_util.c | 37 +- 22 files changed, 1529 insertions(+), 1782 deletions(-) diff --git a/Makefile.vex.am b/Makefile.vex.am index c47be97..a1a1918 100644 --- a/Makefile.vex.am +++ b/Makefile.vex.am @@ -130,7 +130,7 @@ LIBVEX_SOURCES_COMMON = \ priv/host_generic_simd128.c \ priv/host_generic_simd256.c \ priv/host_generic_maddf.c \ - priv/host_generic_reg_alloc2.c \ + priv/host_generic_reg_alloc3.c \ priv/host_x86_defs.c \ priv/host_x86_isel.c # TODO-JIT: other architectures disabled for now diff --git a/NEWS b/NEWS index 516c4cc..446a7fa 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,7 @@ where XXXXXX is the bug number as listed below. 381272 ppc64 doesn't compile test_isa_2_06_partx.c without VSX support 381289 epoll_pwait can have a NULL sigmask 381274 powerpc too chatty even with --sigill-diagnostics=no +381553 VEX register allocator v3 381769 Use ucontext_t instead of struct ucontext 381805 arm32 needs ld.so index hardwire for new glibc security fixes 382256 gz compiler flag test doesn't work for gold diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c index 5e0600a..9b63017 100644 --- a/VEX/priv/host_amd64_defs.c +++ b/VEX/priv/host_amd64_defs.c @@ -63,6 +63,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. */ + ru->allocable_start[HRcInt64] = ru->size; ru->regs[ru->size++] = hregAMD64_RSI(); ru->regs[ru->size++] = hregAMD64_RDI(); ru->regs[ru->size++] = hregAMD64_R8(); @@ -72,6 +73,10 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) ru->regs[ru->size++] = hregAMD64_R14(); ru->regs[ru->size++] = hregAMD64_R15(); ru->regs[ru->size++] = hregAMD64_RBX(); + ru->regs[ru->size++] = hregAMD64_R10(); + ru->allocable_end[HRcInt64] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregAMD64_XMM3(); ru->regs[ru->size++] = hregAMD64_XMM4(); ru->regs[ru->size++] = hregAMD64_XMM5(); @@ -82,8 +87,9 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) ru->regs[ru->size++] = hregAMD64_XMM10(); ru->regs[ru->size++] = hregAMD64_XMM11(); ru->regs[ru->size++] = hregAMD64_XMM12(); - ru->regs[ru->size++] = hregAMD64_R10(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; + /* And other regs, not available to the allocator. */ ru->regs[ru->size++] = hregAMD64_RAX(); ru->regs[ru->size++] = hregAMD64_RCX(); @@ -101,7 +107,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) } -void ppHRegAMD64 ( HReg reg ) +UInt ppHRegAMD64 ( HReg reg ) { Int r; static const HChar* ireg64_names[16] @@ -109,27 +115,24 @@ void ppHRegAMD64 ( HReg reg ) "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg64_names[r]); - return; + return vex_printf("%s", ireg64_names[r]); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%%xmm%d", r); - return; + return vex_printf("%%xmm%d", r); default: vpanic("ppHRegAMD64"); } } -static void ppHRegAMD64_lo32 ( HReg reg ) +static UInt ppHRegAMD64_lo32 ( HReg reg ) { Int r; static const HChar* ireg32_names[16] @@ -137,17 +140,16 @@ static void ppHRegAMD64_lo32 ( HReg reg ) "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - vex_printf("d"); - return; + UInt written = ppHReg(reg); + written += vex_printf("d"); + return written; } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); default: vpanic("ppHRegAMD64_lo32: invalid regclass"); } @@ -1995,6 +1997,19 @@ void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to); + case HRcVec128: + return AMD64Instr_SseReRg(Asse_MOV, from, to); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_AMD64: unimplemented regclass"); + } +} + AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off ) { vassert(spill_off >= 0 && spill_off < 10000); /* let's say */ diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index e41fe34..57ef169 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -56,19 +56,18 @@ ST_IN HReg hregAMD64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 5); } ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 6); } ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 7); } ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 8); } - -ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 9); } -ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 10); } -ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 11); } -ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 12); } -ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 13); } -ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 14); } -ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 15); } -ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 16); } -ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 17); } -ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 18); } - -ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 19); } +ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 9); } + +ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 10); } +ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 11); } +ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 12); } +ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 13); } +ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 14); } +ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 15); } +ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 16); } +ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 17); } +ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 18); } +ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 19); } ST_IN HReg hregAMD64_RAX ( void ) { return mkHReg(False, HRcInt64, 0, 20); } ST_IN HReg hregAMD64_RCX ( void ) { return mkHReg(False, HRcInt64, 1, 21); } @@ -81,7 +80,7 @@ ST_IN HReg hregAMD64_XMM0 ( void ) { return mkHReg(False, HRcVec128, 0, 26); } ST_IN HReg hregAMD64_XMM1 ( void ) { return mkHReg(False, HRcVec128, 1, 27); } #undef ST_IN -extern void ppHRegAMD64 ( HReg ); +extern UInt ppHRegAMD64 ( HReg ); /* --------- Condition codes, AMD encoding. --------- */ @@ -801,7 +800,7 @@ extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); - +extern AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool); extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i, HReg vreg, Short spill_off ); diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 380a24d..bc700c9 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -64,7 +64,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. */ - + ru->allocable_start[HRcInt64] = ru->size; ru->regs[ru->size++] = hregARM64_X22(); ru->regs[ru->size++] = hregARM64_X23(); ru->regs[ru->size++] = hregARM64_X24(); @@ -81,6 +81,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) ru->regs[ru->size++] = hregARM64_X5(); ru->regs[ru->size++] = hregARM64_X6(); ru->regs[ru->size++] = hregARM64_X7(); + ru->allocable_end[HRcInt64] = ru->size - 1; // X8 is used as a ProfInc temporary, not available to regalloc. // X9 is a chaining/spill temporary, not available to regalloc. @@ -94,19 +95,23 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) // X21 is the guest state pointer, not available to regalloc. // vector regs. Unfortunately not callee-saved. + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM64_Q16(); ru->regs[ru->size++] = hregARM64_Q17(); ru->regs[ru->size++] = hregARM64_Q18(); ru->regs[ru->size++] = hregARM64_Q19(); ru->regs[ru->size++] = hregARM64_Q20(); + ru->allocable_end[HRcVec128] = ru->size - 1; // F64 regs, all of which are callee-saved + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM64_D8(); ru->regs[ru->size++] = hregARM64_D9(); ru->regs[ru->size++] = hregARM64_D10(); ru->regs[ru->size++] = hregARM64_D11(); ru->regs[ru->size++] = hregARM64_D12(); ru->regs[ru->size++] = hregARM64_D13(); + ru->allocable_end[HRcFlt64] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -142,43 +147,41 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) } -void ppHRegARM64 ( HReg reg ) { +UInt ppHRegARM64 ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 31); - vex_printf("x%d", r); - return; + return vex_printf("x%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM64"); } } -static void ppHRegARM64asSreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(S-reg)"); +static UInt ppHRegARM64asSreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(S-reg)"); + return written; } -static void ppHRegARM64asHreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(H-reg)"); +static UInt ppHRegARM64asHreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(H-reg)"); + return written; } @@ -1745,7 +1748,7 @@ void ppARM64Instr ( const ARM64Instr* i ) { ppHRegARM64asSreg(i->ARM64in.VCmpS.argR); return; case ARM64in_VFCSel: { - void (*ppHRegARM64fp)(HReg) + UInt (*ppHRegARM64fp)(HReg) = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg); vex_printf("fcsel "); ppHRegARM64fp(i->ARM64in.VFCSel.dst); @@ -2616,6 +2619,21 @@ void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return ARM64Instr_MovI(to, from); + case HRcFlt64: + return ARM64Instr_VMov(8, to, from); + case HRcVec128: + return ARM64Instr_VMov(16, to, from); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM64: unimplemented regclass"); + } +} + /* Emit an instruction into buf and return the number of bytes used. Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index bbc211d..840e0aa 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -74,7 +74,7 @@ ST_IN HReg hregARM64_X9 ( void ) { return mkHReg(False, HRcInt64, 9, 27); } ST_IN HReg hregARM64_X21 ( void ) { return mkHReg(False, HRcInt64, 21, 28); } #undef ST_IN -extern void ppHRegARM64 ( HReg ); +extern UInt ppHRegARM64 ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM64_N_ARGREGS 8 /* x0 .. x7 */ @@ -1007,6 +1007,7 @@ extern void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM64 ( void ); diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index a986f37..e0e6bb2 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -68,6 +68,7 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) /* Callee saves ones are listed first, since we prefer them if they're available. */ + ru->allocable_start[HRcInt32] = ru->size; ru->regs[ru->size++] = hregARM_R4(); ru->regs[ru->size++] = hregARM_R5(); ru->regs[ru->size++] = hregARM_R6(); @@ -80,24 +81,34 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) ru->regs[ru->size++] = hregARM_R2(); ru->regs[ru->size++] = hregARM_R3(); ru->regs[ru->size++] = hregARM_R9(); + ru->allocable_end[HRcInt32] = ru->size - 1; + /* FP registers. Note: these are all callee-save. Yay! Hence we don't need to mention them as trashed in getHRegUsage for ARMInstr_Call. */ + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM_D8(); ru->regs[ru->size++] = hregARM_D9(); ru->regs[ru->size++] = hregARM_D10(); ru->regs[ru->size++] = hregARM_D11(); ru->regs[ru->size++] = hregARM_D12(); + ru->allocable_end[HRcFlt64] = ru->size - 1; + + ru->allocable_start[HRcFlt32] = ru->size; ru->regs[ru->size++] = hregARM_S26(); ru->regs[ru->size++] = hregARM_S27(); ru->regs[ru->size++] = hregARM_S28(); ru->regs[ru->size++] = hregARM_S29(); ru->regs[ru->size++] = hregARM_S30(); + ru->allocable_end[HRcFlt32] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM_Q8(); ru->regs[ru->size++] = hregARM_Q9(); ru->regs[ru->size++] = hregARM_Q10(); ru->regs[ru->size++] = hregARM_Q11(); ru->regs[ru->size++] = hregARM_Q12(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -140,35 +151,30 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) } -void ppHRegARM ( HReg reg ) { +UInt ppHRegARM ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("r%d", r); - return; + return vex_printf("r%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcFlt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("s%d", r); - return; + return vex_printf("s%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM"); } @@ -2772,6 +2778,22 @@ void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARMInstr* genMove_ARM(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + return ARMInstr_Mov(to, ARMRI84_R(from)); + case HRcFlt32: + return ARMInstr_VUnaryS(ARMvfpu_COPY, to, from); + case HRcFlt64: + return ARMInstr_VUnaryD(ARMvfpu_COPY, to, from); + case HRcVec128: + return ARMInstr_NUnary(ARMneon_COPY, to, from, 4, False); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM: unimplemented regclass"); + } +} /* Emit an instruction into buf and return the number of bytes used. Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index 19c4299..ec6358e 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -81,7 +81,7 @@ ST_IN HReg hregARM_Q14 ( void ) { return mkHReg(False, HRcVec128, 14, 32); } ST_IN HReg hregARM_Q15 ( void ) { return mkHReg(False, HRcVec128, 15, 33); } #undef ST_IN -extern void ppHRegARM ( HReg ); +extern UInt ppHRegARM ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM_N_ARGREGS 4 /* r0, r1, r2, r3 */ @@ -1070,6 +1070,7 @@ extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARMInstr* genMove_ARM(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM ( void ); diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c deleted file mode 100644 index 2294a9b..0000000 --- a/VEX/priv/host_generic_reg_alloc2.c +++ /dev/null @@ -1,1613 +0,0 @@ - -/*---------------------------------------------------------------*/ -/*--- begin host_reg_alloc2.c ---*/ -/*---------------------------------------------------------------*/ - -/* - This file is part of Valgrind, a dynamic binary instrumentation - framework. - - Copyright (C) 2004-2017 OpenWorks LLP - in...@op... - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - - The GNU General Public License is contained in the file COPYING. - - Neither the names of the U.S. Department of Energy nor the - University of California nor the names of its contributors may be - used to endorse or promote products derived from this software - without prior written permission. -*/ - -#include "libvex_basictypes.h" -#include "libvex.h" - -#include "main_util.h" -#include "host_generic_regs.h" - -/* Set to 1 for lots of debugging output. */ -#define DEBUG_REGALLOC 0 - - -/* TODO 27 Oct 04: - - Better consistency checking from what isMove tells us. - - We can possibly do V-V coalescing even when the src is spilled, - providing we can arrange for the dst to have the same spill slot. - - Note that state[].hreg is the same as the available real regs. - - Generally rationalise data structures. */ - - -/* Records information on virtual register live ranges. Computed once - and remains unchanged after that. */ -typedef - struct { - /* Becomes live for the first time after this insn ... */ - Short live_after; - /* Becomes dead for the last time before this insn ... */ - Short dead_before; - /* The "home" spill slot, if needed. Never changes. */ - Short spill_offset; - Short spill_size; - /* What kind of register this is. */ - HRegClass reg_class; - } - VRegLR; - - -/* Records information on real-register live ranges. Computed once - and remains unchanged after that. */ -typedef - struct { - HReg rreg; - /* Becomes live after this insn ... */ - Short live_after; - /* Becomes dead before this insn ... */ - Short dead_before; - } - RRegLR; - - -/* An array of the following structs (rreg_state) comprises the - running state of the allocator. It indicates what the current - disposition of each allocatable real register is. The array gets - updated as the allocator processes instructions. The identity of - the register is not recorded here, because the index of this - structure in doRegisterAllocation()'s |rreg_state| is the index - number of the register, and the register itself can be extracted - from the RRegUniverse supplied to doRegisterAllocation(). */ -typedef - struct { - /* ------ FIELDS WHICH DO NOT CHANGE ------ */ - /* Is this involved in any HLRs? (only an optimisation hint) */ - Bool has_hlrs; - /* ------ FIELDS WHICH DO CHANGE ------ */ - /* 6 May 07: rearranged fields below so the whole struct fits - into 16 bytes on both x86 and amd64. */ - /* Used when .disp == Bound and we are looking for vregs to - spill. */ - Bool is_spill_cand; - /* Optimisation: used when .disp == Bound. Indicates when the - rreg has the same value as the spill slot for the associated - vreg. Is safely left at False, and becomes True after a - spill store or reload for this rreg. */ - Bool eq_spill_slot; - /* What's it's current disposition? */ - enum { Free, /* available for use */ - Unavail, /* in a real-reg live range */ - Bound /* in use (holding value of some vreg) */ - } - disp; - /* If .disp == Bound, what vreg is it bound to? */ - HReg vreg; - } - RRegState; - - -/* The allocator also maintains a redundant array of indexes - (vreg_state) from vreg numbers back to entries in rreg_state. It - is redundant because iff vreg_state[i] == j then - hregNumber(rreg_state[j].vreg) == i -- that is, the two entries - point at each other. The purpose of this is to speed up activities - which involve looking for a particular vreg: there is no need to - scan the rreg_state looking for it, just index directly into - vreg_state. The FAQ "does this vreg already have an associated - rreg" is the main beneficiary. - - To indicate, in vreg_state[i], that a given vreg is not currently - associated with any rreg, that entry can be set to INVALID_RREG_NO. - - Because the vreg_state entries are signed Shorts, the max number - of vregs that can be handed by regalloc is 32767. -*/ - -#define INVALID_RREG_NO ((Short)(-1)) - -#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs) -#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs) - - -/* Search forward from some given point in the incoming instruction - sequence. Point is to select a virtual register to spill, by - finding the vreg which is mentioned as far ahead as possible, in - the hope that this will minimise the number of consequent reloads. - - Only do the search for vregs which are Bound in the running state, - and for which the .is_spill_cand field is set. This allows the - caller to arbitrarily restrict the set of spill candidates to be - considered. - - To do this we don't actually need to see the incoming instruction - stream. Rather, what we need us the HRegUsage records for the - incoming instruction stream. Hence that is passed in. - - Returns an index into the state array indicating the (v,r) pair to - spill, or -1 if none was found. */ -static -Int findMostDistantlyMentionedVReg ( - HRegUsage* reg_usages_in, - Int search_from_instr, - Int num_instrs, - RRegState* state, - Int n_state -) -{ - Int k, m; - Int furthest_k = -1; - Int furthest = -1; - vassert(search_from_instr >= 0); - for (k = 0; k < n_state; k++) { - if (!state[k].is_spill_cand) - continue; - vassert(state[k].disp == Bound); - for (m = search_from_instr; m < num_instrs; m++) { - if (HRegUsage__contains(®_usages_in[m], state[k].vreg)) - break; - } - if (m > furthest) { - furthest = m; - furthest_k = k; - } - } - return furthest_k; -} - - -/* Check that this vreg has been assigned a sane spill offset. */ -inline -static void sanity_check_spill_offset ( VRegLR* vreg ) -{ - switch (vreg->reg_class) { - case HRcVec128: case HRcFlt64: - vassert(0 == ((UShort)vreg->spill_offset % 16)); break; - default: - vassert(0 == ((UShort)vreg->spill_offset % 8)); break; - } -} - - -/* Double the size of the real-reg live-range array, if needed. */ -__attribute__((noinline)) -static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used ) -{ - Int k; - RRegLR* arr2; - if (0) - vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size); - vassert(used == *size); - arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR)); - for (k = 0; k < *size; k++) - arr2[k] = (*info)[k]; - *size *= 2; - *info = arr2; -} -inline -static void ensureRRLRspace ( RRegLR** info, Int* size, Int used ) -{ - if (LIKELY(used < *size)) return; - ensureRRLRspace_SLOW(info, size, used); -} - - -/* Sort an array of RRegLR entries by either the .live_after or - .dead_before fields. This is performance-critical. */ -static void sortRRLRarray ( RRegLR* arr, - Int size, Bool by_live_after ) -{ - Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 }; - Int lo = 0; - Int hi = size-1; - Int i, j, h, bigN, hp; - RRegLR v; - - vassert(size >= 0); - if (size == 0) - return; - - bigN = hi - lo + 1; if (bigN < 2) return; - hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--; - - if (by_live_after) { - - for ( ; hp >= 0; hp--) { - h = incs[hp]; - for (i = lo + h; i <= hi; i++) { - v = arr[i]; - j = i; - while (arr[j-h].live_after > v.live_after) { - arr[j] = arr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - arr[j] = v; - } - } - - } else { - - for ( ; hp >= 0; hp--) { - h = incs[hp]; - for (i = lo + h; i <= hi; i++) { - v = arr[i]; - j = i; - while (arr[j-h].dead_before > v.dead_before) { - arr[j] = arr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - arr[j] = v; - } - } - - } -} - - -/* Compute the index of the highest and lowest 1 in a ULong, - respectively. Results are undefined if the argument is zero. - Don't pass it zero :) */ -static inline UInt ULong__maxIndex ( ULong w64 ) { - return 63 - __builtin_clzll(w64); -} - -static inline UInt ULong__minIndex ( ULong w64 ) { - return __builtin_ctzll(w64); -} - - -/* Vectorised memset, copied from Valgrind's m_libcbase.c. */ -static void* local_memset ( void *destV, Int c, SizeT sz ) -{ -# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3))) - - UInt c4; - UChar* d = destV; - UChar uc = c; - - while ((!IS_4_ALIGNED(d)) && sz >= 1) { - d[0] = uc; - d++; - sz--; - } - if (sz == 0) - return destV; - c4 = uc; - c4 |= (c4 << 8); - c4 |= (c4 << 16); - while (sz >= 16) { - ((UInt*)d)[0] = c4; - ((UInt*)d)[1] = c4; - ((UInt*)d)[2] = c4; - ((UInt*)d)[3] = c4; - d += 16; - sz -= 16; - } - while (sz >= 4) { - ((UInt*)d)[0] = c4; - d += 4; - sz -= 4; - } - while (sz >= 1) { - d[0] = c; - d++; - sz--; - } - return destV; - -# undef IS_4_ALIGNED -} - - -/* A target-independent register allocator. Requires various - functions which it uses to deal abstractly with instructions and - registers, since it cannot have any target-specific knowledge. - - Returns a new list of instructions, which, as a result of the - behaviour of mapRegs, will be in-place modifications of the - original instructions. - - Requires that the incoming code has been generated using - vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside - that range is a checked run-time error. - - Takes an expandable array of pointers to unallocated insns. - Returns an expandable array of pointers to allocated insns. -*/ -HInstrArray* doRegisterAllocation ( - - /* Incoming virtual-registerised code. */ - HInstrArray* instrs_in, - - /* The real-register universe to use. This contains facts about - real registers, one of which is the set of registers available - for allocation. */ - const RRegUniverse* univ, - - /* Return True iff the given insn is a reg-reg move, in which - case also return the src and dst regs. */ - Bool (*isMove) ( const HInstr*, HReg*, HReg* ), - - /* Get info about register usage in this insn. */ - void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ), - - /* Apply a reg-reg mapping to an insn. */ - void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), - - /* Return one, or, if we're unlucky, two insn(s) to spill/restore a - real reg to a spill slot byte offset. The two leading HInstr** - args are out parameters, through which the generated insns are - returned. Also (optionally) a 'directReload' function, which - attempts to replace a given instruction by one which reads - directly from a specified spill slot. May be NULL, in which - case the optimisation is not attempted. */ - void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), - void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), - HInstr* (*directReload) ( HInstr*, HReg, Short ), - Int guest_sizeB, - - /* For debug printing only. */ - void (*ppInstr) ( const HInstr*, Bool ), - void (*ppReg) ( HReg ), - - /* 32/64bit mode */ - Bool mode64 -) -{ -# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) - - const Bool eq_spill_opt = True; - - /* Info on vregs and rregs. Computed once and remains - unchanged. */ - Int n_vregs; - VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */ - - /* We keep two copies of the real-reg live range info, one sorted - by .live_after and the other by .dead_before. First the - unsorted info is created in the _la variant is copied into the - _db variant. Once that's done both of them are sorted. - We also need two integer cursors which record the next - location in the two arrays to consider. */ - RRegLR* rreg_lrs_la; - RRegLR* rreg_lrs_db; - Int rreg_lrs_size; - Int rreg_lrs_used; - Int rreg_lrs_la_next; - Int rreg_lrs_db_next; - - /* Info on register usage in the incoming instruction array. - Computed once and remains unchanged, more or less; updated - sometimes by the direct-reload optimisation. */ - HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */ - - /* Used when constructing vreg_lrs (for allocating stack - slots). */ - Short ss_busy_until_before[N_SPILL64S]; - - /* Used when constructing rreg_lrs. */ - Int* rreg_live_after; - Int* rreg_dead_before; - - /* Running state of the core allocation algorithm. */ - RRegState* rreg_state; /* [0 .. n_rregs-1] */ - Int n_rregs; - - /* .. and the redundant backward map */ - /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO. - This implies n_rregs must be <= 32768. */ - Short* vreg_state; /* [0 .. n_vregs-1] */ - - /* The vreg -> rreg map constructed and then applied to each - instr. */ - HRegRemap remap; - - /* The output array of instructions. */ - HInstrArray* instrs_out; - - /* Sanity checks are expensive. They are only done periodically, - not at each insn processed. */ - Bool do_sanity_check; - - vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); - vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN)); - vassert(0 == (N_SPILL64S % 2)); - - /* The live range numbers are signed shorts, and so limiting the - number of insns to 15000 comfortably guards against them - overflowing 32k. */ - vassert(instrs_in->arr_used <= 15000); - -# define INVALID_INSTRNO (-2) - -# define EMIT_INSTR(_instr) \ - do { \ - HInstr* _tmp = (_instr); \ - if (DEBUG_REGALLOC) { \ - vex_printf("** "); \ - (*ppInstr)(_tmp, mode64); \ - vex_printf("\n\n"); \ - } \ - addHInstr ( instrs_out, _tmp ); \ - } while (0) - -# define PRINT_STATE \ - do { \ - Int z, q; \ - for (z = 0; z < n_rregs; z++) { \ - vex_printf(" rreg_state[%2d] = ", z); \ - (*ppReg)(univ->regs[z]); \ - vex_printf(" \t"); \ - switch (rreg_state[z].disp) { \ - case Free: vex_printf("Free\n"); break; \ - case Unavail: vex_printf("Unavail\n"); break; \ - case Bound: vex_printf("BoundTo "); \ - (*ppReg)(rreg_state[z].vreg); \ - vex_printf("\n"); break; \ - } \ - } \ - vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \ - q = 0; \ - for (z = 0; z < n_vregs; z++) { \ - if (vreg_state[z] == INVALID_RREG_NO) \ - continue; \ - vex_printf("[%d] -> %d ", z, vreg_state[z]); \ - q++; \ - if (q > 0 && (q % 6) == 0) \ - vex_printf("\n "); \ - } \ - vex_printf("\n"); \ - } while (0) - - - /* --------- Stage 0: set up output array --------- */ - /* --------- and allocate/initialise running state. --------- */ - - instrs_out = newHInstrArray(); - - /* ... and initialise running state. */ - /* n_rregs is no more than a short name for n_available_real_regs. */ - n_rregs = univ->allocable; - n_vregs = instrs_in->n_vregs; - - /* If this is not so, vreg_state entries will overflow. */ - vassert(n_vregs < 32767); - - /* If this is not so, the universe we have is nonsensical. */ - vassert(n_rregs > 0); - - rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState)); - vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short)); - - for (Int j = 0; j < n_rregs; j++) { - rreg_state[j].has_hlrs = False; - rreg_state[j].disp = Free; - rreg_state[j].vreg = INVALID_HREG; - rreg_state[j].is_spill_cand = False; - rreg_state[j].eq_spill_slot = False; - } - - for (Int j = 0; j < n_vregs; j++) - vreg_state[j] = INVALID_RREG_NO; - - - /* --------- Stage 1: compute vreg live ranges. --------- */ - /* --------- Stage 2: compute rreg live ranges. --------- */ - - /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */ - - /* This is relatively simple, because (1) we only seek the complete - end-to-end live range of each vreg, and are not interested in - any holes in it, and (2) the vregs are conveniently numbered 0 - .. n_vregs-1, so we can just dump the results in a - pre-allocated array. */ - - vreg_lrs = NULL; - if (n_vregs > 0) - vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs); - - for (Int j = 0; j < n_vregs; j++) { - vreg_lrs[j].live_after = INVALID_INSTRNO; - vreg_lrs[j].dead_before = INVALID_INSTRNO; - vreg_lrs[j].spill_offset = 0; - vreg_lrs[j].spill_size = 0; - vreg_lrs[j].reg_class = HRcINVALID; - } - - /* An array to hold the reg-usage info for the incoming - instructions. */ - reg_usage_arr = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used); - - /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */ - - /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */ - - /* This is more complex than Stage 1, because we need to compute - exactly all the live ranges of all the allocatable real regs, - and we don't know in advance how many there will be. */ - - rreg_lrs_used = 0; - rreg_lrs_size = 4; - rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR)); - rreg_lrs_db = NULL; /* we'll create this later */ - - /* We'll need to track live range start/end points seperately for - each rreg. Sigh. */ - vassert(n_rregs > 0); - rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); - rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); - - for (Int j = 0; j < n_rregs; j++) { - rreg_live_after[j] = - rreg_dead_before[j] = INVALID_INSTRNO; - } - - /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */ - - /* ------ start of ITERATE OVER INSNS ------ */ - - for (Int ii = 0; ii < instrs_in->arr_used; ii++) { - - (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); - - if (0) { - vex_printf("\n%d stage1: ", ii); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n"); - ppHRegUsage(univ, ®_usage_arr[ii]); - } - - /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ - - /* for each virtual reg mentioned in the insn ... */ - for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { - - HReg vreg = reg_usage_arr[ii].vRegs[j]; - vassert(hregIsVirtual(vreg)); - - Int k = hregIndex(vreg); - if (k < 0 || k >= n_vregs) { - vex_printf("\n"); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n"); - vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); - vpanic("doRegisterAllocation: out-of-range vreg"); - } - - /* Take the opportunity to note its regclass. We'll need - that when allocating spill slots. */ - if (vreg_lrs[k].reg_class == HRcINVALID) { - /* First mention of this vreg. */ - vreg_lrs[k].reg_class = hregClass(vreg); - } else { - /* Seen it before, so check for consistency. */ - vassert(vreg_lrs[k].reg_class == hregClass(vreg)); - } - - /* Now consider live ranges. */ - switch (reg_usage_arr[ii].vMode[j]) { - case HRmRead: - if (vreg_lrs[k].live_after == INVALID_INSTRNO) { - vex_printf("\n\nOFFENDING VREG = %d\n", k); - vpanic("doRegisterAllocation: " - "first event for vreg is Read"); - } - vreg_lrs[k].dead_before = toShort(ii + 1); - break; - case HRmWrite: - if (vreg_lrs[k].live_after == INVALID_INSTRNO) - vreg_lrs[k].live_after = toShort(ii); - vreg_lrs[k].dead_before = toShort(ii + 1); - break; - case HRmModify: - if (vreg_lrs[k].live_after == INVALID_INSTRNO) { - vex_printf("\n\nOFFENDING VREG = %d\n", k); - vpanic("doRegisterAllocation: " - "first event for vreg is Modify"); - } - vreg_lrs[k].dead_before = toShort(ii + 1); - break; - default: - vpanic("doRegisterAllocation(1)"); - } /* switch */ - - } /* iterate over virtual registers */ - - /* ------ end of DEAL WITH VREG LIVE RANGES ------ */ - - /* ------ start of DEAL WITH RREG LIVE RANGES ------ */ - - /* If this doesn't hold, the following iteration over real registers - will fail miserably. */ - vassert(N_RREGUNIVERSE_REGS == 64); - - const ULong rRead = reg_usage_arr[ii].rRead; - const ULong rWritten = reg_usage_arr[ii].rWritten; - const ULong rMentioned = rRead | rWritten; - - UInt rReg_minIndex; - UInt rReg_maxIndex; - if (rMentioned == 0) { - /* There are no real register uses in this insn. Set - rReg_{min,max}Index so that the following loop doesn't iterate - at all, so as to avoid wasting time. */ - rReg_minIndex = 1; - rReg_maxIndex = 0; - } else { - rReg_minIndex = ULong__minIndex(rMentioned); - rReg_maxIndex = ULong__maxIndex(rMentioned); - /* Don't bother to look at registers which are not available - to the allocator. We asserted above that n_rregs > 0, so - n_rregs-1 is safe. */ - if (rReg_maxIndex >= n_rregs) - rReg_maxIndex = n_rregs-1; - } - - /* for each allocator-available real reg mentioned in the insn ... */ - /* Note. We are allocating only over the real regs available to - the allocator. Others, eg the stack or baseblock pointers, - are unavailable to allocation and so we never visit them. - Hence the iteration is cut off at n_rregs-1, since n_rregs == - univ->allocable. */ - for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) { - - const ULong jMask = 1ULL << j; - if (LIKELY((rMentioned & jMask) == 0)) - continue; - - const Bool isR = (rRead & jMask) != 0; - const Bool isW = (rWritten & jMask) != 0; - - /* Dummy initialisations of flush_la and flush_db to avoid - possible bogus uninit-var warnings from gcc. */ - Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO; - Bool flush = False; - - if (isW && !isR) { - flush_la = rreg_live_after[j]; - flush_db = rreg_dead_before[j]; - if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO) - flush = True; - rreg_live_after[j] = ii; - rreg_dead_before[j] = ii+1; - } else if (!isW && isR) { - if (rreg_live_after[j] == INVALID_INSTRNO) { - vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); - vex_printf("\n"); - vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n"); - vpanic("doRegisterAllocation: " - "first event for rreg is Read"); - } - rreg_dead_before[j] = ii+1; - } else { - vassert(isR && isW); - if (rreg_live_after[j] == INVALID_INSTRNO) { - vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); - vex_printf("\n"); - vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n"); - vpanic("doRegisterAllocation: " - "first event for rreg is Modify"); - } - rreg_dead_before[j] = ii+1; - } - - if (flush) { - vassert(flush_la != INVALID_INSTRNO); - vassert(flush_db != INVALID_INSTRNO); - ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); - if (0) - vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); - rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; - rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); - rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); - rreg_lrs_used++; - } - - } /* iterate over rregs in the instr */ - - /* ------ end of DEAL WITH RREG LIVE RANGES ------ */ - - } /* iterate over insns */ - - /* ------ end of ITERATE OVER INSNS ------ */ - - /* ------ start of FINALISE RREG LIVE RANGES ------ */ - - /* Now finish up any live ranges left over. */ - for (Int j = 0; j < n_rregs; j++) { - - if (0) { - vex_printf("residual %d: %d %d\n", j, rreg_live_after[j], - rreg_dead_before[j]); - } - vassert( (rreg_live_after[j] == INVALID_INSTRNO - && rreg_dead_before[j] == INVALID_INSTRNO) - || - (rreg_live_after[j] != INVALID_INSTRNO - && rreg_dead_before[j] != INVALID_INSTRNO) - ); - - if (rreg_live_after[j] == INVALID_INSTRNO) - continue; - - ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); - if (0) - vex_printf("FLUSH 2 (%d,%d)\n", - rreg_live_after[j], rreg_dead_before[j]); - rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; - rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); - rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); - rreg_lrs_used++; - } - - /* Compute summary hints for choosing real regs. If a real reg is - involved in a hard live range, record that fact in the fixed - part of the running rreg_state. Later, when offered a choice between - rregs, it's better to choose one which is not marked as having - any HLRs, since ones with HLRs may need to be spilled around - their HLRs. Correctness of final assignment is unaffected by - this mechanism -- it is only an optimisation. */ - - for (Int j = 0; j < rreg_lrs_used; j++) { - HReg rreg = rreg_lrs_la[j].rreg; - vassert(!hregIsVirtual(rreg)); - /* rreg is involved in a HLR. Record this info in the array, if - there is space. */ - UInt ix = hregIndex(rreg); - vassert(ix < n_rregs); - rreg_state[ix].has_hlrs = True; - } - if (0) { - for (Int j = 0; j < n_rregs; j++) { - if (!rreg_state[j].has_hlrs) - continue; - ppReg(univ->regs[j]); - vex_printf(" hinted\n"); - } - } - - /* Finally, copy the _la variant into the _db variant and - sort both by their respective fields. */ - rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR)); - for (Int j = 0; j < rreg_lrs_used; j++) - rreg_lrs_db[j] = rreg_lrs_la[j]; - - sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ ); - sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ ); - - /* And set up the cursors. */ - rreg_lrs_la_next = 0; - rreg_lrs_db_next = 0; - - for (Int j = 1; j < rreg_lrs_used; j++) { - vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after); - vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before); - } - - /* ------ end of FINALISE RREG LIVE RANGES ------ */ - - if (DEBUG_REGALLOC) { - for (Int j = 0; j < n_vregs; j++) { - vex_printf("vreg %d: la = %d, db = %d\n", - j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before ); - } - } - - if (DEBUG_REGALLOC) { - vex_printf("RRegLRs by LA:\n"); - for (Int j = 0; j < rreg_lrs_used; j++) { - vex_printf(" "); - (*ppReg)(rreg_lrs_la[j].rreg); - vex_printf(" la = %d, db = %d\n", - rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); - } - vex_printf("RRegLRs by DB:\n"); - for (Int j = 0; j < rreg_lrs_used; j++) { - vex_printf(" "); - (*ppReg)(rreg_lrs_db[j].rreg); - vex_printf(" la = %d, db = %d\n", - rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); - } - } - - /* --------- Stage 3: allocate spill slots. --------- */ - - /* Each spill slot is 8 bytes long. For vregs which take more than - 64 bits to spill (classes Flt64 and Vec128), we have to allocate - two consecutive spill slots. For 256 bit registers (class - Vec256), we have to allocate four consecutive spill slots. - - For Vec128-class on PowerPC, the spill slot's actual address - must be 16-byte aligned. Since the spill slot's address is - computed as an offset from the guest state pointer, and since - the user of the generated code must set that pointer to a - 32-aligned value, we have the residual obligation here of - choosing a 16-aligned spill slot offset for Vec128-class values. - Since each spill slot is 8 bytes long, that means for - Vec128-class values we must allocated a spill slot number which - is zero mod 2. - - Similarly, for Vec256 class on amd64, find a spill slot number - which is zero mod 4. This guarantees it will be 32 byte - aligned, which isn't actually necessary on amd64 (we use movUpd - etc to spill), but seems like good practice. - - Do a rank-based allocation of vregs to spill slot numbers. We - put as few values as possible in spill slots, but nevertheless - need to have a spill slot available for all vregs, just in case. - */ - /* Int max_ss_no = -1; */ - - local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before)); - - for (Int j = 0; j < n_vregs; j++) { - - /* True iff this vreg is unused. In which case we also expect - that the reg_class field for it has not been set. */ - if (vreg_lrs[j].live_after == INVALID_INSTRNO) { - vassert(vreg_lrs[j].reg_class == HRcINVALID); - continue; - } - - /* The spill slots are 64 bits in size. As per the comment on - definition of HRegClass in host_generic_regs.h, that means, - to spill a vreg of class Flt64 or Vec128, we'll need to find - two adjacent spill slots to use. For Vec256, we'll need to - find four adjacent slots to use. Note, this logic needs to - kept in sync with the size info on the definition of - HRegClass. */ - Int ss_no = -1; - switch (vreg_lrs[j].reg_class) { - - case HRcVec128: case HRcFlt64: - /* Find two adjacent free slots in which between them - provide up to 128 bits in which to spill the vreg. - Since we are trying to find an even:odd pair, move - along in steps of 2 (slots). */ - for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2) - if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after - && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after) - break; - if (ss_no >= N_SPILL64S-1) { - vpanic("LibVEX_N_SPILL_BYTES is too low. " - "Increase and recompile."); - } - ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before; - ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before; - break; - - default: - /* The ordinary case -- just find a single spill slot. */ - /* Find the lowest-numbered spill slot which is available - at the start point of this interval, and assign the - interval to it. */ - for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) - if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after) - break; - if (ss_no == N_SPILL64S) { - vpanic("LibVEX_N_SPILL_BYTES is too low. " - "Increase and recompile."); - } - ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before; - break; - - } /* switch (vreg_lrs[j].reg_class) */ - - /* This reflects LibVEX's hard-wired knowledge of the baseBlock - layout: the guest state, then two equal sized areas following - it for two sets of shadow state, and then the spill area. */ - vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8); - - /* Independent check that we've made a sane choice of slot */ - sanity_check_spill_offset( &vreg_lrs[j] ); - /* if (j > max_ss_no) */ - /* max_ss_no = j; */ - } - - if (0) { - vex_printf("\n\n"); - for (Int j = 0; j < n_vregs; j++) - vex_printf("vreg %d --> spill offset %d\n", - j, vreg_lrs[j].spill_offset); - } - - /* --------- Stage 4: establish rreg preferences --------- */ - - /* It may be advantageous to allocating certain vregs to specific - rregs, as a way of avoiding reg-reg moves later. Here we - establish which, if any, rreg each vreg would prefer to be in. - Note that this constrains the allocator -- ideally we end up - with as few as possible vregs expressing a preference. - - This is an optimisation: if the .preferred_rreg field is never - set to anything different from INVALID_HREG, the allocator still - works. */ - - /* 30 Dec 04: removed this mechanism as it does not seem to - help. */ - - /* --------- Stage 5: process instructions --------- */ - - /* This is the main loop of the allocator. First, we need to - correctly set up our running state, which tracks the status of - each real register. */ - - /* ------ BEGIN: Process each insn in turn. ------ */ - - for (Int ii = 0; ii < instrs_in->arr_used; ii++) { - - if (DEBUG_REGALLOC) { - vex_printf("\n====----====---- Insn %d ----====----====\n", ii); - vex_printf("---- "); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n\nInitial state:\n"); - PRINT_STATE; - vex_printf("\n"); - } - - /* ------------ Sanity checks ------------ */ - - /* Sanity checks are expensive. So they are done only once - every 17 instructions, and just before the last - instruction. */ - do_sanity_check - = toBool( - False /* Set to True for sanity checking of all insns. */ - || ii == instrs_in->arr_used-1 - || (ii > 0 && (ii % 17) == 0) - ); - - if (do_sanity_check) { - - /* Sanity check 1: all rregs with a hard live range crossing - this insn must be marked as unavailable in the running - state. */ - for (Int j = 0; j < rreg_lrs_used; j++) { - if (rreg_lrs_la[j].live_after < ii - && ii < rreg_lrs_la[j].dead_before) { - /* ii is the middle of a hard live range for some real - reg. Check it's marked as such in the running - state. */ - HReg reg = rreg_lrs_la[j].rreg; - - if (0) { - vex_printf("considering la %d .. db %d reg = ", - rreg_lrs_la[j].live_after, - rreg_lrs_la[j].dead_before); - (*ppReg)(reg); - vex_printf("\n"); - } - - /* assert that this rreg is marked as unavailable */ - vassert(!hregIsVirtual(reg)); - vassert(rreg_state[hregIndex(reg)].disp == Unavail); - } - } - - /* Sanity check 2: conversely, all rregs marked as - unavailable in the running rreg_state must have a - corresponding hard live range entry in the rreg_lrs - array. */ - for (Int j = 0; j < n_rregs; j++) { - vassert(rreg_state[j].disp == Bound - || rreg_state[j].disp == Free - || rreg_state[j].disp == Unavail); - if (rreg_state[j].disp != Unavail) - continue; - Int k; - for (k = 0; k < rreg_lrs_used; k++) { - HReg reg = rreg_lrs_la[k].rreg; - vassert(!hregIsVirtual(reg)); - if (hregIndex(reg) == j - && rreg_lrs_la[k].live_after < ii - && ii < rreg_lrs_la[k].dead_before) - break; - } - /* If this vassertion fails, we couldn't find a - corresponding HLR. */ - vassert(k < rreg_lrs_used); - } - - /* Sanity check 3: all vreg-rreg bindings must bind registers - of the same class. */ - for (Int j = 0; j < n_rregs; j++) { - if (rreg_state[j].disp != Bound) { - vassert(rreg_state[j].eq_spill_slot == False); - continue; - } - vassert(hregClass(univ->regs[j]) - == hregClass(rreg_state[j].vreg)); - vassert( hregIsVirtual(rreg_state[j].vreg)); - } - - /* Sanity check 4: the vreg_state and rreg_state - mutually-redundant mappings are consistent. If - rreg_state[j].vreg points at some vreg_state entry then - that vreg_state entry should point back at - rreg_state[j]. */ - for (Int j = 0; j < n_rregs; j++) { - if (rreg_state[j].disp != Bound) - continue; - Int k = hregIndex(rreg_state[j].vreg); - vassert(IS_VALID_VREGNO(k)); - vassert(vreg_state[k] == j); - } - for (Int j = 0; j < n_vregs; j++) { - Int k = vreg_state[j]; - if (k == INVALID_RREG_NO) - continue; - vassert(IS_VALID_RREGNO(k)); - vassert(rreg_state[k].disp == Bound); - ... [truncated message content] |