|
From: Ivo R. <ir...@so...> - 2017-08-28 10:40:38
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=efa1e5ef8d257e3b20facf6f04350d29578ae9e4 commit efa1e5ef8d257e3b20facf6f04350d29578ae9e4 Author: Ivo Raisr <iv...@iv...> Date: Sat Aug 26 00:19:05 2017 +0200 VEX register allocator version 3. Implements a new version of VEX register allocator which keeps the main state per virtual registers, as opposed to real registers in v2. This results in a simpler and cleaner design and much simpler implementation. It has been observed that the new allocator executes 20-30% faster than the previous one but could produce slightly worse spilling decisions. Overall performance improvement when running the Valgrind performance regression test suite has been observed in terms of a few percent. The new register allocator (v3) is now the default one. The old register allocator (v2) is still kept around and can be activated with command line option '--vex-regalloc-version=2'. Fixes BZ#381553. Diff: --- Makefile.vex.am | 1 + NEWS | 1 + VEX/priv/host_amd64_defs.c | 43 +- VEX/priv/host_amd64_defs.h | 29 +- VEX/priv/host_arm64_defs.c | 52 +- VEX/priv/host_arm64_defs.h | 3 +- VEX/priv/host_arm_defs.c | 44 +- VEX/priv/host_arm_defs.h | 3 +- VEX/priv/host_generic_reg_alloc2.c | 179 ++---- VEX/priv/host_generic_reg_alloc3.c | 1171 ++++++++++++++++++++++++++++++++++++ VEX/priv/host_generic_regs.c | 49 +- VEX/priv/host_generic_regs.h | 108 ++-- VEX/priv/host_mips_defs.c | 39 +- VEX/priv/host_mips_defs.h | 3 +- VEX/priv/host_ppc_defs.c | 37 +- VEX/priv/host_ppc_defs.h | 3 +- VEX/priv/host_s390_defs.c | 25 +- VEX/priv/host_s390_defs.h | 3 +- VEX/priv/host_x86_defs.c | 36 +- VEX/priv/host_x86_defs.h | 4 +- VEX/priv/main_main.c | 36 +- VEX/priv/main_util.c | 37 +- VEX/pub/libvex.h | 5 + coregrind/m_main.c | 3 + none/tests/cmdline2.stdout.exp | 1 + 25 files changed, 1628 insertions(+), 287 deletions(-) diff --git a/Makefile.vex.am b/Makefile.vex.am index 9b9b9b5..4ad5ffa 100644 --- a/Makefile.vex.am +++ b/Makefile.vex.am @@ -143,6 +143,7 @@ 
LIBVEX_SOURCES_COMMON = \ priv/host_generic_simd256.c \ priv/host_generic_maddf.c \ priv/host_generic_reg_alloc2.c \ + priv/host_generic_reg_alloc3.c \ priv/host_x86_defs.c \ priv/host_x86_isel.c \ priv/host_amd64_defs.c \ diff --git a/NEWS b/NEWS index 516c4cc..446a7fa 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,7 @@ where XXXXXX is the bug number as listed below. 381272 ppc64 doesn't compile test_isa_2_06_partx.c without VSX support 381289 epoll_pwait can have a NULL sigmask 381274 powerpc too chatty even with --sigill-diagnostics=no +381553 VEX register allocator v3 381769 Use ucontext_t instead of struct ucontext 381805 arm32 needs ld.so index hardwire for new glibc security fixes 382256 gz compiler flag test doesn't work for gold diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c index 5e0600a..ebe2b00 100644 --- a/VEX/priv/host_amd64_defs.c +++ b/VEX/priv/host_amd64_defs.c @@ -63,6 +63,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. 
*/ + ru->allocable_start[HRcInt64] = ru->size; ru->regs[ru->size++] = hregAMD64_RSI(); ru->regs[ru->size++] = hregAMD64_RDI(); ru->regs[ru->size++] = hregAMD64_R8(); @@ -72,6 +73,10 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) ru->regs[ru->size++] = hregAMD64_R14(); ru->regs[ru->size++] = hregAMD64_R15(); ru->regs[ru->size++] = hregAMD64_RBX(); + ru->regs[ru->size++] = hregAMD64_R10(); + ru->allocable_end[HRcInt64] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregAMD64_XMM3(); ru->regs[ru->size++] = hregAMD64_XMM4(); ru->regs[ru->size++] = hregAMD64_XMM5(); @@ -82,8 +87,9 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) ru->regs[ru->size++] = hregAMD64_XMM10(); ru->regs[ru->size++] = hregAMD64_XMM11(); ru->regs[ru->size++] = hregAMD64_XMM12(); - ru->regs[ru->size++] = hregAMD64_R10(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; + /* And other regs, not available to the allocator. */ ru->regs[ru->size++] = hregAMD64_RAX(); ru->regs[ru->size++] = hregAMD64_RCX(); @@ -101,7 +107,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) } -void ppHRegAMD64 ( HReg reg ) +UInt ppHRegAMD64 ( HReg reg ) { Int r; static const HChar* ireg64_names[16] @@ -109,27 +115,24 @@ void ppHRegAMD64 ( HReg reg ) "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg64_names[r]); - return; + return vex_printf("%s", ireg64_names[r]); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%%xmm%d", r); - return; + return vex_printf("%%xmm%d", r); default: vpanic("ppHRegAMD64"); } } -static void ppHRegAMD64_lo32 ( HReg reg ) +static UInt ppHRegAMD64_lo32 ( HReg reg ) { Int r; static const HChar* ireg32_names[16] @@ -137,17 +140,16 @@ static void ppHRegAMD64_lo32 ( HReg reg ) "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - vex_printf("d"); - return; + UInt written = ppHReg(reg); + written += vex_printf("d"); + return written; } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); default: vpanic("ppHRegAMD64_lo32: invalid regclass"); } @@ -1995,6 +1997,19 @@ void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to); + case HRcVec128: + return AMD64Instr_SseReRg(Asse_MOV, from, to); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_AMD64: unimplemented regclass"); + } +} + AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off ) { vassert(spill_off >= 0 && spill_off < 10000); /* let's say */ diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index 39682ef..8a3eea8 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -56,19 +56,18 @@ ST_IN HReg hregAMD64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 5); } ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 6); } 
ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 7); } ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 8); } - -ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 9); } -ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 10); } -ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 11); } -ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 12); } -ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 13); } -ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 14); } -ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 15); } -ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 16); } -ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 17); } -ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 18); } - -ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 19); } +ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 9); } + +ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 10); } +ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 11); } +ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 12); } +ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 13); } +ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 14); } +ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 15); } +ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 16); } +ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 17); } +ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 18); } +ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 19); } ST_IN HReg hregAMD64_RAX ( void ) { return mkHReg(False, HRcInt64, 0, 
20); } ST_IN HReg hregAMD64_RCX ( void ) { return mkHReg(False, HRcInt64, 1, 21); } @@ -81,7 +80,7 @@ ST_IN HReg hregAMD64_XMM0 ( void ) { return mkHReg(False, HRcVec128, 0, 26); } ST_IN HReg hregAMD64_XMM1 ( void ) { return mkHReg(False, HRcVec128, 1, 27); } #undef ST_IN -extern void ppHRegAMD64 ( HReg ); +extern UInt ppHRegAMD64 ( HReg ); /* --------- Condition codes, AMD encoding. --------- */ @@ -801,7 +800,7 @@ extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); - +extern AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool); extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i, HReg vreg, Short spill_off ); diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 380a24d..2506512 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -64,7 +64,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. */ - + ru->allocable_start[HRcInt64] = ru->size; ru->regs[ru->size++] = hregARM64_X22(); ru->regs[ru->size++] = hregARM64_X23(); ru->regs[ru->size++] = hregARM64_X24(); @@ -81,6 +81,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) ru->regs[ru->size++] = hregARM64_X5(); ru->regs[ru->size++] = hregARM64_X6(); ru->regs[ru->size++] = hregARM64_X7(); + ru->allocable_end[HRcInt64] = ru->size - 1; // X8 is used as a ProfInc temporary, not available to regalloc. // X9 is a chaining/spill temporary, not available to regalloc. @@ -94,19 +95,23 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) // X21 is the guest state pointer, not available to regalloc. // vector regs. Unfortunately not callee-saved. 
+ ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM64_Q16(); ru->regs[ru->size++] = hregARM64_Q17(); ru->regs[ru->size++] = hregARM64_Q18(); ru->regs[ru->size++] = hregARM64_Q19(); ru->regs[ru->size++] = hregARM64_Q20(); + ru->allocable_end[HRcVec128] = ru->size - 1; // F64 regs, all of which are callee-saved + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM64_D8(); ru->regs[ru->size++] = hregARM64_D9(); ru->regs[ru->size++] = hregARM64_D10(); ru->regs[ru->size++] = hregARM64_D11(); ru->regs[ru->size++] = hregARM64_D12(); ru->regs[ru->size++] = hregARM64_D13(); + ru->allocable_end[HRcFlt64] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -142,43 +147,41 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) } -void ppHRegARM64 ( HReg reg ) { +UInt ppHRegARM64 ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 31); - vex_printf("x%d", r); - return; + return vex_printf("x%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM64"); } } -static void ppHRegARM64asSreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(S-reg)"); +static UInt ppHRegARM64asSreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(S-reg)"); + return written; } -static void ppHRegARM64asHreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(H-reg)"); +static UInt ppHRegARM64asHreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(H-reg)"); + return written; } @@ -1745,7 +1748,7 @@ void ppARM64Instr ( const ARM64Instr* i ) { ppHRegARM64asSreg(i->ARM64in.VCmpS.argR); return; case ARM64in_VFCSel: { - void (*ppHRegARM64fp)(HReg) + UInt (*ppHRegARM64fp)(HReg) = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg); vex_printf("fcsel "); ppHRegARM64fp(i->ARM64in.VFCSel.dst); @@ -2616,6 +2619,21 @@ void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return ARM64Instr_MovI(to, from); + case HRcFlt64: + return ARM64Instr_VMov(8, to, from); + case HRcVec128: + return ARM64Instr_VMov(16, to, from); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM64: unimplemented regclass"); + } +} + /* Emit an instruction into buf and return the number of bytes used. 
Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index 14b2de6..e7da4f9 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -74,7 +74,7 @@ ST_IN HReg hregARM64_X9 ( void ) { return mkHReg(False, HRcInt64, 9, 27); } ST_IN HReg hregARM64_X21 ( void ) { return mkHReg(False, HRcInt64, 21, 28); } #undef ST_IN -extern void ppHRegARM64 ( HReg ); +extern UInt ppHRegARM64 ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM64_N_ARGREGS 8 /* x0 .. x7 */ @@ -1007,6 +1007,7 @@ extern void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM64 ( void ); diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index a986f37..9bf87cd 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -68,6 +68,7 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) /* Callee saves ones are listed first, since we prefer them if they're available. */ + ru->allocable_start[HRcInt32] = ru->size; ru->regs[ru->size++] = hregARM_R4(); ru->regs[ru->size++] = hregARM_R5(); ru->regs[ru->size++] = hregARM_R6(); @@ -80,24 +81,34 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) ru->regs[ru->size++] = hregARM_R2(); ru->regs[ru->size++] = hregARM_R3(); ru->regs[ru->size++] = hregARM_R9(); + ru->allocable_end[HRcInt32] = ru->size - 1; + /* FP registers. Note: these are all callee-save. Yay! Hence we don't need to mention them as trashed in getHRegUsage for ARMInstr_Call. 
*/ + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM_D8(); ru->regs[ru->size++] = hregARM_D9(); ru->regs[ru->size++] = hregARM_D10(); ru->regs[ru->size++] = hregARM_D11(); ru->regs[ru->size++] = hregARM_D12(); + ru->allocable_end[HRcFlt64] = ru->size - 1; + + ru->allocable_start[HRcFlt32] = ru->size; ru->regs[ru->size++] = hregARM_S26(); ru->regs[ru->size++] = hregARM_S27(); ru->regs[ru->size++] = hregARM_S28(); ru->regs[ru->size++] = hregARM_S29(); ru->regs[ru->size++] = hregARM_S30(); + ru->allocable_end[HRcFlt32] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM_Q8(); ru->regs[ru->size++] = hregARM_Q9(); ru->regs[ru->size++] = hregARM_Q10(); ru->regs[ru->size++] = hregARM_Q11(); ru->regs[ru->size++] = hregARM_Q12(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -140,35 +151,30 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) } -void ppHRegARM ( HReg reg ) { +UInt ppHRegARM ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("r%d", r); - return; + return vex_printf("r%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcFlt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("s%d", r); - return; + return vex_printf("s%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM"); } @@ -2772,6 +2778,22 @@ void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARMInstr* genMove_ARM(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + return ARMInstr_Mov(to, ARMRI84_R(from)); + case HRcFlt32: + return ARMInstr_VUnaryS(ARMvfpu_COPY, to, from); + case HRcFlt64: + return ARMInstr_VUnaryD(ARMvfpu_COPY, to, from); + case HRcVec128: + return ARMInstr_NUnary(ARMneon_COPY, to, from, 4, False); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM: unimplemented regclass"); + } +} /* Emit an instruction into buf and return the number of bytes used. 
Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index e8a2eb7..56c4ec5 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -81,7 +81,7 @@ ST_IN HReg hregARM_Q14 ( void ) { return mkHReg(False, HRcVec128, 14, 32); } ST_IN HReg hregARM_Q15 ( void ) { return mkHReg(False, HRcVec128, 15, 33); } #undef ST_IN -extern void ppHRegARM ( HReg ); +extern UInt ppHRegARM ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM_N_ARGREGS 4 /* r0, r1, r2, r3 */ @@ -1070,6 +1070,7 @@ extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARMInstr* genMove_ARM(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM ( void ); diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c index 2294a9b..ec291d3 100644 --- a/VEX/priv/host_generic_reg_alloc2.c +++ b/VEX/priv/host_generic_reg_alloc2.c @@ -294,49 +294,6 @@ static inline UInt ULong__minIndex ( ULong w64 ) { } -/* Vectorised memset, copied from Valgrind's m_libcbase.c. */ -static void* local_memset ( void *destV, Int c, SizeT sz ) -{ -# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3))) - - UInt c4; - UChar* d = destV; - UChar uc = c; - - while ((!IS_4_ALIGNED(d)) && sz >= 1) { - d[0] = uc; - d++; - sz--; - } - if (sz == 0) - return destV; - c4 = uc; - c4 |= (c4 << 8); - c4 |= (c4 << 16); - while (sz >= 16) { - ((UInt*)d)[0] = c4; - ((UInt*)d)[1] = c4; - ((UInt*)d)[2] = c4; - ((UInt*)d)[3] = c4; - d += 16; - sz -= 16; - } - while (sz >= 4) { - ((UInt*)d)[0] = c4; - d += 4; - sz -= 4; - } - while (sz >= 1) { - d[0] = c; - d++; - sz--; - } - return destV; - -# undef IS_4_ALIGNED -} - - /* A target-independent register allocator. 
Requires various functions which it uses to deal abstractly with instructions and registers, since it cannot have any target-specific knowledge. @@ -352,44 +309,13 @@ static void* local_memset ( void *destV, Int c, SizeT sz ) Takes an expandable array of pointers to unallocated insns. Returns an expandable array of pointers to allocated insns. */ -HInstrArray* doRegisterAllocation ( +HInstrArray* doRegisterAllocation_v2 ( /* Incoming virtual-registerised code. */ HInstrArray* instrs_in, - /* The real-register universe to use. This contains facts about - real registers, one of which is the set of registers available - for allocation. */ - const RRegUniverse* univ, - - /* Return True iff the given insn is a reg-reg move, in which - case also return the src and dst regs. */ - Bool (*isMove) ( const HInstr*, HReg*, HReg* ), - - /* Get info about register usage in this insn. */ - void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ), - - /* Apply a reg-reg mapping to an insn. */ - void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), - - /* Return one, or, if we're unlucky, two insn(s) to spill/restore a - real reg to a spill slot byte offset. The two leading HInstr** - args are out parameters, through which the generated insns are - returned. Also (optionally) a 'directReload' function, which - attempts to replace a given instruction by one which reads - directly from a specified spill slot. May be NULL, in which - case the optimisation is not attempted. */ - void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), - void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), - HInstr* (*directReload) ( HInstr*, HReg, Short ), - Int guest_sizeB, - - /* For debug printing only. */ - void (*ppInstr) ( const HInstr*, Bool ), - void (*ppReg) ( HReg ), - - /* 32/64bit mode */ - Bool mode64 + /* Register allocator controls to use. 
*/ + const RegAllocControl* con ) { # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) @@ -447,7 +373,7 @@ HInstrArray* doRegisterAllocation ( not at each insn processed. */ Bool do_sanity_check; - vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); + vassert(0 == (con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN)); vassert(0 == (N_SPILL64S % 2)); @@ -463,7 +389,7 @@ HInstrArray* doRegisterAllocation ( HInstr* _tmp = (_instr); \ if (DEBUG_REGALLOC) { \ vex_printf("** "); \ - (*ppInstr)(_tmp, mode64); \ + con->ppInstr(_tmp, con->mode64); \ vex_printf("\n\n"); \ } \ addHInstr ( instrs_out, _tmp ); \ @@ -474,13 +400,13 @@ HInstrArray* doRegisterAllocation ( Int z, q; \ for (z = 0; z < n_rregs; z++) { \ vex_printf(" rreg_state[%2d] = ", z); \ - (*ppReg)(univ->regs[z]); \ + con->ppReg(con->univ->regs[z]); \ vex_printf(" \t"); \ switch (rreg_state[z].disp) { \ case Free: vex_printf("Free\n"); break; \ case Unavail: vex_printf("Unavail\n"); break; \ case Bound: vex_printf("BoundTo "); \ - (*ppReg)(rreg_state[z].vreg); \ + con->ppReg(rreg_state[z].vreg); \ vex_printf("\n"); break; \ } \ } \ @@ -505,7 +431,7 @@ HInstrArray* doRegisterAllocation ( /* ... and initialise running state. */ /* n_rregs is no more than a short name for n_available_real_regs. */ - n_rregs = univ->allocable; + n_rregs = con->univ->allocable; n_vregs = instrs_in->n_vregs; /* If this is not so, vreg_state entries will overflow. 
*/ @@ -586,13 +512,13 @@ HInstrArray* doRegisterAllocation ( for (Int ii = 0; ii < instrs_in->arr_used; ii++) { - (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); + con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], con->mode64); if (0) { vex_printf("\n%d stage1: ", ii); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); - ppHRegUsage(univ, ®_usage_arr[ii]); + ppHRegUsage(con->univ, ®_usage_arr[ii]); } /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ @@ -606,7 +532,7 @@ HInstrArray* doRegisterAllocation ( Int k = hregIndex(vreg); if (k < 0 || k >= n_vregs) { vex_printf("\n"); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); vpanic("doRegisterAllocation: out-of-range vreg"); @@ -711,10 +637,10 @@ HInstrArray* doRegisterAllocation ( } else if (!isW && isR) { if (rreg_live_after[j] == INVALID_INSTRNO) { vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf("\n"); vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); vpanic("doRegisterAllocation: " "first event for rreg is Read"); @@ -724,10 +650,10 @@ HInstrArray* doRegisterAllocation ( vassert(isR && isW); if (rreg_live_after[j] == INVALID_INSTRNO) { vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf("\n"); vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); vpanic("doRegisterAllocation: " "first event for rreg is Modify"); @@ -741,7 +667,7 @@ HInstrArray* doRegisterAllocation ( ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); if (0) vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); - rreg_lrs_la[rreg_lrs_used].rreg = 
univ->regs[j]; + rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j]; rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); rreg_lrs_used++; @@ -778,7 +704,7 @@ HInstrArray* doRegisterAllocation ( if (0) vex_printf("FLUSH 2 (%d,%d)\n", rreg_live_after[j], rreg_dead_before[j]); - rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; + rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j]; rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); rreg_lrs_used++; @@ -805,7 +731,7 @@ HInstrArray* doRegisterAllocation ( for (Int j = 0; j < n_rregs; j++) { if (!rreg_state[j].has_hlrs) continue; - ppReg(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf(" hinted\n"); } } @@ -841,14 +767,14 @@ HInstrArray* doRegisterAllocation ( vex_printf("RRegLRs by LA:\n"); for (Int j = 0; j < rreg_lrs_used; j++) { vex_printf(" "); - (*ppReg)(rreg_lrs_la[j].rreg); + con->ppReg(rreg_lrs_la[j].rreg); vex_printf(" la = %d, db = %d\n", rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); } vex_printf("RRegLRs by DB:\n"); for (Int j = 0; j < rreg_lrs_used; j++) { vex_printf(" "); - (*ppReg)(rreg_lrs_db[j].rreg); + con->ppReg(rreg_lrs_db[j].rreg); vex_printf(" la = %d, db = %d\n", rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); } @@ -882,7 +808,7 @@ HInstrArray* doRegisterAllocation ( */ /* Int max_ss_no = -1; */ - local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before)); + vex_bzero(ss_busy_until_before, sizeof(ss_busy_until_before)); for (Int j = 0; j < n_vregs; j++) { @@ -940,7 +866,7 @@ HInstrArray* doRegisterAllocation ( /* This reflects LibVEX's hard-wired knowledge of the baseBlock layout: the guest state, then two equal sized areas following it for two sets of shadow state, and then the spill area. 
*/ - vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8); + vreg_lrs[j].spill_offset = toShort(con->guest_sizeB * 3 + ss_no * 8); /* Independent check that we've made a sane choice of slot */ sanity_check_spill_offset( &vreg_lrs[j] ); @@ -983,7 +909,7 @@ HInstrArray* doRegisterAllocation ( if (DEBUG_REGALLOC) { vex_printf("\n====----====---- Insn %d ----====----====\n", ii); vex_printf("---- "); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n\nInitial state:\n"); PRINT_STATE; vex_printf("\n"); @@ -1018,7 +944,7 @@ HInstrArray* doRegisterAllocation ( vex_printf("considering la %d .. db %d reg = ", rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before); - (*ppReg)(reg); + con->ppReg(reg); vex_printf("\n"); } @@ -1059,7 +985,7 @@ HInstrArray* doRegisterAllocation ( vassert(rreg_state[j].eq_spill_slot == False); continue; } - vassert(hregClass(univ->regs[j]) + vassert(hregClass(con->univ->regs[j]) == hregClass(rreg_state[j].vreg)); vassert( hregIsVirtual(rreg_state[j].vreg)); } @@ -1099,7 +1025,7 @@ HInstrArray* doRegisterAllocation ( the dst to the src's rreg, and that's all. */ HReg vregS = INVALID_HREG; HReg vregD = INVALID_HREG; - if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) { + if ( con->isMove(instrs_in->arr[ii], &vregS, &vregD) ) { if (!hregIsVirtual(vregS)) goto cannot_coalesce; if (!hregIsVirtual(vregD)) goto cannot_coalesce; /* Check that *isMove is not telling us a bunch of lies ... */ @@ -1112,9 +1038,9 @@ HInstrArray* doRegisterAllocation ( if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; if (DEBUG_REGALLOC) { vex_printf("COALESCE "); - (*ppReg)(vregS); + con->ppReg(vregS); vex_printf(" -> "); - (*ppReg)(vregD); + con->ppReg(vregD); vex_printf("\n\n"); } /* Find the state entry for vregS. 
*/ @@ -1163,7 +1089,7 @@ HInstrArray* doRegisterAllocation ( vreg_state[m] = INVALID_RREG_NO; if (DEBUG_REGALLOC) { vex_printf("free up "); - (*ppReg)(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf("\n"); } } @@ -1204,7 +1130,7 @@ HInstrArray* doRegisterAllocation ( than before it. */ if (DEBUG_REGALLOC) { vex_printf("need to free up rreg: "); - (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg); + con->ppReg(rreg_lrs_la[rreg_lrs_la_next].rreg); vex_printf("\n\n"); } Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg); @@ -1223,8 +1149,8 @@ HInstrArray* doRegisterAllocation ( if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { HInstr* spill1 = NULL; HInstr* spill2 = NULL; - (*genSpill)( &spill1, &spill2, univ->regs[k], - vreg_lrs[m].spill_offset, mode64 ); + con->genSpill(&spill1, &spill2, con->univ->regs[k], + vreg_lrs[m].spill_offset, con->mode64); vassert(spill1 || spill2); /* can't both be NULL */ if (spill1) EMIT_INSTR(spill1); @@ -1271,7 +1197,7 @@ HInstrArray* doRegisterAllocation ( that the change is invisible to the standard-case handling that follows. */ - if (directReload && reg_usage_arr[ii].n_vRegs <= 2) { + if (con->directReload != NULL && reg_usage_arr[ii].n_vRegs <= 2) { Bool debug_direct_reload = False; HReg cand = INVALID_HREG; Bool nreads = 0; @@ -1305,19 +1231,20 @@ HInstrArray* doRegisterAllocation ( vassert(! sameHReg(reg_usage_arr[ii].vRegs[0], reg_usage_arr[ii].vRegs[1])); - reloaded = directReload ( instrs_in->arr[ii], cand, spilloff ); + reloaded = con->directReload(instrs_in->arr[ii], cand, spilloff); if (debug_direct_reload && !reloaded) { vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); - ppInstr(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); } if (reloaded) { /* Update info about the insn, so it looks as if it had been in this form all along. 
*/ instrs_in->arr[ii] = reloaded; - (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); + con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], + con->mode64); if (debug_direct_reload && !reloaded) { vex_printf(" --> "); - ppInstr(reloaded, mode64); + con->ppInstr(reloaded, con->mode64); } } @@ -1336,7 +1263,7 @@ HInstrArray* doRegisterAllocation ( vassert(hregIsVirtual(vreg)); if (0) { - vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n"); + vex_printf("considering "); con->ppReg(vreg); vex_printf("\n"); } /* Now we're trying to find a rreg for "vreg". First of all, @@ -1347,7 +1274,7 @@ HInstrArray* doRegisterAllocation ( Int n = vreg_state[m]; if (IS_VALID_RREGNO(n)) { vassert(rreg_state[n].disp == Bound); - addToHRegRemap(&remap, vreg, univ->regs[n]); + addToHRegRemap(&remap, vreg, con->univ->regs[n]); /* If this rreg is written or modified, mark it as different from any spill slot value. */ if (reg_usage_arr[ii].vMode[j] != HRmRead) @@ -1366,7 +1293,7 @@ HInstrArray* doRegisterAllocation ( Int k; for (k = 0; k < n_rregs; k++) { if (rreg_state[k].disp != Free - || hregClass(univ->regs[k]) != hregClass(vreg)) + || hregClass(con->univ->regs[k]) != hregClass(vreg)) continue; if (rreg_state[k].has_hlrs) { /* Well, at least we can use k_suboptimal if we really @@ -1387,7 +1314,7 @@ HInstrArray* doRegisterAllocation ( Int p = hregIndex(vreg); vassert(IS_VALID_VREGNO(p)); vreg_state[p] = toShort(k); - addToHRegRemap(&remap, vreg, univ->regs[k]); + addToHRegRemap(&remap, vreg, con->univ->regs[k]); /* Generate a reload if needed. This only creates needed reloads because the live range builder for vregs will guarantee that the first event for a vreg is a write. 
@@ -1398,8 +1325,8 @@ HInstrArray* doRegisterAllocation ( vassert(vreg_lrs[p].reg_class != HRcINVALID); HInstr* reload1 = NULL; HInstr* reload2 = NULL; - (*genReload)( &reload1, &reload2, univ->regs[k], - vreg_lrs[p].spill_offset, mode64 ); + con->genReload(&reload1, &reload2, con->univ->regs[k], + vreg_lrs[p].spill_offset, con->mode64); vassert(reload1 || reload2); /* can't both be NULL */ if (reload1) EMIT_INSTR(reload1); @@ -1433,7 +1360,7 @@ HInstrArray* doRegisterAllocation ( rreg_state[k].is_spill_cand = False; if (rreg_state[k].disp != Bound) continue; - if (hregClass(univ->regs[k]) != hregClass(vreg)) + if (hregClass(con->univ->regs[k]) != hregClass(vreg)) continue; rreg_state[k].is_spill_cand = True; /* Note, the following loop visits only the virtual regs @@ -1468,7 +1395,7 @@ HInstrArray* doRegisterAllocation ( vassert(IS_VALID_RREGNO(spillee)); vassert(rreg_state[spillee].disp == Bound); /* check it's the right class */ - vassert(hregClass(univ->regs[spillee]) == hregClass(vreg)); + vassert(hregClass(con->univ->regs[spillee]) == hregClass(vreg)); /* check we're not ejecting the vreg for which we are trying to free up a register. */ vassert(! 
sameHReg(rreg_state[spillee].vreg, vreg)); @@ -1483,8 +1410,8 @@ HInstrArray* doRegisterAllocation ( if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { HInstr* spill1 = NULL; HInstr* spill2 = NULL; - (*genSpill)( &spill1, &spill2, univ->regs[spillee], - vreg_lrs[m].spill_offset, mode64 ); + con->genSpill(&spill1, &spill2, con->univ->regs[spillee], + vreg_lrs[m].spill_offset, con->mode64); vassert(spill1 || spill2); /* can't both be NULL */ if (spill1) EMIT_INSTR(spill1); @@ -1509,8 +1436,8 @@ HInstrArray* doRegisterAllocation ( vassert(vreg_lrs[m].reg_class != HRcINVALID); HInstr* reload1 = NULL; HInstr* reload2 = NULL; - (*genReload)( &reload1, &reload2, univ->regs[spillee], - vreg_lrs[m].spill_offset, mode64 ); + con->genReload(&reload1, &reload2, con->univ->regs[spillee], + vreg_lrs[m].spill_offset, con->mode64); vassert(reload1 || reload2); /* can't both be NULL */ if (reload1) EMIT_INSTR(reload1); @@ -1529,7 +1456,7 @@ HInstrArray* doRegisterAllocation ( /* So after much twisting and turning, we have vreg mapped to rreg_state[spillee].rreg. Note that in the map. */ - addToHRegRemap(&remap, vreg, univ->regs[spillee]); + addToHRegRemap(&remap, vreg, con->univ->regs[spillee]); } /* iterate over virtual registers in this instruction. */ @@ -1545,7 +1472,7 @@ HInstrArray* doRegisterAllocation ( */ /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. 
*/ - (*mapRegs)( &remap, instrs_in->arr[ii], mode64 ); + con->mapRegs(&remap, instrs_in->arr[ii], con->mode64); EMIT_INSTR( instrs_in->arr[ii] ); if (DEBUG_REGALLOC) { diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c new file mode 100644 index 0000000..f798372 --- /dev/null +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -0,0 +1,1171 @@ +/*----------------------------------------------------------------------------*/ +/*--- begin host_generic_reg_alloc3.c ---*/ +/*----------------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation framework. + + Copyright (C) 2017-2017 Ivo Raisr + iv...@iv... + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_basictypes.h" +#include "libvex.h" + +#include "main_util.h" +#include "host_generic_regs.h" + +/* Set to 1 for lots of debugging output. */ +#define DEBUG_REGALLOC 0 + +/* Set to 1 for sanity checking at every instruction. + Set to 0 for sanity checking only every 17th one and the last one. */ +#define SANITY_CHECKS_EVERY_INSTR 0 + + +#define INVALID_INSTRNO (-2) + +/* Register allocator state is kept in an array of VRegState's. 
+ There is an element for every virtual register (vreg). + Elements are indexed [0 .. n_vregs-1]. + Records information about vreg live range and its state. */ +typedef + struct { + /* Live range, register class and spill offset are computed during the + first register allocator pass and remain unchanged after that. */ + + /* This vreg becomes live with this instruction (inclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short live_after; + /* This vreg becomes dead before this instruction (exclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short dead_before; + /* What kind of register this is. */ + HRegClass reg_class; + + /* What is its current disposition? */ + enum { Unallocated, /* Neither spilled nor assigned to a real reg. */ + Assigned, /* Assigned to a real register, viz rreg. */ + Spilled /* Spilled to the spill slot. */ + } disp; + + /* If .disp == Assigned, what rreg is it bound to? */ + HReg rreg; + + /* The "home" spill slot. The offset is relative to the beginning of + the guest state. */ + UShort spill_offset; + } + VRegState; + +/* The allocator also maintains a redundant array of indexes (rreg_state) from + rreg numbers back to entries in vreg_state. It is redundant because iff + rreg_state[r] == v then hregNumber(vreg_state[v].rreg) == r -- that is, the + two entries point at each other. The purpose of this is to speed up + activities which involve looking for a particular rreg: there is no need to + scan the vreg_state looking for it, just index directly into rreg_state. + The FAQ "does this rreg already have an associated vreg" is the main + beneficiary. + The identity of the real register is not recorded here, because the index + of this structure in |rreg_state| is the index number of the register, and + the register itself can be extracted from the RRegUniverse (univ). */ +typedef + struct { + /* What is its current disposition? */ + enum { Free, /* Not bound to any vreg. 
*/ + Bound, /* Bound to a vreg, viz vreg. */ + Reserved /* Reserved for an instruction. */ + } disp; + + /* If .disp == Bound, what vreg is it bound to? */ + HReg vreg; + } + RRegState; + +/* Records information on a real-register live range, associated with + a particular real register. Computed once; does not change. */ +typedef + struct { + /* This rreg becomes live with this instruction (inclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short live_after; + /* This rreg becomes dead before this instruction (exclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short dead_before; + } + RRegLR; + +/* Live ranges for a single rreg and the current one. + Live ranges are computed during the first register allocator pass and remain + unchanged after that. + The identity of the real register is not recorded here, because the index + of this structure in |rreg_lr_state| is the index number of the register, and + the register itself can be extracted from the RRegUniverse (univ). */ +typedef + struct { + RRegLR* lrs; + UInt lrs_size; + UInt lrs_used; + + /* Live range corresponding to the currently processed instruction. + Points into |lrs| array. */ + RRegLR *lr_current; + UInt lr_current_idx; + } + RRegLRState; + +#define IS_VALID_VREGNO(v) ((v) >= 0 && (v) < n_vregs) +#define IS_VALID_RREGNO(r) ((r) >= 0 && (r) < n_rregs) + +/* Compute the index of the highest and lowest 1 in a ULong, respectively. + Results are undefined if the argument is zero. 
Don't pass it zero :) */ +static inline UInt ULong__maxIndex ( ULong w64 ) { + return 63 - __builtin_clzll(w64); +} + +static inline UInt ULong__minIndex ( ULong w64 ) { + return __builtin_ctzll(w64); +} + +static inline void enlarge_rreg_lrs(RRegLRState* rreg_lrs) +{ + vassert(rreg_lrs->lrs_used == rreg_lrs->lrs_size); + + RRegLR* lr2 = LibVEX_Alloc_inline(2 * rreg_lrs->lrs_used * sizeof(RRegLR)); + for (UInt l = 0; l < rreg_lrs->lrs_used; l++) { + lr2[l] = rreg_lrs->lrs[l]; + } + + rreg_lrs->lrs = lr2; + rreg_lrs->lrs_size = 2 * rreg_lrs->lrs_used; +} + +static inline void print_state( + const RegAllocControl* con, + const VRegState* vreg_state, UInt n_vregs, + const RRegState* rreg_state, UInt n_rregs, + const RRegLRState* rreg_lr_state, + UShort current_ii) +{ + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + const VRegState* vreg = &vreg_state[v_idx]; + + if (vreg->live_after == INVALID_INSTRNO) { + continue; /* This is a dead vreg. Never comes into live. */ + } + vex_printf("vreg_state[%3u] \t", v_idx); + + UInt written; + switch (vreg->disp) { + case Unallocated: + written = vex_printf("unallocated"); + break; + case Assigned: + written = vex_printf("assigned to "); + written += con->ppReg(vreg->rreg); + break; + case Spilled: + written = vex_printf("spilled at offset %u", vreg->spill_offset); + break; + default: + vassert(0); + } + + for (Int w = 30 - written; w > 0; w--) { + vex_printf(" "); + } + + if (vreg->live_after > (Short) current_ii) { + vex_printf("[not live yet]\n"); + } else if ((Short) current_ii >= vreg->dead_before) { + vex_printf("[now dead]\n"); + } else { + vex_printf("[live]\n"); + } + } + + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + const RRegState* rreg = &rreg_state[r_idx]; + const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + vex_printf("rreg_state[%2u] = ", r_idx); + UInt written = con->ppReg(con->univ->regs[r_idx]); + for (Int w = 10 - written; w > 0; w--) { + vex_printf(" "); + } + + switch (rreg->disp) { + case Free: 
+ vex_printf("free\n"); + break; + case Bound: + vex_printf("bound for "); + con->ppReg(rreg->vreg); + vex_printf("\n"); + break; + case Reserved: + vex_printf("reserved - live range [%d, %d)\n", + rreg_lrs->lr_current->live_after, + rreg_lrs->lr_current->dead_before); + break; + } + } +} + +static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out, + const RegAllocControl* con, const HChar* why) +{ + if (DEBUG_REGALLOC) { + vex_printf("** "); + con->ppInstr(instr, con->mode64); + if (why != NULL) { + vex_printf(" (%s)", why); + } + vex_printf("\n\n"); + } + + addHInstr(instrs_out, instr); +} + +/* Spills a vreg assigned to some rreg. + The vreg is spilled and the rreg is freed. + Returns rreg's index. */ +static inline UInt spill_vreg( + HReg vreg, UInt v_idx, UInt current_ii, VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, HInstrArray* instrs_out, + const RegAllocControl* con) +{ + /* Check some invariants first. */ + vassert(IS_VALID_VREGNO((v_idx))); + vassert(vreg_state[v_idx].disp == Assigned); + HReg rreg = vreg_state[v_idx].rreg; + UInt r_idx = hregIndex(rreg); + vassert(IS_VALID_RREGNO(r_idx)); + vassert(hregClass(con->univ->regs[r_idx]) == hregClass(vreg)); + vassert(vreg_state[v_idx].dead_before > (Short) current_ii); + vassert(vreg_state[v_idx].reg_class != HRcINVALID); + + /* Generate spill. */ + HInstr* spill1 = NULL; + HInstr* spill2 = NULL; + con->genSpill(&spill1, &spill2, rreg, vreg_state[v_idx].spill_offset, + con->mode64); + vassert(spill1 != NULL || spill2 != NULL); /* cannot be both NULL */ + if (spill1 != NULL) { + emit_instr(spill1, instrs_out, con, "spill1"); + } + if (spill2 != NULL) { + emit_instr(spill2, instrs_out, con, "spill2"); + } + + /* Update register allocator state. 
*/ + vreg_state[v_idx].disp = Spilled; + vreg_state[v_idx].rreg = INVALID_HREG; + rreg_state[r_idx].disp = Free; + rreg_state[r_idx].vreg = INVALID_HREG; + + return r_idx; +} + +/* Chooses a vreg to be spilled based on various criteria. + The vreg must not be from the instruction being processed, that is, it must + not be listed in reg_usage->vRegs. */ +static inline HReg find_vreg_to_spill( + VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, + const HRegUsage* instr_regusage, HRegClass target_hregclass, + const HRegUsage* reg_usage, UInt scan_forward_from, UInt scan_forward_max, + const RegAllocControl* con) +{ + /* Scan forwards a few instructions to find the most distant mentioned + use of a vreg. We can scan in the range of (inclusive): + - reg_usage[scan_forward_from] + - reg_usage[scan_forward_end], where scan_forward_end + = MIN(scan_forward_max, scan_forward_from + FEW_INSTRUCTIONS). */ +# define FEW_INSTRUCTIONS 5 + UInt scan_forward_end + = (scan_forward_max <= scan_forward_from + FEW_INSTRUCTIONS) ? + scan_forward_max : scan_forward_from + FEW_INSTRUCTIONS; +# undef FEW_INSTRUCTIONS + + HReg vreg_found = INVALID_HREG; + UInt distance_so_far = 0; + + for (UInt r_idx = con->univ->allocable_start[target_hregclass]; + r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) { + if (rreg_state[r_idx].disp == Bound) { + HReg vreg = rreg_state[r_idx].vreg; + if (! HRegUsage__contains(instr_regusage, vreg)) { + UInt ii = scan_forward_from; + for ( ; ii <= scan_forward_end; ii++) { + if (HRegUsage__contains(&reg_usage[ii], vreg)) { + break; + } + } + + if (ii - scan_forward_from > distance_so_far) { + distance_so_far = ii - scan_forward_from; + vreg_found = vreg; + if (ii + distance_so_far == scan_forward_end) { + break; /* We are at the end. Nothing could be better.
*/ + } + } + } + } + } + + if (hregIsInvalid(vreg_found)) { + vex_printf("doRegisterAllocation_v3: cannot find a register in class: "); + ppHRegClass(target_hregclass); + vex_printf("\n"); + vpanic("doRegisterAllocation_v3: cannot find a register."); + } + + return vreg_found; +} + +/* Find a free rreg of the correct class. + Tries to find an rreg whose live range (if any) is as far ahead in the + incoming instruction stream as possible. An ideal rreg candidate is + a callee-save register because it won't be used for parameter passing + around helper function calls. */ +static Bool find_free_rreg( + VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, + const RRegLRState* rreg_lr_state, + UInt current_ii, HRegClass target_hregclass, + Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found) +{ + Bool found = False; + UInt distance_so_far = 0; /* running max for |live_after - current_ii| */ + + for (UInt r_idx = con->univ->allocable_start[target_hregclass]; + r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) { + const RRegState* rreg = &rreg_state[r_idx]; + const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + if (rreg->disp == Free) { + if (rreg_lrs->lrs_used == 0) { + found = True; + *r_idx_found = r_idx; + break; /* There could be nothing better, so break now. */ + } else { + const RRegLR* lr = rreg_lrs->lr_current; + if (lr->live_after > (Short) current_ii) { + /* Not live, yet. */ + if ((lr->live_after - (Short) current_ii) > distance_so_far) { + distance_so_far = lr->live_after - (Short) current_ii; + found = True; + *r_idx_found = r_idx; + } + } else if ((Short) current_ii >= lr->dead_before) { + /* Now dead. Effectively as if there is no LR now. */ + found = True; + *r_idx_found = r_idx; + break; /* There could be nothing better, so break now. */ + } else { + /* Going live for this instruction. This could happen only when + rregs are being reserved en mass, for example before + a helper call. 
*/ + vassert(reserve_phase); + } + } + } + } + + return found; +} + +/* A target-independent register allocator (v3). Requires various functions + which it uses to deal abstractly with instructions and registers, since it + cannot have any target-specific knowledge. + + Returns a new list of instructions, which, as a result of the behaviour of + mapRegs, will be in-place modifications of the original instructions. + + Requires that the incoming code has been generated using vreg numbers + 0, 1 .. n_vregs-1. Appearance of a vreg outside that range is a checked + run-time error. + + Takes unallocated instructions and returns allocated instructions. +*/ +HInstrArray* doRegisterAllocation_v3( + /* Incoming virtual-registerised code. */ + HInstrArray* instrs_in, + + /* Register allocator controls to use. */ + const RegAllocControl* con +) +{ + vassert((con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN) == 0); + + /* The main register allocator state. */ + UInt n_vregs = instrs_in->n_vregs; + VRegState* vreg_state = NULL; + if (n_vregs > 0) { + vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(VRegState)); + } + + /* If this is not so, the universe we have is nonsensical. */ + UInt n_rregs = con->univ->allocable; + vassert(n_rregs > 0); + STATIC_ASSERT(N_RREGUNIVERSE_REGS == 64); + + /* Redundant rreg -> vreg state. */ + RRegState* rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState)); + + /* Info on rreg live ranges. */ + RRegLRState* rreg_lr_state + = LibVEX_Alloc_inline(n_rregs * sizeof(RRegLRState)); + + /* Info on register usage in the incoming instruction array. Computed once + and remains unchanged, more or less; updated sometimes by the + direct-reload optimisation. */ + HRegUsage* reg_usage + = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used); + + /* The live range numbers are signed shorts, and so limiting the + number of instructions to 15000 comfortably guards against them + overflowing 32k. 
*/ + vassert(instrs_in->arr_used <= 15000); + + /* The output array of instructions. */ + HInstrArray* instrs_out = newHInstrArray(); + + +# define OFFENDING_VREG(_v_idx, _instr, _mode) \ + do { \ + vex_printf("\n\nOffending vreg = %u\n", (_v_idx)); \ + vex_printf("\nOffending instruction = "); \ + con->ppInstr((_instr), con->mode64); \ + vex_printf("\n"); \ + vpanic("doRegisterAllocation_v3: first event for vreg is "#_mode \ + " (should be Write)"); \ + } while (0) + +# define OFFENDING_RREG(_r_idx, _instr, _mode) \ + do { \ + vex_printf("\n\nOffending rreg = "); \ + con->ppReg(con->univ->regs[(_r_idx)]); \ + vex_printf("\nOffending instruction = "); \ + con->ppInstr((_instr), con->mode64); \ + vex_printf("\n"); \ + vpanic("doRegisterAllocation_v3: first event for rreg is "#_mode \ + " (should be Write)"); \ + } while (0) + + +/* Finds an rreg of the correct class. + If a free rreg is not found, then spills a vreg not used by the current + instruction and makes free the corresponding rreg. */ +# define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase) \ + ({ \ + UInt _r_free_idx = -1; \ + Bool free_rreg_found = find_free_rreg( \ + vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \ + (_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx); \ + if (!free_rreg_found) { \ + HReg vreg_to_spill = find_vreg_to_spill( \ + vreg_state, n_vregs, rreg_state, n_rregs, \ + &reg_usage[(_ii)], (_reg_class), \ + reg_usage, (_ii) + 1, \ + instrs_in->arr_used - 1, con); \ + _r_free_idx = spill_vreg(vreg_to_spill, hregIndex(vreg_to_spill), \ + (_ii), v... [truncated message content] |