You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
|
1
|
2
(2) |
|
3
|
4
(2) |
5
|
6
(1) |
7
(2) |
8
|
9
|
|
10
|
11
(3) |
12
|
13
|
14
|
15
(1) |
16
|
|
17
|
18
|
19
|
20
|
21
|
22
|
23
(2) |
|
24
|
25
|
26
|
27
(1) |
28
|
29
|
30
|
|
31
|
|
|
|
|
|
|
|
From: Julian S. <se...@so...> - 2021-01-02 16:20:41
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=d2a22725450016759f3bbd22d849e5e82fd0c822 commit d2a22725450016759f3bbd22d849e5e82fd0c822 Author: Julian Seward <js...@ac...> Date: Sat Jan 2 17:18:53 2021 +0100 More arm64 isel tuning: create {and,orr,eor,add,sub} reg,reg,reg-shifted-by-imm Thus far the arm64 isel can't generate instructions of the form {and,or,xor,add,sub} reg,reg,reg-shifted-by-imm and hence sometimes winds up generating pairs like lsl x2, x1, #13 ; orr x4, x3, x2 when instead it could just have generated orr x4, x3, x1, lsl #13 This commit fixes that, although only for the 64-bit case, not the 32-bit case. Specifically, it can transform the IR forms {Add,Sub,And,Or,Xor}(E1, {Shl,Shr,Sar}(E2, immediate)) and {Add,And,Or,Xor}({Shl,Shr,Sar}(E1, immediate), E2) into a single arm64 instruction. Note that `Sub` is not included in the second line, because shifting the first operand requires inverting the arg order in the arm64 instruction, which isn't allowable with `Sub`, since it's not commutative and arm64 doesn't offer us a reverse-subtract instruction to use instead. This gives a 1.1% reduction in generated code size when running /usr/bin/date on Memcheck. 
Diff: --- VEX/priv/host_arm64_defs.c | 97 +++++++++++++++++++++++++++++++++++- VEX/priv/host_arm64_defs.h | 23 +++++++++ VEX/priv/host_arm64_isel.c | 119 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 236 insertions(+), 3 deletions(-) diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 6ea67ef319..67dd06c78a 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -498,6 +498,17 @@ static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) { } } +static const HChar* showARM64RRSOp ( ARM64RRSOp op ) { + switch (op) { + case ARM64rrs_ADD: return "add"; + case ARM64rrs_SUB: return "sub"; + case ARM64rrs_AND: return "and"; + case ARM64rrs_OR: return "orr"; + case ARM64rrs_XOR: return "eor"; + default: vpanic("showARM64RRSOp"); + } +} + static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) { switch (op) { case ARM64un_NEG: return "neg"; @@ -858,6 +869,20 @@ ARM64Instr* ARM64Instr_Logic ( HReg dst, i->ARM64in.Logic.op = op; return i; } +ARM64Instr* ARM64Instr_RRS ( HReg dst, HReg argL, HReg argR, + ARM64ShiftOp shiftOp, UChar amt, + ARM64RRSOp mainOp ) { + ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); + i->tag = ARM64in_RRS; + i->ARM64in.RRS.dst = dst; + i->ARM64in.RRS.argL = argL; + i->ARM64in.RRS.argR = argR; + i->ARM64in.RRS.shiftOp = shiftOp; + i->ARM64in.RRS.amt = amt; + i->ARM64in.RRS.mainOp = mainOp; + vassert(amt >= 1 && amt <= 63); + return i; +} ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) { ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); i->tag = ARM64in_Test; @@ -1446,6 +1471,16 @@ void ppARM64Instr ( const ARM64Instr* i ) { vex_printf(", "); ppARM64RIL(i->ARM64in.Logic.argR); return; + case ARM64in_RRS: + vex_printf("%s ", showARM64RRSOp(i->ARM64in.RRS.mainOp)); + ppHRegARM64(i->ARM64in.RRS.dst); + vex_printf(", "); + ppHRegARM64(i->ARM64in.RRS.argL); + vex_printf(", "); + ppHRegARM64(i->ARM64in.RRS.argR); + vex_printf(", %s #%u", 
showARM64ShiftOp(i->ARM64in.RRS.shiftOp), + i->ARM64in.RRS.amt); + return; case ARM64in_Test: vex_printf("tst "); ppHRegARM64(i->ARM64in.Test.argL); @@ -2018,6 +2053,11 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) addHRegUse(u, HRmRead, i->ARM64in.Logic.argL); addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR); return; + case ARM64in_RRS: + addHRegUse(u, HRmWrite, i->ARM64in.RRS.dst); + addHRegUse(u, HRmRead, i->ARM64in.RRS.argL); + addHRegUse(u, HRmRead, i->ARM64in.RRS.argR); + return; case ARM64in_Test: addHRegUse(u, HRmRead, i->ARM64in.Test.argL); addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR); @@ -2386,6 +2426,11 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 ) i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL); mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR); return; + case ARM64in_RRS: + i->ARM64in.RRS.dst = lookupHRegRemap(m, i->ARM64in.RRS.dst); + i->ARM64in.RRS.argL = lookupHRegRemap(m, i->ARM64in.RRS.argL); + i->ARM64in.RRS.argR = lookupHRegRemap(m, i->ARM64in.RRS.argR); + return; case ARM64in_Test: i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL); mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR); @@ -2892,8 +2937,13 @@ static inline UInt qregEnc ( HReg r ) #define X01110101 BITS8(0,1,1,1,0,1,0,1) #define X01110110 BITS8(0,1,1,1,0,1,1,0) #define X01110111 BITS8(0,1,1,1,0,1,1,1) +#define X10001010 BITS8(1,0,0,0,1,0,1,0) +#define X10001011 BITS8(1,0,0,0,1,0,1,1) +#define X10101010 BITS8(1,0,1,0,1,0,1,0) #define X11000001 BITS8(1,1,0,0,0,0,0,1) #define X11000011 BITS8(1,1,0,0,0,0,1,1) +#define X11001010 BITS8(1,1,0,0,1,0,1,0) +#define X11001011 BITS8(1,1,0,0,1,0,1,1) #define X11010100 BITS8(1,1,0,1,0,1,0,0) #define X11010110 BITS8(1,1,0,1,0,1,1,0) #define X11011000 BITS8(1,1,0,1,1,0,0,0) @@ -3064,7 +3114,6 @@ static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3, return w; } - static inline UInt X_3_8_5_1_5_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4, UInt f5, UInt f6, UInt f7 ) 
{ vassert(3+8+5+1+5+5+5 == 32); @@ -3086,6 +3135,27 @@ static inline UInt X_3_8_5_1_5_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4, return w; } +static inline UInt X_8_2_1_5_6_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4, + UInt f5, UInt f6, UInt f7 ) { + vassert(8+2+1+5+6+5+5 == 32); + vassert(f1 < (1<<8)); + vassert(f2 < (1<<2)); + vassert(f3 < (1<<1)); + vassert(f4 < (1<<5)); + vassert(f5 < (1<<6)); + vassert(f6 < (1<<5)); + vassert(f7 < (1<<5)); + UInt w = 0; + w = (w << 8) | f1; + w = (w << 2) | f2; + w = (w << 1) | f3; + w = (w << 5) | f4; + w = (w << 6) | f5; + w = (w << 5) | f6; + w = (w << 5) | f7; + return w; +} + //ZZ #define X0000 BITS4(0,0,0,0) //ZZ #define X0001 BITS4(0,0,0,1) //ZZ #define X0010 BITS4(0,0,1,0) @@ -3543,6 +3613,31 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, } goto done; } + case ARM64in_RRS: { + UInt top8 = 0; + switch (i->ARM64in.RRS.mainOp) { + case ARM64rrs_ADD: top8 = X10001011; break; + case ARM64rrs_SUB: top8 = X11001011; break; + case ARM64rrs_AND: top8 = X10001010; break; + case ARM64rrs_XOR: top8 = X11001010; break; + case ARM64rrs_OR: top8 = X10101010; break; + default: vassert(0); /*NOTREACHED*/ + } + UInt sh = 0; + switch (i->ARM64in.RRS.shiftOp) { + case ARM64sh_SHL: sh = X00; break; + case ARM64sh_SHR: sh = X01; break; + case ARM64sh_SAR: sh = X10; break; + default: vassert(0); /*NOTREACHED*/ + } + UInt amt = i->ARM64in.RRS.amt; + vassert(amt >= 1 && amt <= 63); + *p++ = X_8_2_1_5_6_5_5(top8, sh, 0, + iregEnc(i->ARM64in.RRS.argR), amt, + iregEnc(i->ARM64in.RRS.argL), + iregEnc(i->ARM64in.RRS.dst)); + goto done; + } case ARM64in_Test: { UInt rD = 31; /* XZR, we are going to dump the result */ UInt rN = iregEnc(i->ARM64in.Test.argL); diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index 24da64e22b..105d7ce843 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -254,6 +254,17 @@ typedef } ARM64ShiftOp; +typedef + enum { + ARM64rrs_ADD=54, + ARM64rrs_SUB, + ARM64rrs_AND, + 
ARM64rrs_OR, + ARM64rrs_XOR, + ARM64rrs_INVALID + } + ARM64RRSOp; + typedef enum { ARM64un_NEG=60, @@ -475,6 +486,7 @@ typedef ARM64in_Arith=1220, ARM64in_Cmp, ARM64in_Logic, + ARM64in_RRS, ARM64in_Test, ARM64in_Shift, ARM64in_Unary, @@ -567,6 +579,15 @@ typedef ARM64RIL* argR; ARM64LogicOp op; } Logic; + /* 64 bit AND/OR/XOR/ADD/SUB, reg, reg-with-imm-shift */ + struct { + HReg dst; + HReg argL; + HReg argR; + ARM64ShiftOp shiftOp; + UChar amt; /* 1 to 63 only */ + ARM64RRSOp mainOp; + } RRS; /* 64 bit TST reg, reg or bimm (AND and set flags) */ struct { HReg argL; @@ -956,6 +977,8 @@ typedef extern ARM64Instr* ARM64Instr_Arith ( HReg, HReg, ARM64RIA*, Bool isAdd ); extern ARM64Instr* ARM64Instr_Cmp ( HReg, ARM64RIA*, Bool is64 ); extern ARM64Instr* ARM64Instr_Logic ( HReg, HReg, ARM64RIL*, ARM64LogicOp ); +extern ARM64Instr* ARM64Instr_RRS ( HReg, HReg, HReg, ARM64ShiftOp, + UChar amt, ARM64RRSOp mainOp ); extern ARM64Instr* ARM64Instr_Test ( HReg, ARM64RIL* ); extern ARM64Instr* ARM64Instr_Shift ( HReg, HReg, ARM64RI6*, ARM64ShiftOp ); extern ARM64Instr* ARM64Instr_Unary ( HReg, HReg, ARM64UnaryOp ); diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index 517b7b15b8..689cdba969 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -791,6 +791,94 @@ Bool doHelperCall ( /*OUT*/UInt* stackAdjustAfterCall, partial values if necessary. */ +/* ---------------- RRS matching helper ---------------- */ + +/* This helper matches 64-bit integer expressions of the form + {Add,Sub,And,Or,Xor}(E1, {Shl,Shr,Sar}(E2, immediate)) + and + {Add,And,Or,Xor}({Shl,Shr,Sar}(E1, immediate), E2) + which is a useful thing to do because AArch64 can compute those in + a single instruction. 
+ */ +static Bool matchesRegRegShift(/*OUT*/ARM64RRSOp* mainOp, + /*OUT*/ARM64ShiftOp* shiftOp, + /*OUT*/UChar* amt, + /*OUT*/IRExpr** argUnshifted, + /*OUT*/IRExpr** argToBeShifted, + IRExpr* e) +{ + *mainOp = (ARM64RRSOp)0; + *shiftOp = (ARM64ShiftOp)0; + *amt = 0; + *argUnshifted = NULL; + *argToBeShifted = NULL; + if (e->tag != Iex_Binop) { + return False; + } + const IROp irMainOp = e->Iex.Binop.op; + Bool canSwap = True; + switch (irMainOp) { + case Iop_And64: *mainOp = ARM64rrs_AND; break; + case Iop_Or64: *mainOp = ARM64rrs_OR; break; + case Iop_Xor64: *mainOp = ARM64rrs_XOR; break; + case Iop_Add64: *mainOp = ARM64rrs_ADD; break; + case Iop_Sub64: *mainOp = ARM64rrs_SUB; canSwap = False; break; + default: return False; + } + /* The root node is OK. Now check the right (2nd) arg. */ + IRExpr* argL = e->Iex.Binop.arg1; + IRExpr* argR = e->Iex.Binop.arg2; + + // This loop runs either one or two iterations. In the first iteration, we + // check for a shiftable right (second) arg. If that fails, at the end of + // the first iteration, the args are swapped, if that is valid, and we go + // round again, hence checking for a shiftable left (first) arg. 
+ UInt iterNo = 1; + while (True) { + vassert(iterNo == 1 || iterNo == 2); + if (argR->tag == Iex_Binop) { + const IROp irShiftOp = argR->Iex.Binop.op; + if (irShiftOp == Iop_Shl64 + || irShiftOp == Iop_Shr64 || irShiftOp == Iop_Sar64) { + IRExpr* argRL = argR->Iex.Binop.arg1; + const IRExpr* argRR = argR->Iex.Binop.arg2; + if (argRR->tag == Iex_Const) { + const IRConst* argRRconst = argRR->Iex.Const.con; + vassert(argRRconst->tag == Ico_U8); // due to typecheck rules + const UChar amount = argRRconst->Ico.U8; + if (amount >= 1 && amount <= 63) { + // We got a match \o/ + // *mainOp is already set + switch (irShiftOp) { + case Iop_Shl64: *shiftOp = ARM64sh_SHL; break; + case Iop_Shr64: *shiftOp = ARM64sh_SHR; break; + case Iop_Sar64: *shiftOp = ARM64sh_SAR; break; + default: vassert(0); // guarded above + } + *amt = amount; + *argUnshifted = argL; + *argToBeShifted = argRL; + return True; + } + } + } + } + // We failed to get a match in the first iteration. So, provided the + // root node isn't SUB, swap the arguments and make one further + // iteration. If that doesn't succeed, we must give up. + if (iterNo == 1 && canSwap) { + IRExpr* tmp = argL; + argL = argR; + argR = tmp; + iterNo = 2; + continue; + } + // Give up. 
+ return False; + } + /*NOTREACHED*/ + } + /* --------------------- AMode --------------------- */ /* Return an AMode which computes the value of the specified @@ -1577,7 +1665,34 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) break; } - /* ADD/SUB */ + /* AND64/OR64/XOR64/ADD64/SUB64(e1, e2 shifted by imm) + AND64/OR64/XOR64/ADD64(e1 shifted by imm, e2) + */ + { + switch (e->Iex.Binop.op) { + case Iop_And64: case Iop_Or64: case Iop_Xor64: + case Iop_Add64: case Iop_Sub64:{ + ARM64RRSOp mainOp = ARM64rrs_INVALID; + ARM64ShiftOp shiftOp = (ARM64ShiftOp)0; // Invalid + IRExpr* argUnshifted = NULL; + IRExpr* argToBeShifted = NULL; + UChar amt = 0; + if (matchesRegRegShift(&mainOp, &shiftOp, &amt, &argUnshifted, + &argToBeShifted, e)) { + HReg rDst = newVRegI(env); + HReg rUnshifted = iselIntExpr_R(env, argUnshifted); + HReg rToBeShifted = iselIntExpr_R(env, argToBeShifted); + addInstr(env, ARM64Instr_RRS(rDst, rUnshifted, rToBeShifted, + shiftOp, amt, mainOp)); + return rDst; + } + } + default: + break; + } + } + + /* ADD/SUB(e1, e2) (for any e1, e2) */ switch (e->Iex.Binop.op) { case Iop_Add64: case Iop_Add32: case Iop_Sub64: case Iop_Sub32: { @@ -1593,7 +1708,7 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e ) break; } - /* AND/OR/XOR */ + /* AND/OR/XOR(e1, e2) (for any e1, e2) */ switch (e->Iex.Binop.op) { case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop; case Iop_Or64: case Iop_Or32: lop = ARM64lo_OR; goto log_binop; |
|
From: Julian S. <se...@so...> - 2021-01-02 15:20:56
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=f719470439e03b757382f769d758f6aa73e22947 commit f719470439e03b757382f769d758f6aa73e22947 Author: Julian Seward <js...@ac...> Date: Sat Jan 2 16:15:03 2021 +0100 A bit of tuning of the arm64 isel: do PUT(..) = 0x0:I64 in a single insn. When running Memcheck, most blocks will do one and often two of `PUT(..) = 0x0:I64`, as a result of the way the front end models arm64 condition codes. The arm64 isel would generate `mov xN, #0 ; str xN, [xBaseblock, #imm]`, which is pretty stupid. This patch changes it to a single insn: `str xzr, [xBaseblock, #imm]`. This is a special-case for `PUT(..) = 0x0:I64`. General-case integer stores of 0x0:I64 are unchanged. This gives a 1.9% reduction in generated code size when running /usr/bin/date on Memcheck. Diff: --- VEX/priv/host_arm64_defs.c | 37 ++++++++++++++++++++++++++++--------- VEX/priv/host_arm64_defs.h | 7 +++++++ VEX/priv/host_arm64_isel.c | 8 +++++++- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 13b497f600..6ea67ef319 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -118,9 +118,13 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) // x8 is used as a ProfInc temporary // x9 is used as a spill/reload/chaining/call temporary // x30 as LR - // x31 because dealing with the SP-vs-ZR overloading is too - // confusing, and we don't need to do so, so let's just avoid - // the problem + // + // x31 is mentionable, but not allocatable, and is dangerous to use + // because of SP-vs-ZR overloading. Here, we call it `XZR_XSP`. Whether + // it denotes the zero register or the stack pointer depends both on what + // kind of instruction it appears in and even on the position within an + // instruction that it appears. So be careful. There's absolutely + // nothing to prevent shooting oneself in the foot. 
// // Currently, we have 15 allocatable integer registers: // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28 @@ -137,6 +141,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) ru->regs[ru->size++] = hregARM64_X8(); ru->regs[ru->size++] = hregARM64_X9(); ru->regs[ru->size++] = hregARM64_X21(); + ru->regs[ru->size++] = hregARM64_XZR_XSP(); rRegUniverse_ARM64_initted = True; @@ -155,8 +160,8 @@ UInt ppHRegARM64 ( HReg reg ) { switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); - vassert(r >= 0 && r < 31); - return vex_printf("x%d", r); + vassert(r >= 0 && r <= 31); + return r ==31 ? vex_printf("xzr/xsp") : vex_printf("x%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); @@ -2746,6 +2751,19 @@ static inline UInt iregEnc ( HReg r ) return n; } +static inline UInt iregEncOr31 ( HReg r ) +{ + // This is the same as iregEnc() except that we're allowed to use the + // "special" encoding number 31, which means, depending on the context, + // either XZR/WZR or SP. + UInt n; + vassert(hregClass(r) == HRcInt64); + vassert(!hregIsVirtual(r)); + n = hregEncoding(r); + vassert(n <= 31); + return n; +} + static inline UInt dregEnc ( HReg r ) { UInt n; @@ -3360,13 +3378,14 @@ static UInt* do_load_or_store32 ( UInt* p, } -/* Generate a 64 bit load or store to/from xD, using the given amode +/* Generate a 64 bit integer load or store to/from xD, using the given amode for the address. */ static UInt* do_load_or_store64 ( UInt* p, Bool isLoad, UInt xD, ARM64AMode* am ) { - /* In all these cases, Rn can't be 31 since that means SP. */ - vassert(xD <= 30); + /* In all these cases, Rn can't be 31 since that means SP. But Rd can be + 31, meaning XZR/WZR. 
*/ + vassert(xD <= 31); if (am->tag == ARM64am_RI9) { /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d @@ -3646,7 +3665,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, } case ARM64in_LdSt64: { p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad, - iregEnc(i->ARM64in.LdSt64.rD), + iregEncOr31(i->ARM64in.LdSt64.rD), i->ARM64in.LdSt64.amode ); goto done; } diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index 5a82564ce6..24da64e22b 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -70,6 +70,13 @@ ST_IN HReg hregARM64_D13 ( void ) { return mkHReg(False, HRcFlt64, 13, 25); } ST_IN HReg hregARM64_X8 ( void ) { return mkHReg(False, HRcInt64, 8, 26); } ST_IN HReg hregARM64_X9 ( void ) { return mkHReg(False, HRcInt64, 9, 27); } ST_IN HReg hregARM64_X21 ( void ) { return mkHReg(False, HRcInt64, 21, 28); } + +// This is the integer register with encoding 31. Be *very* careful how you +// use it, since its meaning is dependent on the instruction and indeed even +// the position within an instruction, that it appears. It denotes either the +// zero register or the stack pointer. +ST_IN HReg hregARM64_XZR_XSP ( void ) { return mkHReg(False, + HRcInt64, 31, 29); } #undef ST_IN extern UInt ppHRegARM64 ( HReg ); diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c index da1218715e..517b7b15b8 100644 --- a/VEX/priv/host_arm64_isel.c +++ b/VEX/priv/host_arm64_isel.c @@ -3745,7 +3745,13 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt ) IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.Put.data); UInt offs = (UInt)stmt->Ist.Put.offset; if (tyd == Ity_I64 && 0 == (offs & 7) && offs < (8<<12)) { - HReg rD = iselIntExpr_R(env, stmt->Ist.Put.data); + HReg rD = INVALID_HREG; + if (isZeroU64(stmt->Ist.Put.data)) { + // In this context, XZR_XSP denotes the zero register. 
+ rD = hregARM64_XZR_XSP(); + } else { + rD = iselIntExpr_R(env, stmt->Ist.Put.data); + } ARM64AMode* am = mk_baseblock_64bit_access_amode(offs); addInstr(env, ARM64Instr_LdSt64(False/*!isLoad*/, rD, am)); return; |