You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(19) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
|
1
(12) |
2
(14) |
3
(8) |
|
4
(6) |
5
(15) |
6
(14) |
7
(13) |
8
(14) |
9
(13) |
10
(9) |
|
11
(6) |
12
(12) |
13
(10) |
14
(12) |
15
(9) |
16
(9) |
17
(2) |
|
18
(16) |
19
(21) |
20
(17) |
21
(6) |
22
(5) |
23
(3) |
24
(12) |
|
25
(10) |
26
(5) |
27
(15) |
28
(16) |
29
(12) |
30
(11) |
31
(11) |
|
From: <sv...@va...> - 2015-01-17 19:28:38
|
Author: sewardj
Date: Sat Jan 17 19:28:29 2015
New Revision: 3067
Log:
Code generation improvements for amd64 NCode:
* add and use AMD64AMode_IRS
* use testq instead of andq for misaligned-or-hi check
* use movzwq instead of mov and and-w-0xFFFF
* use short form immediates for loading address of helper fns
Modified:
branches/NCODE/priv/host_amd64_defs.c
branches/NCODE/priv/host_amd64_defs.h
branches/NCODE/priv/host_amd64_isel.c
Modified: branches/NCODE/priv/host_amd64_defs.c
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.c (original)
+++ branches/NCODE/priv/host_amd64_defs.c Sat Jan 17 19:28:29 2015
@@ -82,7 +82,8 @@
Int r;
static const HChar* ireg32_names[16]
= { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
- "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
+ "%r8d", "%r9d", "%r10d", "%r11d",
+ "%r12d", "%r13d", "%r14d", "%r15d" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
ppHReg(reg);
@@ -101,6 +102,31 @@
}
}
+static void ppHRegAMD64_lo16 ( HReg reg )
+{
+ Int r;
+ static const HChar* ireg16_names[16]
+ = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
+ "%r8w", "%r9w", "%r10w", "%r11w",
+ "%r12w", "%r13w", "%r14w", "%r15w" };
+ /* Be generic for all virtual regs. */
+ if (hregIsVirtual(reg)) {
+ ppHReg(reg);
+ vex_printf("w");
+ return;
+ }
+ /* But specific for real regs. */
+ switch (hregClass(reg)) {
+ case HRcInt64:
+ r = hregNumber(reg);
+ vassert(r >= 0 && r < 16);
+ vex_printf("%s", ireg16_names[r]);
+ return;
+ default:
+ vpanic("ppHRegAMD64_lo16: invalid regclass");
+ }
+}
+
void getAllocableRegs_AMD64 ( Int* nregs, HReg** arr )
{
#if 0
@@ -178,6 +204,15 @@
am->Aam.IR.reg = reg;
return am;
}
+AMD64AMode* AMD64AMode_IRS ( UInt imm32, HReg reg, Int shift ) {
+ AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
+ am->tag = Aam_IRS;
+ am->Aam.IRS.imm = imm32;
+ am->Aam.IRS.reg = reg;
+ am->Aam.IRS.shift = shift;
+ vassert(shift >= 0 && shift <= 3);
+ return am;
+}
AMD64AMode* AMD64AMode_IRRS ( UInt imm32, HReg base, HReg indEx, Int shift ) {
AMD64AMode* am = LibVEX_Alloc(sizeof(AMD64AMode));
am->tag = Aam_IRRS;
@@ -199,6 +234,11 @@
ppHRegAMD64(am->Aam.IR.reg);
vex_printf(")");
return;
+ case Aam_IRS:
+ vex_printf("0x%x(,", am->Aam.IRS.imm);
+ ppHRegAMD64(am->Aam.IRS.reg);
+ vex_printf(",%d)", 1 << am->Aam.IRS.shift);
+ return;
case Aam_IRRS:
vex_printf("0x%x(", am->Aam.IRRS.imm);
ppHRegAMD64(am->Aam.IRRS.base);
@@ -216,6 +256,9 @@
case Aam_IR:
addHRegUse(u, HRmRead, am->Aam.IR.reg);
return;
+ case Aam_IRS:
+ addHRegUse(u, HRmRead, am->Aam.IRS.reg);
+ return;
case Aam_IRRS:
addHRegUse(u, HRmRead, am->Aam.IRRS.base);
addHRegUse(u, HRmRead, am->Aam.IRRS.index);
@@ -230,6 +273,9 @@
case Aam_IR:
am->Aam.IR.reg = lookupHRegRemap(m, am->Aam.IR.reg);
return;
+ case Aam_IRS:
+ am->Aam.IRS.reg = lookupHRegRemap(m, am->Aam.IRS.reg);
+ return;
case Aam_IRRS:
am->Aam.IRRS.base = lookupHRegRemap(m, am->Aam.IRRS.base);
am->Aam.IRRS.index = lookupHRegRemap(m, am->Aam.IRRS.index);
@@ -722,6 +768,14 @@
i->Ain.MovxLQ.dst = dst;
return i;
}
+AMD64Instr* AMD64Instr_MovxWQ ( Bool syned, HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_MovxWQ;
+ i->Ain.MovxWQ.syned = syned;
+ i->Ain.MovxWQ.src = src;
+ i->Ain.MovxWQ.dst = dst;
+ return i;
+}
AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
AMD64AMode* src, HReg dst ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
@@ -1000,6 +1054,13 @@
i->tag = Ain_NC_CallR11;
return i;
}
+AMD64Instr* AMD64Instr_NC_TestQ ( HReg src, HReg dst ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_NC_TestQ;
+ i->Ain.NC_TestQ.src = src;
+ i->Ain.NC_TestQ.dst = dst;
+ return i;
+}
void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
{
@@ -1118,6 +1179,12 @@
vex_printf(",");
ppHRegAMD64(i->Ain.MovxLQ.dst);
return;
+ case Ain_MovxWQ:
+ vex_printf("mov%cwq ", i->Ain.MovxWQ.syned ? 's' : 'z');
+ ppHRegAMD64_lo16(i->Ain.MovxWQ.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.MovxWQ.dst);
+ return;
case Ain_LoadEX:
if (i->Ain.LoadEX.szSmall==4 && !i->Ain.LoadEX.syned) {
vex_printf("movl ");
@@ -1342,6 +1409,13 @@
vex_printf("call* %%r11");
return;
}
+ case Ain_NC_TestQ: {
+ vex_printf("testq ");
+ ppHRegAMD64(i->Ain.NC_TestQ.src);
+ vex_printf(",");
+ ppHRegAMD64(i->Ain.NC_TestQ.dst);
+ return;
+ }
default:
vpanic("ppAMD64Instr");
}
@@ -1489,6 +1563,10 @@
addHRegUse(u, HRmRead, i->Ain.MovxLQ.src);
addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
return;
+ case Ain_MovxWQ:
+ addHRegUse(u, HRmRead, i->Ain.MovxWQ.src);
+ addHRegUse(u, HRmWrite, i->Ain.MovxWQ.dst);
+ return;
case Ain_LoadEX:
addRegUsage_AMD64AMode(u, i->Ain.LoadEX.src);
addHRegUse(u, HRmWrite, i->Ain.LoadEX.dst);
@@ -1739,6 +1817,10 @@
mapReg(m, &i->Ain.MovxLQ.src);
mapReg(m, &i->Ain.MovxLQ.dst);
return;
+ case Ain_MovxWQ:
+ mapReg(m, &i->Ain.MovxWQ.src);
+ mapReg(m, &i->Ain.MovxWQ.dst);
+ return;
case Ain_LoadEX:
mapRegs_AMD64AMode(m, i->Ain.LoadEX.src);
mapReg(m, &i->Ain.LoadEX.dst);
@@ -2098,6 +2180,12 @@
greg, d32(base,index,scale)
| index != RSP
= 10 greg 100, scale index base, d32
+
+ -----------------------------------------------
+
+ greg, d32(,index,scale)
+ | index != RSP
+ = 00 greg 100, scale index 101, d32
*/
static void doAMode_M ( /*MOD*/AssemblyBuffer* ab, HReg greg, AMD64AMode* am )
{
@@ -2168,6 +2256,17 @@
vpanic("doAMode_M: can't emit amode IRRS");
/*NOTREACHED*/
}
+ if (am->tag == Aam_IRS) {
+ if (! sameHReg(am->Aam.IRS.reg, hregAMD64_RSP())) {
+ PUT(ab, mkModRegRM(0, iregBits210(greg), 4));
+ PUT(ab, mkSIB(am->Aam.IRS.shift, iregBits210(am->Aam.IRS.reg), 5));
+ emit32(ab, am->Aam.IRS.imm);
+ return;
+ }
+ ppAMD64AMode(am);
+ vpanic("doAMode_M: can't emit amode IRS");
+ /*NOTREACHED*/
+ }
vpanic("doAMode_M: unknown amode");
/*NOTREACHED*/
}
@@ -2191,19 +2290,30 @@
/* Make up a REX byte, with W=1 (size=64), for a (greg,amode) pair. */
static UChar rexAMode_M ( HReg greg, AMD64AMode* am )
{
- if (am->tag == Aam_IR) {
- UChar W = 1; /* we want 64-bit mode */
- UChar R = iregBit3(greg);
- UChar X = 0; /* not relevant */
- UChar B = iregBit3(am->Aam.IR.reg);
- return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
- }
- if (am->tag == Aam_IRRS) {
- UChar W = 1; /* we want 64-bit mode */
- UChar R = iregBit3(greg);
- UChar X = iregBit3(am->Aam.IRRS.index);
- UChar B = iregBit3(am->Aam.IRRS.base);
- return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+ switch (am->tag) {
+ case Aam_IR: {
+ UChar W = 1; /* we want 64-bit mode */
+ UChar R = iregBit3(greg);
+ UChar X = 0; /* not relevant */
+ UChar B = iregBit3(am->Aam.IR.reg);
+ return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+ }
+ case Aam_IRS: {
+ UChar W = 1; /* we want 64-bit mode */
+ UChar R = iregBit3(greg);
+ UChar X = iregBit3(am->Aam.IRS.reg);
+ UChar B = 0; /* there is no base register -- base is a constant. */
+ return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+ }
+ case Aam_IRRS: {
+ UChar W = 1; /* we want 64-bit mode */
+ UChar R = iregBit3(greg);
+ UChar X = iregBit3(am->Aam.IRRS.index);
+ UChar B = iregBit3(am->Aam.IRRS.base);
+ return toUChar(0x40 + ((W << 3) | (R << 2) | (X << 1) | (B << 0)));
+ }
+ default:
+ break;
}
vassert(0);
return 0; /*NOTREACHED*/
@@ -2339,7 +2449,7 @@
switch (i->tag) {
case Ain_Imm64:
- if (i->Ain.Imm64.imm64 <= 0xFFFFFULL) {
+ if (i->Ain.Imm64.imm64 <= /*0xFFFFFULL*/ 0x39000000ULL) {
/* Use the short form (load into 32 bit reg, + default
widening rule) for constants up to 0x39000000. We could
use this form for the range 0 to 0x7FFFFFFF inclusive, but
@@ -2984,6 +3094,14 @@
}
goto done;
+ case Ain_MovxWQ:
+ /* Need REX.W = 1 here, but rexAMode_R does that for us. */
+ PUT(ab, rexAMode_R(i->Ain.MovxWQ.dst, i->Ain.MovxWQ.src));
+ PUT(ab, 0x0F);
+ PUT(ab, i->Ain.MovxWQ.syned ? 0xBF : 0xB7);
+ doAMode_R(ab, i->Ain.MovxWQ.dst, i->Ain.MovxWQ.src);
+ goto done;
+
case Ain_LoadEX:
if (i->Ain.LoadEX.szSmall == 1 && !i->Ain.LoadEX.syned) {
/* movzbq */
@@ -3600,6 +3718,15 @@
goto done;
}
+ case Ain_NC_TestQ: {
+ HReg greg = i->Ain.NC_TestQ.src;
+ HReg ereg = i->Ain.NC_TestQ.dst;
+ PUT(ab, rexAMode_R(greg, ereg));
+ PUT(ab, 0x85);
+ doAMode_R(ab, greg, ereg);
+ goto done;
+ }
+
default:
goto bad;
}
@@ -4118,11 +4245,16 @@
HReg dstR = mapNReg(nregMap, ni->Nin.AluWri.dst);
HReg srcLR = mapNReg(nregMap, ni->Nin.AluWri.srcL);
HWord imm = ni->Nin.AluWri.srcR;
- if (how == Nalu_AND && fitsIn32Bits((ULong)imm)) {
- if (!sameHReg(srcLR, dstR)) {
- HI( mk_iMOVsd_RR(srcLR, dstR) );
- }
- HI( AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(imm), dstR) );
+ // Verified correct, but currently unused
+ //if (how == Nalu_AND && fitsIn32Bits((ULong)imm)) {
+ // if (!sameHReg(srcLR, dstR)) {
+ // HI( mk_iMOVsd_RR(srcLR, dstR) );
+ // }
+ // HI( AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(imm), dstR) );
+ // break;
+ //}
+ if (how == Nalu_AND && imm == 0xFFFFULL) {
+ HI( AMD64Instr_MovxWQ(False/*!syned*/, srcLR, dstR) );
break;
}
goto unhandled;
@@ -4134,7 +4266,7 @@
if (ni->Nin.SetFlagsWri.how == Nsf_TEST) {
HReg r11 = hregAMD64_R11();
HI( AMD64Instr_Imm64((ULong)imm, r11) );
- HI( AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Reg(reg), r11) );
+ HI( AMD64Instr_NC_TestQ(r11, reg) );
break;
}
if (ni->Nin.SetFlagsWri.how == Nsf_CMP && fitsIn32Bits((ULong)imm)) {
@@ -4163,9 +4295,7 @@
HReg indexR = mapNReg(nregMap, addr->Nea.IRS.index);
UChar shift = addr->Nea.IRS.shift;
if (szB == 8 && shift <= 3) {
- HReg r11 = hregAMD64_R11();
- AMD64AMode* am = AMD64AMode_IRRS((UInt)imm, r11, indexR, shift);
- HI( AMD64Instr_Imm64(0, r11) );
+ AMD64AMode* am = AMD64AMode_IRS((UInt)imm, indexR, shift);
HI( AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Mem(am), dstR) );
break;
}
Modified: branches/NCODE/priv/host_amd64_defs.h
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.h (original)
+++ branches/NCODE/priv/host_amd64_defs.h Sat Jan 17 19:28:29 2015
@@ -175,6 +175,7 @@
typedef
enum {
Aam_IR, /* Immediate + Reg */
+ Aam_IRS, /* Immediate + (Reg << Shift) */
Aam_IRRS /* Immediate + Reg1 + (Reg2 << Shift) */
}
AMD64AModeTag;
@@ -189,6 +190,11 @@
} IR;
struct {
UInt imm;
+ HReg reg;
+ Int shift; /* 0, 1, 2 or 3 only */
+ } IRS;
+ struct {
+ UInt imm;
HReg base;
HReg index;
Int shift; /* 0, 1, 2 or 3 only */
@@ -198,10 +204,9 @@
AMD64AMode;
extern AMD64AMode* AMD64AMode_IR ( UInt, HReg );
+extern AMD64AMode* AMD64AMode_IRS ( UInt, HReg, Int );
extern AMD64AMode* AMD64AMode_IRRS ( UInt, HReg, HReg, Int );
-extern AMD64AMode* dopyAMD64AMode ( AMD64AMode* );
-
extern void ppAMD64AMode ( AMD64AMode* );
@@ -417,7 +422,8 @@
Ain_XIndir, /* indirect transfer to GA */
Ain_XAssisted, /* assisted transfer to GA */
Ain_CMov64, /* conditional move */
- Ain_MovxLQ, /* reg-reg move, zx-ing/sx-ing top half */
+ Ain_MovxLQ, /* reg-reg move, zx-ing/sx-ing top 32 bits */
+ Ain_MovxWQ, /* reg-reg move, zx-ing/sx-ing top 48 bits */
Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
Ain_Store, /* store 32/16/8 bit value in memory */
Ain_Set64, /* convert condition code to 64-bit value */
@@ -456,7 +462,8 @@
// The following for NCode only
Ain_NC_Jmp32, /* cond. br. w/ 32-bit offset, 0F 8x xx xx xx xx */
/* or cond==Acc_ALWAYS, giving, E9 xx xx xx xx */
- Ain_NC_CallR11 /* Literally "call *%r11" */
+ Ain_NC_CallR11, /* Literally "call *%r11" */
+ Ain_NC_TestQ /* testq reg, reg */
}
AMD64InstrTag;
@@ -559,12 +566,18 @@
AMD64RM* src;
HReg dst;
} CMov64;
- /* reg-reg move, sx-ing/zx-ing top half */
+ /* reg-reg move, sx-ing/zx-ing top 32 bits */
struct {
Bool syned;
HReg src;
HReg dst;
} MovxLQ;
+ /* reg-reg move, sx-ing/zx-ing top 48 bits */
+ struct {
+ Bool syned;
+ HReg src;
+ HReg dst;
+ } MovxWQ;
/* Sign/Zero extending loads. Dst size is always 64 bits. */
struct {
UChar szSmall; /* only 1, 2 or 4 */
@@ -758,6 +771,15 @@
struct {
/* Literally "call *%r11" */
} NC_CallR11;
+ struct {
+ /* testq reg, reg. The src and dst characterisation is
+ somewhat spurious considering that the registers can be
+ swapped without affecting the result, and that there is
+ no result. */
+ HReg src;
+ HReg dst;
+ }
+ NC_TestQ;
} Ain;
}
AMD64Instr;
@@ -782,6 +804,7 @@
AMD64CondCode cond, IRJumpKind jk );
extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst );
extern AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_MovxWQ ( Bool syned, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
AMD64AMode* src, HReg dst );
extern AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst );
@@ -819,6 +842,7 @@
HReg* regsA, HReg* regsS );
extern AMD64Instr* AMD64Instr_NC_Jmp32 ( AMD64CondCode cc );
extern AMD64Instr* AMD64Instr_NC_CallR11 ( void );
+extern AMD64Instr* AMD64Instr_NC_TestQ ( HReg src, HReg dst );
extern void ppAMD64Instr ( const AMD64Instr*, Bool );
Modified: branches/NCODE/priv/host_amd64_isel.c
==============================================================================
--- branches/NCODE/priv/host_amd64_isel.c (original)
+++ branches/NCODE/priv/host_amd64_isel.c Sat Jan 17 19:28:29 2015
@@ -266,6 +266,10 @@
toBool( hregClass(am->Aam.IR.reg) == HRcInt64
&& (hregIsVirtual(am->Aam.IR.reg)
|| sameHReg(am->Aam.IR.reg, hregAMD64_RBP())) );
+ case Aam_IRS:
+ return
+ toBool( hregClass(am->Aam.IRS.reg) == HRcInt64
+ && hregIsVirtual(am->Aam.IRS.reg) );
case Aam_IRRS:
return
toBool( hregClass(am->Aam.IRRS.base) == HRcInt64
@@ -1036,8 +1040,7 @@
Aalu_AND, AMD64RMI_Imm(0xFF), dst));
break;
case Iop_Shr16:
- addInstr(env, AMD64Instr_Alu64R(
- Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
+ addInstr(env, AMD64Instr_MovxWQ(False, dst, dst));
break;
case Iop_Shr32:
addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
@@ -1047,8 +1050,7 @@
addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
break;
case Iop_Sar16:
- addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
- addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
+ addInstr(env, AMD64Instr_MovxWQ(True, dst, dst));
break;
case Iop_Sar32:
addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
@@ -1467,32 +1469,36 @@
}
case Iop_8Uto16:
case Iop_8Uto32:
- case Iop_8Uto64:
- case Iop_16Uto64:
- case Iop_16Uto32: {
- HReg dst = newVRegI(env);
- HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
- Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Uto32
- || e->Iex.Unop.op==Iop_16Uto64 );
- UInt mask = srcIs16 ? 0xFFFF : 0xFF;
+ case Iop_8Uto64: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
addInstr(env, mk_iMOVsd_RR(src,dst) );
addInstr(env, AMD64Instr_Alu64R(Aalu_AND,
- AMD64RMI_Imm(mask), dst));
+ AMD64RMI_Imm(0xFF), dst));
+ return dst;
+ }
+ case Iop_16Uto64:
+ case Iop_16Uto32: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_MovxWQ(False/*!syned*/, src, dst));
return dst;
}
case Iop_8Sto16:
- case Iop_8Sto64:
case Iop_8Sto32:
+ case Iop_8Sto64: {
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, mk_iMOVsd_RR(src,dst) );
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
+ addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
+ return dst;
+ }
case Iop_16Sto32:
case Iop_16Sto64: {
- HReg dst = newVRegI(env);
- HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
- Bool srcIs16 = toBool( e->Iex.Unop.op==Iop_16Sto32
- || e->Iex.Unop.op==Iop_16Sto64 );
- UInt amt = srcIs16 ? 48 : 56;
- addInstr(env, mk_iMOVsd_RR(src,dst) );
- addInstr(env, AMD64Instr_Sh64(Ash_SHL, amt, dst));
- addInstr(env, AMD64Instr_Sh64(Ash_SAR, amt, dst));
+ HReg dst = newVRegI(env);
+ HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
+ addInstr(env, AMD64Instr_MovxWQ(True/*syned*/, src, dst));
return dst;
}
case Iop_Not8:
@@ -1991,9 +1997,19 @@
&& e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8) {
UInt shift = e->Iex.Binop.arg2->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
if (shift == 1 || shift == 2 || shift == 3) {
- HReg r1 = iselIntExpr_R(env, e->Iex.Binop.arg1);
- HReg r2 = iselIntExpr_R(env, e->Iex.Binop.arg2->Iex.Binop.arg1 );
- return AMD64AMode_IRRS(0, r1, r2, shift);
+ IRExpr* expr1 = e->Iex.Binop.arg1;
+ IRExpr* expr2 = e->Iex.Binop.arg2->Iex.Binop.arg1;
+ if (expr1->tag == Iex_Const
+ && expr1->Iex.Const.con->tag == Ico_U64/*must always be true*/
+ && fitsIn32Bits(expr1->Iex.Const.con->Ico.U64)) {
+ HReg r2 = iselIntExpr_R(env, expr2);
+ return AMD64AMode_IRS(toUInt(expr1->Iex.Const.con->Ico.U64),
+ r2, shift);
+ } else {
+ HReg r1 = iselIntExpr_R(env, expr1);
+ HReg r2 = iselIntExpr_R(env, expr2);
+ return AMD64AMode_IRRS(0, r1, r2, shift);
+ }
}
}
|
|
From: <sv...@va...> - 2015-01-17 11:11:06
|
Author: sewardj
Date: Sat Jan 17 11:10:57 2015
New Revision: 3066
Log:
Get the register allocator to annotate NCode blocks with the set of
real registers live after the block. This is then used to generate
minimal save/restore sets around C helper calls made from NCode
blocks. Greatly reduces the amount of code in NCode code sections.
Modified:
branches/NCODE/priv/host_amd64_defs.c
branches/NCODE/priv/host_amd64_defs.h
branches/NCODE/priv/host_generic_reg_alloc2.c
Modified: branches/NCODE/priv/host_amd64_defs.c
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.c (original)
+++ branches/NCODE/priv/host_amd64_defs.c Sat Jan 17 11:10:57 2015
@@ -981,11 +981,12 @@
AMD64Instr* AMD64Instr_NCode ( NCodeTemplate* tmpl, HReg* regsR,
HReg* regsA, HReg* regsS ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
- i->tag = Ain_NCode;
- i->Ain.NCode.tmpl = tmpl;
- i->Ain.NCode.regsR = regsR;
- i->Ain.NCode.regsA = regsA;
- i->Ain.NCode.regsS = regsS;
+ i->tag = Ain_NCode;
+ i->Ain.NCode.tmpl = tmpl;
+ i->Ain.NCode.regsR = regsR;
+ i->Ain.NCode.regsA = regsA;
+ i->Ain.NCode.regsS = regsS;
+ i->Ain.NCode.liveAfter = NULL;
return i;
}
AMD64Instr* AMD64Instr_NC_Jmp32 ( AMD64CondCode cc ) {
@@ -3643,6 +3644,7 @@
/*MOD*/RelocationBuffer* rb,
const NInstr* ni,
const NRegMap* nregMap,
+ const HRegSet* hregsLiveAfter,
/* for debug printing only */
Bool verbose, NLabel niLabel );
@@ -3671,6 +3673,7 @@
vassert(endness_host == VexEndnessLE);
vassert(hi->tag == Ain_NCode);
const NCodeTemplate* tmpl = hi->Ain.NCode.tmpl;
+ const HRegSet* hregsLiveAfter = hi->Ain.NCode.liveAfter;
NRegMap nregMap;
nregMap.regsR = hi->Ain.NCode.regsR;
@@ -3718,14 +3721,16 @@
for (i = 0; i < nHot; i++) {
offsetsHot[i] = AssemblyBuffer__getNext(ab_hot);
NLabel lbl = mkNLabel(Nlz_Hot, i);
- emit_AMD64NInstr(ab_hot, rb, tmpl->hot[i], &nregMap, verbose, lbl);
+ emit_AMD64NInstr(ab_hot, rb, tmpl->hot[i], &nregMap,
+ hregsLiveAfter, verbose, lbl);
}
/* And the cold code */
for (i = 0; i < nCold; i++) {
offsetsCold[i] = AssemblyBuffer__getNext(ab_cold);
NLabel lbl = mkNLabel(Nlz_Cold, i);
- emit_AMD64NInstr(ab_cold, rb, tmpl->cold[i], &nregMap, verbose, lbl);
+ emit_AMD64NInstr(ab_cold, rb, tmpl->cold[i], &nregMap,
+ hregsLiveAfter, verbose, lbl);
}
/* Now visit the new relocation entries. */
@@ -3867,7 +3872,9 @@
static
void emit_AMD64NInstr ( /*MOD*/AssemblyBuffer* ab,
/*MOD*/RelocationBuffer* rb,
- const NInstr* ni, const NRegMap* nregMap,
+ const NInstr* ni,
+ const NRegMap* nregMap,
+ const HRegSet* hregsLiveAfter,
/* the next 2 are for debug printing only */
Bool verbose, NLabel niLabel )
{
@@ -3957,8 +3964,9 @@
overestimate of (1) -- for example, all regs available to
reg-alloc -- and refine it later.
*/
- HRegSet* set_1 = HRegSet__new();
- { Int nregs; HReg* arr;
+ const HRegSet* set_1 = hregsLiveAfter; //HRegSet__new();
+ if (0) {
+ Int nregs; HReg* arr;
getAllocableRegs_AMD64(&nregs, &arr);
HRegSet__fromVec(set_1, arr, nregs);
}
@@ -4017,8 +4025,10 @@
stackMove = (stackMove + 31) & ~31;
UInt i;
- HI( AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(stackMove),
- hregAMD64_RSP()) );
+ if (stackMove > 0) {
+ HI( AMD64Instr_Alu64R(Aalu_SUB, AMD64RMI_Imm(stackMove),
+ hregAMD64_RSP()) );
+ }
for (i = 0; i < n_to_preserve; i++) {
HReg r = HRegSet__index(to_preserve, i);
AMD64Instr* i1 = NULL;
@@ -4067,9 +4077,10 @@
if (i1) HI(i1);
if (i2) HI(i2);
}
- HI( AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(stackMove),
- hregAMD64_RSP()) );
-
+ if (stackMove > 0) {
+ HI( AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Imm(stackMove),
+ hregAMD64_RSP()) );
+ }
break;
}
Modified: branches/NCODE/priv/host_amd64_defs.h
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.h (original)
+++ branches/NCODE/priv/host_amd64_defs.h Sat Jan 17 11:10:57 2015
@@ -745,6 +745,7 @@
HReg* regsR; /* Result regs, INVALID_HREG terminated */
HReg* regsA; /* Arg regs, ditto */
HReg* regsS; /* Scratch regs, ditto */
+ HRegSet* liveAfter; /* initially NULL, filled in by RA */
} NCode;
/* --- for NCode only --- */
Modified: branches/NCODE/priv/host_generic_reg_alloc2.c
==============================================================================
--- branches/NCODE/priv/host_generic_reg_alloc2.c (original)
+++ branches/NCODE/priv/host_generic_reg_alloc2.c Sat Jan 17 11:10:57 2015
@@ -39,6 +39,10 @@
#include "main_util.h"
#include "host_generic_regs.h"
+// ******** WARNING KLUDGE DO NOT COMMIT
+#include "host_amd64_defs.h"
+// ******** WARNING KLUDGE DO NOT COMMIT
+
/* Set to 1 for lots of debugging output. */
#define DEBUG_REGALLOC 0
@@ -1533,6 +1537,27 @@
vex_printf("\n");
# endif
+ /* ------ Post-instruction actions for NCode blocks ------ */
+
+ /* If this instruction is an NCode block, annotate it with the
+ set of registers that are live after it. */
+ { AMD64Instr* ai = instrs_in->arr[ii];
+ if (ai->tag == Ain_NCode) {
+ //vex_printf("RA: after NCode: ");
+ vassert(ai->Ain.NCode.liveAfter == NULL);
+ HRegSet* live_after_NCode = HRegSet__new();
+ for (k = 0; k < n_rregs; k++) {
+ if (rreg_state[k].disp == Free)
+ continue;
+ //ppHRegAMD64(rreg_state[k].rreg);
+ HRegSet__add(live_after_NCode, rreg_state[k].rreg);
+ //vex_printf(" ");
+ }
+ //vex_printf("\n");
+ ai->Ain.NCode.liveAfter = live_after_NCode;
+ }
+ }
+
} /* iterate over insns */
/* ------ END: Process each insn in turn. ------ */
|