From: Ivo R. <ir...@so...> - 2017-10-12 22:56:00
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=cd804792f24b7631748aeb8b6dfceff2af957f66 commit cd804792f24b7631748aeb8b6dfceff2af957f66 Author: Ivo Raisr <iv...@iv...> Date: Wed Sep 6 08:10:36 2017 +0200 Cherry pick 00d4667295a821fef9eb198abcb0c942dffb6045 from master. Reorder allocatable registers for AMD64, X86, and PPC so that the callee saved are listed first. Helper calls always trash all caller saved registers. By listing the callee saved first then VEX register allocator (both v2 and v3) is more likely to pick them and does not need to spill that much before helper calls. Diff: --- NEWS | 1 + VEX/priv/host_amd64_defs.c | 16 +++++++------- VEX/priv/host_amd64_defs.h | 18 ++++++++-------- VEX/priv/host_ppc_defs.c | 34 ++++++++++++++++-------------- VEX/priv/host_ppc_defs.h | 52 +++++++++++++++++++++++----------------------- VEX/priv/host_x86_defs.c | 6 +++--- VEX/priv/host_x86_defs.h | 12 +++++------ 7 files changed, 70 insertions(+), 69 deletions(-) diff --git a/NEWS b/NEWS index 446a7fa..6b9734e 100644 --- a/NEWS +++ b/NEWS @@ -48,6 +48,7 @@ where XXXXXX is the bug number as listed below. 382515 "Assertion 'di->have_dinfo' failed." on wine's dlls/mscoree/tests/mscoree.c 382998 xml-socket doesn't work 383275 massif valgrind: m_xarray.c:162 (ensureSpaceXA): Assertion '!xa->arr' failed +384584 Callee saved registers listed first for AMD64, X86, and PPC architectures Release 3.13.0 (15 June 2017) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c index ebe2b00..d9949d4 100644 --- a/VEX/priv/host_amd64_defs.c +++ b/VEX/priv/host_amd64_defs.c @@ -64,15 +64,15 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) those available for allocation by reg-alloc, and those that follow are not available for allocation. */ ru->allocable_start[HRcInt64] = ru->size; - ru->regs[ru->size++] = hregAMD64_RSI(); - ru->regs[ru->size++] = hregAMD64_RDI(); - ru->regs[ru->size++] = hregAMD64_R8(); - ru->regs[ru->size++] = hregAMD64_R9(); ru->regs[ru->size++] = hregAMD64_R12(); ru->regs[ru->size++] = hregAMD64_R13(); ru->regs[ru->size++] = hregAMD64_R14(); ru->regs[ru->size++] = hregAMD64_R15(); ru->regs[ru->size++] = hregAMD64_RBX(); + ru->regs[ru->size++] = hregAMD64_RSI(); + ru->regs[ru->size++] = hregAMD64_RDI(); + ru->regs[ru->size++] = hregAMD64_R8(); + ru->regs[ru->size++] = hregAMD64_R9(); ru->regs[ru->size++] = hregAMD64_R10(); ru->allocable_end[HRcInt64] = ru->size - 1; @@ -1460,18 +1460,16 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 ) /* This is a bit subtle. */ /* First off, claim it trashes all the caller-saved regs which fall within the register allocator's jurisdiction. - These I believe to be: rax rcx rdx rsi rdi r8 r9 r10 r11 - and all the xmm registers. - */ + These I believe to be: rax rcx rdx rdi rsi r8 r9 r10 + and all the xmm registers. 
*/ addHRegUse(u, HRmWrite, hregAMD64_RAX()); addHRegUse(u, HRmWrite, hregAMD64_RCX()); addHRegUse(u, HRmWrite, hregAMD64_RDX()); - addHRegUse(u, HRmWrite, hregAMD64_RSI()); addHRegUse(u, HRmWrite, hregAMD64_RDI()); + addHRegUse(u, HRmWrite, hregAMD64_RSI()); addHRegUse(u, HRmWrite, hregAMD64_R8()); addHRegUse(u, HRmWrite, hregAMD64_R9()); addHRegUse(u, HRmWrite, hregAMD64_R10()); - addHRegUse(u, HRmWrite, hregAMD64_R11()); addHRegUse(u, HRmWrite, hregAMD64_XMM0()); addHRegUse(u, HRmWrite, hregAMD64_XMM1()); addHRegUse(u, HRmWrite, hregAMD64_XMM3()); diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index 57ef169..349e43c 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -47,15 +47,15 @@ */ #define ST_IN static inline -ST_IN HReg hregAMD64_RSI ( void ) { return mkHReg(False, HRcInt64, 6, 0); } -ST_IN HReg hregAMD64_RDI ( void ) { return mkHReg(False, HRcInt64, 7, 1); } -ST_IN HReg hregAMD64_R8 ( void ) { return mkHReg(False, HRcInt64, 8, 2); } -ST_IN HReg hregAMD64_R9 ( void ) { return mkHReg(False, HRcInt64, 9, 3); } -ST_IN HReg hregAMD64_R12 ( void ) { return mkHReg(False, HRcInt64, 12, 4); } -ST_IN HReg hregAMD64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 5); } -ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 6); } -ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 7); } -ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 8); } +ST_IN HReg hregAMD64_R12 ( void ) { return mkHReg(False, HRcInt64, 12, 0); } +ST_IN HReg hregAMD64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 1); } +ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 2); } +ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 3); } +ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 4); } +ST_IN HReg hregAMD64_RSI ( void ) { return mkHReg(False, HRcInt64, 6, 5); } +ST_IN HReg hregAMD64_RDI ( void ) { return mkHReg(False, HRcInt64, 7, 6); } +ST_IN HReg hregAMD64_R8 ( void ) { return mkHReg(False, HRcInt64, 8, 7); } +ST_IN HReg hregAMD64_R9 ( void ) { return mkHReg(False, HRcInt64, 9, 8); } ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 9); } ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 10); } diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c index 33ee292..1ef9c5c 100644 --- a/VEX/priv/host_ppc_defs.c +++ b/VEX/priv/host_ppc_defs.c @@ -69,6 +69,24 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) // GPR1 = stack pointer // GPR2 = TOC pointer ru->allocable_start[(mode64) ? HRcInt64 : HRcInt32] = ru->size; + // GPR14 and above are callee save. List them first. + ru->regs[ru->size++] = hregPPC_GPR14(mode64); + ru->regs[ru->size++] = hregPPC_GPR15(mode64); + ru->regs[ru->size++] = hregPPC_GPR16(mode64); + ru->regs[ru->size++] = hregPPC_GPR17(mode64); + ru->regs[ru->size++] = hregPPC_GPR18(mode64); + ru->regs[ru->size++] = hregPPC_GPR19(mode64); + ru->regs[ru->size++] = hregPPC_GPR20(mode64); + ru->regs[ru->size++] = hregPPC_GPR21(mode64); + ru->regs[ru->size++] = hregPPC_GPR22(mode64); + ru->regs[ru->size++] = hregPPC_GPR23(mode64); + ru->regs[ru->size++] = hregPPC_GPR24(mode64); + ru->regs[ru->size++] = hregPPC_GPR25(mode64); + ru->regs[ru->size++] = hregPPC_GPR26(mode64); + ru->regs[ru->size++] = hregPPC_GPR27(mode64); + ru->regs[ru->size++] = hregPPC_GPR28(mode64); + + // Caller save registers now. 
ru->regs[ru->size++] = hregPPC_GPR3(mode64); ru->regs[ru->size++] = hregPPC_GPR4(mode64); ru->regs[ru->size++] = hregPPC_GPR5(mode64); @@ -85,22 +103,6 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) ru->regs[ru->size++] = hregPPC_GPR12(mode64); } // GPR13 = thread specific pointer - // GPR14 and above are callee save. Yay. - ru->regs[ru->size++] = hregPPC_GPR14(mode64); - ru->regs[ru->size++] = hregPPC_GPR15(mode64); - ru->regs[ru->size++] = hregPPC_GPR16(mode64); - ru->regs[ru->size++] = hregPPC_GPR17(mode64); - ru->regs[ru->size++] = hregPPC_GPR18(mode64); - ru->regs[ru->size++] = hregPPC_GPR19(mode64); - ru->regs[ru->size++] = hregPPC_GPR20(mode64); - ru->regs[ru->size++] = hregPPC_GPR21(mode64); - ru->regs[ru->size++] = hregPPC_GPR22(mode64); - ru->regs[ru->size++] = hregPPC_GPR23(mode64); - ru->regs[ru->size++] = hregPPC_GPR24(mode64); - ru->regs[ru->size++] = hregPPC_GPR25(mode64); - ru->regs[ru->size++] = hregPPC_GPR26(mode64); - ru->regs[ru->size++] = hregPPC_GPR27(mode64); - ru->regs[ru->size++] = hregPPC_GPR28(mode64); ru->allocable_end[(mode64) ? HRcInt64 : HRcInt32] = ru->size - 1; // GPR29 is reserved for the dispatcher // GPR30 is reserved as AltiVec spill reg temporary diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h index 5cc9a85..f8fcbf9 100644 --- a/VEX/priv/host_ppc_defs.h +++ b/VEX/priv/host_ppc_defs.h @@ -57,35 +57,35 @@ mkHReg(False, HRcVec128, \ (_enc), (_mode64) ? (_ix64) : (_ix32)) -ST_IN HReg hregPPC_GPR3 ( Bool mode64 ) { return GPR(mode64, 3, 0, 0); } -ST_IN HReg hregPPC_GPR4 ( Bool mode64 ) { return GPR(mode64, 4, 1, 1); } -ST_IN HReg hregPPC_GPR5 ( Bool mode64 ) { return GPR(mode64, 5, 2, 2); } -ST_IN HReg hregPPC_GPR6 ( Bool mode64 ) { return GPR(mode64, 6, 3, 3); } -ST_IN HReg hregPPC_GPR7 ( Bool mode64 ) { return GPR(mode64, 7, 4, 4); } -ST_IN HReg hregPPC_GPR8 ( Bool mode64 ) { return GPR(mode64, 8, 5, 5); } -ST_IN HReg hregPPC_GPR9 ( Bool mode64 ) { return GPR(mode64, 9, 6, 6); } -ST_IN HReg hregPPC_GPR10 ( Bool mode64 ) { return GPR(mode64, 10, 7, 7); } +ST_IN HReg hregPPC_GPR14 ( Bool mode64 ) { return GPR(mode64, 14, 0, 0); } +ST_IN HReg hregPPC_GPR15 ( Bool mode64 ) { return GPR(mode64, 15, 1, 1); } +ST_IN HReg hregPPC_GPR16 ( Bool mode64 ) { return GPR(mode64, 16, 2, 2); } +ST_IN HReg hregPPC_GPR17 ( Bool mode64 ) { return GPR(mode64, 17, 3, 3); } +ST_IN HReg hregPPC_GPR18 ( Bool mode64 ) { return GPR(mode64, 18, 4, 4); } +ST_IN HReg hregPPC_GPR19 ( Bool mode64 ) { return GPR(mode64, 19, 5, 5); } +ST_IN HReg hregPPC_GPR20 ( Bool mode64 ) { return GPR(mode64, 20, 6, 6); } +ST_IN HReg hregPPC_GPR21 ( Bool mode64 ) { return GPR(mode64, 21, 7, 7); } +ST_IN HReg hregPPC_GPR22 ( Bool mode64 ) { return GPR(mode64, 22, 8, 8); } +ST_IN HReg hregPPC_GPR23 ( Bool mode64 ) { return GPR(mode64, 23, 9, 9); } +ST_IN HReg hregPPC_GPR24 ( Bool mode64 ) { return GPR(mode64, 24, 10, 10); } +ST_IN HReg hregPPC_GPR25 ( Bool mode64 ) { return GPR(mode64, 25, 11, 11); } +ST_IN HReg hregPPC_GPR26 ( Bool mode64 ) { return GPR(mode64, 26, 12, 12); } +ST_IN HReg hregPPC_GPR27 ( Bool mode64 ) { return GPR(mode64, 27, 13, 13); } +ST_IN HReg hregPPC_GPR28 ( Bool mode64 ) { return GPR(mode64, 28, 14, 44); } + +ST_IN HReg hregPPC_GPR3 ( Bool mode64 ) { return GPR(mode64, 3, 15, 15); } +ST_IN HReg hregPPC_GPR4 ( Bool mode64 ) { return GPR(mode64, 4, 16, 16); } +ST_IN HReg hregPPC_GPR5 ( Bool mode64 ) { return GPR(mode64, 5, 17, 17); } +ST_IN HReg hregPPC_GPR6 ( Bool mode64 ) { return GPR(mode64, 6, 18, 18); } +ST_IN HReg hregPPC_GPR7 ( Bool mode64 ) { 
return GPR(mode64, 7, 19, 19); } +ST_IN HReg hregPPC_GPR8 ( Bool mode64 ) { return GPR(mode64, 8, 20, 20); } +ST_IN HReg hregPPC_GPR9 ( Bool mode64 ) { return GPR(mode64, 9, 21, 21); } +ST_IN HReg hregPPC_GPR10 ( Bool mode64 ) { return GPR(mode64, 10, 22, 22); } // r11 and r12 are only allocatable in 32-bit mode. Hence the 64-bit // index numbering doesn't advance for these two. -ST_IN HReg hregPPC_GPR11 ( Bool mode64 ) { return GPR(mode64, 11, 0, 8); } -ST_IN HReg hregPPC_GPR12 ( Bool mode64 ) { return GPR(mode64, 12, 0, 9); } - -ST_IN HReg hregPPC_GPR14 ( Bool mode64 ) { return GPR(mode64, 14, 8, 10); } -ST_IN HReg hregPPC_GPR15 ( Bool mode64 ) { return GPR(mode64, 15, 9, 11); } -ST_IN HReg hregPPC_GPR16 ( Bool mode64 ) { return GPR(mode64, 16, 10, 12); } -ST_IN HReg hregPPC_GPR17 ( Bool mode64 ) { return GPR(mode64, 17, 11, 13); } -ST_IN HReg hregPPC_GPR18 ( Bool mode64 ) { return GPR(mode64, 18, 12, 14); } -ST_IN HReg hregPPC_GPR19 ( Bool mode64 ) { return GPR(mode64, 19, 13, 15); } -ST_IN HReg hregPPC_GPR20 ( Bool mode64 ) { return GPR(mode64, 20, 14, 16); } -ST_IN HReg hregPPC_GPR21 ( Bool mode64 ) { return GPR(mode64, 21, 15, 17); } -ST_IN HReg hregPPC_GPR22 ( Bool mode64 ) { return GPR(mode64, 22, 16, 18); } -ST_IN HReg hregPPC_GPR23 ( Bool mode64 ) { return GPR(mode64, 23, 17, 19); } -ST_IN HReg hregPPC_GPR24 ( Bool mode64 ) { return GPR(mode64, 24, 18, 20); } -ST_IN HReg hregPPC_GPR25 ( Bool mode64 ) { return GPR(mode64, 25, 19, 21); } -ST_IN HReg hregPPC_GPR26 ( Bool mode64 ) { return GPR(mode64, 26, 20, 22); } -ST_IN HReg hregPPC_GPR27 ( Bool mode64 ) { return GPR(mode64, 27, 21, 23); } -ST_IN HReg hregPPC_GPR28 ( Bool mode64 ) { return GPR(mode64, 28, 22, 24); } +ST_IN HReg hregPPC_GPR11 ( Bool mode64 ) { return GPR(mode64, 11, 22, 23); } +ST_IN HReg hregPPC_GPR12 ( Bool mode64 ) { return GPR(mode64, 12, 22, 24); } ST_IN HReg hregPPC_FPR14 ( Bool mode64 ) { return FPR(mode64, 14, 23, 25); } ST_IN HReg hregPPC_FPR15 ( Bool mode64 ) { return FPR(mode64, 15, 24, 26); } diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index c55059e..5f47bdb 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -64,12 +64,12 @@ const RRegUniverse* getRRegUniverse_X86 ( void ) those available for allocation by reg-alloc, and those that follow are not available for allocation. 
*/ ru->allocable_start[HRcInt32] = ru->size; - ru->regs[ru->size++] = hregX86_EAX(); ru->regs[ru->size++] = hregX86_EBX(); - ru->regs[ru->size++] = hregX86_ECX(); - ru->regs[ru->size++] = hregX86_EDX(); ru->regs[ru->size++] = hregX86_ESI(); ru->regs[ru->size++] = hregX86_EDI(); + ru->regs[ru->size++] = hregX86_EAX(); + ru->regs[ru->size++] = hregX86_ECX(); + ru->regs[ru->size++] = hregX86_EDX(); ru->allocable_end[HRcInt32] = ru->size - 1; ru->allocable_start[HRcFlt64] = ru->size; diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h index ad5b75d..1f18550 100644 --- a/VEX/priv/host_x86_defs.h +++ b/VEX/priv/host_x86_defs.h @@ -47,12 +47,12 @@ */ #define ST_IN static inline -ST_IN HReg hregX86_EAX ( void ) { return mkHReg(False, HRcInt32, 0, 0); } -ST_IN HReg hregX86_EBX ( void ) { return mkHReg(False, HRcInt32, 3, 1); } -ST_IN HReg hregX86_ECX ( void ) { return mkHReg(False, HRcInt32, 1, 2); } -ST_IN HReg hregX86_EDX ( void ) { return mkHReg(False, HRcInt32, 2, 3); } -ST_IN HReg hregX86_ESI ( void ) { return mkHReg(False, HRcInt32, 6, 4); } -ST_IN HReg hregX86_EDI ( void ) { return mkHReg(False, HRcInt32, 7, 5); } +ST_IN HReg hregX86_EBX ( void ) { return mkHReg(False, HRcInt32, 3, 0); } +ST_IN HReg hregX86_ESI ( void ) { return mkHReg(False, HRcInt32, 6, 1); } +ST_IN HReg hregX86_EDI ( void ) { return mkHReg(False, HRcInt32, 7, 2); } +ST_IN HReg hregX86_EAX ( void ) { return mkHReg(False, HRcInt32, 0, 3); } +ST_IN HReg hregX86_ECX ( void ) { return mkHReg(False, HRcInt32, 1, 4); } +ST_IN HReg hregX86_EDX ( void ) { return mkHReg(False, HRcInt32, 2, 5); } ST_IN HReg hregX86_FAKE0 ( void ) { return mkHReg(False, HRcFlt64, 0, 6); } ST_IN HReg hregX86_FAKE1 ( void ) { return mkHReg(False, HRcFlt64, 1, 7); } |
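The rationale given above (helper calls trash every caller-saved register, and the allocator tends to pick registers from the front of the universe) can be shown with a toy allocator. The following is a minimal sketch, not VEX code; the register names and the first-free-wins policy are simplifications made for illustration.

   /* Minimal sketch of why listing callee-saved rregs first helps an
      allocator that prefers earlier entries in the register universe.
      All names below are invented for illustration. */
   #include <stdbool.h>
   #include <stdio.h>

   #define N_RREGS 4

   typedef struct {
      const char *name;
      bool        callee_saved;   /* survives helper calls */
      bool        is_free;
   } RReg;

   /* Callee-saved first, mirroring the new AMD64/X86/PPC ordering. */
   static RReg universe[N_RREGS] = {
      { "r12", true,  true },
      { "rbx", true,  true },
      { "rsi", false, true },
      { "rdi", false, true },
   };

   /* Take the first free rreg.  A vreg that is live across a helper call
      only needs a spill/reload pair if it lands in a caller-saved reg. */
   static int pick_first_free(bool spans_helper_call, int *spills)
   {
      for (int i = 0; i < N_RREGS; i++) {
         if (universe[i].is_free) {
            universe[i].is_free = false;
            if (spans_helper_call && !universe[i].callee_saved)
               (*spills)++;
            return i;
         }
      }
      return -1;
   }

   int main(void)
   {
      int spills = 0;
      int r = pick_first_free(true, &spills);
      printf("vreg -> %s, spills around helper calls: %d\n",
             universe[r].name, spills);   /* prints "vreg -> r12, ... 0" */
      return 0;
   }

With the old ordering, the same vreg would have landed in rsi and needed a spill/reload pair around every helper call it spans.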
From: Ivo R. <ir...@so...> - 2017-10-11 19:26:47
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=0f97613a4c76537886aeee6d42152c22853369a8

commit 0f97613a4c76537886aeee6d42152c22853369a8
Author: Ivo Raisr <iv...@iv...>
Date:   Wed Oct 11 21:22:57 2017 +0200

    Register allocator: Implement spilled/assigned vreg state merge.

Diff:
---
 VEX/priv/host_generic_reg_alloc3.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c
index 08ff0ee..18eb917 100644
--- a/VEX/priv/host_generic_reg_alloc3.c
+++ b/VEX/priv/host_generic_reg_alloc3.c
@@ -1732,8 +1732,8 @@ static void merge_vreg_states(RegAllocChunk* chunk,
             break;
          case Assigned:
             /* vreg1: spilled; vreg2: assigned to rreg2 */
-            /* Generate spill. */
-            vpanic("Spill not implemented, yet.");
+            spill_vreg(outOfLine, state2, vreg2, chunk->next->ii_total_start,
+                       depth, con);
             break;
          case Spilled:
             /* vreg1: spilled; vreg2: spilled */
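For context on the hunk above: merge_vreg_states() fills in a table of cases for when the two legs of an if-then-else rejoin, and this commit implements the cell where the fall-through leg has the vreg spilled while the out-of-line leg still has it assigned. A rough sketch of that rule follows, with invented types and helper names, assuming both legs share the vreg's home spill slot.

   /* Sketch only: when the fall-through leg has a vreg Spilled and the
      out-of-line leg has the same vreg Assigned, the out-of-line leg
      spills its copy so both legs agree the value lives in memory. */
   #include <stdio.h>

   typedef enum { Unallocated, Assigned, Spilled } Disp;
   typedef struct { Disp disp; int rreg; int spill_offset; } VState;

   static void emit_spill(int rreg, int offset)   /* stands in for spill_vreg() */
   {
      printf("  spill r%d to slot offset %d (out-of-line leg only)\n",
             rreg, offset);
   }

   static void merge_spilled_vs_assigned(VState *fallthrough, VState *outofline)
   {
      if (fallthrough->disp == Spilled && outofline->disp == Assigned) {
         emit_spill(outofline->rreg, fallthrough->spill_offset);
         outofline->disp         = Spilled;
         outofline->spill_offset = fallthrough->spill_offset;
      }
   }

   int main(void)
   {
      VState leg1 = { Spilled,  -1, 16 };   /* vreg spilled at offset 16 */
      VState leg2 = { Assigned,  5,  0 };   /* same vreg still in rreg 5 */
      merge_spilled_vs_assigned(&leg1, &leg2);
      return 0;
   }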
From: Ivo R. <ir...@so...> - 2017-10-11 19:26:43
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=34485abe412893a8c25146e2f6a3383a2a4cffa9 commit 34485abe412893a8c25146e2f6a3383a2a4cffa9 Author: Ivo Raisr <iv...@iv...> Date: Wed Oct 11 19:42:50 2017 +0200 Register allocator: Fix merging of assigned/spilled vregs. Fix the case when vreg2 is spilled but rreg1 in state2 is bound to an offending vreg, still live. Diff: --- VEX/priv/host_generic_reg_alloc3.c | 142 +++++++++++++++++++++++++++---------- 1 file changed, 105 insertions(+), 37 deletions(-) diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c index cfe807f..08ff0ee 100644 --- a/VEX/priv/host_generic_reg_alloc3.c +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -1566,10 +1566,15 @@ static void stage4_emit_HInstrIfThenElse(RegAllocChunk* chunk, UInt depth, Usually |vreg1| == |vreg2| == |vregD| so the merging happens between different states but for the same vreg. For phi node merging, |vreg1| != |vreg2| != |vregD|. - Note: |vreg1| and |vregD| refer to |state1|, |vreg2| to |state2|. */ + Note: |vreg1| and |vregD| refer to |state1|, |vreg2| to |state2|. + + |merge_completed| is set to True when the state merge was successfully + completed. + |merge_not_needed| is set to True when there was nothing to merge. */ static void merge_vreg_states(RegAllocChunk* chunk, RegAllocState* state1, RegAllocState* state2, HReg vreg1, HReg vreg2, HReg vregD, + Bool* merge_completed, Bool* merge_not_needed, UInt depth, const RegAllocControl* con) { RegAllocChunk* outOfLine = chunk->IfThenElse.outOfLine; @@ -1581,14 +1586,18 @@ static void merge_vreg_states(RegAllocChunk* chunk, VRegState* v1_dst_state = &state1->vregs[vd_idx]; VRegState* v2_dst_state = &state2->vregs[vd_idx]; + *merge_completed = True; + *merge_not_needed = False; + switch (v1_src_state->disp) { case Unallocated: switch (v2_src_state->disp) { case Unallocated: /* Good. Nothing to do. */ + *merge_not_needed = True; break; case Assigned: - /* Should be dead by now. */ + /* vreg1: unallocated; vreg2: assigned - it should be dead by now. */ vassert(v2_src_state->dead_before <= chunk->next->ii_total_start); HReg rreg2 = v2_src_state->rreg; @@ -1596,7 +1605,7 @@ static void merge_vreg_states(RegAllocChunk* chunk, FREE_RREG(&state2->rregs[hregIndex(rreg2)]); break; case Spilled: - /* Should be dead by now. */ + /* vreg1: unallocated; vreg2: spilled - it should be dead by now. */ vassert(v2_src_state->dead_before <= chunk->next->ii_total_start); FREE_VREG(v2_src_state); @@ -1615,9 +1624,12 @@ static void merge_vreg_states(RegAllocChunk* chunk, "(Assigned/Unallocated)."); case Assigned: { - /* Check if both vregs are assigned to the same rreg. */ + /* vreg1: assigned; vreg2: assigned + Check if both vregs are assigned to the same rreg. */ HReg rreg2 = v2_src_state->rreg; if (! sameHReg(rreg1, rreg2)) { + /* Check the disposition of rreg1 in state2. That's where we + need to get vreg2 into. */ switch (state2->rregs[hregIndex(rreg1)].disp) { case Free: { /* Move rreg2 to rreg1 in outOfLine/state2. */ @@ -1638,6 +1650,8 @@ static void merge_vreg_states(RegAllocChunk* chunk, default: vassert(0); } + } else { + *merge_not_needed = True; } /* Proceed to phi node merging bellow. */ @@ -1645,20 +1659,29 @@ static void merge_vreg_states(RegAllocChunk* chunk, } case Spilled: + /* vreg1: assigned to rreg1; vreg2: spilled + We worry about the disposition of rreg1 in state2. 
*/ switch (state2->rregs[hregIndex(rreg1)].disp) { case Free: assign_vreg(outOfLine, state2, vreg2, rreg1, True, depth, con); break; case Bound: { - /* Make a room in state2->rregs[rreg1] first. */ - HReg vreg_dead = state2->rregs[hregIndex(rreg1)].vreg; - UInt vdead_idx = hregIndex(vreg_dead); - /* That vreg should be dead by now. */ - vassert(state2->vregs[vdead_idx].dead_before - <= chunk->next->ii_total_start); - - FREE_VREG(&state2->vregs[vdead_idx]); - FREE_RREG(&state2->rregs[hregIndex(rreg1)]); + /* First check if we can make a room in state2->rregs[rreg1]. */ + HReg vreg_off = state2->rregs[hregIndex(rreg1)].vreg; + UInt voff_idx = hregIndex(vreg_off); + if (state2->vregs[voff_idx].dead_before + > chunk->next->ii_total_start) { + /* There is an offending vreg_off in state2 which is still + not dead. Let's spill it now and indicate that merging is not + completed for it. */ + UInt r_spilled_idx = spill_vreg(outOfLine, state2, vreg_off, + chunk->next->ii_total_start, depth, con); + vassert(r_spilled_idx == hregIndex(rreg1)); + *merge_completed = False; + } else { + FREE_VREG(&state2->vregs[voff_idx]); + FREE_RREG(&state2->rregs[hregIndex(rreg1)]); + } assign_vreg(outOfLine, state2, vreg2, rreg1, True, depth, con); break; @@ -1676,18 +1699,27 @@ static void merge_vreg_states(RegAllocChunk* chunk, /* Phi node merging. */ if (! sameHReg(vreg1, vreg2)) { - FREE_VREG(v1_src_state); - FREE_VREG(v2_src_state); - v1_dst_state->disp = Assigned; - v1_dst_state->rreg = rreg1; - v2_dst_state->disp = Assigned; - v2_dst_state->rreg = rreg1; - - UInt r_idx = hregIndex(rreg1); - vassert(state1->rregs[r_idx].disp == Bound); - state1->rregs[r_idx].eq_spill_slot - = (state1->rregs[r_idx].eq_spill_slot && state2->rregs[r_idx].eq_spill_slot); - state1->rregs[r_idx].vreg = vregD; + if ((v1_dst_state->disp == Assigned) + && (v2_dst_state->disp == Assigned) + && sameHReg(rreg1, v1_dst_state->rreg) + && sameHReg(v1_dst_state->rreg, v2_dst_state->rreg)) { + // merge not needed at this point but may have been needed previously + } else { + FREE_VREG(v1_src_state); + FREE_VREG(v2_src_state); + v1_dst_state->disp = Assigned; + v1_dst_state->rreg = rreg1; + v2_dst_state->disp = Assigned; + v2_dst_state->rreg = rreg1; + + UInt r_idx = hregIndex(rreg1); + vassert(state1->rregs[r_idx].disp == Bound); + state1->rregs[r_idx].eq_spill_slot + = (state1->rregs[r_idx].eq_spill_slot + && state2->rregs[r_idx].eq_spill_slot); + state1->rregs[r_idx].vreg = vregD; + *merge_not_needed = False; + } } break; } // case Assigned @@ -1699,16 +1731,20 @@ static void merge_vreg_states(RegAllocChunk* chunk, " (Spilled/Unallocated)."); break; case Assigned: + /* vreg1: spilled; vreg2: assigned to rreg2 */ /* Generate spill. */ vpanic("Spill not implemented, yet."); break; case Spilled: + /* vreg1: spilled; vreg2: spilled */ /* Check if both vregs are spilled at the same spill slot. Eventually reload vreg to a rreg and spill it again. */ if (v1_src_state->spill_offset != v2_src_state->spill_offset) { /* Find a free rreg in |state1|, reload from v2_src_state->spill_slot, spill to v1_dst_state->spill_slot. 
*/ vpanic("Spilled/Spilled reload not implemented, yet."); + } else { + *merge_not_needed = True; } break; default: @@ -1746,18 +1782,22 @@ static void stage4_merge_states(RegAllocChunk* chunk, vex_printf("\n"); } + Bool merge_completed, merge_not_needed; merge_vreg_states(chunk, state, cloned, phi_node->srcFallThrough, - phi_node->srcOutOfLine, phi_node->dst, depth, con); - } + phi_node->srcOutOfLine, phi_node->dst, + &merge_completed, &merge_not_needed, depth, con); + // Don't care about merge_completed here. It will be dealt below. - if (DEBUG_REGALLOC) { - print_state(chunk, state, chunk->next->ii_total_start, depth, con, - "After phi node merge"); + if (DEBUG_REGALLOC) { + print_state(chunk, state, chunk->next->ii_total_start, depth, con, + "After phi node merge"); + } } } /* Merge remaining vreg states. VRegs mentioned by phi nodes are processed - as well but merging is no-op for them now. */ + as well but merging is usually no-op for them now (unless merge_completed + returned False above). */ if (DEBUG_REGALLOC) { print_state(chunk, state, chunk->next->ii_total_start, depth, con, @@ -1766,19 +1806,47 @@ static void stage4_merge_states(RegAllocChunk* chunk, "Before state merge: out-of-line leg"); } - for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) { - HRegClass reg_class = state->vregs[v_idx].reg_class; - if (reg_class != HRcINVALID) { - merge_vreg_states(chunk, state, cloned, MK_VREG(v_idx, reg_class), + Bool iterate; + UInt n_iterations = 0; + do { + iterate = False; + + for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) { + HRegClass reg_class = state->vregs[v_idx].reg_class; + if (reg_class != HRcINVALID) { + Bool merge_completed, merge_not_needed; + merge_vreg_states(chunk, state, cloned, MK_VREG(v_idx, reg_class), MK_VREG(v_idx, reg_class), MK_VREG(v_idx, reg_class), - depth, con); + &merge_completed, &merge_not_needed, depth, con); + + if (!merge_completed) { + iterate = True; + } + } } - } + + n_iterations += 1; + vassert(n_iterations <= 10); + } while (iterate); if (DEBUG_REGALLOC) { print_state(chunk, state, chunk->next->ii_total_start, depth, con, "After state merge"); } + + if (SANITY_CHECKS_EVERY_INSTR) { + for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) { + HRegClass reg_class = state->vregs[v_idx].reg_class; + if (reg_class != HRcINVALID) { + Bool merge_completed, merge_not_needed; + merge_vreg_states(chunk, state, cloned, MK_VREG(v_idx, reg_class), + MK_VREG(v_idx, reg_class), MK_VREG(v_idx, reg_class), + &merge_completed, &merge_not_needed, depth, con); + vassert(merge_completed == True); + vassert(merge_not_needed == True); + } + } + } } static void stage4(RegAllocChunk* chunk, RegAllocState* state, |
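The commit above makes merge_vreg_states() report whether a merge completed, and stage4_merge_states() then repeats the whole merge until nothing remains incomplete, capped at 10 passes. A self-contained sketch of that loop, with the merge itself replaced by a toy stub:

   /* Sketch (not VEX code) of the bounded fixpoint loop this commit adds:
      the per-vreg merge is repeated while any call reports "not completed"
      because an offending, still-live vreg occupying the target rreg had
      to be spilled first. */
   #include <assert.h>
   #include <stdbool.h>
   #include <stdio.h>

   #define N_VREGS 3

   /* Toy stand-in for merge_vreg_states(): pretend vreg 1 needs one extra
      pass before its merge completes. */
   static bool merge_one(unsigned v_idx, unsigned pass)
   {
      return !(v_idx == 1 && pass == 0);
   }

   int main(void)
   {
      bool iterate;
      unsigned pass = 0;
      do {
         iterate = false;
         for (unsigned v = 0; v < N_VREGS; v++) {
            if (!merge_one(v, pass))
               iterate = true;       /* something was spilled; run again */
         }
         pass++;
         assert(pass <= 10);         /* mirrors the vassert in the patch */
      } while (iterate);
      printf("states merged after %u pass(es)\n", pass);   /* prints 2 */
      return 0;
   }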
From: Ivo R. <ir...@so...> - 2017-10-11 18:57:12
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=074de238d44c0cdaf394489ea69a67b76916fbce commit 074de238d44c0cdaf394489ea69a67b76916fbce Author: Ivo Raisr <iv...@iv...> Date: Sat Sep 23 09:46:40 2017 +0200 VEX register allocator: allocate caller-save registers for short lived vregs. Allocate caller-saved registers for short lived vregs and callee-save registers for vregs which span accross helper calls. Fixes BZ#384987. Diff: --- NEWS | 1 + VEX/priv/host_generic_reg_alloc3.c | 31 +++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/NEWS b/NEWS index 097930a..b043c58 100644 --- a/NEWS +++ b/NEWS @@ -58,6 +58,7 @@ where XXXXXX is the bug number as listed below. 384526 reduce number of spill instructions generated by VEX register allocator v3 384584 Callee saved registers listed first for AMD64, X86, and PPC architectures n-i-bz Fix missing workq_ops operations (macOS) +384987 VEX register allocator: allocate caller-save registers for short lived vregs 385182 PPC64 is missing support for the DSCR 385207 PPC64, generate_store_FPRF() generates too many Iops 385208 PPC64, xxperm instruction exhausts temporary memory diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c index 9ab9549..0d35c62 100644 --- a/VEX/priv/host_generic_reg_alloc3.c +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -408,22 +408,27 @@ static inline HReg find_vreg_to_spill( } /* Find a free rreg of the correct class. - Tries to find an rreg whose live range (if any) is as far ahead in the - incoming instruction stream as possible. An ideal rreg candidate is - a callee-save register because it won't be used for parameter passing - around helper function calls. */ + Tries to find an rreg whose hard live range (if any) starts after the vreg's + live range ends. If that is not possible, then at least whose live range + is as far ahead in the incoming instruction stream as possible. + An ideal rreg candidate is a caller-save register for short-lived vregs + and a callee-save register for long-lived vregs because it won't need to + be spilled around helper calls. */ static Bool find_free_rreg( const VRegState* vreg_state, UInt n_vregs, const RRegState* rreg_state, UInt n_rregs, const RRegLRState* rreg_lr_state, - UInt current_ii, HRegClass target_hregclass, + UInt v_idx, UInt current_ii, HRegClass target_hregclass, Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found) { Bool found = False; UInt distance_so_far = 0; /* running max for |live_after - current_ii| */ + const VRegState* vreg = &vreg_state[v_idx]; - for (UInt r_idx = con->univ->allocable_start[target_hregclass]; - r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) { + /* Assume majority of vregs are short-lived. Start scannig from caller-save + registers first. */ + for (Int r_idx = (Int) con->univ->allocable_end[target_hregclass]; + r_idx >= (Int) con->univ->allocable_start[target_hregclass]; r_idx--) { const RRegState* rreg = &rreg_state[r_idx]; const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; if (rreg->disp == Free) { @@ -434,7 +439,12 @@ static Bool find_free_rreg( } else { const RRegLR* lr = rreg_lrs->lr_current; if (lr->live_after > (Short) current_ii) { - /* Not live, yet. */ + /* RReg's hard live range is not live, yet. */ + if (vreg->effective_dead_before <= lr->live_after) { + found = True; + *r_idx_found = r_idx; + break; /* VReg is short-lived; it fits in. 
*/ + } if ((lr->live_after - (Short) current_ii) > distance_so_far) { distance_so_far = lr->live_after - (Short) current_ii; found = True; @@ -548,8 +558,9 @@ HInstrArray* doRegisterAllocation_v3( ({ \ UInt _r_free_idx; \ Bool free_rreg_found = find_free_rreg( \ - vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \ - (_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx); \ + vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \ + (_v_idx), (_ii), (_reg_class), (_reserve_phase), \ + con, &_r_free_idx); \ if (!free_rreg_found) { \ HReg vreg_to_spill = find_vreg_to_spill( \ vreg_state, n_vregs, rreg_state, n_rregs, \ |
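The updated find_free_rreg() described above can be illustrated with a toy version: scan the allocatable range backwards (caller-saved registers sit at the end after the reordering commit), and accept immediately any free register whose next hard live range starts only after the vreg is dead, so short-lived vregs land in caller-saved registers and vregs spanning helper calls fall back to callee-saved ones. The sketch below uses invented register names and instruction numbers.

   /* Toy version of the selection heuristic; not the VEX routine. */
   #include <stdbool.h>
   #include <stdio.h>

   #define N_RREGS 4

   typedef struct {
      const char *name;
      bool        is_free;
      int         hard_live_after;  /* instr no. of next fixed use; -1 = none */
   } RReg;

   static RReg rregs[N_RREGS] = {   /* callee-saved first, caller-saved last */
      { "r12", true, -1 },
      { "rbx", true, -1 },
      { "rsi", true,  7 },          /* trashed by a helper call at instr 7 */
      { "rdi", true, 12 },          /* trashed by a helper call at instr 12 */
   };

   static int find_free_rreg(int vreg_dead_before)
   {
      int best = -1, best_distance = -1;
      for (int r = N_RREGS - 1; r >= 0; r--) {      /* caller-saved end first */
         if (!rregs[r].is_free)
            continue;
         if (rregs[r].hard_live_after < 0
             || vreg_dead_before <= rregs[r].hard_live_after)
            return r;                  /* vreg fits before the hard range */
         if (rregs[r].hard_live_after > best_distance) {
            best_distance = rregs[r].hard_live_after;
            best = r;
         }
      }
      return best;
   }

   int main(void)
   {
      printf("short-lived vreg -> %s\n", rregs[find_free_rreg(5)].name);  /* rdi */
      printf("long-lived vreg  -> %s\n", rregs[find_free_rreg(20)].name); /* rbx */
      return 0;
   }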
From: Ivo R. <ir...@so...> - 2017-10-11 18:57:09
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=83cabd32492e6d19d483a63522e4e874fa64b617 commit 83cabd32492e6d19d483a63522e4e874fa64b617 Author: Ivo Raisr <iv...@iv...> Date: Fri Sep 22 22:50:11 2017 +0200 Refactor tracking of MOV coalescing. Reg<->Reg MOV coalescing status is now a part of the HRegUsage. This allows register allocation to query it two times without incurring a performance penalty. This in turn allows to better keep track of vreg<->vreg MOV coalescing so that all vregs in the coalesce chain get the effective |dead_before| of the last vreg. A small performance improvement has been observed because this allows to coalesce even spilled vregs (previously only assigned ones). Diff: --- VEX/priv/host_amd64_defs.c | 55 +++---- VEX/priv/host_amd64_defs.h | 1 - VEX/priv/host_arm64_defs.c | 29 +--- VEX/priv/host_arm64_defs.h | 1 - VEX/priv/host_arm_defs.c | 68 +++------ VEX/priv/host_arm_defs.h | 1 - VEX/priv/host_generic_reg_alloc2.c | 16 +- VEX/priv/host_generic_reg_alloc3.c | 292 +++++++++++++++++++++++++++---------- VEX/priv/host_generic_regs.c | 3 + VEX/priv/host_generic_regs.h | 21 ++- VEX/priv/host_mips_defs.c | 31 ++-- VEX/priv/host_mips_defs.h | 1 - VEX/priv/host_ppc_defs.c | 46 ++---- VEX/priv/host_ppc_defs.h | 1 - VEX/priv/host_s390_defs.c | 34 +---- VEX/priv/host_s390_defs.h | 1 - VEX/priv/host_x86_defs.c | 53 +++---- VEX/priv/host_x86_defs.h | 1 - VEX/priv/main_main.c | 20 +-- 19 files changed, 342 insertions(+), 333 deletions(-) diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c index d9949d4..a554e28 100644 --- a/VEX/priv/host_amd64_defs.c +++ b/VEX/priv/host_amd64_defs.c @@ -1406,6 +1406,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 ) addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src); if (i->Ain.Alu64R.op == Aalu_MOV) { addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst); + + if (i->Ain.Alu64R.src->tag == Armi_Reg) { + u->isRegRegMove = True; + u->regMoveSrc = i->Ain.Alu64R.src->Armi.Reg.reg; + u->regMoveDst = i->Ain.Alu64R.dst; + } return; } if (i->Ain.Alu64R.op == Aalu_CMP) { @@ -1668,6 +1674,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 ) addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV ? HRmWrite : HRmModify, i->Ain.SseReRg.dst); + + if (i->Ain.SseReRg.op == Asse_MOV) { + u->isRegRegMove = True; + u->regMoveSrc = i->Ain.SseReRg.src; + u->regMoveDst = i->Ain.SseReRg.dst; + } } return; case Ain_SseCMov: @@ -1694,6 +1706,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 ) //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV //uu ? HRmWrite : HRmModify, //uu i->Ain.AvxReRg.dst); + //uu + //uu if (i->Ain.AvxReRg.op == Asse_MOV) { + //uu u->isRegRegMove = True; + //uu u->regMoveSrc = i->Ain.AvxReRg.src; + //uu u->regMoveDst = i->Ain.AvxReRg.dst; + //uu } //uu } //uu return; case Ain_EvCheck: @@ -1910,43 +1928,6 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. 
-*/ -Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst ) -{ - switch (i->tag) { - case Ain_Alu64R: - /* Moves between integer regs */ - if (i->Ain.Alu64R.op != Aalu_MOV) - return False; - if (i->Ain.Alu64R.src->tag != Armi_Reg) - return False; - *src = i->Ain.Alu64R.src->Armi.Reg.reg; - *dst = i->Ain.Alu64R.dst; - return True; - case Ain_SseReRg: - /* Moves between SSE regs */ - if (i->Ain.SseReRg.op != Asse_MOV) - return False; - *src = i->Ain.SseReRg.src; - *dst = i->Ain.SseReRg.dst; - return True; - //uu case Ain_AvxReRg: - //uu /* Moves between AVX regs */ - //uu if (i->Ain.AvxReRg.op != Asse_MOV) - //uu return False; - //uu *src = i->Ain.AvxReRg.src; - //uu *dst = i->Ain.AvxReRg.dst; - //uu return True; - default: - return False; - } - /*NOTREACHED*/ -} - - /* Generate amd64 spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. */ diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index 92730fa..68e199a 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -785,7 +785,6 @@ extern void ppAMD64Instr ( const AMD64Instr*, Bool ); of the underlying instruction set. */ extern void getRegUsage_AMD64Instr ( HRegUsage*, const AMD64Instr*, Bool ); extern void mapRegs_AMD64Instr ( HRegRemap*, AMD64Instr*, Bool ); -extern Bool isMove_AMD64Instr ( const AMD64Instr*, HReg*, HReg* ); extern Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const AMD64Instr* i, diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 2506512..4d088c7 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -1958,6 +1958,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) case ARM64in_MovI: addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst); addHRegUse(u, HRmRead, i->ARM64in.MovI.src); + u->isRegRegMove = True; + u->regMoveSrc = i->ARM64in.MovI.src; + u->regMoveDst = i->ARM64in.MovI.dst; return; case ARM64in_Imm64: addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst); @@ -2238,6 +2241,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) case ARM64in_VMov: addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst); addHRegUse(u, HRmRead, i->ARM64in.VMov.src); + u->isRegRegMove = True; + u->regMoveSrc = i->ARM64in.VMov.src; + u->regMoveDst = i->ARM64in.VMov.dst; return; case ARM64in_EvCheck: /* We expect both amodes only to mention x21, so this is in @@ -2510,29 +2516,6 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst ) -{ - switch (i->tag) { - case ARM64in_MovI: - *src = i->ARM64in.MovI.src; - *dst = i->ARM64in.MovI.dst; - return True; - case ARM64in_VMov: - *src = i->ARM64in.VMov.src; - *dst = i->ARM64in.VMov.dst; - return True; - default: - break; - } - - return False; -} - - /* Generate arm spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. */ diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index e7da4f9..277a55b 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -993,7 +993,6 @@ extern void ppARM64Instr ( const ARM64Instr* ); of the underlying instruction set. 
*/ extern void getRegUsage_ARM64Instr ( HRegUsage*, const ARM64Instr*, Bool ); extern void mapRegs_ARM64Instr ( HRegRemap*, ARM64Instr*, Bool ); -extern Bool isMove_ARM64Instr ( const ARM64Instr*, HReg*, HReg* ); extern Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const ARM64Instr* i, Bool mode64, diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index 9bf87cd..3de6d50 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -2108,6 +2108,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 ) case ARMin_Mov: addHRegUse(u, HRmWrite, i->ARMin.Mov.dst); addRegUsage_ARMRI84(u, i->ARMin.Mov.src); + + if (i->ARMin.Mov.src->tag == ARMri84_R) { + u->isRegRegMove = True; + u->regMoveSrc = i->ARMin.Mov.src->ARMri84.R.reg; + u->regMoveDst = i->ARMin.Mov.dst; + } return; case ARMin_Imm32: addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst); @@ -2256,10 +2262,22 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 ) case ARMin_VUnaryD: addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst); addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src); + + if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) { + u->isRegRegMove = True; + u->regMoveSrc = i->ARMin.VUnaryD.src; + u->regMoveDst = i->ARMin.VUnaryD.dst; + } return; case ARMin_VUnaryS: addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst); addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src); + + if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) { + u->isRegRegMove = True; + u->regMoveSrc = i->ARMin.VUnaryS.src; + u->regMoveDst = i->ARMin.VUnaryS.dst; + } return; case ARMin_VCmpD: addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL); @@ -2350,6 +2368,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 ) case ARMin_NUnary: addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst); addHRegUse(u, HRmRead, i->ARMin.NUnary.src); + + if (i->ARMin.NUnary.op == ARMneon_COPY) { + u->isRegRegMove = True; + u->regMoveSrc = i->ARMin.NUnary.src; + u->regMoveDst = i->ARMin.NUnary.dst; + } return; case ARMin_NUnaryS: addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg); @@ -2620,50 +2644,6 @@ void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst ) -{ - /* Moves between integer regs */ - switch (i->tag) { - case ARMin_Mov: - if (i->ARMin.Mov.src->tag == ARMri84_R) { - *src = i->ARMin.Mov.src->ARMri84.R.reg; - *dst = i->ARMin.Mov.dst; - return True; - } - break; - case ARMin_VUnaryD: - if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) { - *src = i->ARMin.VUnaryD.src; - *dst = i->ARMin.VUnaryD.dst; - return True; - } - break; - case ARMin_VUnaryS: - if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) { - *src = i->ARMin.VUnaryS.src; - *dst = i->ARMin.VUnaryS.dst; - return True; - } - break; - case ARMin_NUnary: - if (i->ARMin.NUnary.op == ARMneon_COPY) { - *src = i->ARMin.NUnary.src; - *dst = i->ARMin.NUnary.dst; - return True; - } - break; - default: - break; - } - - return False; -} - - /* Generate arm spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. 
*/ diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index 56c4ec5..b88c85a 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -1056,7 +1056,6 @@ extern void ppARMInstr ( const ARMInstr* ); of the underlying instruction set. */ extern void getRegUsage_ARMInstr ( HRegUsage*, const ARMInstr*, Bool ); extern void mapRegs_ARMInstr ( HRegRemap*, ARMInstr*, Bool ); -extern Bool isMove_ARMInstr ( const ARMInstr*, HReg*, HReg* ); extern Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const ARMInstr* i, Bool mode64, diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c index eb4600e..166f52b 100644 --- a/VEX/priv/host_generic_reg_alloc2.c +++ b/VEX/priv/host_generic_reg_alloc2.c @@ -45,8 +45,6 @@ /* TODO 27 Oct 04: - Better consistency checking from what isMove tells us. - We can possibly do V-V coalescing even when the src is spilled, providing we can arrange for the dst to have the same spill slot. @@ -515,6 +513,10 @@ HInstrArray* doRegisterAllocation_v2 ( for (Int ii = 0; ii < instrs_in->arr_used; ii++) { con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], con->mode64); + reg_usage_arr[ii].isVregVregMove + = reg_usage_arr[ii].isRegRegMove + && hregIsVirtual(reg_usage_arr[ii].regMoveSrc) + && hregIsVirtual(reg_usage_arr[ii].regMoveDst); if (0) { vex_printf("\n%d stage1: ", ii); @@ -1025,12 +1027,10 @@ HInstrArray* doRegisterAllocation_v2 ( /* If doing a reg-reg move between two vregs, and the src's live range ends here and the dst's live range starts here, bind the dst to the src's rreg, and that's all. */ - HReg vregS = INVALID_HREG; - HReg vregD = INVALID_HREG; - if ( con->isMove(instrs_in->arr[ii], &vregS, &vregD) ) { - if (!hregIsVirtual(vregS)) goto cannot_coalesce; - if (!hregIsVirtual(vregD)) goto cannot_coalesce; - /* Check that *isMove is not telling us a bunch of lies ... */ + if (reg_usage_arr[ii].isVregVregMove) { + HReg vregS = reg_usage_arr[ii].regMoveSrc; + HReg vregD = reg_usage_arr[ii].regMoveDst; + /* Check that |isVregVregMove| is not telling us a bunch of lies ... */ vassert(hregClass(vregS) == hregClass(vregD)); Int k = hregIndex(vregS); Int m = hregIndex(vregD); diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c index 929dee5..9ab9549 100644 --- a/VEX/priv/host_generic_reg_alloc3.c +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -72,6 +72,18 @@ typedef /* The "home" spill slot. The offset is relative to the beginning of the guest state. */ UShort spill_offset; + + /* This vreg (vregS) is coalesced to another vreg + if |coalescedTo| != INVALID_HREG. + Coalescing means that there is a MOV instruction which occurs in the + instruction stream right at vregS' dead_before + and vregD's live_after. */ + HReg coalescedTo; /* Which vreg it is coalesced to. */ + HReg coalescedFirst; /* First vreg in the coalescing chain. */ + + /* If this vregS is coalesced to another vregD, what is the combined + dead_before for vregS+vregD. Used to effectively allocate registers. */ + Short effective_dead_before; } VRegState; @@ -190,13 +202,20 @@ static inline void print_state( const RRegLRState* rreg_lr_state, UShort current_ii) { +# define RIGHT_JUSTIFY(_total, _written) \ + do { \ + for (Int w = (_total) - (_written); w > 0; w--) { \ + vex_printf(" "); \ + } \ + } while (0) + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { const VRegState* vreg = &vreg_state[v_idx]; if (vreg->live_after == INVALID_INSTRNO) { continue; /* This is a dead vreg. Never comes into live. 
*/ } - vex_printf("vreg_state[%3u] \t", v_idx); + vex_printf("vreg_state[%3u] ", v_idx); UInt written; switch (vreg->disp) { @@ -213,15 +232,26 @@ static inline void print_state( default: vassert(0); } + RIGHT_JUSTIFY(25, written); - for (Int w = 30 - written; w > 0; w--) { - vex_printf(" "); - } + written = vex_printf("lr: [%d, %d) ", + vreg->live_after, vreg->dead_before); + RIGHT_JUSTIFY(15, written); + + written = vex_printf("effective lr: [%d, %d)", + vreg->live_after, vreg->effective_dead_before); + RIGHT_JUSTIFY(25, written); if (vreg->live_after > (Short) current_ii) { vex_printf("[not live yet]\n"); } else if ((Short) current_ii >= vreg->dead_before) { - vex_printf("[now dead]\n"); + if (hregIsInvalid(vreg->coalescedTo)) { + vex_printf("[now dead]\n"); + } else { + vex_printf("[now dead, coalesced to "); + con->ppReg(vreg->coalescedTo); + vex_printf("]\n"); + } } else { vex_printf("[live]\n"); } @@ -232,9 +262,7 @@ static inline void print_state( const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; vex_printf("rreg_state[%2u] = ", r_idx); UInt written = con->ppReg(con->univ->regs[r_idx]); - for (Int w = 10 - written; w > 0; w--) { - vex_printf(" "); - } + RIGHT_JUSTIFY(10, written); switch (rreg->disp) { case Free: @@ -255,6 +283,8 @@ static inline void print_state( break; } } + +# undef RIGHT_JUSTIFY } static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out, @@ -383,8 +413,8 @@ static inline HReg find_vreg_to_spill( a callee-save register because it won't be used for parameter passing around helper function calls. */ static Bool find_free_rreg( - VRegState* vreg_state, UInt n_vregs, - RRegState* rreg_state, UInt n_rregs, + const VRegState* vreg_state, UInt n_vregs, + const RRegState* rreg_state, UInt n_rregs, const RRegLRState* rreg_lr_state, UInt current_ii, HRegClass target_hregclass, Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found) @@ -476,6 +506,10 @@ HInstrArray* doRegisterAllocation_v3( HRegUsage* reg_usage = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used); + /* Mark vreg indexes where coalesce chains start at. */ + UInt* coalesce_heads = LibVEX_Alloc_inline(n_vregs * sizeof(UInt)); + UInt nr_coalesce_heads = 0; + /* The live range numbers are signed shorts, and so limiting the number of instructions to 15000 comfortably guards against them overflowing 32k. */ @@ -512,9 +546,9 @@ HInstrArray* doRegisterAllocation_v3( instruction and makes free the corresponding rreg. */ # define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase) \ ({ \ - UInt _r_free_idx = -1; \ + UInt _r_free_idx; \ Bool free_rreg_found = find_free_rreg( \ - vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \ + vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \ (_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx); \ if (!free_rreg_found) { \ HReg vreg_to_spill = find_vreg_to_spill( \ @@ -536,12 +570,15 @@ HInstrArray* doRegisterAllocation_v3( /* --- Stage 0. Initialize the state. 
--- */ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { - vreg_state[v_idx].live_after = INVALID_INSTRNO; - vreg_state[v_idx].dead_before = INVALID_INSTRNO; - vreg_state[v_idx].reg_class = HRcINVALID; - vreg_state[v_idx].disp = Unallocated; - vreg_state[v_idx].rreg = INVALID_HREG; - vreg_state[v_idx].spill_offset = 0; + vreg_state[v_idx].live_after = INVALID_INSTRNO; + vreg_state[v_idx].dead_before = INVALID_INSTRNO; + vreg_state[v_idx].reg_class = HRcINVALID; + vreg_state[v_idx].disp = Unallocated; + vreg_state[v_idx].rreg = INVALID_HREG; + vreg_state[v_idx].spill_offset = 0; + vreg_state[v_idx].coalescedTo = INVALID_HREG; + vreg_state[v_idx].coalescedFirst = INVALID_HREG; + vreg_state[v_idx].effective_dead_before = INVALID_INSTRNO; } for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { @@ -565,6 +602,10 @@ HInstrArray* doRegisterAllocation_v3( const HInstr* instr = instrs_in->arr[ii]; con->getRegUsage(®_usage[ii], instr, con->mode64); + reg_usage[ii].isVregVregMove + = reg_usage[ii].isRegRegMove + && hregIsVirtual(reg_usage[ii].regMoveSrc) + && hregIsVirtual(reg_usage[ii].regMoveDst); if (0) { vex_printf("\n%u stage 1: ", ii); @@ -602,23 +643,24 @@ HInstrArray* doRegisterAllocation_v3( if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { OFFENDING_VREG(v_idx, instr, "Read"); } - vreg_state[v_idx].dead_before = toShort(ii + 1); break; case HRmWrite: if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { vreg_state[v_idx].live_after = toShort(ii); } - vreg_state[v_idx].dead_before = toShort(ii + 1); break; case HRmModify: if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { OFFENDING_VREG(v_idx, instr, "Modify"); } - vreg_state[v_idx].dead_before = toShort(ii + 1); break; default: vassert(0); } + + vreg_state[v_idx].dead_before = toShort(ii + 1); + vreg_state[v_idx].effective_dead_before + = vreg_state[v_idx].dead_before; } /* Process real registers mentioned in the instruction. */ @@ -703,7 +745,59 @@ HInstrArray* doRegisterAllocation_v3( } } - /* --- Stage 2. Allocate spill slots. --- */ + + /* --- Stage 2. MOV coalescing (preparation). --- */ + /* Optimise register coalescing: + MOV v <-> v coalescing (done here). + MOV v <-> r coalescing (TODO: not yet, not here). */ + /* If doing a reg-reg move between two vregs, and the src's live range ends + here and the dst's live range starts here, coalesce the src vreg + to the dst vreg. */ + Bool coalesce_happened = False; + for (UShort ii = 0; ii < instrs_in->arr_used; ii++) { + if (reg_usage[ii].isVregVregMove) { + HReg vregS = reg_usage[ii].regMoveSrc; + HReg vregD = reg_usage[ii].regMoveDst; + + /* Check that |isVregVregMove| is not telling us a bunch of lies ... */ + vassert(hregClass(vregS) == hregClass(vregD)); + UInt vs_idx = hregIndex(vregS); + UInt vd_idx = hregIndex(vregD); + vassert(IS_VALID_VREGNO(vs_idx)); + vassert(IS_VALID_VREGNO(vd_idx)); + vassert(! sameHReg(vregS, vregD)); + VRegState* vs_st = &vreg_state[vs_idx]; + VRegState* vd_st = &vreg_state[vd_idx]; + + if ((vs_st->dead_before == ii + 1) && (vd_st->live_after == ii)) { + /* Live ranges are adjacent. 
*/ + + vs_st->coalescedTo = vregD; + if (hregIsInvalid(vs_st->coalescedFirst)) { + vd_st->coalescedFirst = vregS; + coalesce_heads[nr_coalesce_heads] = vs_idx; + nr_coalesce_heads += 1; + } else { + vd_st->coalescedFirst = vs_st->coalescedFirst; + } + + vreg_state[hregIndex(vd_st->coalescedFirst)].effective_dead_before + = vd_st->dead_before; + + if (DEBUG_REGALLOC) { + vex_printf("vreg coalescing: "); + con->ppReg(vregS); + vex_printf(" -> "); + con->ppReg(vregD); + vex_printf("\n"); + } + + coalesce_happened = True; + } + } + } + + /* --- Stage 3. Allocate spill slots. --- */ /* Each spill slot is 8 bytes long. For vregs which take more than 64 bits to spill (for example classes Flt64 and Vec128), we have to allocate two @@ -742,6 +836,11 @@ HInstrArray* doRegisterAllocation_v3( vassert(vreg_state[v_idx].reg_class == HRcINVALID); continue; } + if (! hregIsInvalid(vreg_state[v_idx].coalescedFirst)) { + /* Coalesced vregs should share the same spill slot with the first vreg + in the coalescing chain. But we don't have that information, yet. */ + continue; + } /* The spill slots are 64 bits in size. As per the comment on definition of HRegClass in host_generic_regs.h, that means, to spill a vreg of @@ -763,8 +862,10 @@ HInstrArray* doRegisterAllocation_v3( if (ss_no >= N_SPILL64S - 1) { vpanic("N_SPILL64S is too low in VEX. Increase and recompile."); } - ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before; - ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before; + ss_busy_until_before[ss_no + 0] + = vreg_state[v_idx].effective_dead_before; + ss_busy_until_before[ss_no + 1] + = vreg_state[v_idx].effective_dead_before; break; default: /* The ordinary case -- just find a single lowest-numbered spill @@ -777,7 +878,8 @@ HInstrArray* doRegisterAllocation_v3( if (ss_no == N_SPILL64S) { vpanic("N_SPILL64S is too low in VEX. Increase and recompile."); } - ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before; + ss_busy_until_before[ss_no] + = vreg_state[v_idx].effective_dead_before; break; } @@ -798,15 +900,38 @@ HInstrArray* doRegisterAllocation_v3( } } + /* Fill in the spill offsets and effective_dead_before for coalesced vregs.*/ + for (UInt i = 0; i < nr_coalesce_heads; i++) { + UInt vs_idx = coalesce_heads[i]; + Short effective_dead_before = vreg_state[vs_idx].effective_dead_before; + UShort spill_offset = vreg_state[vs_idx].spill_offset; + HReg vregD = vreg_state[vs_idx].coalescedTo; + while (! hregIsInvalid(vregD)) { + UInt vd_idx = hregIndex(vregD); + vreg_state[vd_idx].effective_dead_before = effective_dead_before; + vreg_state[vd_idx].spill_offset = spill_offset; + vregD = vreg_state[vd_idx].coalescedTo; + } + } + + if (DEBUG_REGALLOC && coalesce_happened) { + UInt ii = 0; + vex_printf("After vreg<->vreg MOV coalescing:\n"); + PRINT_STATE; + } + if (0) { vex_printf("\n\n"); - for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) - vex_printf("vreg %3u --> spill offset %u\n", - v_idx, vreg_state[v_idx].spill_offset); + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + if (vreg_state[v_idx].live_after != INVALID_INSTRNO) { + vex_printf("vreg %3u --> spill offset %u\n", + v_idx, vreg_state[v_idx].spill_offset); + } + } } - /* --- State 3. Process instructions. --- */ + /* --- State 4. Process instructions. 
--- */ for (UShort ii = 0; ii < instrs_in->arr_used; ii++) { HInstr* instr = instrs_in->arr[ii]; @@ -873,65 +998,82 @@ HInstrArray* doRegisterAllocation_v3( vassert((Short) ii < rreg_lrs->lr_current->dead_before); } } + + /* Sanity check: if vregS has been marked as coalesced to vregD, + then the effective live range of vregS must also cover live range + of vregD. */ + /* The following sanity check is quite expensive. Some basic blocks + contain very lengthy coalescing chains... */ + if (SANITY_CHECKS_EVERY_INSTR) { + for (UInt vs_idx = 0; vs_idx < n_vregs; vs_idx++) { + const VRegState* vS_st = &vreg_state[vs_idx]; + HReg vregD = vS_st->coalescedTo; + while (! hregIsInvalid(vregD)) { + const VRegState* vD_st = &vreg_state[hregIndex(vregD)]; + vassert(vS_st->live_after <= vD_st->live_after); + vassert(vS_st->effective_dead_before >= vD_st->dead_before); + vregD = vD_st->coalescedTo; + } + } + } } - /* --- MOV coalescing --- */ + /* --- MOV coalescing (finishing) --- */ /* Optimise register coalescing: - MOV v <-> v coalescing (done here). + MOV v <-> v coalescing (finished here). MOV v <-> r coalescing (TODO: not yet). */ - /* If doing a reg-reg move between two vregs, and the src's live - range ends here and the dst's live range starts here, bind the dst - to the src's rreg, and that's all. */ - HReg vregS = INVALID_HREG; - HReg vregD = INVALID_HREG; - if (con->isMove(instr, &vregS, &vregD)) { - if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) { - /* Check that |isMove| is not telling us a bunch of lies ... */ - vassert(hregClass(vregS) == hregClass(vregD)); - UInt vs_idx = hregIndex(vregS); - UInt vd_idx = hregIndex(vregD); - vassert(IS_VALID_VREGNO(vs_idx)); - vassert(IS_VALID_VREGNO(vd_idx)); - - if ((vreg_state[vs_idx].dead_before == ii + 1) - && (vreg_state[vd_idx].live_after == ii) - && (vreg_state[vs_idx].disp == Assigned)) { - - /* Live ranges are adjacent and source vreg is bound. - Finally we can do the coalescing. */ - HReg rreg = vreg_state[vs_idx].rreg; - vreg_state[vd_idx].disp = Assigned; + if (reg_usage[ii].isVregVregMove) { + HReg vregS = reg_usage[ii].regMoveSrc; + HReg vregD = reg_usage[ii].regMoveDst; + UInt vs_idx = hregIndex(vregS); + UInt vd_idx = hregIndex(vregD); + + if (sameHReg(vreg_state[vs_idx].coalescedTo, vregD)) { + /* Finally do the coalescing. */ + + HReg rreg = vreg_state[vs_idx].rreg; + switch (vreg_state[vs_idx].disp) { + case Assigned: vreg_state[vd_idx].rreg = rreg; - FREE_VREG(&vreg_state[vs_idx]); - UInt r_idx = hregIndex(rreg); vassert(rreg_state[r_idx].disp == Bound); - rreg_state[r_idx].vreg = vregD; - rreg_state[r_idx].eq_spill_slot = False; + rreg_state[r_idx].vreg = vregD; + break; + case Spilled: + vassert(hregIsInvalid(vreg_state[vs_idx].rreg)); + break; + default: + vassert(0); + } - if (DEBUG_REGALLOC) { - vex_printf("coalesced: "); - con->ppReg(vregS); - vex_printf(" -> "); - con->ppReg(vregD); - vex_printf("\n\n"); - } + vreg_state[vd_idx].disp = vreg_state[vs_idx].disp; + FREE_VREG(&vreg_state[vs_idx]); + + if (DEBUG_REGALLOC) { + vex_printf("coalesced: "); + con->ppReg(vregS); + vex_printf(" -> "); + con->ppReg(vregD); + vex_printf("\n\n"); + } - /* In rare cases it can happen that vregD's live range ends - here. Check and eventually free the vreg and rreg. - This effectively means that either the translated program - contained dead code (but VEX iropt passes are pretty good - at eliminating it) or the VEX backend generated dead code. 
*/ - if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) { - FREE_VREG(&vreg_state[vd_idx]); + /* In rare cases it can happen that vregD's live range ends here. + Check and eventually free the vreg and rreg. + This effectively means that either the translated program + contained dead code (but VEX iropt passes are pretty good + at eliminating it) or the VEX backend generated dead code. */ + if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) { + if (vreg_state[vd_idx].disp == Assigned) { + UInt r_idx = hregIndex(rreg); FREE_RREG(&rreg_state[r_idx]); } - - /* Move on to the next instruction. We skip the post-instruction - stuff because all required house-keeping was done here. */ - continue; + FREE_VREG(&vreg_state[vd_idx]); } + + /* Move on to the next instruction. We skip the post-instruction + stuff because all required house-keeping was done here. */ + continue; } } diff --git a/VEX/priv/host_generic_regs.c b/VEX/priv/host_generic_regs.c index 67d2ea2..cd5d222 100644 --- a/VEX/priv/host_generic_regs.c +++ b/VEX/priv/host_generic_regs.c @@ -184,6 +184,9 @@ void ppHRegUsage ( const RRegUniverse* univ, HRegUsage* tab ) ppHReg(tab->vRegs[i]); vex_printf("\n"); } + if (tab->isRegRegMove) { + vex_printf(" (is a reg-reg move)\n"); + } vex_printf("}\n"); } diff --git a/VEX/priv/host_generic_regs.h b/VEX/priv/host_generic_regs.h index 3db9ea0..8f6b2d6 100644 --- a/VEX/priv/host_generic_regs.h +++ b/VEX/priv/host_generic_regs.h @@ -300,6 +300,16 @@ typedef HReg vRegs[N_HREGUSAGE_VREGS]; HRegMode vMode[N_HREGUSAGE_VREGS]; UInt n_vRegs; + + /* Hint to the register allocator: this instruction is actually a move + between two registers: regMoveSrc -> regMoveDst. */ + Bool isRegRegMove; + HReg regMoveSrc; + HReg regMoveDst; + + /* Used internally by the register allocator. The reg-reg move is + actually a vreg-vreg move. */ + Bool isVregVregMove; } HRegUsage; @@ -307,9 +317,10 @@ extern void ppHRegUsage ( const RRegUniverse*, HRegUsage* ); static inline void initHRegUsage ( HRegUsage* tab ) { - tab->rRead = 0; - tab->rWritten = 0; - tab->n_vRegs = 0; + tab->rRead = 0; + tab->rWritten = 0; + tab->n_vRegs = 0; + tab->isRegRegMove = False; } /* Add a register to a usage table. Combine incoming read uses with @@ -471,10 +482,6 @@ typedef allocation. */ const RRegUniverse* univ; - /* Return True iff the given insn is a reg-reg move, in which case also - return the src and dst regs. */ - Bool (*isMove)(const HInstr*, HReg*, HReg*); - /* Get info about register usage in this insn. */ void (*getRegUsage)(HRegUsage*, const HInstr*, Bool); diff --git a/VEX/priv/host_mips_defs.c b/VEX/priv/host_mips_defs.c index 66c226d..35a293b 100644 --- a/VEX/priv/host_mips_defs.c +++ b/VEX/priv/host_mips_defs.c @@ -1606,6 +1606,15 @@ void getRegUsage_MIPSInstr(HRegUsage * u, const MIPSInstr * i, Bool mode64) addHRegUse(u, HRmRead, i->Min.Alu.srcL); addRegUsage_MIPSRH(u, i->Min.Alu.srcR); addHRegUse(u, HRmWrite, i->Min.Alu.dst); + + /* or Rd,Rs,Rs == mr Rd,Rs */ + if ((i->Min.Alu.op == Malu_OR) + && (i->Min.Alu.srcR->tag == Mrh_Reg) + && sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL)) { + u->isRegRegMove = True; + u->regMoveSrc = i->Min.Alu.srcL; + u->regMoveDst = i->Min.Alu.dst; + } return; case Min_Shft: addHRegUse(u, HRmRead, i->Min.Shft.srcL); @@ -1990,28 +1999,6 @@ void mapRegs_MIPSInstr(HRegRemap * m, MIPSInstr * i, Bool mode64) } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. 
Used - by the register allocator to do move coalescing. -*/ -Bool isMove_MIPSInstr(const MIPSInstr * i, HReg * src, HReg * dst) -{ - /* Moves between integer regs */ - if (i->tag == Min_Alu) { - /* or Rd,Rs,Rs == mr Rd,Rs */ - if (i->Min.Alu.op != Malu_OR) - return False; - if (i->Min.Alu.srcR->tag != Mrh_Reg) - return False; - if (!sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL)) - return False; - *src = i->Min.Alu.srcL; - *dst = i->Min.Alu.dst; - return True; - } - return False; -} - /* Generate mips spill/reload instructions under the direction of the register allocator. */ void genSpill_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, diff --git a/VEX/priv/host_mips_defs.h b/VEX/priv/host_mips_defs.h index be1e3a8..fb681ac 100644 --- a/VEX/priv/host_mips_defs.h +++ b/VEX/priv/host_mips_defs.h @@ -701,7 +701,6 @@ extern void ppMIPSInstr(const MIPSInstr *, Bool mode64); of the underlying instruction set. */ extern void getRegUsage_MIPSInstr (HRegUsage *, const MIPSInstr *, Bool); extern void mapRegs_MIPSInstr (HRegRemap *, MIPSInstr *, Bool mode64); -extern Bool isMove_MIPSInstr (const MIPSInstr *, HReg *, HReg *); extern Int emit_MIPSInstr (/*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const MIPSInstr* i, Bool mode64, diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c index 1ef9c5c..b073c1d 100644 --- a/VEX/priv/host_ppc_defs.c +++ b/VEX/priv/host_ppc_defs.c @@ -2362,6 +2362,15 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 ) addHRegUse(u, HRmRead, i->Pin.Alu.srcL); addRegUsage_PPCRH(u, i->Pin.Alu.srcR); addHRegUse(u, HRmWrite, i->Pin.Alu.dst); + + // or Rd,Rs,Rs == mr Rd,Rs + if ((i->Pin.Alu.op == Palu_OR) + && (i->Pin.Alu.srcR->tag == Prh_Reg) + && sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL)) { + u->isRegRegMove = True; + u->regMoveSrc = i->Pin.Alu.srcL; + u->regMoveDst = i->Pin.Alu.dst; + } return; case Pin_Shft: addHRegUse(u, HRmRead, i->Pin.Shft.srcL); @@ -2489,6 +2498,12 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 ) case Pin_FpUnary: addHRegUse(u, HRmWrite, i->Pin.FpUnary.dst); addHRegUse(u, HRmRead, i->Pin.FpUnary.src); + + if (i->Pin.FpUnary.op == Pfp_MOV) { + u->isRegRegMove = True; + u->regMoveSrc = i->Pin.FpUnary.src; + u->regMoveDst = i->Pin.FpUnary.dst; + } return; case Pin_FpBinary: addHRegUse(u, HRmWrite, i->Pin.FpBinary.dst); @@ -3119,37 +3134,6 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_PPCInstr ( const PPCInstr* i, HReg* src, HReg* dst ) -{ - /* Moves between integer regs */ - if (i->tag == Pin_Alu) { - // or Rd,Rs,Rs == mr Rd,Rs - if (i->Pin.Alu.op != Palu_OR) - return False; - if (i->Pin.Alu.srcR->tag != Prh_Reg) - return False; - if (! sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL)) - return False; - *src = i->Pin.Alu.srcL; - *dst = i->Pin.Alu.dst; - return True; - } - /* Moves between FP regs */ - if (i->tag == Pin_FpUnary) { - if (i->Pin.FpUnary.op != Pfp_MOV) - return False; - *src = i->Pin.FpUnary.src; - *dst = i->Pin.FpUnary.dst; - return True; - } - return False; -} - - /* Generate ppc spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. 
*/ diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h index 27b3b38..17baff5 100644 --- a/VEX/priv/host_ppc_defs.h +++ b/VEX/priv/host_ppc_defs.h @@ -1201,7 +1201,6 @@ extern void ppPPCInstr(const PPCInstr*, Bool mode64); of the underlying instruction set. */ extern void getRegUsage_PPCInstr ( HRegUsage*, const PPCInstr*, Bool mode64 ); extern void mapRegs_PPCInstr ( HRegRemap*, PPCInstr* , Bool mode64); -extern Bool isMove_PPCInstr ( const PPCInstr*, HReg*, HReg* ); extern Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const PPCInstr* i, Bool mode64, diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 327674a..f9a9557 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -48,7 +48,6 @@ /*--- Forward declarations ---*/ /*------------------------------------------------------------*/ -static Bool s390_insn_is_reg_reg_move(const s390_insn *, HReg *src, HReg *dst); static void s390_insn_map_regs(HRegRemap *, s390_insn *); static void s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *); static UInt s390_tchain_load64_len(void); @@ -467,16 +466,6 @@ mapRegs_S390Instr(HRegRemap *m, s390_insn *insn, Bool mode64) } -/* Figure out if the given insn represents a reg-reg move, and if so - assign the source and destination to *src and *dst. If in doubt say No. - Used by the register allocator to do move coalescing. */ -Bool -isMove_S390Instr(const s390_insn *insn, HReg *src, HReg *dst) -{ - return s390_insn_is_reg_reg_move(insn, src, dst); -} - - /* Generate s390 spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. This is like an Ist_Put */ @@ -587,6 +576,12 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn) case S390_INSN_MOVE: addHRegUse(u, HRmRead, insn->variant.move.src); addHRegUse(u, HRmWrite, insn->variant.move.dst); + + if (hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) { + u->isRegRegMove = True; + u->regMoveSrc = insn->variant.move.src; + u->regMoveDst = insn->variant.move.dst; + } break; case S390_INSN_MEMCPY: @@ -1218,23 +1213,6 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn) } -/* Return True, if INSN is a move between two registers of the same class. - In that case assign the source and destination registers to SRC and DST, - respectively. */ -static Bool -s390_insn_is_reg_reg_move(const s390_insn *insn, HReg *src, HReg *dst) -{ - if (insn->tag == S390_INSN_MOVE && - hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) { - *src = insn->variant.move.src; - *dst = insn->variant.move.dst; - return True; - } - - return False; -} - - /*------------------------------------------------------------*/ /*--- Functions to emit a sequence of bytes ---*/ /*------------------------------------------------------------*/ diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 937829c..254275a 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -742,7 +742,6 @@ UInt ppHRegS390(HReg); of the underlying instruction set. 
*/ void getRegUsage_S390Instr( HRegUsage *, const s390_insn *, Bool ); void mapRegs_S390Instr ( HRegRemap *, s390_insn *, Bool ); -Bool isMove_S390Instr ( const s390_insn *, HReg *, HReg * ); Int emit_S390Instr ( Bool *, UChar *, Int, const s390_insn *, Bool, VexEndness, const void *, const void *, const void *, const void *); diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index 2457cc1..eb8e020 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -1234,6 +1234,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64) addRegUsage_X86RMI(u, i->Xin.Alu32R.src); if (i->Xin.Alu32R.op == Xalu_MOV) { addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst); + + if (i->Xin.Alu32R.src->tag == Xrmi_Reg) { + u->isRegRegMove = True; + u->regMoveSrc = i->Xin.Alu32R.src->Xrmi.Reg.reg; + u->regMoveDst = i->Xin.Alu32R.dst; + } return; } if (i->Xin.Alu32R.op == Xalu_CMP) { @@ -1374,6 +1380,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64) case Xin_FpUnary: addHRegUse(u, HRmRead, i->Xin.FpUnary.src); addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst); + + if (i->Xin.FpUnary.op == Xfp_MOV) { + u->isRegRegMove = True; + u->regMoveSrc = i->Xin.FpUnary.src; + u->regMoveDst = i->Xin.FpUnary.dst; + } return; case Xin_FpBinary: addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL); @@ -1469,6 +1481,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64) addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV ? HRmWrite : HRmModify, i->Xin.SseReRg.dst); + + if (i->Xin.SseReRg.op == Xsse_MOV) { + u->isRegRegMove = True; + u->regMoveSrc = i->Xin.SseReRg.src; + u->regMoveDst = i->Xin.SseReRg.dst; + } } return; case Xin_SseCMov: @@ -1668,41 +1686,6 @@ void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst ) -{ - /* Moves between integer regs */ - if (i->tag == Xin_Alu32R) { - if (i->Xin.Alu32R.op != Xalu_MOV) - return False; - if (i->Xin.Alu32R.src->tag != Xrmi_Reg) - return False; - *src = i->Xin.Alu32R.src->Xrmi.Reg.reg; - *dst = i->Xin.Alu32R.dst; - return True; - } - /* Moves between FP regs */ - if (i->tag == Xin_FpUnary) { - if (i->Xin.FpUnary.op != Xfp_MOV) - return False; - *src = i->Xin.FpUnary.src; - *dst = i->Xin.FpUnary.dst; - return True; - } - if (i->tag == Xin_SseReRg) { - if (i->Xin.SseReRg.op != Xsse_MOV) - return False; - *src = i->Xin.SseReRg.src; - *dst = i->Xin.SseReRg.dst; - return True; - } - return False; -} - - /* Generate x86 spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. */ diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h index e1a5767..6812d5f 100644 --- a/VEX/priv/host_x86_defs.h +++ b/VEX/priv/host_x86_defs.h @@ -716,7 +716,6 @@ extern void ppX86Instr ( const X86Instr*, Bool ); of the underlying instruction set. 
*/ extern void getRegUsage_X86Instr ( HRegUsage*, const X86Instr*, Bool ); extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool ); -extern Bool isMove_X86Instr ( const X86Instr*, HReg*, HReg* ); extern Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const X86Instr* i, Bool mode64, diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index b27d6ca..107a6a6 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -709,7 +709,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, /* This the bundle of functions we need to do the back-end stuff (insn selection, reg-alloc, assembly) whilst being insulated from the target instruction set. */ - Bool (*isMove) ( const HInstr*, HReg*, HReg* ); void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ); void (*mapRegs) ( HRegRemap*, HInstr*, Bool ); void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ); @@ -739,7 +738,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, HInstrArray* vcode; HInstrArray* rcode; - isMove = NULL; getRegUsage = NULL; mapRegs = NULL; genSpill = NULL; @@ -857,7 +855,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, case VexArchX86: mode64 = False; rRegUniv = X86FN(getRRegUniverse_X86()); - isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr); mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr); @@ -875,7 +872,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, case VexArchAMD64: mode64 = True; rRegUniv = AMD64FN(getRRegUniverse_AMD64()); - isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr); mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr); @@ -893,7 +889,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, case VexArchPPC32: mode64 = False; rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64)); - isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr); @@ -910,7 +905,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, case VexArchPPC64: mode64 = True; rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64)); - isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr); @@ -928,7 +922,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, case VexArchS390X: mode64 = True; rRegUniv = S390FN(getRRegUniverse_S390()); - isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr); mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr); @@ -946,7 +939,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, case VexArchARM: mode64 = False; rRegUniv = ARMFN(getRRegUniverse_ARM()); - isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr); @@ -963,7 +955,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, case VexArchARM64: mode64 = True; rRegUniv = ARM64FN(getRRegUniverse_ARM64()); - isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr); mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr); @@ -980,7 
+971,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, case VexArchMIPS32: mode64 = False; rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64)); - isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr); @@ -998,7 +988,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, case VexArchMIPS64: mode64 = True; rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64)); - isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr); @@ -1082,11 +1071,10 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, /* Register allocate. */ RegAllocControl con = { - .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage, - .mapRegs = mapRegs, .genSpill = genSpill, .genReload = genReload, - .genMove = genMove, .directReload = directReload, - .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppReg = ppReg, - .mode64 = mode64}; + .univ = rRegUniv, .getRegUsage = getRegUsage, .mapRegs = mapRegs, + .genSpill = genSpill, .genReload = genReload, .genMove = genMove, + .directReload = directReload, .guest_sizeB = guest_sizeB, + .ppInstr = ppInstr, .ppReg = ppReg, .mode64 = mode64}; switch (vex_control.regalloc_version) { case 2: rcode = doRegisterAllocation_v2(vcode, &con); |
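The commit above replaces the per-backend isMove() query with an isRegRegMove / regMoveSrc / regMoveDst hint filled in by getRegUsage(), and teaches the v3 allocator to pre-compute vreg-to-vreg coalescing chains so that every vreg in a chain shares the head's spill slot and effective_dead_before value. As a rough illustration of that chain walk, here is a minimal standalone sketch (not the VEX code: plain ints stand in for HReg handles and the state struct is cut down to the three fields that matter here):

#include <stdio.h>

#define INVALID (-1)

typedef struct {
   int coalescedTo;              /* next vreg in the chain, or INVALID */
   int spill_offset;
   int effective_dead_before;
} VRegStateSketch;

/* Copy the head's spill slot and effective live-range end down its chain. */
static void propagate_chain(VRegStateSketch* vregs, int head)
{
   int offset = vregs[head].spill_offset;
   int dead   = vregs[head].effective_dead_before;
   for (int v = vregs[head].coalescedTo; v != INVALID; v = vregs[v].coalescedTo) {
      vregs[v].spill_offset          = offset;
      vregs[v].effective_dead_before = dead;
   }
}

int main(void)
{
   /* Chain v0 -> v2 -> v5, i.e. "MOV v0,v2" and "MOV v2,v5" were coalesced. */
   VRegStateSketch vregs[6];
   for (int i = 0; i < 6; i++)
      vregs[i] = (VRegStateSketch){ INVALID, 0, 0 };
   vregs[0] = (VRegStateSketch){ 2, 16, 42 };   /* the head owns the slot */
   vregs[2].coalescedTo = 5;

   propagate_chain(vregs, 0);
   printf("v5: spill offset %d, dead before %d\n",
          vregs[5].spill_offset, vregs[5].effective_dead_before);
   return 0;
}

The real pass does this once per coalesce_heads[] entry, after spill slots have been assigned, much as in the "Fill in the spill offsets and effective_dead_before for coalesced vregs" hunk above.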
From: Petar J. <pe...@so...> - 2017-10-10 16:07:25
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=c069589178d54e2ae5a22fab60b7068414598d48 commit c069589178d54e2ae5a22fab60b7068414598d48 Author: Petar Jovanovic <mip...@gm...> Date: Tue Oct 10 18:06:14 2017 +0200 mips: add support for bi-arch build on mips64 If native compiler can build Valgrind for mips32 o32 on native mips64 system, it should do it. This change adds a second architecture for MIPS in a similar way how it has been previously done for amd64 and ppc64. Diff: --- Makefile.all.am | 12 ++-- configure.ac | 187 ++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 119 insertions(+), 80 deletions(-) diff --git a/Makefile.all.am b/Makefile.all.am index 1859a51..faa170d 100644 --- a/Makefile.all.am +++ b/Makefile.all.am @@ -230,16 +230,16 @@ AM_CFLAGS_PSO_S390X_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) $(AM_CFLAGS_PSO_BASE) AM_CCASFLAGS_S390X_LINUX = @FLAG_M64@ -g -mzarch -march=z900 AM_FLAG_M3264_MIPS32_LINUX = @FLAG_M32@ -AM_CFLAGS_MIPS32_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE) @FLAG_MIPS32@ -AM_CFLAGS_PSO_MIPS32_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE) @FLAG_MIPS32@ \ +AM_CFLAGS_MIPS32_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE) +AM_CFLAGS_PSO_MIPS32_LINUX = @FLAG_M32@ $(AM_CFLAGS_BASE) \ $(AM_CFLAGS_PSO_BASE) -AM_CCASFLAGS_MIPS32_LINUX = @FLAG_M32@ -g @FLAG_MIPS32@ +AM_CCASFLAGS_MIPS32_LINUX = @FLAG_M32@ -g AM_FLAG_M3264_MIPS64_LINUX = @FLAG_M64@ -AM_CFLAGS_MIPS64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) @FLAG_MIPS64@ -AM_CFLAGS_PSO_MIPS64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) @FLAG_MIPS64@ \ +AM_CFLAGS_MIPS64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) +AM_CFLAGS_PSO_MIPS64_LINUX = @FLAG_M64@ $(AM_CFLAGS_BASE) \ $(AM_CFLAGS_PSO_BASE) -AM_CCASFLAGS_MIPS64_LINUX = @FLAG_M64@ -g @FLAG_MIPS64@ +AM_CCASFLAGS_MIPS64_LINUX = @FLAG_M64@ -g AM_FLAG_M3264_X86_SOLARIS = @FLAG_M32@ AM_CFLAGS_X86_SOLARIS = @FLAG_M32@ @PREFERRED_STACK_BOUNDARY_2@ \ diff --git a/configure.ac b/configure.ac index 392081d..4d45218 100644 --- a/configure.ac +++ b/configure.ac @@ -479,6 +479,19 @@ case "$ARCH_MAX-$VGCONF_OS" in AC_MSG_RESULT([no]) ]) CFLAGS=$safe_CFLAGS;; + mips64-linux) + AC_MSG_CHECKING([for 32 bit build support]) + safe_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -mips32 -mabi=32" + AC_LINK_IFELSE([AC_LANG_PROGRAM([[ + #include <sys/prctl.h> + ]], [[]])], [ + AC_MSG_RESULT([yes]) + ], [ + vg_cv_only64bit="yes" + AC_MSG_RESULT([no]) + ]) + CFLAGS=$safe_CFLAGS;; esac if test x$vg_cv_only64bit = xyes -a x$vg_cv_only32bit = xyes; then @@ -715,6 +728,7 @@ case "$ARCH_MAX-$VGCONF_OS" in ;; mips32-linux) VGCONF_ARCH_PRI="mips32" + VGCONF_ARCH_SEC="" VGCONF_PLATFORM_PRI_CAPS="MIPS32_LINUX" VGCONF_PLATFORM_SEC_CAPS="" valt_load_address_pri_norml="0x58000000" @@ -724,13 +738,32 @@ case "$ARCH_MAX-$VGCONF_OS" in AC_MSG_RESULT([ok (${ARCH_MAX}-${VGCONF_OS})]) ;; mips64-linux) - VGCONF_ARCH_PRI="mips64" - VGCONF_PLATFORM_PRI_CAPS="MIPS64_LINUX" - VGCONF_PLATFORM_SEC_CAPS="" - valt_load_address_pri_norml="0x58000000" - valt_load_address_pri_inner="0x38000000" valt_load_address_sec_norml="0xUNSET" valt_load_address_sec_inner="0xUNSET" + if test x$vg_cv_only64bit = xyes; then + VGCONF_ARCH_PRI="mips64" + VGCONF_PLATFORM_SEC_CAPS="" + VGCONF_PLATFORM_PRI_CAPS="MIPS64_LINUX" + VGCONF_PLATFORM_SEC_CAPS="" + valt_load_address_pri_norml="0x58000000" + valt_load_address_pri_inner="0x38000000" + elif test x$vg_cv_only32bit = xyes; then + VGCONF_ARCH_PRI="mips32" + VGCONF_ARCH_SEC="" + VGCONF_PLATFORM_PRI_CAPS="MIPS32_LINUX" + VGCONF_PLATFORM_SEC_CAPS="" + valt_load_address_pri_norml="0x58000000" + 
valt_load_address_pri_inner="0x38000000" + else + VGCONF_ARCH_PRI="mips64" + VGCONF_ARCH_SEC="mips32" + VGCONF_PLATFORM_PRI_CAPS="MIPS64_LINUX" + VGCONF_PLATFORM_SEC_CAPS="MIPS32_LINUX" + valt_load_address_pri_norml="0x58000000" + valt_load_address_pri_inner="0x38000000" + valt_load_address_sec_norml="0x58000000" + valt_load_address_sec_inner="0x38000000" + fi AC_MSG_RESULT([ok (${ARCH_MAX}-${VGCONF_OS})]) ;; x86-solaris) @@ -816,7 +849,8 @@ AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_ARM64, AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_S390X, test x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX ) AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_MIPS32, - test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX ) + test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX \ + -o x$VGCONF_PLATFORM_SEC_CAPS = xMIPS32_LINUX ) AM_CONDITIONAL(VGCONF_ARCHS_INCLUDE_MIPS64, test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX ) @@ -843,7 +877,8 @@ AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_S390X_LINUX, test x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX \ -o x$VGCONF_PLATFORM_SEC_CAPS = xS390X_LINUX) AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_MIPS32_LINUX, - test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX) + test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX \ + -o x$VGCONF_PLATFORM_SEC_CAPS = xMIPS32_LINUX) AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_MIPS64_LINUX, test x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX) AM_CONDITIONAL(VGCONF_PLATFORMS_INCLUDE_X86_DARWIN, @@ -1679,91 +1714,94 @@ fi # Checking for supported compiler flags. #---------------------------------------------------------------------------- -# does this compiler support -m32 ? -AC_MSG_CHECKING([if gcc accepts -m32]) - -safe_CFLAGS=$CFLAGS -CFLAGS="-m32 -Werror" - -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ - return 0; -]])], [ -FLAG_M32="-m32" -AC_MSG_RESULT([yes]) -], [ -FLAG_M32="" -AC_MSG_RESULT([no]) -]) -CFLAGS=$safe_CFLAGS - -AC_SUBST(FLAG_M32) +case "${host_cpu}" in + mips*) + # does this compiler support -march=mips32 (mips32 default) ? + AC_MSG_CHECKING([if gcc accepts -march=mips32 -mabi=32]) + safe_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -mips32 -mabi=32 -Werror" -# does this compiler support -m64 ? -AC_MSG_CHECKING([if gcc accepts -m64]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ + return 0; + ]])], [ + FLAG_M32="-mips32 -mabi=32" + AC_MSG_RESULT([yes]) + ], [ + FLAG_M32="" + AC_MSG_RESULT([no]) + ]) + CFLAGS=$safe_CFLAGS -safe_CFLAGS=$CFLAGS -CFLAGS="-m64 -Werror" + AC_SUBST(FLAG_M32) -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ - return 0; -]])], [ -FLAG_M64="-m64" -AC_MSG_RESULT([yes]) -], [ -FLAG_M64="" -AC_MSG_RESULT([no]) -]) -CFLAGS=$safe_CFLAGS -AC_SUBST(FLAG_M64) + # does this compiler support -march=mips64r2 (mips64r2 default) ? + AC_MSG_CHECKING([if gcc accepts -march=mips64r2 -mabi=64]) + safe_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS -march=mips64r2 -mabi=64 -Werror" -# does this compiler support -march=mips32 (mips32 default) ? -AC_MSG_CHECKING([if gcc accepts -march=mips32 -mabi=32]) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ + return 0; + ]])], [ + FLAG_M64="-march=mips64r2 -mabi=64" + AC_MSG_RESULT([yes]) + ], [ + FLAG_M64="" + AC_MSG_RESULT([no]) + ]) + CFLAGS=$safe_CFLAGS -safe_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS -march=mips32 -mabi=32 -Werror" + AC_SUBST(FLAG_M64) + ;; + *) + # does this compiler support -m32 ? 
+ AC_MSG_CHECKING([if gcc accepts -m32]) -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ - return 0; -]])], [ -FLAG_MIPS32="-march=mips32 -mabi=32" -AC_MSG_RESULT([yes]) -], [ -FLAG_MIPS32="" -AC_MSG_RESULT([no]) -]) -CFLAGS=$safe_CFLAGS + safe_CFLAGS=$CFLAGS + CFLAGS="-m32 -Werror" -AC_SUBST(FLAG_MIPS32) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ + return 0; + ]])], [ + FLAG_M32="-m32" + AC_MSG_RESULT([yes]) + ], [ + FLAG_M32="" + AC_MSG_RESULT([no]) + ]) + CFLAGS=$safe_CFLAGS + AC_SUBST(FLAG_M32) -# does this compiler support -march=mips64r2 (mips64r2 default) ? -AC_MSG_CHECKING([if gcc accepts -march=mips64r2 -mabi=64]) -safe_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS -march=mips64r2 -mabi=64 -Werror" + # does this compiler support -m64 ? + AC_MSG_CHECKING([if gcc accepts -m64]) -AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ - return 0; -]])], [ -FLAG_MIPS64="-march=mips64r2 -mabi=64" -AC_MSG_RESULT([yes]) -], [ -FLAG_MIPS64="" -AC_MSG_RESULT([no]) -]) -CFLAGS=$safe_CFLAGS + safe_CFLAGS=$CFLAGS + CFLAGS="-m64 -Werror" -AC_SUBST(FLAG_MIPS64) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ + return 0; + ]])], [ + FLAG_M64="-m64" + AC_MSG_RESULT([yes]) + ], [ + FLAG_M64="" + AC_MSG_RESULT([no]) + ]) + CFLAGS=$safe_CFLAGS + AC_SUBST(FLAG_M64) + ;; +esac # does this compiler support -march=octeon (Cavium OCTEON I Specific) ? AC_MSG_CHECKING([if gcc accepts -march=octeon]) safe_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS -march=octeon -Werror" +CFLAGS="$CFLAGS $FLAG_M64 -march=octeon -Werror" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ return 0; @@ -1783,7 +1821,7 @@ AC_SUBST(FLAG_OCTEON) AC_MSG_CHECKING([if gcc accepts -march=octeon2]) safe_CFLAGS=$CFLAGS -CFLAGS="$CFLAGS -march=octeon2 -Werror" +CFLAGS="$CFLAGS $FLAG_M64 -march=octeon2 -Werror" AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ return 0; @@ -3953,12 +3991,12 @@ if test x$VGCONF_PLATFORM_PRI_CAPS = xX86_LINUX \ -o x$VGCONF_PLATFORM_PRI_CAPS = xPPC32_LINUX \ -o x$VGCONF_PLATFORM_PRI_CAPS = xARM_LINUX \ -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS32_LINUX \ - -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX \ -o x$VGCONF_PLATFORM_PRI_CAPS = xX86_SOLARIS ; then mflag_primary=$FLAG_M32 elif test x$VGCONF_PLATFORM_PRI_CAPS = xAMD64_LINUX \ -o x$VGCONF_PLATFORM_PRI_CAPS = xPPC64_LINUX \ -o x$VGCONF_PLATFORM_PRI_CAPS = xARM64_LINUX \ + -o x$VGCONF_PLATFORM_PRI_CAPS = xMIPS64_LINUX \ -o x$VGCONF_PLATFORM_PRI_CAPS = xS390X_LINUX ; then mflag_primary=$FLAG_M64 elif test x$VGCONF_PLATFORM_PRI_CAPS = xX86_DARWIN ; then @@ -3970,7 +4008,8 @@ fi mflag_secondary= if test x$VGCONF_PLATFORM_SEC_CAPS = xX86_LINUX \ -o x$VGCONF_PLATFORM_SEC_CAPS = xPPC32_LINUX \ - -o x$VGCONF_PLATFORM_SEC_CAPS = xX86_SOLARIS ; then + -o x$VGCONF_PLATFORM_SEC_CAPS = xX86_SOLARIS \ + -o x$VGCONF_PLATFORM_SEC_CAPS = xMIPS32_LINUX ; then mflag_secondary=$FLAG_M32 elif test x$VGCONF_PLATFORM_SEC_CAPS = xX86_DARWIN ; then mflag_secondary="$FLAG_M32 -arch i386" |
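The configure.ac changes above decide between a 64-bit-only, a 32-bit-only and a bi-arch MIPS build by test-linking a small program with the 32-bit flags. Broadly speaking, the AC_LINK_IFELSE probe boils down to compiling something like the following with "-mips32 -mabi=32" (the conftest program autoconf actually generates carries extra boilerplate; this is only the gist):

#include <sys/prctl.h>

int main(void)
{
   return 0;
}

If it links, MIPS32_LINUX can be offered as the secondary platform; if not, vg_cv_only64bit is set and only the 64-bit platform is built.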
From: Carl L. <ca...@so...> - 2017-10-05 17:20:32
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=856d45eb7e3661a61ace32be2cfa10bf198620c8 commit 856d45eb7e3661a61ace32be2cfa10bf198620c8 Author: Carl Love <ca...@us...> Date: Thu Oct 5 12:19:59 2017 -0500 PPC64, vpermr, xxperm, xxpermr fix Iop_Perm8x16 selector field The implementation of the vpermr, xxperm, xxpermr violate this by using a mask of 0x1F. Fix the code and the corresponding comments to met the definition for Iop_Perm8x16. Use Iop_Dup8x16 to generate vector value for subtraction. Bugzilla 385334. Diff: --- NEWS | 1 + VEX/priv/guest_ppc_toIR.c | 38 +++++++++++++++++++------------------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/NEWS b/NEWS index efa1f4a..097930a 100644 --- a/NEWS +++ b/NEWS @@ -63,6 +63,7 @@ n-i-bz Fix missing workq_ops operations (macOS) 385208 PPC64, xxperm instruction exhausts temporary memory 385210 PPC64, vpermr instruction could exhaust temporary memory 385183 PPC64, Add support for xscmpeqdp, xscmpgtdp, xscmpgedp, xsmincdp instructions +385334 PPC64, fix vpermr, xxperm, xxpermr mask value. Release 3.13.0 (15 June 2017) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c index b5b0d03..f63146e 100644 --- a/VEX/priv/guest_ppc_toIR.c +++ b/VEX/priv/guest_ppc_toIR.c @@ -22579,6 +22579,7 @@ dis_vx_permute_misc( UInt theInstr, UInt opc2 ) IRTemp b_perm = newTemp(Ity_V128); IRTemp mask = newTemp(Ity_V128); IRTemp perm_val = newTemp(Ity_V128); + IRTemp vB_adj = newTemp( Ity_V128 ); if ( opc2 == 0x68 ) { DIP("xxperm v%d,v%d,v%d\n", (UInt)XT, (UInt)XA, (UInt)XB); @@ -22591,29 +22592,27 @@ dis_vx_permute_misc( UInt theInstr, UInt opc2 ) assign( vT, getVSReg( XT ) ); if ( opc2 == 0x68 ) // xxperm - assign( perm_val, - binop( Iop_AndV128, mkexpr( vB ), - unop( Iop_Dup8x16, mkU8( 0x1F ) ) ) ); + assign( vB_adj, mkexpr( vB ) ); else // xxpermr - assign( perm_val, + assign( vB_adj, binop( Iop_Sub16x8, - binop( Iop_64HLtoV128, - mkU64( 0x1F1F1F1F1F1F1F1F ), - mkU64( 0x1F1F1F1F1F1F1F1F ) ), - binop( Iop_AndV128, mkexpr( vB ), - unop( Iop_Dup8x16, mkU8( 0x1F ) ) ) ) ); + unop( Iop_Dup8x16, mkU8( 0x1F ) ), + mkexpr( vB ) ) ); - /* Limit the Perm8x16 steering values to 0 .. 31 as that is what + /* Limit the Perm8x16 steering values to 0 .. 15 as that is what IR specifies, and also to hide irrelevant bits from memcheck. */ + assign( perm_val, + binop( Iop_AndV128, mkexpr( vB_adj ), + unop( Iop_Dup8x16, mkU8( 0xF ) ) ) ); assign( a_perm, binop( Iop_Perm8x16, mkexpr( vA ), mkexpr( perm_val ) ) ); assign( b_perm, binop( Iop_Perm8x16, mkexpr( vT ), mkexpr( perm_val ) ) ); assign( mask, binop( Iop_SarN8x16, - binop( Iop_ShlN8x16, mkexpr( perm_val ), + binop( Iop_ShlN8x16, mkexpr( vB_adj ), mkU8( 3 ) ), mkU8( 7 ) ) ); // dst = (a & ~mask) | (b & mask) @@ -24361,28 +24360,29 @@ static Bool dis_av_permute ( UInt theInstr ) IRTemp b_perm = newTemp( Ity_V128 ); IRTemp mask = newTemp( Ity_V128 ); IRTemp vC_andF = newTemp( Ity_V128 ); + IRTemp vC_adj = newTemp( Ity_V128 ); DIP( "vpermr v%d,v%d,v%d,v%d\n", vD_addr, vA_addr, vB_addr, vC_addr); - /* Limit the Perm8x16 steering values to 0 .. 31 as that is what + /* Limit the Perm8x16 steering values to 0 .. 15 as that is what IR specifies, and also to hide irrelevant bits from memcheck. 
*/ + assign( vC_adj, + binop( Iop_Sub16x8, + unop( Iop_Dup8x16, mkU8( 0x1F ) ), + mkexpr( vC ) ) ); assign( vC_andF, - binop( Iop_Sub16x8, - binop( Iop_64HLtoV128, - mkU64( 0x1F1F1F1F1F1F1F1F ), - mkU64( 0x1F1F1F1F1F1F1F1F ) ), - binop( Iop_AndV128, mkexpr( vC ), - unop( Iop_Dup8x16, mkU8( 0x1F ) ) ) ) ); + binop( Iop_AndV128, mkexpr( vC_adj), + unop( Iop_Dup8x16, mkU8( 0xF ) ) ) ); assign( a_perm, binop( Iop_Perm8x16, mkexpr( vA ), mkexpr( vC_andF ) ) ); assign( b_perm, binop( Iop_Perm8x16, mkexpr( vB ), mkexpr( vC_andF ) ) ); // mask[i8] = (vC[i8]_4 == 1) ? 0xFF : 0x0 assign( mask, binop(Iop_SarN8x16, - binop( Iop_ShlN8x16, mkexpr( vC_andF ), + binop( Iop_ShlN8x16, mkexpr( vC_adj ), mkU8( 3 ) ), mkU8( 7 ) ) ); // dst = (a & ~mask) | (b & mask) putVReg( vD_addr, binop( Iop_OrV128, |
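Some background on the fix above: Iop_Perm8x16 selects bytes from a single 16-byte source, so its steering bytes must stay in 0..15, whereas the vperm-family selector is 5 bits wide and indexes a 32-byte concatenation of two sources. The patch therefore computes the adjusted selector first, masks it to 0xF for the two Perm8x16 operations, and uses bit 4 (via the ShlN8x16/SarN8x16 pair) to choose between the two permuted results. A scalar model of that select pattern, purely as a sketch in plain C (byte numbering conventions and the "31 minus index" reversal done by vpermr/xxpermr are glossed over):

#include <stdint.h>
#include <stdio.h>

/* result[i] = concat(srcA, srcB)[ sel[i] & 0x1F ]
   where indices 0..15 pick from srcA and 16..31 pick from srcB. */
static void perm32(uint8_t dst[16], const uint8_t srcA[16],
                   const uint8_t srcB[16], const uint8_t sel[16])
{
   for (int i = 0; i < 16; i++) {
      uint8_t s    = sel[i] & 0x1F;          /* 5-bit selector, 0..31      */
      uint8_t lane = s & 0xF;                /* what Perm8x16 is given     */
      dst[i] = (s & 0x10) ? srcB[lane]       /* bit 4: which source to use */
                          : srcA[lane];
   }
}

int main(void)
{
   uint8_t a[16], b[16], sel[16], dst[16];
   for (int i = 0; i < 16; i++) { a[i] = i; b[i] = 16 + i; sel[i] = 31 - i; }
   perm32(dst, a, b, sel);
   for (int i = 0; i < 16; i++) printf("%u ", dst[i]);
   printf("\n");
   return 0;
}

The mask built in the hunk is effectively the vectorised form of the "(s & 0x10)" test here.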
From: Julian S. <js...@ac...> - 2017-10-03 10:57:46
On 03/10/17 09:29, Ivo Raisr wrote:
> Please could you review and eventually test two patches attached to this bug:
> 384987 - VEX register allocator: allocate caller-save registers for
> short lived vregs
> https://bugs.kde.org/show_bug.cgi?id=384987

Nice work. I commented a little in the bug.
From: Julian S. <se...@so...> - 2017-10-03 09:32:23
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=ed5ae781a14701986e7facc9e47882895fcd911a commit ed5ae781a14701986e7facc9e47882895fcd911a Author: Julian Seward <js...@ac...> Date: Tue Oct 3 11:28:26 2017 +0200 Virtual-methodize the assembler in the usual way. So that it can be made to work for architectures other than X86. Diff: --- VEX/priv/host_x86_defs.c | 6 +-- VEX/priv/host_x86_defs.h | 4 +- VEX/priv/main_main.c | 107 +++++++++++++++++++++++++++++------------------ 3 files changed, 71 insertions(+), 46 deletions(-) diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index c2cade3..c55059e 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -3581,8 +3581,8 @@ VexInvalRange patchProfInc_X86 ( VexEndness endness_host, /* Create relocation info needed to patch a branch offset for instruction I whose first instruction is at WHERE in the assembly buffer. */ -Relocation collectRelocInfo_X86 ( AssemblyBufferOffset where, - X86Instr* i ) +Relocation createRelocInfo_X86 ( AssemblyBufferOffset where, + const X86Instr* i ) { /* Xin_JmpCond produces a conditional branch, of the form 0F 8x <32-bit-offset> @@ -3611,7 +3611,7 @@ Relocation collectRelocInfo_X86 ( AssemblyBufferOffset where, return rel; } default: - // We don't expect to be asked to compute relocation information + // We don't expect to be asked to create relocation information // for any other kind of instruction. vpanic("collectRelocInfo_X86"); } diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h index ea02c4a..ad5b75d 100644 --- a/VEX/priv/host_x86_defs.h +++ b/VEX/priv/host_x86_defs.h @@ -793,8 +793,8 @@ extern VexInvalRange patchProfInc_X86 ( VexEndness endness_host, /* Create relocation info needed to patch a branch offset for instruction I whose first instruction is at WHERE in the assembly buffer. */ -extern Relocation collectRelocInfo_X86 ( AssemblyBufferOffset where, - X86Instr* i ); +extern Relocation createRelocInfo_X86 ( AssemblyBufferOffset where, + const X86Instr* i ); #endif /* ndef __VEX_HOST_X86_DEFS_H */ diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 0629c15..df56850 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -838,20 +838,23 @@ void applyRelocation ( Relocation rel, profiler-inc, *OFFS_PROFINC will be set to show its offset in the output buffer, else *OFFS_PROFINC will be unchanged. */ -static -AssemblyBufferOffset emitSimpleInsn ( /*MB_MOD*/Int* offs_profInc, - UChar* buf, - AssemblyBufferOffset buf_used, - AssemblyBufferOffset buf_limit, - const HInstr* insn, - const EmitConstants* emitConsts, - const VexTranslateArgs* vta ) +static inline +AssemblyBufferOffset emitSimpleInsn ( + /*MB_MOD*/Int* offs_profInc, + UChar* buf, + AssemblyBufferOffset buf_used, + AssemblyBufferOffset buf_limit, + const HInstr* insn, + const EmitConstants* emitConsts, + const VexTranslateArgs* vta, + UInt (*emitHInstr)(Bool*, UChar*, UInt, const HInstr*, const EmitConstants*) +) { /* Emit into a 128 byte temp buffer */ UChar insn_bytes[128]; Bool isProfInc = False; - UInt j = emit_X86Instr(&isProfInc, insn_bytes, sizeof(insn_bytes), - insn, emitConsts); + UInt j = emitHInstr(&isProfInc, insn_bytes, sizeof(insn_bytes), + insn, emitConsts); /* Debug printing? */ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) { const Int min_spacing = 9; @@ -902,11 +905,18 @@ AssemblyBufferOffset emitSimpleInsn ( /*MB_MOD*/Int* offs_profInc, Returns True for OK, False for 'ran out of buffer space'. 
*/ static -Bool theAssembler ( /*MOD*/VexTranslateResult* res, - const VexTranslateArgs* vta, - HInstrIfThenElse* (*isIfThenElse)( const HInstr* ), - const Bool mode64, - const HInstrSB* rcode ) +Bool theAssembler ( + /*MOD*/VexTranslateResult* res, + const VexTranslateArgs* vta, + const Bool mode64, + const HInstrSB* rcode, + HInstrIfThenElse* (*isIfThenElse)(const HInstr*), + UInt (*emitHInstr)(Bool*, UChar*, UInt, + const HInstr*, const EmitConstants*), + HInstr* (*HInstr_Jmp)(UInt hereOffs, UInt dstOffs), + HInstr* (*HInstr_JmpCond)(HCondCode cc, UInt qDstEntryNo), + Relocation (*createRelocInfo)(AssemblyBufferOffset, const HInstr*) +) { // QElem are work Queue elements. The work Queue is the top level data // structure for the emitter. It is initialised with the HInstrVec* of @@ -1139,7 +1149,7 @@ Bool theAssembler ( /*MOD*/VexTranslateResult* res, AssemblyBufferOffset cursor_next = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], cursor, cursor_limit, vec->insns[vec_next], - &emitConsts, vta ); + &emitConsts, vta, emitHInstr ); if (UNLIKELY(cursor_next == cursor)) { // We ran out of output space. Give up. return False; @@ -1169,19 +1179,19 @@ Bool theAssembler ( /*MOD*/VexTranslateResult* res, queue[queueNewest].resumePoint = -1; // invalid HInstr* cond_branch - = X86Instr_JmpCond(hite->ccOOL, - queueNewest/*FOR DEBUG PRINTING ONLY*/); + = HInstr_JmpCond(hite->ccOOL, + queueNewest/*FOR DEBUG PRINTING ONLY*/); AssemblyBufferOffset cursor_next = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], cursor, cursor_limit, cond_branch, - &emitConsts, vta ); + &emitConsts, vta, emitHInstr ); if (UNLIKELY(cursor_next == cursor)) { // We ran out of output space. Give up. return False; } queue[queueNewest].jumpToOOLpoint_valid = True; queue[queueNewest].jumpToOOLpoint - = collectRelocInfo_X86(cursor, cond_branch); + = createRelocInfo(cursor, cond_branch); cursor = cursor_next; @@ -1218,11 +1228,11 @@ Bool theAssembler ( /*MOD*/VexTranslateResult* res, if (0) vex_printf(" // Generate jump to resume point [%03u]\n", qe->resumePoint); - HInstr* jmp = X86Instr_Jmp(cursor, qe->resumePoint); + HInstr* jmp = HInstr_Jmp(cursor, qe->resumePoint); AssemblyBufferOffset cursor_next = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], cursor, cursor_limit, jmp, - &emitConsts, vta ); + &emitConsts, vta, emitHInstr ); if (UNLIKELY(cursor_next == cursor)) { // We ran out of output space. Give up. 
return False; @@ -1269,11 +1279,14 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, Int, Int, Bool, Bool, Addr ); - Int (*emit) ( /*MB_MOD*/Bool*, - UChar*, Int, const HInstr*, Bool, VexEndness, - const void*, const void*, const void*, - const void* ); - Bool (*preciseMemExnsFn) ( Int, Int, VexRegisterUpdates ); + UInt (*emitInstr) ( /*MB_MOD*/Bool*, + UChar*, UInt, const HInstr*, + const EmitConstants* ); + Bool (*preciseMemExnsFn) + ( Int, Int, VexRegisterUpdates ); + HInstr* (*HInstr_Jmp)(UInt hereOffs, UInt dstOffs); + HInstr* (*HInstr_JmpCond)(HCondCode cc, UInt qDstEntryNo); + Relocation (*createRelocInfo)(AssemblyBufferOffset, const HInstr*); const RRegUniverse* rRegUniv = NULL; @@ -1298,14 +1311,17 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppCondCode = NULL; ppReg = NULL; iselSB = NULL; - emit = NULL; + emitInstr = NULL; + preciseMemExnsFn = NULL; + HInstr_Jmp = NULL; + HInstr_JmpCond = NULL; + createRelocInfo = NULL; mode64 = False; chainingAllowed = False; guest_sizeB = 0; offB_HOST_EvC_COUNTER = 0; offB_HOST_EvC_FAILADDR = 0; - preciseMemExnsFn = NULL; vassert(vex_initdone); vassert(vta->disp_cp_xassisted != NULL); @@ -1420,10 +1436,16 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode); ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86); iselSB = X86FN(iselSB_X86); - emit = CAST_TO_TYPEOF(emit) X86FN(emit_X86Instr); + emitInstr = CAST_TO_TYPEOF(emitInstr) X86FN(emit_X86Instr); + HInstr_Jmp = CAST_TO_TYPEOF(HInstr_Jmp) + X86FN(X86Instr_Jmp); + HInstr_JmpCond = CAST_TO_TYPEOF(HInstr_JmpCond) + X86FN(X86Instr_JmpCond); + createRelocInfo = CAST_TO_TYPEOF(createRelocInfo) + X86FN(createRelocInfo_X86); vassert(vta->archinfo_host.endness == VexEndnessLE); break; - +#if 0 case VexArchAMD64: mode64 = True; rRegUniv = AMD64FN(getRRegUniverse_AMD64()); @@ -1438,7 +1460,7 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr); ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64); iselSB = AMD64FN(iselSB_AMD64); - emit = CAST_TO_TYPEOF(emit) AMD64FN(emit_AMD64Instr); + emitInstr = CAST_TO_TYPEOF(emitInstr) AMD64FN(emit_AMD64Instr); vassert(vta->archinfo_host.endness == VexEndnessLE); break; @@ -1455,7 +1477,7 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr); ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC); iselSB = PPC32FN(iselSB_PPC); - emit = CAST_TO_TYPEOF(emit) PPC32FN(emit_PPCInstr); + emitInstr = CAST_TO_TYPEOF(emitInstr) PPC32FN(emit_PPCInstr); vassert(vta->archinfo_host.endness == VexEndnessBE); break; @@ -1472,7 +1494,7 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr); ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC); iselSB = PPC64FN(iselSB_PPC); - emit = CAST_TO_TYPEOF(emit) PPC64FN(emit_PPCInstr); + emitInstr = CAST_TO_TYPEOF(emitInstr) PPC64FN(emit_PPCInstr); vassert(vta->archinfo_host.endness == VexEndnessBE || vta->archinfo_host.endness == VexEndnessLE ); break; @@ -1491,7 +1513,7 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr); ppReg = CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390); iselSB = S390FN(iselSB_S390); - emit = CAST_TO_TYPEOF(emit) S390FN(emit_S390Instr); + emitInstr = CAST_TO_TYPEOF(emitInstr) S390FN(emit_S390Instr); 
vassert(vta->archinfo_host.endness == VexEndnessBE); break; @@ -1508,7 +1530,7 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr); ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM); iselSB = ARMFN(iselSB_ARM); - emit = CAST_TO_TYPEOF(emit) ARMFN(emit_ARMInstr); + emitInstr = CAST_TO_TYPEOF(emitInstr) ARMFN(emit_ARMInstr); vassert(vta->archinfo_host.endness == VexEndnessLE); break; @@ -1525,7 +1547,7 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr); ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64); iselSB = ARM64FN(iselSB_ARM64); - emit = CAST_TO_TYPEOF(emit) ARM64FN(emit_ARM64Instr); + emitInstr = CAST_TO_TYPEOF(emitInstr) ARM64FN(emit_ARM64Instr); vassert(vta->archinfo_host.endness == VexEndnessLE); break; @@ -1542,7 +1564,7 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr); ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS); iselSB = MIPS32FN(iselSB_MIPS); - emit = CAST_TO_TYPEOF(emit) MIPS32FN(emit_MIPSInstr); + emitInstr = CAST_TO_TYPEOF(emitInstr) MIPS32FN(emit_MIPSInstr); vassert(vta->archinfo_host.endness == VexEndnessLE || vta->archinfo_host.endness == VexEndnessBE); break; @@ -1560,11 +1582,11 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr); ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS); iselSB = MIPS64FN(iselSB_MIPS); - emit = CAST_TO_TYPEOF(emit) MIPS64FN(emit_MIPSInstr); + emitInstr = CAST_TO_TYPEOF(emitInstr) MIPS64FN(emit_MIPSInstr); vassert(vta->archinfo_host.endness == VexEndnessLE || vta->archinfo_host.endness == VexEndnessBE); break; - +#endif default: vpanic("LibVEX_Translate: unsupported host insn set"); } @@ -1661,7 +1683,10 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, "------------------------\n\n"); } - Bool assembly_ok = theAssembler( res, vta, isIfThenElse, mode64, rcode ); + Bool assembly_ok + = theAssembler( res, vta, mode64, rcode, isIfThenElse, + emitInstr, HInstr_Jmp, HInstr_JmpCond, + createRelocInfo ); if (!assembly_ok) goto out_of_buffer_space; |
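The change above threads the X86-specific entry points (emit_X86Instr, X86Instr_Jmp, X86Instr_JmpCond, createRelocInfo_X86) through function pointers, so theAssembler() itself stays target-independent. Reduced to a toy, the pattern looks like this (simplified types, nothing VEX-specific; BackendHooks, emitInsn_Toy and friends are made-up names):

#include <stdio.h>

typedef struct { int opcode; } Insn;                 /* stand-in for HInstr */

typedef struct {
   unsigned (*emitInsn)(unsigned char* buf, unsigned cap, const Insn* i);
   void     (*ppInsn)(const Insn* i);
} BackendHooks;

/* A toy backend that "encodes" each instruction as a single byte. */
static unsigned emitInsn_Toy(unsigned char* buf, unsigned cap, const Insn* i)
{
   if (cap < 1) return 0;
   buf[0] = (unsigned char)i->opcode;
   return 1;
}
static void ppInsn_Toy(const Insn* i) { printf("toy_insn %d\n", i->opcode); }

/* Target-independent driver, loosely analogous to theAssembler(). */
static unsigned assemble(const BackendHooks* hooks,
                         unsigned char* buf, unsigned cap,
                         const Insn* insns, unsigned n)
{
   unsigned used = 0;
   for (unsigned k = 0; k < n; k++) {
      hooks->ppInsn(&insns[k]);
      unsigned j = hooks->emitInsn(buf + used, cap - used, &insns[k]);
      if (j == 0) break;                     /* ran out of buffer space */
      used += j;
   }
   return used;
}

int main(void)
{
   BackendHooks toy = { emitInsn_Toy, ppInsn_Toy };
   Insn prog[2] = { { 0x90 }, { 0xC3 } };
   unsigned char buf[16];
   unsigned used = assemble(&toy, buf, sizeof buf, prog, 2);
   printf("emitted %u bytes\n", used);
   return 0;
}

Each host architecture then supplies its own set of hooks in the switch over vta->arch_host, as the diff does for VexArchX86.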
From: Ivo R. <iv...@iv...> - 2017-10-03 07:29:17
Dear arch maintainers,
Please could you review and eventually test two patches attached to this bug:
384987 - VEX register allocator: allocate caller-save registers for
short lived vregs
https://bugs.kde.org/show_bug.cgi?id=384987
I've tested on amd64, ppc8le and arm64.
Other architectures need to be given some lovin'.
I expect at least slight improvement there; on amd64 we
actually got quite a good compaction of the produced rcode.
Kind regards,
I.
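For readers who do not follow the bug link: the title suggests the allocator is being taught to prefer caller-save (call-clobbered) real registers for vregs that die soon, leaving callee-save registers for values that must survive helper calls. The patches themselves are attached to the bug, not quoted here, so the following is only a guess at the general shape of such a heuristic, written as illustrative C (none of these names exist in VEX):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct {
   int  index;
   bool caller_save;    /* clobbered by helper calls */
   bool is_free;
} RReg;

/* Pick a real register for a vreg that dies at instruction |dead_before|.
   |next_call| is the instruction number of the next helper call, or -1 if
   there is none before the vreg dies. */
static RReg* pick_rreg(RReg* rregs, size_t n, int dead_before, int next_call)
{
   bool short_lived = (next_call < 0) || (dead_before <= next_call);
   /* First pass: honour the preference. */
   for (size_t i = 0; i < n; i++)
      if (rregs[i].is_free && rregs[i].caller_save == short_lived)
         return &rregs[i];
   /* Second pass: anything free will do. */
   for (size_t i = 0; i < n; i++)
      if (rregs[i].is_free)
         return &rregs[i];
   return NULL;          /* nothing free: the caller must spill */
}

int main(void)
{
   RReg rregs[3] = { { 0, /*caller_save*/ false, true },
                     { 1, /*caller_save*/ true,  true },
                     { 2, /*caller_save*/ false, true } };
   /* A vreg dying at insn 5 with the next helper call at insn 9:
      prefer the caller-save register even though r0 is also free. */
   RReg* r = pick_rreg(rregs, 3, /*dead_before*/ 5, /*next_call*/ 9);
   if (r != NULL)
      printf("picked r%d (caller_save=%d)\n", r->index, (int)r->caller_save);
   return 0;
}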
From: Julian S. <se...@so...> - 2017-10-02 16:48:53
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=08b043f53a9e3f37b10b5af6abc2e218678b9e2f commit 08b043f53a9e3f37b10b5af6abc2e218678b9e2f Author: Julian Seward <js...@ac...> Date: Mon Oct 2 18:43:22 2017 +0200 libvex_BackEnd: lift the assembler out into its own function, for tidyness. No functional change. Diff: --- VEX/priv/main_main.c | 1342 +++++++++++++++++++++++++------------------------- 1 file changed, 681 insertions(+), 661 deletions(-) diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 8caaca2..0629c15 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -891,760 +891,779 @@ AssemblyBufferOffset emitSimpleInsn ( /*MB_MOD*/Int* offs_profInc, } -/* ---- The back end proper ---- */ +/* ---- The assembler ---- */ -/* Back end of the compilation pipeline. Is not exported. */ +/* Assemble RCODE, writing the resulting machine code into the buffer + specified by VTA->host_bytes of size VTA->host_bytes_size. When done, + store the number of bytes written at the location specified by + VTA->host_bytes_used. RES->offs_profInc may be modified as a result. No + other fields of RES are changed. -static void libvex_BackEnd ( const VexTranslateArgs *vta, - /*MOD*/ VexTranslateResult* res, - /*MOD*/ IRSB* irsb, - VexRegisterUpdates pxControl ) + Returns True for OK, False for 'ran out of buffer space'. +*/ +static +Bool theAssembler ( /*MOD*/VexTranslateResult* res, + const VexTranslateArgs* vta, + HInstrIfThenElse* (*isIfThenElse)( const HInstr* ), + const Bool mode64, + const HInstrSB* rcode ) { - /* This the bundle of functions we need to do the back-end stuff - (insn selection, reg-alloc, assembly) whilst being insulated - from the target instruction set. */ - Bool (*isMove) ( const HInstr*, HReg*, HReg* ); - void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ); - void (*mapRegs) ( HRegRemap*, HInstr*, Bool ); - HInstrIfThenElse* (*isIfThenElse)( const HInstr* ); - void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ); - void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ); - HInstr* (*genMove) ( HReg, HReg, Bool ); - HInstr* (*genHInstrITE) ( HInstrIfThenElse* ); - HInstr* (*directReload) ( HInstr*, HReg, Short ); - void (*ppInstr) ( const HInstr*, Bool ); - void (*ppCondCode) ( HCondCode ); - UInt (*ppReg) ( HReg ); - HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*, - const VexAbiInfo*, Int, Int, Bool, Bool, - Addr ); - Int (*emit) ( /*MB_MOD*/Bool*, - UChar*, Int, const HInstr*, Bool, VexEndness, - const void*, const void*, const void*, - const void* ); - Bool (*preciseMemExnsFn) ( Int, Int, VexRegisterUpdates ); - - const RRegUniverse* rRegUniv = NULL; + // QElem are work Queue elements. The work Queue is the top level data + // structure for the emitter. It is initialised with the HInstrVec* of + // the overall HInstrSB. Every OOL HInstrVec* in the tree will at some + // point be present in the Queue. IL HInstrVec*s are never present in + // the Queue because the inner emitter loop processes them in-line, using + // a Stack (see below) to keep track of its nesting level. + // + // The Stack (see below) is empty before and after every Queue element is + // processed. In other words, the Stack only holds state needed during + // the processing of a single Queue element. + // + // The ordering of elements in the Queue is irrelevant -- correct code + // will be emitted even with set semantics (arbitrary order). 
However, + // the FIFOness of the queue is believed to generate code in which + // colder and colder code (more deeply nested OOLs) is placed further + // and further from the start of the emitted machine code, which sounds + // like a layout which should minimise icache misses. + // + // QElems also contain two pieces of jump-fixup information. When we + // finally come to process a QElem, we need to know: + // + // * |jumpToOOLpoint|: the place which wants to jump to the start of the + // emitted insns for this QElem. We must have already emitted that, + // since it will be the conditional jump that leads to this QElem (OOL + // block). + // + // * |resumePoint|: the place we should jump back to after the QElem is + // finished (the "resume point"), which is the emitted code of the + // HInstr immediately following the HInstrIfThenElse that has this + // QElem as its OOL block. + // + // When the QElem is processed, we know both the |jumpToOOLpoint| and + // the |resumePoint|, and so the first can be patched, and the second + // we generate an instruction to jump to. + // + // There are three complications with patching: + // + // (1) per comments on Stack elems, we do not know the |resumePoint| when + // creating a QElem. That will only be known when processing of the + // corresponding IL block is completed. + // + // (2) The top level HInstrVec* has neither a |jumpToOOLpoint| nor a + // |resumePoint|. + // + // (3) Non-top-level OOLs may not have a valid |resumePoint| if they do + // an unconditional IR-level Exit. We can generate the resume point + // branch, but it will be never be used. + typedef + struct { + // The HInstrs for this OOL. + HInstrVec* oolVec; + // Where we should patch to jump to the OOL ("how do we get here?") + Bool jumpToOOLpoint_valid; + Relocation jumpToOOLpoint; + // Resume point offset, in bytes from start of output buffer + // ("where do we go after this block is completed?") + Bool resumePoint_valid; + AssemblyBufferOffset resumePoint; + } + QElem; - Bool mode64, chainingAllowed; - Int out_used; - Int guest_sizeB; - Int offB_HOST_EvC_COUNTER; - Int offB_HOST_EvC_FAILADDR; - Addr max_ga; - HInstrSB* vcode; - HInstrSB* rcode; - isMove = NULL; - getRegUsage = NULL; - mapRegs = NULL; - isIfThenElse = NULL; - genSpill = NULL; - genReload = NULL; - genMove = NULL; - genHInstrITE = NULL; - directReload = NULL; - ppInstr = NULL; - ppCondCode = NULL; - ppReg = NULL; - iselSB = NULL; - emit = NULL; + // SElem are stack elements. When we suspend processing a HInstrVec* in + // order to process an IL path in an IfThenElse, we push the HInstrVec* + // and the next index to process on the stack, so that we know where to + // resume when the nested IL sequence is completed. |vec| and |vec_next| + // record the resume HInstr. + // + // A second effect of processing a nested IL sequence is that we will + // have to (later) process the corresponding OOL sequence. And that OOL + // sequence will have to finish with a jump back to the "resume point" + // (the emitted instruction immediately following the IfThenElse). We + // only know the offset of the resume point instruction in the output + // buffer when we actually resume emitted from there -- that is, when the + // entry we pushed, is popped. So, when we pop, we must mark the + // corresponding OOL entry in the Queue to record there the resume point + // offset. For this reason we also carry |ool_qindex|, which is the + // index of the corresponding OOL entry in the Queue. 
+ typedef + struct { + HInstrVec* vec; // resume point HInstr vector + UInt vec_next; // resume point HInstr vector index + Int ool_qindex; // index in Queue of OOL to mark when we resume + } + SElem; - mode64 = False; - chainingAllowed = False; - guest_sizeB = 0; - offB_HOST_EvC_COUNTER = 0; - offB_HOST_EvC_FAILADDR = 0; - preciseMemExnsFn = NULL; + // The Stack. The stack depth is bounded by maximum number of nested + // hot (IL) sections, so in practice it is going to be very small. + const Int nSTACK = 4; - vassert(vex_initdone); - vassert(vta->disp_cp_xassisted != NULL); + SElem stack[nSTACK]; + Int stackPtr; // points to most recently pushed entry <=> "-1 means empty" - vex_traceflags = vta->traceflags; + // The Queue. The queue size is bounded by the number of cold (OOL) + // sections in the entire HInstrSB, so it's also going to be pretty + // small. + const Int nQUEUE = 8; - /* Both the chainers and the indir are either NULL or non-NULL. */ - if (vta->disp_cp_chain_me_to_slowEP != NULL) { - vassert(vta->disp_cp_chain_me_to_fastEP != NULL); - vassert(vta->disp_cp_xindir != NULL); - chainingAllowed = True; - } else { - vassert(vta->disp_cp_chain_me_to_fastEP == NULL); - vassert(vta->disp_cp_xindir == NULL); - } + QElem queue[nQUEUE]; + Int queueOldest; // index of oldest entry, initially 0 + Int queueNewest; // index of newest entry, + // initially -1, otherwise must be >= queueOldest - switch (vta->arch_guest) { + /////////////////////////////////////////////////////// - case VexArchX86: - preciseMemExnsFn - = X86FN(guest_x86_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestX86State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR); - break; + const Bool verbose_asm = (vex_traceflags & VEX_TRACE_ASM) != 0; - case VexArchAMD64: - preciseMemExnsFn - = AMD64FN(guest_amd64_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestAMD64State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR); - break; + const EmitConstants emitConsts + = { .mode64 = mode64, + .endness_host = vta->archinfo_host.endness, + .disp_cp_chain_me_to_slowEP = vta->disp_cp_chain_me_to_slowEP, + .disp_cp_chain_me_to_fastEP = vta->disp_cp_chain_me_to_fastEP, + .disp_cp_xindir = vta->disp_cp_xindir, + .disp_cp_xassisted = vta->disp_cp_xassisted }; - case VexArchPPC32: - preciseMemExnsFn - = PPC32FN(guest_ppc32_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestPPC32State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC32State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR); - break; + AssemblyBufferOffset cursor = 0; + AssemblyBufferOffset cursor_limit = vta->host_bytes_size; - case VexArchPPC64: - preciseMemExnsFn - = PPC64FN(guest_ppc64_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestPPC64State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC64State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR); - break; + *(vta->host_bytes_used) = 0; - case VexArchS390X: - preciseMemExnsFn - = S390FN(guest_s390x_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestS390XState); - offB_HOST_EvC_COUNTER = offsetof(VexGuestS390XState,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestS390XState,host_EvC_FAILADDR); - break; + queueOldest = 0; + queueNewest = -1; - case 
VexArchARM: - preciseMemExnsFn - = ARMFN(guest_arm_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestARMState); - offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR); - break; + vassert(queueNewest < nQUEUE); + queueNewest++; + { + QElem* qe = &queue[queueNewest]; + vex_bzero(qe, sizeof(*qe)); + qe->oolVec = rcode->insns; + qe->jumpToOOLpoint_valid = False; + qe->resumePoint_valid = False; + } + vassert(queueNewest == 0); - case VexArchARM64: - preciseMemExnsFn - = ARM64FN(guest_arm64_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestARM64State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestARM64State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestARM64State,host_EvC_FAILADDR); - break; + /* Main loop, processing Queue entries, until there are no more. */ + while (queueOldest <= queueNewest) { - case VexArchMIPS32: - preciseMemExnsFn - = MIPS32FN(guest_mips32_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestMIPS32State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS32State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR); - break; + Int qCur = queueOldest; + if (UNLIKELY(verbose_asm)) + vex_printf("BEGIN queue[%d]\n", qCur); - case VexArchMIPS64: - preciseMemExnsFn - = MIPS64FN(guest_mips64_state_requires_precise_mem_exns); - guest_sizeB = sizeof(VexGuestMIPS64State); - offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS64State,host_EvC_COUNTER); - offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS64State,host_EvC_FAILADDR); - break; + // Take the oldest entry in the queue + QElem* qe = &queue[queueOldest]; + queueOldest++; - default: - vpanic("LibVEX_Codegen: unsupported guest insn set"); - } + // Stay sane. Only the top level block has no branch to it and no + // resume point. + if (qe->oolVec == rcode->insns) { + // This is the top level block + vassert(!qe->jumpToOOLpoint_valid); + vassert(!qe->resumePoint_valid); + } else { + vassert(qe->jumpToOOLpoint_valid); + vassert(qe->resumePoint_valid); + // In the future, we might be able to allow the resume point to be + // invalid for non-top-level blocks, if the block contains an + // unconditional exit. Currently the IR can't represent that, so + // the assertion is valid. + } + // Processing |qe| + if (qe->jumpToOOLpoint_valid) { + // patch qe->jmpToOOLpoint to jump to |here| + if (UNLIKELY(verbose_asm)) { + vex_printf(" -- APPLY "); + ppRelocation(qe->jumpToOOLpoint); + vex_printf("\n"); + } + applyRelocation(qe->jumpToOOLpoint, &vta->host_bytes[0], + cursor, cursor, vta->archinfo_host.endness, + verbose_asm); + } - switch (vta->arch_host) { + // Initialise the stack, for processing of |qe|. 
+ stackPtr = 0; // "contains one element" - case VexArchX86: - mode64 = False; - rRegUniv = X86FN(getRRegUniverse_X86()); - isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr); - mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr); - isIfThenElse = CAST_TO_TYPEOF(isIfThenElse) X86FN(isIfThenElse_X86Instr); - genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86); - genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86); - genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86); - genHInstrITE = CAST_TO_TYPEOF(genHInstrITE) X86FN(X86Instr_IfThenElse); - directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86); - ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr); - ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode); - ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86); - iselSB = X86FN(iselSB_X86); - emit = CAST_TO_TYPEOF(emit) X86FN(emit_X86Instr); - vassert(vta->archinfo_host.endness == VexEndnessLE); - break; + stack[stackPtr].vec = qe->oolVec; + stack[stackPtr].vec_next = 0; + stack[stackPtr].ool_qindex = -1; // INVALID - case VexArchAMD64: - mode64 = True; - rRegUniv = AMD64FN(getRRegUniverse_AMD64()); - isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr); - mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr); - genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64); - genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64); - genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64); - directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64); - ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr); - ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64); - iselSB = AMD64FN(iselSB_AMD64); - emit = CAST_TO_TYPEOF(emit) AMD64FN(emit_AMD64Instr); - vassert(vta->archinfo_host.endness == VexEndnessLE); - break; + // Iterate till the stack is empty. This effectively does a + // depth-first traversal of the hot-path (IL) tree reachable from + // here, and at the same time adds any encountered cold-path (OOL) + // blocks to the Queue for later processing. This is the heart of the + // flattening algorithm. 
+ while (stackPtr >= 0) { - case VexArchPPC32: - mode64 = False; - rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64)); - isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr); - genSpill = CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC); - genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC); - genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC); - ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr); - ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC); - iselSB = PPC32FN(iselSB_PPC); - emit = CAST_TO_TYPEOF(emit) PPC32FN(emit_PPCInstr); - vassert(vta->archinfo_host.endness == VexEndnessBE); - break; + if (UNLIKELY(verbose_asm)) + vex_printf(" -- CONSIDER stack[%d]\n", stackPtr); - case VexArchPPC64: - mode64 = True; - rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64)); - isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr); - genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC); - genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC); - genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC); - ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr); - ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC); - iselSB = PPC64FN(iselSB_PPC); - emit = CAST_TO_TYPEOF(emit) PPC64FN(emit_PPCInstr); - vassert(vta->archinfo_host.endness == VexEndnessBE || - vta->archinfo_host.endness == VexEndnessLE ); - break; + HInstrVec* vec = stack[stackPtr].vec; + UInt vec_next = stack[stackPtr].vec_next; + Int ool_qindex = stack[stackPtr].ool_qindex; + stackPtr--; - case VexArchS390X: - mode64 = True; - rRegUniv = S390FN(getRRegUniverse_S390()); - isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr); - mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr); - genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390); - genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390); - genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390); - // fixs390: consider implementing directReload_S390 - ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr); - ppReg = CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390); - iselSB = S390FN(iselSB_S390); - emit = CAST_TO_TYPEOF(emit) S390FN(emit_S390Instr); - vassert(vta->archinfo_host.endness == VexEndnessBE); - break; + if (vec_next > 0) { + // We're resuming the current IL block having just finished + // processing a nested IL. The OOL counterpart to the nested IL + // we just finished processing will have to jump back to here. + // So we'll need to mark its Queue entry to record that fact. 
- case VexArchARM: - mode64 = False; - rRegUniv = ARMFN(getRRegUniverse_ARM()); - isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr); - genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM); - genReload = CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM); - genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM); - ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr); - ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM); - iselSB = ARMFN(iselSB_ARM); - emit = CAST_TO_TYPEOF(emit) ARMFN(emit_ARMInstr); - vassert(vta->archinfo_host.endness == VexEndnessLE); - break; + // First assert that the OOL actually *is* in the Queue (it + // must be, since we can't have processed it yet). + vassert(queueOldest <= queueNewest); // "at least 1 entry in Q" + vassert(queueOldest <= ool_qindex && ool_qindex <= queueNewest); - case VexArchARM64: - mode64 = True; - rRegUniv = ARM64FN(getRRegUniverse_ARM64()); - isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr); - mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr); - genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64); - genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64); - genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64); - ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr); - ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64); - iselSB = ARM64FN(iselSB_ARM64); - emit = CAST_TO_TYPEOF(emit) ARM64FN(emit_ARM64Instr); - vassert(vta->archinfo_host.endness == VexEndnessLE); - break; + vassert(!queue[ool_qindex].resumePoint_valid); + queue[ool_qindex].resumePoint = cursor; + queue[ool_qindex].resumePoint_valid = True; + if (UNLIKELY(verbose_asm)) + vex_printf(" -- RESUME previous IL\n"); + } else { + // We're starting a new IL. Due to the tail-recursive nature of + // entering ILs, this means we can actually only be starting the + // outermost (top level) block for this particular Queue entry. 
+ vassert(ool_qindex == -1); + vassert(vec == qe->oolVec); + if (UNLIKELY(verbose_asm)) + vex_printf(" -- START new IL\n"); + } - case VexArchMIPS32: - mode64 = False; - rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64)); - isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr); - genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS); - genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS); - genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS); - ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr); - ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS); - iselSB = MIPS32FN(iselSB_MIPS); - emit = CAST_TO_TYPEOF(emit) MIPS32FN(emit_MIPSInstr); - vassert(vta->archinfo_host.endness == VexEndnessLE - || vta->archinfo_host.endness == VexEndnessBE); - break; + // Repeatedly process "zero or more simple HInstrs followed by (an + // IfThenElse or end-of-block)" + while (True) { - case VexArchMIPS64: - mode64 = True; - rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64)); - isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr); - getRegUsage - = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr); - mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr); - genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS); - genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS); - genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS); - ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr); - ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS); - iselSB = MIPS64FN(iselSB_MIPS); - emit = CAST_TO_TYPEOF(emit) MIPS64FN(emit_MIPSInstr); - vassert(vta->archinfo_host.endness == VexEndnessLE - || vta->archinfo_host.endness == VexEndnessBE); - break; + // Process "zero or more simple HInstrs" + while (vec_next < vec->insns_used + && !isIfThenElse(vec->insns[vec_next])) { + AssemblyBufferOffset cursor_next + = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], + cursor, cursor_limit, vec->insns[vec_next], + &emitConsts, vta ); + if (UNLIKELY(cursor_next == cursor)) { + // We ran out of output space. Give up. + return False; + } + vec_next++; + cursor = cursor_next; + } + + // Now we've either got to the end of the hot path, or we have + // an IfThenElse. + if (vec_next >= vec->insns_used) + break; + + // So we have an IfThenElse. + HInstrIfThenElse* hite = isIfThenElse(vec->insns[vec_next]); + vassert(hite); + vassert(hite->n_phis == 0); // the regalloc will have removed them + + // Put |ite|'s OOL block in the Queue. We'll deal with it + // later. Also, generate the (skeleton) conditional branch to it, + // and collect enough information that we can create patch the + // branch later, once we know where the destination is. + vassert(queueNewest < nQUEUE-1); // else out of Queue space + queueNewest++; + queue[queueNewest].oolVec = hite->outOfLine; + queue[queueNewest].resumePoint_valid = False; // not yet known + queue[queueNewest].resumePoint = -1; // invalid + + HInstr* cond_branch + = X86Instr_JmpCond(hite->ccOOL, + queueNewest/*FOR DEBUG PRINTING ONLY*/); + AssemblyBufferOffset cursor_next + = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], + cursor, cursor_limit, cond_branch, + &emitConsts, vta ); + if (UNLIKELY(cursor_next == cursor)) { + // We ran out of output space. Give up. 
+ return False; + } + queue[queueNewest].jumpToOOLpoint_valid = True; + queue[queueNewest].jumpToOOLpoint + = collectRelocInfo_X86(cursor, cond_branch); - default: - vpanic("LibVEX_Translate: unsupported host insn set"); - } + cursor = cursor_next; - // Are the host's hardware capabilities feasible. The function will - // not return if hwcaps are infeasible in some sense. - check_hwcaps(vta->arch_host, vta->archinfo_host.hwcaps); + // Now we descend into |ite's| IL block. So we need to save + // where we are in this block, so we can resume when the inner + // one is done. + vassert(stackPtr < nSTACK-1); // else out of Stack space + stackPtr++; + stack[stackPtr].vec = vec; + stack[stackPtr].vec_next = vec_next+1; + stack[stackPtr].ool_qindex = queueNewest; + // And now descend into the inner block. We could have just + // pushed its details on the stack and immediately pop it, but + // it seems simpler to update |vec| and |vec_next| and continue + // directly. + if (UNLIKELY(verbose_asm)) { + vex_printf(" -- START inner IL\n"); + } + vec = hite->fallThrough; + vec_next = 0; - /* Turn it into virtual-registerised code. Build trees -- this - also throws away any dead bindings. */ - max_ga = ado_treebuild_BB( irsb, preciseMemExnsFn, pxControl ); + // And continue with "Repeatedly process ..." + } - if (vta->finaltidy) { - irsb = vta->finaltidy(irsb); - } + // Getting here means we've completed an inner IL and now want to + // resume the parent IL. That is, pop a saved context off the + // stack. + } - vexAllocSanityCheck(); + // Hot path is complete. Now, probably, we have to add a jump + // back to the resume point. + if (qe->resumePoint_valid) { + if (0) + vex_printf(" // Generate jump to resume point [%03u]\n", + qe->resumePoint); + HInstr* jmp = X86Instr_Jmp(cursor, qe->resumePoint); + AssemblyBufferOffset cursor_next + = emitSimpleInsn( &(res->offs_profInc), &vta->host_bytes[0], + cursor, cursor_limit, jmp, + &emitConsts, vta ); + if (UNLIKELY(cursor_next == cursor)) { + // We ran out of output space. Give up. + return False; + } + cursor = cursor_next; + } - if (vex_traceflags & VEX_TRACE_TREES) { - vex_printf("\n------------------------" - " After tree-building " - "------------------------\n\n"); - ppIRSB ( irsb ); - vex_printf("\n"); + if (UNLIKELY(verbose_asm)) + vex_printf("END queue[%d]\n\n", qCur); + // Finished with this Queue entry. } + // Queue empty, all blocks processed - /* HACK */ - if (0) { - *(vta->host_bytes_used) = 0; - res->status = VexTransOK; return; - } - /* end HACK */ + *(vta->host_bytes_used) = cursor; - if (vex_traceflags & VEX_TRACE_VCODE) - vex_printf("\n------------------------" - " Instruction selection " - "------------------------\n"); + return True; // OK +} - /* No guest has its IP field at offset zero. If this fails it - means some transformation pass somewhere failed to update/copy - irsb->offsIP properly. */ - vassert(irsb->offsIP >= 16); - vcode = iselSB ( irsb, vta->arch_host, - &vta->archinfo_host, - &vta->abiinfo_both, - offB_HOST_EvC_COUNTER, - offB_HOST_EvC_FAILADDR, - chainingAllowed, - vta->addProfInc, - max_ga ); +/* ---- The back end proper ---- */ - vexAllocSanityCheck(); +/* Back end of the compilation pipeline. Is not exported. 
*/ - if (vex_traceflags & VEX_TRACE_VCODE) - vex_printf("\n"); +static void libvex_BackEnd ( const VexTranslateArgs* vta, + /*MOD*/ VexTranslateResult* res, + /*MOD*/ IRSB* irsb, + VexRegisterUpdates pxControl ) +{ + /* This the bundle of functions we need to do the back-end stuff + (insn selection, reg-alloc, assembly) whilst being insulated + from the target instruction set. */ + Bool (*isMove) ( const HInstr*, HReg*, HReg* ); + void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ); + void (*mapRegs) ( HRegRemap*, HInstr*, Bool ); + HInstrIfThenElse* (*isIfThenElse)( const HInstr* ); + void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ); + void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ); + HInstr* (*genMove) ( HReg, HReg, Bool ); + HInstr* (*genHInstrITE) ( HInstrIfThenElse* ); + HInstr* (*directReload) ( HInstr*, HReg, Short ); + void (*ppInstr) ( const HInstr*, Bool ); + void (*ppCondCode) ( HCondCode ); + UInt (*ppReg) ( HReg ); + HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*, + const VexAbiInfo*, Int, Int, Bool, Bool, + Addr ); + Int (*emit) ( /*MB_MOD*/Bool*, + UChar*, Int, const HInstr*, Bool, VexEndness, + const void*, const void*, const void*, + const void* ); + Bool (*preciseMemExnsFn) ( Int, Int, VexRegisterUpdates ); - if (vex_traceflags & VEX_TRACE_VCODE) { - ppHInstrSB(vcode, isIfThenElse, ppInstr, ppCondCode, mode64); - } + const RRegUniverse* rRegUniv = NULL; - /* Register allocate. */ - RegAllocControl con = { - .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage, - .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill, - .genReload = genReload, .genMove = genMove, .genHInstrITE = genHInstrITE, - .directReload = directReload, .guest_sizeB = guest_sizeB, - .ppInstr = ppInstr, .ppCondCode = ppCondCode, .ppReg = ppReg, - .mode64 = mode64}; - rcode = doRegisterAllocation(vcode, &con); + Bool mode64, chainingAllowed; + Int guest_sizeB; + Int offB_HOST_EvC_COUNTER; + Int offB_HOST_EvC_FAILADDR; + Addr max_ga; + HInstrSB* vcode; + HInstrSB* rcode; - vexAllocSanityCheck(); + isMove = NULL; + getRegUsage = NULL; + mapRegs = NULL; + isIfThenElse = NULL; + genSpill = NULL; + genReload = NULL; + genMove = NULL; + genHInstrITE = NULL; + directReload = NULL; + ppInstr = NULL; + ppCondCode = NULL; + ppReg = NULL; + iselSB = NULL; + emit = NULL; - if (vex_traceflags & VEX_TRACE_RCODE) { - vex_printf("\n------------------------" - " Register-allocated code " - "------------------------\n\n"); - ppHInstrSB(rcode, isIfThenElse, ppInstr, ppCondCode, mode64); - vex_printf("\n"); - } + mode64 = False; + chainingAllowed = False; + guest_sizeB = 0; + offB_HOST_EvC_COUNTER = 0; + offB_HOST_EvC_FAILADDR = 0; + preciseMemExnsFn = NULL; - /* HACK */ - if (0) { - *(vta->host_bytes_used) = 0; - res->status = VexTransOK; return; - } - /* end HACK */ + vassert(vex_initdone); + vassert(vta->disp_cp_xassisted != NULL); - /* Assemble */ - if (vex_traceflags & VEX_TRACE_ASM) { - vex_printf("\n------------------------" - " Assembly " - "------------------------\n\n"); + vex_traceflags = vta->traceflags; + + /* Both the chainers and the indir are either NULL or non-NULL. 
*/ + if (vta->disp_cp_chain_me_to_slowEP != NULL) { + vassert(vta->disp_cp_chain_me_to_fastEP != NULL); + vassert(vta->disp_cp_xindir != NULL); + chainingAllowed = True; + } else { + vassert(vta->disp_cp_chain_me_to_fastEP == NULL); + vassert(vta->disp_cp_xindir == NULL); } - //////////////////////////////////////////////////////// - //// BEGIN the assembler + switch (vta->arch_guest) { - // QElem are work Queue elements. The work Queue is the top level data - // structure for the emitter. It is initialised with the HInstrVec* of - // the overall HInstrSB. Every OOL HInstrVec* in the tree will at some - // point be present in the Queue. IL HInstrVec*s are never present in - // the Queue because the inner emitter loop processes them in-line, using - // a Stack (see below) to keep track of its nesting level. - // - // The Stack (see below) is empty before and after every Queue element is - // processed. In other words, the Stack only holds state needed during - // the processing of a single Queue element. - // - // The ordering of elements in the Queue is irrelevant -- correct code - // will be emitted even with set semantics (arbitrary order). However, - // the FIFOness of the queue is believed to generate code in which - // colder and colder code (more deeply nested OOLs) is placed further - // and further from the start of the emitted machine code, which sounds - // like a layout which should minimise icache misses. - // - // QElems also contain two pieces of jump-fixup information. When we - // finally come to process a QElem, we need to know: - // - // * |jumpToOOLpoint|: the place which wants to jump to the start of the - // emitted insns for this QElem. We must have already emitted that, - // since it will be the conditional jump that leads to this QElem (OOL - // block). - // - // * |resumePoint|: the place we should jump back to after the QElem is - // finished (the "resume point"), which is the emitted code of the - // HInstr immediately following the HInstrIfThenElse that has this - // QElem as its OOL block. - // - // When the QElem is processed, we know both the |jumpToOOLpoint| and - // the |resumePoint|, and so the first can be patched, and the second - // we generate an instruction to jump to. - // - // There are three complications with patching: - // - // (1) per comments on Stack elems, we do not know the |resumePoint| when - // creating a QElem. That will only be known when processing of the - // corresponding IL block is completed. - // - // (2) The top level HInstrVec* has neither a |jumpToOOLpoint| nor a - // |resumePoint|. - // - // (3) Non-top-level OOLs may not have a valid |resumePoint| if they do - // an unconditional IR-level Exit. We can generate the resume point - // branch, but it will be never be used. - typedef - struct { - // The HInstrs for this OOL. 
- HInstrVec* oolVec; - // Where we should patch to jump to the OOL ("how do we get here?") - Bool jumpToOOLpoint_valid; - Relocation jumpToOOLpoint; - // Resume point offset, in bytes from start of output buffer - // ("where do we go after this block is completed?") - Bool resumePoint_valid; - AssemblyBufferOffset resumePoint; - } - QElem; + case VexArchX86: + preciseMemExnsFn + = X86FN(guest_x86_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestX86State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR); + break; + + case VexArchAMD64: + preciseMemExnsFn + = AMD64FN(guest_amd64_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestAMD64State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR); + break; + case VexArchPPC32: + preciseMemExnsFn + = PPC32FN(guest_ppc32_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestPPC32State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC32State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR); + break; - // SElem are stack elements. When we suspend processing a HInstrVec* in - // order to process an IL path in an IfThenElse, we push the HInstrVec* - // and the next index to process on the stack, so that we know where to - // resume when the nested IL sequence is completed. |vec| and |vec_next| - // record the resume HInstr. - // - // A second effect of processing a nested IL sequence is that we will - // have to (later) process the corresponding OOL sequence. And that OOL - // sequence will have to finish with a jump back to the "resume point" - // (the emitted instruction immediately following the IfThenElse). We - // only know the offset of the resume point instruction in the output - // buffer when we actually resume emitted from there -- that is, when the - // entry we pushed, is popped. So, when we pop, we must mark the - // corresponding OOL entry in the Queue to record there the resume point - // offset. For this reason we also carry |ool_qindex|, which is the - // index of the corresponding OOL entry in the Queue. - typedef - struct { - HInstrVec* vec; // resume point HInstr vector - UInt vec_next; // resume point HInstr vector index - Int ool_qindex; // index in Queue of OOL to mark when we resume - } - SElem; + case VexArchPPC64: + preciseMemExnsFn + = PPC64FN(guest_ppc64_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestPPC64State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR); + break; - // The Stack. The stack depth is bounded by maximum number of nested - // hot (IL) sections, so in practice it is going to be very small. 
- const Int nSTACK = 4; + case VexArchS390X: + preciseMemExnsFn + = S390FN(guest_s390x_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestS390XState); + offB_HOST_EvC_COUNTER = offsetof(VexGuestS390XState,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestS390XState,host_EvC_FAILADDR); + break; - SElem stack[nSTACK]; - Int stackPtr; // points to most recently pushed entry <=> "-1 means empty" + case VexArchARM: + preciseMemExnsFn + = ARMFN(guest_arm_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestARMState); + offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR); + break; - // The Queue. The queue size is bounded by the number of cold (OOL) - // sections in the entire HInstrSB, so it's also going to be pretty - // small. - const Int nQUEUE = 8; + case VexArchARM64: + preciseMemExnsFn + = ARM64FN(guest_arm64_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestARM64State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestARM64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestARM64State,host_EvC_FAILADDR); + break; - QElem queue[nQUEUE]; - Int queueOldest; // index of oldest entry, initially 0 - Int queueNewest; // index of newest entry, - // initially -1, otherwise must be >= queueOldest + case VexArchMIPS32: + preciseMemExnsFn + = MIPS32FN(guest_mips32_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestMIPS32State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS32State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS32State,host_EvC_FAILADDR); + break; - /////////////////////////////////////////////////////// + case VexArchMIPS64: + preciseMemExnsFn + = MIPS64FN(guest_mips64_state_requires_precise_mem_exns); + guest_sizeB = sizeof(VexGuestMIPS64State); + offB_HOST_EvC_COUNTER = offsetof(VexGuestMIPS64State,host_EvC_COUNTER); + offB_HOST_EvC_FAILADDR = offsetof(VexGuestMIPS64State,host_EvC_FAILADDR); + break; - const Bool verbose_asm = (vex_traceflags & VEX_TRACE_ASM) != 0; + default: + vpanic("LibVEX_Codegen: unsupported guest insn set"); + } - const EmitConstants emitConsts - = { .mode64 = mode64, - .endness_host = vta->archinfo_host.endness, - .disp_cp_chain_me_to_slowEP = vta->disp_cp_chain_me_to_slowEP, - .disp_cp_chain_me_to_fastEP = vta->disp_cp_chain_me_to_fastEP, - .disp_cp_xindir = vta->disp_cp_xindir, - .disp_cp_xassisted = vta->disp_cp_xassisted }; - AssemblyBufferOffset cursor = 0; - AssemblyBufferOffset cursor_limit = vta->host_bytes_size; + switch (vta->arch_host) { - queueOldest = 0; - queueNewest = -1; + case VexArchX86: + mode64 = False; + rRegUniv = X86FN(getRRegUniverse_X86()); + isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr); + mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr); + isIfThenElse = CAST_TO_TYPEOF(isIfThenElse) X86FN(isIfThenElse_X86Instr); + genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86); + genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86); + genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86); + genHInstrITE = CAST_TO_TYPEOF(genHInstrITE) X86FN(X86Instr_IfThenElse); + directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86); + ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr); + ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode); + ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86); + iselSB = X86FN(iselSB_X86); + emit = CAST_TO_TYPEOF(emit) 
X86FN(emit_X86Instr); + vassert(vta->archinfo_host.endness == VexEndnessLE); + break; - vassert(queueNewest < nQUEUE); - queueNewest++; - { - QElem* qe = &queue[queueNewest]; - vex_bzero(qe, sizeof(*qe)); - qe->oolVec = rcode->insns; - qe->jumpToOOLpoint_valid = False; - qe->resumePoint_valid = False; - } - vassert(queueNewest == 0); + case VexArchAMD64: + mode64 = True; + rRegUniv = AMD64FN(getRRegUniverse_AMD64()); + isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr); + mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr); + genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64); + genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64); + genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64); + directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64); + ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr); + ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64); + iselSB = AMD64FN(iselSB_AMD64); + emit = CAST_TO_TYPEOF(emit) AMD64FN(emit_AMD64Instr); + vassert(vta->archinfo_host.endness == VexEndnessLE); + break; - /* Main loop, processing Queue entries, until there are no more. */ - while (queueOldest <= queueNewest) { + case VexArchPPC32: + mode64 = False; + rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64)); + isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr); + genSpill = CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC); + genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC); + genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC); + ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr); + ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC); + iselSB = PPC32FN(iselSB_PPC); + emit = CAST_TO_TYPEOF(emit) PPC32FN(emit_PPCInstr); + vassert(vta->archinfo_host.endness == VexEndnessBE); + break; - Int qCur = queueOldest; - if (UNLIKELY(verbose_asm)) - vex_printf("BEGIN queue[%d]\n", qCur); + case VexArchPPC64: + mode64 = True; + rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64)); + isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr); + genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC); + genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC); + genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC); + ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr); + ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC); + iselSB = PPC64FN(iselSB_PPC); + emit = CAST_TO_TYPEOF(emit) PPC64FN(emit_PPCInstr); + vassert(vta->archinfo_host.endness == VexEndnessBE || + vta->archinfo_host.endness == VexEndnessLE ); + break; - // Take the oldest entry in the queue - QElem* qe = &queue[queueOldest]; - queueOldest++; + case VexArchS390X: + mode64 = True; + rRegUniv = S390FN(getRRegUniverse_S390()); + isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr); + mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr); + genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390); + genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390); + genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390); + // fixs390: consider implementing directReload_S390 + ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr); + ppReg = 
CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390); + iselSB = S390FN(iselSB_S390); + emit = CAST_TO_TYPEOF(emit) S390FN(emit_S390Instr); + vassert(vta->archinfo_host.endness == VexEndnessBE); + break; - // Stay sane. Only the top level block has no branch to it and no - // resume point. - if (qe->oolVec == rcode->insns) { - // This is the top level block - vassert(!qe->jumpToOOLpoint_valid); - vassert(!qe->resumePoint_valid); - } else { - vassert(qe->jumpToOOLpoint_valid); - vassert(qe->resumePoint_valid); - // In the future, we might be able to allow the resume point to be - // invalid for non-top-level blocks, if the block contains an - // unconditional exit. Currently the IR can't represent that, so - // the assertion is valid. - } + case VexArchARM: + mode64 = False; + rRegUniv = ARMFN(getRRegUniverse_ARM()); + isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr); + genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM); + genReload = CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM); + genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM); + ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr); + ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM); + iselSB = ARMFN(iselSB_ARM); + emit = CAST_TO_TYPEOF(emit) ARMFN(emit_ARMInstr); + vassert(vta->archinfo_host.endness == VexEndnessLE); + break; - // Processing |qe| - if (qe->jumpToOOLpoint_valid) { - // patch qe->jmpToOOLpoint to jump to |here| - if (UNLIKELY(verbose_asm)) { - vex_printf(" -- APPLY "); - ppRelocation(qe->jumpToOOLpoint); - vex_printf("\n"); - } - applyRelocation(qe->jumpToOOLpoint, &vta->host_bytes[0], - cursor, cursor, vta->archinfo_host.endness, - verbose_asm); - } + case VexArchARM64: + mode64 = True; + rRegUniv = ARM64FN(getRRegUniverse_ARM64()); + isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr); + mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr); + genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64); + genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64); + genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64); + ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr); + ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64); + iselSB = ARM64FN(iselSB_ARM64); + emit = CAST_TO_TYPEOF(emit) ARM64FN(emit_ARM64Instr); + vassert(vta->archinfo_host.endness == VexEndnessLE); + break; - // Initialise the stack, for processing of |qe|. 
- stackPtr = 0; // "contains one element" + case VexArchMIPS32: + mode64 = False; + rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64)); + isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr); + genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS); + genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS); + genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS); + ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr); + ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS); + iselSB = MIPS32FN(iselSB_MIPS); + emit = CAST_TO_TYPEOF(emit) MIPS32FN(emit_MIPSInstr); + vassert(vta->archinfo_host.endness == VexEndnessLE + || vta->archinfo_host.endness == VexEndnessBE); + break; - stack[stackPtr].vec = qe->oolVec; - stack[stackPtr].vec_next = 0; - stack[stackPtr].ool_qindex = -1; // INVALID + case VexArchMIPS64: + mode64 = True; + rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64)); + isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr); + getRegUsage + = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr); + mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr); + genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS); + genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS); + genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS); + ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr); + ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS); + iselSB = MIPS64FN(iselSB_MIPS); + emit = CAST_TO_TYPEOF(emit) MIPS64FN(emit_MIPSInstr); + vassert(vta->archinfo_host.endness == VexEndnessLE + || vta->archinfo_host.endness == VexEndnessBE); + break; - // Iterate till the stack is empty. This effectively does a - // depth-first traversal of the hot-path (IL) tree reachable from - // here, and at the same time adds any encountered cold-path (OOL) - // blocks to the Queue for later processing. This is the heart of the - // flattening algorithm. - while (stackPtr >= 0) { + default: + vpanic("LibVEX_Translate: unsupported host insn set"); + } - if (UNLIKELY(verbose_asm)) - vex_printf(" -- CONSIDER stack[%d]\n", stackPtr); + // Are the host's hardware capabilities feasible. The function will + // not return if hwcaps are infeasible in some sense. + check_hwcaps(vta->arch_host, vta->archinfo_host.hwcaps); - HInstrVec* vec = stack[stackPtr].vec; - UInt vec_next = stack[stackPtr].vec_next; - Int ool_qindex = stack[stackPtr].ool_qindex; - stackPtr--; - if (vec_next > 0) { - // We're resuming the current IL block having just finished - // processing a nested IL. The OOL counterpart to the nested IL - // we just finished processing will have to jump back to here. - // So we'll need to mark its Queue entry to record that fact. + /* Turn it into virtual-registerised code. Build trees -- this + also throws away any dead bindings. */ + max_ga = ado_treebuild_BB( irsb, preciseMemExnsFn, pxControl ); - // First assert that the OOL actually *is* in the Queue (it - // must be, since we can't have processed it yet). 
- vassert(queueOldest <= queueNewest); // "at least 1 entry in Q" - vassert(queueOldest <= ool_qindex && ool_qindex <= queueNewest); + if (vta->finaltidy) { + irsb = vta->finaltidy(irsb); + } - vassert(!queue[ool_qindex].resumePoint_valid); - queue[ool_qindex].resumePoint = cursor; - queue[ool_qindex].resumePoint_valid = True; - if (UNLIKELY(verbose_asm)) - vex_printf(" -- RESUME previous IL\n"); - } else { - // We're starting a new IL. Due to the tail-recursive nature of - // entering ILs, this means we can actually only be starting the - // outermost (top level) block for this particular Queue entry. - vassert(ool_qindex == -1); - vassert(vec == qe->oolVec); - if (UNLIKELY(verbose_asm)) - vex_printf(" -- START new IL\n"); - } + vexAllocSanityCheck(); - ... [truncated message content] |
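As a rough illustration of the Queue/Stack scheme the new emitter comments describe -- hot (IL) paths walked depth-first and emitted in-line, every cold (OOL) block deferred to a FIFO work queue and emitted later with a jump back to its recorded resume point -- here is a minimal standalone sketch. All type and function names below are invented for the purpose; this is not the VEX code, and the real emitter additionally records a relocation for each conditional branch and patches it once the cold block's offset is known.

   #include <assert.h>
   #include <stdio.h>

   typedef struct Insn Insn;
   typedef struct { Insn* insns; int n; } Vec;      /* like HInstrVec          */
   struct Insn {
      const char* text;                             /* non-NULL: a simple insn */
      Vec* hot;                                     /* non-NULL: an ITE ...    */
      Vec* cold;                                    /* ... with these two arms */
   };

   typedef struct { Vec* vec; int resume; } QElem;  /* cold block to lay out   */
   typedef struct { Vec* vec; int next; int qix; } SElem;  /* hot-path resume  */

   static int cursor = 0;                           /* assembly buffer offset  */
   static int emit ( const char* s )
      { printf("%3d: %s\n", cursor, s); return cursor++; }

   static void flatten ( Vec* top )
   {
      QElem queue[8]; int qOld = 0, qNew = 0;
      queue[0] = (QElem){ top, -1 };                /* top level: no resume point */

      while (qOld <= qNew) {                        /* oldest queue entry first   */
         QElem* qe = &queue[qOld++];
         SElem stack[4]; int sp = 0;
         stack[0] = (SElem){ qe->vec, 0, -1 };

         while (sp >= 0) {                          /* DFS over the hot-path tree */
            Vec* vec = stack[sp].vec;
            int  nxt = stack[sp].next;
            int  qix = stack[sp].qix;
            sp--;
            if (qix >= 0)
               queue[qix].resume = cursor;          /* cold twin jumps back here  */
            while (nxt < vec->n && vec->insns[nxt].text != NULL)
               emit(vec->insns[nxt++].text);        /* plain instructions         */
            if (nxt >= vec->n)
               continue;                            /* this hot path is finished  */
            Insn* ite = &vec->insns[nxt];
            assert(qNew + 1 < 8 && sp + 2 < 4);     /* fixed-size Queue and Stack */
            queue[++qNew] = (QElem){ ite->cold, -1 };     /* defer the OOL block  */
            emit("jcc <cold>          ; patched later");
            stack[++sp] = (SElem){ vec, nxt + 1, qNew };  /* resume after the ITE */
            stack[++sp] = (SElem){ ite->hot, 0, -1 };     /* descend into hot IL  */
         }

         if (qe->resume >= 0) {                     /* rejoin the hot path        */
            char buf[48];
            snprintf(buf, sizeof buf, "jmp %d              ; resume point",
                     qe->resume);
            emit(buf);
         }
      }
   }

   int main ( void )
   {
      Insn hotI[]  = { { "add", NULL, NULL } };
      Insn coldI[] = { { "call helper", NULL, NULL } };
      Vec  vHot = { hotI, 1 }, vCold = { coldI, 1 };
      Insn topI[]  = { { "mov", NULL, NULL },
                       { NULL,  &vHot, &vCold },    /* an if-then-else           */
                       { "ret", NULL, NULL } };
      Vec  vTop = { topI, 3 };
      flatten(&vTop);
      return 0;
   }

Run on the tiny example in main, this prints the hot path first (mov, jcc, add, ret) and only then the deferred cold block (call helper, jmp 3), mirroring how the FIFO ordering pushes colder and colder code further from the start of the translation. Unlike the patch, which simply updates |vec| and |vec_next| to enter an inner IL directly, the sketch pushes the inner block and immediately pops it; the effect is the same.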
From: Petar J. <pe...@so...> - 2017-10-02 12:55:54
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=da3987aa18e77a8b4eb8176eed705b1aeb5c1151 commit da3987aa18e77a8b4eb8176eed705b1aeb5c1151 Author: Petar Jovanovic <mip...@gm...> Date: Mon Oct 2 14:54:20 2017 +0200 mips32: add BE-exp files for several tests This fixes several tests on mips32 BE platforms: memcheck/tests/mips32/fadvise64 drd/tests/tc19_shadowmem helgrind/tests/tc19_shadowmem Diff: --- drd/tests/Makefile.am | 1 + drd/tests/tc19_shadowmem.stderr.exp-32bit-BE | 4224 +++ helgrind/tests/Makefile.am | 1 + helgrind/tests/tc19_shadowmem.stderr.exp-mips32-BE | 26122 +++++++++++++++++++ memcheck/tests/mips32/Makefile.am | 3 +- memcheck/tests/mips32/fadvise64.stderr.exp-BE | 24 + 6 files changed, 30374 insertions(+), 1 deletion(-) diff --git a/drd/tests/Makefile.am b/drd/tests/Makefile.am index bafa412..9b604e9 100644 --- a/drd/tests/Makefile.am +++ b/drd/tests/Makefile.am @@ -321,6 +321,7 @@ EXTRA_DIST = \ tc18_semabuse.stderr.exp-solaris \ tc18_semabuse.vgtest \ tc19_shadowmem.stderr.exp-32bit \ + tc19_shadowmem.stderr.exp-32bit-BE \ tc19_shadowmem.stderr.exp-64bit \ tc19_shadowmem.vgtest \ tc21_pthonce.stderr.exp \ diff --git a/drd/tests/tc19_shadowmem.stderr.exp-32bit-BE b/drd/tests/tc19_shadowmem.stderr.exp-32bit-BE new file mode 100644 index 0000000..64f20bf --- /dev/null +++ b/drd/tests/tc19_shadowmem.stderr.exp-32bit-BE @@ -0,0 +1,4224 @@ + + +=========================================================== +=== 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 === +=========================================================== + +---------- char gran, 0 .. 99, skip 0 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:288) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 0 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 1 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:290) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 1 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 2 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:292) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 2 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 3 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:294) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 3 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 4 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:296) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) 
+Address 0x........ is at offset 4 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 5 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:298) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 5 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 6 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:300) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 6 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 7 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:302) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 7 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 8 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:304) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 8 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 9 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:306) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 9 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 10 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:308) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 10 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 11 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:310) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 11 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 12 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:312) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 12 from 0x......... 
Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 13 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:314) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 13 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 14 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:316) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 14 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 15 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:318) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 15 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 16 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:320) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 16 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 17 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:322) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 17 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 18 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:324) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 18 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 19 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:326) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 19 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 20 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:328) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 20 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) 
+ by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 21 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:330) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 21 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 22 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:332) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 22 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 23 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:334) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 23 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 24 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:336) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 24 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 25 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:338) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 25 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 26 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:340) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 26 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 27 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:342) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 27 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 99, skip 28 ---------- +Conflicting store by thread x at 0x........ size 1 + at 0x........: child8 (tc19_shadowmem.c:33) + by 0x........: steer (tc19_shadowmem.c:344) + by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?) +Address 0x........ is at offset 28 from 0x......... Allocation context: + at 0x........: malloc (vg_replace_malloc.c:...) + by 0x........: main (tc19_shadowmem.c:144) + +---------- char gran, 0 .. 
99, skip 29 .. 98 ----------
+Conflicting store by thread x at 0x........ size 1
+   at 0x........: child8 (tc19_shadowmem.c:33)
+   by 0x........: steer (tc19_shadowmem.c:346)
+   by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?)
+Address 0x........ is at offset 29 from 0x......... Allocation context:
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (tc19_shadowmem.c:144)
[one such record follows for every skip value from 29 to 98; only the reported offset and the steer frame (tc19_shadowmem.c:346 .. 484, advancing by two source lines per record) change]
+
+==========================================================
+=== 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 ===
+==========================================================
+
+---------- short gran, 0 .. 98, skip 0 ----------
+Conflicting store by thread x at 0x........ size 2
+   at 0x........: child16 (tc19_shadowmem.c:57)
+   by 0x........: steer (tc19_shadowmem.c:288)
+   by 0x........: vgDrd_thread_wrapper (drd_pthread_intercepts.c:?)
+Address 0x........ is at offset 0 from 0x......... Allocation context:
+   at 0x........: malloc (vg_replace_malloc.c:...)
+   by 0x........: main (tc19_shadowmem.c:144)
[analogous size-2 records follow for skip values 1 to 33, alternating "short gran, 0 .. 98" and "short gran, 1 .. 98" headers, with steer at tc19_shadowmem.c:290 .. 354]
... [truncated message content]
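The records above are DRD's expected output for drd/tests/tc19_shadowmem: worker threads store to individual bytes (or shorts) of the same malloc'd block without synchronization, and DRD reports one "Conflicting store" per offset, together with the allocation context of the block. As a rough illustration only — a hypothetical sketch, not the actual tc19_shadowmem.c source (buf and writer are made-up names) — a minimal program that produces the same kind of size-1 conflicting-store report under "valgrind --tool=drd" looks like this:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdlib.h>

    static char *buf;

    static void *writer(void *arg)
    {
        /* Unsynchronized one-byte store: reported as
           "Conflicting store by thread x ... size 1". */
        buf[0] = (char)(intptr_t)arg;
        return NULL;
    }

    int main(void)
    {
        pthread_t t1, t2;

        buf = malloc(100);   /* the "Allocation context" in the report */
        pthread_create(&t1, NULL, writer, (void *)1);
        pthread_create(&t2, NULL, writer, (void *)2);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        free(buf);
        return 0;
    }

Built with "gcc -g race.c -lpthread" and run under "valgrind --tool=drd ./a.out", the two unsynchronized stores race on the same byte of the heap block, which is exactly the pattern the test's expected output enumerates offset by offset.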
|
From: Petar J. <pe...@so...> - 2017-10-02 11:40:27
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=f3637a085306f3dbb22c52d953b23bd76a85178c

commit f3637a085306f3dbb22c52d953b23bd76a85178c
Author: Petar Jovanovic <mip...@gm...>
Date:   Mon Oct 2 13:28:50 2017 +0200

    mips: make sure configure script checks for correct ABIs

    For mips64, we currently build for n64. For mips32, we currently build
    for o32. Pass these flags explicitly in configure checks.

Diff:
---
 configure.ac | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index 483d9bd..392081d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1720,15 +1720,15 @@ AC_SUBST(FLAG_M64)
 # does this compiler support -march=mips32 (mips32 default) ?
-AC_MSG_CHECKING([if gcc accepts -march=mips32])
+AC_MSG_CHECKING([if gcc accepts -march=mips32 -mabi=32])
 safe_CFLAGS=$CFLAGS
-CFLAGS="$CFLAGS -march=mips32 -Werror"
+CFLAGS="$CFLAGS -march=mips32 -mabi=32 -Werror"
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[
   return 0;
 ]])], [
-FLAG_MIPS32="-march=mips32"
+FLAG_MIPS32="-march=mips32 -mabi=32"
 AC_MSG_RESULT([yes])
 ], [
 FLAG_MIPS32=""
@@ -1740,15 +1740,15 @@ AC_SUBST(FLAG_MIPS32)
 # does this compiler support -march=mips64r2 (mips64r2 default) ?
-AC_MSG_CHECKING([if gcc accepts -march=mips64r2])
+AC_MSG_CHECKING([if gcc accepts -march=mips64r2 -mabi=64])
 safe_CFLAGS=$CFLAGS
-CFLAGS="$CFLAGS -march=mips64r2 -Werror"
+CFLAGS="$CFLAGS -march=mips64r2 -mabi=64 -Werror"
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[
   return 0;
 ]])], [
-FLAG_MIPS64="-march=mips64r2"
+FLAG_MIPS64="-march=mips64r2 -mabi=64"
 AC_MSG_RESULT([yes])
 ], [
 FLAG_MIPS64=""
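The check above only verifies that the compiler accepts -march=mips32 -mabi=32 (respectively -march=mips64r2 -mabi=64). To confirm at compile time which ABI a particular build actually targets, one can look at the macros GCC normally predefines on MIPS (_MIPS_SIM together with _ABIO32, _ABIN32 and _ABI64); the small probe below is a hypothetical sketch, not part of this commit, and falls through harmlessly on non-MIPS targets:

    #include <stdio.h>

    int main(void)
    {
    /* _MIPS_SIM is compared against the ABI it was compiled for;
       all branches are guarded so the probe also builds elsewhere. */
    #if defined(_MIPS_SIM) && defined(_ABIO32) && (_MIPS_SIM == _ABIO32)
        printf("built for o32\n");
    #elif defined(_MIPS_SIM) && defined(_ABI64) && (_MIPS_SIM == _ABI64)
        printf("built for n64\n");
    #elif defined(_MIPS_SIM) && defined(_ABIN32) && (_MIPS_SIM == _ABIN32)
        printf("built for n32\n");
    #else
        printf("not a MIPS target, or ABI macros unavailable\n");
    #endif
        return 0;
    }

Compiling this once with FLAG_MIPS32 and once with FLAG_MIPS64 as produced by the patched configure checks should print "built for o32" and "built for n64" respectively.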
|
From: Ivo R. <ir...@so...> - 2017-10-02 03:47:45
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=c7b4630f0610505915c64eff78de904dd9533304 commit c7b4630f0610505915c64eff78de904dd9533304 Author: Ivo Raisr <iv...@iv...> Date: Mon Oct 2 05:46:46 2017 +0200 Register allocator: Implement merging of Assigned/Spilled vregs. Diff: --- VEX/priv/host_generic_reg_alloc3.c | 158 +++++++++++++++++++++++++------------ 1 file changed, 109 insertions(+), 49 deletions(-) diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c index 948ad2c..cfe807f 100644 --- a/VEX/priv/host_generic_reg_alloc3.c +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -194,6 +194,7 @@ typedef #define IS_VALID_VREGNO(v) ((v) >= 0 && (v) < state->n_vregs) #define IS_VALID_RREGNO(r) ((r) >= 0 && (r) < state->n_rregs) +#define MK_VREG(idx, reg_class) mkHReg(True, (reg_class), 0, idx) #define FREE_VREG(v) \ do { \ @@ -644,6 +645,39 @@ static inline void reg_reg_move(RegAllocChunk* chunk, RegAllocState* state, FREE_RREG(&state->rregs[rs_idx]); } +/* Assigns a vreg to a free rreg. If |genReload| is True, generates reload + ("fill" in the proper terminology) as well. */ +static inline void assign_vreg(RegAllocChunk* chunk, RegAllocState* state, + HReg vreg, HReg rreg, Bool genReload, UInt depth, const RegAllocControl* con) +{ + UInt v_idx = hregIndex(vreg); + UInt r_idx = hregIndex(rreg); + + if (genReload) { + HInstr* reload1 = NULL; + HInstr* reload2 = NULL; + con->genReload(&reload1, &reload2, rreg, state->vregs[v_idx].spill_offset, + con->mode64); + vassert(reload1 != NULL || reload2 != NULL); + if (reload1 != NULL) { + emit_instr(chunk, reload1, depth, con, "reload1"); + } + if (reload2 != NULL) { + emit_instr(chunk, reload2, depth, con, "reload2"); + } + } + + vassert(state->rregs[r_idx].disp == Free); + state->rregs[r_idx].disp = Bound; + state->rregs[r_idx].vreg = vreg; + state->rregs[r_idx].eq_spill_slot = True; + + vassert(state->vregs[v_idx].disp == Unallocated + || state->vregs[v_idx].disp == Spilled); + state->vregs[v_idx].disp = Assigned; + state->vregs[v_idx].rreg = rreg; +} + /* --- Stage 1. --- Determine total ordering of instructions and structure of HInstrIfThenElse. @@ -1444,27 +1478,15 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state, /* Generate reload only if the vreg is spilled and is about to being read or modified. If it is merely written than reloading it first would be pointless. */ + Bool genReload; if ((state->vregs[v_idx].disp == Spilled) && (reg_usage->vMode[j] != HRmWrite)) { - - HInstr* reload1 = NULL; - HInstr* reload2 = NULL; - con->genReload(&reload1, &reload2, rreg, - state->vregs[v_idx].spill_offset, con->mode64); - vassert(reload1 != NULL || reload2 != NULL); - if (reload1 != NULL) { - emit_instr(chunk, reload1, depth, con, "reload1"); - } - if (reload2 != NULL) { - emit_instr(chunk, reload2, depth, con, "reload2"); - } + genReload = True; + } else { + genReload = False; } - state->rregs[r_idx].disp = Bound; - state->rregs[r_idx].vreg = vreg; - state->rregs[r_idx].eq_spill_slot = True; - state->vregs[v_idx].disp = Assigned; - state->vregs[v_idx].rreg = rreg; + assign_vreg(chunk, state, vreg, rreg, genReload, depth, con); addToHRegRemap(&remap, vreg, rreg); } @@ -1540,17 +1562,20 @@ static void stage4_emit_HInstrIfThenElse(RegAllocChunk* chunk, UInt depth, } /* Merges states of two vregs into the destination vreg: - |v1_idx| + |v2_idx| -> |vd_idx|. - Usually |v1_idx| == |v2_idx| == |vd_idx| so the merging happens between + |vreg1| + |vreg2| -> |vregD|. 
+ Usually |vreg1| == |vreg2| == |vregD| so the merging happens between different states but for the same vreg. - For phi node merging, |v1_idx| != |v2_idx| != |vd_idx|. - Note: |v1_idx| and |vd_idx| are indexes to |state1|, |v2_idx| to |state2|. */ + For phi node merging, |vreg1| != |vreg2| != |vregD|. + Note: |vreg1| and |vregD| refer to |state1|, |vreg2| to |state2|. */ static void merge_vreg_states(RegAllocChunk* chunk, RegAllocState* state1, RegAllocState* state2, - UInt v1_idx, UInt v2_idx, UInt vd_idx, HReg vregD, + HReg vreg1, HReg vreg2, HReg vregD, UInt depth, const RegAllocControl* con) { RegAllocChunk* outOfLine = chunk->IfThenElse.outOfLine; + UInt v1_idx = hregIndex(vreg1); + UInt v2_idx = hregIndex(vreg2); + UInt vd_idx = hregIndex(vregD); VRegState* v1_src_state = &state1->vregs[v1_idx]; VRegState* v2_src_state = &state2->vregs[v2_idx]; VRegState* v1_dst_state = &state1->vregs[vd_idx]; @@ -1581,7 +1606,9 @@ static void merge_vreg_states(RegAllocChunk* chunk, } break; - case Assigned: + case Assigned: { + HReg rreg1 = v1_src_state->rreg; + switch (v2_src_state->disp) { case Unallocated: vpanic("Logic error during register allocator state merge " @@ -1589,26 +1616,23 @@ static void merge_vreg_states(RegAllocChunk* chunk, case Assigned: { /* Check if both vregs are assigned to the same rreg. */ - HReg rreg1 = v1_src_state->rreg; HReg rreg2 = v2_src_state->rreg; if (! sameHReg(rreg1, rreg2)) { switch (state2->rregs[hregIndex(rreg1)].disp) { case Free: { /* Move rreg2 to rreg1 in outOfLine/state2. */ reg_reg_move(outOfLine, state2, hregIndex(rreg2), - hregIndex(rreg1), state2->rregs[hregIndex(rreg2)].vreg, - depth, con); + hregIndex(rreg1), vreg2, depth, con); break; } case Bound: { /* Make room in state2->rregs[rreg1] first. */ UInt r_spilled_idx = spill_vreg(outOfLine, state2, - state2->rregs[hregIndex(rreg1)].vreg, - chunk->next->ii_total_start, depth, con); + state2->rregs[hregIndex(rreg1)].vreg, + chunk->next->ii_total_start, depth, con); vassert(r_spilled_idx == hregIndex(rreg1)); reg_reg_move(outOfLine, state2, hregIndex(rreg2), - hregIndex(rreg1), state2->rregs[hregIndex(rreg2)].vreg, - depth, con); + hregIndex(rreg1), vreg2, depth, con); break; } default: @@ -1616,6 +1640,42 @@ static void merge_vreg_states(RegAllocChunk* chunk, } } + /* Proceed to phi node merging bellow. */ + break; + } + + case Spilled: + switch (state2->rregs[hregIndex(rreg1)].disp) { + case Free: + assign_vreg(outOfLine, state2, vreg2, rreg1, True, depth, con); + break; + case Bound: { + /* Make a room in state2->rregs[rreg1] first. */ + HReg vreg_dead = state2->rregs[hregIndex(rreg1)].vreg; + UInt vdead_idx = hregIndex(vreg_dead); + /* That vreg should be dead by now. */ + vassert(state2->vregs[vdead_idx].dead_before + <= chunk->next->ii_total_start); + + FREE_VREG(&state2->vregs[vdead_idx]); + FREE_RREG(&state2->rregs[hregIndex(rreg1)]); + + assign_vreg(outOfLine, state2, vreg2, rreg1, True, depth, con); + break; + } + default: + vassert(0); + } + + /* Proceed to phi node merging bellow. */ + break; + + default: + vassert(0); + } + + /* Phi node merging. */ + if (! 
sameHReg(vreg1, vreg2)) { FREE_VREG(v1_src_state); FREE_VREG(v2_src_state); v1_dst_state->disp = Assigned; @@ -1625,21 +1685,12 @@ static void merge_vreg_states(RegAllocChunk* chunk, UInt r_idx = hregIndex(rreg1); vassert(state1->rregs[r_idx].disp == Bound); - state1->rregs[r_idx].eq_spill_slot = False; - if (v1_idx != vd_idx) { - vassert(!hregIsInvalid(vregD)); - state1->rregs[r_idx].vreg = vregD; - } - break; - } - case Spilled: - /* Generate reload. */ - vpanic("Reload not implemented, yet."); - break; - default: - vassert(0); + state1->rregs[r_idx].eq_spill_slot + = (state1->rregs[r_idx].eq_spill_slot && state2->rregs[r_idx].eq_spill_slot); + state1->rregs[r_idx].vreg = vregD; } break; + } // case Assigned case Spilled: switch (v2_src_state->disp) { @@ -1688,10 +1739,15 @@ static void stage4_merge_states(RegAllocChunk* chunk, for (UInt i = 0; i < chunk->IfThenElse.n_phis; i++) { const HPhiNode* phi_node = &chunk->IfThenElse.phi_nodes[i]; - merge_vreg_states(chunk, state, cloned, - hregIndex(phi_node->srcFallThrough), - hregIndex(phi_node->srcOutOfLine), - hregIndex(phi_node->dst), phi_node->dst, depth, con); + if (DEBUG_REGALLOC) { + print_depth(depth); + vex_printf("Now merging: "); + ppHPhiNode(phi_node); + vex_printf("\n"); + } + + merge_vreg_states(chunk, state, cloned, phi_node->srcFallThrough, + phi_node->srcOutOfLine, phi_node->dst, depth, con); } if (DEBUG_REGALLOC) { @@ -1711,8 +1767,12 @@ static void stage4_merge_states(RegAllocChunk* chunk, } for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) { - merge_vreg_states(chunk, state, cloned, v_idx, v_idx, v_idx, INVALID_HREG, - depth, con); + HRegClass reg_class = state->vregs[v_idx].reg_class; + if (reg_class != HRcINVALID) { + merge_vreg_states(chunk, state, cloned, MK_VREG(v_idx, reg_class), + MK_VREG(v_idx, reg_class), MK_VREG(v_idx, reg_class), + depth, con); + } } if (DEBUG_REGALLOC) { |
|
From: Ivo R. <ir...@so...> - 2017-10-02 03:47:40
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=251cc5781b848c27de2c44062de804f33d5bb543 commit 251cc5781b848c27de2c44062de804f33d5bb543 Author: Ivo Raisr <iv...@iv...> Date: Mon Oct 2 03:45:56 2017 +0200 Register allocator: Implement merging of Assigned/Assigned bound rreg. Diff: --- VEX/priv/host_generic_reg_alloc3.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c index 69996c2..948ad2c 100644 --- a/VEX/priv/host_generic_reg_alloc3.c +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -483,12 +483,11 @@ static inline void mark_vreg_spilled(UInt v_idx, RegAllocState* state) /* Spills a vreg assigned to some rreg. The vreg is spilled and the rreg is freed. Returns rreg's index. */ -static inline UInt spill_vreg( - RegAllocChunk* chunk, RegAllocState* state, - HReg vreg, UInt v_idx, Short ii_total_current, - UInt depth, const RegAllocControl* con) +static inline UInt spill_vreg(RegAllocChunk* chunk, RegAllocState* state, + HReg vreg, Short ii_total_current, UInt depth, const RegAllocControl* con) { /* Check some invariants first. */ + UInt v_idx = hregIndex(vreg); vassert(IS_VALID_VREGNO((v_idx))); vassert(state->vregs[v_idx].disp == Assigned); HReg rreg = state->vregs[v_idx].rreg; @@ -1099,8 +1098,7 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state, HReg vreg_to_spill = find_vreg_to_spill(chunk, state, \ &chunk->reg_usage[ii_chunk], (_reg_class), \ ii_chunk, con); \ - _r_free_idx = spill_vreg(chunk, state, \ - vreg_to_spill, hregIndex(vreg_to_spill), \ + _r_free_idx = spill_vreg(chunk, state, vreg_to_spill, \ INSTRNO_TOTAL, depth, con); \ } \ \ @@ -1301,8 +1299,8 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state, mark_vreg_spilled(v_idx, state); } else { /* Spill the vreg. It is not used by this instruction.*/ - spill_vreg(chunk, state, vreg, v_idx, INSTRNO_TOTAL, - depth, con); + spill_vreg(chunk, state, vreg, INSTRNO_TOTAL, depth, + con); } } else { /* Find or make a free rreg where to move this vreg to. */ @@ -1598,12 +1596,21 @@ static void merge_vreg_states(RegAllocChunk* chunk, case Free: { /* Move rreg2 to rreg1 in outOfLine/state2. */ reg_reg_move(outOfLine, state2, hregIndex(rreg2), - hregIndex(rreg1), vregD, depth, con); + hregIndex(rreg1), state2->rregs[hregIndex(rreg2)].vreg, + depth, con); break; } - case Bound: - vpanic("Assigned/Assigned move to a bound rreg not implemented"); + case Bound: { + /* Make room in state2->rregs[rreg1] first. */ + UInt r_spilled_idx = spill_vreg(outOfLine, state2, + state2->rregs[hregIndex(rreg1)].vreg, + chunk->next->ii_total_start, depth, con); + vassert(r_spilled_idx == hregIndex(rreg1)); + reg_reg_move(outOfLine, state2, hregIndex(rreg2), + hregIndex(rreg1), state2->rregs[hregIndex(rreg2)].vreg, + depth, con); break; + } default: vassert(0); } |
|
From: Ivo R. <ir...@so...> - 2017-10-02 03:47:35
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=0e32a73cbd2e9a11c7ab75cca63a9096dfc858a8 commit 0e32a73cbd2e9a11c7ab75cca63a9096dfc858a8 Author: Ivo Raisr <iv...@iv...> Date: Mon Oct 2 03:35:03 2017 +0200 Register allocator: Fix merging of Assigned/Assigned vregs. Diff: --- VEX/priv/host_generic_reg_alloc3.c | 101 +++++++++++++++++++++++++++---------- 1 file changed, 74 insertions(+), 27 deletions(-) diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c index 81cf5c5..69996c2 100644 --- a/VEX/priv/host_generic_reg_alloc3.c +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -32,6 +32,18 @@ #include "main_util.h" #include "host_generic_regs.h" +/* TODO-JIT: There is still a room for lot of improvements around phi node + merging. For example: + - When processing out-of-line leg, we may want to reserve rregs which + are assigned to phi node destinations, as to avoid spilling and reg-reg + move during the merge. + - Although RRegLR's are local to every instruction chunk, the register + allocator should have more visibility to what lies ahead after the merge. + Avoids the situation when registers are allocated somehow + in the fall-through leg and need to be spilled just few instructions + after the merge (because of a helper call, for example). +*/ + /* Set to 1 for lots of debugging output. */ #define DEBUG_REGALLOC 0 @@ -613,6 +625,26 @@ static inline Bool find_free_rreg( return found; } +/* Generates a rreg-rreg move for a given |vreg| from |rs_idx| -> |rd_idx|. + Updates the register allocator state. */ +static inline void reg_reg_move(RegAllocChunk* chunk, RegAllocState* state, + UInt rs_idx, UInt rd_idx, HReg vreg, UInt depth, const RegAllocControl* con) +{ + HInstr* move = con->genMove(con->univ->regs[rs_idx], + con->univ->regs[rd_idx], con->mode64); + vassert(move != NULL); + emit_instr(chunk, move, depth, con, "move"); + + /* Update the register allocator state. */ + UInt v_idx = hregIndex(vreg); + state->vregs[v_idx].disp = Assigned; + state->vregs[v_idx].rreg = con->univ->regs[rd_idx]; + state->rregs[rd_idx].disp = Bound; + state->rregs[rd_idx].vreg = vreg; + state->rregs[rd_idx].eq_spill_slot = state->rregs[rs_idx].eq_spill_slot; + FREE_RREG(&state->rregs[rs_idx]); +} + /* --- Stage 1. --- Determine total ordering of instructions and structure of HInstrIfThenElse. @@ -1278,18 +1310,9 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state, v_idx, state->vregs[v_idx].reg_class, True); /* Generate "move" between real registers. */ - HInstr* move = con->genMove(con->univ->regs[r_idx], - con->univ->regs[r_free_idx], con->mode64); - vassert(move != NULL); - emit_instr(chunk, move, depth, con, "move"); - - /* Update the register allocator state. */ vassert(state->vregs[v_idx].disp == Assigned); - state->vregs[v_idx].rreg = con->univ->regs[r_free_idx]; - state->rregs[r_free_idx].disp = Bound; - state->rregs[r_free_idx].vreg = vreg; - state->rregs[r_free_idx].eq_spill_slot = rreg->eq_spill_slot; - FREE_RREG(rreg); + reg_reg_move(chunk, state, r_idx, r_free_idx, vreg, + depth, con); } break; } @@ -1571,11 +1594,19 @@ static void merge_vreg_states(RegAllocChunk* chunk, HReg rreg1 = v1_src_state->rreg; HReg rreg2 = v2_src_state->rreg; if (! sameHReg(rreg1, rreg2)) { - /* Generate "move" from rreg2 to rreg1. 
*/ - HInstr* move = con->genMove(con->univ->regs[hregIndex(rreg2)], - con->univ->regs[hregIndex(rreg1)], con->mode64); - vassert(move != NULL); - emit_instr(outOfLine, move, depth + 1, con, "move"); + switch (state2->rregs[hregIndex(rreg1)].disp) { + case Free: { + /* Move rreg2 to rreg1 in outOfLine/state2. */ + reg_reg_move(outOfLine, state2, hregIndex(rreg2), + hregIndex(rreg1), vregD, depth, con); + break; + } + case Bound: + vpanic("Assigned/Assigned move to a bound rreg not implemented"); + break; + default: + vassert(0); + } } FREE_VREG(v1_src_state); @@ -1638,6 +1669,33 @@ static void stage4_merge_states(RegAllocChunk* chunk, RegAllocState* state, RegAllocState* cloned, UInt depth, const RegAllocControl* con) { + /* Process phi nodes first. */ + if (chunk->IfThenElse.n_phis > 0) { + if (DEBUG_REGALLOC) { + print_state(chunk, state, chunk->next->ii_total_start, depth, con, + "Before phi node merge: fall-through leg"); + print_state(chunk, cloned, chunk->next->ii_total_start, depth, con, + "Before phi node merge: out-of-line leg"); + } + + for (UInt i = 0; i < chunk->IfThenElse.n_phis; i++) { + const HPhiNode* phi_node = &chunk->IfThenElse.phi_nodes[i]; + + merge_vreg_states(chunk, state, cloned, + hregIndex(phi_node->srcFallThrough), + hregIndex(phi_node->srcOutOfLine), + hregIndex(phi_node->dst), phi_node->dst, depth, con); + } + + if (DEBUG_REGALLOC) { + print_state(chunk, state, chunk->next->ii_total_start, depth, con, + "After phi node merge"); + } + } + + /* Merge remaining vreg states. VRegs mentioned by phi nodes are processed + as well but merging is no-op for them now. */ + if (DEBUG_REGALLOC) { print_state(chunk, state, chunk->next->ii_total_start, depth, con, "Before state merge: fall-through leg"); @@ -1645,17 +1703,6 @@ static void stage4_merge_states(RegAllocChunk* chunk, "Before state merge: out-of-line leg"); } - /* Process phi nodes first. */ - for (UInt i = 0; i < chunk->IfThenElse.n_phis; i++) { - const HPhiNode* phi_node = &chunk->IfThenElse.phi_nodes[i]; - - merge_vreg_states(chunk, state, cloned, - hregIndex(phi_node->srcFallThrough), hregIndex(phi_node->srcOutOfLine), - hregIndex(phi_node->dst), phi_node->dst, depth, con); - } - - /* Merge remaining vreg states. VRegs mentioned by phi nodes are processed - as well but merging is no-op for them now. */ for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) { merge_vreg_states(chunk, state, cloned, v_idx, v_idx, v_idx, INVALID_HREG, depth, con); |
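The three register-allocator commits above all address the same join problem: after an HInstrIfThenElse, the fall-through and out-of-line legs may leave a virtual register in different places (different real registers, or spilled), and the out-of-line leg has to be patched with reload/move/spill instructions until both legs agree before execution continues past the merge point. The sketch below is a deliberately simplified, hypothetical model of that merge step in plain C — not the VEX/priv/host_generic_reg_alloc3.c code; the enum names, merge_vreg() and the three-vreg example are made up, and it ignores the case handled by the real merge_vreg_states where the target real register is still bound to another live vreg and must be spilled first:

    #include <stdio.h>

    enum disp { SPILLED, ASSIGNED };

    struct vreg_state { enum disp disp; int rreg; };  /* rreg valid if ASSIGNED */

    /* Emit fix-up pseudo-instructions into the out-of-line leg so that
       vreg v ends up exactly where the fall-through leg expects it. */
    static void merge_vreg(int v, const struct vreg_state *fall_through,
                           struct vreg_state *out_of_line)
    {
        const struct vreg_state *want = &fall_through[v];
        struct vreg_state *have = &out_of_line[v];

        if (want->disp == ASSIGNED && have->disp == SPILLED) {
            printf("out-of-line: reload v%d from its spill slot into r%d\n",
                   v, want->rreg);
            have->disp = ASSIGNED;
            have->rreg = want->rreg;
        } else if (want->disp == ASSIGNED && have->disp == ASSIGNED
                   && want->rreg != have->rreg) {
            printf("out-of-line: move r%d -> r%d (v%d)\n",
                   have->rreg, want->rreg, v);
            have->rreg = want->rreg;
        } else if (want->disp == SPILLED && have->disp == ASSIGNED) {
            printf("out-of-line: spill v%d from r%d\n", v, have->rreg);
            have->disp = SPILLED;
        }
        /* Otherwise the two legs already agree; nothing to emit. */
    }

    int main(void)
    {
        struct vreg_state fall_through[3] = {
            { ASSIGNED, 0 }, { ASSIGNED, 1 }, { SPILLED, -1 }
        };
        struct vreg_state out_of_line[3] = {
            { SPILLED, -1 }, { ASSIGNED, 2 }, { ASSIGNED, 3 }
        };
        for (int v = 0; v < 3; v++)
            merge_vreg(v, fall_through, out_of_line);
        return 0;
    }

Run as-is, the model prints one reload, one reg-reg move and one spill, which mirrors the three merge cases (Assigned/Spilled, Assigned/Assigned in different rregs, Spilled/Assigned) that the commits above implement in the real allocator.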
|
From: Rhys K. <rhy...@so...> - 2017-10-02 01:10:06
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=7221d28cad21d44ebd73750b64229e9457034659 commit 7221d28cad21d44ebd73750b64229e9457034659 Author: Rhys Kidd <rhy...@gm...> Date: Sun Oct 1 20:57:04 2017 -0400 gitignore: Fix up false directory-level .gitignore settings We never intended to ignore all changes from the top level down in /include or /cachegrind. Instead, allow the filetype-specific .gitignore patterns to match the contents of these two folders. Also, don't ignore changes to include/valgrind.h: it exists in the repository and should be tracked for any changes developers might make. Changes tested by running a forced git clean and then a full rebuild; no stray build artifacts were erroneously tracked by git after these changes. Diff: --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index ba06188..ec61217 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,6 @@ /autom4te-*.cache /autom4te.cache /bin -/cachegrind /cachegrind.out.* /compile /config.guess @@ -19,7 +18,6 @@ /default.supp /depcomp /glibc-2.X.supp -/include /install-sh /lib /Makefile @@ -694,7 +692,6 @@ /include/Makefile.in /include/Makefile /include/tool.h -/include/valgrind.h /include/vgversion.h # /include/vki/ |
|
From: Rhys K. <rhy...@so...> - 2017-10-02 00:05:23
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=1ce04c35c2ebbc8ea3c2b38ba69daa9dd40cde35 commit 1ce04c35c2ebbc8ea3c2b38ba69daa9dd40cde35 Author: Rhys Kidd <rhy...@gm...> Date: Sun Sep 10 11:34:32 2017 -0400 Preliminary support for Darwin 17.x (macOS 10.13) Diff: --- NEWS | 4 +- README | 6 +- configure.ac | 9 +- coregrind/fixup_macho_loadcmds.c | 3 +- coregrind/m_syswrap/priv_syswrap-darwin.h | 22 +- coregrind/m_syswrap/syswrap-amd64-darwin.c | 3 +- coregrind/m_syswrap/syswrap-darwin.c | 26 +- coregrind/m_syswrap/syswrap-x86-darwin.c | 3 +- darwin17.supp | 768 +++++++++++++++++++++++++++++ include/vki/vki-scnums-darwin.h | 28 ++ 10 files changed, 858 insertions(+), 14 deletions(-) diff --git a/NEWS b/NEWS index 4d6d840..2dfcf32 100644 --- a/NEWS +++ b/NEWS @@ -8,13 +8,15 @@ bug fixes. This release supports X86/Linux, AMD64/Linux, ARM32/Linux, ARM64/Linux, PPC32/Linux, PPC64BE/Linux, PPC64LE/Linux, S390X/Linux, MIPS32/Linux, MIPS64/Linux, ARM/Android, ARM64/Android, MIPS32/Android, X86/Android, -X86/Solaris, AMD64/Solaris and AMD64/MacOSX 10.12. +X86/Solaris, AMD64/Solaris and AMD64/MacOSX 10.12. There is also preliminary +support for X86/macOS 10.13, AMD64/macOS 10.13. * ==================== CORE CHANGES =================== * ================== PLATFORM CHANGES ================= +* Preliminary support for macOS 10.13 has been added. * ==================== TOOL CHANGES ==================== diff --git a/README b/README index 6959707..6f333d3 100644 --- a/README +++ b/README @@ -37,8 +37,8 @@ platforms: - PPC32/Linux - PPC64/Linux - ARM/Linux -- x86/MacOSX -- AMD64/MacOSX +- x86/macOS +- AMD64/macOS - S390X/Linux - MIPS32/Linux - MIPS64/Linux @@ -46,7 +46,7 @@ platforms: - AMD64/Solaris Note that AMD64 is just another name for x86_64, and Valgrind runs fine -on Intel processors. Also note that the core of MacOSX is called +on Intel processors. Also note that the core of macOS is called "Darwin" and this name is used sometimes. Valgrind is licensed under the GNU General Public License, version 2. 
diff --git a/configure.ac b/configure.ac index fde97ae..483d9bd 100644 --- a/configure.ac +++ b/configure.ac @@ -349,6 +349,7 @@ case "${host_os}" in AC_DEFINE([DARWIN_10_10], 101000, [DARWIN_VERS value for Mac OS X 10.10]) AC_DEFINE([DARWIN_10_11], 101100, [DARWIN_VERS value for Mac OS X 10.11]) AC_DEFINE([DARWIN_10_12], 101200, [DARWIN_VERS value for macOS 10.12]) + AC_DEFINE([DARWIN_10_13], 101300, [DARWIN_VERS value for macOS 10.13]) AC_MSG_CHECKING([for the kernel version]) kernel=`uname -r` @@ -418,9 +419,15 @@ case "${host_os}" in DEFAULT_SUPP="darwin16.supp ${DEFAULT_SUPP}" DEFAULT_SUPP="darwin10-drd.supp ${DEFAULT_SUPP}" ;; + 17.*) + AC_MSG_RESULT([Darwin 17.x (${kernel}) / macOS 10.13 High Sierra]) + AC_DEFINE([DARWIN_VERS], DARWIN_10_13, [Darwin / Mac OS X version]) + DEFAULT_SUPP="darwin17.supp ${DEFAULT_SUPP}" + DEFAULT_SUPP="darwin10-drd.supp ${DEFAULT_SUPP}" + ;; *) AC_MSG_RESULT([unsupported (${kernel})]) - AC_MSG_ERROR([Valgrind works on Darwin 10.x, 11.x, 12.x, 13.x, 14.x, 15.x and 16.x (Mac OS X 10.6/7/8/9/10/11 and macOS 10.12)]) + AC_MSG_ERROR([Valgrind works on Darwin 10.x, 11.x, 12.x, 13.x, 14.x, 15.x, 16.x and 17.x (Mac OS X 10.6/7/8/9/10/11 and macOS 10.12/13)]) ;; esac ;; diff --git a/coregrind/fixup_macho_loadcmds.c b/coregrind/fixup_macho_loadcmds.c index 516b7ba..cdb3622 100644 --- a/coregrind/fixup_macho_loadcmds.c +++ b/coregrind/fixup_macho_loadcmds.c @@ -122,7 +122,8 @@ #if DARWIN_VERS != DARWIN_10_5 && DARWIN_VERS != DARWIN_10_6 \ && DARWIN_VERS != DARWIN_10_7 && DARWIN_VERS != DARWIN_10_8 \ && DARWIN_VERS != DARWIN_10_9 && DARWIN_VERS != DARWIN_10_10 \ - && DARWIN_VERS != DARWIN_10_11 && DARWIN_VERS != DARWIN_10_12 + && DARWIN_VERS != DARWIN_10_11 && DARWIN_VERS != DARWIN_10_12 \ + && DARWIN_VERS != DARWIN_10_13 # error "Unknown DARWIN_VERS value. This file only compiles on Darwin." 
#endif diff --git a/coregrind/m_syswrap/priv_syswrap-darwin.h b/coregrind/m_syswrap/priv_syswrap-darwin.h index bdefd61..29e491d 100644 --- a/coregrind/m_syswrap/priv_syswrap-darwin.h +++ b/coregrind/m_syswrap/priv_syswrap-darwin.h @@ -248,7 +248,9 @@ DECL_TEMPLATE(darwin, seteuid); // 183 DECL_TEMPLATE(darwin, sigreturn); // 184 DECL_TEMPLATE(darwin, FAKE_SIGRETURN); // NYI chud 185 -// 186 +#if DARWIN_VERS >= DARWIN_10_13 +// NYI thread_selfcounts // 186 +#endif /* DARWIN_VERS >= DARWIN_10_13 */ // 187 // GEN stat 188 // GEN fstat 189 @@ -470,7 +472,9 @@ DECL_TEMPLATE(darwin, __thread_selfid); // 372 #if DARWIN_VERS >= DARWIN_10_11 // NYI kevent_qos // 374 #endif /* DARWIN_VERS >= DARWIN_10_11 */ -// 375 +#if DARWIN_VERS >= DARWIN_10_13 +// NYI kevent_id // 375 +#endif /* DARWIN_VERS >= DARWIN_10_13 */ // 376 // 377 // 378 @@ -634,6 +638,16 @@ DECL_TEMPLATE(darwin, ulock_wake); // 516 // NYI terminate_with_payload // 520 // NYI abort_with_payload // 521 #endif /* DARWIN_VERS >= DARWIN_10_12 */ +#if DARWIN_VERS >= DARWIN_10_13 +// NYI necp_session_open // 522 +// NYI necp_session_action // 523 +// NYI setattrlistat // 524 +// NYI net_qos_guideline // 525 +// NYI fmount // 526 +// NYI ntp_adjtime // 527 +// NYI ntp_gettime // 528 +// NYI os_fault_with_payload // 529 +#endif /* DARWIN_VERS >= DARWIN_10_13 */ // Mach message helpers DECL_TEMPLATE(darwin, mach_port_set_context); @@ -750,6 +764,10 @@ DECL_TEMPLATE(darwin, semaphore_timedwait_signal); DECL_TEMPLATE(darwin, task_for_pid); DECL_TEMPLATE(darwin, pid_for_task); +#if DARWIN_VERS >= DARWIN_10_13 +// NYI thread_get_special_reply_port // 50 +#endif /* DARWIN_VERS >= DARWIN_10_13 */ + #if DARWIN_VERS >= DARWIN_10_12 DECL_TEMPLATE(darwin, host_create_mach_voucher_trap); DECL_TEMPLATE(darwin, task_register_dyld_image_infos); diff --git a/coregrind/m_syswrap/syswrap-amd64-darwin.c b/coregrind/m_syswrap/syswrap-amd64-darwin.c index c827bab..f509329 100644 --- a/coregrind/m_syswrap/syswrap-amd64-darwin.c +++ b/coregrind/m_syswrap/syswrap-amd64-darwin.c @@ -482,7 +482,8 @@ void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem, # elif DARWIN_VERS == DARWIN_10_9 \ || DARWIN_VERS == DARWIN_10_10 \ || DARWIN_VERS == DARWIN_10_11 \ - || DARWIN_VERS == DARWIN_10_12 + || DARWIN_VERS == DARWIN_10_12 \ + || DARWIN_VERS == DARWIN_10_13 UWord magic_delta = 0xE0; # else # error "magic_delta: to be computed on new OS version" diff --git a/coregrind/m_syswrap/syswrap-darwin.c b/coregrind/m_syswrap/syswrap-darwin.c index e8097e3..fe732b2 100644 --- a/coregrind/m_syswrap/syswrap-darwin.c +++ b/coregrind/m_syswrap/syswrap-darwin.c @@ -10575,6 +10575,18 @@ const SyscallTableEntry ML_(syscall_table)[] = { // _____(__NR_terminate_with_payload), // 520 // _____(__NR_abort_with_payload), // 521 #endif +#if DARWIN_VERS >= DARWIN_10_13 +// _____(__NR_thread_selfcounts), // 186 +// _____(__NR_kevent_id, // 375 +// _____(__NR_necp_session_open), // 522 +// _____(__NR_necp_session_action), // 523 +// _____(__NR_setattrlistat), // 524 +// _____(__NR_net_qos_guideline), // 525 +// _____(__NR_fmount), // 526 +// _____(__NR_ntp_adjtime), // 527 +// _____(__NR_ntp_gettime), // 528 +// _____(__NR_os_fault_with_payload), // 529 +#endif // _____(__NR_MAXSYSCALL) MACX_(__NR_DARWIN_FAKE_SIGRETURN, FAKE_SIGRETURN) }; @@ -10698,17 +10710,23 @@ const SyscallTableEntry ML_(mach_trap_table)[] = { #if defined(VGA_x86) // _____(__NR_macx_swapon), // _____(__NR_macx_swapoff), +#else + _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(48)), + 
_____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(49)), +#endif +#if DARWIN_VERS >= DARWIN_10_13 +// _____(__NR_thread_get_special_reply_port, // 50 +#else _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(50)), +#endif /* DARWIN_VERS >= DARWIN_10_13 */ +#if defined(VGA_x86) // _____(__NR_macx_triggers), // _____(__NR_macx_backing_store_suspend), // _____(__NR_macx_backing_store_recovery), #else - _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(48)), - _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(49)), - _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(50)), _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(51)), _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(52)), - _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(53)), + _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(53)), #endif _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(54)), _____(VG_DARWIN_SYSCALL_CONSTRUCT_MACH(55)), diff --git a/coregrind/m_syswrap/syswrap-x86-darwin.c b/coregrind/m_syswrap/syswrap-x86-darwin.c index dac5c7d..a5d9e97 100644 --- a/coregrind/m_syswrap/syswrap-x86-darwin.c +++ b/coregrind/m_syswrap/syswrap-x86-darwin.c @@ -430,7 +430,8 @@ void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem, # elif DARWIN_VERS == DARWIN_10_9 \ || DARWIN_VERS == DARWIN_10_10 \ || DARWIN_VERS == DARWIN_10_11 \ - || DARWIN_VERS == DARWIN_10_12 + || DARWIN_VERS == DARWIN_10_12 \ + || DARWIN_VERS == DARWIN_10_13 UWord magic_delta = 0xB0; # else # error "magic_delta: to be computed on new OS version" diff --git a/darwin17.supp b/darwin17.supp new file mode 100644 index 0000000..78f3347 --- /dev/null +++ b/darwin17.supp @@ -0,0 +1,768 @@ + +# Suppressions for Darwin 17.x / macOS 10.13 High Sierra + +############################################ +## Leaks. For other stuff see below. + +{ + OSX1013:1-Leak + Memcheck:Leak + match-leak-kinds: possible + fun:malloc_zone_?alloc + ... + fun:_read_images +} + +{ + OSX1013:2-Leak + Memcheck:Leak + match-leak-kinds: definite + fun:malloc_zone_?alloc + ... + fun:_read_images +} + +{ + OSX1013:3-Leak + Memcheck:Leak + match-leak-kinds: definite + fun:malloc_zone_?alloc + fun:recursive_mutex_init + ... + fun:_os_object_init + fun:libdispatch_init + fun:libSystem_initializer + ... +} + +{ + OSX1013:4-Leak + Memcheck:Leak + fun:malloc_zone_?alloc + ... + fun:dyld_register_image_state_change_handler + ... +} + +{ + OSX1013:5-Leak + Memcheck:Leak + match-leak-kinds: reachable + fun:?alloc + ... + fun:dyld_register_image_state_change_handler + ... +} + +{ + OSX1013:6-Leak + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc_zone_?alloc + ... + fun:map_images_nolock + fun:map_2_images + ... +} + +{ + OSX1013:7-Leak + Memcheck:Leak + match-leak-kinds: possible + fun:malloc_zone_?alloc + ... + fun:map_images_nolock + fun:map_2_images + ... +} + +{ + OSX1013:8-Leak + Memcheck:Leak + match-leak-kinds: definite + fun:?alloc + ... + fun:libSystem_initializer + ... +} + +{ + OSX1013:9-Leak + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc_zone_?alloc + ... + fun:libSystem_initializer + ... +} + +{ + OSX1013:10-Leak + Memcheck:Leak + match-leak-kinds: reachable + fun:?alloc + ... + fun:libSystem_initializer + ... +} + +#{ +# OSX1013:11-Leak +# Memcheck:Leak +# match-leak-kinds: definite +# fun:malloc +# fun:currentlocale +#} +# +#{ +# OSX1013:12-Leak +# Memcheck:Leak +# match-leak-kinds: possible +# fun:malloc +# fun:tzsetwall_basic +# fun:_st_tzset_basic +#} + +{ + OSX1013:13-Leak + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc_zone_memalign + ... + fun:_ZN4dyld24initializeMainExecutableEv + ... 
+} + +{ + OSX1013:14-Leak + Memcheck:Leak + match-leak-kinds: reachable + fun:?alloc + ... + fun:libSystem_atfork_child + ... +} + +{ + OSX1013:15-Leak + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:__smakebuf + ... + fun:printf + ... +} + +{ + OSX1013:16-Leak + Memcheck:Leak + match-leak-kinds: possible + fun:?alloc + ... + fun:_read_images +} + +{ + OSX1013:17-Leak + Memcheck:Leak + match-leak-kinds: reachable + fun:?alloc + ... + fun:_read_images +} + +{ + OSX1013:18-Leak + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc_zone_?alloc + ... + fun:_read_images +} + +{ + OSX1013:19-Leak + Memcheck:Leak + match-leak-kinds: possible + fun:malloc_zone_?alloc + ... + fun:*NX*Map* + fun:*NX*Map* +} + +{ + OSX1013:20-Leak + Memcheck:Leak + match-leak-kinds: indirect + fun:?alloc + ... + fun:libSystem_initializer + ... +} + +{ + OSX1013:21-Leak + Memcheck:Leak + match-leak-kinds: definite + fun:malloc_zone_memalign + ... + fun:_ZN4dyld24initializeMainExecutableEv + ... +} + + +############################################ +## Non-leak errors + +##{ +## OSX1013:CoreFoundation-1 +## Memcheck:Cond +## obj:*CoreFoundation.framework*CoreFoundation* +## obj:*CoreFoundation.framework*CoreFoundation* +## obj:*CoreFoundation.framework*CoreFoundation* +##} + +{ + OSX1013:CoreFoundation-2 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*libdispatch.dylib* + obj:*libdispatch.dylib* +} + +{ + OSX1013:CoreFoundation-3 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*CoreFoundation.framework*CoreFoundation* + obj:*Foundation.framework*Foundation* +} + +{ + OSX1013:CoreFoundation-4 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*CoreFoundation.framework*CoreFoundation* + obj:*SystemConfiguration.framework*SystemConfiguration* +} + +{ + OSX1013:CoreFoundation-5 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*CoreFoundation.framework*CoreFoundation* + obj:*HIServices.framework*HIServices* +} + +{ + OSX1013:CoreFoundation-6 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*SystemConfiguration.framework*SystemConfiguration* + obj:*SystemConfiguration.framework*SystemConfiguration* +} + +{ + OSX1013:CoreFoundation-7 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*SystemConfiguration.framework*SystemConfiguration* + obj:*CoreFoundation.framework*CoreFoundation* +} + +{ + OSX1013:CoreFoundation-8 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*IOKit.framework*IOKit* +} + +{ + OSX1013:CoreFoundation-9 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*Foundation.framework*Foundation* + obj:*AppKit.framework*AppKit* +} + +{ + OSX1013:CoreFoundation-10 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*HIToolbox.framework*HIToolbox* + obj:*HIToolbox.framework*HIToolbox* +} + +{ + OSX1013:CoreFoundation-11 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*AE.framework*AE* + obj:*AE.framework*AE* +} + +{ + OSX1013:CoreFoundation-12 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*CoreFoundation.framework*CoreFoundation* + obj:*HIToolbox.framework*HIToolbox* +} + +{ + OSX1013:CoreFoundation-13 + Memcheck:Cond + obj:*CoreFoundation.framework*CoreFoundation* + obj:*CoreFoundation.framework*CoreFoundation* + obj:*AE.framework*AE* +} + +{ + OSX1013:AppKit-1 + Memcheck:Cond + obj:*AppKit.framework*AppKit* + obj:*AppKit.framework*AppKit* + obj:*AppKit.framework*AppKit* 
+} + +{ + OSX1013:AppKit-2 + Memcheck:Cond + obj:*AppKit.framework*AppKit* + obj:*AppKit.framework*AppKit* + obj:*libdispatch.dylib* +} + +#{ +# OSX1013:AppKit-3 +# Memcheck:Cond +# obj:*AppKit.framework*AppKit* +# obj:*libdispatch.dylib* +# obj:*libdispatch.dylib* +#} + +##{ +## OSX1013:AppKit-4 +## Memcheck:Cond +## obj:*AppKit.framework*AppKit* +## obj:*AppKit.framework*AppKit* +## obj:*CoreFoundation.framework*CoreFoundation* +##} + +{ + OSX1013:AppKit-5 + Memcheck:Cond + obj:*AppKit.framework*AppKit* + obj:*AppKit.framework*AppKit* + obj:*Foundation.framework*Foundation* +} + +{ + OSX1013:AppKit-6 + Memcheck:Cond + obj:*AppKit.framework*AppKit* + obj:*Foundation.framework*Foundation* + obj:*AppKit.framework*AppKit* +} + +{ + OSX1013:AppKit-7 + Memcheck:Cond + obj:*AppKit.framework*AppKit* + obj:*libdispatch.dylib* + obj:*libdispatch.dylib* +} + +{ + OSX1013:AppKit-8 + Memcheck:Cond + obj:*AppKit.framework*AppKit* + obj:*Foundation.framework*Foundation* + obj:*Foundation.framework*Foundation* +} + +{ + OSX1013:ColorSync-1 + Memcheck:Cond + obj:*ColorSync.framework*ColorSync* + obj:*ColorSync.framework*ColorSync* + obj:*ColorSync.framework*ColorSync* +} + +#{ +# OSX1013:ColorSync-2 +# Memcheck:Value8 +# obj:*ColorSync.framework*ColorSync* +# obj:*ColorSync.framework*ColorSync* +# obj:*ColorSync.framework*ColorSync* +#} + +{ + OSX1013:CoreGraphics-1 + Memcheck:Cond + obj:*CoreGraphics.framework*CoreGraphics* + obj:*CoreGraphics.framework*CoreGraphics* +} + +#{ +# OSX1013:CoreGraphics-2 +# Memcheck:Value8 +# obj:*CoreGraphics.framework*CoreGraphics* +# obj:*CoreGraphics.framework*CoreGraphics* +# obj:*CoreGraphics.framework*CoreGraphics* +#} + +{ + OSX1013:CoreGraphics-3 + Memcheck:Cond + obj:*CoreGraphics.framework*CoreGraphics* + obj:*CoreGraphics.framework*libRIP* + obj:*CoreGraphics.framework*libRIP* +} + +#{ +# OSX1013:CoreGraphics-4 +# Memcheck:Cond +# obj:*CoreGraphics.framework*CoreGraphics* +# obj:*CoreGraphics.framework*CoreGraphics* +# obj:*CoreGraphics.framework*libRIP* +#} + +{ + OSX1013:CoreGraphics-5 + Memcheck:Cond + obj:*CoreGraphics.framework*CoreGraphics* + obj:*libdispatch.dylib* + obj:*libdispatch.dylib* +} + +#{ +# OSX1013:CoreGraphics-6 +# Memcheck:Cond +# obj:*CoreGraphics.framework*CoreGraphics* +# obj:*CoreGraphics.framework*CoreGraphics* +# obj:*HIToolbox.framework*HIToolbox* +#} + +{ + OSX1013:HIServices-1 + Memcheck:Cond + obj:*HIServices.framework*HIServices* + obj:*HIToolbox.framework*HIToolbox* + obj:*HIToolbox.framework*HIToolbox* +} + +{ + OSX1013:LaunchServices-1 + Memcheck:Cond + obj:*LaunchServices.framework*LaunchServices* + obj:*AppKit.framework*AppKit* + obj:*AppKit.framework*AppKit* +} + +{ + OSX1013:LaunchServices-2 + Memcheck:Cond + obj:*LaunchServices.framework*LaunchServices* + obj:*libdispatch.dylib* + obj:*libdispatch.dylib* +} + +{ + OSX1013:QuartzCore-1 + Memcheck:Cond + obj:*QuartzCore.framework*QuartzCore + obj:*QuartzCore.framework*QuartzCore + obj:*QuartzCore.framework*QuartzCore +} + +#{ +# OSX1013:vImage-1 +# Memcheck:Cond +# obj:*vImage.framework*vImage* +# obj:*vImage.framework*vImage* +# obj:*CoreGraphics.framework*CoreGraphics* +#} + +{ + OSX1013:zlib-C + Memcheck:Cond + obj:/usr/lib/libz.*dylib + obj:/usr/lib/libz.*dylib +} + +{ + OSX1013:zlib-8 + Memcheck:Value8 + obj:/usr/lib/libz.*dylib + obj:/usr/lib/libz.*dylib +} + +{ + OSX1013:32bit:_libxpc_initializer + Memcheck:Cond + obj:/usr/lib/system/libsystem_c.dylib + obj:/usr/lib/system/libsystem_c.dylib + fun:_libxpc_initializer + obj:/usr/lib/libSystem.B.dylib + 
fun:*ImageLoaderMachO*doModInitFunctions* +} + +{ + OSX1013:dyld-1 + Memcheck:Cond + fun:*ImageLoader*weakBind* + fun:*ImageLoader*link* + fun:*dyld*link* +} + +{ + OSX1013:dyld-2 + Memcheck:Cond + fun:exit + obj:*libdyld*dylib* +} + +{ + OSX1013:dyld-3 + Memcheck:Cond + fun:bcmp + fun:_ZN16ImageLoaderMachO18validateFirstPagesEPK21linkedit_data_commandiPKhmxRKN11ImageLoader11LinkContextE + ... +} + +{ + OSX1013:dyld-4 + Memcheck:Value8 + fun:bcmp + fun:_ZN16ImageLoaderMachO18validateFirstPagesEPK21linkedit_data_commandiPKhmxRKN11ImageLoader11LinkContextE + ... +} + +{ + OSX1013:dyld-5 + Memcheck:Cond + fun:_ZN16ImageLoaderMachO18validateFirstPagesEPK21linkedit_data_commandiPKhmxRKN11ImageLoader11LinkContextE + fun:_ZN26ImageLoaderMachOCompressed19instantiateFromFileEPKciPKhmyyRK4statjjPK21linkedit_data_commandPK23encryption_info_commandRKN11ImageLoader11LinkContextE + fun:_ZN16ImageLoaderMachO19instantiateFromFileEPKciPKhmyyRK4statRKN11ImageLoader11LinkContextE + fun:_ZN4dyldL10loadPhase6EiRK4statPKcRKNS_11LoadContextE +} + +{ + OSX1013:libsystem_kernel-1 + Memcheck:Cond + obj:*libsystem_kernel*dylib* + obj:*libsystem_kernel*dylib* + obj:*CoreFoundation.framework*CoreFoundation* +} + +{ + OSX1013:CoreServicesInternal-1 + Memcheck:Cond + obj:*CoreServicesInternal.framework*CoreServicesInternal* + obj:*CoreServices.framework*LaunchServices* + obj:*CoreServices.framework*LaunchServices* +} + +{ + OSX1013:CoreServicesInternal-2 + Memcheck:Cond + obj:*CoreServicesInternal.framework*CoreServicesInternal* + obj:*CoreServicesInternal.framework*CoreServicesInternal* + obj:*CoreServicesInternal.framework*CoreServicesInternal* +} + +{ + OSX1013:CoreServicesInternal-3 + Memcheck:Cond + obj:*CoreServicesInternal.framework*CoreServicesInternal* + obj:*CoreServicesInternal.framework*CoreServicesInternal* + obj:*CoreFoundation.framework*CoreFoundation* +} + +{ + OSX1013:CoreServices-1 + Memcheck:Cond + obj:*CoreServices.framework*LaunchServices* + obj:*CoreServices.framework*LaunchServices* + obj:*CoreServices.framework*LaunchServices* +} + +##{ +## OSX1013:libsystem_pthread-1 +## Memcheck:Cond +## obj:*libsystem_pthread*dylib* +## obj:*ImageIO.framework*ImageIO* +## obj:*ImageIO.framework*ImageIO* +##} + +{ + OSX1013:ApplicationServices-1 + Memcheck:Cond + obj:*ApplicationServices.framework*ATS* + obj:*libsystem_pthread*dylib* + obj:*libsystem_platform*dylib* +} + +{ + OSX1013:HIToolbox-1 + Memcheck:Cond + obj:*HIToolbox.framework*HIToolbox* + obj:*HIToolbox.framework*HIToolbox* + obj:*HIToolbox.framework*HIToolbox* +} + +{ + OSX1013:RawCamera-1 + Memcheck:Cond + obj:*RawCamera.bundle*RawCamera* + obj:*libdispatch.dylib* + obj:*libdispatch.dylib* +} + +##{ +## OSX1013:CoreImage-1 +## Memcheck:Cond +## obj:*CoreImage.framework*CoreImage* +## obj:*CoreImage.framework*CoreImage* +## obj:*CoreImage.framework*CoreImage* +##} + +##{ +## OSX1013:strncpy-1 +## Memcheck:Cond +## fun:strncpy +## obj:*CoreServicesInternal.framework*CoreServicesInternal +## obj:*CoreServicesInternal.framework*CoreServicesInternal +##} + +{ + OSX1013:pthread_rwlock_init + Memcheck:Cond + fun:pthread_rwlock_init + obj:*ImageIO.framework*ImageIO* + obj:*ImageIO.framework*ImageIO* +} + +{ + OSX1013:CFBasicHash + Memcheck:Value8 + fun:*CFBasicHash* + fun:*CF* +} + +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-1-Addr8 + Memcheck:Addr8 + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + 
obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* +} +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-1-Addr8 + Memcheck:Addr4 + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* +} + +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-2-Addr8 + Memcheck:Addr8 + fun:*platform_memmove* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* +} +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-2-Addr2 + Memcheck:Addr2 + fun:*platform_memmove* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* +} + +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-3-Addr8 + Memcheck:Addr8 + fun:*platform_memmove* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*GLEngine.bundle*GLEngine* +} +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-3-Addr2 + Memcheck:Addr2 + fun:*platform_memmove* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*GLEngine.bundle*GLEngine* +} +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-3-Addr1 + Memcheck:Addr1 + fun:*platform_memmove* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*GLEngine.bundle*GLEngine* +} + +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-4 + Memcheck:Addr8 + fun:*platform_bzero* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* +} + +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-6-Addr8 + Memcheck:Addr8 + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*GLEngine.bundle*GLEngine* +} +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-6-Addr4 + Memcheck:Addr4 + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*GLEngine.bundle*GLEngine* +} + +{ + OSX1013:AppleIntelHD4000GraphicsGLDriver-7 + Memcheck:Addr4 + obj:*AppleIntelHD4000GraphicsGLDriver.bundle*AppleIntelHD4000GraphicsGLDriver* + obj:*GLEngine.bundle*GLEngine* +} + +{ + OSX1013:OSAtomicAdd32 + Memcheck:Addr4 + fun:*OSAtomicAdd32* + obj:*IOAccelerator.framework*IOAccelerator* + obj:*GPUSupport.framework*GPUSupportMercury* +} + +{ + OSX1013:IOAccelerator-1 + Memcheck:Addr4 + obj:*IOAccelerator.framework*IOAccelerator* + obj:*GPUSupport.framework*GPUSupportMercury* +} + +# See https://bugs.kde.org/show_bug.cgi?id=188572 about this; it's +# unavoidable due to BSD setenv() semantics. 
+{ + macos-__setenv-leak-see-our-bug-188572 + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + fun:_owned_ptr_alloc + fun:setenv +} + +# See https://bugs.kde.org/show_bug.cgi?id=196528 +{ +macos-__pthread_rwlock_init-see-our-bug-196528 +Memcheck:Cond +fun:pthread_rwlock_init +} diff --git a/include/vki/vki-scnums-darwin.h b/include/vki/vki-scnums-darwin.h index 961e335..d600ed3 100644 --- a/include/vki/vki-scnums-darwin.h +++ b/include/vki/vki-scnums-darwin.h @@ -191,6 +191,13 @@ #if defined(VGA_x86) #define __NR_macx_swapon VG_DARWIN_SYSCALL_CONSTRUCT_MACH(48) #define __NR_macx_swapoff VG_DARWIN_SYSCALL_CONSTRUCT_MACH(49) +#endif + +#if DARWIN_VERS >= DARWIN_10_13 +#define __NR_thread_get_special_reply_port VG_DARWIN_SYSCALL_CONSTRUCT_MACH(50) +#endif /* DARWIN_VERS >= DARWIN_10_13 */ + +#if defined(VGA_x86) #define __NR_macx_triggers VG_DARWIN_SYSCALL_CONSTRUCT_MACH(51) #define __NR_macx_backing_store_suspend VG_DARWIN_SYSCALL_CONSTRUCT_MACH(52) #define __NR_macx_backing_store_recovery VG_DARWIN_SYSCALL_CONSTRUCT_MACH(53) @@ -409,7 +416,11 @@ #define __NR_seteuid VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(183) #define __NR_sigreturn VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(184) #define __NR_chud VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(185) +#if DARWIN_VERS >= DARWIN_10_13 +#define __NR_thread_selfcounts VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(186) +#else /* 186 */ +#endif /* DARWIN_VERS >= DARWIN_10_13 */ #if DARWIN_VERS >= DARWIN_10_6 #define __NR_fdatasync VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(187) #else @@ -654,7 +665,11 @@ #if DARWIN_VERS >= DARWIN_10_11 #define __NR_kevent_qos VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(374) #endif /* DARWIN_VERS >= DARWIN_10_11 */ +#if DARWIN_VERS >= DARWIN_10_13 +#define __NR_kevent_id VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(375) +#else /* 375 */ +#endif /* DARWIN_VERS >= DARWIN_10_13 */ /* 376 */ /* 377 */ /* 378 */ @@ -810,6 +825,17 @@ #define __NR_abort_with_payload VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(521) #endif /* DARWIN_VERS >= DARWIN_10_12 */ +#if DARWIN_VERS >= DARWIN_10_13 +#define __NR_necp_session_open VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(522) +#define __NR_necp_session_action VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(523) +#define __NR_setattrlistat VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(524) +#define __NR_net_qos_guideline VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(525) +#define __NR_fmount VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(526) +#define __NR_ntp_adjtime VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(527) +#define __NR_ntp_gettime VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(528) +#define __NR_os_fault_with_payload VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(529) +#endif /* DARWIN_VERS >= DARWIN_10_13 */ + #if DARWIN_VERS < DARWIN_10_6 #define __NR_MAXSYSCALL VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(427) #elif DARWIN_VERS < DARWIN_10_7 @@ -824,6 +850,8 @@ #define __NR_MAXSYSCALL VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(500) #elif DARWIN_VERS == DARWIN_10_12 #define __NR_MAXSYSCALL VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(522) +#elif DARWIN_VERS == DARWIN_10_13 +#define __NR_MAXSYSCALL VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(530) #else #error unknown darwin version #endif |
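Most of the hunks in this commit repeat one compile-time gating idiom: configure.ac maps the kernel version reported by uname -r (Darwin 17.x) to DARWIN_10_13, and every new syscall number or wrapper is then guarded on DARWIN_VERS. A compressed, self-contained illustration of that idiom follows; the numeric values mirror configure.ac, while the hard-wired DARWIN_VERS and the printf body are invented for the example.

#include <stdio.h>

/* Values as set via AC_DEFINE in configure.ac. */
#define DARWIN_10_12 101200
#define DARWIN_10_13 101300

/* In the real build DARWIN_VERS is chosen by configure from `uname -r`;
   a Darwin 17.x kernel selects DARWIN_10_13.  Hard-wired here. */
#define DARWIN_VERS DARWIN_10_13

int main(void)
{
#if DARWIN_VERS >= DARWIN_10_13
   /* 10.13-only syscalls such as thread_selfcounts (186) and kevent_id (375)
      get their __NR_* numbers, and eventually wrappers, only in this branch. */
   printf("macOS 10.13 syscall numbers enabled\n");
#else
   printf("building for a pre-10.13 Darwin\n");
#endif
   return 0;
}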
|
From: Rhys K. <rhy...@so...> - 2017-10-01 23:10:06
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=ed6ad13bc8f2b33c493a72db9915f3681002e8d0 commit ed6ad13bc8f2b33c493a72db9915f3681002e8d0 Author: Rhys Kidd <rhy...@gm...> Date: Sun Oct 1 18:56:05 2017 -0400 Fix missing workq_ops operations (macOS) Related to discussion in bz#383723. Patch based upon one provided by Andy Maloney. Diff: --- NEWS | 1 + coregrind/m_syswrap/syswrap-darwin.c | 69 +++++++++++++++++++++++++++--------- include/vki/vki-darwin.h | 21 ++++++----- 3 files changed, 67 insertions(+), 24 deletions(-) diff --git a/NEWS b/NEWS index eccbd19..4d6d840 100644 --- a/NEWS +++ b/NEWS @@ -55,6 +55,7 @@ where XXXXXX is the bug number as listed below. 384096 Mention AddrCheck at Memcheck's command line option --undef-value-errors=no 384526 reduce number of spill instructions generated by VEX register allocator v3 384584 Callee saved registers listed first for AMD64, X86, and PPC architectures +n-i-bz Fix missing workq_ops operations (macOS) Release 3.13.0 (15 June 2017) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/coregrind/m_syswrap/syswrap-darwin.c b/coregrind/m_syswrap/syswrap-darwin.c index 0670824..e8097e3 100644 --- a/coregrind/m_syswrap/syswrap-darwin.c +++ b/coregrind/m_syswrap/syswrap-darwin.c @@ -833,6 +833,7 @@ Bool ML_(sync_mappings)(const HChar* when, const HChar* where, UWord num) Bool where_iuct = STREQ(where, "iokit_user_client_trap"); Bool where_MwcN = STREQ(where, "ML_(wqthread_continue_NORETURN)"); Bool where_woQR = STREQ(where, "workq_ops(QUEUE_REQTHREADS)"); + Bool where_woQ2 = STREQ(where, "workq_ops(QUEUE_REQTHREADS2)"); Bool where_woTR = STREQ(where, "workq_ops(THREAD_RETURN)"); Bool where_ke64 = STREQ(where, "kevent64"); # undef STREQ @@ -840,8 +841,8 @@ Bool ML_(sync_mappings)(const HChar* when, const HChar* where, UWord num) vg_assert( 1 >= ( (where_mmr ? 1 : 0) + (where_mmrU ? 1 : 0) + (where_iuct ? 1 : 0) + (where_MwcN ? 1 : 0) - + (where_woQR ? 1 : 0) + (where_woTR ? 1 : 0) - + (where_ke64 ? 1 : 0) + + (where_woQR ? 1 : 0) + (where_woQ2 ? 1 : 0) + + (where_woTR ? 1 : 0) + (where_ke64 ? 
1 : 0) )); // merely to stop gcc complaining of non-use in the case where // there's no filter: @@ -892,6 +893,11 @@ Bool ML_(sync_mappings)(const HChar* when, const HChar* where, UWord num) // upd 14434 diff 102+,0- check = CheckEvery20; } +/* if (when_after && where_woQ2 && num == 0x00000000) { + // after workq_ops(QUEUE_REQTHREADS2) 0x00000000 + // upd XXXX diff XX+,0- + check = CheckEvery20; + } */ else if (when_after && where_woTR && num == 0x00000000) { // after workq_ops(THREAD_RETURN) 0x00000000 @@ -954,6 +960,11 @@ Bool ML_(sync_mappings)(const HChar* when, const HChar* where, UWord num) // upd 1099 diff 37+,0- check = CheckEvery20; } +/* if (when_after && where_woQ2 && num == 0x00000000) { + // after workq_ops(QUEUE_REQTHREADS2) 0x00000000 + // upd XXXX diff XX+,0- + check = CheckEvery20; + } */ else if (when_after && where_woTR && num == 0x00000000) { // after workq_ops(THREAD_RETURN) 0x00000000 @@ -2089,12 +2100,17 @@ PRE(workq_open) static const HChar *workqop_name(int op) { switch (op) { - case VKI_WQOPS_QUEUE_ADD: return "QUEUE_ADD"; - case VKI_WQOPS_QUEUE_REMOVE: return "QUEUE_REMOVE"; - case VKI_WQOPS_THREAD_RETURN: return "THREAD_RETURN"; - case VKI_WQOPS_THREAD_SETCONC: return "THREAD_SETCONC"; - case VKI_WQOPS_QUEUE_NEWSPISUPP: return "QUEUE_NEWSPISUPP"; - case VKI_WQOPS_QUEUE_REQTHREADS: return "QUEUE_REQTHREADS"; + case VKI_WQOPS_QUEUE_ADD: return "QUEUE_ADD"; + case VKI_WQOPS_QUEUE_REMOVE: return "QUEUE_REMOVE"; + case VKI_WQOPS_THREAD_RETURN: return "THREAD_RETURN"; + case VKI_WQOPS_THREAD_SETCONC: return "THREAD_SETCONC"; + case VKI_WQOPS_QUEUE_NEWSPISUPP: return "QUEUE_NEWSPISUPP"; + case VKI_WQOPS_QUEUE_REQTHREADS: return "QUEUE_REQTHREADS"; + case VKI_WQOPS_QUEUE_REQTHREADS2: return "QUEUE_REQTHREADS2"; + case VKI_WQOPS_THREAD_KEVENT_RETURN: return "THREAD_KEVENT_RETURN"; + case VKI_WQOPS_SET_EVENT_MANAGER_PRIORITY: return "SET_EVENT_MANAGER_PRIORITY"; + case VKI_WQOPS_THREAD_WORKLOOP_RETURN: return "THREAD_WORKLOOP_RETURN"; + case VKI_WQOPS_SHOULD_NARROW: return "SHOULD_NARROW"; default: return "?"; } } @@ -2113,14 +2129,6 @@ PRE(workq_ops) // GrP fixme need anything here? // GrP fixme may block? break; - case VKI_WQOPS_QUEUE_NEWSPISUPP: - // JRS don't think we need to do anything here -- this just checks - // whether some newer functionality is supported - break; - case VKI_WQOPS_QUEUE_REQTHREADS: - // JRS uh, looks like it queues up a bunch of threads, or some such? - *flags |= SfMayBlock; // the kernel sources take a spinlock, so play safe - break; case VKI_WQOPS_THREAD_RETURN: { // The interesting case. The kernel will do one of two things: // 1. Return normally. We continue; libc proceeds to stop the thread. @@ -2137,6 +2145,32 @@ PRE(workq_ops) *flags |= SfMayBlock; // GrP fixme true? break; } + case VKI_WQOPS_THREAD_SETCONC: + // RK fixme need anything here? + // RK fixme may block? + break; + case VKI_WQOPS_QUEUE_NEWSPISUPP: + // JRS don't think we need to do anything here -- this just checks + // whether some newer functionality is supported + break; + case VKI_WQOPS_QUEUE_REQTHREADS: + case VKI_WQOPS_QUEUE_REQTHREADS2: + // JRS uh, looks like it queues up a bunch of threads, or some such? + *flags |= SfMayBlock; // the kernel sources take a spinlock, so play safe + break; + case VKI_WQOPS_THREAD_KEVENT_RETURN: + // RK fixme need anything here? + // perhaps similar to VKI_WQOPS_THREAD_RETURN above? 
+ break; + case VKI_WQOPS_SET_EVENT_MANAGER_PRIORITY: + // RK fixme this just sets scheduling priorities - don't think we need + // to do anything here + break; + case VKI_WQOPS_THREAD_WORKLOOP_RETURN: + case VKI_WQOPS_SHOULD_NARROW: + // RK fixme need anything here? + // RK fixme may block? + break; default: VG_(printf)("UNKNOWN workq_ops option %ld\n", ARG1); break; @@ -2153,6 +2187,9 @@ POST(workq_ops) case VKI_WQOPS_QUEUE_REQTHREADS: ML_(sync_mappings)("after", "workq_ops(QUEUE_REQTHREADS)", 0); break; + case VKI_WQOPS_QUEUE_REQTHREADS2: + ML_(sync_mappings)("after", "workq_ops(QUEUE_REQTHREADS2)", 0); + break; default: break; } diff --git a/include/vki/vki-darwin.h b/include/vki/vki-darwin.h index 72b66bf..53d14f3 100644 --- a/include/vki/vki-darwin.h +++ b/include/vki/vki-darwin.h @@ -847,14 +847,19 @@ struct ByteRangeLockPB2 #define VKI_FSIOC_SYNC_VOLUME _IOW('A', 1, uint32_t) -// Libc/pthreads/pthread.c - -#define VKI_WQOPS_QUEUE_ADD 1 -#define VKI_WQOPS_QUEUE_REMOVE 2 -#define VKI_WQOPS_THREAD_RETURN 4 -#define VKI_WQOPS_THREAD_SETCONC 8 -#define VKI_WQOPS_QUEUE_NEWSPISUPP 16 /* check for newer SPI support */ -#define VKI_WQOPS_QUEUE_REQTHREADS 32 /* request number of threads of a prio */ +// libpthread/kern/workqueue_internal.h + +#define VKI_WQOPS_QUEUE_ADD 1 +#define VKI_WQOPS_QUEUE_REMOVE 2 +#define VKI_WQOPS_THREAD_RETURN 4 /* parks the thread back into the kernel */ +#define VKI_WQOPS_THREAD_SETCONC 8 +#define VKI_WQOPS_QUEUE_NEWSPISUPP 16 /* check for newer SPI support */ +#define VKI_WQOPS_QUEUE_REQTHREADS 32 /* request number of threads of a prio */ +#define VKI_WQOPS_QUEUE_REQTHREADS2 48 /* request a number of threads in a given priority bucket */ +#define VKI_WQOPS_THREAD_KEVENT_RETURN 64 /* parks the thread after delivering the passed kevent array */ +#define VKI_WQOPS_SET_EVENT_MANAGER_PRIORITY 128 /* max() in the provided priority in the the priority of the event manager */ +#define VKI_WQOPS_THREAD_WORKLOOP_RETURN 256 /* parks the thread after delivering the passed kevent array */ +#define VKI_WQOPS_SHOULD_NARROW 512 /* checks whether we should narrow our concurrency */ #include <sys/ttycom.h> |
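Beyond the new opcode names, the main behavioural change is that QUEUE_REQTHREADS2 is treated like QUEUE_REQTHREADS (possibly blocking, with a mapping resync afterwards), while the remaining new opcodes are simply recognised. That classification is summarised in the stand-alone sketch below; the opcode values are copied from the VKI_WQOPS_* definitions above, while may_block() and the driver are invented purely for illustration.

#include <stdio.h>

/* Opcode values from libpthread's workqueue_internal.h (see the diff above). */
#define WQOPS_THREAD_RETURN               4
#define WQOPS_QUEUE_NEWSPISUPP           16
#define WQOPS_QUEUE_REQTHREADS           32
#define WQOPS_QUEUE_REQTHREADS2          48
#define WQOPS_THREAD_KEVENT_RETURN       64
#define WQOPS_SET_EVENT_MANAGER_PRIORITY 128

/* Illustration of the wrapper's choice: these ops can park the thread or
   take kernel locks, so the syscall is marked as possibly blocking. */
static int may_block(int op)
{
   switch (op) {
      case WQOPS_THREAD_RETURN:
      case WQOPS_QUEUE_REQTHREADS:
      case WQOPS_QUEUE_REQTHREADS2:
         return 1;
      default:
         return 0;    /* NEWSPISUPP, SET_EVENT_MANAGER_PRIORITY, ... */
   }
}

int main(void)
{
   printf("QUEUE_REQTHREADS2 may block: %d\n",
          may_block(WQOPS_QUEUE_REQTHREADS2));
   return 0;
}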
|
From: Ivo R. <ir...@so...> - 2017-10-01 05:13:06
|
https://sourceware.org/git/gitweb.cgi?p=valgrind.git;h=33053442153eabff2170bf5c862c57c10bc69071 commit 33053442153eabff2170bf5c862c57c10bc69071 Author: Ivo Raisr <iv...@iv...> Date: Sun Oct 1 07:12:05 2017 +0200 Register allocator: fix merging of Unallocated/Spilled vreg. Spilled vreg has no associated rreg to free. Diff: --- VEX/priv/host_generic_reg_alloc3.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c index 899fefe..81cf5c5 100644 --- a/VEX/priv/host_generic_reg_alloc3.c +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -569,7 +569,7 @@ static inline HReg find_vreg_to_spill( incoming instruction stream as possible. An ideal rreg candidate is a callee-save register because it won't be used for parameter passing around helper function calls. */ -static Bool find_free_rreg( +static inline Bool find_free_rreg( const RegAllocChunk* chunk, RegAllocState* state, Short ii_chunk_current, HRegClass target_hregclass, Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found) @@ -1541,8 +1541,7 @@ static void merge_vreg_states(RegAllocChunk* chunk, case Unallocated: /* Good. Nothing to do. */ break; - case Assigned: /* fall through */ - case Spilled: + case Assigned: /* Should be dead by now. */ vassert(v2_src_state->dead_before <= chunk->next->ii_total_start); @@ -1550,6 +1549,12 @@ static void merge_vreg_states(RegAllocChunk* chunk, FREE_VREG(v2_src_state); FREE_RREG(&state2->rregs[hregIndex(rreg2)]); break; + case Spilled: + /* Should be dead by now. */ + vassert(v2_src_state->dead_before <= chunk->next->ii_total_start); + + FREE_VREG(v2_src_state); + break; default: vassert(0); } |
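The one-line summary above is worth spelling out: a Spilled vreg lives in its spill slot, not in a real register, so when its out-of-line state is merged away there is no rreg to hand back; only the Assigned case releases one. Below is a minimal sketch of the corrected case split, using invented types rather than the real RegAllocState.

#include <assert.h>

typedef enum { Unallocated, Assigned, Spilled } VRegDisp;

typedef struct {
   VRegDisp disp;
   int      rreg;          /* meaningful only when disp == Assigned */
   int      spill_slot;    /* meaningful only when disp == Spilled  */
} VRegState;

static int rreg_bound[8];  /* 1 = this rreg still holds some vreg */

static void merge_away_dead_vreg(VRegState* v)
{
   switch (v->disp) {
      case Unallocated:
         break;                       /* nothing to release */
      case Assigned:
         rreg_bound[v->rreg] = 0;     /* the dead vreg gives its rreg back */
         v->disp = Unallocated;
         break;
      case Spilled:
         /* Before the fix this shared the Assigned path and tried to free
            an rreg the vreg does not occupy; only the vreg state resets. */
         v->disp = Unallocated;
         break;
      default:
         assert(0);
   }
}

int main(void)
{
   VRegState v = { .disp = Spilled, .spill_slot = 4 };
   merge_away_dead_vreg(&v);
   assert(v.disp == Unallocated);
   return 0;
}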