You can subscribe to this list here.
| 2002 |
Jan
|
Feb
|
Mar
|
Apr
|
May
|
Jun
|
Jul
|
Aug
|
Sep
(1) |
Oct
(122) |
Nov
(152) |
Dec
(69) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2003 |
Jan
(6) |
Feb
(25) |
Mar
(73) |
Apr
(82) |
May
(24) |
Jun
(25) |
Jul
(10) |
Aug
(11) |
Sep
(10) |
Oct
(54) |
Nov
(203) |
Dec
(182) |
| 2004 |
Jan
(307) |
Feb
(305) |
Mar
(430) |
Apr
(312) |
May
(187) |
Jun
(342) |
Jul
(487) |
Aug
(637) |
Sep
(336) |
Oct
(373) |
Nov
(441) |
Dec
(210) |
| 2005 |
Jan
(385) |
Feb
(480) |
Mar
(636) |
Apr
(544) |
May
(679) |
Jun
(625) |
Jul
(810) |
Aug
(838) |
Sep
(634) |
Oct
(521) |
Nov
(965) |
Dec
(543) |
| 2006 |
Jan
(494) |
Feb
(431) |
Mar
(546) |
Apr
(411) |
May
(406) |
Jun
(322) |
Jul
(256) |
Aug
(401) |
Sep
(345) |
Oct
(542) |
Nov
(308) |
Dec
(481) |
| 2007 |
Jan
(427) |
Feb
(326) |
Mar
(367) |
Apr
(255) |
May
(244) |
Jun
(204) |
Jul
(223) |
Aug
(231) |
Sep
(354) |
Oct
(374) |
Nov
(497) |
Dec
(362) |
| 2008 |
Jan
(322) |
Feb
(482) |
Mar
(658) |
Apr
(422) |
May
(476) |
Jun
(396) |
Jul
(455) |
Aug
(267) |
Sep
(280) |
Oct
(253) |
Nov
(232) |
Dec
(304) |
| 2009 |
Jan
(486) |
Feb
(470) |
Mar
(458) |
Apr
(423) |
May
(696) |
Jun
(461) |
Jul
(551) |
Aug
(575) |
Sep
(134) |
Oct
(110) |
Nov
(157) |
Dec
(102) |
| 2010 |
Jan
(226) |
Feb
(86) |
Mar
(147) |
Apr
(117) |
May
(107) |
Jun
(203) |
Jul
(193) |
Aug
(238) |
Sep
(300) |
Oct
(246) |
Nov
(23) |
Dec
(75) |
| 2011 |
Jan
(133) |
Feb
(195) |
Mar
(315) |
Apr
(200) |
May
(267) |
Jun
(293) |
Jul
(353) |
Aug
(237) |
Sep
(278) |
Oct
(611) |
Nov
(274) |
Dec
(260) |
| 2012 |
Jan
(303) |
Feb
(391) |
Mar
(417) |
Apr
(441) |
May
(488) |
Jun
(655) |
Jul
(590) |
Aug
(610) |
Sep
(526) |
Oct
(478) |
Nov
(359) |
Dec
(372) |
| 2013 |
Jan
(467) |
Feb
(226) |
Mar
(391) |
Apr
(281) |
May
(299) |
Jun
(252) |
Jul
(311) |
Aug
(352) |
Sep
(481) |
Oct
(571) |
Nov
(222) |
Dec
(231) |
| 2014 |
Jan
(185) |
Feb
(329) |
Mar
(245) |
Apr
(238) |
May
(281) |
Jun
(399) |
Jul
(382) |
Aug
(500) |
Sep
(579) |
Oct
(435) |
Nov
(487) |
Dec
(256) |
| 2015 |
Jan
(338) |
Feb
(357) |
Mar
(330) |
Apr
(294) |
May
(191) |
Jun
(108) |
Jul
(142) |
Aug
(261) |
Sep
(190) |
Oct
(54) |
Nov
(83) |
Dec
(22) |
| 2016 |
Jan
(49) |
Feb
(89) |
Mar
(33) |
Apr
(50) |
May
(27) |
Jun
(34) |
Jul
(53) |
Aug
(53) |
Sep
(98) |
Oct
(206) |
Nov
(93) |
Dec
(53) |
| 2017 |
Jan
(65) |
Feb
(82) |
Mar
(102) |
Apr
(86) |
May
(187) |
Jun
(67) |
Jul
(23) |
Aug
(93) |
Sep
(65) |
Oct
(45) |
Nov
(35) |
Dec
(17) |
| 2018 |
Jan
(26) |
Feb
(35) |
Mar
(38) |
Apr
(32) |
May
(8) |
Jun
(43) |
Jul
(27) |
Aug
(30) |
Sep
(43) |
Oct
(42) |
Nov
(38) |
Dec
(67) |
| 2019 |
Jan
(32) |
Feb
(37) |
Mar
(53) |
Apr
(64) |
May
(49) |
Jun
(18) |
Jul
(14) |
Aug
(53) |
Sep
(25) |
Oct
(30) |
Nov
(49) |
Dec
(31) |
| 2020 |
Jan
(87) |
Feb
(45) |
Mar
(37) |
Apr
(51) |
May
(99) |
Jun
(36) |
Jul
(11) |
Aug
(14) |
Sep
(20) |
Oct
(24) |
Nov
(40) |
Dec
(23) |
| 2021 |
Jan
(14) |
Feb
(53) |
Mar
(85) |
Apr
(15) |
May
(19) |
Jun
(3) |
Jul
(14) |
Aug
(1) |
Sep
(57) |
Oct
(73) |
Nov
(56) |
Dec
(22) |
| 2022 |
Jan
(3) |
Feb
(22) |
Mar
(6) |
Apr
(55) |
May
(46) |
Jun
(39) |
Jul
(15) |
Aug
(9) |
Sep
(11) |
Oct
(34) |
Nov
(20) |
Dec
(36) |
| 2023 |
Jan
(79) |
Feb
(41) |
Mar
(99) |
Apr
(169) |
May
(48) |
Jun
(16) |
Jul
(16) |
Aug
(57) |
Sep
(32) |
Oct
|
Nov
|
Dec
|
| S | M | T | W | T | F | S |
|---|---|---|---|---|---|---|
|
|
|
|
1
(30) |
2
(8) |
3
(5) |
4
(5) |
|
5
(3) |
6
(9) |
7
(5) |
8
(14) |
9
(17) |
10
(27) |
11
(10) |
|
12
(6) |
13
(10) |
14
(7) |
15
(16) |
16
(9) |
17
(14) |
18
(8) |
|
19
(5) |
20
(13) |
21
(21) |
22
(13) |
23
(4) |
24
(1) |
25
(4) |
|
26
(2) |
27
(7) |
28
(4) |
29
(5) |
30
(12) |
|
|
|
From: <sv...@va...> - 2015-04-17 23:42:48
|
Author: carll
Date: Sat Apr 18 00:42:40 2015
New Revision: 3137
Log:
Add support for the lbarx, lharx, stbcx and sthcx instructions.
The instructions are part of the ISA 2.06 but were not implemented
in all versions of hardware. The four instructions are all supported
in ISA 2.07. The instructions were put under the ISA 2.07 category
of supported instructions in this patch.
The bugzilla for this issue is 346324.
Modified:
trunk/priv/guest_ppc_toIR.c
trunk/priv/host_ppc_defs.c
trunk/priv/host_ppc_isel.c
Modified: trunk/priv/guest_ppc_toIR.c
==============================================================================
--- trunk/priv/guest_ppc_toIR.c (original)
+++ trunk/priv/guest_ppc_toIR.c Sat Apr 18 00:42:40 2015
@@ -1729,7 +1729,7 @@
restart of the current insn. */
static void gen_SIGBUS_if_misaligned ( IRTemp addr, UChar align )
{
- vassert(align == 4 || align == 8 || align == 16);
+ vassert(align == 2 || align == 4 || align == 8 || align == 16);
if (mode64) {
vassert(typeOfIRTemp(irsb->tyenv, addr) == Ity_I64);
stmt(
@@ -6292,6 +6292,41 @@
break;
}
+ case 0x034: { // lbarx (Load Word and Reserve Indexed)
+ IRTemp res;
+ /* According to the PowerPC ISA version 2.05, b0 (called EH
+ in the documentation) is merely a hint bit to the
+ hardware, I think as to whether or not contention is
+ likely. So we can just ignore it. */
+ DIP("lbarx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0);
+
+ // and actually do the load
+ res = newTemp(Ity_I8);
+ stmt( stmt_load(res, mkexpr(EA), NULL/*this is a load*/) );
+
+ putIReg( rD_addr, mkWidenFrom8(ty, mkexpr(res), False) );
+ break;
+ }
+
+ case 0x074: { // lharx (Load Word and Reserve Indexed)
+ IRTemp res;
+ /* According to the PowerPC ISA version 2.05, b0 (called EH
+ in the documentation) is merely a hint bit to the
+ hardware, I think as to whether or not contention is
+ likely. So we can just ignore it. */
+ DIP("lharx r%u,r%u,r%u,EH=%u\n", rD_addr, rA_addr, rB_addr, (UInt)b0);
+
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 2 );
+
+ // and actually do the load
+ res = newTemp(Ity_I16);
+ stmt( stmt_load(res, mkexpr(EA), NULL/*this is a load*/) );
+
+ putIReg( rD_addr, mkWidenFrom16(ty, mkexpr(res), False) );
+ break;
+ }
+
case 0x096: {
// stwcx. (Store Word Conditional Indexed, PPC32 p532)
// Note this has to handle stwcx. in both 32- and 64-bit modes,
@@ -6326,6 +6361,71 @@
break;
}
+ case 0x2B6: {
+ // stbcx. (Store Byte Conditional Indexed)
+ // Note this has to handle stbcx. in both 32- and 64-bit modes,
+ // so isn't quite as straightforward as it might otherwise be.
+ IRTemp rS = newTemp(Ity_I8);
+ IRTemp resSC;
+ if (b0 != 1) {
+ vex_printf("dis_memsync(ppc)(stbcx.,b0)\n");
+ return False;
+ }
+ DIP("stbcx. r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+
+ // Get the data to be stored, and narrow to 32 bits if necessary
+ assign( rS, mkNarrowTo8(ty, getIReg(rS_addr)) );
+
+ // Do the store, and get success/failure bit into resSC
+ resSC = newTemp(Ity_I1);
+ stmt( stmt_load( resSC, mkexpr(EA), mkexpr(rS)) );
+
+ // Set CR0[LT GT EQ S0] = 0b000 || XER[SO] on failure
+ // Set CR0[LT GT EQ S0] = 0b001 || XER[SO] on success
+ putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1)));
+ putCR0(0, getXER_SO());
+
+ /* Note:
+ If resaddr != lbarx_resaddr, CR0[EQ] is undefined, and
+ whether rS is stored is dependent on that value. */
+ /* So I guess we can just ignore this case? */
+ break;
+ }
+
+ case 0x2D6: {
+ // sthcx. (Store Word Conditional Indexed, PPC32 p532)
+ // Note this has to handle sthcx. in both 32- and 64-bit modes,
+ // so isn't quite as straightforward as it might otherwise be.
+ IRTemp rS = newTemp(Ity_I16);
+ IRTemp resSC;
+ if (b0 != 1) {
+ vex_printf("dis_memsync(ppc)(stwcx.,b0)\n");
+ return False;
+ }
+ DIP("sthcx. r%u,r%u,r%u\n", rS_addr, rA_addr, rB_addr);
+
+ // trap if misaligned
+ gen_SIGBUS_if_misaligned( EA, 2 );
+
+ // Get the data to be stored, and narrow to 16 bits if necessary
+ assign( rS, mkNarrowTo16(ty, getIReg(rS_addr)) );
+
+ // Do the store, and get success/failure bit into resSC
+ resSC = newTemp(Ity_I1);
+ stmt( stmt_load( resSC, mkexpr(EA), mkexpr(rS)) );
+
+ // Set CR0[LT GT EQ S0] = 0b000 || XER[SO] on failure
+ // Set CR0[LT GT EQ S0] = 0b001 || XER[SO] on success
+ putCR321(0, binop(Iop_Shl8, unop(Iop_1Uto8, mkexpr(resSC)), mkU8(1)));
+ putCR0(0, getXER_SO());
+
+ /* Note:
+ If resaddr != lharx_resaddr, CR0[EQ] is undefined, and
+ whether rS is stored is dependent on that value. */
+ /* So I guess we can just ignore this case? */
+ break;
+ }
+
case 0x256: // sync (Synchronize, PPC32 p543),
// also lwsync (L==1), ptesync (L==2)
/* http://sources.redhat.com/ml/binutils/2000-12/msg00311.html
@@ -19668,6 +19768,12 @@
}
/* Memory Synchronization Instructions */
+ case 0x034: case 0x074: // lbarx, lharx
+ case 0x2B6: case 0x2D6: // stbcx, sthcx
+ if (!allow_isa_2_07) goto decode_noP8;
+ if (dis_memsync( theInstr )) goto decode_success;
+ goto decode_failure;
+
case 0x356: case 0x014: case 0x096: // eieio, lwarx, stwcx.
case 0x256: // sync
if (dis_memsync( theInstr )) goto decode_success;
Modified: trunk/priv/host_ppc_defs.c
==============================================================================
--- trunk/priv/host_ppc_defs.c (original)
+++ trunk/priv/host_ppc_defs.c Sat Apr 18 00:42:40 2015
@@ -861,7 +861,7 @@
i->Pin.LoadL.sz = sz;
i->Pin.LoadL.src = src;
i->Pin.LoadL.dst = dst;
- vassert(sz == 4 || sz == 8);
+ vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
if (sz == 8) vassert(mode64);
return i;
}
@@ -882,7 +882,7 @@
i->Pin.StoreC.sz = sz;
i->Pin.StoreC.src = src;
i->Pin.StoreC.dst = dst;
- vassert(sz == 4 || sz == 8);
+ vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
if (sz == 8) vassert(mode64);
return i;
}
@@ -1644,12 +1644,15 @@
ppPPCAMode(i->Pin.Load.src);
return;
}
- case Pin_LoadL:
- vex_printf("l%carx ", i->Pin.LoadL.sz==4 ? 'w' : 'd');
+ case Pin_LoadL: {
+ UChar sz = i->Pin.LoadL.sz;
+ HChar c_sz = sz==1 ? 'b' : sz==2 ? 'h' : sz==4 ? 'w' : 'd';
+ vex_printf("l%carx ", c_sz);
ppHRegPPC(i->Pin.LoadL.dst);
vex_printf(",%%r0,");
ppHRegPPC(i->Pin.LoadL.src);
return;
+ }
case Pin_Store: {
UChar sz = i->Pin.Store.sz;
Bool idxd = toBool(i->Pin.Store.dst->tag == Pam_RR);
@@ -1660,12 +1663,15 @@
ppPPCAMode(i->Pin.Store.dst);
return;
}
- case Pin_StoreC:
- vex_printf("st%ccx. ", i->Pin.StoreC.sz==4 ? 'w' : 'd');
+ case Pin_StoreC: {
+ UChar sz = i->Pin.StoreC.sz;
+ HChar c_sz = sz==1 ? 'b' : sz==2 ? 'h' : sz==4 ? 'w' : 'd';
+ vex_printf("st%ccx. ", c_sz);
ppHRegPPC(i->Pin.StoreC.src);
vex_printf(",%%r0,");
ppHRegPPC(i->Pin.StoreC.dst);
return;
+ }
case Pin_Set: {
PPCCondCode cc = i->Pin.Set.cond;
vex_printf("set (%s),", showPPCCondCode(cc));
@@ -4399,6 +4405,16 @@
}
case Pin_LoadL: {
+ if (i->Pin.LoadL.sz == 1) {
+ p = mkFormX(p, 31, iregEnc(i->Pin.LoadL.dst, mode64),
+ 0, iregEnc(i->Pin.LoadL.src, mode64), 52, 0, endness_host);
+ goto done;
+ }
+ if (i->Pin.LoadL.sz == 2) {
+ p = mkFormX(p, 31, iregEnc(i->Pin.LoadL.dst, mode64),
+ 0, iregEnc(i->Pin.LoadL.src, mode64), 116, 0, endness_host);
+ goto done;
+ }
if (i->Pin.LoadL.sz == 4) {
p = mkFormX(p, 31, iregEnc(i->Pin.LoadL.dst, mode64),
0, iregEnc(i->Pin.LoadL.src, mode64), 20, 0, endness_host);
@@ -4495,6 +4511,17 @@
}
case Pin_StoreC: {
+ if (i->Pin.StoreC.sz == 1) {
+ p = mkFormX(p, 31, iregEnc(i->Pin.StoreC.src, mode64),
+ 0, iregEnc(i->Pin.StoreC.dst, mode64), 694, 1, endness_host);
+ goto done;
+ }
+ if (i->Pin.StoreC.sz == 2) {
+ p = mkFormX(p, 31, iregEnc(i->Pin.StoreC.src, mode64),
+ 0, iregEnc(i->Pin.StoreC.dst, mode64), 726, 1, endness_host);
+ goto done;
+ }
+
if (i->Pin.StoreC.sz == 4) {
p = mkFormX(p, 31, iregEnc(i->Pin.StoreC.src, mode64),
0, iregEnc(i->Pin.StoreC.dst, mode64), 150, 1, endness_host);
Modified: trunk/priv/host_ppc_isel.c
==============================================================================
--- trunk/priv/host_ppc_isel.c (original)
+++ trunk/priv/host_ppc_isel.c Sat Apr 18 00:42:40 2015
@@ -5754,6 +5754,14 @@
/* LL */
HReg r_addr = iselWordExpr_R( env, stmt->Ist.LLSC.addr, IEndianess );
HReg r_dst = lookupIRTemp(env, res);
+ if (tyRes == Ity_I8) {
+ addInstr(env, PPCInstr_LoadL( 1, r_dst, r_addr, mode64 ));
+ return;
+ }
+ if (tyRes == Ity_I16) {
+ addInstr(env, PPCInstr_LoadL( 2, r_dst, r_addr, mode64 ));
+ return;
+ }
if (tyRes == Ity_I32) {
addInstr(env, PPCInstr_LoadL( 4, r_dst, r_addr, mode64 ));
return;
@@ -5773,8 +5781,20 @@
IRType tyData = typeOfIRExpr(env->type_env,
stmt->Ist.LLSC.storedata);
vassert(tyRes == Ity_I1);
- if (tyData == Ity_I32 || (tyData == Ity_I64 && mode64)) {
- addInstr(env, PPCInstr_StoreC( tyData==Ity_I32 ? 4 : 8,
+ if (tyData == Ity_I8 || tyData == Ity_I16 || tyData == Ity_I32 ||
+ (tyData == Ity_I64 && mode64)) {
+ int size = 0;
+
+ if (tyData == Ity_I64)
+ size = 8;
+ else if (tyData == Ity_I32)
+ size = 4;
+ else if (tyData == Ity_I16)
+ size = 2;
+ else if (tyData == Ity_I8)
+ size = 1;
+
+ addInstr(env, PPCInstr_StoreC( size,
r_a, r_src, mode64 ));
addInstr(env, PPCInstr_MfCR( r_tmp ));
addInstr(env, PPCInstr_Shft(
|
|
From: <sv...@va...> - 2015-04-17 21:19:53
|
Author: philippe
Date: Fri Apr 17 22:19:43 2015
New Revision: 15105
Log:
Fix statistics about ctxt_rcec:
* the nr of discards was always 0
* the cur nr of values was shown as max
Modified:
trunk/helgrind/libhb_core.c
Modified: trunk/helgrind/libhb_core.c
==============================================================================
--- trunk/helgrind/libhb_core.c (original)
+++ trunk/helgrind/libhb_core.c Fri Apr 17 22:19:43 2015
@@ -4738,6 +4738,7 @@
free_RCEC(p);
p = *pp;
tl_assert(stats__ctxt_tab_curr > 0);
+ stats__ctxt_rcdec_discards++;
stats__ctxt_tab_curr--;
} else {
pp = &p->next;
@@ -6293,9 +6294,10 @@
stats__ctxt_rcdec3 );
VG_(printf)( " libhb: ctxt__rcdec: calls %lu, discards %lu\n",
stats__ctxt_rcdec_calls, stats__ctxt_rcdec_discards);
- VG_(printf)( " libhb: contextTab: %lu slots, %lu max ents\n",
+ VG_(printf)( " libhb: contextTab: %lu slots, %lu cur ents,"
+ " %lu max ents\n",
(UWord)N_RCEC_TAB,
- stats__ctxt_tab_curr );
+ stats__ctxt_tab_curr, stats__ctxt_tab_max );
VG_(printf)( " libhb: contextTab: %lu queries, %lu cmps\n",
stats__ctxt_tab_qs,
stats__ctxt_tab_cmps );
|
|
From: Patrick J. L. <lop...@gm...> - 2015-04-17 15:57:24
|
On Thu, Apr 16, 2015 at 11:31 AM, Carl E. Love <ce...@us...> wrote: > > > There is nothing that can be done at the source code level to eliminate the notice from the compiler. Really? Glancing through the GCC source, it looks like: #pragma GCC diagnostic ignored "-Wpsabi" ...should do the trick. Or just pass "-Wno-psabi" on the command line. (Or am I missing something?) Probably should be conditional on GCC version, of course. - Pat |
|
From: Julian S. <js...@ac...> - 2015-04-17 10:57:16
|
On 17/04/15 12:47, Florian Krohm wrote: > Now, looking at the linux side of things: > > VG_TRACK( new_mem_stack_signal, addr - VG_STACK_REDZONE_SZB, > size + VG_STACK_REDZONE_SZB, tid ); > > With your above argument (which is platform neutral), this does not look > right either. I agree. It might be one of those things which has always been wrong, but which nobody really noticed until now. If you're amenable to it, I'd suggest to use simply |size| in the new merged-up version. If we get it wrong somehow, the worst that can happen is that we'll get flooded with false positive errors in signal handlers and we'll soon know something isn't right. So it's a low-risk change IMO. J |
|
From: Florian K. <fl...@ei...> - 2015-04-17 10:47:47
|
>>> incarnations of that function for Darwin are subtly different.
>>> The difference is:
>>>
>>> x86-darwin:
>>>
>>> VG_TRACK( new_mem_stack_signal,
>>> addr - VG_STACK_REDZONE_SZB, size, tid );
>>>
>>> amd64-darwin:
>>>
>>> VG_TRACK( new_mem_stack_signal, addr - VG_STACK_REDZONE_SZB,
>>> size + VG_STACK_REDZONE_SZB, tid );
>
>
> On the whole I'd guess that the first version is actually correct.
>
> Imagine, on amd64-linux, where the redzone size is 128 (bytes). That is,
> the area up to 128 below %rsp is accessible. If we now want to allocate a
> new block on the stack for delivering signals, with size |size|, the area
> that we want to mark as "new" is new_rsp-128 .. old_rsp-128. So I'd say
> that we don't want to extend the marked area by 128 (as in the second
> version) since that will paint the pre-signal-delivery redzone as
> addressible but uninitialised. And so if, after the signal frame is
> cleared, the thread pulls a value out of the redzone and uses it, it
> will be incorrectly marked as uninitialised.
>
That makes sense to me.
> This is just me guessing on the meaning of |size| here.
|size| is either sizeof(struct sigframe) or sizeof(struct rt_sigframe).
You guess was excellent! :)
Now, looking at the linux side of things:
VG_TRACK( new_mem_stack_signal, addr - VG_STACK_REDZONE_SZB,
size + VG_STACK_REDZONE_SZB, tid );
With your above argument (which is platform neutral), this does not look
right either.
Florian
|
|
From: Julian S. <js...@ac...> - 2015-04-17 10:19:19
|
>> In coregrind/m_sigframe we currently have for the linux platform 9
>> versions of a function called 'extend' which extends the stack segment
>> -- all alike (modulo white space).
>> I'm factoring that out
Excellent.
>> incarnations of that function for Darwin are subtly different.
>> The difference is:
>>
>> x86-darwin:
>>
>> VG_TRACK( new_mem_stack_signal,
>> addr - VG_STACK_REDZONE_SZB, size, tid );
>>
>> amd64-darwin:
>>
>> VG_TRACK( new_mem_stack_signal, addr - VG_STACK_REDZONE_SZB,
>> size + VG_STACK_REDZONE_SZB, tid );
My 2 euro-cents worth: on x86-darwin (and x86-linux), VG_STACK_REDZONE_SZB
is zero, so
VG_TRACK( new_mem_stack_signal,
addr - VG_STACK_REDZONE_SZB,
size, tid );
and
VG_TRACK( new_mem_stack_signal,
addr - VG_STACK_REDZONE_SZB,
size + VG_STACK_REDZONE_SZB, tid );
are equivalent.
On the whole I'd guess that the first version is actually correct.
Imagine, on amd64-linux, where the redzone size is 128 (bytes). That is,
the area up to 128 below %rsp is accessible. If we now want to allocate a
new block on the stack for delivering signals, with size |size|, the area
that we want to mark as "new" is new_rsp-128 .. old_rsp-128. So I'd say
that we don't want to extend the marked area by 128 (as in the second
version) since that will paint the pre-signal-delivery redzone as
addressible but uninitialised. And so if, after the signal frame is
cleared, the thread pulls a value out of the redzone and uses it, it
will be incorrectly marked as uninitialised.
This is just me guessing on the meaning of |size| here.
Commoning up the extend functions is great .. it means there's only one
place we have to prove correct :)
J
|
|
From: Tom H. <to...@co...> - 2015-04-17 09:49:57
|
On 17/04/15 10:32, Florian Krohm wrote: > In coregrind/m_sigframe we currently have for the linux platform 9 > versions of a function called 'extend' which extends the stack segment > -- all alike (modulo white space). > I'm factoring that out and while doing so I noticed that the 2 > incarnations of that function for Darwin are subtly different. > The difference is: > > x86-darwin: > > VG_TRACK( new_mem_stack_signal, > addr - VG_STACK_REDZONE_SZB, size, tid ); > > amd64-darwin: > > VG_TRACK( new_mem_stack_signal, addr - VG_STACK_REDZONE_SZB, > size + VG_STACK_REDZONE_SZB, tid ); > > It used to be that the amd64-darwin version of 'extend' was identical to > the x86-darwin version. In r13320 Bart changed the amd64-darwin version > to what it is today. The rationale is not the strongest one: > Darwin: Make stack growth tracking consistent with other architectures > > I'm curious...... Why was x86-darwin not changed the same way? > Or the other way round: Was r13320 perhaps not the right thing to do? Well if Darwin is like other platforms then amd64 has a redzone and x86 doesn't - maybe that is the reason for the difference? Tom -- Tom Hughes (to...@co...) http://compton.nu/ |
|
From: Florian K. <fl...@ei...> - 2015-04-17 09:32:21
|
In coregrind/m_sigframe we currently have for the linux platform 9
versions of a function called 'extend' which extends the stack segment
-- all alike (modulo white space).
I'm factoring that out and while doing so I noticed that the 2
incarnations of that function for Darwin are subtly different.
The difference is:
x86-darwin:
VG_TRACK( new_mem_stack_signal,
addr - VG_STACK_REDZONE_SZB, size, tid );
amd64-darwin:
VG_TRACK( new_mem_stack_signal, addr - VG_STACK_REDZONE_SZB,
size + VG_STACK_REDZONE_SZB, tid );
It used to be that the amd64-darwin version of 'extend' was identical to
the x86-darwin version. In r13320 Bart changed the amd64-darwin version
to what it is today. The rationale is not the strongest one:
Darwin: Make stack growth tracking consistent with other architectures
I'm curious...... Why was x86-darwin not changed the same way?
Or the other way round: Was r13320 perhaps not the right thing to do?
Florian
|
|
From: Florian K. <fl...@ei...> - 2015-04-17 09:09:34
|
On 16.04.2015 20:31, Carl E. Love wrote: > The warning went into the gcc 4.9 compiler for the PPC64 > platform. There will be an ABI change made in the gcc 5.0 with regards to the alignment of 128-bit > arguments to a function. As I understand it, this will only be an issue for code when linking code > that was compiled with different gcc versions. If some of the code was compiled with a pre gcc 5.0 > compiler and it is linked with functions with 128-bit arguments that were compiled with a gcc 5.0 > or newer compiler,there will be a problem of the arguments not aligning properly. > OK. Thanks for the clarification. > There is nothing that can be done at the source code level to eliminate the notice from the compiler. > Sigh. Florian |
|
From: <sv...@va...> - 2015-04-17 08:56:19
|
Author: florian
Date: Fri Apr 17 09:56:11 2015
New Revision: 15104
Log:
Followup to r15101. Remove pointless cast. The castee already
has that type.
Modified:
trunk/coregrind/m_coredump/coredump-elf.c
Modified: trunk/coregrind/m_coredump/coredump-elf.c
==============================================================================
--- trunk/coregrind/m_coredump/coredump-elf.c (original)
+++ trunk/coregrind/m_coredump/coredump-elf.c Fri Apr 17 09:56:11 2015
@@ -358,8 +358,8 @@
regs->orig_gpr3 = arch->vex.guest_GPR3;
regs->ctr = arch->vex.guest_CTR;
regs->link = arch->vex.guest_LR;
- regs->xer = LibVEX_GuestPPC64_get_XER( (const VexGuestPPC64State*) &(arch->vex) );
- regs->ccr = LibVEX_GuestPPC64_get_CR( (const VexGuestPPC64State*) &(arch->vex) );
+ regs->xer = LibVEX_GuestPPC64_get_XER( &(arch->vex) );
+ regs->ccr = LibVEX_GuestPPC64_get_CR( &(arch->vex) );
/* regs->mq = 0; */
regs->trap = 0;
regs->dar = 0; /* should be fault address? */
|
|
From: Julian S. <js...@ac...> - 2015-04-17 07:35:35
|
> On 17/04/15 06:50, Matthias Schwarzott wrote:
>
>>> + /* NCode [r0] = "LOADV32le_on_32" [a0] s0 {
>>> + hot:
>>> + 0 tst.w a0, #3 high?
>>
>> I guess here it should say "unaligned".
>
> So .. actually .. no.
Hmm ok, I shouldn't write email with so little coffee in the system.
Yes you are right. Should be "unaligned".
J
|
|
From: Julian S. <js...@ac...> - 2015-04-17 07:28:49
|
On 17/04/15 06:50, Matthias Schwarzott wrote:
>> + /* NCode [r0] = "LOADV32le_on_32" [a0] s0 {
>> + hot:
>> + 0 tst.w a0, #3 high?
>
> I guess here it should say "unaligned".
Wow, I didn't think anybody was reading these in so much detail.
So .. actually .. no. On 32 bit targets, the primary_map covers
the entire address space in 64k chunks, so we only need to check the
address for misalignment; it can't be "high". And this is for a
32-bit target -- hence the "_on_32" suffix.
You can see the equivalent test for a 64 bit target is against
0xFFFF FFF0 0000 0003, since if any of the top 28 bits of the
address are set, then it can't be indexed via the primary_map.
J
|
|
From: Matthias S. <zz...@ge...> - 2015-04-17 04:50:16
|
On 14.04.2015 21:23, sv...@va... wrote:
>
> +static NCodeTemplate* mk_tmpl__LOADV32le_on_32 ( NAlloc na )
> +{
> + NInstr** hot = na((11+1) * sizeof(NInstr*));
> + NInstr** cold = na((6+1) * sizeof(NInstr*));
> +
> + NReg rINVALID = mkNRegINVALID();
> +
> + NReg r0 = mkNReg(Nrr_Result, 0);
> + NReg a0 = mkNReg(Nrr_Argument, 0);
> + NReg s0 = mkNReg(Nrr_Scratch, 0);
> +
> + /* NCode [r0] = "LOADV32le_on_32" [a0] s0 {
> + hot:
> + 0 tst.w a0, #3 high?
I guess here it should say "unaligned".
Regards
Matthias
|
|
From: <sv...@va...> - 2015-04-16 23:17:29
|
Author: carll
Date: Fri Apr 17 00:17:22 2015
New Revision: 3136
Log:
The vbpermq for Powerpc64 big endian has the same issue as the little
endian support. Bugzilla 346270 was reopened to include the BE issue.
The bugzilla for the issue is 346270.
Modified:
trunk/priv/guest_ppc_toIR.c
Modified: trunk/priv/guest_ppc_toIR.c
==============================================================================
--- trunk/priv/guest_ppc_toIR.c (original)
+++ trunk/priv/guest_ppc_toIR.c Fri Apr 17 00:17:22 2015
@@ -17854,18 +17854,11 @@
mkexpr( vA ),
mkexpr( idx ) ) ),
mkU8( 127 ) ) ) );
- if (host_endness == VexEndnessLE)
- res = binop( Iop_OrV128,
- res,
- binop( Iop_ShlV128,
- mkexpr( perm_bit ),
- mkU8( i + 64) ) );
- else
- res = binop( Iop_OrV128,
- res,
- binop( Iop_ShlV128,
- mkexpr( perm_bit ),
- mkU8( i ) ) );
+ res = binop( Iop_OrV128,
+ res,
+ binop( Iop_ShlV128,
+ mkexpr( perm_bit ),
+ mkU8( i + 64 ) ) );
vB_expr = binop( Iop_ShrV128, vB_expr, mkU8( 8 ) );
}
putVReg( vRT_addr, res);
|
|
From: <sv...@va...> - 2015-04-16 21:10:53
|
Author: sewardj
Date: Thu Apr 16 22:10:42 2015
New Revision: 3135
Log:
Implement NCode generation for ARM32. Also, move a bunch of code in
the initial AMD64 NCode generator into target-independent routines so
as to avoid duplicating it in all backends in the future.
Modified:
branches/NCODE/priv/host_amd64_defs.c
branches/NCODE/priv/host_amd64_defs.h
branches/NCODE/priv/host_amd64_isel.c
branches/NCODE/priv/host_arm64_defs.c
branches/NCODE/priv/host_arm_defs.c
branches/NCODE/priv/host_arm_defs.h
branches/NCODE/priv/host_arm_isel.c
branches/NCODE/priv/host_generic_reg_alloc2.c
branches/NCODE/priv/host_generic_regs.c
branches/NCODE/priv/host_generic_regs.h
branches/NCODE/priv/host_mips_defs.c
branches/NCODE/priv/host_ppc_defs.c
branches/NCODE/priv/host_x86_defs.c
branches/NCODE/priv/ir_defs.c
branches/NCODE/priv/main_main.c
branches/NCODE/pub/libvex_ir.h
Modified: branches/NCODE/priv/host_amd64_defs.c
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.c (original)
+++ branches/NCODE/priv/host_amd64_defs.c Thu Apr 16 22:10:42 2015
@@ -178,7 +178,7 @@
"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
return;
}
/* But specific for real regs. */
@@ -207,7 +207,7 @@
"%r12d", "%r13d", "%r14d", "%r15d" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
vex_printf("d");
return;
}
@@ -232,7 +232,7 @@
"%r12w", "%r13w", "%r14w", "%r15w" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
vex_printf("w");
return;
}
@@ -1137,7 +1137,7 @@
}
AMD64Instr* AMD64Instr_NCode ( NCodeTemplate* tmpl, HReg* regsR,
HReg* regsA, HReg* regsS ) {
- AMD64InstrNCode* details = LibVEX_Alloc_inline(sizeof(AMD64InstrNCode));
+ HInstrNCode* details = LibVEX_Alloc_inline(sizeof(HInstrNCode));
details->tmpl = tmpl;
details->regsR = regsR;
details->regsA = regsA;
@@ -1504,28 +1504,9 @@
case Ain_ProfInc:
vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
return;
- case Ain_NCode: {
- UInt j;
- AMD64InstrNCode* details = i->Ain.NCode.details;
- NCodeTemplate* tmpl = details->tmpl;
- vex_printf("NCode-AMD64:%s [", tmpl->name);
- for (j = 0; j < tmpl->nres; j++) {
- ppHRegAMD64(details->regsR[j]);
- if (j != tmpl->nres-1) vex_printf(" ");
- }
- vex_printf("] <= [");
- for (j = 0; j < tmpl->narg; j++) {
- ppHRegAMD64(details->regsA[j]);
- if (j != tmpl->narg-1) vex_printf(" ");
- }
- vex_printf("] scratch [");
- for (j = 0; j < tmpl->nscr; j++) {
- ppHRegAMD64(details->regsS[j]);
- if (j != tmpl->nscr-1) vex_printf(" ");
- }
- vex_printf("]");
+ case Ain_NCode:
+ HInstrNCode__show(i->Ain.NCode.details, ppHRegAMD64, "AMD64");
return;
- }
case Ain_NC_Jmp32: {
vex_printf("j%s rel32",
i->Ain.NC_Jmp32.cc == Acc_ALWAYS
@@ -1838,25 +1819,9 @@
case Ain_ProfInc:
addHRegUse(u, HRmWrite, hregAMD64_R11());
return;
- case Ain_NCode: {
- AMD64InstrNCode* details = i->Ain.NCode.details;
- NCodeTemplate* tmpl = details->tmpl;
- // It writes the result and scratch registers.
- UInt j;
- for (j = 0; j < tmpl->nres; j++)
- addHRegUse(u, HRmWrite, details->regsR[j]);
- for (j = 0; j < tmpl->nscr; j++)
- addHRegUse(u, HRmWrite, details->regsS[j]);
- // It both reads and writes the arg regs. We have to say
- // they are written in order to force them to be allocated
- // different registers from the arg and scratch registers,
- // since we have no way to ensure that the NCode block
- // doesn't write its scratch and result registers and later
- // on read the argument registers.
- for (j = 0; j < tmpl->narg; j++)
- addHRegUse(u, HRmModify, details->regsA[j]);
+ case Ain_NCode:
+ HInstrNCode__getRegUsage(u, i->Ain.NCode.details);
return;
- }
default:
ppAMD64Instr(i, mode64);
vpanic("getRegUsage_AMD64Instr");
@@ -2052,18 +2017,9 @@
case Ain_ProfInc:
/* hardwires r11 -- nothing to modify. */
return;
- case Ain_NCode: {
- AMD64InstrNCode* details = i->Ain.NCode.details;
- NCodeTemplate* tmpl = details->tmpl;
- UInt j;
- for (j = 0; j < tmpl->nres; j++)
- mapReg(m, &details->regsR[j]);
- for (j = 0; j < tmpl->nscr; j++)
- mapReg(m, &details->regsS[j]);
- for (j = 0; j < tmpl->narg; j++)
- mapReg(m, &details->regsA[j]);
+ case Ain_NCode:
+ HInstrNCode__mapRegs(i->Ain.NCode.details, m);
return;
- }
default:
ppAMD64Instr(i, mode64);
vpanic("mapRegs_AMD64Instr");
@@ -2236,13 +2192,13 @@
emit32(ab, toUInt((w64 >> 32) & 0xFFFFFFFF));
}
-/* Does a sign-extend of the lowest 8 bits give
- the original number? */
+/* Does a sign-extend of the lowest 8 bits give the original number? */
static Bool fits8bits ( UInt w32 )
{
Int i32 = (Int)w32;
return toBool(i32 == ((Int)(w32 << 24) >> 24));
}
+
/* Can the lower 32 bits be signedly widened to produce the whole
64-bit value? In other words, are the top 33 bits either all 0 or
all 1 ? */
@@ -4142,9 +4098,7 @@
/* Marshall args for the call, do the call, marshal the result */
/* Case: 1 arg reg, 1 result reg */
- UInt nArgRegs = 0;
- while (!isNRegINVALID(ni->Nin.Call.argRegs[nArgRegs]))
- nArgRegs++;
+ UInt nArgRegs = nregVecLen(ni->Nin.Call.argRegs);
if (nArgRegs == 1
&& isNRegINVALID(ni->Nin.Call.resHi)
@@ -4155,14 +4109,14 @@
HReg rdi = hregAMD64_RDI();
HReg rax = hregAMD64_RAX();
if (!sameHReg(arg1, rdi))
- HI( mk_iMOVsd_RR(arg1, rdi) );
+ HI( mk_iMOVsd_RR_AMD64(arg1, rdi) );
HReg r11 = hregAMD64_R11();
HI( AMD64Instr_Imm64((ULong)(HWord)ni->Nin.Call.entry, r11) );
HI( AMD64Instr_NC_CallR11() );
if (!sameHReg(rax, res1))
- HI( mk_iMOVsd_RR(rax, res1) );
+ HI( mk_iMOVsd_RR_AMD64(rax, res1) );
} else {
goto unhandled;
}
@@ -4212,7 +4166,7 @@
vassert(shOp != Ash_INVALID);
if (!sameHReg(src, dst)) {
- HI( mk_iMOVsd_RR(src, dst) );
+ HI( mk_iMOVsd_RR_AMD64(src, dst) );
}
HI( AMD64Instr_Sh64(shOp, amt, dst) );
break;
@@ -4232,7 +4186,7 @@
vassert(shOp != Ash_INVALID);
if (!sameHReg(src, dst)) {
- HI( mk_iMOVsd_RR(src, dst) );
+ HI( mk_iMOVsd_RR_AMD64(src, dst) );
}
/* Now, we have the shift amount in register |amt|. Problem
is that it needs to be in %rcx, but we don't know whether
@@ -4243,10 +4197,10 @@
allocatable, since the insn selector uses it to put
variable shift amounts in. So we can't safely trash it
here. */
- HI( mk_iMOVsd_RR(hregAMD64_RCX(), hregAMD64_R11()) ); // save rcx
- HI( mk_iMOVsd_RR(amt, hregAMD64_RCX()) ); // amt->rcx
+ HI( mk_iMOVsd_RR_AMD64(hregAMD64_RCX(), hregAMD64_R11()) ); // save rcx
+ HI( mk_iMOVsd_RR_AMD64(amt, hregAMD64_RCX()) ); // amt->rcx
HI( AMD64Instr_Sh64(shOp, 0/*meaning %cl*/, dst) );
- HI( mk_iMOVsd_RR(hregAMD64_R11(), hregAMD64_RCX()) ); // restore rcx
+ HI( mk_iMOVsd_RR_AMD64(hregAMD64_R11(), hregAMD64_RCX()) ); // rest rcx
break;
}
@@ -4261,7 +4215,7 @@
}
if (how == Nalu_AND && fitsIn32Bits((ULong)imm)) {
if (!sameHReg(srcLR, dstR)) {
- HI( mk_iMOVsd_RR(srcLR, dstR) );
+ HI( mk_iMOVsd_RR_AMD64(srcLR, dstR) );
}
HI( AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(imm), dstR) );
break;
@@ -4276,7 +4230,7 @@
HReg srcRR = mapNReg(nregMap, ni->Nin.AluWrr.srcR);
if (how == Nalu_ADD) {
if (!sameHReg(srcLR, dstR)) {
- HI( mk_iMOVsd_RR(srcLR, dstR) );
+ HI( mk_iMOVsd_RR_AMD64(srcLR, dstR) );
}
HI( AMD64Instr_Alu64R(Aalu_ADD, AMD64RMI_Reg(srcRR), dstR) );
break;
@@ -4303,7 +4257,7 @@
case Nin_MovW: {
HReg src = mapNReg(nregMap, ni->Nin.MovW.src);
HReg dst = mapNReg(nregMap, ni->Nin.MovW.dst);
- HI( mk_iMOVsd_RR(src, dst) );
+ HI( mk_iMOVsd_RR_AMD64(src, dst) );
break;
}
@@ -4325,9 +4279,9 @@
}
}
if (addr->tag == Nea_RRS) {
- HReg baseR = mapNReg(nregMap, addr->Nea.RRS.base);
- HReg indexR = mapNReg(nregMap, addr->Nea.RRS.index);
- UChar shift = addr->Nea.RRS.shift;
+ HReg baseR = mapNReg(nregMap, addr->Nea.RRS.base);
+ HReg indexR = mapNReg(nregMap, addr->Nea.RRS.index);
+ UChar shift = addr->Nea.RRS.shift;
if (shift <= 3) {
AMD64AMode* am = AMD64AMode_IRRS(0, baseR, indexR, shift);
if (szB == 2 || szB == 1) {
@@ -4346,8 +4300,7 @@
goto unhandled;
}
- UInt i;
- for (i = 0; i < hiBufUsed; i++) {
+ for (UInt i = 0; i < hiBufUsed; i++) {
if (verbose) {
vex_printf(" ");
ppAMD64Instr(hiBuf[i], True/*mode64*/);
@@ -4384,109 +4337,11 @@
Bool mode64, VexEndness endness_host,
Bool verbose )
{
- vassert(mode64 == True);
+ vassert(mode64 == True);
vassert(endness_host == VexEndnessLE);
- vassert(hi->tag == Ain_NCode);
-
- const AMD64InstrNCode* hi_details = hi->Ain.NCode.details;
- const NCodeTemplate* tmpl = hi_details->tmpl;
- const RRegSet* rregsLiveAfter = hi_details->rrLiveAfter;
-
- NRegMap nregMap;
- nregMap.regsR = hi_details->regsR;
- nregMap.regsA = hi_details->regsA;
- nregMap.regsS = hi_details->regsS;
- nregMap.nRegsR = tmpl->nres;
- nregMap.nRegsA = tmpl->narg;
- nregMap.nRegsS = tmpl->nscr;
-
- vassert(hregVecLen(nregMap.regsR) == nregMap.nRegsR);
- vassert(hregVecLen(nregMap.regsA) == nregMap.nRegsA);
- vassert(hregVecLen(nregMap.regsS) == nregMap.nRegsS);
-
- if (AssemblyBuffer__getRemainingSize(ab_hot) < 1024)
- return False;
- if (AssemblyBuffer__getRemainingSize(ab_cold) < 1024)
- return False;
- if (RelocationBuffer__getRemainingSize(rb) < 128)
- return False;
-
- /* Count how many hot and cold instructions (NInstrs) the template
- has, since we'll need to allocate temporary arrays to keep track
- of the label offsets. */
- UInt nHot, nCold;
- for (nHot = 0; tmpl->hot[nHot]; nHot++)
- ;
- for (nCold = 0; tmpl->cold[nCold]; nCold++)
- ;
-
- /* Here are our two arrays for tracking the AssemblyBuffer offsets
- of the NCode instructions. */
- UInt i;
- UInt offsetsHot[nHot];
- UInt offsetsCold[nCold];
- for (i = 0; i < nHot; i++) offsetsHot[i] = 0;
- for (i = 0; i < nCold; i++) offsetsCold[i] = 0;
-
- /* We'll be adding entries to the relocation buffer, |rb|, and will
- need to adjust their |dst| fields after generation of the hot
- and cold code. Record therefore where we are in the buffer now,
- so that we can iterate over the new entries later. */
- UInt rb_first = RelocationBuffer__getNext(rb);
-
- /* Generate the hot code */
- for (i = 0; i < nHot; i++) {
- offsetsHot[i] = AssemblyBuffer__getNext(ab_hot);
- NLabel lbl = mkNLabel(Nlz_Hot, i);
- emit_AMD64NInstr(ab_hot, rb, tmpl->hot[i], &nregMap,
- rregsLiveAfter, verbose, lbl);
- }
-
- /* And the cold code */
- for (i = 0; i < nCold; i++) {
- offsetsCold[i] = AssemblyBuffer__getNext(ab_cold);
- NLabel lbl = mkNLabel(Nlz_Cold, i);
- emit_AMD64NInstr(ab_cold, rb, tmpl->cold[i], &nregMap,
- rregsLiveAfter, verbose, lbl);
- }
-
- /* Now visit the new relocation entries. */
- UInt rb_last1 = RelocationBuffer__getNext(rb);
-
- for (i = rb_first; i < rb_last1; i++) {
- Relocation* reloc = &rb->buf[i];
-
- /* Show the reloc before the label-to-offset transformation. */
- if (verbose) {
- vex_printf(" reloc: ");
- ppRelocation(reloc);
- vex_printf("\n");
- }
-
- /* Transform the destination component of |reloc| so that it no
- longer refers to a label but rather to an offset in the hot
- or cold assembly buffer. */
- vassert(!reloc->dst.isOffset);
- reloc->dst.isOffset = True;
-
- if (reloc->dst.zone == Nlz_Hot) {
- vassert(reloc->dst.num < nHot);
- reloc->dst.num = offsetsHot[reloc->dst.num];
- } else {
- vassert(reloc->dst.zone == Nlz_Cold);
- vassert(reloc->dst.num < nCold);
- reloc->dst.num = offsetsCold[reloc->dst.num];
- }
-
- /* Show the reloc after the label-to-offset transformation. */
- if (verbose) {
- vex_printf(" reloc: ");
- ppRelocation(reloc);
- vex_printf("\n");
- }
- }
-
- return True;
+ vassert(hi->tag == Ain_NCode);
+ return HInstrNCode__emit ( ab_hot, ab_cold, rb, hi->Ain.NCode.details,
+ verbose, emit_AMD64NInstr );
}
Modified: branches/NCODE/priv/host_amd64_defs.h
==============================================================================
--- branches/NCODE/priv/host_amd64_defs.h (original)
+++ branches/NCODE/priv/host_amd64_defs.h Thu Apr 16 22:10:42 2015
@@ -420,18 +420,6 @@
/* --------- */
-typedef
- struct {
- NCodeTemplate* tmpl;
- HReg* regsR; /* Result regs, INVALID_HREG terminated */
- HReg* regsA; /* Arg regs, ditto */
- HReg* regsS; /* Scratch regs, ditto */
- RRegSet* rrLiveAfter; /* initially NULL, filled in by RA */
- }
- AMD64InstrNCode;
-
-
-/* --------- */
/* Destinations are on the RIGHT (second operand) */
@@ -737,7 +725,7 @@
} ProfInc;
struct {
/* Out of line so as to keep sizeof(AMD64Instr) at 40. */
- AMD64InstrNCode* details;
+ HInstrNCode* details;
} NCode;
/* --- for NCode only --- */
@@ -831,7 +819,7 @@
extern void ppAMD64Instr ( const AMD64Instr*, Bool );
/* Handy helper, for generating integer reg-reg moves. */
-extern AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst );
+extern AMD64Instr* mk_iMOVsd_RR_AMD64 ( HReg src, HReg dst );
/* Some functions that insulate the register allocator from details
of the underlying instruction set. */
Modified: branches/NCODE/priv/host_amd64_isel.c
==============================================================================
--- branches/NCODE/priv/host_amd64_isel.c (original)
+++ branches/NCODE/priv/host_amd64_isel.c Thu Apr 16 22:10:42 2015
@@ -311,13 +311,20 @@
/* Make an int reg-reg move. */
-/*notstatic*/ AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
+static AMD64Instr* mk_iMOVsd_RR ( HReg src, HReg dst )
{
vassert(hregClass(src) == HRcInt64);
vassert(hregClass(dst) == HRcInt64);
return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(src), dst);
}
+/* And a variant that is exported into the global namespace. */
+
+AMD64Instr* mk_iMOVsd_RR_AMD64 ( HReg src, HReg dst )
+{
+ return mk_iMOVsd_RR(src, dst);
+}
+
/* Make a vector (128 bit) reg-reg move. */
static AMD64Instr* mk_vMOVsd_RR ( HReg src, HReg dst )
@@ -4809,7 +4816,7 @@
// For the result values, find the vregs associated with the
// result IRTemps, and pin them on the NCode block.
- HReg* regsR = LibVEX_Alloc( (tmpl->nres+1) * sizeof(HReg) );
+ HReg* regsR = LibVEX_Alloc_inline( (tmpl->nres+1) * sizeof(HReg) );
for (i = 0; i < tmpl->nres; i++) {
IRTemp t = stmt->Ist.NCode.ress[i];
vassert(t != IRTemp_INVALID);
@@ -4823,17 +4830,16 @@
// registers returned from the isel*Expr functions may not be
// modified. As usual vreg-vreg move coalescing will remove
// those copies in the cases where they are not necessary.
- HReg* regsA = LibVEX_Alloc( (tmpl->narg+1) * sizeof(HReg) );
+ HReg* regsA = LibVEX_Alloc_inline( (tmpl->narg+1) * sizeof(HReg) );
for (i = 0; i < tmpl->narg; i++) {
HReg arg = iselIntExpr_R(env, stmt->Ist.NCode.args[i]);
regsA[i] = newVRegI(env);
addInstr(env, mk_iMOVsd_RR(arg, regsA[i]));
-
}
regsA[tmpl->narg] = HReg_INVALID;
// Allocate vregs for the scratch values.
- HReg* regsS = LibVEX_Alloc( (tmpl->nscr+1) * sizeof(HReg) );
+ HReg* regsS = LibVEX_Alloc_inline( (tmpl->nscr+1) * sizeof(HReg) );
for (i = 0; i < tmpl->nscr; i++) {
regsS[i] = newVRegI(env);
}
Modified: branches/NCODE/priv/host_arm64_defs.c
==============================================================================
--- branches/NCODE/priv/host_arm64_defs.c (original)
+++ branches/NCODE/priv/host_arm64_defs.c Thu Apr 16 22:10:42 2015
@@ -146,7 +146,7 @@
Int r;
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
return;
}
/* But specific for real regs. */
Modified: branches/NCODE/priv/host_arm_defs.c
==============================================================================
--- branches/NCODE/priv/host_arm_defs.c (original)
+++ branches/NCODE/priv/host_arm_defs.c Thu Apr 16 22:10:42 2015
@@ -120,18 +120,9 @@
// Note 9 is ambiguous: the base EABI does not give an e/r-saved
// designation for it, but the Linux instantiation of the ABI
// specifies it as callee-saved.
- //
- // If the set of available registers changes or if the e/r status
- // changes, be sure to re-check/sync the definition of
- // getHRegUsage for ARMInstr_Call too.
ru->regs[ru->size++] = hregARM_R8();
ru->regs[ru->size++] = hregARM_R12();
ru->regs[ru->size++] = hregARM_R13();
- ru->regs[ru->size++] = hregARM_R14();
- ru->regs[ru->size++] = hregARM_R15();
- ru->regs[ru->size++] = hregARM_Q13();
- ru->regs[ru->size++] = hregARM_Q14();
- ru->regs[ru->size++] = hregARM_Q15();
rRegUniverse_ARM_initted = True;
@@ -140,11 +131,82 @@
}
+/* Returns the registers in the ARM universe that are caller saved.
+ This is really ABI dependent, but we ignore that detail here. */
+static const RRegSet* getRRegsCallerSaved_ARM ( void )
+{
+ /* In theory gcc should be able to fold this into a single 64 bit
+ constant (bitset). But that's a bit risky, so instead do
+ thread-unsafe lazy initialisation (sigh). */
+ static RRegSet callerSavedRegs;
+ static Bool callerSavedRegs_initted = False;
+
+ if (LIKELY(callerSavedRegs_initted))
+ return &callerSavedRegs;
+
+ RRegSet__init(&callerSavedRegs, getRRegUniverse_ARM());
+
+ RRegSet__add(&callerSavedRegs, hregARM_R0());
+ RRegSet__add(&callerSavedRegs, hregARM_R1());
+ RRegSet__add(&callerSavedRegs, hregARM_R2());
+ RRegSet__add(&callerSavedRegs, hregARM_R3());
+ RRegSet__add(&callerSavedRegs, hregARM_Q8());
+ RRegSet__add(&callerSavedRegs, hregARM_Q9());
+ RRegSet__add(&callerSavedRegs, hregARM_Q10());
+ RRegSet__add(&callerSavedRegs, hregARM_Q11());
+ RRegSet__add(&callerSavedRegs, hregARM_Q12());
+ RRegSet__add(&callerSavedRegs, hregARM_R12());
+
+ callerSavedRegs_initted = True;
+ return &callerSavedRegs;
+}
+
+
+/* Returns the registers in the ARM universe that are callee saved.
+ This is really ABI dependent, but we ignore that detail here. */
+static const RRegSet* getRRegsCalleeSaved_ARM ( void )
+{
+ /* In theory gcc should be able to fold this into a single 64 bit
+ constant (bitset). But that's a bit risky, so instead do
+ thread-unsafe lazy initialisation (sigh). */
+ static RRegSet calleeSavedRegs;
+ static Bool calleeSavedRegs_initted = False;
+
+ if (LIKELY(calleeSavedRegs_initted))
+ return &calleeSavedRegs;
+
+ RRegSet__init(&calleeSavedRegs, getRRegUniverse_ARM());
+
+ RRegSet__add(&calleeSavedRegs, hregARM_R4());
+ RRegSet__add(&calleeSavedRegs, hregARM_R5());
+ RRegSet__add(&calleeSavedRegs, hregARM_R6());
+ RRegSet__add(&calleeSavedRegs, hregARM_R7());
+ RRegSet__add(&calleeSavedRegs, hregARM_R8());
+ RRegSet__add(&calleeSavedRegs, hregARM_R9());
+ RRegSet__add(&calleeSavedRegs, hregARM_R10());
+ RRegSet__add(&calleeSavedRegs, hregARM_R11());
+ RRegSet__add(&calleeSavedRegs, hregARM_D8());
+ RRegSet__add(&calleeSavedRegs, hregARM_D9());
+ RRegSet__add(&calleeSavedRegs, hregARM_D10());
+ RRegSet__add(&calleeSavedRegs, hregARM_D11());
+ RRegSet__add(&calleeSavedRegs, hregARM_D12());
+ RRegSet__add(&calleeSavedRegs, hregARM_S26());
+ RRegSet__add(&calleeSavedRegs, hregARM_S27());
+ RRegSet__add(&calleeSavedRegs, hregARM_S28());
+ RRegSet__add(&calleeSavedRegs, hregARM_S29());
+ RRegSet__add(&calleeSavedRegs, hregARM_S30());
+ RRegSet__add(&calleeSavedRegs, hregARM_R13());
+
+ calleeSavedRegs_initted = True;
+ return &calleeSavedRegs;
+}
+
+
void ppHRegARM ( HReg reg ) {
Int r;
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
+ ppHRegGENERIC(reg);
return;
}
/* But specific for real regs. */
@@ -1501,6 +1563,13 @@
return False;
}
+/* Does a sign-extend of the lowest 12 bits give the original number? */
+static Bool fitsIn12bits ( UInt w32 )
+{
+ Int i32 = (Int)w32;
+ return toBool(i32 == ((Int)(w32 << 20) >> 20));
+}
+
ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
UInt u8, u4;
ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
@@ -1535,6 +1604,44 @@
return i;
}
+ARMInstr* ARMInstr_NCode ( NCodeTemplate* tmpl, HReg* regsR,
+ HReg* regsA, HReg* regsS ) {
+ HInstrNCode* details = LibVEX_Alloc_inline(sizeof(HInstrNCode));
+ details->tmpl = tmpl;
+ details->regsR = regsR;
+ details->regsA = regsA;
+ details->regsS = regsS;
+ details->rrLiveAfter = NULL;
+ ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
+ i->tag = ARMin_NCode;
+ i->ARMin.NCode.details = details;
+ return i;
+}
+
+ARMInstr* ARMInstr_NC_Branch ( ARMCondCode cc )
+{
+ ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
+ i->tag = ARMin_NC_Branch;
+ i->ARMin.NC_Branch.cc = cc;
+ return i;
+}
+
+ARMInstr* ARMInstr_NC_Uxth ( HReg dst, HReg src )
+{
+ ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
+ i->tag = ARMin_NC_Uxth;
+ i->ARMin.NC_Uxth.dst = dst;
+ i->ARMin.NC_Uxth.src = src;
+ return i;
+}
+
+ARMInstr* ARMInstr_NC_CallR12 ( void )
+{
+ ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
+ i->tag = ARMin_NC_CallR12;
+ return i;
+}
+
/* ... */
void ppARMInstr ( const ARMInstr* i ) {
@@ -1994,6 +2101,23 @@
"adc r11,r11,$0; "
"str r11,[r12+4]");
return;
+ case ARMin_NCode:
+ HInstrNCode__show(i->ARMin.NCode.details, ppHRegARM, "ARM");
+ return;
+ case ARMin_NC_Branch:
+ vex_printf("b%s simm24",
+ i->ARMin.NC_Branch.cc == ARMcc_AL
+ ? "" : showARMCondCode(i->ARMin.NC_Branch.cc));
+ return;
+ case ARMin_NC_Uxth:
+ vex_printf("uxth ");
+ ppHRegARM(i->ARMin.NC_Uxth.dst);
+ vex_printf(", ");
+ ppHRegARM(i->ARMin.NC_Uxth.src);
+ break;
+ case ARMin_NC_CallR12:
+ vex_printf("blx r12");
+ break;
default:
vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
vpanic("ppARMInstr(1)");
@@ -2096,13 +2220,8 @@
/* This is a bit subtle. */
/* First off, claim it trashes all the caller-saved regs
which fall within the register allocator's jurisdiction.
- These I believe to be r0,1,2,3. If it turns out that r9
- is also caller-saved, then we'll have to add that here
- too. */
- addHRegUse(u, HRmWrite, hregARM_R0());
- addHRegUse(u, HRmWrite, hregARM_R1());
- addHRegUse(u, HRmWrite, hregARM_R2());
- addHRegUse(u, HRmWrite, hregARM_R3());
+ These I believe to be r0,1,2,3 and q8,9,10,11,12. */
+ addHRegUse_from_RRegSet(u, HRmWrite, getRRegsCallerSaved_ARM());
/* Now we have to state any parameter-carrying registers
which might be read. This depends on nArgRegs. */
switch (i->ARMin.Call.nArgRegs) {
@@ -2303,6 +2422,9 @@
addHRegUse(u, HRmWrite, hregARM_R12());
addHRegUse(u, HRmWrite, hregARM_R11());
return;
+ case ARMin_NCode:
+ HInstrNCode__getRegUsage(u, i->ARMin.NCode.details);
+ return;
default:
ppARMInstr(i);
vpanic("getRegUsage_ARMInstr");
@@ -2499,6 +2621,9 @@
case ARMin_ProfInc:
/* hardwires r11 and r12 -- nothing to modify. */
return;
+ case ARMin_NCode:
+ HInstrNCode__mapRegs(i->ARMin.NCode.details, m);
+ return;
default:
ppARMInstr(i);
vpanic("mapRegs_ARMInstr");
@@ -2560,46 +2685,49 @@
vassert(offsetB >= 0);
vassert(!hregIsVirtual(rreg));
vassert(mode64 == False);
- vassert(!spRel);
*i1 = *i2 = NULL;
+
+ /* We're spilling/reloading either relative to the guest state
+ pointer (r8) when spRel == False, or relative to the stack
+ pointer (r13) when spRel == True. */
+ HReg base = spRel ? hregARM_R13() : hregARM_R8();
+
rclass = hregClass(rreg);
switch (rclass) {
case HRcInt32:
vassert(offsetB <= 4095);
*i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
rreg,
- ARMAMode1_RI(hregARM_R8(), offsetB) );
+ ARMAMode1_RI(base, offsetB) );
return;
case HRcFlt32:
case HRcFlt64: {
- HReg r8 = hregARM_R8(); /* baseblock */
- HReg r12 = hregARM_R12(); /* spill temp */
- HReg base = r8;
+ HReg curr = base;
vassert(0 == (offsetB & 3));
if (offsetB >= 1024) {
- Int offsetKB = offsetB / 1024;
- /* r12 = r8 + (1024 * offsetKB) */
- *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
+ Int offsetKB = offsetB / 1024;
+ HReg r12 = hregARM_R12(); /* spill temp */
+ /* r12 = base + (1024 * offsetKB) */
+ *i1 = ARMInstr_Alu(ARMalu_ADD, r12, base,
ARMRI84_I84(offsetKB, 11));
offsetB -= (1024 * offsetKB);
- base = r12;
+ curr = r12;
}
vassert(offsetB <= 1020);
if (rclass == HRcFlt32) {
*i2 = ARMInstr_VLdStS( False/*!isLoad*/,
rreg,
- mkARMAModeV(base, offsetB) );
+ mkARMAModeV(curr, offsetB) );
} else {
*i2 = ARMInstr_VLdStD( False/*!isLoad*/,
rreg,
- mkARMAModeV(base, offsetB) );
+ mkARMAModeV(curr, offsetB) );
}
return;
}
case HRcVec128: {
- HReg r8 = hregARM_R8();
HReg r12 = hregARM_R12();
- *i1 = ARMInstr_Add32(r12, r8, offsetB);
+ *i1 = ARMInstr_Add32(r12, base, offsetB);
*i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
return;
}
@@ -2616,46 +2744,47 @@
vassert(offsetB >= 0);
vassert(!hregIsVirtual(rreg));
vassert(mode64 == False);
- vassert(!spRel);
*i1 = *i2 = NULL;
+
+ /* Same comment as on genSpill_ARM. */
+ HReg base = spRel ? hregARM_R13() : hregARM_R8();
+
rclass = hregClass(rreg);
switch (rclass) {
case HRcInt32:
vassert(offsetB <= 4095);
*i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
rreg,
- ARMAMode1_RI(hregARM_R8(), offsetB) );
+ ARMAMode1_RI(base, offsetB) );
return;
case HRcFlt32:
case HRcFlt64: {
- HReg r8 = hregARM_R8(); /* baseblock */
- HReg r12 = hregARM_R12(); /* spill temp */
- HReg base = r8;
+ HReg curr = base;
vassert(0 == (offsetB & 3));
if (offsetB >= 1024) {
- Int offsetKB = offsetB / 1024;
- /* r12 = r8 + (1024 * offsetKB) */
- *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
+ Int offsetKB = offsetB / 1024;
+ HReg r12 = hregARM_R12(); /* spill temp */
+ /* r12 = base + (1024 * offsetKB) */
+ *i1 = ARMInstr_Alu(ARMalu_ADD, r12, base,
ARMRI84_I84(offsetKB, 11));
offsetB -= (1024 * offsetKB);
- base = r12;
+ curr = r12;
}
vassert(offsetB <= 1020);
if (rclass == HRcFlt32) {
*i2 = ARMInstr_VLdStS( True/*isLoad*/,
rreg,
- mkARMAModeV(base, offsetB) );
+ mkARMAModeV(curr, offsetB) );
} else {
*i2 = ARMInstr_VLdStD( True/*isLoad*/,
rreg,
- mkARMAModeV(base, offsetB) );
+ mkARMAModeV(curr, offsetB) );
}
return;
}
case HRcVec128: {
- HReg r8 = hregARM_R8();
HReg r12 = hregARM_R12();
- *i1 = ARMInstr_Add32(r12, r8, offsetB);
+ *i1 = ARMInstr_Add32(r12, base, offsetB);
*i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
return;
}
@@ -2668,13 +2797,15 @@
/* --------- The arm32 assembler (bleh.) --------- */
-#define PUT(_ab, _word32) \
- do { const UInt _off = (_ab)->bufUsed; \
- (_ab)->buf[_off + 0] = ((_word32) >> 0) & 0xFF; \
- (_ab)->buf[_off + 1] = ((_word32) >> 8) & 0xFF; \
- (_ab)->buf[_off + 2] = ((_word32) >> 16) & 0xFF; \
- (_ab)->buf[_off + 3] = ((_word32) >> 24) & 0xFF; \
- (_ab)->bufUsed = _off + 4; \
+/* Put a 32 bit word into the assembler buffer |_ab|. Uses a
+ potentially unaligned 32-bit store. That is OK because the
+ allocation point of the buffer should always be 32-bit aligned, and
+ that is asserted for at the top of emit_ARMInstr. */
+#define PUT(_ab, _word32) \
+ do { const UInt _off = (_ab)->bufUsed; \
+ UInt* _ptr = (UInt*)(&(_ab)->buf[_off]); \
+ *_ptr = (_word32); \
+ (_ab)->bufUsed = _off + 4; \
} while (0)
/* Emit an instruction into buf and return the number of bytes used.
@@ -3111,8 +3242,15 @@
PUT(ab, instr);
goto done;
} else {
- // RR case
- goto bad;
+ UInt sh5 = am->ARMam1.RRS.shift;
+ UInt nn = iregEnc(am->ARMam1.RRS.base);
+ UInt mm = iregEnc(am->ARMam1.RRS.index);
+ UInt dd = iregEnc(rD);
+ if (sh5 > 3) goto bad;
+ UInt instr = XXXXXXXX(cc, X0111, BITS4(1,bB,0,bL), nn, dd,
+ ((sh5 >> 1) & 0xF), ((sh5 & 1) << 3), mm);
+ PUT(ab, instr);
+ goto done;
}
}
case ARMin_LdSt16: {
@@ -4670,6 +4808,32 @@
goto done;
}
+ case ARMin_NC_Branch: {
+ /* Generating jumps within NCode blocks. We don't know what the
+ jump offset yet is, so just put in a 24-bit zero. The NCode
+ assembler (emit_ARMNInstr) should also have generated a
+ Relocation that describes how to fix up the offset, and that
+ will be applied after assembly is complete, at the point
+ where the hot and cold buffers are concatenated, to create on
+ single big code block for the containing IRSB. */
+ ARMCondCode cc = i->ARMin.NC_Branch.cc;
+ vassert(cc <= ARMcc_AL);
+ PUT(ab, XX______(cc, 0xA/*1010b*/));
+ goto done;
+ }
+
+ case ARMin_NC_Uxth: {
+ UInt dd = iregEnc(i->ARMin.NC_Uxth.dst);
+ UInt mm = iregEnc(i->ARMin.NC_Uxth.src);
+ PUT(ab, XXXXXXXX(ARMcc_AL, X0110, X1111, X1111, dd, X0000, X0111, mm));
+ goto done;
+ }
+
+ case ARMin_NC_CallR12: {
+ PUT(ab, 0xE12FFF3C);
+ goto done;
+ }
+
/* ... */
default:
goto bad;
@@ -4686,6 +4850,330 @@
}
+/* --------- The arm NCode assembler. --------- */
+
+/* Emits ARM code for a single NInstr |ni| into |ab|, possibly
+ adding relocation information into |rb| too.
+*/
+static
+void emit_ARMNInstr ( /*MOD*/AssemblyBuffer* ab,
+ /*MOD*/RelocationBuffer* rb,
+ const NInstr* ni,
+ const NRegMap* nregMap,
+ const RRegSet* hregsLiveAfter,
+ /* the next 2 are for debug printing only */
+ Bool verbose, NLabel niLabel )
+{
+ ARMInstr* hiBuf[100];
+ UInt hiBufUsed = 0;
+
+# define HI(_insnE) \
+ do { \
+ ARMInstr* _insn = (_insnE); \
+ vassert(hiBufUsed < sizeof(hiBuf)/sizeof(hiBuf[0])); \
+ hiBuf[hiBufUsed++] = _insn; \
+ } while (0)
+
+ if (verbose) {
+ vex_printf(" ");
+ ppNLabel(niLabel);
+ vex_printf(": ");
+ ppNInstr(ni);
+ vex_printf("\n");
+ }
+
+ switch (ni->tag) {
+
+ case Nin_Nop:
+ break;
+
+ case Nin_Branch: {
+ /* We are going to generate an ARM branch insn, which naturally
+         can be conditional if needed.  It will be of the form
+ cond:4 1010 simm:24
+ We need to generate both the instruction and a relocation
+ record that describes how to fix up the offset (simm:24)
+ once the relative offset between this instruction and the
+         destination is known, which it isn't currently. */
+ ARMCondCode cc = 16; /* invalid */
+ switch (ni->Nin.Branch.cc) {
+ case Ncc_ALWAYS: cc = ARMcc_AL; break;
+ case Ncc_Z: cc = ARMcc_EQ; break;
+ case Ncc_NZ: cc = ARMcc_NE; break;
+ default: vassert(0); /* no other cases possible */
+ }
+ vassert(cc < 16);
+ /* First do the relocation, as it's the more complex part.
+ The insns are little-endian, and the offset is the least
+ significant 3 bytes of the insn, so its "where" starts
+ exactly where the current |ab| cursor is. Hence the "+0"
+ below. */
+ RelocWhere where
+ = mkRelocWhere(niLabel.zone, AssemblyBuffer__getNext(ab)+0);
+ RelocDst dst
+ = mkRelocDst_from_NLabel(ni->Nin.Branch.dst);
+ /* Bias is 8 because we've set |where| to be the start of the
+ branch insn. The processor however expects the offset to
+ be relative to the start of 8 bytes past the insn (ARM
+ ancient history) which means that a naive "dst - where"
+ value will give an offset that is 8 too large. Hence the
+ bias of 8. */
+ Relocation reloc
+ = mkRelocation(where, 0, 23, dst, /*bias*/-8, /*rshift*/2);
+ vassert(RelocationBuffer__getRemainingSize(rb) > 0);
+ rb->buf[rb->bufUsed++] = reloc;
+ /* And finally the instruction. Note that we don't specify
+ an offset here since we don't yet know what it is. */
+ HI( ARMInstr_NC_Branch(cc) );
+ break;
+ }
+
+ case Nin_Call: {
+ RRegSet to_preserve;
+ calcRegistersToPreserveAroundNCodeCall(
+ &to_preserve,
+ hregsLiveAfter, getRRegsCalleeSaved_ARM(), nregMap,
+ ni->Nin.Call.resHi, ni->Nin.Call.resLo
+ );
+
+ /* Save live regs */
+ UInt n_to_preserve = RRegSet__card(&to_preserve);
+ vassert(n_to_preserve < 25); /* stay sane */
+
+ /* Figure out how much to move the stack, ensuring any alignment up
+ to 32 is preserved. */
+ UInt stackMove = n_to_preserve * 16;
+ stackMove = (stackMove + 31) & ~31;
+ if (stackMove > 0) {
+ /* This is a bit tricky. We need to encode the offset in
+ an RI84, but it might be moderately large-ish.
+ Fortunately we can take advantage of the fact that
+ |stackMove| is 0 % 16 and so encode just bits 11:4 of
+ it. */
+ vassert((stackMove & 15) == 0);
+ if ((stackMove >> 4) > 0xFF) goto unhandled;
+ HReg sp = hregARM_R13();
+ ARMRI84* dist = ARMRI84_I84(stackMove >> 4, 14/*means "<< 4"*/);
+ HI( ARMInstr_Alu(ARMalu_SUB, sp, sp, dist) );
+ }
+
+ RRegSetIterator* iter = RRegSetIterator__new();
+ RRegSetIterator__init(iter, &to_preserve);
+ UInt slotNo = 0;
+ while (True) {
+ HReg r = RRegSetIterator__next(iter);
+ if (hregIsInvalid(r)) break;
+ ARMInstr* i1 = NULL;
+ ARMInstr* i2 = NULL;
+ genSpill_ARM( (HInstr**)&i1, (HInstr**)&i2,
+ r, True/*spRel*/, 16 * slotNo, False/*!mode64*/ );
+ if (i1) HI(i1);
+ if (i2) HI(i2);
+ slotNo++;
+ }
+ vassert(slotNo == n_to_preserve);
+
+ /* Marshall args for the call, do the call, marshal the result */
+ /* Case: 1 arg reg, 1 result reg */
+
+ UInt nArgRegs = nregVecLen(ni->Nin.Call.argRegs);
+
+ if (nArgRegs == 1
+ && isNRegINVALID(ni->Nin.Call.resHi)
+ && !isNRegINVALID(ni->Nin.Call.resLo)) {
+
+ HReg arg1 = mapNReg(nregMap, ni->Nin.Call.argRegs[0]);
+ HReg res1 = mapNReg(nregMap, ni->Nin.Call.resLo);
+ HReg r0 = hregARM_R0();
+ if (!sameHReg(r0, arg1))
+ HI( mk_iMOVds_RR_ARM(r0, arg1) );
+
+ HReg r12 = hregARM_R12();
+ HI( ARMInstr_Imm32(r12, (UInt)(HWord)ni->Nin.Call.entry) );
+ HI( ARMInstr_NC_CallR12() );
+
+ if (!sameHReg(res1, r0))
+ HI( mk_iMOVds_RR_ARM(res1, r0) );
+ } else {
+ goto unhandled;
+ }
+
+ /* Restore live regs */
+ RRegSetIterator__init(iter, &to_preserve);
+ slotNo = 0;
+ while (True) {
+ HReg r = RRegSetIterator__next(iter);
+ if (hregIsInvalid(r)) break;
+ ARMInstr* i1 = NULL;
+ ARMInstr* i2 = NULL;
+ genReload_ARM( (HInstr**)&i1, (HInstr**)&i2,
+ r, True/*spRel*/, 16 * slotNo, False/*!mode64*/ );
+ if (i1) HI(i1);
+ if (i2) HI(i2);
+ slotNo++;
+ }
+ vassert(slotNo == n_to_preserve);
+ if (stackMove > 0) {
+ /* Same deal as the code for moving SP down, just above
+ .. see comments there. */
+ HReg sp = hregARM_R13();
+ ARMRI84* dist = ARMRI84_I84(stackMove >> 4, 14/*means "<< 4"*/);
+ HI( ARMInstr_Alu(ARMalu_ADD, sp, sp, dist) );
+ }
+ break;
+ }
+
+ case Nin_ImmW: {
+ HReg reg = mapNReg(nregMap, ni->Nin.ImmW.dst);
+ HWord imm = ni->Nin.ImmW.imm;
+ HI( ARMInstr_Imm32(reg, (UInt)imm) );
+ break;
+ }
+
+ case Nin_ShiftWri: {
+ NShift how = ni->Nin.ShiftWri.how;
+ UInt amt = ni->Nin.ShiftWri.amt;
+ HReg src = mapNReg(nregMap, ni->Nin.ShiftWri.srcL);
+ HReg dst = mapNReg(nregMap, ni->Nin.ShiftWri.dst);
+ vassert(amt >= 1 && amt <= 31);
+
+ ARMShiftOp shOp = 0;
+ switch (how) {
+ //case Nsh_SHL: shOp = ARMsh_SHL; break;
+ case Nsh_SHR: shOp = ARMsh_SHR; break;
+ default: break;
+ }
+ vassert(shOp != 0);
+
+ HI( ARMInstr_Shift(shOp, dst, src, ARMRI5_I5(amt)) );
+ break;
+ }
+
+ case Nin_ShiftWrr:
+ goto unhandled;
+
+ case Nin_AluWri: {
+ NAlu how = ni->Nin.AluWri.how;
+ HReg dstR = mapNReg(nregMap, ni->Nin.AluWri.dst);
+ HReg srcLR = mapNReg(nregMap, ni->Nin.AluWri.srcL);
+ HWord imm = ni->Nin.AluWri.srcR;
+ if (how == Nalu_AND && imm == 0xFFFFULL) {
+ HI( ARMInstr_NC_Uxth(dstR, srcLR) );
+ break;
+ }
+ goto unhandled;
+ }
+
+ case Nin_AluWrr:
+ goto unhandled;
+
+ case Nin_SetFlagsWri: {
+ HReg reg = mapNReg(nregMap, ni->Nin.SetFlagsWri.srcL);
+ HWord imm = ni->Nin.SetFlagsWri.srcR;
+ if (ni->Nin.SetFlagsWri.how == Nsf_TEST && imm <= 0xFF) {
+ HI( ARMInstr_CmpOrTst(False/*!isCmp*/, reg, ARMRI84_I84(imm,0)) );
+ break;
+ }
+ if (ni->Nin.SetFlagsWri.how == Nsf_CMP && imm <= 0xFF) {
+ HI( ARMInstr_CmpOrTst(True/*isCmp*/, reg, ARMRI84_I84(imm,0)) );
+ break;
+ }
+ goto unhandled;
+ }
+
+ case Nin_MovW: {
+ HReg src = mapNReg(nregMap, ni->Nin.MovW.src);
+ HReg dst = mapNReg(nregMap, ni->Nin.MovW.dst);
+ HI( mk_iMOVds_RR_ARM(dst, src) );
+ break;
+ }
+
+ case Nin_LoadU: {
+ HReg dstR = mapNReg(nregMap, ni->Nin.LoadU.dst);
+ NEA* addr = ni->Nin.LoadU.addr;
+ UChar szB = ni->Nin.LoadU.szB;
+ /* The Nea_IRS case is a kludge. It would be better to
+ generate a single instruction, but that requires a new
+ AMDAMode_IRS, which doesn't currently exist. */
+ if (addr->tag == Nea_IRS && !fitsIn12bits((UInt)addr->Nea.IRS.base)) {
+ UInt imm = (UInt)addr->Nea.IRS.base;
+ HReg indexR = mapNReg(nregMap, addr->Nea.IRS.index);
+ UChar shift = addr->Nea.IRS.shift;
+ if (szB == 4 && shift <= 3) {
+ /* Put the immediate value in r12, since that's
+ reserved as very-short-term scratch. */
+ HReg r12 = hregARM_R12();
+ HI( ARMInstr_Imm32(r12, imm) );
+ HI( ARMInstr_LdSt32(ARMcc_AL, True/*isLoad*/, dstR,
+ ARMAMode1_RRS(r12, indexR, shift)) );
+ break;
+ }
+ }
+ if (addr->tag == Nea_RRS) {
+ HReg baseR = mapNReg(nregMap, addr->Nea.RRS.base);
+ HReg indexR = mapNReg(nregMap, addr->Nea.RRS.index);
+ UChar shift = addr->Nea.RRS.shift;
+ if (szB == 1 && shift <= 3) {
+ HI( ARMInstr_LdSt8U(ARMcc_AL, True/*isLoad*/, dstR,
+ ARMAMode1_RRS(baseR, indexR, shift)) );
+ break;
+ }
+ }
+ goto unhandled;
+ }
+
+ case Nin_Store:
+ goto unhandled;
+
+ default:
+ goto unhandled;
+ }
+
+ for (UInt i = 0; i < hiBufUsed; i++) {
+ if (verbose) {
+ vex_printf(" ");
+ ppARMInstr(hiBuf[i]);
+ vex_printf("\n");
+ }
+ Bool isProfInc
+ = emit_ARMInstr(ab, hiBuf[i],
+ False/*!mode64*/, VexEndnessLE, NULL/*vda*/);
+ vassert(!isProfInc);
+ }
+
+ return;
+
+ unhandled:
+ ppNInstr(ni);
+ vpanic("emit_ARMNInstr: unhandled NInstr");
+ /*NOTREACHED*/
+
+# undef HI
+}
+
+
+/* Emits ARM code for the complete NCode block |hi| into |ab_hot|
+ and |ab_cold|, possibly adding relocation information to |rb| too.
+ This function can only handle NCode blocks. All other ARM
+ instructions are to be handled by emit_ARMInstr. This function
+ is required to generate <= 1024 bytes of code. Returns True if OK,
+ False if not enough buffer space.
+*/
+Bool emit_ARMNCodeBlock ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const ARMInstr* hi,
+ Bool mode64, VexEndness endness_host,
+ Bool verbose )
+{
+ vassert(mode64 == False);
+ vassert(endness_host == VexEndnessLE);
+ vassert(hi->tag == ARMin_NCode);
+ return HInstrNCode__emit ( ab_hot, ab_cold, rb, hi->ARMin.NCode.details,
+ verbose, emit_ARMNInstr );
+}
+
+
/* --------- Helpers for translation chaining. --------- */
/* How big is an event check? See case for ARMin_EvCheck in
Modified: branches/NCODE/priv/host_arm_defs.h
==============================================================================
--- branches/NCODE/priv/host_arm_defs.h (original)
+++ branches/NCODE/priv/host_arm_defs.h Thu Apr 16 22:10:42 2015
@@ -74,11 +74,6 @@
ST_IN HReg hregARM_R8 ( void ) { return mkHReg(False, HRcInt32, 8, 26); }
ST_IN HReg hregARM_R12 ( void ) { return mkHReg(False, HRcInt32, 12, 27); }
ST_IN HReg hregARM_R13 ( void ) { return mkHReg(False, HRcInt32, 13, 28); }
-ST_IN HReg hregARM_R14 ( void ) { return mkHReg(False, HRcInt32, 14, 29); }
-ST_IN HReg hregARM_R15 ( void ) { return mkHReg(False, HRcInt32, 15, 30); }
-ST_IN HReg hregARM_Q13 ( void ) { return mkHReg(False, HRcVec128, 13, 31); }
-ST_IN HReg hregARM_Q14 ( void ) { return mkHReg(False, HRcVec128, 14, 32); }
-ST_IN HReg hregARM_Q15 ( void ) { return mkHReg(False, HRcVec128, 15, 33); }
#undef ST_IN
extern void ppHRegARM ( HReg );
@@ -618,7 +613,12 @@
NOTE: source and destination registers should be different! */
ARMin_Add32,
ARMin_EvCheck, /* Event check */
- ARMin_ProfInc /* 64-bit profile counter increment */
+ ARMin_ProfInc, /* 64-bit profile counter increment */
+ ARMin_NCode, /* NCode template and registers */
+ // The following for NCode only
+ ARMin_NC_Branch, /* Conditional or unconditional branch, imm offset */
+ ARMin_NC_Uxth, /* extend u16 to u32 */
+      ARMin_NC_CallR12   /* Literally "blx r12" */
}
ARMInstrTag;
@@ -953,6 +953,23 @@
installed later, post-translation, by patching it in,
as it is not known at translation time. */
} ProfInc;
+ struct {
+ /* Out of line so as to keep this ARMInstr small. */
+ HInstrNCode* details;
+ } NCode;
+ /* --- for NCode only --- */
+ struct {
+ /* cond. br. w/ 24-bit offset, cond:4 1010 imm:24 */
+ /* imm24 is unspecified and so assumed to be zero. */
+ ARMCondCode cc;
+ } NC_Branch;
+ struct {
+ HReg dst;
+ HReg src;
+ } NC_Uxth;
+ struct {
+ /* Literally "bl r12" */
+ } NC_CallR12;
} ARMin;
}
ARMInstr;
@@ -1018,9 +1035,16 @@
extern ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
ARMAMode1* amFailAddr );
extern ARMInstr* ARMInstr_ProfInc ( void );
+extern ARMInstr* ARMInstr_NCode ( NCodeTemplate* tmpl, HReg* regsR,
+ HReg* regsA, HReg* regsS );
+extern ARMInstr* ARMInstr_NC_Branch ( ARMCondCode cc );
+extern ARMInstr* ARMInstr_NC_Uxth ( HReg dst, HReg src );
+extern ARMInstr* ARMInstr_NC_CallR12 ( void );
extern void ppARMInstr ( const ARMInstr* );
+/* Handy helper, for generating integer reg-reg moves. */
+extern ARMInstr* mk_iMOVds_RR_ARM ( HReg dst, HReg src );
/* Some functions that insulate the register allocator from details
of the underlying instruction set. */
@@ -1033,6 +1057,13 @@
Bool mode64, VexEndness endness_host,
const VexDispatcherAddresses* vda );
+extern Bool emit_ARMNCodeBlock ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const ARMInstr* hi,
+ Bool mode64, VexEndness endness_host,
+ Bool verbose );
+
extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Bool spRel, Int offset, Bool );
extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
Modified: branches/NCODE/priv/host_arm_isel.c
==============================================================================
--- branches/NCODE/priv/host_arm_isel.c (original)
+++ branches/NCODE/priv/host_arm_isel.c Thu Apr 16 22:10:42 2015
@@ -286,6 +286,12 @@
return ARMInstr_Mov(dst, ARMRI84_R(src));
}
+/* And a variant that is exported into the global namespace. */
+ARMInstr* mk_iMOVds_RR_ARM ( HReg dst, HReg src )
+{
+ return mk_iMOVds_RR(dst, src);
+}
+
/* Set the VFP unit's rounding mode to default (round to nearest). */
static void set_VFP_rounding_default ( ISelEnv* env )
{
@@ -6225,6 +6231,48 @@
goto stmt_fail;
}
+ /* --------- NCODE --------- */
+ case Ist_NCode: {
+ UInt i;
+ NCodeTemplate* tmpl = stmt->Ist.NCode.tmpl;
+
+ // For the result values, find the vregs associated with the
+ // result IRTemps, and pin them on the NCode block.
+ HReg* regsR = LibVEX_Alloc_inline( (tmpl->nres+1) * sizeof(HReg) );
+ for (i = 0; i < tmpl->nres; i++) {
+ IRTemp t = stmt->Ist.NCode.ress[i];
+ vassert(t != IRTemp_INVALID);
+ regsR[i] = lookupIRTemp(env, t);
+ }
+ regsR[tmpl->nres] = HReg_INVALID;
+
+ // Compute each arg into a new vreg. It's important to move
+ // them into new vregs because the NCode block may modify its
+ // argument registers, but the Rules Of The Game stipulate that
+ // registers returned from the isel*Expr functions may not be
+ // modified. As usual vreg-vreg move coalescing will remove
+ // those copies in the cases where they are not necessary.
+ HReg* regsA = LibVEX_Alloc_inline( (tmpl->narg+1) * sizeof(HReg) );
+ for (i = 0; i < tmpl->narg; i++) {
+ HReg arg = iselIntExpr_R(env, stmt->Ist.NCode.args[i]);
+ regsA[i] = newVRegI(env);
+ addInstr(env, mk_iMOVds_RR(regsA[i], arg));
+ }
+ regsA[tmpl->narg] = HReg_INVALID;
+
+ // Allocate vregs for the scratch values.
+ HReg* regsS = LibVEX_Alloc_inline( (tmpl->nscr+1) * sizeof(HReg) );
+ for (i = 0; i < tmpl->nscr; i++) {
+ regsS[i] = newVRegI(env);
+ }
+ regsS[tmpl->nscr] = HReg_INVALID;
+
+ // Hand the template and 3 reg sets on through the pipeline.
+ addInstr(env, ARMInstr_NCode(tmpl, regsR, regsA, regsS));
+
+ return;
+ }
+
default: break;
}
stmt_fail:
Modified: branches/NCODE/priv/host_generic_reg_alloc2.c
==============================================================================
--- branches/NCODE/priv/host_generic_reg_alloc2.c (original)
+++ branches/NCODE/priv/host_generic_reg_alloc2.c Thu Apr 16 22:10:42 2015
@@ -40,7 +40,7 @@
#include "host_generic_regs.h"
// ******** WARNING KLUDGE DO NOT COMMIT
-#include "host_amd64_defs.h"
+#include "host_arm_defs.h"
// ******** WARNING KLUDGE DO NOT COMMIT
/* Set to 1 for lots of debugging output. */
@@ -1309,7 +1309,8 @@
reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
if (debug_direct_reload && !reloaded) {
- vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
+ vex_printf("[%3d] ", spilloff);
+ ppHRegGENERIC(cand); vex_printf(" ");
ppInstr(instrs_in->arr[ii], mode64);
}
if (reloaded) {
@@ -1596,9 +1597,9 @@
/* If this instruction is an NCode block, annotate it with the
set of registers that are live after it. */
- { AMD64Instr* ai = instrs_in->arr[ii];
- if (ai->tag == Ain_NCode) {
- AMD64InstrNCode* details = ai->Ain.NCode.details;
+ { ARMInstr* ai = instrs_in->arr[ii];
+ if (ai->tag == ARMin_NCode) {
+ HInstrNCode* details = ai->ARMin.NCode.details;
//vex_printf("RA: after NCode: ");
vassert(details->rrLiveAfter == NULL);
RRegSet* rrLive_after_NCode = RRegSet__new(univ);
Modified: branches/NCODE/priv/host_generic_regs.c
==============================================================================
--- branches/NCODE/priv/host_generic_regs.c (original)
+++ branches/NCODE/priv/host_generic_regs.c Thu Apr 16 22:10:42 2015
@@ -58,7 +58,7 @@
}
/* Generic printing for registers. */
-void ppHReg ( HReg r )
+void ppHRegGENERIC ( HReg r )
{
if (hregIsInvalid(r)) {
vex_printf("HReg_INVALID");
@@ -320,7 +320,7 @@
else if (!rRd && rWr) { str = "Write "; }
/* else "Modify" is correct */
vex_printf(" %s ", str);
- ppHReg(univ->regs[i]);
+ ppHRegGENERIC(univ->regs[i]);
vex_printf("\n");
}
/* and now the virtual registers */
@@ -333,7 +333,7 @@
default: vpanic("ppHRegUsage");
}
vex_printf(" %s ", str);
- ppHReg(tab->vRegs[i]);
+ ppHRegGENERIC(tab->vRegs[i]);
vex_printf("\n");
}
vex_printf("}\n");
@@ -430,15 +430,15 @@
/*--- Indicating register remappings (for reg-alloc) ---*/
/*---------------------------------------------------------*/
-void ppHRegRemap ( HRegRemap* map )
+void ppHRegRemap ( const HRegRemap* map )
{
Int i;
vex_printf("HRegRemap {\n");
for (i = 0; i < map->n_used; i++) {
vex_printf(" ");
- ppHReg(map->orig[i]);
+ ppHRegGENERIC(map->orig[i]);
vex_printf(" --> ");
- ppHReg(map->replacement[i]);
+ ppHRegGENERIC(map->replacement[i]);
vex_printf("\n");
}
vex_printf("}\n");
@@ -463,7 +463,7 @@
}
-HReg lookupHRegRemap ( HRegRemap* map, HReg orig )
+HReg lookupHRegRemap ( const HRegRemap* map, HReg orig )
{
Int i;
if (!hregIsVirtual(orig))
@@ -573,6 +573,91 @@
}
+/* Find the length of a vector of NRegs that is terminated by
+ an NReg_INVALID. */
+UInt nregVecLen ( const NReg* vec )
+{
+ UInt i;
+ for (i = 0; !isNRegINVALID(vec[i]); i++)
+ ;
+ return i;
+}
+
+
+/* Find the length of a vector of NInstr*s that is terminated by
+ NULL. */
+UInt ninstrVecLen ( NInstr** const vec )
+{
+ UInt i;
+ for (i = 0; vec[i]; i++)
+ ;
+ return i;
+}
+
+
+/* Print a HInstrNCode. Caller must supply a register-printing
+ routine and a bit of text identifying the host architecture. */
+void HInstrNCode__show ( const HInstrNCode* details,
+ void (*ppHReg)(HReg), const HChar* hostName )
+{
+ NCodeTemplate* tmpl = details->tmpl;
+ vex_printf("NCode-%s:%s [", hostName, tmpl->name);
+ UInt j;
+ for (j = 0; j < tmpl->nres; j++) {
+ ppHReg(details->regsR[j]);
+ if (j != tmpl->nres-1) vex_printf(" ");
+ }
+ vex_printf("] <= [");
+ for (j = 0; j < tmpl->narg; j++) {
+ ppHReg(details->regsA[j]);
+ if (j != tmpl->narg-1) vex_printf(" ");
+ }
+ vex_printf("] scratch [");
+ for (j = 0; j < tmpl->nscr; j++) {
+ ppHReg(details->regsS[j]);
+ if (j != tmpl->nscr-1) vex_printf(" ");
+ }
+ vex_printf("]");
+}
+
+
+/* Update |u| with the register usages of |details|. */
+void HInstrNCode__getRegUsage ( /*MOD*/HRegUsage* u,
+ const HInstrNCode* details )
+{
+ NCodeTemplate* tmpl = details->tmpl;
+ // It writes the result and scratch registers.
+ UInt j;
+ for (j = 0; j < tmpl->nres; j++)
+ addHRegUse(u, HRmWrite, details->regsR[j]);
+ for (j = 0; j < tmpl->nscr; j++)
+ addHRegUse(u, HRmWrite, details->regsS[j]);
+ // It both reads and writes the arg regs. We have to say
+ // they are written in order to force them to be allocated
+ // different registers from the arg and scratch registers,
+ // since we have no way to ensure that the NCode block
+ // doesn't write its scratch and result registers and later
+ // on read the argument registers.
+ for (j = 0; j < tmpl->narg; j++)
+ addHRegUse(u, HRmModify, details->regsA[j]);
+}
+
+
+/* Apply |map| to the registers in |details|. */
+void HInstrNCode__mapRegs ( /*MOD*/HInstrNCode* details,
+ const HRegRemap* map )
+{
+ NCodeTemplate* tmpl = details->tmpl;
+ UInt j;
+ for (j = 0; j < tmpl->nres; j++)
+ details->regsR[j] = lookupHRegRemap(map, details->regsR[j]);
+ for (j = 0; j < tmpl->nscr; j++)
+ details->regsS[j] = lookupHRegRemap(map, details->regsS[j]);
+ for (j = 0; j < tmpl->narg; j++)
+ details->regsA[j] = lookupHRegRemap(map, details->regsA[j]);
+}
+
+
/* Find the real (hard) register for |r| by looking up in |map|. */
HReg mapNReg ( const NRegMap* map, NReg r )
{
@@ -664,15 +749,15 @@
if (0) {
vex_printf(" # set1: ");
- RRegSet__pp(set_1, ppHReg); vex_printf("\n");
+ RRegSet__pp(set_1, ppHRegGENERIC); vex_printf("\n");
vex_printf(" # set2: ");
- RRegSet__pp(&set_2, ppHReg); vex_printf("\n");
+ RRegSet__pp(&set_2, ppHRegGENERIC); vex_printf("\n");
vex_printf(" # set3: ");
- RRegSet__pp(set_3, ppHReg); vex_printf("\n");
+ RRegSet__pp(set_3, ppHRegGENERIC); vex_printf("\n");
vex_printf(" # set4: ");
- RRegSet__pp(&set_4, ppHReg); vex_printf("\n");
+ RRegSet__pp(&set_4, ppHRegGENERIC); vex_printf("\n");
vex_printf(" # pres: ");
- RRegSet__pp(result, ppHReg); vex_printf("\n");
+ RRegSet__pp(result, ppHRegGENERIC); vex_printf("\n");
}
/* Remove any non allocatable registers (see big comment above) */
@@ -680,6 +765,126 @@
}
+/* Emits host code for the complete NCode block |details| into
+ |ab_hot| and |ab_cold|, possibly adding relocation information to
+ |rb| too. The caller must supply a host-dependent function
+ |emit_OneNInstr| which generates host code for a single NInstr.
+ This function is required to generate <= 1024 bytes of code.
+ Returns True if OK, False if not enough buffer space.
+*/
+Bool HInstrNCode__emit ( /*MOD*/AssemblyBuffer* ab_hot,
+ /*MOD*/AssemblyBuffer* ab_cold,
+ /*MOD*/RelocationBuffer* rb,
+ const HInstrNCode* details,
+ Bool verbose,
+ void (*emit_OneNInstr) (
+ /*MOD*/AssemblyBuffer* ab,
+ /*MOD*/RelocationBuffer* rb,
+ const NInstr* ni,
+ const NRegMap* nregMap,
+ const RRegSet* hregsLiveAfter,
+ /* the next 2 are for debug printing only */
+ Bool verbose, NLabel niLabel
+ )
+ )
+ {
+ const NCodeTemplate* tmpl = details->tmpl;
+ const RRegSet* rregsLiveAfter = details->rrLiveAfter;
+
+ NRegMap nregMap;
+ nregMap.regsR = details->regsR;
+ nregMap.regsA = details->regsA;
+ nregMap.regsS = details->regsS;
+ nregMap.nRegsR = tmpl->nres;
+ nregMap.nRegsA = tmpl->narg;
+ nregMap.nRegsS = tmpl->nscr;
+
+ vassert(hregVecLen(nregMap.regsR) == nregMap.nRegsR);
+ vassert(hregVecL...
[truncated message content] |
|
From: <sv...@va...> - 2015-04-16 21:06:47
|
Author: sewardj
Date: Thu Apr 16 22:06:40 2015
New Revision: 15103
Log:
Update details for arm32, especially w.r.t. VFP/Neon regs.
Modified:
branches/NCODE/docs/internals/register-uses.txt
Modified: branches/NCODE/docs/internals/register-uses.txt
==============================================================================
--- branches/NCODE/docs/internals/register-uses.txt (original)
+++ branches/NCODE/docs/internals/register-uses.txt Thu Apr 16 22:06:40 2015
@@ -108,10 +108,10 @@
Reg Callee Arg
Name Saves? Reg? Comment Vex-uses?
--------------------------------------------------------------
-r0 int#1 int[31:0] retreg? avail
-r1 int#2 int[63:32] retreg? avail
-r2 int#3 avail
-r3 int#4 avail
+r0 n int#1 int[31:0] retreg? avail
+r1 n int#2 int[63:32] retreg? avail
+r2 n int#3 avail
+r3 n int#4 avail
r4 y avail
r5 y avail
r6 y avail
@@ -120,18 +120,26 @@
r9 y (but only on Linux; not in general) avail
r10 y avail
r11 y avail
-r12 possibly used by linker? unavail
-r13(sp) unavail
-r14(lr) unavail
-r15(pc) unavail
+r12 n possibly used by linker? unavail
+r13(sp) y unavail
+r14(lr) y (else callee can never return!) unavail
+r15(pc) meaningless unavail
cp15/c3/r2 thread ptr (see libvex_guest_arm.h, guest_TPIDRURO)
-VFP: d8-d15 are callee-saved
-r12 (IP) is probably available for use as a caller-saved
-register; but instead we use it as an intermediate for
-holding the address for F32/F64 spills, since the VFP load/store
-insns have reg+offset forms for offsets only up to 1020, which
-often isn't enough.
+VFP-v2 has 32 float regs (s0 .. s31) also accessible as 16 double
+registers (d0 .. d15) or 8 128-bit regs (q0 .. q7). Of these,
+the first half (s0 .. s15, d0 .. d7, q0 .. q3) are caller saved
+and the second half (s16 .. s31, d8 .. d15, q4 .. q7) are callee
+saved.
+
+VFP-v3 extends the register bank with d16 .. d31 == q8 .. q15.
+All of these are caller saved.
+
+r12 (IP) is probably available for use as a caller-saved register; but
+instead we use it as this target's per-insn scratch register: as an
+intermediate for holding the address for F32/F64 spills, since the VFP
+load/store insns have reg+offset forms for offsets only up to 1020,
+which often isn't enough, and as an intermediate for NCode loads.
arm64-linux
|
|
From: <sv...@va...> - 2015-04-16 21:06:14
|
Author: sewardj
Date: Thu Apr 16 22:06:06 2015
New Revision: 15102
Log:
mk_tmpl__LOADV32le_on_32: fix copy-n-paste error (wrong word size load)
Modified:
branches/NCODE/memcheck/mc_main.c
Modified: branches/NCODE/memcheck/mc_main.c
==============================================================================
--- branches/NCODE/memcheck/mc_main.c (original)
+++ branches/NCODE/memcheck/mc_main.c Thu Apr 16 22:06:06 2015
@@ -4649,7 +4649,7 @@
hot[0] = NInstr_SetFlagsWri (na, Nsf_TEST, a0, MASK(4));
hot[1] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 4));
hot[2] = NInstr_ShiftWri (na, Nsh_SHR, s0, a0, 16);
- hot[3] = NInstr_LoadU (na, 8, s0, NEA_IRS(na, (HWord)&primary_map[0],
+ hot[3] = NInstr_LoadU (na, 4, s0, NEA_IRS(na, (HWord)&primary_map[0],
s0, 2));
hot[4] = NInstr_AluWri (na, Nalu_AND, r0, a0, 0xFFFF);
hot[5] = NInstr_ShiftWri (na, Nsh_SHR, r0, r0, 2);
|
|
From: Carl E. L. <ce...@us...> - 2015-04-16 18:31:53
|
Florian:
I have investigated the message from the gcc 4.9 compiler that you brought to my attention. The
message is:
test_dfp4.c: In function ‘_test_dtstdgq’:
test_dfp4.c:249:13: note: the ABI of passing aggregates with 16-byte alignment will change in a future GCC release
static void _test_dtstdgq(int BF, int DGM, dfp_val_t val1, dfp_val_t x1 __attribute__((unused)))
I have talked with the IBM compiler team. The warning went into the gcc 4.9 compiler for the PPC64
platform. There will be an ABI change made in the gcc 5.0 with regards to the alignment of 128-bit
arguments to a function. As I understand it, this will only be an issue for code when linking code
that was compiled with different gcc versions. If some of the code was compiled with a pre gcc 5.0
compiler and it is linked with functions with 128-bit arguments that were compiled with a gcc 5.0
or newer compiler, there will be a problem of the arguments not aligning properly.
In my case, all of the code is in the same regression test file. So, as far as the Valgrind regression
tests go this is not an issue.
There is nothing that can be done at the source code level to eliminate the notice from the compiler.
Carl Love
|
|
From: <sv...@va...> - 2015-04-16 17:09:18
|
Author: carll
Date: Thu Apr 16 18:09:09 2015
New Revision: 3134
Log:
The following regression test failures occur on PPC64 little endian only.
The regression test none/tests/jm_vec/isa_2_07 has failures on the lxsiwax and
lxsiwzx instructions. They are loads and the the results are correct for
big endian but not little endian. The little endian result matches the
expected big endian result.
The regression test none/tests/test_isa_2_07_part2 has a failure with the
vbpermq instruction. The little endian result matches the expected result for
big endian. The upper and lower 64 bits of the result are not swapped correctly
for little endian.
This commit fixes these issues.
The bugzilla for the issue is 346270.
Modified:
trunk/priv/guest_ppc_toIR.c
Modified: trunk/priv/guest_ppc_toIR.c
==============================================================================
--- trunk/priv/guest_ppc_toIR.c (original)
+++ trunk/priv/guest_ppc_toIR.c Thu Apr 16 18:09:09 2015
@@ -15250,7 +15250,12 @@
{
IRExpr * exp;
DIP("lxsiwzx %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
- exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ) ) );
+
+ if (host_endness == VexEndnessLE)
+ exp = unop( Iop_64to32, load( Ity_I64, mkexpr( EA ) ) );
+ else
+ exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ) ) );
+
putVSReg( XT, binop( Iop_64HLtoV128,
unop( Iop_32Uto64, exp),
mkU64(0) ) );
@@ -15260,7 +15265,12 @@
{
IRExpr * exp;
DIP("lxsiwax %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
- exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ) ) );
+
+ if (host_endness == VexEndnessLE)
+ exp = unop( Iop_64to32, load( Ity_I64, mkexpr( EA ) ) );
+ else
+ exp = unop( Iop_64HIto32, load( Ity_I64, mkexpr( EA ) ) );
+
putVSReg( XT, binop( Iop_64HLtoV128,
unop( Iop_32Sto64, exp),
mkU64(0) ) );
@@ -17844,11 +17854,18 @@
mkexpr( vA ),
mkexpr( idx ) ) ),
mkU8( 127 ) ) ) );
- res = binop( Iop_OrV128,
- res,
- binop( Iop_ShlV128,
- mkexpr( perm_bit ),
- mkU8( i ) ) );
+ if (host_endness == VexEndnessLE)
+ res = binop( Iop_OrV128,
+ res,
+ binop( Iop_ShlV128,
+ mkexpr( perm_bit ),
+ mkU8( i + 64) ) );
+ else
+ res = binop( Iop_OrV128,
+ res,
+ binop( Iop_ShlV128,
+ mkexpr( perm_bit ),
+ mkU8( i ) ) );
vB_expr = binop( Iop_ShrV128, vB_expr, mkU8( 8 ) );
}
putVReg( vRT_addr, res);
|
|
From: Zhu, Y. <Yan...@vi...> - 2015-04-16 16:47:56
|
It turned out that after applying patches from Bug 339288, the problem went away. Now I'm seeing a new issue when running Valgrind 3.10.1 on MIPS64 Cavium OCTEON: # valgrind uia_init ==1932== Memcheck, a memory error detector ==1932== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al. ==1932== Using Valgrind-3.10.1 and LibVEX; rerun with -h for copyright info ==1932== Command: uia_init ==1932== ==1932== Invalid write of size 4 ==1932== at 0x4001628: _dl_start_user (in /lib/ld-2.16.so.new) ==1932== by 0x40015B8: __start (in /lib/ld-2.16.so.new) ==1932== Address 0x7e6afc78 is on thread 1's stack ==1932== 8 bytes below stack pointer ==1932== And Valgrind is hung from there. Anyone has any ideas? Thanks, Yanwen From: Maran Pakkirisamy [mailto:mpa...@ca...] Sent: Thursday, April 16, 2015 3:17 AM To: Zhu, Yanwen; Crestez Dan Leonard; Valgrind Developers Subject: Re: [Valgrind-developers] Valgrind 13854: Cross compiling for Cavium MIPS64, N32 ABI Please check if the revision r3108 is included in the version you use. If not, including the patch might fix the issue. The patch is part of fixing the below bugz. https://bugs.kde.org/show_bug.cgi?id=341997 On 04/15/2015 11:42 PM, Zhu, Yanwen wrote: Leonard, I finally got valgrind compiled correctly. And I just run valgrind without attaching to any command in MIPS64 OCTEON target, I got the following error. 
Looks like some of the shared libraries are missing: # uname -a Linux ViaSat 3.10.20-rt14-Cavium-Octeon #1 SMP Wed Apr 15 09:30:37 EDT 2015 mips64 GNU/Linux # which valgrind /usr/bin/valgrind # valgrind valgrind: failed to start tool 'memcheck' for platform 'mips64-linux': No such file or directory # strace valgrind execve("/usr/bin/valgrind", ["valgrind"], [/* 20 vars */]) = 0 brk(0) = 0x100cb0f4 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x77dad000 uname({sys="Linux", node="ViaSat", ...}) = 0 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/lib32-fp/tls/octeon3/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) stat("/lib32-fp/tls/octeon3", 0x7fad6050) = -1 ENOENT (No such file or directory) open("/lib32-fp/tls/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) stat("/lib32-fp/tls", 0x7fad6050) = -1 ENOENT (No such file or directory) open("/lib32-fp/octeon3/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory) stat("/lib32-fp/octeon3", 0x7fad6050) = -1 ENOENT (No such file or directory) Attached is my own makefile to build valgrind, maybe something is not right in that make file? Thanks, Yanwen From: Crestez Dan Leonard [mailto:cdl...@gm...] Sent: Tuesday, April 14, 2015 6:02 PM To: Zhu, Yanwen; Valgrind Developers Subject: Re: Valgrind 13854: Cross compiling for Cavium MIPS64, N32 ABI Hello, You seem to be building for mips32, this is wrong. Instead of _mips32_ those object files should all contain "_mips64n32_". You need to make sure your configure target is some form of mips64*, not mips32. What this patch does is add support for the N32 ABI as a secondary arch of mips64. What used to only build valgrind for mips64 will now build a second set of binaries for the N32 ABI. The main valgrind launcher will pick the correct mips64/mips64n32 version of the tool based on flags in the elf header. 
You also need to avoid including -mabi inside any explicit CFLAGS. Apparently gcc gets confused by multiple -mabi=* flags instead of just using the last option. Maybe you show how you are running the configure script? Also make sure you reran autogen.sh after patching and cleaned any stale binaries. I included the valgrind-developers list because this mail thread should be public in case anyone has similar issues. Regards, Leonard On Wed, Apr 15, 2015 at 12:31 AM, Zhu, Yanwen <Yan...@vi...<mailto:Yan...@vi...>> wrote: Leonard, Thanks for your reply, I just fixed some errors during the patch. You're right, not too bad, just 3 files that I had to manually port the changes. I am building valgrind for MIPS64 with -mabi=n32 using the OCTEOM cross compiler and I am seeing the following error in the linking phase: ../coregrind/link_tool_exe_linux 0x38000000 /home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/tools/ext/bin/mips64-octeon-linux-gnu-gcc --sysroot=/home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/tools -Os -pipe -Os -mtune=mips64 -mabi=n32 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wl,-melf32btsmipn32 -march=octeon3 -I/home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/toolchain/linux/include -Wno-long-long -Os -pipe -Os -mtune=mips64 -mabi=n32 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wl,-melf32btsmipn32 -march=octeon3 -fno-stack-protector -mabi=n32 -Wl,-melf32btsmipn32 -march=octeon3 -L/home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/tools/lib -L/home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/tools/usr/lib -o memcheck-mips32-linux -O2 -g -Wall -Wmissing-prototypes -Wshadow -Wpointer-arith -Wstrict-prototypes -Wmissing-declarations -Wno-format-zero-length -fno-strict-aliasing -fno-builtin -O2 -static -nodefaultlibs -nostartfiles -u __start memcheck_mips32_linux-mc_leakcheck.o memcheck_mips32_linux-mc_malloc_wrappers.o 
memcheck_mips32_linux-mc_main.o memcheck_mips32_linux-mc_translate.o memcheck_mips32_linux-mc_machine.o memcheck_mips32_linux-mc_errors.o ../coregrind/libcoregrind-mips32-linux.a ../VEX/libvex-mips32-linux.a -lgcc ../coregrind/libcoregrind-mips32-linux.a(libcoregrind_mips32_linux_a-m_main.o): In function `__start': m_main.c:(.text+0xc): undefined reference to `_gp_disp' m_main.c:(.text+0x10): undefined reference to `_gp_disp' collect2: error: ld returned 1 exit status make[4]: *** [memcheck-mips32-linux] Error 1 I don't know what's going on here, any suggestions? Thanks, Yanwen From: Crestez Dan Leonard [mailto:cdl...@gm...<mailto:cdl...@gm...>] Sent: Tuesday, April 14, 2015 3:08 PM To: Zhu, Yanwen Subject: Re: Valgrind 13854: Cross compiling for Cavium MIPS64, N32 ABI Hello, The patches are against SVN trunk at around the time the patches were posted (it differs between V1 and V2). Backporting them to 3.10.1 should not be terribly difficult. They won't apply cleanly but I expect the issues to be minor and easily solvable. Since the tilegx port was integrated a few days ago I expect they won't apply cleanly on svn trunk either, at least not until I rebase and post the next version. You can also try to compile directly from the git repos mentioned in the tracker item. That should "just work". If you have problems compiling you should mention the actual build errors as well as compiler/target details. Support for N32 should be generic but I'm only actually targeting octeon chips using the cavium gcc-4.7 toolchain. Regards, Leonard On Tue, Apr 14, 2015 at 7:16 PM, <Yan...@vi...<mailto:Yan...@vi...>> wrote: Hi Leonard, I'm trying to apply your patches for mips64n32 on valgrind 3.10.1 and there some some errors, I manually fixed the patching errors, however, I have some problem compiling it. Looks to me that your patches were not made based on 3.10.1. What version of valgrind were your patches made from? Do you have patches for 3.10.1? 
------------------------------------------------------------------------------ BPM Camp - Free Virtual Workshop May 6th at 10am PDT/1PM EDT Develop your own process in accordance with the BPMN 2 standard Learn Process modeling best practices with Bonita BPM through live exercises http://www.bonitasoft.com/be-part-of-it/events/bpm-camp-virtual- event?utm_ source=Sourceforge_BPM_Camp_5_6_15&utm_medium=email&utm_campaign=VA_SF _______________________________________________ Valgrind-developers mailing list Val...@li...<mailto:Val...@li...> https://lists.sourceforge.net/lists/listinfo/valgrind-developers -- Maran Pakkirisamy |
|
From: <sv...@va...> - 2015-04-16 16:25:38
|
Author: carll
Date: Thu Apr 16 17:25:29 2015
New Revision: 15101
Log:
Fix the compiler warning about casting the arguments to the functions
LibVEX_GuestPPC64_get_CR() and LibVEX_GuestPPC64_get_XER().
The bugzilla for this issue is 346267.
Modified:
trunk/coregrind/m_coredump/coredump-elf.c
Modified: trunk/coregrind/m_coredump/coredump-elf.c
==============================================================================
--- trunk/coregrind/m_coredump/coredump-elf.c (original)
+++ trunk/coregrind/m_coredump/coredump-elf.c Thu Apr 16 17:25:29 2015
@@ -358,8 +358,8 @@
regs->orig_gpr3 = arch->vex.guest_GPR3;
regs->ctr = arch->vex.guest_CTR;
regs->link = arch->vex.guest_LR;
- regs->xer = LibVEX_GuestPPC64_get_XER( &((ThreadArchState*)arch)->vex );
- regs->ccr = LibVEX_GuestPPC64_get_CR( &((ThreadArchState*)arch)->vex );
+ regs->xer = LibVEX_GuestPPC64_get_XER( (const VexGuestPPC64State*) &(arch->vex) );
+ regs->ccr = LibVEX_GuestPPC64_get_CR( (const VexGuestPPC64State*) &(arch->vex) );
/* regs->mq = 0; */
regs->trap = 0;
regs->dar = 0; /* should be fault address? */
|
|
From: Maran P. <mpa...@ca...> - 2015-04-16 07:17:30
|
Please check if the revision r3108 is included in the version you use. If not, including the patch might fix the issue. The patch is part of fixing the below bugz. https://bugs.kde.org/show_bug.cgi?id=341997 On 04/15/2015 11:42 PM, Zhu, Yanwen wrote: > > Leonard, > > I finally got valgrind compiled correctly. And I just run valgrind > without attaching to any command in MIPS64 OCTEON target, I got the > following error. Looks like some of the shared libraries are missing: > > # uname -a > > Linux ViaSat 3.10.20-rt14-Cavium-Octeon #1 SMP Wed Apr 15 09:30:37 EDT > 2015 mips64 GNU/Linux > > # which valgrind > > /usr/bin/valgrind > > # valgrind > > valgrind: failed to start tool 'memcheck' for platform 'mips64-linux': > No such file or directory > > # strace valgrind > > execve("/usr/bin/valgrind", ["valgrind"], [/* 20 vars */]) = 0 > > brk(0) = 0x100cb0f4 > > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, > 0) = 0x77dad000 > > uname({sys="Linux", node="ViaSat", ...}) = 0 > > access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or > directory) > > open("/lib32-fp/tls/octeon3/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 > ENOENT (No such file or directory) > > stat("/lib32-fp/tls/octeon3", 0x7fad6050) = -1 ENOENT (No such file or > directory) > > open("/lib32-fp/tls/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No > such file or directory) > > stat("/lib32-fp/tls", 0x7fad6050) = -1 ENOENT (No such file or > directory) > > open("/lib32-fp/octeon3/libc.so.6", O_RDONLY|O_CLOEXEC) = -1 ENOENT > (No such file or directory) > > stat("/lib32-fp/octeon3", 0x7fad6050) = -1 ENOENT (No such file or > directory) > > Attached is my own makefile to build valgrind, maybe something is not > right in that make file? > > Thanks, > > Yanwen > > *From:*Crestez Dan Leonard [mailto:cdl...@gm...] 
> *Sent:* Tuesday, April 14, 2015 6:02 PM > *To:* Zhu, Yanwen; Valgrind Developers > *Subject:* Re: Valgrind 13854: Cross compiling for Cavium MIPS64, N32 ABI > > Hello, > > You seem to be building for mips32, this is wrong. Instead of _mips32_ > those object files should all contain "_mips64n32_". > > You need to make sure your configure target is some form of mips64*, > not mips32. What this patch does is add support for the N32 ABI as a > secondary arch of mips64. What used to only build valgrind for mips64 > will now build a second set of binaries for the N32 ABI. The main > valgrind launcher will pick the correct mips64/mips64n32 version of > the tool based on flags in the elf header. > > You also need to avoid including -mabi inside any explicit CFLAGS. > Apparently gcc gets confused by multiple -mabi=* flags instead of just > using the last option. > > Maybe you show how you are running the configure script? Also make > sure you reran autogen.sh after patching and cleaned any stale binaries. > > I included the valgrind-developers list because this mail thread > should be public in case anyone has similar issues. > > Regards, > > Leonard > > On Wed, Apr 15, 2015 at 12:31 AM, Zhu, Yanwen <Yan...@vi... > <mailto:Yan...@vi...>> wrote: > > Leonard, > > Thanks for your reply, I just fixed some errors during the patch. > You’re right, not too bad, just 3 files that I had to manually port > the changes. 
> > I am building valgrind for MIPS64 with -mabi=n32 using the OCTEOM > cross compiler and I am seeing the following error in the linking phase: > > ../coregrind/link_tool_exe_linux 0x38000000 > /home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/tools/ext/bin/mips64-octeon-linux-gnu-gcc > --sysroot=/home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/tools > -Os -pipe -Os -mtune=mips64 -mabi=n32 -D_LARGEFILE_SOURCE > -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -Wl,-melf32btsmipn32 > -march=octeon3 > -I/home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/toolchain/linux/include > -Wno-long-long -Os -pipe -Os -mtune=mips64 -mabi=n32 > -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 > -Wl,-melf32btsmipn32 -march=octeon3 -fno-stack-protector -mabi=n32 > -Wl,-melf32btsmipn32 -march=octeon3 > -L/home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/tools/lib > -L/home/yzhu/workspace/buildroot/buildroot/output/kg255x.v2_pp_devel/tools/usr/lib > -o memcheck-mips32-linux -O2 -g -Wall -Wmissing-prototypes -Wshadow > -Wpointer-arith -Wstrict-prototypes -Wmissing-declarations > -Wno-format-zero-length -fno-strict-aliasing -fno-builtin -O2 -static > -nodefaultlibs -nostartfiles -u __start > memcheck_mips32_linux-mc_leakcheck.o > memcheck_mips32_linux-mc_malloc_wrappers.o > memcheck_mips32_linux-mc_main.o memcheck_mips32_linux-mc_translate.o > memcheck_mips32_linux-mc_machine.o memcheck_mips32_linux-mc_errors.o > ../coregrind/libcoregrind-mips32-linux.a ../VEX/libvex-mips32-linux.a > -lgcc > > ../coregrind/libcoregrind-mips32-linux.a(libcoregrind_mips32_linux_a-m_main.o): > In function `__start': > > m_main.c:(.text+0xc): undefined reference to `_gp_disp' > > m_main.c:(.text+0x10): undefined reference to `_gp_disp' > > collect2: error: ld returned 1 exit status > > make[4]: *** [memcheck-mips32-linux] Error 1 > > I don’t know what’s going on here, any suggestions? 
> > Thanks, > > Yanwen > > *From:*Crestez Dan Leonard [mailto:cdl...@gm... > <mailto:cdl...@gm...>] > *Sent:* Tuesday, April 14, 2015 3:08 PM > *To:* Zhu, Yanwen > *Subject:* Re: Valgrind 13854: Cross compiling for Cavium MIPS64, N32 ABI > > Hello, > > The patches are against SVN trunk at around the time the patches were > posted (it differs between V1 and V2). Backporting them to 3.10.1 > should not be terribly difficult. They won't apply cleanly but I > expect the issues to be minor and easily solvable. Since the tilegx > port was integrated a few days ago I expect they won't apply cleanly > on svn trunk either, at least not until I rebase and post the next > version. > > You can also try to compile directly from the git repos mentioned in > the tracker item. That should "just work". > > If you have problems compiling you should mention the actual build > errors as well as compiler/target details. Support for N32 should be > generic but I'm only actually targeting octeon chips using the cavium > gcc-4.7 toolchain. > > Regards, > > Leonard > > On Tue, Apr 14, 2015 at 7:16 PM, <Yan...@vi... > <mailto:Yan...@vi...>> wrote: > > Hi Leonard, > > I'm trying to apply your patches for mips64n32 on valgrind 3.10.1 and > there some some errors, I manually fixed the patching errors, however, > I have some problem compiling it. Looks to me that your patches were > not made based on 3.10.1. What version of valgrind were your patches > made from? Do you have patches for 3.10.1? 
> > > > ------------------------------------------------------------------------------ > BPM Camp - Free Virtual Workshop May 6th at 10am PDT/1PM EDT > Develop your own process in accordance with the BPMN 2 standard > Learn Process modeling best practices with Bonita BPM through live exercises > http://www.bonitasoft.com/be-part-of-it/events/bpm-camp-virtual- event?utm_ > source=Sourceforge_BPM_Camp_5_6_15&utm_medium=email&utm_campaign=VA_SF > > > _______________________________________________ > Valgrind-developers mailing list > Val...@li... > https://lists.sourceforge.net/lists/listinfo/valgrind-developers -- Maran Pakkirisamy |
|
From: <sv...@va...> - 2015-04-15 21:46:05
|
Author: florian
Date: Wed Apr 15 22:45:57 2015
New Revision: 15100
Log:
Fix function call: 1st argument is the thread id.
Modified:
trunk/coregrind/m_sigframe/sigframe-tilegx-linux.c
Modified: trunk/coregrind/m_sigframe/sigframe-tilegx-linux.c
==============================================================================
--- trunk/coregrind/m_sigframe/sigframe-tilegx-linux.c (original)
+++ trunk/coregrind/m_sigframe/sigframe-tilegx-linux.c Wed Apr 15 22:45:57 2015
@@ -76,7 +76,7 @@
ThreadId tid = tst->tid;
NSegment const* stackseg = NULL;
- if (VG_(extend_stack)(addr, addr))
+ if (VG_(extend_stack)(tid, addr))
stackseg = VG_(am_find_nsegment)(addr);
if (stackseg == NULL || !stackseg->hasR || !stackseg->hasW)
|
|
From: <sv...@va...> - 2015-04-15 20:31:00
|
Author: philippe
Date: Wed Apr 15 21:30:52 2015
New Revision: 15099
Log:
Following fix done in tilegx host in vex: r3130, reenable tilegx as host
in libvexmultiarch_test
Modified:
trunk/none/tests/libvex_test.c
Modified: trunk/none/tests/libvex_test.c
==============================================================================
--- trunk/none/tests/libvex_test.c (original)
+++ trunk/none/tests/libvex_test.c Wed Apr 15 21:30:52 2015
@@ -241,14 +241,6 @@
show_vta("skipped (word size differs)", &vta);
continue;
}
- // Special condition for VexArchTILEGX that is not yet ready
- // to run in multiarch as an host for different guest.
- if (va == VexArchTILEGX
- && guest_arch != VexArchTILEGX
- && multiarch != va) {
- show_vta("skipped (TILEGX host and guest != TILEGX)", &vta);
- continue;
- }
if (multiarch > VexArch_INVALID
&& multiarch != va) {
show_vta("skipped (!= specific requested arch)", &vta);
|
|
From: <sv...@va...> - 2015-04-15 18:35:59
|
Author: florian
Date: Wed Apr 15 19:35:52 2015
New Revision: 15098
Log:
Update list of ignored files.
Modified:
trunk/none/tests/ (props changed)
|