|
From: <sv...@va...> - 2015-04-13 10:49:23
|
Author: sewardj
Date: Mon Apr 13 11:49:15 2015
New Revision: 15086
Log:
Add an NCode template for 8 bit loads on 64 bit targets.
Requires vex r3127.
Modified:
branches/NCODE/memcheck/mc_include.h
branches/NCODE/memcheck/mc_main.c
branches/NCODE/memcheck/mc_translate.c
Modified: branches/NCODE/memcheck/mc_include.h
==============================================================================
--- branches/NCODE/memcheck/mc_include.h (original)
+++ branches/NCODE/memcheck/mc_include.h Mon Apr 13 11:49:15 2015
@@ -602,6 +602,7 @@
extern NCodeTemplate* MC_(tmpl__LOADV64le_on_64);
extern NCodeTemplate* MC_(tmpl__LOADV32le_on_64);
+extern NCodeTemplate* MC_(tmpl__LOADV8_on_64);
/* Helper functions defined in mc_main.c */
Modified: branches/NCODE/memcheck/mc_main.c
==============================================================================
--- branches/NCODE/memcheck/mc_main.c (original)
+++ branches/NCODE/memcheck/mc_main.c Mon Apr 13 11:49:15 2015
@@ -4276,6 +4276,7 @@
VG_REGPARM(1) static ULong mc_LOADV64le_on_64_slow ( Addr a );
VG_REGPARM(1) static ULong mc_LOADV32le_on_64_slow ( Addr a );
+VG_REGPARM(1) static ULong mc_LOADV8_on_64_slow ( Addr a );
static void* ncode_alloc ( UInt n ) {
return VG_(malloc)("mc.ncode_alloc (NCode, permanent)", n);
@@ -4294,6 +4295,7 @@
NCodeTemplate* MC_(tmpl__LOADV64le_on_64) = NULL;
NCodeTemplate* MC_(tmpl__LOADV32le_on_64) = NULL;
+NCodeTemplate* MC_(tmpl__LOADV8_on_64) = NULL;
static NCodeTemplate* mk_tmpl__LOADV64le_on_64 ( NAlloc na )
{
@@ -4306,27 +4308,49 @@
NReg a0 = mkNReg(Nrr_Argument, 0);
NReg s0 = mkNReg(Nrr_Scratch, 0);
- hot[0] = NInstr_SetFlagsWri (na, Nsf_TEST, a0, MASK(8));
- hot[1] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 4));
- hot[2] = NInstr_ShiftWri (na, Nsh_SHR, s0, a0, 16);
- hot[3] = NInstr_LoadU (na, 8, s0, NEA_IRS(na, (HWord)&primary_map[0],
- s0, 3));
- hot[4] = NInstr_AluWri (na, Nalu_AND, r0, a0, 0xFFFF);
- hot[5] = NInstr_ShiftWri (na, Nsh_SHR, r0, r0, 3);
- hot[6] = NInstr_LoadU (na, 2, r0, NEA_RRS(na, s0, r0, 1));
- hot[7] = NInstr_SetFlagsWri (na, Nsf_CMP, r0, VA_BITS16_DEFINED);
- hot[8] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 0));
- hot[9] = NInstr_ImmW (na, r0, V_BITS64_DEFINED);
+ /* NCode [r0] = "LOADV64le_on_64" [a0] s0 {
+ hot:
+ 0 tst.w a0, #0xFFFFFFF000000007 misaligned-or-high?
+ 1 bnz cold.4 yes, goto slow path
+ 2 shr.w s0, a0, #16 s0 = pri-map-ix
+ 3 ld.64 s0, [&pri_map[0] + s0 << #3] s0 = sec-map
+ 4 and.w r0, a0, #0xFFFF r0 = sec-map-offB
+ 5 shr.w r0, r0, #3 r0 = sec-map-ix
+ 6 ld.16 r0, [s0 + r0 << #1] r0 = sec-map-VABITS16
+ 7 cmp.w r0, #0xAAAA r0 == VABITS16_DEFINED?
+ 8 bnz cold.0 no, goto cold.0
+ 9 imm.w r0, #0x0 VBITS64_DEFINED
+ 10 nop continue
+ cold:
+ 0 mov.w s0, r0 s0 = sec-map-VABITS16
+ 1 imm.w r0, #0xFFFFFFFFFFFFFFFF VBITS64_UNDEFINED
+ 2 cmp.w s0, #0x5555 s0 == VABITS16_UNDEFINED?
+ 3 bz hot.10 yes, continue
+ 4 call r0 = mc_LOADV64le_on_64_slow[..](a0) call helper
+ 5 b hot.10 continue
+ }
+ */
+ hot[0] = NInstr_SetFlagsWri (na, Nsf_TEST, a0, MASK(8));
+ hot[1] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 4));
+ hot[2] = NInstr_ShiftWri (na, Nsh_SHR, s0, a0, 16);
+ hot[3] = NInstr_LoadU (na, 8, s0, NEA_IRS(na, (HWord)&primary_map[0],
+ s0, 3));
+ hot[4] = NInstr_AluWri (na, Nalu_AND, r0, a0, 0xFFFF);
+ hot[5] = NInstr_ShiftWri (na, Nsh_SHR, r0, r0, 3);
+ hot[6] = NInstr_LoadU (na, 2, r0, NEA_RRS(na, s0, r0, 1));
+ hot[7] = NInstr_SetFlagsWri (na, Nsf_CMP, r0, VA_BITS16_DEFINED);
+ hot[8] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 0));
+ hot[9] = NInstr_ImmW (na, r0, V_BITS64_DEFINED);
hot[10] = NInstr_Nop (na);
- cold[0] = NInstr_MovW (na, s0, r0);
- cold[1] = NInstr_ImmW (na, r0, V_BITS64_UNDEFINED);
- cold[2] = NInstr_SetFlagsWri (na, Nsf_CMP, s0, VA_BITS16_UNDEFINED);
- cold[3] = NInstr_Branch (na, Ncc_Z, mkNLabel(Nlz_Hot, 10));
- cold[4] = NInstr_Call (na, rINVALID, r0, mkNRegVec1(na, a0),
- (void*)& mc_LOADV64le_on_64_slow,
- "mc_LOADV64le_on_64_slow");
- cold[5] = NInstr_Branch(na, Ncc_ALWAYS, mkNLabel(Nlz_Hot, 10));
+ cold[0] = NInstr_MovW (na, s0, r0);
+ cold[1] = NInstr_ImmW (na, r0, V_BITS64_UNDEFINED);
+ cold[2] = NInstr_SetFlagsWri (na, Nsf_CMP, s0, VA_BITS16_UNDEFINED);
+ cold[3] = NInstr_Branch (na, Ncc_Z, mkNLabel(Nlz_Hot, 10));
+ cold[4] = NInstr_Call (na, rINVALID, r0, mkNRegVec1(na, a0),
+ (void*)& mc_LOADV64le_on_64_slow,
+ "mc_LOADV64le_on_64_slow");
+ cold[5] = NInstr_Branch (na, Ncc_ALWAYS, mkNLabel(Nlz_Hot, 10));
hot[11] = cold[6] = NULL;
NCodeTemplate* tmpl
@@ -4346,29 +4370,51 @@
NReg a0 = mkNReg(Nrr_Argument, 0);
NReg s0 = mkNReg(Nrr_Scratch, 0);
- hot[0] = NInstr_SetFlagsWri (na, Nsf_TEST, a0, MASK(4));
- hot[1] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 4));
- hot[2] = NInstr_ShiftWri (na, Nsh_SHR, s0, a0, 16);
- hot[3] = NInstr_LoadU (na, 8, s0, NEA_IRS(na, (HWord)&primary_map[0],
- s0, 3));
- hot[4] = NInstr_AluWri (na, Nalu_AND, r0, a0, 0xFFFF);
- hot[5] = NInstr_ShiftWri (na, Nsh_SHR, r0, r0, 2);
- hot[6] = NInstr_LoadU (na, 1, r0, NEA_RRS(na, s0, r0, 0));
- hot[7] = NInstr_SetFlagsWri (na, Nsf_CMP, r0, VA_BITS8_DEFINED);
- hot[8] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 0));
- hot[9] = NInstr_ImmW (na, r0, 0xFFFFFFFF00000000ULL
- | (ULong)V_BITS32_DEFINED);
+ /* NCode [r0] = "LOADV32le_on_64" [a0] s0 {
+ hot:
+ 0 tst.w a0, #0xFFFFFFF000000003 misaligned-or-high?
+ 1 bnz cold.4 yes, goto slow path
+ 2 shr.w s0, a0, #16 pri-map-ix
+ 3 ld.64 s0, [&pri_map[0] + s0 << #3] sec-map
+ 4 and.w r0, a0, #0xFFFF sec-map-offB
+ 5 shr.w r0, r0, #2 sec-map-ix
+ 6 ld.8 r0, [s0 + r0 << #0] sec-map-VABITS8
+ 7 cmp.w r0, #0xAA == VABITS8_DEFINED ?
+ 8 bnz cold.0 no, goto cold.0
+ 9 imm.w r0, #0xFFFFFFFF00000000 VBITS32_DEFINED (sort of)
+ 10 nop continue
+ cold:
+ 0 mov.w s0, r0 sec-map-VABITS8
+ 1 imm.w r0, #0xFFFFFFFFFFFFFFFF VBITS32_UNDEFINED
+ 2 cmp.w s0, #0x55 s-m-VABITS8 == VABITS8_UNDEF?
+ 3 bz hot.10 yes, continue
+ 4 call r0 = mc_LOADV32le_on_64_slow[..](a0) call helper
+ 5 b hot.10 continue
+ }
+ */
+ hot[0] = NInstr_SetFlagsWri (na, Nsf_TEST, a0, MASK(4));
+ hot[1] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 4));
+ hot[2] = NInstr_ShiftWri (na, Nsh_SHR, s0, a0, 16);
+ hot[3] = NInstr_LoadU (na, 8, s0, NEA_IRS(na, (HWord)&primary_map[0],
+ s0, 3));
+ hot[4] = NInstr_AluWri (na, Nalu_AND, r0, a0, 0xFFFF);
+ hot[5] = NInstr_ShiftWri (na, Nsh_SHR, r0, r0, 2);
+ hot[6] = NInstr_LoadU (na, 1, r0, NEA_RRS(na, s0, r0, 0));
+ hot[7] = NInstr_SetFlagsWri (na, Nsf_CMP, r0, VA_BITS8_DEFINED);
+ hot[8] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 0));
+ hot[9] = NInstr_ImmW (na, r0, 0xFFFFFFFF00000000ULL
+ | (ULong)V_BITS32_DEFINED);
hot[10] = NInstr_Nop (na);
- cold[0] = NInstr_MovW (na, s0, r0);
- cold[1] = NInstr_ImmW (na, r0, 0xFFFFFFFF00000000ULL
- | (ULong)V_BITS32_UNDEFINED);
- cold[2] = NInstr_SetFlagsWri (na, Nsf_CMP, s0, VA_BITS8_UNDEFINED);
- cold[3] = NInstr_Branch (na, Ncc_Z, mkNLabel(Nlz_Hot, 10));
- cold[4] = NInstr_Call (na, rINVALID, r0, mkNRegVec1(na, a0),
- (void*)& mc_LOADV32le_on_64_slow,
- "mc_LOADV32le_on_64_slow");
- cold[5] = NInstr_Branch(na, Ncc_ALWAYS, mkNLabel(Nlz_Hot, 10));
+ cold[0] = NInstr_MovW (na, s0, r0);
+ cold[1] = NInstr_ImmW (na, r0, 0xFFFFFFFF00000000ULL
+ | (ULong)V_BITS32_UNDEFINED);
+ cold[2] = NInstr_SetFlagsWri (na, Nsf_CMP, s0, VA_BITS8_UNDEFINED);
+ cold[3] = NInstr_Branch (na, Ncc_Z, mkNLabel(Nlz_Hot, 10));
+ cold[4] = NInstr_Call (na, rINVALID, r0, mkNRegVec1(na, a0),
+ (void*)& mc_LOADV32le_on_64_slow,
+ "mc_LOADV32le_on_64_slow");
+ cold[5] = NInstr_Branch (na, Ncc_ALWAYS, mkNLabel(Nlz_Hot, 10));
hot[11] = cold[6] = NULL;
NCodeTemplate* tmpl
@@ -4377,12 +4423,107 @@
return tmpl;
}
+static NCodeTemplate* mk_tmpl__LOADV8_on_64 ( NAlloc na )
+{
+ NInstr** hot = na((11+1) * sizeof(NInstr*));
+ NInstr** cold = na((14+1) * sizeof(NInstr*));
+
+ NReg rINVALID = mkNRegINVALID();
+
+ NReg r0 = mkNReg(Nrr_Result, 0);
+ NReg a0 = mkNReg(Nrr_Argument, 0);
+ NReg s0 = mkNReg(Nrr_Scratch, 0);
+
+ /*
+ h0 tst.w a0, #0xFFFFFFF000000000 high?
+ h1 bnz cold.12 yes, goto slow path
+ h2 shr.w s0, a0, #16 s0 = pri-map-ix
+ h3 ld.64 s0, [&pri_map[0] + s0 << #3] s0 = sec-map
+ h4 and.w r0, a0, #0xFFFF r0 = sec-map-offB
+ h5 shr.w r0, r0, #2 r0 = sec-map-ix
+ h6 ld.8 r0, [s0 + r0 << #0] r0 = sec-map-VABITS8
+ h7 cmp.w r0, #0xAA r0 == VABITS8_DEFINED?
+ h8 bnz cold.0 no, goto cold.0
+ h9 imm.w r0, #0xFFFFFFFFFFFFFF00 VBITS8_DEFINED | top56safe
+ h10 nop continue
+
+ c0 cmp.w r0, #0x55 VABITS8_UNDEFINED
+ c1 bnz cold.4
+
+ c2 imm.w r0, #0xFFFFFFFFFFFFFFFF VBITS8_UNDEFINED | top56safe
+ c3 b hot.10
+
+ // r0 holds sec-map-VABITS8
+ // a0 holds the address. Extract the relevant 2 bits and inspect.
+ c4 and.w s0, a0, #3 // addr & 3
+ c5 add.w s0, s0, s0 // 2 * (addr & 3)
+ c6 shr.w r0, r0, s0 // sec-map-VABITS8 >> (2 * (addr & 3))
+ c7 and.w r0, r0, #3 // (sec-map-VABITS8 >> (2 * (addr & 3))) & 3
+
+ c8 cmp.w r0, #2 // VABITS2_DEFINED
+ c9 jz hot.9
+
+ c10 cmp.w r0, #1 // VABITS2_UNDEFINED
+ c11 jz cold.2
+
+ c12 call r0 = mc_LOADV8_on_64_slow(a0)
+ c13 b hot.10
+ */
+ hot[0] = NInstr_SetFlagsWri (na, Nsf_TEST, a0, MASK(1));
+ hot[1] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 12));
+ hot[2] = NInstr_ShiftWri (na, Nsh_SHR, s0, a0, 16);
+ hot[3] = NInstr_LoadU (na, 8, s0, NEA_IRS(na, (HWord)&primary_map[0],
+ s0, 3));
+ hot[4] = NInstr_AluWri (na, Nalu_AND, r0, a0, 0xFFFF);
+ hot[5] = NInstr_ShiftWri (na, Nsh_SHR, r0, r0, 2);
+ hot[6] = NInstr_LoadU (na, 1, r0, NEA_RRS(na, s0, r0, 0));
+ hot[7] = NInstr_SetFlagsWri (na, Nsf_CMP, r0, VA_BITS8_DEFINED);
+ hot[8] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 0));
+ hot[9] = NInstr_ImmW (na, r0, 0xFFFFFFFFFFFFFF00ULL
+ | (ULong)V_BITS8_DEFINED);
+ hot[10] = NInstr_Nop (na);
+
+ cold[0] = NInstr_SetFlagsWri (na, Nsf_CMP, r0, VA_BITS8_UNDEFINED);
+ cold[1] = NInstr_Branch (na, Ncc_NZ, mkNLabel(Nlz_Cold, 4));
+
+ cold[2] = NInstr_ImmW (na, r0, 0xFFFFFFFFFFFFFF00ULL
+ | (ULong)V_BITS8_UNDEFINED);
+ cold[3] = NInstr_Branch (na, Ncc_ALWAYS, mkNLabel(Nlz_Hot, 10));
+
+ // r0 holds sec-map-VABITS8
+ // a0 holds the address. Extract the relevant 2 bits and inspect.
+ cold[4] = NInstr_AluWri (na, Nalu_AND, s0, a0, 3);
+ cold[5] = NInstr_AluWrr (na, Nalu_ADD, s0, s0, s0);
+ cold[6] = NInstr_ShiftWrr (na, Nsh_SHR, r0, r0, s0);
+ cold[7] = NInstr_AluWri (na, Nalu_AND, r0, r0, 3);
+
+ cold[8] = NInstr_SetFlagsWri (na, Nsf_CMP, r0, 2);
+ cold[9] = NInstr_Branch (na, Ncc_Z, mkNLabel(Nlz_Hot, 9));
+
+ cold[10]= NInstr_SetFlagsWri (na, Nsf_CMP, r0, 1);
+ cold[11]= NInstr_Branch (na, Ncc_Z, mkNLabel(Nlz_Cold, 2));
+
+ cold[12]= NInstr_Call (na, rINVALID, r0, mkNRegVec1(na, a0),
+ (void*)& mc_LOADV8_on_64_slow,
+ "mc_LOADV8_on_64_slow");
+ cold[13]= NInstr_Branch (na, Ncc_ALWAYS, mkNLabel(Nlz_Hot, 10));
+
+ hot[11] = cold[14] = NULL;
+ NCodeTemplate* tmpl
+ = mkNCodeTemplate(na,"LOADV8_on_64",
+ /*res, parms, scratch*/1, 1, 1, hot, cold);
+ return tmpl;
+}
+
+
void MC_(create_ncode_templates) ( void )
{
tl_assert(MC_(tmpl__LOADV64le_on_64) == NULL);
tl_assert(MC_(tmpl__LOADV32le_on_64) == NULL);
+ tl_assert(MC_(tmpl__LOADV8_on_64) == NULL);
MC_(tmpl__LOADV64le_on_64) = mk_tmpl__LOADV64le_on_64(ncode_alloc);
MC_(tmpl__LOADV32le_on_64) = mk_tmpl__LOADV32le_on_64(ncode_alloc);
+ MC_(tmpl__LOADV8_on_64) = mk_tmpl__LOADV8_on_64(ncode_alloc);
}
@@ -4886,6 +5027,11 @@
#endif
}
+VG_REGPARM(1) static ULong mc_LOADV8_on_64_slow ( Addr a )
+{
+ return mc_LOADVn_slow( a, 8, False/*irrelevant*/ );
+}
+
VG_REGPARM(2)
void MC_(helperc_STOREV8) ( Addr a, UWord vbits8 )
Modified: branches/NCODE/memcheck/mc_translate.c
==============================================================================
--- branches/NCODE/memcheck/mc_translate.c (original)
+++ branches/NCODE/memcheck/mc_translate.c Mon Apr 13 11:49:15 2015
@@ -4648,6 +4648,19 @@
= assignNew('V', mce, Ity_I32, unop(Iop_64to32, mkexpr(datavbits64)));
return datavbits32;
}
+ if (ty == Ity_I8) {
+ /* Unconditional LOAD8 on 64 bit host. Generate inline code. */
+ IRTemp datavbits64 = newTemp(mce, Ity_I64, VSh);
+ NCodeTemplate* tmpl = MC_(tmpl__LOADV8_on_64);
+ IRAtom** args = mkIRExprVec_1( addrAct );
+ IRTemp* ress = mkIRTempVec_1( datavbits64 );
+ /* The NCode block produces a 64 bit value, but we need to
+ truncate it to 8 bits. */
+ stmt( 'V', mce, IRStmt_NCode(tmpl, args, ress) );
+ IRAtom* datavbits8
+ = assignNew('V', mce, Ity_I8, unop(Iop_64to8, mkexpr(datavbits64)));
+ return datavbits8;
+ }
/* else fall through */
}
/* ------ END inline NCode ? ------ */
|