|
From: Nicholas N. <nj...@ca...> - 2004-01-19 19:25:40
|
CVS commit by nethercote:
Patch from Tom Hughes, for bug 72643:
Patch to improve SSE/SSE2 support
This patch should implement most of the missing SSE/SSE2 opcodes. About
the only ones it doesn't do are the MASKMOVxxx ones, as they are quite
horrible and involve an implicit reference to EDI, so I need to think
about them a bit more.
The patch also includes a set of tests for the MMX/SSE/SSE2 opcodes to
validate that they have the same effect under valgrind as they do when
run normally. In one or two cases this wasn't actually the case even
for some of the implemented opcodes, so I fixed those as well ;-)
M +2 -6 addrcheck/ac_main.c 1.60
M +6 -6 cachegrind/cg_main.c 1.62
M +129 -3 coregrind/vg_from_ucode.c 1.73
M +604 -72 coregrind/vg_to_ucode.c 1.122
M +16 -2 coregrind/vg_translate.c 1.68
M +26 -0 include/vg_skin.h.base 1.9
M +4 -1 memcheck/mc_translate.c 1.35
M +13 -1 none/tests/Makefile.am 1.22
M +3 -2 tests/Makefile.am 1.35
M +10 -2 tests/vg_regtest.in 1.18
--- valgrind/addrcheck/ac_main.c #1.59:1.60
@@ -1049,4 +1049,6 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
case SSE3a_MemRd:
case SSE2a_MemRd:
+ case SSE3a1_MemRd:
+ case SSE2a1_MemRd:
helper = (Addr)ac_fpu_READ_check;
goto do_Access_ARG3;
@@ -1068,10 +1070,4 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
break;
- case SSE2a1_MemRd:
- case SSE3a1_MemRd:
- VG_(pp_UInstr)(0,u_in);
- VG_(skin_panic)("AddrCheck: unhandled SSE uinstr");
- break;
-
case SSE3e1_RegRd:
case SSE3e_RegWr:
--- valgrind/cachegrind/cg_main.c #1.61:1.62
@@ -545,5 +545,5 @@ static Int compute_BBCC_array_size(UCode
case SSE2a_MemRd:
case SSE2a1_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
+ sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
t_read = u_in->val3;
is_FPU_R = True;
@@ -557,5 +557,5 @@ static Int compute_BBCC_array_size(UCode
case SSE3a1_MemRd:
- sk_assert(u_in->size == 16);
+ sk_assert(u_in->size == 8 || u_in->size == 16);
t_read = u_in->val3;
is_FPU_R = True;
@@ -578,5 +578,5 @@ static Int compute_BBCC_array_size(UCode
case SSE2a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
+ sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
t_write = u_in->val3;
is_FPU_W = True;
@@ -799,5 +799,5 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
case SSE2a_MemRd:
case SSE2a1_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
+ sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
t_read = u_in->val3;
t_read_addr = newTemp(cb);
@@ -822,5 +822,5 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
case SSE3a1_MemRd:
- sk_assert(u_in->size == 16);
+ sk_assert(u_in->size == 8 || u_in->size == 16);
t_read = u_in->val3;
t_read_addr = newTemp(cb);
@@ -862,5 +862,5 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
case SSE2a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
+ sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
/* fall through */
case SSE3a_MemWr:
--- valgrind/coregrind/vg_from_ucode.c #1.72:1.73
@@ -1519,4 +1519,76 @@ static void emit_SSE2a ( FlagSet uses_sf
}
+static void emit_SSE2e1 ( FlagSet uses_sflags,
+ FlagSet sets_sflags,
+ UChar first_byte,
+ UChar second_byte,
+ UChar third_byte,
+ UChar fourth_byte,
+ Int ireg )
+{
+ VG_(new_emit)(True, uses_sflags, sets_sflags);
+ VG_(emitB) ( first_byte );
+ VG_(emitB) ( second_byte );
+ third_byte &= 0x38; /* mask out mod and rm fields */
+ third_byte |= 0xC0; /* set top two bits: mod = 11b */
+ third_byte |= (ireg & 7); /* patch in our ireg */
+ VG_(emitB) ( third_byte );
+ VG_(emitB) ( fourth_byte );
+ if (dis)
+ VG_(printf)(
+ "\n\t\tsse2e1--0x%x:0x%x:0x%x:0x%x-(%s)\n",
+ (UInt)first_byte, (UInt)second_byte,
+ (UInt)third_byte, (UInt)fourth_byte,
+ nameIReg(4,ireg)
+ );
+}
+
+static void emit_SSE2g1 ( FlagSet uses_sflags,
+ FlagSet sets_sflags,
+ UChar first_byte,
+ UChar second_byte,
+ UChar third_byte,
+ UChar fourth_byte,
+ Int ireg )
+{
+ VG_(new_emit)(True, uses_sflags, sets_sflags);
+ VG_(emitB) ( first_byte );
+ VG_(emitB) ( second_byte );
+ third_byte &= 0xC7; /* mask out reg field */
+ third_byte |= 0xC0; /* set top two bits: mod = 11b */
+ third_byte |= ((ireg & 7) << 3); /* patch in our ireg */
+ VG_(emitB) ( third_byte );
+ VG_(emitB) ( fourth_byte );
+ if (dis)
+ VG_(printf)(
+ "\n\t\tsse2g1_reg_wr--0x%x:0x%x:0x%x:0x%x-(%s)\n",
+ (UInt)first_byte, (UInt)second_byte,
+ (UInt)third_byte, (UInt)fourth_byte,
+ nameIReg(4,ireg)
+ );
+}
+
+static void emit_SSE2g ( FlagSet uses_sflags,
+ FlagSet sets_sflags,
+ UChar first_byte,
+ UChar second_byte,
+ UChar third_byte,
+ Int ireg )
+{
+ VG_(new_emit)(True, uses_sflags, sets_sflags);
+ VG_(emitB) ( first_byte );
+ VG_(emitB) ( second_byte );
+ third_byte &= 0xC7; /* mask out reg field */
+ third_byte |= 0xC0; /* set top two bits: mod = 11b */
+ third_byte |= ((ireg & 7) << 3); /* patch in our ireg */
+ VG_(emitB) ( third_byte );
+ if (dis)
+ VG_(printf)(
+ "\n\t\tsse2g--0x%x:0x%x:0x%x-(%s)\n",
+ (UInt)first_byte, (UInt)second_byte, (UInt)third_byte,
+ nameIReg(4,ireg)
+ );
+}
+
static void emit_SSE2a1 ( FlagSet uses_sflags,
FlagSet sets_sflags,
@@ -4076,5 +4148,6 @@ static void emitUInstr ( UCodeBlock* cb,
case SSE2a_MemWr:
case SSE2a_MemRd:
- vg_assert(u->size == 4 || u->size == 16 || u->size == 512);
+ vg_assert(u->size == 4 || u->size == 8
+ || u->size == 16 || u->size == 512);
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == Lit16);
@@ -4091,4 +4164,57 @@ static void emitUInstr ( UCodeBlock* cb,
break;
+ case SSE2g_RegWr:
+ vg_assert(u->size == 4);
+ vg_assert(u->tag1 == Lit16);
+ vg_assert(u->tag2 == Lit16);
+ vg_assert(u->tag3 == RealReg);
+ vg_assert(!anyFlagUse(u));
+ if (!(*sselive)) {
+ emit_get_sse_state();
+ *sselive = True;
+ }
+ emit_SSE2g ( u->flags_r, u->flags_w,
+ (u->val1 >> 8) & 0xFF,
+ u->val1 & 0xFF,
+ u->val2 & 0xFF,
+ u->val3 );
+ break;
+
+ case SSE2g1_RegWr:
+ vg_assert(u->size == 4);
+ vg_assert(u->tag1 == Lit16);
+ vg_assert(u->tag2 == Lit16);
+ vg_assert(u->tag3 == RealReg);
+ vg_assert(!anyFlagUse(u));
+ if (!(*sselive)) {
+ emit_get_sse_state();
+ *sselive = True;
+ }
+ emit_SSE2g1 ( u->flags_r, u->flags_w,
+ (u->val1 >> 8) & 0xFF,
+ u->val1 & 0xFF,
+ u->val2 & 0xFF,
+ u->lit32 & 0xFF,
+ u->val3 );
+ break;
+
+ case SSE2e1_RegRd:
+ vg_assert(u->size == 2);
+ vg_assert(u->tag1 == Lit16);
+ vg_assert(u->tag2 == Lit16);
+ vg_assert(u->tag3 == RealReg);
+ vg_assert(!anyFlagUse(u));
+ if (!(*sselive)) {
+ emit_get_sse_state();
+ *sselive = True;
+ }
+ emit_SSE2e1 ( u->flags_r, u->flags_w,
+ (u->val1 >> 8) & 0xFF,
+ u->val1 & 0xFF,
+ u->val2 & 0xFF,
+ u->lit32 & 0xFF,
+ u->val3 );
+ break;
+
case SSE2a1_MemRd:
vg_assert(u->size == 4 || u->size == 16);
@@ -4195,5 +4321,5 @@ static void emitUInstr ( UCodeBlock* cb,
case SSE3a1_MemRd:
- vg_assert(u->size == 16);
+ vg_assert(u->size == 8 || u->size == 16);
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == Lit16);
@@ -4209,5 +4335,5 @@ static void emitUInstr ( UCodeBlock* cb,
(u->val2 >> 8) & 0xFF,
u->val2 & 0xFF,
- (u->lit32 >> 8) & 0xFF,
+ u->lit32 & 0xFF,
u->val3 );
break;
--- valgrind/coregrind/vg_to_ucode.c #1.121:1.122
@@ -3398,4 +3398,191 @@ Addr dis_SSE2_load_store_or_mov ( UCodeB
}
+
+/* Simple SSE operations, either
+ op (src)xmmreg, (dst)mmxreg
+ or
+ op (src)address, (dst)mmxreg
+ 2 opcode bytes.
+ Supplied eip points to the first address mode byte.
+*/
+static
+Addr dis_SSE2_to_MMX ( UCodeBlock *cb,
+ UChar sorb,
+ Addr eip,
+ Int sz,
+ Char* name,
+ UChar opc1,
+ UChar opc2 )
+{
+ UChar dis_buf[50];
+ UChar modrm = getUChar(eip);
+ if (epartIsReg(modrm)) {
+ /* Completely internal SSE insn. */
+ uInstr2(cb, SSE3, 0, /* ignore sz for internal ops */
+ Lit16, (((UShort)opc1) << 8) | (UShort)opc2,
+ Lit16, (UShort)modrm );
+ if (dis)
+ VG_(printf)("%s %s, %s\n", name,
+ nameXMMReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)) );
+ eip++;
+ } else {
+ UInt pair = disAMode ( cb, sorb, eip, dis?dis_buf:NULL );
+ Int tmpa = LOW24(pair);
+ eip += HI8(pair);
+ uInstr3(cb, SSE2a_MemRd, sz,
+ Lit16, (((UShort)(opc1)) << 8) | ((UShort)opc2),
+ Lit16, ((UShort)modrm),
+ TempReg, tmpa);
+ if (dis)
+ VG_(printf)("%s %s, %s\n",
+ name,
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+ return eip;
+}
+
+
+/* Simple SSE operations, either
+ op (src)mmxreg, (dst)xmmreg
+ or
+ op (src)address, (dst)xmmreg
+ 2 opcode bytes.
+ Supplied eip points to the first address mode byte.
+*/
+static
+Addr dis_SSE2_from_MMX ( UCodeBlock *cb,
+ UChar sorb,
+ Addr eip,
+ Int sz,
+ Char* name,
+ UChar opc1,
+ UChar opc2 )
+{
+ UChar dis_buf[50];
+ UChar modrm = getUChar(eip);
+ if (epartIsReg(modrm)) {
+ /* Completely internal SSE insn. */
+ uInstr2(cb, SSE3, 0, /* ignore sz for internal ops */
+ Lit16, (((UShort)opc1) << 8) | (UShort)opc2,
+ Lit16, (UShort)modrm );
+ if (dis)
+ VG_(printf)("%s %s, %s\n", name,
+ nameMMXReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)) );
+ eip++;
+ } else {
+ UInt pair = disAMode ( cb, sorb, eip, dis?dis_buf:NULL );
+ Int tmpa = LOW24(pair);
+ eip += HI8(pair);
+ uInstr3(cb, SSE2a_MemRd, sz,
+ Lit16, (((UShort)(opc1)) << 8) | ((UShort)opc2),
+ Lit16, ((UShort)modrm),
+ TempReg, tmpa);
+ if (dis)
+ VG_(printf)("%s %s, %s\n",
+ name,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+ return eip;
+}
+
+
+/* Simple SSE operations, either
+ op (src)xmmreg, (dst)mmxreg
+ or
+ op (src)address, (dst)mmxreg
+ 3 opcode bytes.
+ Supplied eip points to the first address mode byte.
+*/
+static
+Addr dis_SSE3_to_MMX ( UCodeBlock *cb,
+ UChar sorb,
+ Addr eip,
+ Int sz,
+ Char* name,
+ UChar opc1,
+ UChar opc2,
+ UChar opc3 )
+{
+ UChar dis_buf[50];
+ UChar modrm = getUChar(eip);
+ if (epartIsReg(modrm)) {
+ /* Completely internal SSE insn. */
+ uInstr2(cb, SSE4, 0, /* ignore sz for internal ops */
+ Lit16, (((UShort)opc1) << 8) | (UShort)opc2,
+ Lit16, (((UShort)opc3) << 8) | (UShort)modrm );
+ if (dis)
+ VG_(printf)("%s %s, %s\n", name,
+ nameXMMReg(eregOfRM(modrm)),
+ nameMMXReg(gregOfRM(modrm)) );
+ eip++;
+ } else {
+ UInt pair = disAMode ( cb, sorb, eip, dis?dis_buf:NULL );
+ Int tmpa = LOW24(pair);
+ eip += HI8(pair);
+ uInstr3(cb, SSE3a_MemRd, sz,
+ Lit16, (((UShort)(opc1)) << 8) | ((UShort)opc2),
+ Lit16, (((UShort)(opc3)) << 8) | ((UShort)modrm),
+ TempReg, tmpa);
+ if (dis)
+ VG_(printf)("%s %s, %s\n",
+ name,
+ dis_buf,
+ nameMMXReg(gregOfRM(modrm)));
+ }
+ return eip;
+}
+
+
+/* Simple SSE operations, either
+ op (src)mmxreg, (dst)xmmreg
+ or
+ op (src)address, (dst)xmmreg
+ 3 opcode bytes.
+ Supplied eip points to the first address mode byte.
+*/
+static
+Addr dis_SSE3_from_MMX ( UCodeBlock *cb,
+ UChar sorb,
+ Addr eip,
+ Int sz,
+ Char* name,
+ UChar opc1,
+ UChar opc2,
+ UChar opc3 )
+{
+ UChar dis_buf[50];
+ UChar modrm = getUChar(eip);
+ if (epartIsReg(modrm)) {
+ /* Completely internal SSE insn. */
+ uInstr2(cb, SSE4, 0, /* ignore sz for internal ops */
+ Lit16, (((UShort)opc1) << 8) | (UShort)opc2,
+ Lit16, (((UShort)opc3) << 8) | (UShort)modrm );
+ if (dis)
+ VG_(printf)("%s %s, %s\n", name,
+ nameMMXReg(eregOfRM(modrm)),
+ nameXMMReg(gregOfRM(modrm)) );
+ eip++;
+ } else {
+ UInt pair = disAMode ( cb, sorb, eip, dis?dis_buf:NULL );
+ Int tmpa = LOW24(pair);
+ eip += HI8(pair);
+ uInstr3(cb, SSE3a_MemRd, sz,
+ Lit16, (((UShort)(opc1)) << 8) | ((UShort)opc2),
+ Lit16, (((UShort)(opc3)) << 8) | ((UShort)modrm),
+ TempReg, tmpa);
+ if (dis)
+ VG_(printf)("%s %s, %s\n",
+ name,
+ dis_buf,
+ nameXMMReg(gregOfRM(modrm)));
+ }
+ return eip;
+}
+
+
static
void dis_push_segreg ( UCodeBlock* cb, UInt sreg, Int sz )
@@ -3597,4 +3784,55 @@ static Addr disInstr ( UCodeBlock* cb, A
}
+ /* CLFLUSH -- flush cache line */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && (!epartIsReg(insn[2]))
+ && (gregOfRM(insn[2]) == 7))
+ {
+ vg_assert(sz == 4);
+ pair = disAMode ( cb, sorb, eip+2, dis?dis_buf:NULL );
+ t1 = LOW24(pair);
+ eip += 2+HI8(pair);
+ uInstr3(cb, SSE2a_MemRd, 0, /* ignore sz for internal ops */
+ Lit16, (((UShort)0x0F) << 8) | (UShort)0xAE,
+ Lit16, (UShort)insn[2],
+ TempReg, t1 );
+ if (dis)
+ VG_(printf)("clflush %s\n", dis_buf);
+ goto decode_success;
+ }
+
+ /* CVTPI2PS (0x0F,0x2A) -- mm/m64, xmm */
+ /* CVTPI2PD (0x66,0x0F,0x2A) -- mm/m64, xmm */
+ if (insn[0] == 0x0F && insn[1] == 0x2A) {
+ if (sz == 4) {
+ eip = dis_SSE2_from_MMX
+ ( cb, sorb, eip+2, 8, "cvtpi2ps",
+ insn[0], insn[1] );
+ } else {
+ eip = dis_SSE3_from_MMX
+ ( cb, sorb, eip+2, 8, "cvtpi2pd",
+ 0x66, insn[0], insn[1] );
+ }
+ goto decode_success;
+ }
+
+ /* CVTTPS2PI (0x0F,0x2C) -- xmm/m64, mm */
+ /* CVTPS2PI (0x0F,0x2D) -- xmm/m64, mm */
+ /* CVTTPD2PI (0x66,0x0F,0x2C) -- xmm/m128, mm */
+ /* CVTPD2PI (0x66,0x0F,0x2D) -- xmm/m128, mm */
+ if (insn[0] == 0x0F
+ && (insn[1] == 0x2C || insn[1] == 0x2D)) {
+ if (sz == 4) {
+ eip = dis_SSE2_to_MMX
+ ( cb, sorb, eip+2, 8, "cvt{t}ps2pi",
+ insn[0], insn[1] );
+ } else {
+ eip = dis_SSE3_to_MMX
+ ( cb, sorb, eip+2, 16, "cvt{t}pd2pi",
+ 0x66, insn[0], insn[1] );
+ }
+ goto decode_success;
+ }
+
/* CVTTSD2SI (0xF2,0x0F,0x2C) -- convert a double-precision float
value in memory or xmm reg to int and put it in an ireg.
@@ -3696,4 +3934,18 @@ static Addr disInstr ( UCodeBlock* cb, A
}
+ /* CVTPS2PD -- convert two packed floats to two packed doubles. */
+ /* 0x66: CVTPD2PS -- convert two packed doubles to two packed floats. */
+ if (insn[0] == 0x0F && insn[1] == 0x5A) {
+ vg_assert(sz == 2 || sz == 4);
+ if (sz == 4) {
+ eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 8, "cvtps2pd",
+ insn[0], insn[1] );
+ } else {
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "cvtpd2ps",
+ 0x66, insn[0], insn[1] );
+ }
+ goto decode_success;
+ }
+
/* CVTSS2SD -- convert one single float to double. */
if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5A) {
@@ -3712,4 +3964,58 @@ static Addr disInstr ( UCodeBlock* cb, A
}
+ /* CVTDQ2PS -- convert four ints to four packed floats. */
+ /* 0x66: CVTPS2DQ -- convert four packed floats to four ints. */
+ if (insn[0] == 0x0F && insn[1] == 0x5B) {
+ vg_assert(sz == 2 || sz == 4);
+ if (sz == 4) {
+ eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 16, "cvtdq2ps",
+ insn[0], insn[1] );
+ } else {
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "cvtps2dq",
+ 0x66, insn[0], insn[1] );
+ }
+ goto decode_success;
+ }
+
+ /* CVTPD2DQ -- convert two packed doubles to two ints. */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xE6) {
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 8, "cvtpd2dq",
+ 0x66, insn[0], insn[1] );
+ goto decode_success;
+ }
+
+ /* CVTTPD2DQ -- convert two packed doubles to two ints with truncation. */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xE6) {
+ vg_assert(sz == 4);
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+3, 8, "cvttpd2dq",
+ insn[0], insn[1], insn[2] );
+ goto decode_success;
+ }
+
+ /* CVTDQ2PD -- convert two ints to two packed doubles. */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
+ vg_assert(sz == 4);
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+3, 8, "cvtdq2pd",
+ insn[0], insn[1], insn[2] );
+ goto decode_success;
+ }
+
+ /* CVTTPS2DQ -- convert four packed floats to four ints with truncation. */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x5B) {
+ vg_assert(sz == 4);
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+3, 16, "cvttps2dq",
+ insn[0], insn[1], insn[2] );
+ goto decode_success;
+ }
+
+ /* CMPSS -- compare scalar floats. */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xC2) {
+ vg_assert(sz == 4);
+ eip = dis_SSE3_reg_or_mem_Imm8 ( cb, sorb, eip+3, 8, "cmpss",
+ insn[0], insn[1], insn[2] );
+ goto decode_success;
+ }
+
/* CMPSD -- compare scalar doubles. */
if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0xC2) {
@@ -3743,4 +4049,20 @@ static Addr disInstr ( UCodeBlock* cb, A
}
+ /* PSHUFLW */
+ if (insn[0] == 0xF2 && insn[1] == 0x0F && insn[2] == 0x70) {
+ eip = dis_SSE3_reg_or_mem_Imm8 ( cb, sorb, eip+3, 16,
+ "pshuflw",
+ insn[0], insn[1], insn[2] );
+ goto decode_success;
+ }
+
+ /* PSHUFHW */
+ if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x70) {
+ eip = dis_SSE3_reg_or_mem_Imm8 ( cb, sorb, eip+3, 16,
+ "pshufhw",
+ insn[0], insn[1], insn[2] );
+ goto decode_success;
+ }
+
/* PSHUFW */
if (sz == 4
@@ -3883,4 +4205,18 @@ static Addr disInstr ( UCodeBlock* cb, A
}
+ /* MINPS */
+ /* 0x66: MINPD */
+ if (insn[0] == 0x0F && insn[1] == 0x5D) {
+ vg_assert(sz == 4 || sz == 2);
+ if (sz == 4) {
+ eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 16, "minps",
+ insn[0], insn[1] );
+ } else {
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "minpd",
+ 0x66, insn[0], insn[1] );
+ }
+ goto decode_success;
+ }
+
/* 0xF3: MAXSD */
/* 0xF3: MAXSS */
@@ -3945,9 +4281,15 @@ static Addr disInstr ( UCodeBlock* cb, A
}
- /* ORPD (src)xmmreg-or-mem, (dst)xmmreg */
- if (sz == 2
- && insn[0] == 0x0F && insn[1] == 0x56) {
+ /* ORPS */
+ /* 0x66: ORPD (src)xmmreg-or-mem, (dst)xmmreg */
+ if (insn[0] == 0x0F && insn[1] == 0x56) {
+ vg_assert(sz == 4 || sz == 2);
+ if (sz == 4) {
+ eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 16, "orps",
+ insn[0], insn[1] );
+ } else {
eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "orpd",
0x66, insn[0], insn[1] );
+ }
goto decode_success;
}
@@ -4014,10 +4356,10 @@ static Addr disInstr ( UCodeBlock* cb, A
goto decode_success;
}
- /* 0xE0: PAVGB(src)xmmreg-or-mem, (dst)xmmreg, size 4 */
- if (sz == 4
- && insn[0] == 0x0F
- && insn[1] == 0xE0 ) {
- eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 16, "pavg{b,w}",
- insn[0], insn[1] );
+
+ /* 0xF6: PSADBW(src)xmmreg-or-mem, (dst)xmmreg */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF6) {
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "psadbw",
+ 0x66, insn[0], insn[1] );
goto decode_success;
}
@@ -4026,9 +4368,11 @@ static Addr disInstr ( UCodeBlock* cb, A
/* 0x61: PUNPCKLWD (src)xmmreg-or-mem, (dst)xmmreg */
/* 0x62: PUNPCKLDQ (src)xmmreg-or-mem, (dst)xmmreg */
+ /* 0x6C: PUNPCKLQDQ (src)xmmreg-or-mem, (dst)xmmreg */
if (sz == 2
&& insn[0] == 0x0F
- && (insn[1] == 0x60 || insn[1] == 0x61 || insn[1] == 0x62)) {
+ && (insn[1] == 0x60 || insn[1] == 0x61
+ || insn[1] == 0x62 || insn[1] == 0x6C)) {
eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16,
- "punpckl{bw,wd,dq}",
+ "punpckl{bw,wd,dq,qdq}",
0x66, insn[0], insn[1] );
goto decode_success;
@@ -4038,9 +4382,11 @@ static Addr disInstr ( UCodeBlock* cb, A
/* 0x69: PUNPCKHWD (src)xmmreg-or-mem, (dst)xmmreg */
/* 0x6A: PUNPCKHDQ (src)xmmreg-or-mem, (dst)xmmreg */
+ /* 0x6D: PUNPCKHQDQ (src)xmmreg-or-mem, (dst)xmmreg */
if (sz == 2
&& insn[0] == 0x0F
- && (insn[1] == 0x68 || insn[1] == 0x69 || insn[1] == 0x6A)) {
+ && (insn[1] == 0x68 || insn[1] == 0x69
+ || insn[1] == 0x6A || insn[1] == 0x6D)) {
eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16,
- "punpckh{bw,wd,dq}",
+ "punpckh{bw,wd,dq,qdq}",
0x66, insn[0], insn[1] );
goto decode_success;
@@ -4143,10 +4489,19 @@ static Addr disInstr ( UCodeBlock* cb, A
}
+ /* 0xE4: PMULHUW(src)xmmreg-or-mem, (dst)xmmreg */
/* 0xE5: PMULHW(src)xmmreg-or-mem, (dst)xmmreg */
/* 0xD5: PMULLW(src)xmmreg-or-mem, (dst)xmmreg */
if (sz == 2
&& insn[0] == 0x0F
- && (insn[1] == 0xE5 || insn[1] == 0xD5)) {
- eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "pmul{h,l}w",
+ && (insn[1] == 0xE4 || insn[1] == 0xE5 || insn[1] == 0xD5)) {
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "pmul{hu,h,l}w",
+ 0x66, insn[0], insn[1] );
+ goto decode_success;
+ }
+
+ /* 0xF4: PMULUDQ(src)xmmreg-or-mem, (dst)xmmreg */
+ if (sz == 2
+ && insn[0] == 0x0F && insn[1] == 0xF4) {
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "pmuludq",
0x66, insn[0], insn[1] );
goto decode_success;
@@ -4292,4 +4647,24 @@ static Addr disInstr ( UCodeBlock* cb, A
}
+ /* MOVDQ2Q -- move low 8 bytes of XMM reg to MMX reg. */
+ if (insn[0] == 0xF2
+ && insn[1] == 0x0F
+ && insn[2] == 0xD6) {
+ eip = dis_SSE3_to_MMX
+ ( cb, sorb, eip+3, 8, "movdq2q",
+ insn[0], insn[1], insn[2] );
+ goto decode_success;
+ }
+
+ /* MOVQ2DQ -- move MMX reg to low 8 bytes of XMM reg. */
+ if (insn[0] == 0xF3
+ && insn[1] == 0x0F
+ && insn[2] == 0xD6) {
+ eip = dis_SSE3_from_MMX
+ ( cb, sorb, eip+3, 8, "movq2dq",
+ insn[0], insn[1], insn[2] );
+ goto decode_success;
+ }
+
/* MOVSS -- move 4 bytes of XMM reg to/from XMM reg or mem. */
if (insn[0] == 0xF3
@@ -4357,17 +4732,4 @@ static Addr disInstr ( UCodeBlock* cb, A
}
- /* MOVLPD -- 8-byte load/store. */
- if (sz == 2
- && insn[0] == 0x0F
- && (insn[1] == 0x12 || insn[1] == 0x13)) {
- Bool is_store = insn[1]==0x13;
- /* Cannot be used for reg-reg moves, according to Intel docs. */
- vg_assert(!epartIsReg(insn[2]));
- eip = dis_SSE3_load_store_or_mov
- (cb, sorb, eip+2, 8, is_store, "movlpd",
- 0x66, insn[0], insn[1] );
- goto decode_success;
- }
-
/* MOVDQU -- unaligned 16-byte load/store. */
if (insn[0] == 0xF3
@@ -4523,18 +4885,4 @@ static Addr disInstr ( UCodeBlock* cb, A
}
- /* MOVLPS -- 8-byte load/store. How is this different from MOVLPS
- ? */
- if (insn[0] == 0x0F
- && (insn[1] == 0x12 || insn[1] == 0x13)) {
- Bool is_store = insn[1]==0x13;
- vg_assert(sz == 4);
- /* Cannot be used for reg-reg moves, according to Intel docs. */
- // vg_assert(!epartIsReg(insn[2]));
- eip = dis_SSE2_load_store_or_mov
- (cb, sorb, eip+2, 8, is_store, "movlps",
- insn[0], insn[1] );
- goto decode_success;
- }
-
/* 0xF3: RCPSS -- reciprocal of scalar float */
if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0x53) {
@@ -4548,5 +4896,8 @@ static Addr disInstr ( UCodeBlock* cb, A
/* MOVMSKPD -- extract 2 sign bits from a xmm reg and copy them to
an ireg. Top 30 bits of ireg are set to zero. */
- if (sz == 2 && insn[0] == 0x0F && insn[1] == 0x50) {
+ /* MOVMSKPS -- extract 4 sign bits from a xmm reg and copy them to
+ an ireg. Top 28 bits of ireg are set to zero. */
+ if (insn[0] == 0x0F && insn[1] == 0x50) {
+ vg_assert(sz == 4 || sz == 2);
modrm = insn[2];
/* Intel docs don't say anything about a memory source being
@@ -4554,4 +4905,11 @@ static Addr disInstr ( UCodeBlock* cb, A
vg_assert(epartIsReg(modrm));
t1 = newTemp(cb);
+ if (sz == 4) {
+ uInstr3(cb, SSE2g_RegWr, 4,
+ Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
+ Lit16, (UShort)modrm,
+ TempReg, t1 );
+ uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, gregOfRM(modrm));
+ } else {
uInstr3(cb, SSE3g_RegWr, 4,
Lit16, (((UShort)0x66) << 8) | (UShort)insn[0],
@@ -4559,6 +4917,8 @@ static Addr disInstr ( UCodeBlock* cb, A
TempReg, t1 );
uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, gregOfRM(modrm));
+ }
if (dis)
- VG_(printf)("movmskpd %s, %s\n",
+ VG_(printf)("movmskp%c %s, %s\n",
+ sz == 4 ? 's' : 'd',
nameXMMReg(eregOfRM(modrm)),
nameIReg(4,gregOfRM(modrm)));
@@ -4581,14 +4941,53 @@ static Addr disInstr ( UCodeBlock* cb, A
}
- /* MOVHPD -- 8-byte load/store. */
- if (sz == 2
- && insn[0] == 0x0F
+ /* MOVHLPS -- move two packed floats from high quadword to low quadword */
+ /* MOVLPS -- load/store two packed floats to/from low quadword. */
+ /* MOVLPD -- load/store packed double to/from low quadword. */
+ if (insn[0] == 0x0F
+ && (insn[1] == 0x12 || insn[1] == 0x13)) {
+ Bool is_store = insn[1]==0x13;
+ vg_assert(sz == 4 || sz == 2);
+ if (sz == 4) {
+ if (epartIsReg(insn[2])) {
+ vg_assert(insn[1]==0x12);
+ eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 16, "movhlps",
+ insn[0], insn[1] );
+ } else {
+ eip = dis_SSE2_load_store_or_mov
+ (cb, sorb, eip+2, 8, is_store, "movlps",
+ insn[0], insn[1] );
+ }
+ } else {
+ vg_assert(!epartIsReg(insn[2]));
+ eip = dis_SSE3_load_store_or_mov
+ (cb, sorb, eip+2, 8, is_store, "movlpd",
+ 0x66, insn[0], insn[1] );
+ }
+ goto decode_success;
+ }
+
+ /* MOVLHPS -- move two packed floats from low quadword to high quadword */
+ /* MOVHPS -- load/store two packed floats to/from high quadword. */
+ /* MOVHPD -- load/store packed double to/from high quadword. */
+ if (insn[0] == 0x0F
&& (insn[1] == 0x16 || insn[1] == 0x17)) {
Bool is_store = insn[1]==0x17;
- /* Cannot be used for reg-reg moves, according to Intel docs. */
+ vg_assert(sz == 4 || sz == 2);
+ if (sz == 4) {
+ if (epartIsReg(insn[2])) {
+ vg_assert(insn[1]==0x16);
+ eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 16, "movlhps",
+ insn[0], insn[1] );
+ } else {
+ eip = dis_SSE2_load_store_or_mov
+ (cb, sorb, eip+2, 8, is_store, "movhps",
+ insn[0], insn[1] );
+ }
+ } else {
vg_assert(!epartIsReg(insn[2]));
eip = dis_SSE3_load_store_or_mov
(cb, sorb, eip+2, 8, is_store, "movhpd",
0x66, insn[0], insn[1] );
+ }
goto decode_success;
}
@@ -4615,26 +5014,26 @@ static Addr disInstr ( UCodeBlock* cb, A
}
- /* CVTDQ2PD -- convert one single double. to float. */
- if (insn[0] == 0xF3 && insn[1] == 0x0F && insn[2] == 0xE6) {
- vg_assert(sz == 4);
- eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+3, 8, "cvtdq2pd",
- insn[0], insn[1], insn[2] );
- goto decode_success;
- }
-
- /* CVTPD2PS -- convert two doubles to two floats. */
- if (sz == 2 &&
- insn[0] == 0x0F && insn[1] == 0x5A) {
- eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16, "cvtpd2ps",
+ /* sz==4: SQRTPS: square root of packed float. */
+ /* sz==2: SQRTPD: square root of packed double. */
+ if (insn[0] == 0x0F && insn[1] == 0x51) {
+ vg_assert(sz == 2 || sz == 4);
+ if (sz == 4) {
+ eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 16,
+ "sqrtps",
+ insn[0], insn[1] );
+ } else {
+ eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16,
+ "sqrtpd",
0x66, insn[0], insn[1] );
+ }
goto decode_success;
}
- /* SQRTPD: square root of packed double. */
- if (sz == 2
- && insn[0] == 0x0F && insn[1] == 0x51) {
- eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16,
- "sqrtpd",
- 0x66, insn[0], insn[1] );
+ /* RSQRTPS: square root reciprocal of packed float. */
+ if (insn[0] == 0x0F && insn[1] == 0x52) {
+ vg_assert(sz == 4);
+ eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 16,
+ "rsqrtps",
+ insn[0], insn[1] );
goto decode_success;
}
@@ -6073,4 +6472,22 @@ static Addr disInstr ( UCodeBlock* cb, A
break;
+ /* =-=-=-=-=-=-=-=-=-=-= MOVNTI -=-=-=-=-=-=-=-=-= */
+
+ case 0xC3: /* MOVNTI Gv,Ev */
+ vg_assert(sz == 4);
+ modrm = getUChar(eip);
+ vg_assert(!epartIsReg(modrm));
+ t1 = newTemp(cb);
+ uInstr2(cb, GET, 4, ArchReg, gregOfRM(modrm), TempReg, t1);
+ pair = disAMode ( cb, sorb, eip, dis?dis_buf:NULL );
+ t2 = LOW24(pair);
+ eip += HI8(pair);
+ uInstr2(cb, STORE, 4, TempReg, t1, TempReg, t2);
+ if (dis)
+ VG_(printf)("movnti %s,%s\n",
+ nameIReg(4,gregOfRM(modrm)),
+ dis_buf);
+ break;
+
/* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
@@ -6429,4 +6846,10 @@ static Addr disInstr ( UCodeBlock* cb, A
break;
+ case 0xD4:
+ /* PADDQ (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "paddq", False );
+ break;
+
case 0xEC: case 0xED:
/* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
@@ -6441,5 +6864,5 @@ static Addr disInstr ( UCodeBlock* cb, A
break;
- case 0xF8: case 0xF9: case 0xFA:
+ case 0xF8: case 0xF9: case 0xFA: case 0xFB:
/* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
vg_assert(sz == 4);
@@ -6459,4 +6882,9 @@ static Addr disInstr ( UCodeBlock* cb, A
break;
+ case 0xE4: /* PMULHUW (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "pmulhuw", False );
+ break;
+
case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
vg_assert(sz == 4);
@@ -6469,4 +6897,9 @@ static Addr disInstr ( UCodeBlock* cb, A
break;
+ case 0xF4: /* PMULUDQ (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "pmuludq", False );
+ break;
+
case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
vg_assert(sz == 4);
@@ -6551,4 +6984,103 @@ static Addr disInstr ( UCodeBlock* cb, A
break;
+ case 0xDA:
+ /* PMINUB (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "pminub", False );
+ break;
+
+ case 0xDE:
+ /* PMAXUB (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "pmaxub", False );
+ break;
+
+ case 0xEA:
+ /* PMINSW (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "pminsw", False );
+ break;
+
+ case 0xEE:
+ /* PMAXSW (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "pmaxsw", False );
+ break;
+
+ case 0xE0:
+ /* PAVGB (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "pavgb", False );
+ break;
+
+ case 0xE3:
+ /* PAVGW (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "pavgw", False );
+ break;
+
+ case 0xF6:
+ /* PSADBW (src)mmxreg-or-mem, (dst)mmxreg */
+ vg_assert(sz == 4);
+ eip = dis_MMXop_regmem_to_reg ( cb, sorb, eip, opc, "psadbw", False );
+ break;
+
+ case 0xD7:
+ /* PMOVMSKB (src)mmxreg, (dst)ireg */
+ vg_assert(sz == 4);
+ modrm = getUChar(eip);
+ vg_assert(epartIsReg(modrm));
+ t1 = newTemp(cb);
+ uInstr3(cb, SSE2g_RegWr, 4,
+ Lit16, (((UShort)(0x0F)) << 8) | (UShort)(opc),
+ Lit16, (UShort)modrm,
+ TempReg, t1 );
+ uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, gregOfRM(modrm));
+ if (dis)
+ VG_(printf)("pmovmskb %s, %s\n",
+ nameMMXReg(eregOfRM(modrm)),
+ nameIReg(4,gregOfRM(modrm)));
+ eip++;
+ break;
+
+ case 0xC5:
+ /* PEXTRW (src)mmxreg, (dst)ireg */
+ vg_assert(sz == 4);
+ t1 = newTemp(cb);
+ modrm = getUChar(eip); eip++;
+ abyte = getUChar(eip); eip++;
+ vg_assert(epartIsReg(modrm));
+ uInstr3(cb, SSE2g1_RegWr, 4,
+ Lit16, (((UShort)(0x0F)) << 8) | (UShort)(opc),
+ Lit16, (UShort)modrm,
+ TempReg, t1 );
+ uLiteral(cb, abyte);
+ uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, gregOfRM(modrm));
+ if (dis)
+ VG_(printf)("pextrw %s, %d, %s\n",
+ nameMMXReg(eregOfRM(modrm)), (Int)abyte,
+ nameIReg(4, gregOfRM(modrm)));
+ break;
+
+ case 0xC4:
+ /* PINSRW (src)ireg, (dst)mmxreg */
+ vg_assert(sz == 4);
+ t1 = newTemp(cb);
+ modrm = getUChar(eip); eip++;
+ abyte = getUChar(eip); eip++;
+ vg_assert(epartIsReg(modrm));
+ uInstr2(cb, GET, 2, ArchReg, eregOfRM(modrm), TempReg, t1);
+ uInstr3(cb, SSE2e1_RegRd, 2,
+ Lit16, (((UShort)(0x0F)) << 8) | (UShort)(opc),
+ Lit16, (UShort)modrm,
+ TempReg, t1 );
+ uLiteral(cb, abyte);
+ if (dis)
+ VG_(printf)("pinsrw %s, %d, %s\n",
+ nameIReg(2, eregOfRM(modrm)),
+ (Int)abyte,
+ nameMMXReg(gregOfRM(modrm)));
+ break;
+
case 0xA1: /* POP %FS */
dis_pop_segreg( cb, R_FS, sz ); break;
--- valgrind/coregrind/vg_translate.c #1.67:1.68
@@ -415,5 +415,6 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bo
# define SZ48 (u->size == 4 || u->size == 8)
# define SZ416 (u->size == 4 || u->size == 16)
-# define SZsse2 (u->size == 4 || u->size == 16 || u->size == 512)
+# define SZ816 (u->size == 8 || u->size == 16)
+# define SZsse2 (u->size == 4 || u->size == 8 || u->size == 16 || u->size == 512)
# define SZsse3 (u->size == 4 || u->size == 8 || u->size == 16)
# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
@@ -568,9 +569,12 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bo
case SSE2a_MemRd: return LIT0 && SZsse2 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
case SSE2a1_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE2g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE2g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE2e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3a_MemWr: return LIT0 && SZsse3 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3a_MemRd: return LIT0 && SZsse3 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3a1_MemRd: return LIT8 && SZ16 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3a1_MemRd: return LIT8 && SZ816 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
@@ -898,5 +902,8 @@ Char* VG_(name_UOpcode) ( Bool upper, Op
case SSE2a_MemWr: return "SSE2a_MWr";
case SSE2a_MemRd: return "SSE2a_MRd";
+ case SSE2g_RegWr: return "SSE2g_RWr";
case SSE2a1_MemRd: return "SSE2a1_MRd";
+ case SSE2g1_RegWr: return "SSE2g1_RWr";
+ case SSE2e1_RegRd: return "SSE2e1_RRd";
case SSE3e_RegRd: return "SSE3e_RRd";
case SSE3e_RegWr: return "SSE3e_RWr";
@@ -1063,4 +1070,7 @@ void pp_UInstrWorker ( Int instrNo, UIns
case SSE2a_MemWr:
case SSE2a_MemRd:
+ case SSE2g_RegWr:
+ case SSE2g1_RegWr:
+ case SSE2e1_RegRd:
VG_(printf)("0x%x:0x%x:0x%x",
(u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
@@ -1271,4 +1281,5 @@ Int VG_(get_reg_usage) ( UInstr* u, Tag
case SSE3a1_MemRd:
case SSE2a1_MemRd:
+ case SSE2e1_RegRd:
case SSE3e_RegRd:
case SSE3a_MemWr:
@@ -1278,4 +1289,6 @@ Int VG_(get_reg_usage) ( UInstr* u, Tag
case SSE2a_MemRd: RD(3); break;
+ case SSE2g_RegWr:
+ case SSE2g1_RegWr:
case SSE3e_RegWr:
case SSE3g1_RegWr:
@@ -1442,4 +1455,5 @@ Int maybe_uinstrReadsArchReg ( UInstr* u
case MMX2_ERegRd: case MMX2_ERegWr:
case SSE2a_MemWr: case SSE2a_MemRd: case SSE2a1_MemRd:
+ case SSE2g_RegWr: case SSE2g1_RegWr: case SSE2e1_RegRd:
case SSE3a_MemWr: case SSE3a_MemRd: case SSE3a1_MemRd:
case SSE3e_RegRd: case SSE3g_RegWr: case SSE3e_RegWr:
--- valgrind/include/vg_skin.h.base #1.8:1.9
@@ -711,4 +711,30 @@
SSE2a_MemWr,
+ /* 3 bytes, writes an integer register. Insns of the form
+ bbbbbbbb:bbbbbbbb:11 ireg bbb.
+ Held in val1[15:0] and val2[7:0], and ireg is to be replaced
+ at codegen time by a reference to the relevant RealReg.
+ Transfer is always at size 4. Arg3 holds this Temp/Real Reg.
+ */
+ SSE2g_RegWr,
+
+ /* 4 bytes, writes an integer register. Insns of the form
+ bbbbbbbb:bbbbbbbb:11 ireg bbb :bbbbbbbb. Held in
+ val1[15:0] and val2[7:0] and lit32[7:0], and ireg is to be
+ replaced at codegen time by a reference to the relevant
+ RealReg. Transfer is always at size 4. Arg3 holds this
+ Temp/Real Reg.
+ */
+ SSE2g1_RegWr,
+
+ /* 4 bytes, reads an integer register. Insns of the form
+ bbbbbbbb:bbbbbbbb:11 bbb ireg :bbbbbbbb. Held in
+ val1[15:0] and val2[7:0] and lit32[7:0], and ireg is to be
+ replaced at codegen time by a reference to the relevant
+ RealReg. Transfer is always at size 2. Arg3 holds this
+ Temp/Real Reg.
+ */
+ SSE2e1_RegRd,
+
/* 4 bytes, no memrefs, no iregdefs, copy exactly to the
output. Held in val1[15:0] and val2[15:0]. */
--- valgrind/memcheck/mc_translate.c #1.34:1.35
@@ -1077,4 +1077,7 @@ static UCodeBlock* memcheck_instrument (
/* SSE ins referencing scalar integer registers */
+ case SSE2g_RegWr:
+ case SSE2g1_RegWr:
+ case SSE2e1_RegRd:
case SSE3g_RegWr:
case SSE3e_RegRd:
--- valgrind/none/tests/Makefile.am #1.21:1.22
@@ -25,4 +25,7 @@
fucomip.stderr.exp fucomip.vgtest \
gxx304.stderr.exp gxx304.vgtest \
+ insn_mmx.stderr.exp insn_mmx.stdout.exp insn_mmx.vgtest \
+ insn_sse.stderr.exp insn_sse.stdout.exp insn_sse.vgtest \
+ insn_sse2.stderr.exp insn_sse2.stdout.exp insn_sse2.vgtest \
map_unmap.stdout.exp map_unmap.vgtest \
mremap.stdout.exp mremap.vgtest \
@@ -48,5 +51,6 @@
args bitfield1 bt_everything bt_literal coolo_strlen \
cpuid dastest discard exec-sigmask floored fork fpu_lazy_eflags \
- fucomip munmap_exe map_unmap mremap rcl_assert \
+ fucomip insn_mmx insn_sse insn_sse2 \
+ munmap_exe map_unmap mremap rcl_assert \
rcrl readline1 resolv seg_override sha1_test shortpush shorts smc1 \
pth_blockedsig \
@@ -72,4 +76,10 @@
fpu_lazy_eflags_SOURCES = fpu_lazy_eflags.c
fucomip_SOURCES = fucomip.c
+insn_mmx_SOURCES = insn_mmx.def
+insn_mmx_LDADD = -lm
+insn_sse_SOURCES = insn_sse.def
+insn_sse_LDADD = -lm
+insn_sse2_SOURCES = insn_sse2.def
+insn_sse2_LDADD = -lm
map_unmap_SOURCES = map_unmap.c
mremap_SOURCES = mremap.c
@@ -100,2 +110,4 @@
fpu_lazy_eflags.o: CFLAGS += -O2 -mcpu=pentiumpro -march=pentiumpro
+.def.c:
+ $(PERL) gen_insn_test.pl < $< > $@
--- valgrind/tests/Makefile.am #1.34:1.35
@@ -13,5 +13,6 @@
check_PROGRAMS = \
- true
+ true \
+ cputest
AM_CFLAGS = $(WERROR) -Winline -Wall -Wshadow -g
@@ -20,4 +21,4 @@
# generic C ones
true_SOURCES = true.c
-
+cputest_SOURCES = cputest.c
--- valgrind/tests/vg_regtest.in #1.17:1.18
@@ -50,4 +50,5 @@
# - stdout_filter: <filter to run stdout through> (default: none)
# - stderr_filter: <filter to run stderr through> (default: ./filter_stderr)
+# - cpu_test: <cpu feature required for test> (default: none)
#
# Note that filters are necessary for stderr results to filter out things that
@@ -81,4 +82,5 @@
my $stdout_filter; # filter program to run stdout results file through
my $stderr_filter; # filter program to run stderr results file through
+my $cpu_test; # cpu feature to check for before running test
my @failures; # List of failed tests
@@ -166,6 +168,6 @@
# Defaults.
- ($vgopts, $prog, $args, $stdout_filter, $stderr_filter) =
- ("", undef, "", undef, undef);
+ ($vgopts, $prog, $args, $stdout_filter, $stderr_filter, $cpu_test) =
+ ("", undef, "", undef, undef, undef);
# Every test directory must have a "filter_stderr"
@@ -185,4 +187,6 @@
} elsif ($line =~ /^\s*stderr_filter:\s*(.*)$/) {
$stderr_filter = validate_program(".", $1, 1);
+ } elsif ($line =~ /^\s*cpu_test:\s*(.*)$/) {
+ $cpu_test = $1;
} else {
die "Bad line in $f: $line\n";
@@ -223,4 +227,8 @@
read_vgtest_file($vgtest);
+ if (defined $cpu_test) {
+ return unless system("../../tests/cputest $cpu_test") == 0;
+ }
+
printf("%-16s valgrind $vgopts $prog $args\n", "$name:");
|