|
From: <jhr...@t-...> - 2003-10-19 15:06:07
|
Hello,
I wrote:
> now that my basic tests work under valgrind when compiled with ICC 7.1
> (excellent!), I've tightened the requirements. Another one of my tests,
> compiled with -march=pentium4 and profile guided optimizations under
> ICC 7.1 fails with
>
> ----------
> ==1568== Memcheck, a.k.a. Valgrind, a memory error detector for x86-linux.
> ==1568== Copyright (C) 2002-2003, and GNU GPL'd, by Julian Seward.
> ==1568== Using valgrind-20031012, a program supervision framework for
> x86-linux.
> ==1568== Copyright (C) 2000-2003, and GNU GPL'd, by Julian Seward.
> ==1568== Estimated CPU clock rate is 1724 MHz
> ==1568== For more details, rerun with: -v
> ==1568==
> float, 8
> rrr
> disInstr: unhandled instruction bytes: 0xF 0x14 0xF8 0xF3
> ----------
>
> Again an easy one? ;-)
Hm, unsure. To get this test running, I had to revisit the implementation of
the following instructions:
1. unpcklps/hps/lpd/hpd
lps/hps were missing
2. movaps/ups/apd/upd
upd was missing; the implementations of ps and pd differed in how they computed
the store flag (the ps version, once its typo is corrected -- `11` should be
`0x11` -- seems to be the more appropriate one).
3. movlps/lpd/hps/hpd
lps/hps/hpd were missing; the assertion that forbids reg-reg moves fired
(ICC 7.1 emits that form); and lpd moved 16 instead of 8 bytes, IIUC.
I didn't look into movhlps/movlhps. They still seem to be missing.
I trusted dis_SSE_reg_or_mem() and dis_SSE3_load_store_or_mov() to always do
The Right Thing (TM).
Best,
Joerg
P.S.: here's the diff -u -w:
----------
--- vg_to_ucode.c.orig Sun Oct 19 14:01:03 2003
+++ vg_to_ucode.c Sun Oct 19 16:09:09 2003
@@ -4142,14 +4142,22 @@
goto decode_success;
}
- /* 0x14: UNPCKLPD (src)xmmreg-or-mem, (dst)xmmreg */
- /* 0x15: UNPCKHPD (src)xmmreg-or-mem, (dst)xmmreg */
- if (sz == 2
- && insn[0] == 0x0F
+ /* 0x0F 0x14: UNPCKLPS (src)xmmreg-or-mem, (dst)xmmreg */
+ /* 0x0F 0x15: UNPCKHPS (src)xmmreg-or-mem, (dst)xmmreg */
+ /* 0x66 0x0F 0x14: UNPCKLPD (src)xmmreg-or-mem, (dst)xmmreg */
+ /* 0x66 0x0F 0x15: UNPCKHPD (src)xmmreg-or-mem, (dst)xmmreg */
+ if (insn[0] == 0x0F
&& (insn[1] == 0x14 || insn[1] == 0x15)) {
+ vg_assert(sz == 4 || sz == 2);
+ if (sz == 4) {
+ eip = dis_SSE2_reg_or_mem ( cb, sorb, eip+2, 16,
+ "unpck{l,h}ps",
+ insn[0], insn[1] );
+ } else {
eip = dis_SSE3_reg_or_mem ( cb, sorb, eip+2, 16,
"unpck{l,h}pd",
0x66, insn[0], insn[1] );
+ }
goto decode_success;
}
@@ -4379,15 +4387,18 @@
goto decode_success;
}
- /* I don't understand how MOVAPD differs from MOVAPS. */
/* MOVAPD (28,29) -- aligned load/store of xmm reg, or xmm-xmm reg
move */
+ /* MOVUPD (10,11) -- unaligned load/store of xmm reg, or xmm-xmm
+ reg move */
if (sz == 2
- && insn[0] == 0x0F && insn[1] == 0x28) {
- UChar* name = "movapd";
- //(insn[1] == 0x10 || insn[1] == 0x11)
- // ? "movups" : "movaps";
- Bool store = False; //insn[1] == 0x29 || insn[1] == 11;
+ && insn[0] == 0x0F && (insn[1] == 0x28
+ || insn[1] == 0x29
+ || insn[1] == 0x10
+ || insn[1] == 0x11)) {
+ UChar* name = (insn[1] == 0x10 || insn[1] == 0x11)
+ ? "movupd" : "movapd";
+ Bool store = insn[1] == 0x29 || insn[1] == 0x11;
eip = dis_SSE3_load_store_or_mov
( cb, sorb, eip+2, 16, store, name,
0x66, insn[0], insn[1] );
@@ -4404,7 +4415,7 @@
|| insn[1] == 0x11)) {
UChar* name = (insn[1] == 0x10 || insn[1] == 0x11)
? "movups" : "movaps";
- Bool store = insn[1] == 0x29 || insn[1] == 11;
+ Bool store = insn[1] == 0x29 || insn[1] == 0x11;
vg_assert(sz == 4);
eip = dis_SSE2_load_store_or_mov
( cb, sorb, eip+2, 16, store, name,
@@ -4423,16 +4434,42 @@
goto decode_success;
}
- /* MOVLPD -- 8-byte load/store. */
- if (sz == 2
- && insn[0] == 0x0F
+ /* 0x0F 0x12/0x13: MOVLPS -- 8-byte load/store. */
+ /* 0x66 0x0F 0x12/0x13: MOVLPD -- 8-byte load/store. */
+ if (insn[0] == 0x0F
&& (insn[1] == 0x12 || insn[1] == 0x13)) {
+ vg_assert(sz == 4 || sz == 2);
Bool is_store = insn[1]==0x13;
/* Cannot be used for reg-reg moves, according to Intel docs. */
- vg_assert(!epartIsReg(insn[2]));
+ /* But ICC 7.1 tells us another story ;-( */
+ /* vg_assert(!epartIsReg(insn[2])); */
+ if (sz == 4) {
+ eip = dis_SSE2_load_store_or_mov
+ (cb, sorb, eip+2, 8, is_store, "movlps",
+ insn[0], insn[1] );
+ } else {
eip = dis_SSE3_load_store_or_mov
- (cb, sorb, eip+2, 16, is_store, "movlpd",
+ (cb, sorb, eip+2, 8, is_store, "movlpd",
0x66, insn[0], insn[1] );
+ }
+ goto decode_success;
+ }
+
+ /* 0x0F 0x16/0x17: MOVHPS -- 8-byte load/store. */
+ /* 0x66 0x0F 0x16/0x17: MOVHPD -- 8-byte load/store. */
+ if (insn[0] == 0x0F
+ && (insn[1] == 0x16 || insn[1] == 0x17)) {
+ vg_assert(sz == 4 || sz == 2);
+ Bool is_store = insn[1]==0x17;
+ if (sz == 4) {
+ eip = dis_SSE2_load_store_or_mov
+ (cb, sorb, eip+2, 8, is_store, "movhps",
+ insn[0], insn[1] );
+ } else {
+ eip = dis_SSE3_load_store_or_mov
+ (cb, sorb, eip+2, 8, is_store, "movhpd",
+ 0x66, insn[0], insn[1] );
+ }
goto decode_success;
}
----------
|