|
From: Julian S. <js...@ac...> - 2004-01-04 23:31:28
|
CVS commit by jseward:
Support for FXSAVE/FXRSTOR (Tom Hughes). Fixes #71180.
M +3 -3 addrcheck/ac_main.c 1.59
M +17 -7 cachegrind/cg_main.c 1.61
M +1 -1 coregrind/vg_from_ucode.c 1.72
M +19 -0 coregrind/vg_to_ucode.c 1.120
M +19 -17 coregrind/vg_translate.c 1.67
M +1 -1 include/vg_skin.h.base 1.8
M +3 -3 memcheck/mac_needs.c 1.22
M +2 -2 memcheck/mc_main.c 1.46
M +2 -2 memcheck/mc_translate.c 1.34
--- valgrind/addrcheck/ac_main.c #1.58:1.59
@@ -908,5 +908,5 @@ void ac_fpu_ACCESS_check ( Addr addr, In
}
- if (size == 16 || size == 10 || size == 28 || size == 108) {
+ if (size == 16 || size == 10 || size == 28 || size == 108 || size == 512) {
PROF_EVENT(94);
ac_fpu_ACCESS_check_SLOWLY ( addr, size, isWrite );
@@ -1056,6 +1056,6 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
goto do_Access_ARG3;
do_Access_ARG3:
- sk_assert(u_in->size == 4
- || u_in->size == 8 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 8
+ || u_in->size == 16 || u_in->size == 512);
sk_assert(u_in->tag3 == TempReg);
t_addr = u_in->val3;
--- valgrind/cachegrind/cg_main.c #1.60:1.61
@@ -545,5 +545,5 @@ static Int compute_BBCC_array_size(UCode
case SSE2a_MemRd:
case SSE2a1_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
t_read = u_in->val3;
is_FPU_R = True;
@@ -578,5 +578,5 @@ static Int compute_BBCC_array_size(UCode
case SSE2a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
t_write = u_in->val3;
is_FPU_W = True;
@@ -799,9 +799,14 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
case SSE2a_MemRd:
case SSE2a1_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
t_read = u_in->val3;
t_read_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
- data_size = u_in->size;
+ /* Accesses of 512 bytes are simulated inaccurately (the size is
+ * clamped to MIN_LINE_SIZE), but they're very rare and this avoids
+ * errors from hitting more than two cache lines in the simulation. */
+ data_size = ( u_in->size <= MIN_LINE_SIZE
+ ? u_in->size
+ : MIN_LINE_SIZE);
VG_(copy_UInstr)(cb, u_in);
break;
@@ -857,12 +862,17 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
case SSE2a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
/* fall through */
case SSE3a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
t_write = u_in->val3;
t_write_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
- data_size = u_in->size;
+ /* Accesses of 512 bytes are simulated inaccurately (the size is
+ * clamped to MIN_LINE_SIZE), but they're very rare and this avoids
+ * errors from hitting more than two cache lines in the simulation. */
+ data_size = ( u_in->size <= MIN_LINE_SIZE
+ ? u_in->size
+ : MIN_LINE_SIZE);
VG_(copy_UInstr)(cb, u_in);
break;
--- valgrind/coregrind/vg_from_ucode.c #1.71:1.72
@@ -4076,5 +4076,5 @@ static void emitUInstr ( UCodeBlock* cb,
case SSE2a_MemWr:
case SSE2a_MemRd:
- vg_assert(u->size == 4 || u->size == 16);
+ vg_assert(u->size == 4 || u->size == 16 || u->size == 512);
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == Lit16);
--- valgrind/coregrind/vg_to_ucode.c #1.119:1.120
@@ -3546,6 +3546,25 @@ static Addr disInstr ( UCodeBlock* cb, A
UChar* insn = (UChar*)eip;
+ /* FXSAVE/FXRSTOR m512 -- store/load the 512-byte FPU/MMX/SSE state. */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && (!epartIsReg(insn[2]))
+ && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
+ Bool store = gregOfRM(insn[2]) == 0;
+ vg_assert(sz == 4);
+ pair = disAMode ( cb, sorb, eip+2, dis?dis_buf:NULL );
+ t1 = LOW24(pair);
+ eip += 2+HI8(pair);
+ uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
+ Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
+ Lit16, (UShort)insn[2],
+ TempReg, t1 );
+ if (dis)
+ VG_(printf)("fx%s %s\n", store ? "save" : "rstor", dis_buf );
+ goto decode_success;
+ }
+
/* STMXCSR/LDMXCSR m32 -- load/store the MXCSR register. */
if (insn[0] == 0x0F && insn[1] == 0xAE
+ && (!epartIsReg(insn[2]))
&& (gregOfRM(insn[2]) == 3 || gregOfRM(insn[2]) == 2) ) {
Bool store = gregOfRM(insn[2]) == 3;
--- valgrind/coregrind/vg_translate.c #1.66:1.67
@@ -415,5 +415,6 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bo
# define SZ48 (u->size == 4 || u->size == 8)
# define SZ416 (u->size == 4 || u->size == 16)
-# define SZsse (u->size == 4 || u->size == 8 || u->size == 16)
+# define SZsse2 (u->size == 4 || u->size == 16 || u->size == 512)
+# define SZsse3 (u->size == 4 || u->size == 8 || u->size == 16)
# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
# define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
@@ -564,9 +565,9 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bo
/* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
- case SSE2a_MemWr: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE2a_MemRd: return LIT0 && SZ416 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE2a_MemWr: return LIT0 && SZsse2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE2a_MemRd: return LIT0 && SZsse2 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
case SSE2a1_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3a_MemWr: return LIT0 && SZsse && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3a_MemRd: return LIT0 && SZsse && CCa && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3a_MemWr: return LIT0 && SZsse3 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3a_MemRd: return LIT0 && SZsse3 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
@@ -603,5 +604,6 @@ Bool VG_(saneUInstr) ( Bool beforeRA, Bo
# undef SZ48
# undef SZ416
-# undef SZsse
+# undef SZsse2
+# undef SZsse3
# undef SZi
# undef SZf
--- valgrind/include/vg_skin.h.base #1.7:1.8
@@ -961,5 +961,5 @@
UShort val3; /* third operand */
UChar opcode; /* opcode */
- UChar size; /* data transfer size */
+ UShort size; /* data transfer size */
/* word 4 */
--- valgrind/memcheck/mac_needs.c #1.21:1.22
@@ -733,5 +733,5 @@ A 78 ACCESS1_SLOWLY
82 fpu_read aligned 8
83 fpu_read 2
- 84 fpu_read 10/28/108
+ 84 fpu_read 10/28/108/512
M 85 fpu_write
@@ -739,5 +739,5 @@ M 86 fpu_write aligned 4
M 87 fpu_write aligned 8
M 88 fpu_write 2
-M 89 fpu_write 10/28/108
+M 89 fpu_write 10/28/108/512
90 fpu_access
@@ -745,5 +745,5 @@ M 89 fpu_write 10/28/108
92 fpu_access aligned 8
93 fpu_access 2
- 94 fpu_access 10/28/108
+ 94 fpu_access 10/28/108/512
100 fpu_access_check_SLOWLY
--- valgrind/memcheck/mc_main.c #1.45:1.46
@@ -1191,5 +1191,5 @@ void MC_(fpu_read_check) ( Addr addr, In
if (size == 16 /*SSE*/
- || size == 10 || size == 28 || size == 108) {
+ || size == 10 || size == 28 || size == 108 || size == 512) {
PROF_EVENT(84);
mc_fpu_read_check_SLOWLY ( addr, size );
@@ -1274,5 +1274,5 @@ void MC_(fpu_write_check) ( Addr addr, I
if (size == 16 /*SSE*/
- || size == 10 || size == 28 || size == 108) {
+ || size == 10 || size == 28 || size == 108 || size == 512) {
PROF_EVENT(89);
mc_fpu_write_check_SLOWLY ( addr, size );
--- valgrind/memcheck/mc_translate.c #1.33:1.34
@@ -1113,6 +1113,6 @@ static UCodeBlock* memcheck_instrument (
Int t_size;
- sk_assert(u_in->size == 4
- || u_in->size == 8 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 8
+ || u_in->size == 16 || u_in->size == 512);
t_size = INVALID_TEMPREG;
|