|
From: <sv...@va...> - 2006-12-26 02:59:54
|
Author: sewardj
Date: 2006-12-26 02:59:50 +0000 (Tue, 26 Dec 2006)
New Revision: 6423
Log:
Merge r6365 and r6367 (fix for: "Drepper: obscure Cachegrind
simulation bug", and the same for Callgrind)
Modified:
branches/VALGRIND_3_2_BRANCH/cachegrind/cg_sim.c
branches/VALGRIND_3_2_BRANCH/callgrind/sim.c
Modified: branches/VALGRIND_3_2_BRANCH/cachegrind/cg_sim.c
===================================================================
--- branches/VALGRIND_3_2_BRANCH/cachegrind/cg_sim.c 2006-12-26 02:56:23 UTC (rev 6422)
+++ branches/VALGRIND_3_2_BRANCH/cachegrind/cg_sim.c 2006-12-26 02:59:50 UTC (rev 6423)
@@ -80,21 +80,6 @@
c->tags[i] =3D 0;
}
=20
-#if 0
-static void print_cache(cache_t2* c)
-{
- UInt set, way, i;
-
- /* Note initialisation and update of 'i'. */
- for (i =3D 0, set =3D 0; set < c->sets; set++) {
- for (way =3D 0; way < c->assoc; way++, i++) {
- VG_(printf)("%16lx ", c->tags[i]);
- }
- VG_(printf)("\n");
- }
-}
-#endif=20
-
/* This is done as a macro rather than by passing in the cache_t2 as an=20
* arg because it slows things down by a small amount (3-5%) due to all=20
* that extra indirection. */
@@ -114,9 +99,10 @@
static __inline__ =
\
void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2) =
\
{ =
\
- register UInt set1 =3D ( a >> L.line_size_bits) & (L.sets_mi=
n_1); \
- register UInt set2 =3D ((a+size-1) >> L.line_size_bits) & (L.sets_mi=
n_1); \
- register UWord tag =3D a >> L.tag_shift; =
\
+ UInt set1 =3D ( a >> L.line_size_bits) & (L.sets_min_1); =
\
+ UInt set2 =3D ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); =
\
+ UWord tag =3D a >> L.tag_shift; =
\
+ UWord tag2; =
\
Int i, j; =
\
Bool is_miss =3D False; =
\
UWord* set; =
\
@@ -176,22 +162,23 @@
is_miss =3D True; =
\
block2: =
\
set =3D &(L.tags[set2 << L.assoc_bits]); =
\
- if (tag =3D=3D set[0]) { =
\
+ tag2 =3D (a+size-1) >> L.tag_shift; =
\
+ if (tag2 =3D=3D set[0]) { =
\
goto miss_treatment; =
\
} =
\
for (i =3D 1; i < L.assoc; i++) { =
\
- if (tag =3D=3D set[i]) { =
\
+ if (tag2 =3D=3D set[i]) { =
\
for (j =3D i; j > 0; j--) { =
\
set[j] =3D set[j - 1]; =
\
} =
\
- set[0] =3D tag; =
\
+ set[0] =3D tag2; =
\
goto miss_treatment; =
\
} =
\
} =
\
for (j =3D L.assoc - 1; j > 0; j--) { =
\
set[j] =3D set[j - 1]; =
\
} =
\
- set[0] =3D tag; =
\
+ set[0] =3D tag2; =
\
is_miss =3D True; =
\
miss_treatment: =
\
if (is_miss) { MISS_TREATMENT; } =
\
Modified: branches/VALGRIND_3_2_BRANCH/callgrind/sim.c
===================================================================
--- branches/VALGRIND_3_2_BRANCH/callgrind/sim.c 2006-12-26 02:56:23 UTC (rev 6422)
+++ branches/VALGRIND_3_2_BRANCH/callgrind/sim.c 2006-12-26 02:59:50 UTC (rev 6423)
@@ -300,10 +300,11 @@
/* Access straddles two lines. */
/* Nb: this is a fast way of doing ((set1+1) % c->sets) */
else if (((set1 + 1) & (c->sets-1)) =3D=3D set2) {
+ UWord tag2 =3D (a+size-1) >> c->tag_shift;
=20
/* the call updates cache structures as side effect */
CacheResult res1 =3D cachesim_setref(c, set1, tag);
- CacheResult res2 =3D cachesim_setref(c, set2, tag);
+ CacheResult res2 =3D cachesim_setref(c, set2, tag2);
return ((res1 =3D=3D Miss) || (res2 =3D=3D Miss)) ? Miss : Hit;
=20
} else {
@@ -404,10 +405,11 @@
/* Access straddles two lines. */
/* Nb: this is a fast way of doing ((set1+1) % c->sets) */
else if (((set1 + 1) & (c->sets-1)) =3D=3D set2) {
+ UWord tag2 =3D (a+size-1) >> c->tag_shift;
=20
/* the call updates cache structures as side effect */
CacheResult res1 =3D cachesim_setref_wb(c, ref, set1, tag);
- CacheResult res2 =3D cachesim_setref_wb(c, ref, set2, tag);
+ CacheResult res2 =3D cachesim_setref_wb(c, ref, set2, tag2);
=20
if ((res1 =3D=3D MissDirty) || (res2 =3D=3D MissDirty)) return MissDirt=
y;
return ((res1 =3D=3D Miss) || (res2 =3D=3D Miss)) ? Miss : Hit;
@@ -758,10 +760,11 @@
/* Access straddles two lines. */
/* Nb: this is a fast way of doing ((set1+1) % c->sets) */
else if (((set1 + 1) & (c->sets-1)) =3D=3D set2) {
+ UWord tag2 =3D a >> c->tag_shift;
=20
/* the call updates cache structures as side effect */
CacheResult res1 =3D cacheuse_isMiss(c, set1, tag);
- CacheResult res2 =3D cacheuse_isMiss(c, set2, tag);
+ CacheResult res2 =3D cacheuse_isMiss(c, set2, tag2);
return ((res1 =3D=3D Miss) || (res2 =3D=3D Miss)) ? Miss : Hit;
=20
} else {
@@ -778,9 +781,10 @@
=
\
static CacheModelResult cacheuse##_##L##_doRead(Addr a, UChar size) =
\
{ =
\
- register UInt set1 =3D ( a >> L.line_size_bits) & (L.sets_min=
_1); \
- register UInt set2 =3D ((a+size-1) >> L.line_size_bits) & (L.sets_min=
_1); \
- register UWord tag =3D a & L.tag_mask; =
\
+ UInt set1 =3D ( a >> L.line_size_bits) & (L.sets_min_1); =
\
+ UInt set2 =3D ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); =
\
+ UWord tag =3D a & L.tag_mask; =
\
+ UWord tag2; =
\
int i, j, idx; =
\
UWord *set, tmp_tag; \
UInt use_mask; \
@@ -879,7 +883,8 @@
block2: =
\
set =3D &(L.tags[set2 << L.assoc_bits]); =
\
use_mask =3D L.line_end_mask[(a+size-1) & L.line_size_mask]; =
\
- if (tag =3D=3D (set[0] & L.tag_mask)) { =
\
+ tag2 =3D (a+size-1) & L.tag_mask; =
\
+ if (tag2 =3D=3D (set[0] & L.tag_mask)) { =
\
idx =3D (set2 << L.assoc_bits) | (set[0] & ~L.tag_mask); =
\
L.use[idx].count ++; =
\
L.use[idx].mask |=3D use_mask; =
\
@@ -889,7 +894,7 @@
return miss1; =
\
} =
\
for (i =3D 1; i < L.assoc; i++) { =
\
- if (tag =3D=3D (set[i] & L.tag_mask)) { \
+ if (tag2 =3D=3D (set[i] & L.tag_mask)) { \
tmp_tag =3D set[i]; =
\
for (j =3D i; j > 0; j--) { =
\
set[j] =3D set[j - 1]; =
\
@@ -908,7 +913,7 @@
for (j =3D L.assoc - 1; j > 0; j--) { =
\
set[j] =3D set[j - 1]; =
\
} =
\
- set[0] =3D tag | tmp_tag; =
\
+ set[0] =3D tag2 | tmp_tag; =
\
idx =3D (set2 << L.assoc_bits) | tmp_tag; =
\
miss2 =3D update_##L##_use(&L, idx, \
use_mask, (a+size-1) &~ L.line_size_mask); \
|