commit 486a2efa0da9b8441ea059186af0b1ab6c15240b
Author: Josef Weidendorfer <Josef.Weidendorfer@gmx.de>
Date:   Mon Nov 21 23:55:21 2011 +0100

    Use block numbers for tags. Saves instructions in hot path.

diff --git a/cachegrind/cg_sim.c b/cachegrind/cg_sim.c
index e10261d..2478194 100644
--- a/cachegrind/cg_sim.c
+++ b/cachegrind/cg_sim.c
@@ -91,7 +91,7 @@ Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
     /* This loop is unrolled for just the first case, which is the most */
     /* common.  We can't unroll any further because it would screw up   */
     /* if we have a direct-mapped (1-way) cache.                        */
-    if (tag == set[0])
+    if (LIKELY(tag == set[0]))
         return False;
 
     /* If the tag is one other than the MRU, move it into the MRU spot  */
@@ -119,21 +119,31 @@ __attribute__((always_inline))
 static __inline__
 Bool cachesim_ref_is_miss(cache_t2* c, Addr a, UChar size)
 {
-    UInt  set1 = ( a         >> c->line_size_bits) & (c->sets_min_1);
-    UInt  set2 = ((a+size-1) >> c->line_size_bits) & (c->sets_min_1);
-    UWord tag  = a >> c->tag_shift;
+    /* A memory block has the size of a cache line */
+    UWord block1 =  a         >> c->line_size_bits;
+    UWord block2 = (a+size-1) >> c->line_size_bits;
+    UInt  set1   = block1 & c->sets_min_1;
+    UInt  set2   = block2 & c->sets_min_1;
+
+    /* Tags used in real caches are minimal to save space.
+     * As the low bits of the block number are identical for all
+     * addresses mapping into one cache set, real caches use
+     *   tag = block >> log2(#sets)
+     * But using the full block number as a more specific tag is
+     * fine, and saves instructions.
+     */
+    UWord tag1   = block1;
 
     /* Access entirely within line. */
-    if (set1 == set2)
-	return cachesim_setref_is_miss(c, set1, tag);
+    if (LIKELY(set1 == set2))
+	return cachesim_setref_is_miss(c, set1, tag1);
 
     /* Access straddles two lines. */
-    /* Nb: this is a fast way of doing ((set1+1) % c->sets) */
-    else if (((set1 + 1) & (c->sets_min_1)) == set2) {
-	UWord tag2  = (a+size-1) >> c->tag_shift;
+    else if (LIKELY(block1 + 1 == block2)) {
+        UWord tag2  = block2;
 
 	/* always do both, as state is updated as side effect */
-	if (cachesim_setref_is_miss(c, set1, tag)) {
+	if (cachesim_setref_is_miss(c, set1, tag1)) {
 	  cachesim_setref_is_miss(c, set2, tag2);
 	  return True;
 	}

