commit 09e4faea3a88af280b15c8b6473e1d67098221e8
Author: Josef Weidendorfer <Josef.Weidendorfer@gmx.de>
Date:   Mon Nov 21 23:22:28 2011 +0100

    Special case: Ir not crossing cache lines

diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 4b36204..9013c07 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -323,6 +323,16 @@ void log_1I_0D_cache_access(InstrInfo* n)
    n->parent->Ir.a++;
 }
 
+/* Ev_IrX helper: the fetch may straddle cache lines (or I1/LL line sizes
+ * differ), so simulate it via the general path; counts one Ir access. */
+static VG_REGPARM(1)
+void log_IrX_cache_access(InstrInfo* n)
+{
+   cachesim_I1_dorefX(n->instr_addr, n->instr_len,
+                      &n->parent->Ir.m1, &n->parent->Ir.mL);
+   n->parent->Ir.a++;
+}
+
 static VG_REGPARM(2)
 void log_2I_0D_cache_access(InstrInfo* n, InstrInfo* n2)
 {
@@ -472,8 +482,9 @@ typedef
    IRAtom;
 
 typedef 
-   enum { 
+   enum {
       Ev_Ir,  // Instruction read
+      Ev_IrX, // Instruction read crossing cache lines
       Ev_Dr,  // Data read
       Ev_Dw,  // Data write
       Ev_Dm,  // Data modify (read then write)
@@ -490,6 +501,8 @@ typedef
          struct {
          } Ir;
          struct {
+         } IrX;
+         struct {
             IRAtom* ea;
             Int     szB;
          } Dr;
@@ -607,6 +620,9 @@ static void showEvent ( Event* ev )
       case Ev_Ir: 
          VG_(printf)("Ir %p\n", ev->inode);
          break;
+      case Ev_IrX: 
+         VG_(printf)("IrX %p\n", ev->inode);
+         break;
       case Ev_Dr:
          VG_(printf)("Dr %p %d EA=", ev->inode, ev->Ev.Dr.szB);
          ppIRExpr(ev->Ev.Dr.ea); 
@@ -774,6 +790,18 @@ static void flushEvents ( CgState* cgs )
                i++;
             }
             break;
+         case Ev_IrX:
+            if (clo_cache_sim) {
+	       helperName = "log_IrX_cache_access";
+	       helperAddr = &log_IrX_cache_access;
+	    } else {
+	       helperName = "log_1I";
+	       helperAddr = &log_1I;
+	    }
+	    argv = mkIRExprVec_1( i_node_expr );
+	    regparms = 1;
+	    i++;
+            break;
          case Ev_Dr:
          case Ev_Dm:
             /* Data read or modify */
@@ -836,7 +864,8 @@ static void addEvent_Ir ( CgState* cgs, InstrInfo* inode )
    tl_assert(cgs->events_used >= 0 && cgs->events_used < N_EVENTS);
    evt = &cgs->events[cgs->events_used];
    init_Event(evt);
-   evt->tag      = Ev_Ir;
+   evt->tag      = cachesim_isX(inode->instr_addr, inode->instr_len)
+                   ? Ev_IrX : Ev_Ir;
    evt->inode    = inode;
    cgs->events_used++;
 }
diff --git a/cachegrind/cg_sim.c b/cachegrind/cg_sim.c
index 2478194..073dcc9 100644
--- a/cachegrind/cg_sim.c
+++ b/cachegrind/cg_sim.c
@@ -97,7 +97,7 @@ Bool cachesim_setref_is_miss(cache_t2* c, UInt set_no, UWord tag)
     /* If the tag is one other than the MRU, move it into the MRU spot  */
     /* and shuffle the rest down.                                       */
     for (i = 1; i < c->assoc; i++) {
-        if (tag == set[i]) {
+      if (tag == set[i]) {
             for (j = i; j > 0; j--) {
                 set[j] = set[j - 1];
             }
@@ -175,11 +175,29 @@ static void cachesim_D1_initcache(cache_t config)
   cachesim_initcache(config, &D1);
 }
 
-
+/* Special case of an instruction fetch: only valid when I1 and LL share
+ * one line size and the access stays inside a single cache line. */
 __attribute__((always_inline))
 static __inline__
 void cachesim_I1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
 {
+   /* One line number serves both caches; 'size' is not consulted here. */
+   UWord line = a >> I1.line_size_bits;
+
+   /* I1 hit: nothing to count, and LL is not referenced at all. */
+   if (!cachesim_setref_is_miss(&I1, line & I1.sets_min_1, line))
+      return;
+   (*m1)++;
+   if (cachesim_setref_is_miss(&LL, line & LL.sets_min_1, line))
+      (*mL)++;
+}
+
+
+/* general case: Ir can cross cache line boundaries */
+__attribute__((always_inline))
+static __inline__
+void cachesim_I1_dorefX(Addr a, UChar size, ULong* m1, ULong *mL)
+{
   if (cachesim_ref_is_miss(&I1, a, size)) {
     (*m1)++;
     if (cachesim_ref_is_miss(&LL, a, size))
@@ -198,6 +216,23 @@ void cachesim_D1_doref(Addr a, UChar size, ULong* m1, ULong *mL)
   }
 }
 
+/* Decide whether an instruction fetch of 'size' bytes at 'a' needs the
+ * general simulation path.  Returning True is always safe (general case);
+ * False selects the single-line special case and is returned only when
+ *  (1) I1 and LL have the same cache line size, and
+ *  (2) the first and last byte fall into the same I1 line (and hence,
+ *      because of (1), into the same LL line).
+ * Line numbers are kept in UWord so no bits of the address are dropped. */
+static
+Bool cachesim_isX(Addr a, UChar size)
+{
+  UWord first_line, last_line;
+  if (I1.line_size_bits != LL.line_size_bits) return True;
+  first_line =  a             >> I1.line_size_bits;
+  last_line  = (a + size - 1) >> I1.line_size_bits;
+  return (first_line != last_line) ? True : False;
+}
+
 /*--------------------------------------------------------------------*/
 /*--- end                                                 cg_sim.c ---*/
 /*--------------------------------------------------------------------*/

