|
From: Nicholas N. <nj...@ca...> - 2003-10-30 10:34:07
|
CVS commit by nethercote:
Fix cachegrind to deal with VG_(open)() returning any negative number on error,
not just -1.
M +1 -1 cg_main.c 1.54
--- valgrind/cachegrind/cg_main.c #1.53:1.54
@@ -1620,5 +1620,5 @@ static void fprint_BBCC_table_and_calc_t
fd = VG_(open)(cachegrind_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
VKI_S_IRUSR|VKI_S_IWUSR);
- if (-1 == fd) {
+ if (fd < 0) {
/* If the file can't be opened for whatever reason (conflict
between multiple cachegrinded processes?), give up now. */
|
|
From: Julian S. <js...@ac...> - 2003-11-04 22:54:58
|
CVS commit by jseward:
More SSE support for cachegrind.
MERGE TO STABLE
M +15 -0 cg_main.c 1.55
--- valgrind/cachegrind/cg_main.c #1.54:1.55
@@ -560,4 +560,10 @@ static Int compute_BBCC_array_size(UCode
break;
+ case SSE3a1_MemRd:
+ sk_assert(u_in->size == 16);
+ t_read = u_in->val3;
+ is_FPU_R = True;
+ break;
+
case SSE3ag_MemRd_RegWr:
sk_assert(u_in->size == 4 || u_in->size == 8);
@@ -807,4 +813,13 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
case SSE3a_MemRd:
sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
+ t_read = u_in->val3;
+ t_read_addr = newTemp(cb);
+ uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
+ data_size = u_in->size;
+ VG_(copy_UInstr)(cb, u_in);
+ break;
+
+ case SSE3a1_MemRd:
+ sk_assert(u_in->size == 16);
t_read = u_in->val3;
t_read_addr = newTemp(cb);
|
|
From: Nicholas N. <nj...@ca...> - 2004-01-04 16:58:36
|
CVS commit by nethercote:
Folded cg_sim_{gen,I1,D1,L2}.c into cg_sim.c, which makes a lot more sense and
is shorter.
A cg_sim.c 1.1 [GPL (v2+)]
M +1 -5 Makefile.am 1.42
M +1 -3 cg_main.c 1.60
--- valgrind/cachegrind/Makefile.am #1.41:1.42
@@ -11,9 +11,5 @@
bin_SCRIPTS = cg_annotate
-EXTRA_DIST = \
- cg_sim_I1.c \
- cg_sim_D1.c \
- cg_sim_L2.c \
- cg_sim_gen.c
+EXTRA_DIST = cg_sim.c
val_PROGRAMS = vgskin_cachegrind.so
--- valgrind/cachegrind/cg_main.c #1.59:1.60
@@ -41,7 +41,5 @@ typedef struct {
} cache_t;
-#include "cg_sim_L2.c"
-#include "cg_sim_I1.c"
-#include "cg_sim_D1.c"
+#include "cg_sim.c"
/*------------------------------------------------------------*/
|
|
From: Dirk M. <dm...@gm...> - 2004-01-06 16:04:00
|
On Sunday 04 January 2004 17:56, Nicholas Nethercote wrote:
> Folded cg_sim_{gen,I1,D1,L2}.c into cg_sim.c, which makes a lot more sense
> and is shorter.
Hmm, don't you want to remove those files then?
|
|
From: Nicholas N. <nj...@ca...> - 2004-01-06 16:09:43
|
On Tue, 6 Jan 2004, Dirk Mueller wrote:
> > Folded cg_sim_{gen,I1,D1,L2}.c into cg_sim.c, which makes a lot more sense
> > and is shorter.
>
> Hmm, don't you want to remove those files then?
Er, yeah. Not sure what happened there, I can remember invoking "cvs
remove"... done. Thanks for catching it.
N
|
|
From: Dirk M. <dm...@gm...> - 2004-01-06 18:18:13
|
On Tuesday 06 January 2004 17:09, Nicholas Nethercote wrote:
> Er, yeah. Not sure what happened there, I can remember invoking "cvs
> remove"... done. Thanks for catching it.
you probably forgot the '-f' parameter..
|
|
From: Nicholas N. <nj...@ca...> - 2004-01-06 18:27:15
|
On Tue, 6 Jan 2004, Dirk Mueller wrote:
> > Er, yeah. Not sure what happened there, I can remember invoking "cvs
> > remove"... done. Thanks for catching it.
>
> you probably forgot the '-f' parameter..
what -f parameter?
N
|
|
From: Dirk M. <dm...@gm...> - 2004-01-06 21:21:37
|
On Tuesday 06 January 2004 19:27, Nicholas Nethercote wrote:
> > > Er, yeah. Not sure what happened there, I can remember invoking "cvs
> > > remove"... done. Thanks for catching it.
> > you probably forgot the '-f' parameter..
> what -f parameter?
cvs rm <file> does not work unless the file is already deleted in the
checkout or you use the -f parameter.
|
|
From: Nicholas N. <nj...@ca...> - 2004-01-06 16:14:44
|
CVS commit by nethercote:
Removed files I meant to with the last commit.
R cg_sim_D1.c 1.9
R cg_sim_I1.c 1.9
R cg_sim_L2.c 1.9
R cg_sim_gen.c 1.12
|
|
From: Nicholas N. <nj...@ca...> - 2004-01-21 16:13:26
|
CVS commit by nethercote:
Fix a bug in Cachegrind: when invalidating BBs, the lookup in the BBCC_table
sometimes failed, causing an assertion failure. This is because the debug
info for the code address, which is used in the lookup,
can change -- eg. "myprint.c:myprint()" is found at instrumentation, but by the
time the invalidation occurs, it's changed to "myprint.c:???". So it now falls
back to a slow exhaustive search of the table.
This was causing cachegrind/tests/dlclose to fail, and should hopefully fix
bug #72781.
M +68 -6 cg_main.c 1.64
--- valgrind/cachegrind/cg_main.c #1.63:1.64
@@ -400,4 +400,61 @@ static UInt hash(Char *s, UInt table_siz
}
+/* This is a backup for get_BBCC() when removing BBs from the table.
+ * Necessary because the debug info can change when code is removed. For
+ * example, when inserting, the info might be "myprint.c:myprint()", but
+ * upon removal, the info might be "myprint.c:???", which causes the
+ * hash-lookup to fail (but it doesn't always happen). So we do a horrible,
+ * slow search through all the file nodes and function nodes (but we can do
+ * 3rd stage with the fast hash-lookup). */
+static BBCC* get_BBCC_slow_removal(Addr bb_orig_addr)
+{
+ Int i, j;
+ UInt BBCC_hash;
+ file_node *curr_file_node;
+ fn_node *curr_fn_node;
+ BBCC **prev_BBCC_next_ptr, *curr_BBCC;
+
+ for (i = 0; i < N_FILE_ENTRIES; i++) {
+
+ for (curr_file_node = BBCC_table[i];
+ NULL != curr_file_node;
+ curr_file_node = curr_file_node->next)
+ {
+ for (j = 0; j < N_FN_ENTRIES; j++) {
+
+ for (curr_fn_node = curr_file_node->fns[j];
+ NULL != curr_fn_node;
+ curr_fn_node = curr_fn_node->next)
+ {
+ BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
+ prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
+ curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
+
+ while (NULL != curr_BBCC) {
+ if (bb_orig_addr == curr_BBCC->orig_addr) {
+ // Found it!
+ sk_assert(curr_BBCC->array_size > 0
+ && curr_BBCC->array_size < 1000000);
+ if (VG_(clo_verbosity) > 2) {
+ VG_(message)(Vg_DebugMsg, "did slow BB removal");
+ }
+
+ // Remove curr_BBCC from chain; it will be used and
+ // free'd by the caller.
+ *prev_BBCC_next_ptr = curr_BBCC->next;
+ return curr_BBCC;
+ }
+
+ prev_BBCC_next_ptr = &(curr_BBCC->next);
+ curr_BBCC = curr_BBCC->next;
+ }
+ }
+ }
+ }
+ }
+ VG_(printf)("failing BB address: %p\n", bb_orig_addr);
+ VG_(skin_panic)("slow BB removal failed");
+}
+
/* Do a three step traversal: by filename, then fn_name, then instr_addr.
* In all cases prepends new nodes to their chain. Returns a pointer to the
@@ -450,9 +507,14 @@ static BBCC* get_BBCC(Addr bb_orig_addr,
if (curr_BBCC == NULL) {
- sk_assert(False == remove);
-
+ if (remove == False) {
curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
*BB_seen_before = False;
+ } else {
+ // Ok, BB not found when removing: the debug info must have
+ // changed. Do a slow removal.
+ curr_BBCC = get_BBCC_slow_removal(bb_orig_addr);
+ *BB_seen_before = True;
+ }
} else {
@@ -461,5 +523,5 @@ static BBCC* get_BBCC(Addr bb_orig_addr,
if (VG_(clo_verbosity) > 2) {
VG_(message)(Vg_DebugMsg,
- "BB retranslation, retrieving from BBCC table");
+ "BB retranslation/invalidation, retrieving from BBCC table");
}
*BB_seen_before = True;
|
|
From: Nicholas N. <nj...@ca...> - 2004-07-02 15:28:41
|
CVS commit by nethercote:
Fix meaningless typo.
M +2 -2 cg_main.c 1.67
--- valgrind/cachegrind/cg_main.c #1.66:1.67
@@ -351,5 +351,5 @@ static Int compute_BBCC_array_size(UCode
static __inline__
-file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next)
+file_node* new_file_node(Char filename[], file_node* next)
{
Int i;
@@ -364,5 +364,5 @@ file_node* new_file_node(Char filename[F
static __inline__
-fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next)
+fn_node* new_fn_node(Char fn_name[], fn_node* next)
{
Int i;
|
|
From: Nicholas N. <nj...@ca...> - 2004-07-03 20:27:44
|
CVS commit by nethercote:
Remove unused variable.
M +0 -3 cg_main.c 1.68
--- valgrind/cachegrind/cg_main.c #1.67:1.68
@@ -792,5 +792,4 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
Addr helper;
Int argc;
- UInt stack_used;
Bool BB_seen_before = False;
Bool instrumented_Jcond = False;
@@ -1011,6 +1010,4 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
* hasn't been initialised before. Then call appropriate sim
* function, passing it the CC address. */
- stack_used = 0;
-
sk_assert(x86_instr_size >= 1 &&
x86_instr_size <= MAX_x86_INSTR_SIZE);
|
|
From: Nicholas N. <nj...@ca...> - 2004-07-06 21:57:54
|
CVS commit by nethercote:
Completely overhauled Cachegrind's data structures. With the new
scheme, there are two main structures:
1. The CC table holds a cost centre (CC) for every distinct source code
line, as found using debug/symbol info. It's arranged by files, then
functions, then lines.
2. The instr-info-table holds certain important pieces of info about
each instruction -- instr_addr, instr_size, data_size, its line-CC.
A pointer to the instr's info is passed to the simulation functions,
which is shorter and quicker than passing the pieces individually.
This is nice and simple. Previously, there was a single data structure
(the BBCC table) which mingled the two purposes (maintaining CCs and
caching instruction info). The CC stuff was done at the level of
instructions, and there were different CC types for different kinds of
instructions, and it was pretty yucky. The two simple data structures
together are much less complex than the original single data structure.
As a result, we have the following general improvements:
- Previously, when code was unloaded all its hit/miss counts were stuck
in a single "discard" CC, and so that code would not be annotated. Now
this code is profiled and annotatable just like all other code.
- Source code size is 27% smaller. cg_main.c is now 1472 lines, down
from 2174. Some (1/3?) of this is from removing the special handling
of JIFZ and general compaction, but most is from the data structure
changes. Happily, a lot of the removed code was nasty.
- Object code size (vgskin_cachegrind.so) is 15% smaller.
- cachegrind.out.pid size is about 90+% smaller(!) Annotation time is
accordingly *much* faster. Doing cost-centres at the level of source
code lines rather than instructions makes a big difference, since
there's typically 2--3 instructions per source line. Even better,
when debug info is not present, entire functions (and even files) get
collapsed into a single "???" CC. (This behaviour is no different
to what happened before, it's just the collapsing used to occur in the
annotation script, rather than within Cachegrind.) This is a huge win
for stripped libraries.
- Memory consumption is about 10--20% less, due to fewer CCs.
- Speed is not much changed -- the changes were not in the intensive
parts, so the only likely change is a cache improvement due to using
less memory. SPEC experiments range from 3% slower to 10% faster, with the
"average" being unchanged or perhaps a tiny bit faster.
I've tested it reasonably thoroughly, and it seems to give extremely similar
results to the old version, which is highly encouraging. (The results aren't
quite the same, because they are so sensitive to memory layout; even
tiny changes to Cachegrind affect the results slightly.)
Some particularly nice changes that happened:
- No longer need an instrumentation prepass; this is because CCs are not
stored grouped by BB, and they're all the same size now. (This makes
various bits of code much simpler than before).
- The actions to take when a BB translation is discarded (due to the
translation table getting full) are much easier -- just chuck all the
instr-info nodes for the BB, without touching the CCs.
- Dumping the cachegrind.out.pid file at the end is much simpler, just
because the CC data structure is much neater.
Some other, specific changes:
- Removed the JIFZ special handling, which never did what it was
intended to do and just complicated things. This changes the results
for REP-prefixed instructions very slightly, but it's not important.
- Abbreviated the FP/MMX/SSE crap by being slightly laxer with size
checking -- not an issue, since this checking was just a pale
imitation of the stricter checking done in codegen anyway.
- Removed "fi" and "fe" handling from cg_annotate, no longer needed due
to neatening of the CC-table.
- Factorised out some code a bit, so fewer monolithic slabs,
particularly in SK_(instrument)().
- Just improved formatting and compacted code in general in various
places.
- Removed the long-commented-out sanity checking code at the bottom.
Phew.
M +0 -12 cg_annotate.in 1.20
M +547 -1249 cg_main.c 1.69
--- valgrind/cachegrind/cg_annotate.in #1.19:1.20
@@ -409,16 +409,4 @@
$curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs);
- } elsif (s/^(fi|fe)=(.*)$//) {
- (defined $curr_name) or die("Line $.: Unexpected fi/fe line\n");
- $fn_totals{$curr_name} = $curr_fn_CC;
- $all_ind_CCs{$curr_file} = $curr_file_ind_CCs;
-
- $curr_file = $2;
- $curr_name = "$curr_file:$curr_fn";
- $curr_file_ind_CCs = $all_ind_CCs{$curr_file};
- $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs);
- $curr_fn_CC = $fn_totals{$curr_name};
- $curr_fn_CC = [] unless (defined $curr_fn_CC);
-
} elsif (s/^\s*$//) {
# blank, do nothing
--- valgrind/cachegrind/cg_main.c #1.68:1.69
@@ -1,6 +1,5 @@
/*--------------------------------------------------------------------*/
-/*--- Cachegrind: cache detection; instrumentation, recording and ---*/
-/*--- results printing. ---*/
+/*--- Cachegrind: every but the simulation itself. ---*/
/*--- cg_main.c ---*/
/*--------------------------------------------------------------------*/
@@ -47,16 +46,8 @@ typedef struct {
/*------------------------------------------------------------*/
-/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */
-#define MAX_x86_INSTR_SIZE 16
-
+#define MAX_x86_INSTR_SIZE 16 // According to ia32 sw dev manual vol 2
#define MIN_LINE_SIZE 16
-
-/* Size of various buffers used for storing strings */
-#define FILENAME_LEN 256
-#define FN_NAME_LEN 256
-#define BUF_LEN 512
-#define COMMIFY_BUF_LEN 128
-#define RESULTS_BUF_LEN 128
-#define LINE_BUF_LEN 64
+#define FILE_LEN 256
+#define FN_LEN 256
/*------------------------------------------------------------*/
@@ -66,5 +57,5 @@ typedef struct {
typedef
enum {
- VgpGetBBCC = VgpFini+1,
+ VgpGetLineCC = VgpFini+1,
VgpCacheSimulate,
VgpCacheResults
@@ -73,20 +64,5 @@ typedef
/*------------------------------------------------------------*/
-/*--- Output file related stuff ---*/
-/*------------------------------------------------------------*/
-
-static Char* cachegrind_out_file;
-
-static void file_err ( void )
-{
- VG_(message)(Vg_UserMsg,
- "error: can't open cache simulation output file `%s'",
- cachegrind_out_file );
- VG_(message)(Vg_UserMsg,
- " ... so simulation results will be missing.");
-}
-
-/*------------------------------------------------------------*/
-/*--- Cost center types, operations ---*/
+/*--- Types and Data Structures ---*/
/*------------------------------------------------------------*/
@@ -98,206 +74,78 @@ struct _CC {
};
-static __inline__ void initCC(CC* cc) {
- cc->a = 0;
- cc->m1 = 0;
- cc->m2 = 0;
-}
+//------------------------------------------------------------
+// Primary data structure #1: CC table
+// - Holds the per-source-line hit/miss stats, grouped by file/function/line.
+// - hash(file, hash(fn, hash(line+CC)))
+// - Each hash table is separately chained.
+// - The array sizes below work fairly well for Konqueror.
+// - Lookups done by instr_addr, which is converted immediately to a source
+// location.
+// - Traversed for dumping stats at end in file/func/line hierarchy.
-typedef
- enum {
- InstrCC, /* eg. mov %eax, %ebx */
- ReadCC, /* eg. mov (%ecx), %esi */
- WriteCC, /* eg. mov %eax, (%edx) */
- ModCC, /* eg. incl (%eax) (read+write one addr) */
- ReadWriteCC, /* eg. call*l (%esi), pushl 0x4(%ebx), movsw
- (read+write two different addrs) */
- } CC_type;
+#define N_FILE_ENTRIES 251
+#define N_FN_ENTRIES 53
+#define N_LINE_ENTRIES 37
-/* Instruction-level cost-centres.
- *
- * WARNING: the 'tag' field *must* be the first byte of both CC types.
- *
- * This is because we use it to work out what kind of CC we're dealing with.
- */
-typedef
- struct {
- /* word 1 */
- UChar tag;
- UChar instr_size;
- /* 2 bytes padding */
+typedef struct _lineCC lineCC;
+struct _lineCC {
+ Int line;
+ CC Ir;
+ CC Dr;
+ CC Dw;
+ lineCC* next;
+};
- /* words 2+ */
- Addr instr_addr;
- CC I;
- }
- iCC;
+typedef struct _fnCC fnCC;
+struct _fnCC {
+ Char* fn;
+ fnCC* next;
+ lineCC* lines[N_LINE_ENTRIES];
+};
-typedef
- struct _idCC {
- /* word 1 */
- UChar tag;
- UChar instr_size;
- UChar data_size;
- /* 1 byte padding */
+typedef struct _fileCC fileCC;
+struct _fileCC {
+ Char* file;
+ fileCC* next;
+ fnCC* fns[N_FN_ENTRIES];
+};
- /* words 2+ */
- Addr instr_addr;
- CC I;
- CC D;
- }
- idCC;
+// Top level of CC table. Auto-zeroed.
+static fileCC *CC_table[N_FILE_ENTRIES];
-typedef
- struct _iddCC {
- /* word 1 */
- UChar tag;
- UChar instr_size;
- UChar data_size;
- /* 1 byte padding */
+//------------------------------------------------------------
+// Primary data structre #2: Instr-info table
+// - Holds the cached info about each instr that is used for simulation.
+// - table(BB_start_addr, list(instr_info))
+// - For each BB, each instr_info in the list holds info about the
+// instruction (instr_size, instr_addr, etc), plue a pointer to its line
+// CC. This node is what's passed to the simulation function.
+// - When BBs are discarded the relevant list(instr_details) is freed.
- /* words 2+ */
+typedef struct _instr_info instr_info;
+struct _instr_info {
Addr instr_addr;
- CC I;
- CC Da;
- CC Db;
- }
- iddCC;
-
-static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size)
-{
- cc->tag = InstrCC;
- cc->instr_size = instr_size;
- cc->instr_addr = instr_addr;
- initCC(&cc->I);
-}
-
-static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr,
- UInt instr_size, UInt data_size)
-{
- cc->tag = X_CC;
- cc->instr_size = instr_size;
- cc->data_size = data_size;
- cc->instr_addr = instr_addr;
- initCC(&cc->I);
- initCC(&cc->D);
-}
-
-static void init_iddCC(iddCC* cc, Addr instr_addr,
- UInt instr_size, UInt data_size)
-{
- cc->tag = ReadWriteCC;
- cc->instr_size = instr_size;
- cc->data_size = data_size;
- cc->instr_addr = instr_addr;
- initCC(&cc->I);
- initCC(&cc->Da);
- initCC(&cc->Db);
-}
-
-#define ADD_CC_TO(CC_type, cc, total) \
- total.a += ((CC_type*)BBCC_ptr)->cc.a; \
- total.m1 += ((CC_type*)BBCC_ptr)->cc.m1; \
- total.m2 += ((CC_type*)BBCC_ptr)->cc.m2;
-
-/* If 1, address of each instruction is printed as a comment after its counts
- * in cachegrind.out */
-#define PRINT_INSTR_ADDRS 0
-
-static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc)
-{
-#if PRINT_INSTR_ADDRS
- VG_(sprintf)(buf, "%llu %llu %llu # %x\n",
- cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr);
-#else
- VG_(sprintf)(buf, "%llu %llu %llu\n",
- cc->I.a, cc->I.m1, cc->I.m2);
-#endif
-}
-
-static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc)
-{
-#if PRINT_INSTR_ADDRS
- VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
- cc->I.a, cc->I.m1, cc->I.m2,
- cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
-#else
- VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n",
- cc->I.a, cc->I.m1, cc->I.m2,
- cc->D.a, cc->D.m1, cc->D.m2);
-#endif
-}
-
-static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc)
-{
-#if PRINT_INSTR_ADDRS
- VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n",
- cc->I.a, cc->I.m1, cc->I.m2,
- cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr);
-#else
- VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu\n",
- cc->I.a, cc->I.m1, cc->I.m2,
- cc->D.a, cc->D.m1, cc->D.m2);
-#endif
-}
-
-static __inline__ void sprint_read_write_CC(Char buf[BUF_LEN], iddCC* cc)
-{
-#if PRINT_INSTR_ADDRS
- VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n",
- cc->I.a, cc->I.m1, cc->I.m2,
- cc->Da.a, cc->Da.m1, cc->Da.m2,
- cc->Db.a, cc->Db.m1, cc->Db.m2, cc->instr_addr);
-#else
- VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
- cc->I.a, cc->I.m1, cc->I.m2,
- cc->Da.a, cc->Da.m1, cc->Da.m2,
- cc->Db.a, cc->Db.m1, cc->Db.m2);
-#endif
-}
-
-
-/*------------------------------------------------------------*/
-/*--- BBCC hash table stuff ---*/
-/*------------------------------------------------------------*/
-
-/* The table of BBCCs is of the form hash(filename, hash(fn_name,
- * hash(BBCCs))). Each hash table is separately chained. The sizes below work
- * fairly well for Konqueror. */
-
-#define N_FILE_ENTRIES 251
-#define N_FN_ENTRIES 53
-#define N_BBCC_ENTRIES 37
-
-/* The cost centres for a basic block are stored in a contiguous array.
- * They are distinguishable by their tag field. */
-typedef struct _BBCC BBCC;
-struct _BBCC {
- Addr orig_addr;
- UInt array_size; /* byte-size of variable length array */
- BBCC* next;
- Addr array[0]; /* variable length array */
-};
-
-typedef struct _fn_node fn_node;
-struct _fn_node {
- Char* fn_name;
- BBCC* BBCCs[N_BBCC_ENTRIES];
- fn_node* next;
+ UChar instr_size;
+ UChar data_size;
+ struct _lineCC* parent; // parent line-CC
};
-typedef struct _file_node file_node;
-struct _file_node {
- Char* filename;
- fn_node* fns[N_FN_ENTRIES];
- file_node* next;
+typedef struct _BB_info BB_info;
+struct _BB_info {
+ BB_info* next; // next field
+ Addr BB_addr; // key
+ Int n_instrs;
+ instr_info instrs[0];
};
-/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */
-static file_node *BBCC_table[N_FILE_ENTRIES];
+VgHashTable instr_info_table; // hash(Addr, BB_info)
+//------------------------------------------------------------
+// Stats
static Int distinct_files = 0;
static Int distinct_fns = 0;
-
+static Int distinct_lines = 0;
static Int distinct_instrs = 0;
+
static Int full_debug_BBs = 0;
static Int file_line_debug_BBs = 0;
@@ -302,547 +150,395 @@ static Int distinct_instrs = 0;
static Int full_debug_BBs = 0;
static Int file_line_debug_BBs = 0;
-static Int fn_name_debug_BBs = 0;
+static Int fn_debug_BBs = 0;
static Int no_debug_BBs = 0;
static Int BB_retranslations = 0;
-static CC Ir_discards;
-static CC Dr_discards;
-static CC Dw_discards;
-
-static void init_BBCC_table()
-{
- Int i;
- for (i = 0; i < N_FILE_ENTRIES; i++)
- BBCC_table[i] = NULL;
-}
+/*------------------------------------------------------------*/
+/*--- CC table operations ---*/
+/*------------------------------------------------------------*/
-static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN],
- Char fn_name[FN_NAME_LEN], Int* line_num)
+static void get_debug_info(Addr instr_addr, Char file[FILE_LEN],
+ Char fn[FN_LEN], Int* line)
{
- Bool found1, found2;
-
- found1 = VG_(get_filename_linenum)(instr_addr, filename,
- FILENAME_LEN, line_num);
- found2 = VG_(get_fnname)(instr_addr, fn_name, FN_NAME_LEN);
-
- if (!found1 && !found2) {
- no_debug_BBs++;
- VG_(strcpy)(filename, "???");
- VG_(strcpy)(fn_name, "???");
- *line_num = 0;
-
- } else if ( found1 && found2) {
- full_debug_BBs++;
-
- } else if ( found1 && !found2) {
- file_line_debug_BBs++;
- VG_(strcpy)(fn_name, "???");
+ Bool found_file_line = VG_(get_filename_linenum)(instr_addr, file,
+ FILE_LEN, line);
+ Bool found_fn = VG_(get_fnname)(instr_addr, fn, FN_LEN);
- } else /*(!found1 && found2)*/ {
- fn_name_debug_BBs++;
- VG_(strcpy)(filename, "???");
- *line_num = 0;
+ if (!found_file_line) {
+ VG_(strcpy)(file, "???");
+ *line = 0;
+ }
+ if (!found_fn) {
+ VG_(strcpy)(fn, "???");
+ }
+ if (found_file_line) {
+ if (found_fn) full_debug_BBs++;
+ else file_line_debug_BBs++;
+ } else {
+ if (found_fn) fn_debug_BBs++;
+ else no_debug_BBs++;
}
}
-/* Forward declaration. */
-static Int compute_BBCC_array_size(UCodeBlock* cb);
-
-static __inline__
-file_node* new_file_node(Char filename[], file_node* next)
+static UInt hash(Char *s, UInt table_size)
{
- Int i;
- file_node* new = VG_(malloc)(sizeof(file_node));
- new->filename = VG_(strdup)(filename);
- for (i = 0; i < N_FN_ENTRIES; i++) {
- new->fns[i] = NULL;
- }
- new->next = next;
- return new;
+ const int hash_constant = 256;
+ int hash_value = 0;
+ for ( ; *s; s++)
+ hash_value = (hash_constant * hash_value + *s) % table_size;
+ return hash_value;
}
static __inline__
-fn_node* new_fn_node(Char fn_name[], fn_node* next)
+fileCC* new_fileCC(Char filename[], fileCC* next)
{
- Int i;
- fn_node* new = VG_(malloc)(sizeof(fn_node));
- new->fn_name = VG_(strdup)(fn_name);
- for (i = 0; i < N_BBCC_ENTRIES; i++) {
- new->BBCCs[i] = NULL;
- }
- new->next = next;
- return new;
+ // Using calloc() zeroes the fns[] array
+ fileCC* cc = VG_(calloc)(1, sizeof(fileCC));
+ cc->file = VG_(strdup)(filename);
+ cc->next = next;
+ return cc;
}
static __inline__
-BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next)
+fnCC* new_fnCC(Char fn[], fnCC* next)
{
- Int BBCC_array_size = compute_BBCC_array_size(cb);
- BBCC* new;
-
- new = (BBCC*)VG_(malloc)(sizeof(BBCC) + BBCC_array_size);
- new->orig_addr = bb_orig_addr;
- new->array_size = BBCC_array_size;
- new->next = next;
-
- return new;
+ // Using calloc() zeroes the lines[] array
+ fnCC* cc = VG_(calloc)(1, sizeof(fnCC));
+ cc->fn = VG_(strdup)(fn);
+ cc->next = next;
+ return cc;
}
-#define HASH_CONSTANT 256
-
-static UInt hash(Char *s, UInt table_size)
+static __inline__
+lineCC* new_lineCC(Int line, lineCC* next)
{
- int hash_value = 0;
- for ( ; *s; s++)
- hash_value = (HASH_CONSTANT * hash_value + *s) % table_size;
- return hash_value;
+ // Using calloc() zeroes the Ir/Dr/Dw CCs and the instrs[] array
+ lineCC* cc = VG_(calloc)(1, sizeof(lineCC));
+ cc->line = line;
+ cc->next = next;
+ return cc;
}
-/* This is a backup for get_BBCC() when removing BBs from the table.
- * Necessary because the debug info can change when code is removed. For
- * example, when inserting, the info might be "myprint.c:myprint()", but
- * upon removal, the info might be "myprint.c:???", which causes the
- * hash-lookup to fail (but it doesn't always happen). So we do a horrible,
- * slow search through all the file nodes and function nodes (but we can do
- * 3rd stage with the fast hash-lookup). */
-static BBCC* get_BBCC_slow_removal(Addr bb_orig_addr)
+static __inline__
+instr_info* new_instr_info(Addr instr_addr, lineCC* parent, instr_info* next)
{
- Int i, j;
- UInt BBCC_hash;
- file_node *curr_file_node;
- fn_node *curr_fn_node;
- BBCC **prev_BBCC_next_ptr, *curr_BBCC;
-
- for (i = 0; i < N_FILE_ENTRIES; i++) {
-
- for (curr_file_node = BBCC_table[i];
- NULL != curr_file_node;
- curr_file_node = curr_file_node->next)
- {
- for (j = 0; j < N_FN_ENTRIES; j++) {
-
- for (curr_fn_node = curr_file_node->fns[j];
- NULL != curr_fn_node;
- curr_fn_node = curr_fn_node->next)
- {
- BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
- prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
- curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
-
- while (NULL != curr_BBCC) {
- if (bb_orig_addr == curr_BBCC->orig_addr) {
- // Found it!
- sk_assert(curr_BBCC->array_size > 0
- && curr_BBCC->array_size < 1000000);
- if (VG_(clo_verbosity) > 2) {
- VG_(message)(Vg_DebugMsg, "did slow BB removal");
- }
-
- // Remove curr_BBCC from chain; it will be used and
- // free'd by the caller.
- *prev_BBCC_next_ptr = curr_BBCC->next;
- return curr_BBCC;
- }
-
- prev_BBCC_next_ptr = &(curr_BBCC->next);
- curr_BBCC = curr_BBCC->next;
- }
- }
- }
- }
- }
- VG_(printf)("failing BB address: %p\n", bb_orig_addr);
- VG_(skin_panic)("slow BB removal failed");
+ // Using calloc() zeroes instr_size and data_size
+ instr_info* ii = VG_(calloc)(1, sizeof(instr_info));
+ ii->instr_addr = instr_addr;
+ ii->parent = parent;
+ return ii;
}
-/* Do a three step traversal: by filename, then fn_name, then instr_addr.
- * In all cases prepends new nodes to their chain. Returns a pointer to the
- * cost centre. Also sets BB_seen_before by reference.
- */
-static BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb,
- Bool remove, Bool *BB_seen_before)
+// Do a three step traversal: by file, then fn, then line.
+// In all cases prepends new nodes to their chain. Returns a pointer to the
+// line node, creates a new one if necessary.
+static lineCC* get_lineCC(Addr orig_addr)
{
- file_node *curr_file_node;
- fn_node *curr_fn_node;
- BBCC **prev_BBCC_next_ptr, *curr_BBCC;
- Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN];
- UInt filename_hash, fnname_hash, BBCC_hash;
- Int dummy_line_num;
+ fileCC *curr_fileCC;
+ fnCC *curr_fnCC;
+ lineCC *curr_lineCC;
+ Char file[FILE_LEN], fn[FN_LEN];
+ Int line;
+ UInt file_hash, fn_hash, line_hash;
- get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num);
+ get_debug_info(orig_addr, file, fn, &line);
- VGP_PUSHCC(VgpGetBBCC);
- filename_hash = hash(filename, N_FILE_ENTRIES);
- curr_file_node = BBCC_table[filename_hash];
- while (NULL != curr_file_node &&
- VG_(strcmp)(filename, curr_file_node->filename) != 0) {
- curr_file_node = curr_file_node->next;
+ VGP_PUSHCC(VgpGetLineCC);
+
+ // level 1
+ file_hash = hash(file, N_FILE_ENTRIES);
+ curr_fileCC = CC_table[file_hash];
+ while (NULL != curr_fileCC && !VG_STREQ(file, curr_fileCC->file)) {
+ curr_fileCC = curr_fileCC->next;
}
- if (NULL == curr_file_node) {
- BBCC_table[filename_hash] = curr_file_node =
- new_file_node(filename, BBCC_table[filename_hash]);
+ if (NULL == curr_fileCC) {
+ CC_table[file_hash] = curr_fileCC =
+ new_fileCC(file, CC_table[file_hash]);
distinct_files++;
}
- fnname_hash = hash(fn_name, N_FN_ENTRIES);
- curr_fn_node = curr_file_node->fns[fnname_hash];
- while (NULL != curr_fn_node &&
- VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) {
- curr_fn_node = curr_fn_node->next;
+ // level 2
+ fn_hash = hash(fn, N_FN_ENTRIES);
+ curr_fnCC = curr_fileCC->fns[fn_hash];
+ while (NULL != curr_fnCC && !VG_STREQ(fn, curr_fnCC->fn)) {
+ curr_fnCC = curr_fnCC->next;
}
- if (NULL == curr_fn_node) {
- curr_file_node->fns[fnname_hash] = curr_fn_node =
- new_fn_node(fn_name, curr_file_node->fns[fnname_hash]);
+ if (NULL == curr_fnCC) {
+ curr_fileCC->fns[fn_hash] = curr_fnCC =
+ new_fnCC(fn, curr_fileCC->fns[fn_hash]);
distinct_fns++;
}
- BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES;
- prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]);
- curr_BBCC = curr_fn_node->BBCCs[BBCC_hash];
- while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) {
- prev_BBCC_next_ptr = &(curr_BBCC->next);
- curr_BBCC = curr_BBCC->next;
- }
- if (curr_BBCC == NULL) {
-
- if (remove == False) {
- curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC =
- new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]);
- *BB_seen_before = False;
- } else {
- // Ok, BB not found when removing: the debug info must have
- // changed. Do a slow removal.
- curr_BBCC = get_BBCC_slow_removal(bb_orig_addr);
- *BB_seen_before = True;
+ // level 3
+ line_hash = line % N_LINE_ENTRIES;
+ curr_lineCC = curr_fnCC->lines[line_hash];
+ while (NULL != curr_lineCC && line != curr_lineCC->line) {
+ curr_lineCC = curr_lineCC->next;
}
-
- } else {
- sk_assert(bb_orig_addr == curr_BBCC->orig_addr);
- sk_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000);
- if (VG_(clo_verbosity) > 2) {
- VG_(message)(Vg_DebugMsg,
- "BB retranslation/invalidation, retrieving from BBCC table");
+ if (NULL == curr_lineCC) {
+ curr_fnCC->lines[line_hash] = curr_lineCC =
+ new_lineCC(line, curr_fnCC->lines[line_hash]);
+ distinct_lines++;
}
- *BB_seen_before = True;
-
- if (True == remove) {
- // Remove curr_BBCC from chain; it will be used and free'd by the
- // caller.
- *prev_BBCC_next_ptr = curr_BBCC->next;
- } else {
- BB_retranslations++;
- }
- }
- VGP_POPCC(VgpGetBBCC);
- return curr_BBCC;
+ VGP_POPCC(VgpGetLineCC);
+ return curr_lineCC;
}
/*------------------------------------------------------------*/
-/*--- Cache simulation instrumentation phase ---*/
+/*--- Cache simulation functions ---*/
/*------------------------------------------------------------*/
-static Int compute_BBCC_array_size(UCodeBlock* cb)
-{
- UInstr* u_in;
- Int i, CC_size, BBCC_size = 0;
- Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W;
- Int t_read, t_write;
-
- is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
- t_read = t_write = INVALID_TEMPREG;
-
- for (i = 0; i < VG_(get_num_instrs)(cb); i++) {
- u_in = VG_(get_instr)(cb, i);
- switch(u_in->opcode) {
-
- case INCEIP:
- goto case_for_end_of_instr;
-
- case JMP:
- if (u_in->cond != CondAlways) break;
-
- goto case_for_end_of_instr;
-
- case_for_end_of_instr:
-
- if (((is_LOAD && is_STORE) || (is_FPU_R && is_FPU_W)) &&
- t_read != t_write)
- CC_size = sizeof(iddCC);
- else if (is_LOAD || is_STORE || is_FPU_R || is_FPU_W)
- CC_size = sizeof(idCC);
- else
- CC_size = sizeof(iCC);
-
- BBCC_size += CC_size;
- is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False;
- break;
-
- case LOAD:
- /* Two LDBs are possible for a single instruction */
- /* Also, a STORE can come after a LOAD for bts/btr/btc */
- sk_assert(/*!is_LOAD &&*/ /* !is_STORE && */
- !is_FPU_R && !is_FPU_W);
- t_read = u_in->val1;
- is_LOAD = True;
- break;
-
- case STORE:
- /* Multiple STOREs are possible for 'pushal' */
- sk_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W);
- t_write = u_in->val2;
- is_STORE = True;
- break;
-
- case MMX2_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 8);
- /* fall through */
- case FPU_R:
- sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
- t_read = u_in->val2;
- is_FPU_R = True;
- break;
-
- case MMX2a1_MemRd:
- sk_assert(u_in->size == 8);
- sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
- t_read = u_in->val3;
- is_FPU_R = True;
- break;
-
- case SSE2a_MemRd:
- case SSE2a1_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
- t_read = u_in->val3;
- is_FPU_R = True;
- break;
-
- case SSE3a_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
- t_read = u_in->val3;
- is_FPU_R = True;
- break;
-
- case SSE3a1_MemRd:
- sk_assert(u_in->size == 8 || u_in->size == 16);
- t_read = u_in->val3;
- is_FPU_R = True;
- break;
-
- case SSE3ag_MemRd_RegWr:
- sk_assert(u_in->size == 4 || u_in->size == 8);
- t_read = u_in->val1;
- is_FPU_R = True;
- break;
-
- case MMX2_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 8);
- /* fall through */
- case FPU_W:
- sk_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W);
- t_write = u_in->val2;
- is_FPU_W = True;
- break;
-
- case SSE2a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
- t_write = u_in->val3;
- is_FPU_W = True;
- break;
-
- case SSE3a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
- t_write = u_in->val3;
- is_FPU_W = True;
- break;
-
- default:
- break;
- }
- }
-
- return BBCC_size;
-}
-
static __attribute__ ((regparm (1)))
-void log_1I_0D_cache_access(iCC* cc)
+void log_1I_0D_cache_access(instr_info* n)
{
//VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
- // cc, cc->instr_addr, cc->instr_size)
+ // n, n->instr_addr, n->instr_size)
VGP_PUSHCC(VgpCacheSimulate);
- cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
- cc->I.a++;
+ cachesim_I1_doref(n->instr_addr, n->instr_size,
+ &n->parent->Ir.m1, &n->parent->Ir.m2);
+ n->parent->Ir.a++;
VGP_POPCC(VgpCacheSimulate);
}
-/* Difference between this function and log_1I_0D_cache_access() is that
- this one can be passed any kind of CC, not just an iCC. So we have to
- be careful to make sure we don't make any assumptions about CC layout.
- (As it stands, they would be safe, but this will avoid potential heartache
- if anyone else changes CC layout.)
- Note that we only do the switch for the JIFZ version because if we always
- called this switching version, things would run about 5% slower. */
-static __attribute__ ((regparm (1)))
-void log_1I_0D_cache_access_JIFZ(iCC* cc)
+static __attribute__ ((regparm (2)))
+void log_1I_1Dr_cache_access(instr_info* n, Addr data_addr)
{
- UChar instr_size;
- Addr instr_addr;
- CC* I;
-
- //VG_(printf)("1I_0D: CCaddr=0x%x, iaddr=0x%x, isize=%u\n",
- // cc, cc->instr_addr, cc->instr_size)
+ //VG_(printf)("1I_1Dr: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
+ // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
VGP_PUSHCC(VgpCacheSimulate);
+ cachesim_I1_doref(n->instr_addr, n->instr_size,
+ &n->parent->Ir.m1, &n->parent->Ir.m2);
+ n->parent->Ir.a++;
- switch(cc->tag) {
- case InstrCC:
- instr_size = cc->instr_size;
- instr_addr = cc->instr_addr;
- I = &(cc->I);
- break;
- case ReadCC:
- case WriteCC:
- case ModCC:
- instr_size = ((idCC*)cc)->instr_size;
- instr_addr = ((idCC*)cc)->instr_addr;
- I = &( ((idCC*)cc)->I );
- break;
- case ReadWriteCC:
- instr_size = ((iddCC*)cc)->instr_size;
- instr_addr = ((iddCC*)cc)->instr_addr;
- I = &( ((iddCC*)cc)->I );
- break;
- default:
- VG_(skin_panic)("Unknown CC type in log_1I_0D_cache_access_JIFZ()\n");
- break;
- }
- cachesim_I1_doref(instr_addr, instr_size, &I->m1, &I->m2);
- I->a++;
+ cachesim_D1_doref(data_addr, n->data_size,
+ &n->parent->Dr.m1, &n->parent->Dr.m2);
+ n->parent->Dr.a++;
VGP_POPCC(VgpCacheSimulate);
}
-__attribute__ ((regparm (2))) static
-void log_0I_1D_cache_access(idCC* cc, Addr data_addr)
+static __attribute__ ((regparm (2)))
+void log_1I_1Dw_cache_access(instr_info* n, Addr data_addr)
{
- //VG_(printf)("0I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
- // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
+ //VG_(printf)("1I_1Dw: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
+ // n, n->instr_addr, n->instr_size, data_addr, n->data_size)
VGP_PUSHCC(VgpCacheSimulate);
- cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
- cc->D.a++;
+ cachesim_I1_doref(n->instr_addr, n->instr_size,
+ &n->parent->Ir.m1, &n->parent->Ir.m2);
+ n->parent->Ir.a++;
+
+ cachesim_D1_doref(data_addr, n->data_size,
+ &n->parent->Dw.m1, &n->parent->Dw.m2);
+ n->parent->Dw.a++;
VGP_POPCC(VgpCacheSimulate);
}
-__attribute__ ((regparm (2))) static
-void log_1I_1D_cache_access(idCC* cc, Addr data_addr)
+static __attribute__ ((regparm (3)))
+void log_1I_2D_cache_access(instr_info* n, Addr data_addr1, Addr data_addr2)
{
- //VG_(printf)("1I_1D: CCaddr=%p, iaddr=%p, isize=%u, daddr=%p, dsize=%u\n",
- // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size)
+ //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
+ // n, n->instr_addr, n->instr_size, data_addr1, data_addr2, n->data_size)
VGP_PUSHCC(VgpCacheSimulate);
- cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
- cc->I.a++;
+ cachesim_I1_doref(n->instr_addr, n->instr_size,
+ &n->parent->Ir.m1, &n->parent->Ir.m2);
+ n->parent->Ir.a++;
- cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2);
- cc->D.a++;
+ cachesim_D1_doref(data_addr1, n->data_size,
+ &n->parent->Dr.m1, &n->parent->Dr.m2);
+ n->parent->Dr.a++;
+ cachesim_D1_doref(data_addr2, n->data_size,
+ &n->parent->Dw.m1, &n->parent->Dw.m2);
+ n->parent->Dw.a++;
VGP_POPCC(VgpCacheSimulate);
}
-__attribute__ ((regparm (3))) static
-void log_0I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
+/*------------------------------------------------------------*/
+/*--- Instrumentation ---*/
+/*------------------------------------------------------------*/
+
+BB_info* get_BB_info(UCodeBlock* cb_in, Addr orig_addr, Bool* bb_seen_before)
{
- //VG_(printf)("0I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=0x%x, daddr2=%p, size=%u\n",
- // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
- VGP_PUSHCC(VgpCacheSimulate);
- cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
- cc->Da.a++;
- cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
- cc->Db.a++;
- VGP_POPCC(VgpCacheSimulate);
+ Int i, n_instrs;
+ UInstr* u_in;
+ BB_info* bb_info;
+ VgHashNode** dummy;
+
+ // Count number of x86 instrs in BB
+ n_instrs = 1; // start at 1 because last x86 instr has no INCEIP
+ for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
+ u_in = VG_(get_instr)(cb_in, i);
+ if (INCEIP == u_in->opcode) n_instrs++;
+ }
+
+ // Get the BB_info
+ bb_info = (BB_info*)VG_(HT_get_node)(instr_info_table, orig_addr, &dummy);
+ *bb_seen_before = ( NULL == bb_info ? False : True );
+ if (*bb_seen_before) {
+ // BB must have been translated before, but flushed from the TT
+ sk_assert(bb_info->n_instrs == n_instrs );
+ BB_retranslations++;
+ } else {
+ // BB never translated before (at this address, at least; could have
+ // been unloaded and then reloaded elsewhere in memory)
+ bb_info =
+ VG_(calloc)(1, sizeof(BB_info) + n_instrs*sizeof(instr_info));
+ bb_info->BB_addr = orig_addr;
+ bb_info->n_instrs = n_instrs;
+ VG_(HT_add_node)( instr_info_table, (VgHashNode*)bb_info );
+ distinct_instrs++;
+ }
+ return bb_info;
}
-__attribute__ ((regparm (3))) static
-void log_1I_2D_cache_access(iddCC* cc, Addr data_addr1, Addr data_addr2)
+void do_details( instr_info* n, Bool bb_seen_before,
+ Addr instr_addr, Int instr_size, Int data_size )
{
- //VG_(printf)("1I_2D: CCaddr=%p, iaddr=%p, isize=%u, daddr1=%p, daddr2=%p, dsize=%u\n",
- // cc, cc->instr_addr, cc->instr_size, data_addr1, data_addr2, cc->data_size)
- VGP_PUSHCC(VgpCacheSimulate);
- cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2);
- cc->I.a++;
+ lineCC* parent = get_lineCC(instr_addr);
+ if (bb_seen_before) {
+ sk_assert( n->instr_addr == instr_addr );
+ sk_assert( n->instr_size == instr_size );
+ sk_assert( n->data_size == data_size );
+ // Don't assert that (n->parent == parent)... it's conceivable that
+ // the debug info might change; the other asserts should be enough to
+ // detect anything strange.
+ } else {
+ n->instr_addr = instr_addr;
+ n->instr_size = instr_size;
+ n->data_size = data_size;
+ n->parent = parent;
+ }
+}
- cachesim_D1_doref(data_addr1, cc->data_size, &cc->Da.m1, &cc->Da.m2);
- cc->Da.a++;
- cachesim_D1_doref(data_addr2, cc->data_size, &cc->Db.m1, &cc->Db.m2);
- cc->Db.a++;
- VGP_POPCC(VgpCacheSimulate);
+Bool is_valid_data_size(Int data_size)
+{
+ return (4 == data_size || 2 == data_size || 1 == data_size ||
+ 8 == data_size || 10 == data_size || MIN_LINE_SIZE == data_size);
}
-UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
+// Instrumentation for the end of each x86 instruction.
+void end_of_x86_instr(UCodeBlock* cb, instr_info* i_node, Bool bb_seen_before,
+ UInt instr_addr, UInt instr_size, UInt data_size,
+ Int t_read, Int t_read_addr,
+ Int t_write, Int t_write_addr)
{
-/* Use this rather than eg. -1 because it's a UInt. */
-#define INVALID_DATA_SIZE 999999
+ Addr helper;
+ Int argc;
+ Int t_CC_addr,
+ t_data_addr1 = INVALID_TEMPREG,
+ t_data_addr2 = INVALID_TEMPREG;
+
+ sk_assert(instr_size >= 1 &&
+ instr_size <= MAX_x86_INSTR_SIZE);
+
+#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
+#define INV(qqt) (INVALID_TEMPREG == (qqt))
+
+ // Work out what kind of x86 instruction it is
+ if (!IS_(read) && !IS_(write)) {
+ sk_assert( 0 == data_size );
+ sk_assert(INV(t_read) && INV(t_write));
+ helper = (Addr) & log_1I_0D_cache_access;
+ argc = 1;
+ } else if (IS_(read) && !IS_(write)) {
+ sk_assert( is_valid_data_size(data_size) );
+ sk_assert(!INV(t_read) && INV(t_write));
+ helper = (Addr) & log_1I_1Dr_cache_access;
+ argc = 2;
+ t_data_addr1 = t_read_addr;
+
+ } else if (!IS_(read) && IS_(write)) {
+ sk_assert( is_valid_data_size(data_size) );
+ sk_assert(INV(t_read) && !INV(t_write));
+ helper = (Addr) & log_1I_1Dw_cache_access;
+ argc = 2;
+ t_data_addr1 = t_write_addr;
+
+ } else {
+ sk_assert(IS_(read) && IS_(write));
+ sk_assert( is_valid_data_size(data_size) );
+ sk_assert(!INV(t_read) && !INV(t_write));
+ if (t_read == t_write) {
+ helper = (Addr) & log_1I_1Dr_cache_access;
+ argc = 2;
+ t_data_addr1 = t_read_addr;
+ } else {
+ helper = (Addr) & log_1I_2D_cache_access;
+ argc = 3;
+ t_data_addr1 = t_read_addr;
+ t_data_addr2 = t_write_addr;
+ }
+ }
+#undef IS_
+ // Setup 1st arg: CC addr
+ do_details( i_node, bb_seen_before, instr_addr, instr_size, data_size );
+ t_CC_addr = newTemp(cb);
+ uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
+ uLiteral(cb, (Addr)i_node);
+
+ // Call the helper
+ if (1 == argc)
+ uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
+ else if (2 == argc)
+ uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
+ TempReg, t_data_addr1);
+ else if (3 == argc)
+ uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
+ TempReg, t_data_addr1,
+ TempReg, t_data_addr2);
+ else
+ VG_(skin_panic)("argc... not 1 or 2 or 3?");
+
+ uCCall(cb, helper, argc, argc, False);
+}
+
+UCodeBlock* SK_(instrument)(UCodeBlock* cb_in, Addr orig_addr)
+{
UCodeBlock* cb;
- Int i;
UInstr* u_in;
- BBCC* BBCC_node;
- Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr1,
- t_data_addr2, t_read, t_write;
- Int CC_size = -1; /* Shut gcc warnings up */
+ Int i, bb_info_i;
+ BB_info* bb_info;
+ Bool bb_seen_before = False;
+ Int t_read_addr, t_write_addr, t_read, t_write;
Addr x86_instr_addr = orig_addr;
- UInt x86_instr_size, data_size = INVALID_DATA_SIZE;
- Addr helper;
- Int argc;
- Bool BB_seen_before = False;
- Bool instrumented_Jcond = False;
- Bool has_rep_prefix = False;
- Addr BBCC_ptr0, BBCC_ptr;
+ UInt x86_instr_size, data_size = 0;
+ Bool instrumented_Jcc = False;
- /* Get BBCC (creating if necessary -- requires a counting pass over the BB
- * if it's the first time it's been seen), and point to start of the
- * BBCC array. */
- BBCC_node = get_BBCC(orig_addr, cb_in, /*remove=*/False, &BB_seen_before);
- BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array);
+ bb_info = get_BB_info(cb_in, orig_addr, &bb_seen_before);
+ bb_info_i = 0;
cb = VG_(setup_UCodeBlock)(cb_in);
- t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 = t_data_addr2 =
- t_read = t_write = INVALID_TEMPREG;
+ t_read_addr = t_write_addr = t_read = t_write = INVALID_TEMPREG;
for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
u_in = VG_(get_instr)(cb_in, i);
- /* What this is all about: we want to instrument each x86 instruction
- * translation. The end of these are marked in three ways. The three
- * ways, and the way we instrument them, are as follows:
- *
- * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
- * 2. UCode, Juncond --> UCode, Instrumentation, Juncond
- * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond
- *
- * The last UInstr in a basic block is always a Juncond. Jconds,
- * when they appear, are always second last. We check this with
- * various assertions.
- *
- * We must put the instrumentation before any jumps so that it is always
- * executed. We don't have to put the instrumentation before the INCEIP
- * (it could go after) but we do so for consistency.
- *
- * x86 instruction sizes are obtained from INCEIPs (for case 1) or
- * from .extra4b field of the final JMP (for case 2 & 3).
- *
- * Note that JIFZ is treated differently.
- *
- * The instrumentation is just a call to the appropriate helper function,
- * passing it the address of the instruction's CC.
- */
- if (instrumented_Jcond) sk_assert(u_in->opcode == JMP);
+ // We want to instrument each x86 instruction with a call to the
+ // appropriate simulation function, which depends on whether the
+ // instruction does memory data reads/writes. x86 instructions can
+ // end in three ways, and this is how they are instrumented:
+ //
+ // 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP
+ // 2. UCode, JMP --> UCode, Instrumentation, JMP
+ // 3. UCode, Jcc, JMP --> UCode, Instrumentation, Jcc, JMP
+ //
+ // The last UInstr in a BB is always a JMP. Jccs, when they appear,
+ // are always second last. This is checked with assertions.
+ // Instrumentation must go before any jumps. (JIFZ is the exception;
+ // if a JIFZ succeeds, no simulation is done for the instruction.)
+ //
+ // x86 instruction sizes are obtained from INCEIPs (for case 1) or
+ // from .extra4b field of the final JMP (for case 2 & 3).
+
+ if (instrumented_Jcc) sk_assert(u_in->opcode == JMP);
switch (u_in->opcode) {
- case NOP: case LOCK: case CALLM_E: case CALLM_S:
- break;
- /* For memory-ref instrs, copy the data_addr into a temporary to be
- * passed to the cachesim_* helper at the end of the instruction.
- */
+ // For memory-ref instrs, copy the data_addr into a temporary to be
+ // passed to the cachesim_* helper at the end of the instruction.
case LOAD:
+ case SSE3ag_MemRd_RegWr:
t_read = u_in->val1;
t_read_addr = newTemp(cb);
@@ -852,14 +548,10 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
break;
- case MMX2_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 8);
- /* fall through */
case FPU_R:
+ case MMX2_MemRd:
t_read = u_in->val2;
t_read_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr);
- data_size = ( u_in->size <= MIN_LINE_SIZE
- ? u_in->size
- : MIN_LINE_SIZE);
+ data_size = u_in->size;
VG_(copy_UInstr)(cb, u_in);
break;
@@ -867,40 +559,8 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
case MMX2a1_MemRd:
- sk_assert(u_in->size == 8);
- t_read = u_in->val3;
- t_read_addr = newTemp(cb);
- uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
- data_size = ( u_in->size <= MIN_LINE_SIZE
- ? u_in->size
- : MIN_LINE_SIZE);
- VG_(copy_UInstr)(cb, u_in);
- break;
-
case SSE2a_MemRd:
case SSE2a1_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
- t_read = u_in->val3;
- t_read_addr = newTemp(cb);
- uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
- /* 512 B data-sized instructions will be done inaccurately
- * but they're very rare and this avoids errors from
- * hitting more than two cache lines in the simulation. */
- data_size = ( u_in->size <= MIN_LINE_SIZE
- ? u_in->size
- : MIN_LINE_SIZE);
- VG_(copy_UInstr)(cb, u_in);
- break;
-
case SSE3a_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
- t_read = u_in->val3;
- t_read_addr = newTemp(cb);
- uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
- data_size = u_in->size;
- VG_(copy_UInstr)(cb, u_in);
- break;
-
case SSE3a1_MemRd:
- sk_assert(u_in->size == 8 || u_in->size == 16);
t_read = u_in->val3;
t_read_addr = newTemp(cb);
@@ -910,232 +570,74 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
break;
- case SSE3ag_MemRd_RegWr:
- sk_assert(u_in->size == 4 || u_in->size == 8);
- t_read = u_in->val1;
- t_read_addr = newTemp(cb);
- uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr);
- data_size = u_in->size;
- VG_(copy_UInstr)(cb, u_in);
- break;
-
- /* Note that we must set t_write_addr even for mod instructions;
- * That's how the code above determines whether it does a write.
- * Without it, it would think a mod instruction is a read.
- * As for the MOV, if it's a mod instruction it's redundant, but it's
- * not expensive and mod instructions are rare anyway. */
- case MMX2_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 8);
- /* fall through */
+ // Note that we must set t_write_addr even for mod instructions;
+ // That's how the code above determines whether it does a write.
+ // Without it, it would think a mod instruction is a read.
+ // As for the MOV, if it's a mod instruction it's redundant, but it's
+ // not expensive and mod instructions are rare anyway. */
case STORE:
case FPU_W:
+ case MMX2_MemWr:
t_write = u_in->val2;
t_write_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr);
- /* 28 and 108 B data-sized instructions will be done
- * inaccurately but they're very rare and this avoids errors
- * from hitting more than two cache lines in the simulation. */
- data_size = ( u_in->size <= MIN_LINE_SIZE
- ? u_in->size
- : MIN_LINE_SIZE);
+ data_size = u_in->size;
VG_(copy_UInstr)(cb, u_in);
break;
case SSE2a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
- /* fall through */
case SSE3a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
t_write = u_in->val3;
t_write_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
- /* 512 B data-sized instructions will be done inaccurately
- * but they're very rare and this avoids errors from
- * hitting more than two cache lines in the simulation. */
- data_size = ( u_in->size <= MIN_LINE_SIZE
- ? u_in->size
- : MIN_LINE_SIZE);
- VG_(copy_UInstr)(cb, u_in);
- break;
-
- /* For rep-prefixed instructions, log a single I-cache access
- * before the UCode loop that implements the repeated part, which
- * is where the multiple D-cache accesses are logged. */
- case JIFZ:
- has_rep_prefix = True;
-
- /* Setup 1st and only arg: CC addr */
- t_CC_addr = newTemp(cb);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
- uLiteral(cb, BBCC_ptr);
-
- /* Call helper */
- uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
- uCCall(cb, (Addr) & log_1I_0D_cache_access_JIFZ, 1, 1, False);
+ data_size = u_in->size;
VG_(copy_UInstr)(cb, u_in);
break;
-
- /* INCEIP: insert instrumentation */
+ // INCEIP: insert instrumentation
case INCEIP:
x86_instr_size = u_in->val1;
goto instrument_x86_instr;
- /* JMP: insert instrumentation if the first JMP */
+ // JMP: insert instrumentation if the first JMP
case JMP:
- if (instrumented_Jcond) {
+ if (instrumented_Jcc) {
sk_assert(CondAlways == u_in->cond);
sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
VG_(copy_UInstr)(cb, u_in);
- instrumented_Jcond = False; /* reset */
+ instrumented_Jcc = False; // rest
break;
- }
- /* The first JMP... instrument. */
+ } else {
+ // The first JMP... instrument.
if (CondAlways != u_in->cond) {
sk_assert(i+2 == VG_(get_num_instrs)(cb_in));
- instrumented_Jcond = True;
+ instrumented_Jcc = True;
} else {
sk_assert(i+1 == VG_(get_num_instrs)(cb_in));
}
-
- /* Get x86 instr size from final JMP. */
+ // Get x86 instr size from final JMP.
x86_instr_size = VG_(get_last_instr)(cb_in)->extra4b;
-
goto instrument_x86_instr;
-
-
- /* Code executed at the end of each x86 instruction. */
- instrument_x86_instr:
-
- /* Initialise the CC in the BBCC array appropriately if it
- * hasn't been initialised before. Then call appropriate sim
- * function, passing it the CC address. */
- sk_assert(x86_instr_size >= 1 &&
- x86_instr_size <= MAX_x86_INSTR_SIZE);
-
-#define IS_(X) (INVALID_TEMPREG != t_##X##_addr)
-
- if (!IS_(read) && !IS_(write)) {
- sk_assert(INVALID_DATA_SIZE == data_size);
- sk_assert(INVALID_TEMPREG == t_read_addr &&
- INVALID_TEMPREG == t_read &&
- INVALID_TEMPREG == t_write_addr &&
- INVALID_TEMPREG == t_write);
- CC_size = sizeof(iCC);
- if (!BB_seen_before)
- init_iCC((iCC*)BBCC_ptr, x86_instr_addr, x86_instr_size);
- helper = ( has_rep_prefix
- ? (Addr)0 /* no extra log needed */
- : (Addr) & log_1I_0D_cache_access
- );
- argc = 1;
-
- } else {
- sk_assert(4 == data_size || 2 == data_size || 1 == data_size ||
- 8 == data_size || 10 == data_size ||
- MIN_LINE_SIZE == data_size);
-
- if (IS_(read) && !IS_(write)) {
- CC_size = sizeof(idCC);
- /* If it uses 'rep', we've already logged the I-cache
- * access at the JIFZ UInstr (see JIFZ case below) so
- * don't do it here */
- helper = ( has_rep_prefix
- ? (Addr) & log_0I_1D_cache_access
- : (Addr) & log_1I_1D_cache_access
- );
- argc = 2;
- if (!BB_seen_before)
- init_idCC(ReadCC, (idCC*)BBCC_ptr, x86_instr_addr,
- x86_instr_size, data_size);
- sk_assert(INVALID_TEMPREG != t_read_addr &&
- INVALID_TEMPREG != t_read &&
- INVALID_TEMPREG == t_write_addr &&
- INVALID_TEMPREG == t_write);
- t_data_addr1 = t_read_addr;
-
- } else if (!IS_(read) && IS_(write)) {
- CC_size = sizeof(idCC);
- helper = ( has_rep_prefix
- ? (Addr) & log_0I_1D_cache_access
- : (Addr) & log_1I_1D_cache_access
- );
- argc = 2;
- if (!BB_seen_before)
- init_idCC(WriteCC, (idCC*)BBCC_ptr, x86_instr_addr,
- x86_instr_size, data_size);
- sk_assert(INVALID_TEMPREG == t_read_addr &&
- INVALID_TEMPREG == t_read &&
- INVALID_TEMPREG != t_write_addr &&
- INVALID_TEMPREG != t_write);
- t_data_addr1 = t_write_addr;
-
- } else {
- sk_assert(IS_(read) && IS_(write));
- sk_assert(INVALID_TEMPREG != t_read_addr &&
- INVALID_TEMPREG != t_read &&
- INVALID_TEMPREG != t_write_addr &&
- INVALID_TEMPREG != t_write);
- if (t_read == t_write) {
- CC_size = sizeof(idCC);
- helper = ( has_rep_prefix
- ? (Addr) & log_0I_1D_cache_access
- : (Addr) & log_1I_1D_cache_access
- );
- argc = 2;
- if (!BB_seen_before)
- init_idCC(ModCC, (idCC*)BBCC_ptr, x86_instr_addr,
- x86_instr_size, data_size);
- t_data_addr1 = t_read_addr;
- } else {
- CC_size = sizeof(iddCC);
- helper = ( has_rep_prefix
- ? (Addr) & log_0I_2D_cache_access
- : (Addr) & log_1I_2D_cache_access
- );
- argc = 3;
- if (!BB_seen_before)
- init_iddCC((iddCC*)BBCC_ptr, x86_instr_addr,
- x86_instr_size, data_size);
- t_data_addr1 = t_read_addr;
- t_data_addr2 = t_write_addr;
- }
- }
-#undef IS_
}
- /* Call the helper, if necessary */
- if ((Addr)0 != helper) {
-
- /* Setup 1st arg: CC addr */
- t_CC_addr = newTemp(cb);
- uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr);
- uLiteral(cb, BBCC_ptr);
-
- /* Call the helper */
- if (1 == argc)
- uInstr1(cb, CCALL, 0, TempReg, t_CC_addr);
- else if (2 == argc)
- uInstr2(cb, CCALL, 0, TempReg, t_CC_addr,
- TempReg, t_data_addr1);
- else if (3 == argc)
- uInstr3(cb, CCALL, 0, TempReg, t_CC_addr,
- TempReg, t_data_addr1,
- TempReg, t_data_addr2);
- else
- VG_(skin_panic)("argc... not 1 or 2 or 3?");
+ // Code executed at the end of each x86 instruction.
+ instrument_x86_instr:
+ // Large (eg. 28B, 108B, 512B) data-sized instructions will be
+ // done inaccurately but they're very rare and this avoids
+ // errors from hitting more than two cache lines in the
+ // simulation.
+ if (data_size > MIN_LINE_SIZE) data_size = MIN_LINE_SIZE;
- uCCall(cb, helper, argc, argc, False);
- }
+ end_of_x86_instr(cb, &bb_info->instrs[ bb_info_i ], bb_seen_before,
+ x86_instr_addr, x86_instr_size, data_size,
+ t_read, t_read_addr, t_write, t_write_addr);
- /* Copy original UInstr (INCEIP or JMP) */
+ // Copy original UInstr (INCEIP or JMP)
VG_(copy_UInstr)(cb, u_in);
- /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */
- BBCC_ptr += CC_size;
+ // Update loop state for next x86 instr
+ bb_info_i++;
x86_instr_addr += x86_instr_size;
- t_CC_addr = t_read_addr = t_write_addr = t_data_addr1 =
- t_data_addr2 = t_read = t_write = INVALID_TEMPREG;
- data_size...
[truncated message content] |
|
From: Nicholas N. <nj...@ca...> - 2004-07-06 22:46:49
|
CVS commit by nethercote:
minor
M +2 -0 cg_main.c 1.70
--- valgrind/cachegrind/cg_main.c #1.69:1.70
@@ -472,4 +472,6 @@ void end_of_x86_instr(UCodeBlock* cb, in
}
#undef IS_
+#undef INV
+
// Setup 1st arg: CC addr
do_details( i_node, bb_seen_before, instr_addr, instr_size, data_size );
|
|
From: Tom H. <th...@cy...> - 2004-07-07 07:16:24
|
CVS commit by thughes:
Only print a warning about not having identified the cache if we have
actually failed to recognise it...
M +0 -1 cg_main.c 1.71
--- valgrind/cachegrind/cg_main.c #1.70:1.71
@@ -1042,5 +1042,4 @@ void get_caches(cache_t* I1c, cache_t* D
// Then replace with any info we can get from CPUID.
res = get_caches_from_CPUID(I1c, D1c, L2c);
- res = -1;
// Then replace with any defined on the command line.
|
|
From: Nicholas N. <nj...@ca...> - 2004-07-07 13:22:57
|
On Wed, 7 Jul 2004, Tom Hughes wrote:
> Only print a warning about not having identified the cache if we have
> actually failed to recognise it...
>
> --- valgrind/cachegrind/cg_main.c #1.70:1.71
> @@ -1042,5 +1042,4 @@ void get_caches(cache_t* I1c, cache_t* D
> // Then replace with any info we can get from CPUID.
> res = get_caches_from_CPUID(I1c, D1c, L2c);
> - res = -1;
whoops... thanks
N
|
|
From: Nicholas N. <nj...@ca...> - 2004-07-20 13:29:08
|
CVS commit by nethercote:
comment typo
M +1 -1 cg_main.c 1.73
--- valgrind/cachegrind/cg_main.c #1.72:1.73
@@ -119,5 +119,5 @@ static fileCC *CC_table[N_FILE_ENTRIES];
// - table(BB_start_addr, list(instr_info))
// - For each BB, each instr_info in the list holds info about the
-// instruction (instr_size, instr_addr, etc), plue a pointer to its line
+// instruction (instr_size, instr_addr, etc), plus a pointer to its line
// CC. This node is what's passed to the simulation function.
// - When BBs are discarded the relevant list(instr_details) is freed.
|
|
From: Nicholas N. <nj...@ca...> - 2004-07-21 08:49:11
|
CVS commit by nethercote:
Type name wibble
M +4 -4 cg_main.c 1.74
--- valgrind/cachegrind/cg_main.c #1.73:1.74
@@ -128,5 +128,5 @@ struct _instr_info {
UChar instr_size;
UChar data_size;
- struct _lineCC* parent; // parent line-CC
+ lineCC* parent; // parent line-CC
};
|
|
From: Nicholas N. <nj...@ca...> - 2004-08-07 15:55:21
|
CVS commit by nethercote:
Add some missing 'static' annotations. Thanks to Josef W for spotting them.
M +4 -1 cg_main.c 1.75
--- valgrind/cachegrind/cg_main.c #1.74:1.75
@@ -358,4 +358,5 @@ void log_1I_2D_cache_access(instr_info*
/*------------------------------------------------------------*/
+static
BB_info* get_BB_info(UCodeBlock* cb_in, Addr orig_addr, Bool* bb_seen_before)
{
@@ -392,4 +393,5 @@ BB_info* get_BB_info(UCodeBlock* cb_in,
}
+static
void do_details( instr_info* n, Bool bb_seen_before,
Addr instr_addr, Int instr_size, Int data_size )
@@ -411,5 +413,5 @@ void do_details( instr_info* n, Bool bb_
}
-Bool is_valid_data_size(Int data_size)
+static Bool is_valid_data_size(Int data_size)
{
return (4 == data_size || 2 == data_size || 1 == data_size ||
@@ -418,4 +420,5 @@ Bool is_valid_data_size(Int data_size)
// Instrumentation for the end of each x86 instruction.
+static
void end_of_x86_instr(UCodeBlock* cb, instr_info* i_node, Bool bb_seen_before,
UInt instr_addr, UInt instr_size, UInt data_size,
|
|
From: Tom H. <th...@cy...> - 2004-10-06 13:50:53
|
CVS commit by thughes:
Cope with arguments over 510 characters in length when writing the
cachegrind output file.
CCMAIL: 908...@bu...
M +2 -2 cg_main.c 1.80
--- valgrind/cachegrind/cg_main.c #1.79:1.80
@@ -830,6 +830,6 @@ static void fprint_CC_table_and_calc_tot
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
for (i = 0; i < VG_(client_argc); i++) {
- VG_(sprintf)(buf, " %s", VG_(client_argv)[i]);
- VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
+ VG_(write)(fd, "", 1);
+ VG_(write)(fd, VG_(client_argv)[i], VG_(strlen)(VG_(client_argv)[i]));
}
// "events:" line
|
|
From: Tom H. <th...@cy...> - 2004-10-06 13:51:27
|
CVS commit by thughes:
Put back missing space...
M +1 -1 cg_main.c 1.81
--- valgrind/cachegrind/cg_main.c #1.80:1.81
@@ -830,5 +830,5 @@ static void fprint_CC_table_and_calc_tot
VG_(write)(fd, (void*)buf, VG_(strlen)(buf));
for (i = 0; i < VG_(client_argc); i++) {
- VG_(write)(fd, "", 1);
+ VG_(write)(fd, " ", 1);
VG_(write)(fd, VG_(client_argv)[i], VG_(strlen)(VG_(client_argv)[i]));
}
|
|
From: Dirk M. <mu...@kd...> - 2005-02-27 00:21:55
|
CVS commit by mueller:
fix compile (gcc 4.0)
M +1 -1 cg_main.c 1.84
--- valgrind/cachegrind/cg_main.c #1.83:1.84
@@ -660,5 +660,5 @@ UCodeBlock* SK_(instrument)(UCodeBlock*
/*------------------------------------------------------------*/
-#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 })
+#define UNDEFINED_CACHE { -1, -1, -1 }
static cache_t clo_I1_cache = UNDEFINED_CACHE;
|