From: Andi K. <an...@fi...> - 2009-04-27 09:07:22
|
Make error handling in oprofile make check more gentle Output more errors in make check before exiting. I found this useful while debugging large event files. Signed-off-by: Andi Kleen <ak...@li...> --- libop/op_events.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) Index: oprofile/libop/op_events.c =================================================================== --- oprofile.orig/libop/op_events.c 2009-04-01 22:57:37.000000000 +0200 +++ oprofile/libop/op_events.c 2009-04-27 10:34:51.000000000 +0200 @@ -403,20 +403,21 @@ /* usefull for make check */ -static void check_unit_mask(struct op_unit_mask const * um, +static int check_unit_mask(struct op_unit_mask const * um, char const * cpu_name) { u32 i; + int err = 0; if (!um->used) { fprintf(stderr, "um %s is not used\n", um->name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } if (um->unit_type_mask == utm_mandatory && um->num != 1) { fprintf(stderr, "mandatory um %s doesn't contain exactly one " "entry (%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } else if (um->unit_type_mask == utm_bitmask) { u32 default_mask = um->default_mask; for (i = 0; i < um->num; ++i) @@ -425,7 +426,7 @@ if (default_mask) { fprintf(stderr, "um %s default mask is not valid " "(%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } } else { for (i = 0; i < um->num; ++i) { @@ -436,9 +437,10 @@ if (i == um->num) { fprintf(stderr, "exclusive um %s default value is not " "valid (%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } } + return err; } static void arch_filter_events(op_cpu cpu_type) |
From: Andi K. <an...@fi...> - 2009-04-27 09:43:42
|
[resent without typo in emails] Make error handling in make check more gentle Output more errors in make check before exiting. I found this useful while debugging large event files. Signed-off-by: Andi Kleen <ak...@li...> --- libop/op_events.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) Index: oprofile/libop/op_events.c =================================================================== --- oprofile.orig/libop/op_events.c 2009-04-01 22:57:37.000000000 +0200 +++ oprofile/libop/op_events.c 2009-04-27 10:34:51.000000000 +0200 @@ -403,20 +403,21 @@ /* usefull for make check */ -static void check_unit_mask(struct op_unit_mask const * um, +static int check_unit_mask(struct op_unit_mask const * um, char const * cpu_name) { u32 i; + int err = 0; if (!um->used) { fprintf(stderr, "um %s is not used\n", um->name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } if (um->unit_type_mask == utm_mandatory && um->num != 1) { fprintf(stderr, "mandatory um %s doesn't contain exactly one " "entry (%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } else if (um->unit_type_mask == utm_bitmask) { u32 default_mask = um->default_mask; for (i = 0; i < um->num; ++i) @@ -425,7 +426,7 @@ if (default_mask) { fprintf(stderr, "um %s default mask is not valid " "(%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } } else { for (i = 0; i < um->num; ++i) { @@ -436,9 +437,10 @@ if (i == um->num) { fprintf(stderr, "exclusive um %s default value is not " "valid (%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } } + return err; } static void arch_filter_events(op_cpu cpu_type) |
From: Andi K. <an...@fi...> - 2009-04-27 15:36:28
|
Output more errors in make check before exiting. I found this useful while debugging large event files. Signed-off-by: Andi Kleen <ak...@li...> --- libop/op_events.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) Index: oprofile/libop/op_events.c =================================================================== --- oprofile.orig/libop/op_events.c 2009-04-01 22:57:37.000000000 +0200 +++ oprofile/libop/op_events.c 2009-04-27 10:34:51.000000000 +0200 @@ -403,20 +403,21 @@ /* usefull for make check */ -static void check_unit_mask(struct op_unit_mask const * um, +static int check_unit_mask(struct op_unit_mask const * um, char const * cpu_name) { u32 i; + int err = 0; if (!um->used) { fprintf(stderr, "um %s is not used\n", um->name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } if (um->unit_type_mask == utm_mandatory && um->num != 1) { fprintf(stderr, "mandatory um %s doesn't contain exactly one " "entry (%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } else if (um->unit_type_mask == utm_bitmask) { u32 default_mask = um->default_mask; for (i = 0; i < um->num; ++i) @@ -425,7 +426,7 @@ if (default_mask) { fprintf(stderr, "um %s default mask is not valid " "(%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } } else { for (i = 0; i < um->num; ++i) { @@ -436,9 +437,10 @@ if (i == um->num) { fprintf(stderr, "exclusive um %s default value is not " "valid (%s)\n", um->name, cpu_name); - exit(EXIT_FAILURE); + err = EXIT_FAILURE; } } + return err; } static void arch_filter_events(op_cpu cpu_type) |
From: Andi K. <an...@fi...> - 2009-04-27 15:36:36
|
Nehalem reports one counter in the arch perfmon bitmaps as supported which it actually doesn't support. This is kind of obsolete with the full Nehalem support, but let's add it anyways. Signed-off-by: Andi Kleen <ak...@li...> --- libop/op_hw_specific.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) Index: oprofile/libop/op_hw_specific.h =================================================================== --- oprofile.orig/libop/op_hw_specific.h 2008-08-28 23:57:46.000000000 +0200 +++ oprofile/libop/op_hw_specific.h 2009-04-27 12:01:28.000000000 +0200 @@ -11,11 +11,54 @@ #define num_to_mask(x) ((1U << (x)) - 1) +static inline int cpuid_vendor(char *vnd) +{ + union { + struct { + unsigned b,d,c; + }; + char v[12]; + } v; + unsigned eax; + asm("cpuid" : "=a" (eax), "=b" (v.b), "=c" (v.c), "=d" (v.d) : "0" (0)); + return !strncmp(v.v, vnd, 12); +} + +/* Work around Nehalem spec update AAJ79: CPUID incorrectly indicates + unhalted reference cycle architectural event is supported. We assume + steppings after C0 report correct data in CPUID. */ +static inline void workaround_nehalem_aaj79(unsigned *ebx) +{ + union { + unsigned eax; + struct { + unsigned stepping : 4; + unsigned model : 4; + unsigned family : 4; + unsigned type : 2; + unsigned res : 2; + unsigned ext_model : 4; + unsigned ext_family : 8; + unsigned res2 : 4; + }; + } v; + unsigned model; + + if (!cpuid_vendor("GenuineIntel")) + return; + asm("cpuid" : "=a" (v.eax) : "0" (1) : "ecx","ebx","edx"); + model = (v.ext_model << 4) + v.model; + if (v.family != 6 || model != 26 || v.stepping > 4) + return; + *ebx |= (1 << 2); /* disable unsupported event */ +} + static inline unsigned arch_get_filter(op_cpu cpu_type) { if (cpu_type == CPU_ARCH_PERFMON) { unsigned ebx, eax; asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : "ecx","edx"); + workaround_nehalem_aaj79(&ebx); return ebx & num_to_mask(eax >> 24); } return 0; |
From: Suravee S. <sur...@am...> - 2009-04-29 18:42:12
|
Andi, This patch looks fine. Suravee Andi Kleen wrote: > > Nehalem reports one counter in the arch perfmon bitmaps as supported > which it actually doesn't support. > > This is kind of obsolete with the full Nehalem support, but let's > add it anyways. > > Signed-off-by: Andi Kleen <ak...@li...> > > --- > libop/op_hw_specific.h | 43 +++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 43 insertions(+) > > Index: oprofile/libop/op_hw_specific.h > =================================================================== > --- oprofile.orig/libop/op_hw_specific.h 2008-08-28 > 23:57:46.000000000 +0200 > +++ oprofile/libop/op_hw_specific.h 2009-04-27 12:01:28.000000000 +0200 > @@ -11,11 +11,54 @@ > > #define num_to_mask(x) ((1U << (x)) - 1) > > +static inline int cpuid_vendor(char *vnd) > +{ > + union { > + struct { > + unsigned b,d,c; > + }; > + char v[12]; > + } v; > + unsigned eax; > + asm("cpuid" : "=a" (eax), "=b" (v.b), "=c" (v.c), "=d" (v.d) : > "0" (0)); > + return !strncmp(v.v, vnd, 12); > +} > + > +/* Work around Nehalem spec update AAJ79: CPUID incorrectly indicates > + unhalted reference cycle architectural event is supported. We assume > + steppings after C0 report correct data in CPUID. 
*/ > +static inline void workaround_nehalem_aaj79(unsigned *ebx) > +{ > + union { > + unsigned eax; > + struct { > + unsigned stepping : 4; > + unsigned model : 4; > + unsigned family : 4; > + unsigned type : 2; > + unsigned res : 2; > + unsigned ext_model : 4; > + unsigned ext_family : 8; > + unsigned res2 : 4; > + }; > + } v; > + unsigned model; > + > + if (!cpuid_vendor("GenuineIntel")) > + return; > + asm("cpuid" : "=a" (v.eax) : "0" (1) : "ecx","ebx","edx"); > + model = (v.ext_model << 4) + v.model; > + if (v.family != 6 || model != 26 || v.stepping > 4) > + return; > + *ebx |= (1 << 2); /* disable unsupported event */ > +} > + > static inline unsigned arch_get_filter(op_cpu cpu_type) > { > if (cpu_type == CPU_ARCH_PERFMON) { > unsigned ebx, eax; > asm("cpuid" : "=a" (eax), "=b" (ebx) : "0" (0xa) : > "ecx","edx"); > + workaround_nehalem_aaj79(&ebx); > return ebx & num_to_mask(eax >> 24); > } > return 0; > > ------------------------------------------------------------------------------ > Crystal Reports - New Free Runtime and 30 Day Trial > Check out the new simplified licensign option that enables unlimited > royalty-free distribution of the report engine for externally facing > server and web deployment. > http://p.sf.net/sfu/businessobjects > _______________________________________________ > oprofile-list mailing list > opr...@li... > https://lists.sourceforge.net/lists/listinfo/oprofile-list > |
From: Andi K. <an...@fi...> - 2009-04-27 15:36:38
|
Using the earlier added event files. That's a single patch because they all touch the same files. I also fixed the Intel manual reference in ophelp.c Signed-off-by: Andi Kleen <ak...@li...> --- events/Makefile.am | 3 +++ libop/op_cpu_type.c | 4 +++- libop/op_cpu_type.h | 2 ++ libop/op_events.c | 2 ++ utils/ophelp.c | 4 +++- 5 files changed, 13 insertions(+), 2 deletions(-) Index: oprofile/events/Makefile.am =================================================================== --- oprofile.orig/events/Makefile.am 2009-04-27 14:44:46.000000000 +0200 +++ oprofile/events/Makefile.am 2009-04-27 14:55:13.000000000 +0200 @@ -14,6 +14,9 @@ i386/p6_mobile/events i386/p6_mobile/unit_masks \ i386/core/events i386/core/unit_masks \ i386/arch_perfmon/events i386/arch_perfmon/unit_masks \ + i386/atom/events i386/atom/unit_masks \ + i386/core_i7/events i386/core_i7/unit_masks \ + i386/nehalem/events i386/nehalem/unit_masks \ ia64/ia64/events ia64/ia64/unit_masks \ ia64/itanium2/events ia64/itanium2/unit_masks \ ia64/itanium/events ia64/itanium/unit_masks \ Index: oprofile/libop/op_cpu_type.c =================================================================== --- oprofile.orig/libop/op_cpu_type.c 2009-04-27 14:44:46.000000000 +0200 +++ oprofile/libop/op_cpu_type.c 2009-04-27 14:57:09.000000000 +0200 @@ -78,6 +78,8 @@ { "ARM V7 PMNC", "arm/armv7", CPU_ARM_V7, 5 }, { "Intel Architectural Perfmon", "i386/arch_perfmon", CPU_ARCH_PERFMON, 0}, { "AMD64 family11h", "x86-64/family11h", CPU_FAMILY11H, 4 }, + { "Intel Core/i7", "i386/core_i7", CPU_CORE_I7, 4 }, + { "Intel Atom", "i386/atom", CPU_ATOM, 2 }, }; static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct cpu_descr); Index: oprofile/libop/op_cpu_type.h =================================================================== --- oprofile.orig/libop/op_cpu_type.h 2009-04-27 14:44:46.000000000 +0200 +++ oprofile/libop/op_cpu_type.h 2009-04-27 14:55:13.000000000 +0200 @@ -75,6 +75,8 @@ CPU_ARM_V7, /**< ARM V7 */ 
CPU_ARCH_PERFMON, /**< Intel architectural perfmon */ CPU_FAMILY11H, /**< AMD family 11h */ + CPU_CORE_I7, /* Intel Core i7 */ + CPU_ATOM, /* First generation Intel Atom */ MAX_CPU_TYPE } op_cpu; Index: oprofile/libop/op_events.c =================================================================== --- oprofile.orig/libop/op_events.c 2009-04-27 14:47:28.000000000 +0200 +++ oprofile/libop/op_events.c 2009-04-27 14:55:13.000000000 +0200 @@ -942,6 +942,8 @@ case CPU_FAMILY10: case CPU_ARCH_PERFMON: case CPU_FAMILY11H: + case CPU_ATOM: + case CPU_CORE_I7: descr->name = "CPU_CLK_UNHALTED"; break; Index: oprofile/utils/ophelp.c =================================================================== --- oprofile.orig/utils/ophelp.c 2009-04-27 14:44:46.000000000 +0200 +++ oprofile/utils/ophelp.c 2009-04-27 14:55:13.000000000 +0200 @@ -472,8 +472,10 @@ case CPU_P4_HT2: case CPU_CORE: case CPU_CORE_2: + case CPU_CORE_I7: + case CPU_ATOM: event_doc = - "See Intel Architecture Developer's Manual Volume 3, Appendix A and\n" + "See Intel Architecture Developer's Manual Volume 3B, Appendix A and\n" "Intel Architecture Optimization Reference Manual (730795-001)\n\n"; break; |
From: Suravee S. <sur...@am...> - 2009-04-29 18:37:44
|
Andi, Besides getting the latest code from CVS (I think Maynard has recently committed the POWER7 support patches), I think this patch looks fine. Suravee Andi Kleen wrote: > > Using the earlier added event files. > > That's a single patch because they all touch the same files. > > I also fixed the Intel manual reference in ophelp.c > > Signed-off-by: Andi Kleen <ak...@li...> > > --- > events/Makefile.am | 3 +++ > libop/op_cpu_type.c | 4 +++- > libop/op_cpu_type.h | 2 ++ > libop/op_events.c | 2 ++ > utils/ophelp.c | 4 +++- > 5 files changed, 13 insertions(+), 2 deletions(-) > > Index: oprofile/events/Makefile.am > =================================================================== > --- oprofile.orig/events/Makefile.am 2009-04-27 14:44:46.000000000 +0200 > +++ oprofile/events/Makefile.am 2009-04-27 14:55:13.000000000 +0200 > @@ -14,6 +14,9 @@ > i386/p6_mobile/events i386/p6_mobile/unit_masks \ > i386/core/events i386/core/unit_masks \ > i386/arch_perfmon/events i386/arch_perfmon/unit_masks \ > + i386/atom/events i386/atom/unit_masks \ > + i386/core_i7/events i386/core_i7/unit_masks \ > + i386/nehalem/events i386/nehalem/unit_masks \ > ia64/ia64/events ia64/ia64/unit_masks \ > ia64/itanium2/events ia64/itanium2/unit_masks \ > ia64/itanium/events ia64/itanium/unit_masks \ > Index: oprofile/libop/op_cpu_type.c > =================================================================== > --- oprofile.orig/libop/op_cpu_type.c 2009-04-27 14:44:46.000000000 +0200 > +++ oprofile/libop/op_cpu_type.c 2009-04-27 14:57:09.000000000 +0200 > @@ -78,6 +78,8 @@ > { "ARM V7 PMNC", "arm/armv7", CPU_ARM_V7, 5 }, > { "Intel Architectural Perfmon", "i386/arch_perfmon", > CPU_ARCH_PERFMON, 0}, > { "AMD64 family11h", "x86-64/family11h", CPU_FAMILY11H, 4 }, > + { "Intel Core/i7", "i386/core_i7", CPU_CORE_I7, 4 }, > + { "Intel Atom", "i386/atom", CPU_ATOM, 2 }, > }; > > static size_t const nr_cpu_descrs = sizeof(cpu_descrs) / sizeof(struct > cpu_descr); > Index: oprofile/libop/op_cpu_type.h > 
=================================================================== > --- oprofile.orig/libop/op_cpu_type.h 2009-04-27 14:44:46.000000000 +0200 > +++ oprofile/libop/op_cpu_type.h 2009-04-27 14:55:13.000000000 +0200 > @@ -75,6 +75,8 @@ > CPU_ARM_V7, /**< ARM V7 */ > CPU_ARCH_PERFMON, /**< Intel architectural perfmon */ > CPU_FAMILY11H, /**< AMD family 11h */ > + CPU_CORE_I7, /* Intel Core i7 */ > + CPU_ATOM, /* First generation Intel Atom */ > MAX_CPU_TYPE > } op_cpu; > > Index: oprofile/libop/op_events.c > =================================================================== > --- oprofile.orig/libop/op_events.c 2009-04-27 14:47:28.000000000 +0200 > +++ oprofile/libop/op_events.c 2009-04-27 14:55:13.000000000 +0200 > @@ -942,6 +942,8 @@ > case CPU_FAMILY10: > case CPU_ARCH_PERFMON: > case CPU_FAMILY11H: > + case CPU_ATOM: > + case CPU_CORE_I7: > descr->name = "CPU_CLK_UNHALTED"; > break; > > Index: oprofile/utils/ophelp.c > =================================================================== > --- oprofile.orig/utils/ophelp.c 2009-04-27 14:44:46.000000000 +0200 > +++ oprofile/utils/ophelp.c 2009-04-27 14:55:13.000000000 +0200 > @@ -472,8 +472,10 @@ > case CPU_P4_HT2: > case CPU_CORE: > case CPU_CORE_2: > + case CPU_CORE_I7: > + case CPU_ATOM: > event_doc = > - "See Intel Architecture Developer's Manual > Volume 3, Appendix A and\n" > + "See Intel Architecture Developer's Manual > Volume 3B, Appendix A and\n" > "Intel Architecture Optimization Reference > Manual (730795-001)\n\n"; > break; > > > ------------------------------------------------------------------------------ > Crystal Reports - New Free Runtime and 30 Day Trial > Check out the new simplified licensign option that enables unlimited > royalty-free distribution of the report engine for externally facing > server and web deployment. > http://p.sf.net/sfu/businessobjects > _______________________________________________ > oprofile-list mailing list > opr...@li... 
> https://lists.sourceforge.net/lists/listinfo/oprofile-list > |
From: Andi K. <an...@fi...> - 2009-04-27 15:36:28
|
include:foo includes events from another file um:a,b,c merges several existing unit masks for an event. Signed-off-by: Andi Kleen <ak...@li...> diff -urp oprofile/libop/op_events.c ../oprofile/libop/op_events.c --- oprofile/libop/op_events.c 2009-01-19 20:01:40.000000000 +0100 +++ ../oprofile/libop/op_events.c 2009-01-14 08:29:55.000000000 +0100 @@ -29,6 +29,22 @@ static char const * filename; static unsigned int line_nr; static void delete_event(struct op_event * event); +static void read_events(char const * file); +static void read_unit_masks(char const * file); +static void free_unit_mask(struct op_unit_mask * um); + +static char *build_fn(const char *cpu_name, const char *fn) +{ + char *s; + static const char *dir; + if (dir == NULL) + dir = getenv("OPROFILE_EVENTS_DIR"); + if (dir == NULL) + dir = OP_DATADIR; + s = xmalloc(strlen(dir) + strlen(cpu_name) + strlen(fn) + 5); + sprintf(s, "%s/%s/%s", dir, cpu_name, fn); + return s; +} static void parse_error(char const * context) { @@ -72,6 +88,23 @@ static u64 parse_long_hex(char const * s return value; } +static void include_um(const char *start, const char *end) +{ + char *s; + char cpu[end - start + 1]; + int old_line_nr; + const char *old_filename; + + strncpy(cpu, start, end - start); + cpu[end - start] = 0; + s = build_fn(cpu, "unit_masks"); + old_line_nr = line_nr; + old_filename = filename; + read_unit_masks(s); + line_nr = old_line_nr; + filename = old_filename; + free(s); +} /* name:MESI type:bitmask default:0x0f */ static void parse_um(struct op_unit_mask * um, char const * line) @@ -97,6 +130,14 @@ static void parse_um(struct op_unit_mask ++tagend; + if (strisprefix(start, "include")) { + if (seen_name + seen_type + seen_default > 0) + parse_error("include must be on its own"); + free_unit_mask(um); + include_um(tagend, valueend); + return; + } + if (strisprefix(start, "name")) { if (seen_name) parse_error("duplicate name: tag"); @@ -128,6 +169,11 @@ static void parse_um(struct op_unit_mask tagend = 
valueend; start = valueend; } + + if (!um->name) + parse_error("Missing name for unit mask"); + if (!seen_type) + parse_error("Missing type for unit mask"); } @@ -161,6 +207,11 @@ static struct op_unit_mask * new_unit_ma return um; } +static void free_unit_mask(struct op_unit_mask * um) +{ + list_del(&um->um_next); + free(um); +} /* * name:zero type:mandatory default:0x0 @@ -230,21 +281,68 @@ static u32 parse_counter_mask(char const return mask; } - -static struct op_unit_mask * find_um(char const * value) +static struct op_unit_mask * try_find_um(char const * value) { struct list_head * pos; list_for_each(pos, &um_list) { struct op_unit_mask * um = list_entry(pos, struct op_unit_mask, um_next); - if (strcmp(value, um->name) == 0) + if (strcmp(value, um->name) == 0) { + um->used = 1; return um; + } } + return NULL; +} +static struct op_unit_mask * find_um(char const * value) +{ + struct op_unit_mask * um = try_find_um(value); + if (um) + return um; fprintf(stderr, "oprofile: could not find unit mask %s\n", value); exit(EXIT_FAILURE); } +/* um:a,b,c,d merge multiple unit masks */ +static struct op_unit_mask * merge_um(char * value) +{ + int num; + char *s; + struct op_unit_mask *new, *um; + enum unit_mask_type type = -1U; + + um = try_find_um(value); + if (um) + return um; + + new = new_unit_mask(); + new->name = xstrdup(value); + new->used = 1; + num = 0; + while ((s = strsep(&value, ",")) != NULL) { + unsigned c; + um = find_um(s); + if (type == -1U) + type = um->unit_type_mask; + if (um->unit_type_mask != type) + parse_error("combined unit mask must be all the same types"); + if (type != utm_bitmask && type != utm_exclusive) + parse_error("combined unit mask must be all bitmasks or exclusive"); + new->default_mask |= um->default_mask; + new->num += um->num; + if (new->num > MAX_UNIT_MASK) + parse_error("too many members in combined unit mask"); + for (c = 0; c < um->num; c++, num++) { + new->um[num] = um->um[c]; + new->um[num].desc = xstrdup(new->um[num].desc); + 
} + } + if (type == -1U) + parse_error("Empty unit mask"); + new->unit_type_mask = type; + return new; +} /* parse either a "tag:value" or a ": trailing description string" */ static int next_token(char const ** cp, char ** name, char ** value) @@ -290,6 +388,20 @@ static int next_token(char const ** cp, return 1; } +static void include_events (char *value) +{ + char * event_file; + const char *old_filename; + int old_line_nr; + + event_file = build_fn(value, "events"); + old_line_nr = line_nr; + old_filename = filename; + read_events(event_file); + line_nr = old_line_nr; + filename = old_filename; + free(event_file); +} static struct op_event * new_event(void) { @@ -300,6 +412,11 @@ static struct op_event * new_event(void) return event; } +static void free_event(struct op_event * event) +{ + list_del(&event->event_next); + free(event); +} /* event:0x00 counters:0 um:zero minimum:4096 name:ISSUES : Total issues */ static void read_events(char const * file) @@ -311,6 +428,7 @@ static void read_events(char const * fil char const * c; int seen_event, seen_counters, seen_um, seen_minimum, seen_name; FILE * fp = fopen(file, "r"); + int tags; if (!fp) { fprintf(stderr, "oprofile: could not open event description file %s\n", file); @@ -326,6 +444,7 @@ static void read_events(char const * fil if (empty_line(line) || comment_line(line)) goto next; + tags = 0; seen_name = 0; seen_event = 0; seen_counters = 0; @@ -364,8 +483,10 @@ static void read_events(char const * fil if (seen_um) parse_error("duplicate um: tag"); seen_um = 1; - event->unit = find_um(value); - event->unit->used = 1; + if (strchr(value, ',')) + event->unit = merge_um(value); + else + event->unit = find_um(value); free(value); } else if (strcmp(name, "minimum") == 0) { if (seen_minimum) @@ -378,9 +499,19 @@ static void read_events(char const * fil } else if (strcmp(name, "filter") == 0) { event->filter = parse_int(value); free(value); + } else if (strcmp(name, "include") == 0) { + if (tags > 0) + 
parse_error("tags before include:"); + free_event(event); + include_events(value); + free(value); + c = skip_ws(c); + if (*c != '\0' && *c != '#') + parse_error("non whitespace after include:"); } else { parse_error("unknown tag"); } + tags++; free(name); } @@ -446,53 +579,41 @@ static void arch_filter_events(op_cpu cp } } -static void load_events(op_cpu cpu_type) +static void load_events_name(const char *cpu_name) { - char const * cpu_name = op_get_cpu_name(cpu_type); - char * event_dir; char * event_file; char * um_file; - char * dir; + + event_file = build_fn(cpu_name, "events"); + um_file = build_fn(cpu_name, "unit_masks"); + + read_unit_masks(um_file); + read_events(event_file); + + free(um_file); + free(event_file); +} + +static void load_events(op_cpu cpu_type) +{ + const char * cpu_name = op_get_cpu_name(cpu_type); struct list_head * pos; + int err = 0; if (!list_empty(&events_list)) return; - dir = getenv("OPROFILE_EVENTS_DIR"); - if (dir == NULL) - dir = OP_DATADIR; - - event_dir = xmalloc(strlen(dir) + strlen("/") + strlen(cpu_name) + - strlen("/") + 1); - strcpy(event_dir, dir); - strcat(event_dir, "/"); - - strcat(event_dir, cpu_name); - strcat(event_dir, "/"); - - event_file = xmalloc(strlen(event_dir) + strlen("events") + 1); - strcpy(event_file, event_dir); - strcat(event_file, "events"); - - um_file = xmalloc(strlen(event_dir) + strlen("unit_masks") + 1); - strcpy(um_file, event_dir); - strcat(um_file, "unit_masks"); - - read_unit_masks(um_file); - read_events(event_file); + load_events_name(cpu_name); arch_filter_events(cpu_type); /* sanity check: all unit mask must be used */ list_for_each(pos, &um_list) { struct op_unit_mask * um = list_entry(pos, struct op_unit_mask, um_next); - - check_unit_mask(um, cpu_name); + err |= check_unit_mask(um, cpu_name); } - - free(um_file); - free(event_file); - free(event_dir); + if (err) + exit(err); } struct list_head * op_events(op_cpu cpu_type) |
From: Maynard J. <may...@us...> - 2009-04-30 00:58:47
|
Andi Kleen wrote: > include:foo includes events from another file > > um:a,b,c merges several existing unit masks for an event. I committed this patch after making two minor changes: - Moved the "err |= check_unit_mask" stuff to the 'make check' patch - Removed some white space at the end of lines -Maynard > > Signed-off-by: Andi Kleen <ak...@li...> > > diff -urp oprofile/libop/op_events.c ../oprofile/libop/op_events.c > --- oprofile/libop/op_events.c 2009-01-19 20:01:40.000000000 +0100 > +++ ../oprofile/libop/op_events.c 2009-01-14 08:29:55.000000000 +0100 > @@ -29,6 +29,22 @@ static char const * filename; > static unsigned int line_nr; > > static void delete_event(struct op_event * event); > +static void read_events(char const * file); > +static void read_unit_masks(char const * file); > +static void free_unit_mask(struct op_unit_mask * um); > + > +static char *build_fn(const char *cpu_name, const char *fn) > +{ > + char *s; > + static const char *dir; > + if (dir == NULL) > + dir = getenv("OPROFILE_EVENTS_DIR"); > + if (dir == NULL) > + dir = OP_DATADIR; > + s = xmalloc(strlen(dir) + strlen(cpu_name) + strlen(fn) + 5); > + sprintf(s, "%s/%s/%s", dir, cpu_name, fn); > + return s; > +} > > static void parse_error(char const * context) > { > @@ -72,6 +88,23 @@ static u64 parse_long_hex(char const * s > return value; > } > > +static void include_um(const char *start, const char *end) > +{ > + char *s; > + char cpu[end - start + 1]; > + int old_line_nr; > + const char *old_filename; > + > + strncpy(cpu, start, end - start); > + cpu[end - start] = 0; > + s = build_fn(cpu, "unit_masks"); > + old_line_nr = line_nr; > + old_filename = filename; > + read_unit_masks(s); > + line_nr = old_line_nr; > + filename = old_filename; > + free(s); > +} > > /* name:MESI type:bitmask default:0x0f */ > static void parse_um(struct op_unit_mask * um, char const * line) > @@ -97,6 +130,14 @@ static void parse_um(struct op_unit_mask > > ++tagend; > > + if (strisprefix(start, "include")) { 
> + if (seen_name + seen_type + seen_default > 0) > + parse_error("include must be on its own"); > + free_unit_mask(um); > + include_um(tagend, valueend); > + return; > + } > + > if (strisprefix(start, "name")) { > if (seen_name) > parse_error("duplicate name: tag"); > @@ -128,6 +169,11 @@ static void parse_um(struct op_unit_mask > tagend = valueend; > start = valueend; > } > + > + if (!um->name) > + parse_error("Missing name for unit mask"); > + if (!seen_type) > + parse_error("Missing type for unit mask"); > } > > > @@ -161,6 +207,11 @@ static struct op_unit_mask * new_unit_ma > return um; > } > > +static void free_unit_mask(struct op_unit_mask * um) > +{ > + list_del(&um->um_next); > + free(um); > +} > > /* > * name:zero type:mandatory default:0x0 > @@ -230,21 +281,68 @@ static u32 parse_counter_mask(char const > return mask; > } > > - > -static struct op_unit_mask * find_um(char const * value) > +static struct op_unit_mask * try_find_um(char const * value) > { > struct list_head * pos; > > list_for_each(pos, &um_list) { > struct op_unit_mask * um = list_entry(pos, struct op_unit_mask, um_next); > - if (strcmp(value, um->name) == 0) > + if (strcmp(value, um->name) == 0) { > + um->used = 1; > return um; > + } > } > + return NULL; > +} > > +static struct op_unit_mask * find_um(char const * value) > +{ > + struct op_unit_mask * um = try_find_um(value); > + if (um) > + return um; > fprintf(stderr, "oprofile: could not find unit mask %s\n", value); > exit(EXIT_FAILURE); > } > > +/* um:a,b,c,d merge multiple unit masks */ > +static struct op_unit_mask * merge_um(char * value) > +{ > + int num; > + char *s; > + struct op_unit_mask *new, *um; > + enum unit_mask_type type = -1U; > + > + um = try_find_um(value); > + if (um) > + return um; > + > + new = new_unit_mask(); > + new->name = xstrdup(value); > + new->used = 1; > + num = 0; > + while ((s = strsep(&value, ",")) != NULL) { > + unsigned c; > + um = find_um(s); > + if (type == -1U) > + type = um->unit_type_mask; > + 
if (um->unit_type_mask != type) > + parse_error("combined unit mask must be all the same types"); > + if (type != utm_bitmask && type != utm_exclusive) > + parse_error("combined unit mask must be all bitmasks or exclusive"); > + new->default_mask |= um->default_mask; > + new->num += um->num; > + if (new->num > MAX_UNIT_MASK) > + parse_error("too many members in combined unit mask"); > + for (c = 0; c < um->num; c++, num++) { > + new->um[num] = um->um[c]; > + new->um[num].desc = xstrdup(new->um[num].desc); > + } > + } > + if (type == -1U) > + parse_error("Empty unit mask"); > + new->unit_type_mask = type; > + return new; > +} > > /* parse either a "tag:value" or a ": trailing description string" */ > static int next_token(char const ** cp, char ** name, char ** value) > @@ -290,6 +388,20 @@ static int next_token(char const ** cp, > return 1; > } > > +static void include_events (char *value) > +{ > + char * event_file; > + const char *old_filename; > + int old_line_nr; > + > + event_file = build_fn(value, "events"); > + old_line_nr = line_nr; > + old_filename = filename; > + read_events(event_file); > + line_nr = old_line_nr; > + filename = old_filename; > + free(event_file); > +} > > static struct op_event * new_event(void) > { > @@ -300,6 +412,11 @@ static struct op_event * new_event(void) > return event; > } > > +static void free_event(struct op_event * event) > +{ > + list_del(&event->event_next); > + free(event); > +} > > /* event:0x00 counters:0 um:zero minimum:4096 name:ISSUES : Total issues */ > static void read_events(char const * file) > @@ -311,6 +428,7 @@ static void read_events(char const * fil > char const * c; > int seen_event, seen_counters, seen_um, seen_minimum, seen_name; > FILE * fp = fopen(file, "r"); > + int tags; > > if (!fp) { > fprintf(stderr, "oprofile: could not open event description file %s\n", file); > @@ -326,6 +444,7 @@ static void read_events(char const * fil > if (empty_line(line) || comment_line(line)) > goto next; > > + tags = 0; > 
seen_name = 0; > seen_event = 0; > seen_counters = 0; > @@ -364,8 +483,10 @@ static void read_events(char const * fil > if (seen_um) > parse_error("duplicate um: tag"); > seen_um = 1; > - event->unit = find_um(value); > - event->unit->used = 1; > + if (strchr(value, ',')) > + event->unit = merge_um(value); > + else > + event->unit = find_um(value); > free(value); > } else if (strcmp(name, "minimum") == 0) { > if (seen_minimum) > @@ -378,9 +499,19 @@ static void read_events(char const * fil > } else if (strcmp(name, "filter") == 0) { > event->filter = parse_int(value); > free(value); > + } else if (strcmp(name, "include") == 0) { > + if (tags > 0) > + parse_error("tags before include:"); > + free_event(event); > + include_events(value); > + free(value); > + c = skip_ws(c); > + if (*c != '\0' && *c != '#') > + parse_error("non whitespace after include:"); > } else { > parse_error("unknown tag"); > } > + tags++; > > free(name); > } > @@ -446,53 +579,41 @@ static void arch_filter_events(op_cpu cp > } > } > > -static void load_events(op_cpu cpu_type) > +static void load_events_name(const char *cpu_name) > { > - char const * cpu_name = op_get_cpu_name(cpu_type); > - char * event_dir; > char * event_file; > char * um_file; > - char * dir; > + > + event_file = build_fn(cpu_name, "events"); > + um_file = build_fn(cpu_name, "unit_masks"); > + > + read_unit_masks(um_file); > + read_events(event_file); > + > + free(um_file); > + free(event_file); > +} > + > +static void load_events(op_cpu cpu_type) > +{ > + const char * cpu_name = op_get_cpu_name(cpu_type); > struct list_head * pos; > + int err = 0; > > if (!list_empty(&events_list)) > return; > > - dir = getenv("OPROFILE_EVENTS_DIR"); > - if (dir == NULL) > - dir = OP_DATADIR; > - > - event_dir = xmalloc(strlen(dir) + strlen("/") + strlen(cpu_name) + > - strlen("/") + 1); > - strcpy(event_dir, dir); > - strcat(event_dir, "/"); > - > - strcat(event_dir, cpu_name); > - strcat(event_dir, "/"); > - > - event_file = 
xmalloc(strlen(event_dir) + strlen("events") + 1); > - strcpy(event_file, event_dir); > - strcat(event_file, "events"); > - > - um_file = xmalloc(strlen(event_dir) + strlen("unit_masks") + 1); > - strcpy(um_file, event_dir); > - strcat(um_file, "unit_masks"); > - > - read_unit_masks(um_file); > - read_events(event_file); > + load_events_name(cpu_name); > > arch_filter_events(cpu_type); > > /* sanity check: all unit mask must be used */ > list_for_each(pos, &um_list) { > struct op_unit_mask * um = list_entry(pos, struct op_unit_mask, um_next); > - > - check_unit_mask(um, cpu_name); > + err |= check_unit_mask(um, cpu_name); > } > - > - free(um_file); > - free(event_file); > - free(event_dir); > + if (err) > + exit(err); > } > > struct list_head * op_events(op_cpu cpu_type) |
From: Andi K. <an...@fi...> - 2009-04-27 15:36:31
|
This just includes the nehalem events. The reason it's separate is that I eventually hope to readd the uncore events, which are specific to Bloomfield, so would need a separate file. To avoid reshuffling the events later start out with a separate file. The naming is unfortunate, but follows the existing convention (core 2) and I didn't come up with a better one. Signed-off-by: Andi Kleen <ak...@li...> --- events/i386/core_i7/events | 6 ++++++ events/i386/core_i7/unit_masks | 1 + 2 files changed, 7 insertions(+) Index: oprofile/events/i386/core_i7/events =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ oprofile/events/i386/core_i7/events 2009-04-27 12:27:57.000000000 +0200 @@ -0,0 +1,6 @@ +# +# Intel Core i7 "Bloomfield" / Xeon EP 75xx events +# right now this is only the shared events included for the Nehalem core, +# but later we'll add here the uncore events specific to this chip +# +include:i386/nehalem Index: oprofile/events/i386/core_i7/unit_masks =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ oprofile/events/i386/core_i7/unit_masks 2009-04-27 12:15:07.000000000 +0200 @@ -0,0 +1 @@ +include:i386/nehalem |
From: Suravee S. <sur...@am...> - 2009-04-29 18:39:18
|
Andi, This patch looks fine. Suravee Andi Kleen wrote: > > This just includes the nehalem events. The reason it's separate > is that I eventually hope to readd the uncore events, which > are specific to Bloomfield, so would need a separate file. > To avoid reshuffling the events later start out with > a separate file. > > The naming is unfortunate, but follows the existing convention (core 2) > and I didn't come up with a better one. > > Signed-off-by: Andi Kleen <ak...@li...> > > --- > events/i386/core_i7/events | 6 ++++++ > events/i386/core_i7/unit_masks | 1 + > 2 files changed, 7 insertions(+) > > Index: oprofile/events/i386/core_i7/events > =================================================================== > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ oprofile/events/i386/core_i7/events 2009-04-27 12:27:57.000000000 +0200 > @@ -0,0 +1,6 @@ > +# > +# Intel Core i7 "Bloomfield" / Xeon EP 75xx events > +# right now this is only the shared events included for the Nehalem core, > +# but later we'll add here the uncore events specific to this chip > +# > +include:i386/nehalem > Index: oprofile/events/i386/core_i7/unit_masks > =================================================================== > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ oprofile/events/i386/core_i7/unit_masks 2009-04-27 > 12:15:07.000000000 +0200 > @@ -0,0 +1 @@ > +include:i386/nehalem > > ------------------------------------------------------------------------------ > Crystal Reports - New Free Runtime and 30 Day Trial > Check out the new simplified licensign option that enables unlimited > royalty-free distribution of the report engine for externally facing > server and web deployment. > http://p.sf.net/sfu/businessobjects > _______________________________________________ > oprofile-list mailing list > opr...@li... > https://lists.sourceforge.net/lists/listinfo/oprofile-list > |
From: Andi K. <an...@fi...> - 2009-04-27 15:36:33
|
Some of the event descriptions in upcoming event files are rather large and don't fit on a line. So add word wrap code. Signed-off-by: Andi Kleen <ak...@li...> --- utils/ophelp.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) Index: oprofile/utils/ophelp.c =================================================================== --- oprofile.orig/utils/ophelp.c 2009-04-01 22:57:37.000000000 +0200 +++ oprofile/utils/ophelp.c 2009-04-27 12:00:45.000000000 +0200 @@ -61,6 +61,23 @@ } } +#define LINE_LEN 79 + +static void word_wrap(int indent, int *column, char *msg) +{ + while (*msg) { + int wlen = strcspn(msg, " "); + if (*column + wlen > LINE_LEN) { + printf("\n%*s", indent, ""); + *column = indent; + } + printf("%.*s ", wlen, msg); + *column += wlen + 1; + msg += wlen; + msg += strspn(msg, " "); + } +} + /** * help_for_event - output event name and description * @param i event number @@ -69,9 +86,11 @@ */ static void help_for_event(struct op_event * event) { + int column; uint i, j; uint mask; size_t nr_counters; + char buf[32]; do_arch_specific_event_help(event); nr_counters = op_get_nr_counters(cpu_type); @@ -104,7 +123,12 @@ if(event->ext != NULL) printf(" (ext: %s)", event->ext); - printf("\n\t%s (min count: %d)\n", event->desc, event->min_count); + printf(")\n\t"); + column = 8; + word_wrap(8, &column, event->desc); + snprintf(buf, sizeof buf, "(min count: %d)", event->min_count); + word_wrap(8, &column, buf); + putchar('\n'); if (strcmp(event->unit->name, "zero")) { @@ -113,9 +137,11 @@ printf("\t----------\n"); for (j = 0; j < event->unit->num; j++) { - printf("\t0x%.2x: %s\n", - event->unit->um[j].value, - event->unit->um[j].desc); + printf("\t0x%.2x: ", + event->unit->um[j].value); + column = 14; + word_wrap(14, &column, event->unit->um[j].desc); + putchar('\n'); } } } |
From: Andi K. <an...@fi...> - 2009-04-27 15:36:40
|
Signed-off-by: Andi Kleen <ak...@li...> diff -urp oprofile/events/i386/arch_perfmon/events ../oprofile/events/i386/arch_perfmon/events --- oprofile/events/i386/arch_perfmon/events 2009-01-19 20:01:40.000000000 +0100 +++ ../oprofile/events/i386/arch_perfmon/events 2009-01-12 23:20:09.000000000 +0100 @@ -3,8 +3,8 @@ # event:0x3c counters:cpuid um:zero minimum:6000 filter:0 name:CPU_CLK_UNHALTED : Clock cycles when not halted event:0x3c counters:cpuid um:one minimum:6000 filter:2 name:UNHALTED_REFERENCE_CYCLES : Unhalted reference cycles -event:0xc0 counters:cpuid um:zero minimum:6000 filter:1 name:INST_RETIRED_ANY_P : number of instructions retired -event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES : L2 cache demand requests from this core that missed the L2 -event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : L2 cache demand requests from this core +event:0xc0 counters:cpuid um:one minimum:6000 filter:1 name:INST_RETIRED : number of instructions retired +event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES : Last level cache demand requests from this core that missed the LLC +event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : Last level cache demand requests from this core event:0xc4 counters:cpuid um:zero minimum:500 filter:6 name:BR_INST_RETIRED : number of branch instructions retired event:0xc5 counters:cpuid um:zero minimum:500 filter:7 name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired (precise) |
From: Suravee S. <sur...@am...> - 2009-04-29 18:44:43
|
Andi, This patch looks fine. Suravee Andi Kleen wrote: > > Signed-off-by: Andi Kleen <ak...@li...> > > diff -urp oprofile/events/i386/arch_perfmon/events > ../oprofile/events/i386/arch_perfmon/events > --- oprofile/events/i386/arch_perfmon/events 2009-01-19 > 20:01:40.000000000 +0100 > +++ ../oprofile/events/i386/arch_perfmon/events 2009-01-12 > 23:20:09.000000000 +0100 > @@ -3,8 +3,8 @@ > # > event:0x3c counters:cpuid um:zero minimum:6000 filter:0 > name:CPU_CLK_UNHALTED : Clock cycles when not halted > event:0x3c counters:cpuid um:one minimum:6000 filter:2 > name:UNHALTED_REFERENCE_CYCLES : Unhalted reference cycles > -event:0xc0 counters:cpuid um:zero minimum:6000 filter:1 > name:INST_RETIRED_ANY_P : number of instructions retired > -event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES > : L2 cache demand requests from this core that missed the L2 > -event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : > L2 cache demand requests from this core > +event:0xc0 counters:cpuid um:one minimum:6000 filter:1 > name:INST_RETIRED : number of instructions retired > +event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES > : Last level cache demand requests from this core that missed the LLC > +event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : > Last level cache demand requests from this core > event:0xc4 counters:cpuid um:zero minimum:500 filter:6 > name:BR_INST_RETIRED : number of branch instructions retired > event:0xc5 counters:cpuid um:zero minimum:500 filter:7 > name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired > (precise) > > ------------------------------------------------------------------------------ > Crystal Reports - New Free Runtime and 30 Day Trial > Check out the new simplified licensign option that enables unlimited > royalty-free distribution of the report engine for externally facing > server and web deployment. 
> http://p.sf.net/sfu/businessobjects > _______________________________________________ > oprofile-list mailing list > opr...@li... > https://lists.sourceforge.net/lists/listinfo/oprofile-list > |
From: Andi K. <an...@fi...> - 2009-04-29 19:01:44
|
On Wed, Apr 29, 2009 at 01:44:25PM -0500, Suravee Suthikulpanit wrote: > Andi, > > This patch looks fine. Thanks for the review. -Andi |
From: Andi K. <an...@fi...> - 2009-04-27 15:36:31
|
Signed-off-by: Andi Kleen <ak...@li...> --- events/i386/atom/events | 80 +++++++++++++++++++++++++++++ events/i386/atom/unit_masks | 120 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+) Index: oprofile/events/i386/atom/events =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ oprofile/events/i386/atom/events 2009-04-27 12:26:26.000000000 +0200 @@ -0,0 +1,80 @@ +# +# Intel Atom (Silverthorne) events +# +# architectural perfmon events +event:0x3c counters:cpuid um:cpu_clk_unhalted minimum:6000 name:CPU_CLK_UNHALTED : Clock cycles when not halted +event:0x3c counters:cpuid um:one minimum:6000 name:UNHALTED_REFERENCE_CYCLES : Unhalted reference cycles +event:0xc0 counters:cpuid um:one minimum:6000 name:INST_RETIRED : number of instructions retired +event:0x2e counters:cpuid um:x41 minimum:6000 name:LLC_MISSES : Last level cache demand requests from this core that missed the LLC +event:0x2e counters:cpuid um:x4f minimum:6000 name:LLC_REFS : Last level cache demand requests from this core +event:0xc4 counters:cpuid um:br_inst_retired minimum:500 name:BR_INST_RETIRED : number of branch instructions retired +event:0xc5 counters:cpuid um:zero minimum:500 name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired (precise) +# +event:0x02 counters:0,1 um:store_forwards minimum:6000 name:STORE_FORWARDS : Good store forwards +event:0x06 counters:0,1 um:segment_reg_loads minimum:6000 name:SEGMENT_REG_LOADS : Number of segment register loads +event:0x07 counters:0,1 um:simd_prefetch minimum:6000 name:PREFETCH : Streaming SIMD Extensions (SSE) Prefetch instructions executed +event:0x08 counters:0,1 um:data_tlb_misses minimum:6000 name:DATA_TLB_MISSES : Memory accesses that missed the DTLB +event:0x0C counters:0,1 um:page_walks minimum:6000 name:PAGE_WALKS : Page walks +event:0x10 counters:0,1 um:x87_comp_ops_exe minimum:6000 name:X87_COMP_OPS_EXE : Floating point 
computational micro-ops +event:0x11 counters:0,1 um:fp_assist minimum:6000 name:FP_ASSIST : Floating point assists +event:0x12 counters:0,1 um:mul minimum:6000 name:MUL : Multiply operations +event:0x13 counters:0,1 um:div minimum:6000 name:DIV : Divide operations +event:0x14 counters:0,1 um:one minimum:6000 name:CYCLES_DIV_BUSY : Cycles the driver is busy +event:0x21 counters:0,1 um:core minimum:6000 name:CORE : Cycles L2 address bus is in use +event:0x22 counters:0,1 um:core minimum:6000 name:L2_DBUS_BUSY : Cycles the L2 cache data bus is busy +event:0x24 counters:0,1 um:core,prefetch minimum:500 name:L2_LINES_IN : L2 cache misses +event:0x25 counters:0,1 um:core minimum:500 name:L2_M_LINES_IN : L2 cache line modifications +event:0x26 counters:0,1 um:core,prefetch minimum:500 name:L2_LINES_OUT : L2 cache lines evicted +event:0x27 counters:0,1 um:core,prefetch minimum:500 name:L2_M_LINES_OUT : Modified lines evicted from the L2 cache +event:0x28 counters:0,1 um:core,mesi minimum:6000 name:L2_IFETCH : L2 cacheable instruction fetch requests +event:0x29 counters:0,1 um:core,prefetch,mesi minimum:6000 name:L2_LD : L2 cache reads +event:0x2A counters:0,1 um:core,mesi minimum:6000 name:L2_ST : L2 store requests +event:0x2B counters:0,1 um:core,mesi minimum:6000 name:L2_LOCK : L2 locked accesses +event:0x2E counters:0,1 um:l2_rqsts,core,prefetch,mesi minimum:6000 name:L2_RQSTS : L2 cache requests +event:0x30 counters:0,1 um:core,prefetch,mesi minimum:500 name:L2_REJECT_BUSQ : Rejected L2 cache requests +event:0x32 counters:0,1 um:core minimum:6000 name:L2_NO_REQ : Cycles no L2 cache requests are pending +event:0x3A counters:0,1 um:zero minimum:6000 name:EIST_TRANS : Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions +event:0x3B counters:0,1 um:thermal_trip minimum:6000 name:THERMAL_TRIP : Number of thermal trips +event:0x40 counters:0,1 um:l1d_cache minimum:6000 name:L1D_CACHE : L1d Cache accesses +event:0x60 counters:0,1 um:core,agent minimum:6000 
name:BUS_REQUEST_OUTSTANDING : Outstanding cacheable data read bus requests duration +event:0x61 counters:0,1 um:agent minimum:6000 name:BUS_BNR_DRV : Number of Bus Not Ready signals asserted +event:0x62 counters:0,1 um:agent minimum:6000 name:BUS_DRDY_CLOCKS : Bus cycles when data is sent on the bus +event:0x63 counters:0,1 um:core,agent minimum:6000 name:BUS_LOCK_CLOCKS : Bus cycles when a LOCK signal is asserted. +event:0x64 counters:0,1 um:core minimum:6000 name:BUS_DATA_RCV : Bus cycles while processor receives data +event:0x65 counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_BRD : Burst read bus transactions +event:0x66 counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_RFO : RFO bus transactions +event:0x67 counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_WB : Explicit writeback bus transactions +event:0x68 counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_IFETCH : Instruction-fetch bus transactions. +event:0x69 counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_INVAL : Invalidate bus transactions +event:0x6A counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_PWR : Partial write bus transaction. +event:0x6B counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_P : Partial bus transactions +event:0x6C counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_IO : IO bus transactions +event:0x6D counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_DEF : Deferred bus transactions +event:0x6E counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_BURST : Burst (full cache-line) bus transactions. 
+event:0x6F counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_MEM : Memory bus transactions +event:0x70 counters:0,1 um:core,agent minimum:500 name:BUS_TRANS_ANY : All bus transactions +event:0x77 counters:0,1 um:core,mesi minimum:500 name:EXT_SNOOP : External snoops +event:0x7A counters:0,1 um:agent minimum:500 name:BUS_HIT_DRV : HIT signal asserted +event:0x7B counters:0,1 um:agent minimum:500 name:BUS_HITM_DRV : HITM signal asserted +event:0x7D counters:0,1 um:core minimum:500 name:BUSQ_EMPTY : Bus queue is empty +event:0x7E counters:0,1 um:core,agent minimum:6000 name:SNOOP_STALL_DRV : Bus stalled for snoops +event:0x7F counters:0,1 um:core minimum:6000 name:BUS_IO_WAIT : IO requests waiting in the bus queue +event:0x80 counters:0,1 um:icache minimum:6000 name:ICACHE : Instruction cache accesses +event:0x82 counters:0,1 um:itlb minimum:6000 name:ITLB : ITLB events +event:0xAA counters:0,1 um:macro_insts minimum:6000 name:MACRO_INSTS : instructions decoded +event:0xB0 counters:0,1 um:simd_uops_exec minimum:6000 name:SIMD_UOPS_EXEC : SIMD micro-ops executed +event:0xB1 counters:0,1 um:simd_sat_uop_exec minimum:6000 name:SIMD_SAT_UOP_EXEC : SIMD saturated arithmetic micro-ops executed +event:0xB3 counters:0,1 um:simd_uop_type_exec minimum:6000 name:SIMD_UOP_TYPE_EXEC : SIMD packed microops executed +event:0xC2 counters:0,1 um:uops_retired minimum:6000 name:UOPS_RETIRED : Micro-ops retired +event:0xC3 counters:0,1 um:one minimum:6000 name:MACHINE_CLEARS : Self-Modifying Code detected +event:0xC6 counters:0,1 um:cycles_int_masked minimum:6000 name:CYCLES_INT_MASKED : Cycles during which interrupts are disabled +event:0xC7 counters:0,1 um:simd_inst_retired minimum:6000 name:SIMD_INST_RETIRED : Retired Streaming SIMD Extensions (SSE) instructions +event:0xC8 counters:0,1 um:zero minimum:6000 name:HW_INT_RCV : Hardware interrupts received +event:0xCA counters:0,1 um:simd_comp_inst_retired minimum:6000 name:SIMD_COMP_INST_RETIRED : Retired computational Streaming 
SIMD Extensions (SSE) instructions. +event:0xCB counters:0,1 um:mem_load_retired minimum:6000 name:MEM_LOAD_RETIRED : Retired loads +event:0xCD counters:0,1 um:zero minimum:6000 name:SIMD_ASSIST : SIMD assists invoked +event:0xCE counters:0,1 um:zero minimum:6000 name:SIMD_INSTR_RETIRED : SIMD Instructions retired +event:0xCF counters:0,1 um:zero minimum:6000 name:SIMD_SAT_INSTR_RETIRED : Saturated arithmetic instructions retired +event:0xE0 counters:0,1 um:zero minimum:6000 name:BR_INST_DECODED : Branch instructions decoded +event:0xE4 counters:0,1 um:zero minimum:6000 name:BOGUS_BR : Bogus branches +event:0xE6 counters:0,1 um:one minimum:6000 name:BACLEARS : BACLEARS asserted Index: oprofile/events/i386/atom/unit_masks =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ oprofile/events/i386/atom/unit_masks 2009-04-27 12:26:31.000000000 +0200 @@ -0,0 +1,120 @@ +# +# Intel Atom (Silverthorne) unit masks +# +include:i386/arch_perfmon +name:store_forwards type:mandatory default:0x81 + 0x81 good Good store forwards +name:segment_reg_loads type:mandatory default:0x00 + 0x00 any Number of segment register loads +name:simd_prefetch type:bitmask default:0x01 + 0x01 prefetcht0 Streaming SIMD Extensions (SSE) PrefetchT0 instructions executed + 0x06 sw_l2 Streaming SIMD Extensions (SSE) PrefetchT1 and PrefetchT2 instructions executed + 0x08 prefetchnta Streaming SIMD Extensions (SSE) Prefetch NTA instructions executed +name:data_tlb_misses type:bitmask default:0x07 + 0x07 dtlb_miss Memory accesses that missed the DTLB + 0x05 dtlb_miss_ld DTLB misses due to load operations + 0x09 l0_dtlb_miss_ld L0_DTLB misses due to load operations + 0x06 dtlb_miss_st DTLB misses due to store operations +name:page_walks type:bitmask default:0x03 + 0x03 walks Number of page-walks executed + 0x03 cycles Duration of page-walks in core cycles +name:x87_comp_ops_exe type:bitmask default:0x81 + 0x01 s Floating point 
computational micro-ops executed + 0x81 ar Floating point computational micro-ops retired +name:fp_assist type:mandatory default:0x81 + 0x81 ar Floating point assists +name:mul type:bitmask default:0x01 + 0x01 s Multiply operations executed + 0x81 ar Multiply operations retired +name:div type:bitmask default:0x01 + 0x01 s Divide operations executed + 0x81 ar Divide operations retired +name:l2_rqsts type:bitmask default:0x41 + 0x41 i_state L2 cache demand requests from this core that missed the L2 + 0x4F mesi L2 cache demand requests from this core +name:cpu_clk_unhalted type:bitmask default:0x00 + 0x00 core_p Core cycles when core is not halted + 0x01 bus Bus cycles when core is not halted + 0x02 no_other Bus cycles when core is active and the other is halted +name:l1d_cache type:bitmask default:0x21 + 0x21 ld L1 Cacheable Data Reads + 0x22 st L1 Cacheable Data Writes +name:icache type:bitmask default:0x03 + 0x03 accesses Instruction fetches + 0x02 misses Icache miss +name:itlb type:bitmask default:0x04 + 0x04 flush ITLB flushes + 0x02 misses ITLB misses +name:macro_insts type:exclusive default:0x03 + 0x02 cisc_decoded CISC macro instructions decoded + 0x03 all_decoded All Instructions decoded +name:simd_uops_exec type:exclusive default:0x80 + 0x00 s SIMD micro-ops executed (excluding stores) + 0x80 ar SIMD micro-ops retired (excluding stores) +name:simd_sat_uop_exec type:bitmask default:0x00 + 0x00 s SIMD saturated arithmetic micro-ops executed + 0x80 ar SIMD saturated arithmetic micro-ops retired +name:simd_uop_type_exec type:bitmask default:0x01 + 0x01 s SIMD packed multiply microops executed + 0x81 ar SIMD packed multiply microops retired + 0x02 s SIMD packed shift micro-ops executed + 0x82 ar SIMD packed shift micro-ops retired + 0x04 s SIMD pack micro-ops executed + 0x84 ar SIMD pack micro-ops retired + 0x08 s SIMD unpack micro-ops executed + 0x88 ar SIMD unpack micro-ops retired + 0x10 s SIMD packed logical microops executed + 0x90 ar SIMD packed logical 
microops retired + 0x20 s SIMD packed arithmetic micro-ops executed + 0xA0 ar SIMD packed arithmetic micro-ops retired +name:uops_retired type:mandatory default:0x10 + 0x10 any Micro-ops retired +name:br_inst_retired type:bitmask default:0x00 + 0x00 any Retired branch instructions + 0x01 pred_not_taken Retired branch instructions that were predicted not-taken + 0x02 mispred_not_taken Retired branch instructions that were mispredicted not-taken + 0x04 pred_taken Retired branch instructions that were predicted taken + 0x08 mispred_taken Retired branch instructions that were mispredicted taken + 0x0A mispred Retired mispredicted branch instructions (precise event) + 0x0C taken Retired taken branch instructions + 0x0F any1 Retired branch instructions +name:cycles_int_masked type:bitmask default:0x01 + 0x01 cycles_int_masked Cycles during which interrupts are disabled + 0x02 cycles_int_pending_and_masked Cycles during which interrupts are pending and disabled +name:simd_inst_retired type:bitmask default:0x01 + 0x01 packed_single Retired Streaming SIMD Extensions (SSE) packed-single instructions + 0x02 scalar_single Retired Streaming SIMD Extensions (SSE) scalar-single instructions + 0x04 packed_double Retired Streaming SIMD Extensions 2 (SSE2) packed-double instructions + 0x08 scalar_double Retired Streaming SIMD Extensions 2 (SSE2) scalar-double instructions + 0x10 vector Retired Streaming SIMD Extensions 2 (SSE2) vector instructions + 0x1F any Retired Streaming SIMD instructions +name:simd_comp_inst_retired type:bitmask default:0x01 + 0x01 packed_single Retired computational Streaming SIMD Extensions (SSE) packed-single instructions + 0x02 scalar_single Retired computational Streaming SIMD Extensions (SSE) scalar-single instructions + 0x04 packed_double Retired computational Streaming SIMD Extensions 2 (SSE2) packed-double instructions + 0x08 scalar_double Retired computational Streaming SIMD Extensions 2 (SSE2) scalar-double instructions +name:mem_load_retired 
type:bitmask default:0x01 + 0x01 l2_hit Retired loads that hit the L2 cache (precise event) + 0x02 l2_miss Retired loads that miss the L2 cache (precise event) + 0x04 dtlb_miss Retired loads that miss the DTLB (precise event) +name:thermal_trip type:mandatory default:0xc0 + 0xc0 thermal_trip Number of thermal trips. +# 18-11 +name:core type:bitmask default:0x180 + 0x180 all All cores. + 0x080 this This Core. +# 18-12 +name:agent type:bitmask default:0x00 + 0x00 this This agent + 0x40 any Include any agents +# 18-13 +name:prefetch type:bitmask default:0x60 + 0x60 all All inclusive + 0x20 hw Hardware prefetch only + 0x00 exclude_hw Exclude hardware prefetch +# 18-14 +name:mesi type:bitmask default:0x0f + 0x08 modified Counts modified state + 0x04 exclusive Counts exclusive state + 0x02 shared Counts shared state + 0x01 invalid Counts invalid state |
From: Suravee S. <sur...@am...> - 2009-04-29 18:54:34
|
Please see my comment below. Andi Kleen wrote: > > Signed-off-by: Andi Kleen <ak...@li...> > > --- > events/i386/atom/events | 80 +++++++++++++++++++++++++++++ > events/i386/atom/unit_masks | 120 > ++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 200 insertions(+) > > Index: oprofile/events/i386/atom/events > =================================================================== > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ oprofile/events/i386/atom/events 2009-04-27 12:26:26.000000000 +0200 > @@ -0,0 +1,80 @@ > +# > +# Intel Atom (Silverthorne) events > +# > +# architectural perfmon events > +event:0x3c counters:cpuid um:cpu_clk_unhalted minimum:6000 > name:CPU_CLK_UNHALTED : Clock cycles when not halted > +event:0x3c counters:cpuid um:one minimum:6000 > name:UNHALTED_REFERENCE_CYCLES : Unhalted reference cycles > +event:0xc0 counters:cpuid um:one minimum:6000 name:INST_RETIRED : > number of instructions retired > +event:0x2e counters:cpuid um:x41 minimum:6000 name:LLC_MISSES : Last > level cache demand requests from this core that missed the LLC > +event:0x2e counters:cpuid um:x4f minimum:6000 name:LLC_REFS : Last > level cache demand requests from this core > +event:0xc4 counters:cpuid um:br_inst_retired minimum:500 > name:BR_INST_RETIRED : number of branch instructions retired > +event:0xc5 counters:cpuid um:zero minimum:500 name:BR_MISS_PRED_RETIRED > : number of mispredicted branches retired (precise) [Suravee] Why do we have to have these events here? These are the same as in the "events/arch_perfmon/event" file. I'm not quite familiar with this, but shouldn't we be just keeping one list of perfmon events? Or just use the "include"i386/arch_perfmon" |
From: Andi K. <an...@fi...> - 2009-04-29 18:59:08
|
> Why do we have to have these events here? These are the same as in the > "events/arch_perfmon/event" file. I'm not quite familiar with this, but > shouldn't we be just keeping one list of perfmon events? Or just use > the "include"i386/arch_perfmon" I had this originally, but I ran into some problem which I unfortunately don't remember, so I ended up splitting it. I think it was conflicts in the unit masks (not 100% sure). Yes, ideally include would be better, but it was tough to get the event files complete anyways and I was glad when it was working in this form. -Andi -- ak...@li... -- Speaking for myself only. |
From: Suravee S. <sur...@am...> - 2009-04-29 20:02:35
|
Andi Kleen wrote: >> Why do we have to have these events here? These are the same as in the >> "events/arch_perfmon/event" file. I'm not quite familiar with this, but >> shouldn't we be just keeping one list of perfmon events? Or just use >> the "include"i386/arch_perfmon" > > I had this originally, but I ran into some problem which I unfortunately > don't remember, so i ended up splitting it. > I think it was conflicts in the unit masks (not 100% sure) > > Yes ideally include would be better, but it was tough > to get the event files complete anyways and I was glad when it was > working in this form. > > -Andi [Suravee] I can see that these two events have different "um" tag from the ones in "events/i386/arch_perfmon/events". +event:0x3c counters:cpuid um:cpu_clk_unhalted minimum:6000 name:CPU_CLK_UNHALTED : Clock cycles when not halted +event:0xc4 counters:cpuid um:br_inst_retired minimum:500 name:BR_INST_RETIRED : number of branch instructions retired And the rest of the perfmon events do not have "filter" tags. Does this mean Atom is supporting all of these event? Suravee |
From: Andi K. <an...@fi...> - 2009-04-29 20:06:35
|
On Wed, Apr 29, 2009 at 03:02:06PM -0500, Suravee Suthikulpanit wrote: > Andi Kleen wrote: > >>Why do we have to have these events here? These are the same as in the > >>"events/arch_perfmon/event" file. I'm not quite familiar with this, but > >>shouldn't we be just keeping one list of perfmon events? Or just use > >>the "include"i386/arch_perfmon" > > > >I had this originally, but I ran into some problem which I unfortunately > >don't remember, so i ended up splitting it. > >I think it was conflicts in the unit masks (not 100% sure) > > > >Yes ideally include would be better, but it was tough > >to get the event files complete anyways and I was glad when it was > >working in this form. > > > >-Andi > > [Suravee] > > I can see that these two events have different "um" tag from the ones in > "events/i386/arch_perfmon/events". That's an artifact of my generation script, shouldn't really matter. > > +event:0x3c counters:cpuid um:cpu_clk_unhalted minimum:6000 > name:CPU_CLK_UNHALTED : Clock cycles when not halted > > +event:0xc4 counters:cpuid um:br_inst_retired minimum:500 > name:BR_INST_RETIRED : number of branch instructions retired > > And the rest of the perfmon events do not have "filter" tags. Yes I didn't add them to the CPU specific files, but it could be done. That was mainly because they were generated by script and the script didn't special case this. Also the cpu specific file "knows" what events are there, so it doesn't really need a filter. > Does this > mean Atom is supporting all of these event? Yes it does support all the arch perfmon events. -Andi -- ak...@li... -- Speaking for myself only. |
From: Andi K. <an...@fi...> - 2009-04-30 09:34:36
|
On Wed, Apr 29, 2009 at 03:02:06PM -0500, Suravee Suthikulpanit wrote: > Andi Kleen wrote: > >>Why do we have to have these events here? These are the same as in the > >>"events/arch_perfmon/event" file. I'm not quite familiar with this, but > >>shouldn't we be just keeping one list of perfmon events? Or just use > >>the "include"i386/arch_perfmon" > > > >I had this originally, but I ran into some problem which I unfortunately > >don't remember, so i ended up splitting it. > >I think it was conflicts in the unit masks (not 100% sure) > > > >Yes ideally include would be better, but it was tough > >to get the event files complete anyways and I was glad when it was > >working in this form. > > > >-Andi > > [Suravee] > > I can see that these two events have different "um" tag from the ones in > "events/i386/arch_perfmon/events". Double checked this now. Yes, the reason was that Atom has more unit masks for these events and it seemed too complicated to invent an "include but derive with changes" mechanism, so I opted to just copy. -Andi -- ak...@li... -- Speaking for myself only. |
From: Andi K. <an...@fi...> - 2009-04-27 15:36:32
|
These are just the event files for the core, not including uncore. Will be used by later patches. Signed-off-by: Andi Kleen <ak...@li...> --- events/i386/nehalem/events | 104 +++++++++++ events/i386/nehalem/unit_masks | 368 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 472 insertions(+) Index: oprofile/events/i386/nehalem/events =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ oprofile/events/i386/nehalem/events 2009-04-27 12:27:11.000000000 +0200 @@ -0,0 +1,104 @@ +# +# Intel Nehalem (Core i7 etc.) core events +# the uncore (memory controller/QPI) events are in separate files because +# they vary between implementations (right now they are not implemented +# in oprofile) +# +# architectural perfmon events +event:0x3c counters:cpuid um:zero minimum:6000 filter:0 name:CPU_CLK_UNHALTED : Clock cycles when not halted +event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES : Last level cache demand requests from this core that missed the LLC +event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : Last level cache demand requests from this core +event:0xc0 counters:0,1,2,3 um:inst_retired minimum:6000 name:INST_RETIRED : number of instructions retired +event:0xc4 counters:0,1,2,3 um:br_inst_retired minimum:500 name:BR_INST_RETIRED : number of branch instructions retired +event:0xc5 counters:0,1,2,3 um:br_misp_retired minimum:500 name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired (precise) +# +event:0x02 counters:0,1,2,3 um:sb_forward minimum:6000 name:SB_FORWARD : Counts the number of store forwards. +event:0x03 counters:0,1,2,3 um:load_block minimum:6000 name:LOAD_BLOCK : Counts the number of loads blocked +event:0x04 counters:0,1,2,3 um:sb_drain minimum:6000 name:SB_DRAIN : Counts the cycles of store buffer drains. 
+event:0x05 counters:0,1,2,3 um:misalign_mem_ref minimum:6000 name:MISALIGN_MEM_REF : Counts the number of misaligned load references +event:0x06 counters:0,1,2,3 um:store_blocks minimum:6000 name:STORE_BLOCKS : This event counts the number of load operations delayed caused by preceding stores. +event:0x07 counters:0,1,2,3 um:one minimum:6000 name:PARTIAL_ADDRESS_ALIAS : Counts false dependency due to partial address aliasing +event:0x08 counters:0,1,2,3 um:dtlb_load_misses minimum:6000 name:DTLB_LOAD_MISSES : Counts dtlb page walks +event:0x09 counters:0,1,2,3 um:memory_disambiguration minimum:6000 name:MEMORY_DISAMBIGURATION : Counts memory disambiguration events +event:0x0B counters:0,1,2,3 um:mem_inst_retired minimum:6000 name:MEM_INST_RETIRED : Counts the number of instructions with an architecturally-visible load/store retired on the architected path. +event:0x0C counters:0,1,2,3 um:mem_store_retired minimum:6000 name:MEM_STORE_RETIRED : The event counts the number of retired stores that missed the DTLB. The DTLB miss is not counted if the store operation causes a fault. Does not counter prefetches. Counts both primary and secondary misses to the TLB +event:0x0E counters:0,1,2,3 um:uops_issued minimum:6000 name:UOPS_ISSUED : Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i.e. the UOPs issued from the front end to the back end. +event:0x0F counters:0,1,2,3 um:mem_uncore_retired minimum:6000 name:MEM_UNCORE_RETIRED : Counts number of memory load instructions retired where the memory reference hit modified data in another core +event:0x10 counters:0,1,2,3 um:fp_comp_ops_exe minimum:6000 name:FP_COMP_OPS_EXE : Counts the number of FP Computational Uops Executed. +event:0x12 counters:0,1,2,3 um:simd_int_128 minimum:6000 name:SIMD_INT_128 : Counts number of 128 bit SIMD integer operations. 
+event:0x13 counters:0,1,2,3 um:load_dispatch minimum:6000 name:LOAD_DISPATCH : Counts number of loads dispatched from the Reservation Station that bypass. +event:0x14 counters:0,1,2,3 um:arith minimum:6000 name:ARITH : Counts division cycles and number of multiplies. Includes integer and FP, but excludes DPPS/MPSAD. +event:0x17 counters:0,1,2,3 um:one minimum:6000 name:INST_QUEUE_WRITES : Counts the number of instructions written into the instruction queue every cycle. +event:0x18 counters:0,1,2,3 um:inst_decoded minimum:6000 name:INST_DECODED : Counts number of instructions that require decoder 0 to be decoded. Usually, this means that the instruction maps to more than 1 uop +event:0x19 counters:0,1,2,3 um:one minimum:6000 name:TWO_UOP_INSTS_DECODED : An instruction that generates two uops was decoded +event:0x1D counters:0,1,2,3 um:hw_int minimum:100 name:HW_INT : Counts hardware interrupt events. +event:0x1E counters:0,1,2,3 um:one minimum:6000 name:INST_QUEUE_WRITE_CYCLES : This event counts the number of cycles during which instructions are written to the instruction queue. Dividing this counter by the number of instructions written to the instruction queue (INST_QUEUE_WRITES) yields the average number of instructions decoded each cycle. If this number is less than four and the pipe stalls, this indicates that the decoder is failing to decode enough instructions per cycle to sustain the 4-wide pipeline. +event:0x24 counters:0,1,2,3 um:l2_rqsts minimum:500 name:L2_RQSTS : Counts number of L2 data loads +event:0x26 counters:0,1,2,3 um:l2_data_rqsts minimum:500 name:L2_DATA_RQSTS : More L2 data loads. +event:0x27 counters:0,1,2,3 um:l2_write minimum:500 name:L2_WRITE : Counts number of L2 writes +event:0x28 counters:0,1,2,3 um:l1d_wb_l2 minimum:500 name:L1D_WB_L2 : Counts number of L1 writebacks to the L2. +event:0x2E counters:0,1,2,3 um:longest_lat_cache minimum:6000 name:LONGEST_LAT_CACHE : Count LLC cache reference latencies. 
+event:0x3C counters:0,1,2,3 um:cpu_clk_unhalted minimum:6000 name:CPU_CLK_UNHALTED : Counts the number of thread cycles while the thread is not in a halt state. +event:0x3D counters:0,1,2,3 um:one minimum:6000 name:UOPS_DECODED_DEC0 : Counts micro-ops decoded by decoder 0. +event:0x40 counters:0,1 um:l1d_cache_ld minimum:6000 name:L1D_CACHE_LD : Counts L1 data cache read requests. +event:0x41 counters:0,1 um:l1d_cache_st minimum:6000 name:L1D_CACHE_ST : Counts L1 data cache stores. +event:0x42 counters:0,1 um:l1d_cache_lock minimum:6000 name:L1D_CACHE_LOCK : Counts retired load locks in the L1D cache. +event:0x43 counters:0,1 um:l1d_all_ref minimum:6000 name:L1D_ALL_REF : Counts all references to the L1 data cache, +event:0x49 counters:0,1,2,3 um:dtlb_misses minimum:6000 name:DTLB_MISSES : Counts the number of misses in the STLB +event:0x4B counters:0,1,2,3 um:sse_mem_exec minimum:6000 name:SSE_MEM_EXEC : Counts number of SSE instructions which missed the L1 data cache. +event:0x4C counters:0,1,2,3 um:one minimum:6000 name:LOAD_HIT_PRE : Counts load operations sent to the L1 data cache while a previous SSE prefetch instruction to the same cache line has started prefetching but has not yet finished. +event:0x4D counters:0,1,2,3 um:one minimum:6000 name:SFENCE_CYCLES : Counts store fence cycles +event:0x4E counters:0,1,2,3 um:l1d_prefetch minimum:6000 name:L1D_PREFETCH : Counts number of hardware prefetch requests. +event:0x4F counters:0,1,2,3 um:ept minimum:6000 name:EPT : Counts Extended Page Directory Entry accesses. The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches. +event:0x51 counters:0,1 um:l1d minimum:6000 name:L1D : Counts the number of lines brought from/to the L1 data cache. +event:0x52 counters:0,1,2,3 um:one minimum:6000 name:L1D_CACHE_PREFETCH_LOCK_FB_HIT : Counts the number of cacheable load lock speculated instructions accepted into the fill buffer. 
+event:0x53 counters:0,1,2,3 um:one minimum:6000 name:L1D_CACHE_LOCK_FB_HIT : Counts the number of cacheable load lock speculated or retired instructions accepted into the fill buffer. +event:0x60 counters:0,1,2,3 um:offcore_requests_outstanding minimum:6000 name:OFFCORE_REQUESTS_OUTSTANDING : Counts weighted cycles of offcore requests. +event:0x63 counters:0,1 um:cache_lock_cycles minimum:6000 name:CACHE_LOCK_CYCLES : Cycle count during which the L1/L2 caches are locked. A lock is asserted when there is a locked memory access, due to uncacheable memory, a locked operation that spans two cache lines, or a page walk from an uncacheable page table. +event:0x6C counters:0,1,2,3 um:one minimum:6000 name:IO_TRANSACTIONS : Counts the number of completed I/O transactions. +event:0x80 counters:0,1,2,3 um:l1i minimum:6000 name:L1I : Counts L1i instruction cache accesses. +event:0x81 counters:0,1,2,3 um:ifu_ivc minimum:6000 name:IFU_IVC : Instruction Fetch unit events +event:0x82 counters:0,1,2,3 um:large_itlb minimum:6000 name:LARGE_ITLB : Counts number of large ITLB accesses +event:0x83 counters:0,1,2,3 um:one minimum:6000 name:L1I_OPPORTUNISTIC_HITS : Opportunistic hits in streaming. +event:0x85 counters:0,1,2,3 um:itlb_misses minimum:6000 name:ITLB_MISSES : Counts the number of ITLB misses in various variants +event:0x87 counters:0,1,2,3 um:ild_stall minimum:6000 name:ILD_STALL : Cycles Instruction Length Decoder stalls +event:0x88 counters:0,1,2,3 um:br_inst_exec minimum:6000 name:BR_INST_EXEC : Counts the number of near branch instructions executed, but not necessarily retired. +event:0x89 counters:0,1,2,3 um:br_misp_exec minimum:6000 name:BR_MISP_EXEC : Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired. +event:0xA2 counters:0,1,2,3 um:resource_stalls minimum:6000 name:RESOURCE_STALLS : Counts the number of Allocator resource related stalls. Includes register renaming buffer entries, memory buffer entries. 
In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations. +event:0xA6 counters:0,1,2,3 um:one minimum:6000 name:MACRO_INSTS : Counts the number of instructions decoded that are macro-fused but not necessarily executed or retired. +event:0xA7 counters:0,1,2,3 um:one minimum:6000 name:ONE : Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ is also responsible for providing conditional branch prediciton direction based on a static scheme and dynamic data provided by the L2 Branch Prediction Unit. If the conditional branch target is not found in the Target Array and the IQ predicts that the branch is taken, then the IQ will force the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline. +event:0xA8 counters:0,1,2,3 um:one minimum:6000 name:LSD : Counts the number of micro-ops delivered by loop stream detector +event:0xAE counters:0,1,2,3 um:one minimum:6000 name:ITLB_FLUSH : Counts the number of ITLB flushes +event:0xB0 counters:0,1,2,3 um:offcore_requests minimum:6000 name:OFFCORE_REQUESTS : Counts number of offcore data requests. +event:0xB1 counters:0,1,2,3 um:uops_executed minimum:6000 name:UOPS_EXECUTED : Counts number of Uops executed that were issued on various ports +event:0xB2 counters:0,1,2,3 um:one minimum:6000 name:OFFCORE_REQUESTS_SQ_FULL : Counts number of cycles the SQ is full to handle off-core requests. +event:0xB3 counters:0,1,2,3 um:snoopq_requests_outstanding minimum:6000 name:SNOOPQ_REQUESTS_OUTSTANDING : Counts weighted cycles of snoopq requests. +event:0xB7 counters:0,1,2,3 um:one minimum:6000 name:OOF_CORE_RESPONSE_0 : Off-core Response Performance Monitoring in the Processor Core. 
Requires special setup. +event:0xB8 counters:0,1,2,3 um:snoop_response minimum:6000 name:SNOOP_RESPONSE : Counts HIT snoop response sent by this thread in response to a snoop request. +event:0xBA counters:0,1,2,3 um:pic_accesses minimum:6000 name:PIC_ACCESSES : Counts number of TPR accesses +event:0xC2 counters:0,1,2,3 um:uops_retired minimum:6000 name:UOPS_RETIRED : Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2, others=1; maximum count of 8 per cycle). Most instructions are composed of one or two microops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists +event:0xC3 counters:0,1,2,3 um:machine_clears minimum:6000 name:MACHINE_CLEARS : Counts the cycles machine clear is asserted. +event:0xC7 counters:0,1,2,3 um:ssex_uops_retired minimum:6000 name:SSEX_UOPS_RETIRED : Counts SIMD packed single-precision floating point Uops retired. +event:0xC8 counters:0,1,2,3 um:x20 minimum:6000 name:ITLB_MISS_RETIRED : Counts the number of retired instructions that missed the ITLB when the instruction was fetched. +event:0xCB counters:0,1,2,3 um:mem_load_retired minimum:6000 name:MEM_LOAD_RETIRED : Counts number of retired loads. +event:0xCC counters:0,1,2,3 um:fp_mmx_trans minimum:6000 name:FP_MMX_TRANS : Counts transitions between MMX and x87 state. +event:0xD0 counters:0,1,2,3 um:macro_insts minimum:6000 name:MACRO_INSTS : Counts the number of instructions decoded, (but not necessarily executed or retired). +event:0xD1 counters:0,1,2,3 um:uops_decoded minimum:6000 name:UOPS_DECODED : Counts the number of Uops decoded by various subsystems. 
+event:0xD2 counters:0,1,2,3 um:rat_stalls minimum:6000 name:RAT_STALLS : Counts the number of cycles during which execution stalled due to several reasons +event:0xD4 counters:0,1,2,3 um:one minimum:6000 name:SEG_RENAME_STALLS : Counts the number of stall cycles due to the lack of renaming resources for the ES, DS, FS, and GS segment registers. If a segment is renamed but not retired and a second update to the same segment occurs, a stall occurs in the front-end of the pipeline until the renamed segment retires. +event:0xD5 counters:0,1,2,3 um:one minimum:6000 name:ES_REG_RENAMES : Counts the number of times the ES segment register is renamed. +event:0xDB counters:0,1,2,3 um:one minimum:6000 name:UOP_UNFUSION : Counts unfusion events due to floating point exception to a fused uop. +event:0xE0 counters:0,1,2,3 um:one minimum:6000 name:BR_INST_DECODED : Counts the number of branch instructions decoded. +event:0xE4 counters:0,1,2,3 um:one minimum:6000 name:BOGUS_BR : Counts the number of bogus branches. +event:0xE5 counters:0,1,2,3 um:one minimum:6000 name:BPU_MISSED_CALL_RET : Counts number of times the Branch Prediction Unit missed predicting a call or return branch. +event:0xE6 counters:0,1,2,3 um:baclear minimum:6000 name:BACLEAR : Counts the number of times the front end is resteered, +event:0xE8 counters:0,1,2,3 um:bpu_clears minimum:6000 name:BPU_CLEARS : Counts Branch Prediction Unit clears. +event:0xF0 counters:0,1,2,3 um:l2_transactions minimum:6000 name:L2_TRANSACTIONS : Counts L2 transactions +event:0xF1 counters:0,1,2,3 um:l2_lines_in minimum:6000 name:L2_LINES_IN : Counts the number of cache lines allocated in the L2 cache in various states. +event:0xF2 counters:0,1,2,3 um:l2_lines_out minimum:6000 name:L2_LINES_OUT : Counts L2 cache lines evicted. 
+event:0xF3 counters:0,1,2,3 um:l2_hw_prefetch minimum:6000 name:L2_HW_PREFETCH : Count L2 HW prefetcher events +event:0xF4 counters:0,1,2,3 um:sq_misc minimum:6000 name:SQ_MISC : Counts events in the Super Queue below the L2. +event:0xF6 counters:0,1,2,3 um:one minimum:6000 name:SQ_FULL_STALL_CYCLES : Counts cycles the Super Queue is full. Neither of the threads on this core will be able to access the uncore. +event:0xF7 counters:0,1,2,3 um:fp_assist minimum:6000 name:FP_ASSIST : Counts the number of floating point operations executed that required micro-code assist intervention. +event:0xF8 counters:0,1,2,3 um:one minimum:6000 name:SEGMENT_REG_LOADS : Counts number of segment register loads +event:0xFD counters:0,1,2,3 um:simd_int_64 minimum:6000 name:SIMD_INT_64 : Counts number of SID integer 64 bit packed multiply operations. Index: oprofile/events/i386/nehalem/unit_masks =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ oprofile/events/i386/nehalem/unit_masks 2009-04-27 12:27:14.000000000 +0200 @@ -0,0 +1,368 @@ +include:i386/arch_perfmon +name:sb_forward type:mandatory default:0x01 + 0x01 any Counts the number of store forwards +name:load_block type:bitmask default:0x01 + 0x01 std Counts the number of loads blocked by a preceding store with unknown data + 0x04 address_offset Counts the number of loads blocked by a preceding store address +name:sb_drain type:mandatory default:0x01 + 0x01 cycles Counts the cycles of store buffer drains +name:misalign_mem_ref type:bitmask default:0x03 + 0x01 load Counts the number of misaligned load references + 0x02 store Counts the number of misaligned store references + 0x03 any Counts the number of misaligned memory references +name:store_blocks type:bitmask default:0x0f + 0x01 not_sta This event counts the number of load operations delayed caused by preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflict 
with the load but which incompletely overlap the load + 0x02 sta This event counts load operations delayed caused by preceding stores whose addresses are unknown (STA block) + 0x04 at_ret Counts number of loads delayed with at-Retirement block code + 0x08 l1d_block Cacheable loads delayed with L1D block code + 0x0F any All loads delayed due to store blocks +name:dtlb_load_misses type:bitmask default:0x01 + 0x01 any Counts all load misses that cause a page walk + 0x02 walk_completed Counts number of completed page walks due to load miss in the STLB + 0x10 stlb_hit Number of cache load STLB hits + 0x20 pde_miss Number of DTLB cache load misses where the low part of the linear to physical address translation was missed + 0x40 pdp_miss Number of DTLB cache load misses where the high part of the linear to physical address translation was missed + 0x80 large_walk_completed Counts number of completed large page walks due to load miss in the STLB +name:memory_disambiguration type:bitmask default:0x01 + 0x01 reset Counts memory disambiguration reset cycles + 0x02 success Counts the number of loads that memory disambiguration succeeded + 0x04 watchdog Counts the number of times the memory disambiguration watchdog kicked in + 0x08 watch_cycles Counts the cycles that the memory disambiguration watchdog is active +name:mem_inst_retired type:bitmask default:0x01 + 0x01 loads Counts the number of instructions with an architecturally-visible store retired on the architected path + 0x02 stores Counts the number of instructions with an architecturally-visible store retired on the architected path +name:mem_store_retired type:mandatory default:0x01 + 0x01 dtlb_miss The event counts the number of retired stores that missed the DTLB +name:uops_issued type:bitmask default:0x01 + 0x01 any Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i + 0x01 stalled_cycles Counts the number of cycles no Uops issued by the Register Allocation Table to the 
Reservation Station, i + 0x02 fused Counts the number of fused Uops that were issued from the Register Allocation Table to the Reservation Station +name:mem_uncore_retired type:bitmask default:0x02 + 0x02 other_core_l2_hitm Counts number of memory load instructions retired where the memory reference hit modified data in a sibling core residing on the same socket + 0x08 remote_cache_local_home_hit Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and HIT in a remote socket's cache + 0x10 remote_dram Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and was remotely homed + 0x20 local_dram Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and required a local socket memory reference +name:fp_comp_ops_exe type:bitmask default:0x01 + 0x01 x87 Counts the number of FP Computational Uops Executed + 0x02 mmx Counts number of MMX Uops executed + 0x04 sse_fp Counts number of SSE and SSE2 FP uops executed + 0x08 sse2_integer Counts number of SSE2 integer uops executed + 0x10 sse_fp_packed Counts number of SSE FP packed uops executed + 0x20 sse_fp_scalar Counts number of SSE FP scalar uops executed + 0x40 sse_single_precision Counts number of SSE* FP single precision uops executed + 0x80 sse_double_precision Counts number of SSE* FP double precision uops executed +name:simd_int_128 type:bitmask default:0x01 + 0x01 packed_mpy Counts number of 128 bit SIMD integer multiply operations + 0x02 packed_shift Counts number of 128 bit SIMD integer shift operations + 0x04 pack Counts number of 128 bit SIMD integer pack operations + 0x08 unpack Counts number of 128 bit SIMD integer unpack operations + 0x10 packed_logical Counts number of 128 bit SIMD integer logical operations + 0x20 packed_arith Counts number of 128 bit SIMD integer arithmetic operations + 0x40 shuffle_move Counts number of 128 bit SIMD 
integer shuffle and move operations +name:load_dispatch type:bitmask default:0x07 + 0x01 rs Counts number of loads dispatched from the Reservation Station that bypass the Memory Order Buffer + 0x02 rs_delayed Counts the number of delayed RS dispatches at the stage latch + 0x04 mob Counts the number of loads dispatched from the Reservation Station to the Memory Order Buffer + 0x07 any Counts all loads dispatched from the Reservation Station +name:arith type:bitmask default:0x01 + 0x01 cycles_div_busy Counts the number of cycles the divider is busy executing divide or square root operations + 0x02 mul Counts the number of multiply operations executed +name:inst_decoded type:mandatory default:0x01 + 0x01 dec0 Counts number of instructions that require decoder 0 to be decoded +name:hw_int type:bitmask default:0x01 + 0x01 rcv Number of interrupt received + 0x02 cycles_masked Number of cycles interrupt are masked + 0x04 cycles_pending_and_masked Number of cycles interrupts are pending and masked +name:l2_rqsts type:bitmask default:0x01 + 0x01 ld_hit Counts number of loads that hit the L2 cache + 0x02 ld_miss Counts the number of loads that miss the L2 cache + 0x03 loads Counts all L2 load requests + 0x04 rfo_hit Counts the number of store RFO requests that hit the L2 cache + 0x08 rfo_miss Counts the number of store RFO requests that miss the L2 cache + 0x0C rfos Counts all L2 store RFO requests + 0x10 ifetch_hit Counts number of instruction fetches that hit the L2 cache + 0x20 ifetch_miss Counts number of instruction fetches that miss the L2 cache + 0x30 ifetches Counts all instruction fetches + 0x40 prefetch_hit Counts L2 prefetch hits for both code and data + 0x80 prefetch_miss Counts L2 prefetch misses for both code and data + 0xC0 prefetches Counts all L2 prefetches for both code and data + 0xAA miss Counts all L2 misses for both code and data + 0xFF references Counts all L2 requests for both code and data +name:l2_data_rqsts type:bitmask default:0xff + 0x01 i_state 
Counts number of L2 data demand loads where the cache line to be loaded is in the I (invalid) state, i + 0x02 s_state Counts number of L2 data demand loads where the cache line to be loaded is in the S (shared) state + 0x04 e_state Counts number of L2 data demand loads where the cache line to be loaded is in the E (exclusive) state + 0x08 m_state Counts number of L2 data demand loads where the cache line to be loaded is in the M (modified) state + 0x0F mesi Counts all L2 data demand requests + 0x10 i_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the I (invalid) state, i + 0x20 s_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the S (shared) state + 0x40 e_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the E (exclusive) state + 0x80 m_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the M (modified) state + 0xF0 mesi Counts all L2 prefetch requests + 0xFF any Counts all L2 data requests +name:l2_write type:bitmask default:0x01 + 0x01 i_state Counts number of L2 demand store RFO requests where the cache line to be loaded is in the I (invalid) state, i + 0x02 s_state Counts number of L2 store RFO requests where the cache line to be loaded is in the S (shared) state + 0x04 e_state Counts number of L2 store RFO requests where the cache line to be loaded is in the E (exclusive) state + 0x08 m_state Counts number of L2 store RFO requests where the cache line to be loaded is in the M (modified) state + 0x0E hit Counts number of L2 store RFO requests where the cache line to be loaded is in either the S, E or M states + 0x0F mesi Counts all L2 store RFO requests + 0x10 i_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the I (invalid) state, i + 0x20 s_state Counts number of L2 lock RFO requests where the cache line to be loaded is in the S (shared) state + 0x40 e_state Counts 
number of L2 demand lock RFO requests where the cache line to be loaded is in the E (exclusive) state + 0x80 m_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the M (modified) state + 0xE0 hit Counts number of L2 demand lock RFO requests where the cache line to be loaded is in either the S, E, or M state + 0xF0 mesi Counts all L2 demand lock RFO requests +name:l1d_wb_l2 type:bitmask default:0x01 + 0x01 i_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the I (invalid) state, i + 0x02 s_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the S state + 0x04 e_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the E (exclusive) state + 0x08 m_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the M (modified) state + 0x0F mesi Counts all L1 writebacks to the L2 +name:longest_lat_cache type:bitmask default:0x4F + 0x4F reference This event counts requests originating from the core that reference a cache line in the last level cache + 0x41 miss This event counts each cache miss condition for references to the last level cache +name:cpu_clk_unhalted type:bitmask default:0x00 + 0x00 thread_p Counts the number of thread cycles while the thread is not in a halt state + 0x01 ref_p Increments at the frequency of TSC when not halted +name:l1d_cache_ld type:bitmask default:0x01 + 0x01 i_state Counts L1 data cache read requests where the cache line to be loaded is in the I (invalid) state, i + 0x02 s_state Counts L1 data cache read requests where the cache line to be loaded is in the S (shared) state + 0x04 e_state Counts L1 data cache read requests where the cache line to be loaded is in the E (exclusive) state + 0x08 m_state Counts L1 data cache read requests where the cache line to be loaded is in the M (modified) state + 0x0F mesi Counts L1 data cache read requests +name:l1d_cache_st 
type:bitmask default:0x01 + 0x01 i_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the I state + 0x02 s_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the S (shared) state + 0x04 e_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the E (exclusive) state + 0x08 m_state Counts L1 data cache store RFO requests where cache line to be loaded is in the M (modified) state + 0x0F mesi Counts L1 data cache store RFO requests +name:l1d_cache_lock type:bitmask default:0x01 + 0x01 hit Counts retired load locks that hit in the L1 data cache or hit in an already allocated fill buffer + 0x02 s_state Counts L1 data cache retired load locks that hit the target cache line in the shared state + 0x04 e_state Counts L1 data cache retired load locks that hit the target cache line in the exclusive state + 0x08 m_state Counts L1 data cache retired load locks that hit the target cache line in the modified state +name:l1d_all_ref type:bitmask default:0x01 + 0x01 any Counts all references (uncached, speculated and retired) to the L1 data cache, including all loads and stores with any memory types + 0x02 cacheable Counts all data reads and writes (speculated and retired) from cacheable memory, including locked operations +#name:l1d_pend_miss type:mandatory default:0x02 +# 0x02 load_buffers_full Counts cycles of L1 data cache load fill buffers full +name:dtlb_misses type:bitmask default:0x01 + 0x01 any Counts the number of misses in the STLB which causes a page walk + 0x02 walk_completed Counts number of misses in the STLB which resulted in a completed page walk + 0x10 stlb_hit Counts the number of DTLB first level misses that hit in the second level TLB + 0x20 pde_miss Number of DTLB cache misses where the low part of the linear to physical address translation was missed + 0x40 pdp_miss Number of DTLB misses where the high part of the linear to physical address translation was 
missed + 0x80 large_walk_completed Counts number of completed large page walks due to misses in the STLB +name:sse_mem_exec type:bitmask default:0x01 + 0x01 nta Counts number of SSE NTA prefetch/weakly-ordered instructions which missed the L1 data cache + 0x08 streaming_stores Counts number of SSE nontemporal stores +name:l1d_prefetch type:bitmask default:0x01 + 0x01 requests Counts number of hardware prefetch requests dispatched out of the prefetch FIFO + 0x02 miss Counts number of hardware prefetch requests that miss the L1D + 0x04 triggers Counts number of prefetch requests triggered by the Finite State Machine and pushed into the prefetch FIFO +name:ept type:bitmask default:0x02 + 0x02 epde_miss Counts Extended Page Directory Entry misses + 0x04 epdpe_hit Counts Extended Page Directory Pointer Entry hits + 0x08 epdpe_miss Counts Extended Page Directory Pointer Entry misses +name:l1d type:bitmask default:0x01 + 0x01 repl Counts the number of lines brought into the L1 data cache + 0x02 m_repl Counts the number of modified lines brought into the L1 data cache + 0x04 m_evict Counts the number of modified lines evicted from the L1 data cache due to replacement + 0x08 m_snoop_evict Counts the number of modified lines evicted from the L1 data cache due to snoop HITM intervention +name:offcore_requests_outstanding type:bitmask default:0x01 + 0x01 read_data Counts weighted cycles of offcore demand data read requests + 0x02 read_code Counts weighted cycles of offcore demand code read requests + 0x04 rfo Counts weighted cycles of offcore demand RFO requests + 0x08 read Counts weighted cycles of offcore read requests of any kind +name:cache_lock_cycles type:bitmask default:0x01 + 0x01 l1d_l2 Cycle count during which the L1D and L2 are locked + 0x02 l1d Counts the number of cycles that cacheline in the L1 data cache unit is locked +name:l1i type:bitmask default:0x01 + 0x01 hits Counts all instruction fetches that hit the L1 instruction cache + 0x02 misses Counts all 
instruction fetches that miss the L1I cache + 0x03 reads Counts all instruction fetches, including uncacheable fetches that bypass the L1I + 0x04 cycles_stalled Cycle counts for which an instruction fetch stalls due to a L1I cache miss, ITLB miss or ITLB fault +name:ifu_ivc type:bitmask default:0x01 + 0x01 full Instruction Fetche unit victim cache full + 0x02 l1i_eviction L1 Instruction cache evictions +name:large_itlb type:mandatory default:0x01 + 0x01 hit Counts number of large ITLB hits +name:itlb_misses type:bitmask default:0x01 + 0x01 any Counts the number of misses in all levels of the ITLB which causes a page walk + 0x02 walk_completed Counts number of misses in all levels of the ITLB which resulted in a completed page walk + 0x04 walk_cycles Counts ITLB miss page walk cycles + 0x04 pmh_busy_cycles Counts PMH busy cycles + 0x10 stlb_hit Counts the number of ITLB misses that hit in the second level TLB + 0x20 pde_miss Number of ITLB misses where the low part of the linear to physical address translation was missed + 0x40 pdp_miss Number of ITLB misses where the high part of the linear to physical address translation was missed + 0x80 large_walk_completed Counts number of completed large page walks due to misses in the STLB +name:ild_stall type:bitmask default:0x0f + 0x01 lcp Cycles Instruction Length Decoder stalls due to length changing prefixes: 66, 67 or REX + 0x02 mru Instruction Length Decoder stall cycles due to Brand Prediction Unit (PBU) Most Recently Used (MRU) bypass + 0x04 iq_full Stall cycles due to a full instruction queue + 0x08 regen Counts the number of regen stalls + 0x0F any Counts any cycles the Instruction Length Decoder is stalled +name:br_inst_exec type:bitmask default:0x7f + 0x01 cond Counts the number of conditional near branch instructions executed, but not necessarily retired + 0x02 direct Counts all unconditional near branch instructions excluding calls and indirect branches + 0x04 indirect_non_call Counts the number of executed 
indirect near branch instructions that are not calls + 0x07 non_calls Counts all non call near branch instructions executed, but not necessarily retired + 0x08 return_near Counts indirect near branches that have a return mnemonic + 0x10 direct_near_call Counts unconditional near call branch instructions, excluding non call branch, executed + 0x20 indirect_near_call Counts indirect near calls, including both register and memory indirect, executed + 0x30 near_calls Counts all near call branches executed, but not necessarily retired + 0x40 taken Counts taken near branches executed, but not necessarily retired + 0x7F any Counts all near executed branches (not necessarily retired) +name:br_misp_exec type:bitmask default:0x7f + 0x01 cond Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired + 0x02 direct Counts mispredicted macro unconditional near branch instructions, excluding calls and indirect branches (should always be 0) + 0x04 indirect_non_call Counts the number of executed mispredicted indirect near branch instructions that are not calls + 0x07 non_calls Counts mispredicted non call near branches executed, but not necessarily retired + 0x08 return_near Counts mispredicted indirect branches that have a rear return mnemonic + 0x10 direct_near_call Counts mispredicted non-indirect near calls executed, (should always be 0) + 0x20 indirect_near_call Counts mispredicted indirect near calls exeucted, including both register and memory indirect + 0x30 near_calls Counts all mispredicted near call branches executed, but not necessarily retired + 0x40 taken Counts executed mispredicted near branches that are taken, but not necessarily retired + 0x7F any Counts the number of mispredicted near branch instructions that were executed, but not necessarily retired +name:resource_stalls type:bitmask default:0x01 + 0x01 any Counts the number of Allocator resource related stalls + 0x02 load Counts the cycles of stall due to lack 
of load buffer for load operation + 0x04 rs_full This event counts the number of cycles when the number of instructions in the pipeline waiting for execution reaches the limit the processor can handle + 0x08 store This event counts the number of cycles that a resource related stall will occur due to the number of store instructions reaching the limit of the pipeline, (i + 0x10 rob_full Counts the cycles of stall due to reorder buffer full + 0x20 fpcw Counts the number of cycles while execution was stalled due to writing the floating-point unit (FPU) control word + 0x40 mxcsr Stalls due to the MXCSR register rename occurring to close to a previous MXCSR rename + 0x80 other Counts the number of cycles while execution was stalled due to other resource issues +name:offcore_requests type:bitmask default:0x80 + 0x01 demand_read_data Counts number of offcore demand data read requests + 0x02 demand_read_code Counts number of offcore demand code read requests + 0x04 demand_rfo Counts number of offcore demand RFO requests + 0x08 any_read Counts number of offcore read requests + 0x10 any_rfo Counts number of offcore RFO requests + 0x20 uncached_mem Counts number of offcore uncached memory requests + 0x40 l1d_writeback Counts number of L1D writebacks to the uncore + 0x80 any Counts all offcore requests +name:uops_executed type:bitmask default:0x3f + 0x01 port0 Counts number of Uops executed that were issued on port 0 + 0x02 port1 Counts number of Uops executed that were issued on port 1 + 0x04 port2_core Counts number of Uops executed that were issued on port 2 + 0x08 port3_core Counts number of Uops executed that were issued on port 3 + 0x10 port4_core Counts number of Uops executed that where issued on port 4 + 0x20 port5 Counts number of Uops executed that where issued on port 5 + 0x40 port015 Counts number of Uops executed that where issued on port 0, 1, or 5 + 0x80 port234 Counts number of Uops executed that where issued on port 2, 3, or 4 
+name:snoopq_requests_outstanding type:bitmask default:0x01 + 0x01 data Counts weighted cycles of snoopq requests for data + 0x02 invalidate Counts weighted cycles of snoopq invalidate requests + 0x04 code Counts weighted cycles of snoopq requests for code +name:snoop_response type:bitmask default:0x01 + 0x01 hit Counts HIT snoop response sent by this thread in response to a snoop request + 0x02 hite Counts HIT E snoop response sent by this thread in response to a snoop request + 0x04 hitm Counts HIT M snoop response sent by this thread in response to a snoop request +name:pic_accesses type:bitmask default:0x01 + 0x01 tpr_reads Counts number of TPR reads + 0x02 tpr_writes Counts number of TPR writes +name:inst_retired type:bitmask default:0x01 + 0x01 any_p instructions retired + 0x02 x87 Counts the number of floating point computational operations retired: floating point computational operations executed by the assist handler and sub-operations of complex floating point instructions like transcendental instructions +name:uops_retired type:bitmask default:0x01 + 0x01 any Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2, others=1; maximum count of 8 per cycle) + 0x02 retire_slots Counts the number of retirement slots used each cycle + 0x04 macro_fused Counts number of macro-fused uops retired +name:machine_clears type:bitmask default:0x01 + 0x01 cycles Counts the cycles machine clear is asserted + 0x02 mem_order Counts the number of machine clears due to memory order conflicts + 0x04 smc Counts the number of times that a program writes to a code section + 0x10 fusion_assist Counts the number of macro-fusion assists +name:br_inst_retired type:bitmask default:0x00 + 0x00 all_branches See Table A-1 + 0x01 conditional Counts the number of conditional branch instructions retired + 0x02 near_call Counts the number of direct & indirect near unconditional calls retired + 0x04 all_branches Counts the number of branch instructions retired 
+name:br_misp_retired type:bitmask default:0x00 + 0x00 all_branches See Table A-1 + 0x02 near_call Counts mispredicted direct & indirect near unconditional retired calls +name:ssex_uops_retired type:bitmask default:0x01 + 0x01 packed_single Counts SIMD packed single-precision floating point Uops retired + 0x02 scalar_single Counts SIMD calar single-precision floating point Uops retired + 0x04 packed_double Counts SIMD packed double-precision floating point Uops retired + 0x08 scalar_double Counts SIMD scalar double-precision floating point Uops retired + 0x10 vector_integer Counts 128-bit SIMD vector integer Uops retired +name:mem_load_retired type:bitmask default:0x01 + 0x01 l1d_hit Counts number of retired loads that hit the L1 data cache + 0x02 l2_hit Counts number of retired loads that hit the L2 data cache + 0x04 llc_unshared_hit Counts number of retired loads that hit their own, unshared lines in the LLC cache + 0x08 other_core_l2_hit_hitm Counts number of retired loads that hit in a sibling core's L2 (on die core) + 0x10 llc_miss Counts number of retired loads that miss the LLC cache + 0x40 hit_lfb Counts number of retired loads that miss the L1D and the address is located in an allocated line fill buffer and will soon be committed to cache + 0x80 dtlb_miss Counts the number of retired loads that missed the DTLB +name:fp_mmx_trans type:bitmask default:0x03 + 0x01 to_fp Counts the first floating-point instruction following any MMX instruction + 0x02 to_mmx Counts the first MMX instruction following a floating-point instruction + 0x03 any Counts all transitions from floating point to MMX instructions and from MMX instructions to floating point instructions +name:macro_insts type:mandatory default:0x01 + 0x01 decoded Counts the number of instructions decoded, (but not necessarily executed or retired) +name:uops_decoded type:bitmask default:0x0e + 0x02 ms Counts the number of Uops decoded by the Microcode Sequencer, MS + 0x04 esp_folding Counts number of stack 
pointer (ESP) instructions decoded: push , pop , call , ret, etc + 0x08 esp_sync Counts number of stack pointer (ESP) sync operations where an ESP instruction is corrected by adding the ESP offset register to the current value of the ESP register +name:rat_stalls type:bitmask default:0x0f + 0x01 flags Counts the number of cycles during which execution stalled due to several reasons, one of which is a partial flag register stall + 0x02 registers This event counts the number of cycles instruction execution latency became longer than the defined latency because the instruction used a register that was partially written by previous instruction + 0x04 rob_read_port Counts the number of cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the out-of-order pipeline + 0x08 scoreboard Counts the cycles where we stall due to microarchitecturally required serialization + 0x0F any Counts all Register Allocation Table stall cycles due to: Cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the execution pipe +name:baclear type:bitmask default:0x01 + 0x01 clear Counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end + 0x02 bad_target Counts number of Branch Address Calculator clears (BACLEAR) asserted due to conditional branch instructions in which there was a target hit but the direction was wrong +name:bpu_clears type:bitmask default:0x03 + 0x01 early Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken branch after incorrectly assuming that it was not taken + 0x02 late Counts late Branch Prediction Unit clears due to Most Recently Used conflicts + 0x03 any Counts all BPU clears +name:l2_transactions type:bitmask default:0x80 + 0x01 load Counts L2 load operations due to HW prefetch or demand loads + 0x02 rfo Counts L2 RFO operations due to HW 
prefetch or demand RFOs + 0x04 ifetch Counts L2 instruction fetch operations due to HW prefetch or demand ifetch + 0x08 prefetch Counts L2 prefetch operations + 0x10 l1d_wb Counts L1D writeback operations to the L2 + 0x20 fill Counts L2 cache line fill operations due to load, RFO, L1D writeback or prefetch + 0x40 wb Counts L2 writeback operations to the LLC + 0x80 any Counts all L2 cache operations +name:l2_lines_in type:bitmask default:0x07 + 0x02 s_state Counts the number of cache lines allocated in the L2 cache in the S (shared) state + 0x04 e_state Counts the number of cache lines allocated in the L2 cache in the E (exclusive) state + 0x07 any Counts the number of cache lines allocated in the L2 cache +name:l2_lines_out type:bitmask default:0x0f + 0x01 demand_clean Counts L2 clean cache lines evicted by a demand request + 0x02 demand_dirty Counts L2 dirty (modified) cache lines evicted by a demand request + 0x04 prefetch_clean Counts L2 clean cache line evicted by a prefetch request + 0x08 prefetch_dirty Counts L2 modified cache line evicted by a prefetch request + 0x0F any Counts all L2 cache lines evicted for any reason +name:l2_hw_prefetch type:bitmask default:0x01 + 0x01 hit Count L2 HW prefetcher detector hits + 0x02 alloc Count L2 HW prefetcher allocations + 0x04 data_trigger Count L2 HW data prefetcher triggered + 0x08 code_trigger Count L2 HW code prefetcher triggered + 0x10 dca_trigger Count L2 HW DCA prefetcher triggered + 0x20 kick_start Count L2 HW prefetcher kick started +name:sq_misc type:bitmask default:0x01 + 0x01 promotion Counts the number of L2 secondary misses that hit the Super Queue + 0x02 promotion_post_go Counts the number of L2 secondary misses during the Super Queue filling L2 + 0x04 lru_hints Counts number of Super Queue LRU hints sent to L3 + 0x08 fill_dropped Counts the number of SQ L2 fills dropped due to L2 busy + 0x10 split_lock Counts the number of SQ lock splits across a cache line +name:fp_assist type:bitmask default:0x01 + 
0x01 all Counts the number of floating point operations executed that required micro-code assist intervention + 0x02 output Counts number of floating point micro-code assist when the output value (destination register) is invalid + 0x04 input Counts number of floating point micro-code assist when the input value (one of the source operands to an FP instruction) is invalid +name:simd_int_64 type:bitmask default:0x01 + 0x01 packed_mpy Counts number of SIMD integer 64 bit packed multiply operations + 0x02 packed_shift Counts number of SIMD integer 64 bit packed shift operations + 0x04 pack Counts number of SIMD integer 64 bit pack operations + 0x08 unpack Counts number of SIMD integer 64 bit unpack operations + 0x10 packed_logical Counts number of SIMD integer 64 bit logical operations + 0x20 packed_arith Counts number of SIMD integer 64 bit arithmetic operations + 0x40 shuffle_move Counts number of SIMD integer 64 bit shift or move operations +name:x20 type:mandatory default:0x20 + 0x20 No unit mask |
From: Suravee S. <sur...@am...> - 2009-04-29 19:23:16
|
Andi Kleen wrote: > > These are just the event files for the core, not including uncore. > > Will be used by later patches. > > Signed-off-by: Andi Kleen <ak...@li...> > > --- > events/i386/nehalem/events | 104 +++++++++++ > events/i386/nehalem/unit_masks | 368 > +++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 472 insertions(+) > > Index: oprofile/events/i386/nehalem/events > =================================================================== > --- /dev/null 1970-01-01 00:00:00.000000000 +0000 > +++ oprofile/events/i386/nehalem/events 2009-04-27 12:27:11.000000000 +0200 > @@ -0,0 +1,104 @@ > +# > +# Intel Nehalem (Core i7 etc.) core events > +# the uncore (memory controller/QPI) events are in separate files because > +# they vary between implementations (right now they are not implemented > +# in oprofile) > +# > +# architectural perfmon events > +event:0x3c counters:cpuid um:zero minimum:6000 filter:0 > name:CPU_CLK_UNHALTED : Clock cycles when not halted [Suravee] Just checking if you are missing the UNHALTED_REFERENCE_CYCLES here? > +event:0x2e counters:cpuid um:x41 minimum:6000 filter:5 name:LLC_MISSES > : Last level cache demand requests from this core that missed the LLC > +event:0x2e counters:cpuid um:x4f minimum:6000 filter:4 name:LLC_REFS : > Last level cache demand requests from this core > +event:0xc0 counters:0,1,2,3 um:inst_retired minimum:6000 > name:INST_RETIRED : number of instructions retired > +event:0xc4 counters:0,1,2,3 um:br_inst_retired minimum:500 > name:BR_INST_RETIRED : number of branch instructions retired > +event:0xc5 counters:0,1,2,3 um:br_misp_retired minimum:500 > name:BR_MISS_PRED_RETIRED : number of mispredicted branches retired > (precise) > +# |
From: Andi K. <an...@fi...> - 2009-04-29 19:35:29
|
> >+event:0x3c counters:cpuid um:zero minimum:6000 filter:0 > >name:CPU_CLK_UNHALTED : Clock cycles when not halted > > [Suravee] Just checking if you are missing the UNHALTED_REFERENCE_CYCLES > here? Thanks for paying attention. Nehalem is missing that event, see also the workaround for that in another patch for the arch perfmon case. -Andi -- ak...@li... -- Speaking for myself only. |
From: Suravee S. <sur...@am...> - 2009-04-29 20:02:00
|
Andi Kleen wrote: >>> +event:0x3c counters:cpuid um:zero minimum:6000 filter:0 >>> name:CPU_CLK_UNHALTED : Clock cycles when not halted >> [Suravee] Just checking if you are missing the UNHALTED_REFERENCE_CYCLES >> here? > > Thanks for paying attention. Nehalem is missing that event, see also > the workaround for that in another patch for the arch perfmon case. > > -Andi OK, I just saw the workaround right before you replied :D. Instead of omitting this event here, what about using the filter tags? You should be able to depend on the workaround to check the availability of this event based on the stepping of the CPU. Or am I missing something? Suravee |