Thread: [perfmon2] [PATCH 1/8] libpfm3: Updates for AMD Families 10h and 15h
Status: Beta
Brought to you by:
seranian
From: Robert R. <rob...@am...> - 2011-05-10 13:45:58
|
This patch series contains updates for AMD Family 10h and 15h CPUs. Some of these patches were already sent to the list; this series collects all pending patches that are not yet in the libpfm3 repository. -Robert The following changes since: * add -L option for compact encoding+event:umask output (2010-10-07 12:35:26 +0000) Robert Richter (8): libpfm3: Add support for AMD Family 10h RevE cpus libpfm3: modify AMD family check macros libpfm3: standard format amd64_events_fam10h.h from code generator libpfm3: Add support for AMD Family 15h cpus libpfm3: Add support for up to 6 counters (AMD Family 15h cpus) libpfm3: Don't report unsupported AMD family 15h northbridge events libpfm3: Fix family check for AMD architectural PMU libpfm3: Event table update for AMD Family 15h cpus include/perfmon/pfmlib.h | 2 +- include/perfmon/pfmlib_amd64.h | 8 +- lib/amd64_events.h | 8 + lib/amd64_events_fam10h.h | 99 +- lib/amd64_events_fam15h.h | 2418 ++++++++++++++++++++++++++++++++++++++++ lib/pfmlib_amd64.c | 77 +- lib/pfmlib_amd64_priv.h | 47 +- 7 files changed, 2551 insertions(+), 108 deletions(-) create mode 100644 lib/amd64_events_fam15h.h |
From: Robert R. <rob...@am...> - 2011-05-10 13:45:55
|
Mostly whitespace changes. Signed-off-by: Robert Richter <rob...@am...> --- lib/amd64_events_fam10h.h | 99 ++++++++++++++++++--------------------------- 1 files changed, 40 insertions(+), 59 deletions(-) diff --git a/lib/amd64_events_fam10h.h b/lib/amd64_events_fam10h.h index 7cefb24..1d333fe 100644 --- a/lib/amd64_events_fam10h.h +++ b/lib/amd64_events_fam10h.h @@ -2008,7 +2008,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ /* 110 */{.pme_name = "READ_REQUEST_TO_L3_CACHE", .pme_code = 0x4E0, .pme_desc = "Read Request to L3 Cache", - .pme_flags = PFMLIB_AMD64_UMASK_COMBO | PFMLIB_AMD64_TILL_FAM10H_REV_C, + .pme_flags = PFMLIB_AMD64_UMASK_COMBO|PFMLIB_AMD64_TILL_FAM10H_REV_C, .pme_numasks = 5, .pme_umasks = { { .pme_uname = "READ_BLOCK_EXCLUSIVE", @@ -2060,7 +2060,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ /* 111 */{.pme_name = "L3_CACHE_MISSES", .pme_code = 0x4E1, .pme_desc = "L3 Cache Misses", - .pme_flags = PFMLIB_AMD64_UMASK_COMBO | PFMLIB_AMD64_TILL_FAM10H_REV_C, + .pme_flags = PFMLIB_AMD64_UMASK_COMBO|PFMLIB_AMD64_TILL_FAM10H_REV_C, .pme_numasks = 5, .pme_umasks = { { .pme_uname = "READ_BLOCK_EXCLUSIVE", @@ -2112,7 +2112,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ /* 112 */{.pme_name = "L3_FILLS_CAUSED_BY_L2_EVICTIONS", .pme_code = 0x4E2, .pme_desc = "L3 Fills caused by L2 Evictions", - .pme_flags = PFMLIB_AMD64_UMASK_COMBO | PFMLIB_AMD64_TILL_FAM10H_REV_C, + .pme_flags = PFMLIB_AMD64_UMASK_COMBO|PFMLIB_AMD64_TILL_FAM10H_REV_C, .pme_numasks = 6, .pme_umasks = { { .pme_uname = "SHARED", @@ -2133,7 +2133,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ }, { .pme_uname = "ANY_STATE", .pme_udesc = "any line state (shared, owned, exclusive, modified)", - .pme_ucode = 0x0f, + .pme_ucode = 0x0F, }, #if 0 /* @@ -2199,7 +2199,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ /* 114 */{.pme_name = "PAGE_SIZE_MISMATCHES", .pme_code = 0x165, .pme_desc = "Page Size Mismatches", - .pme_flags = PFMLIB_AMD64_UMASK_COMBO | PFMLIB_AMD64_FAM10H_REV_C, + 
.pme_flags = PFMLIB_AMD64_UMASK_COMBO|PFMLIB_AMD64_FAM10H_REV_C, .pme_numasks = 4, .pme_umasks = { { .pme_uname = "GUEST_LARGER", @@ -2223,7 +2223,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ /* 115 */{.pme_name = "RETIRED_X87_OPS", .pme_code = 0x1C0, .pme_desc = "Retired x87 Floating Point Operations", - .pme_flags = PFMLIB_AMD64_UMASK_COMBO | PFMLIB_AMD64_FAM10H_REV_C, + .pme_flags = PFMLIB_AMD64_UMASK_COMBO|PFMLIB_AMD64_FAM10H_REV_C, .pme_numasks = 4, .pme_umasks = { { .pme_uname = "ADD_SUB_OPS", @@ -2270,7 +2270,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ /* 120 */{.pme_name = "READ_REQUEST_TO_L3_CACHE", .pme_code = 0x4E0, .pme_desc = "Read Request to L3 Cache", - .pme_flags = PFMLIB_AMD64_UMASK_COMBO | PFMLIB_AMD64_FAM10H_REV_D, + .pme_flags = PFMLIB_AMD64_UMASK_COMBO|PFMLIB_AMD64_FAM10H_REV_D, .pme_numasks = 5, .pme_umasks = { { .pme_uname = "READ_BLOCK_EXCLUSIVE", @@ -2335,7 +2335,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ /* 121 */{.pme_name = "L3_CACHE_MISSES", .pme_code = 0x4E1, .pme_desc = "L3 Cache Misses", - .pme_flags = PFMLIB_AMD64_UMASK_COMBO | PFMLIB_AMD64_FAM10H_REV_D, + .pme_flags = PFMLIB_AMD64_UMASK_COMBO|PFMLIB_AMD64_FAM10H_REV_D, .pme_numasks = 5, .pme_umasks = { { .pme_uname = "READ_BLOCK_EXCLUSIVE", @@ -2400,7 +2400,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ /* 122 */{.pme_name = "L3_FILLS_CAUSED_BY_L2_EVICTIONS", .pme_code = 0x4E2, .pme_desc = "L3 Fills caused by L2 Evictions", - .pme_flags = PFMLIB_AMD64_UMASK_COMBO | PFMLIB_AMD64_FAM10H_REV_D, + .pme_flags = PFMLIB_AMD64_UMASK_COMBO|PFMLIB_AMD64_FAM10H_REV_D, .pme_numasks = 6, .pme_umasks = { { .pme_uname = "SHARED", @@ -2421,7 +2421,7 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ }, { .pme_uname = "ANY_STATE", .pme_udesc = "any line state (shared, owned, exclusive, modified)", - .pme_ucode = 0x0f, + .pme_ucode = 0x0F, }, #if 0 /* @@ -2467,36 +2467,37 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ }, }, /* 123 */{.pme_name = "IBSOP_EVENT", - .pme_code = 0xFF, 
- .pme_desc = "Enable IBS OP mode (pseudo event)", - .pme_flags = 0, - .pme_numasks = 2, - .pme_umasks = { - { .pme_uname = "CYCLES", - .pme_udesc = "sample cycles", - .pme_ucode = 0x01, - }, - { .pme_uname = "UOPS", - .pme_udesc = "sample dispatched uops (Rev C and later)", - .pme_ucode = 0x02, - }, - }, - }, + .pme_code = 0xFF, + .pme_desc = "Enable IBS OP mode (pseudo event)", + .pme_flags = 0, + .pme_numasks = 2, + .pme_umasks = { + { .pme_uname = "CYCLES", + .pme_udesc = "sample cycles", + .pme_ucode = 0x01, + }, + { .pme_uname = "UOPS", + .pme_udesc = "sample dispatched uops (Rev C and later)", + .pme_ucode = 0x02, + }, + }, + }, /* 124 */{.pme_name = "IBSFETCH_EVENT", - .pme_code = 0xFF, - .pme_desc = "Enable IBS Fetch mode (pseudo event)", - .pme_flags = 0, - .pme_numasks = 2, - .pme_umasks = { - { .pme_uname = "RANDOM", - .pme_udesc = "randomize period", - .pme_ucode = 0x01, - }, - { .pme_uname = "NO_RANDOM", - .pme_udesc = "do not randomize period", - } - }, - }, + .pme_code = 0xFF, + .pme_desc = "Enable IBS Fetch mode (pseudo event)", + .pme_flags = 0, + .pme_numasks = 2, + .pme_umasks = { + { .pme_uname = "RANDOM", + .pme_udesc = "randomize period", + .pme_ucode = 0x01, + }, + { .pme_uname = "NO_RANDOM", + .pme_udesc = "do not randomize period", + .pme_ucode = 0x00, + }, + }, + }, /* 125 */{.pme_name = "MAB_REQUESTS", .pme_code = 0x68, .pme_desc = "Average L1 refill latency for Icache and Dcache misses (request count for cache refills)", @@ -2505,52 +2506,42 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ { .pme_uname = "BUFFER_0", .pme_udesc = "Buffer 0", .pme_ucode = 0x00, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_1", .pme_udesc = "Buffer 1", .pme_ucode = 0x01, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_2", .pme_udesc = "Buffer 2", .pme_ucode = 0x02, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_3", .pme_udesc = "Buffer 3", .pme_ucode = 0x03, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_4", .pme_udesc = "Buffer 4", .pme_ucode = 0x04, - .pme_uflags 
= 0, }, { .pme_uname = "BUFFER_5", .pme_udesc = "Buffer 5", .pme_ucode = 0x05, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_6", .pme_udesc = "Buffer 6", .pme_ucode = 0x06, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_7", .pme_udesc = "Buffer 7", .pme_ucode = 0x07, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_8", .pme_udesc = "Buffer 8", .pme_ucode = 0x08, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_9", .pme_udesc = "Buffer 9", .pme_ucode = 0x09, - .pme_uflags = 0, }, }, }, @@ -2562,52 +2553,42 @@ static pme_amd64_entry_t amd64_fam10h_pe[]={ { .pme_uname = "BUFFER_0", .pme_udesc = "Buffer 0", .pme_ucode = 0x00, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_1", .pme_udesc = "Buffer 1", .pme_ucode = 0x01, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_2", .pme_udesc = "Buffer 2", .pme_ucode = 0x02, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_3", .pme_udesc = "Buffer 3", .pme_ucode = 0x03, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_4", .pme_udesc = "Buffer 4", .pme_ucode = 0x04, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_5", .pme_udesc = "Buffer 5", .pme_ucode = 0x05, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_6", .pme_udesc = "Buffer 6", .pme_ucode = 0x06, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_7", .pme_udesc = "Buffer 7", .pme_ucode = 0x07, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_8", .pme_udesc = "Buffer 8", .pme_ucode = 0x08, - .pme_uflags = 0, }, { .pme_uname = "BUFFER_9", .pme_udesc = "Buffer 9", .pme_ucode = 0x09, - .pme_uflags = 0, }, }, }, -- 1.7.3.4 |
From: Robert R. <rob...@am...> - 2011-05-10 13:45:56
|
This patch adds support for AMD Family 15h CPUs (Orochi). This is a new microarchitecture with its own event table; however, its events are similar to those of Family 10h. Signed-off-by: Robert Richter <rob...@am...> --- include/perfmon/pfmlib.h | 2 +- lib/amd64_events.h | 8 + lib/amd64_events_fam15h.h | 2248 +++++++++++++++++++++++++++++++++++++++++++++ lib/pfmlib_amd64.c | 11 + lib/pfmlib_amd64_priv.h | 12 +- 5 files changed, 2278 insertions(+), 3 deletions(-) create mode 100644 lib/amd64_events_fam15h.h diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h index 7da3bfd..c75992f 100644 --- a/include/perfmon/pfmlib.h +++ b/include/perfmon/pfmlib.h @@ -224,7 +224,7 @@ extern pfm_err_t pfm_get_inst_retired_event(pfmlib_event_t *e); #define PFMLIB_ITANIUM_PMU 2 /* Intel Itanium */ #define PFMLIB_ITANIUM2_PMU 3 /* Intel Itanium 2 */ #define PFMLIB_MONTECITO_PMU 4 /* Intel Dual-Core Itanium 2 9000 */ -#define PFMLIB_AMD64_PMU 16 /* AMD AMD64 (K7, K8, Fam 10h) */ +#define PFMLIB_AMD64_PMU 16 /* AMD AMD64 (K7, K8, Families 10h, 15h) */ #define PFMLIB_GEN_IA32_PMU 63 /* Intel architectural PMU for X86 */ #define PFMLIB_I386_P6_PMU 32 /* Intel PIII (P6 core) */ #define PFMLIB_PENTIUM4_PMU 33 /* Intel Pentium4/Xeon/EM64T */ diff --git a/lib/amd64_events.h b/lib/amd64_events.h index 02e9444..a0c8b02 100644 --- a/lib/amd64_events.h +++ b/lib/amd64_events.h @@ -27,6 +27,7 @@ #include "amd64_events_k7.h" #include "amd64_events_k8.h" #include "amd64_events_fam10h.h" +#include "amd64_events_fam15h.h" struct pme_amd64_table { unsigned int num; @@ -55,3 +56,10 @@ static struct pme_amd64_table amd64_fam10h_table = { .cpu_clks = PME_AMD64_FAM10H_CPU_CLK_UNHALTED, .ret_inst = PME_AMD64_FAM10H_RETIRED_INSTRUCTIONS, }; + +static struct pme_amd64_table amd64_fam15h_table = { + .num = PME_AMD64_FAM15H_EVENT_COUNT, + .events = amd64_fam15h_pe, + .cpu_clks = PME_AMD64_FAM15H_CPU_CLK_UNHALTED, + .ret_inst = PME_AMD64_FAM15H_RETIRED_INSTRUCTIONS, +}; diff --git a/lib/amd64_events_fam15h.h 
b/lib/amd64_events_fam15h.h new file mode 100644 index 0000000..20c7140 --- /dev/null +++ b/lib/amd64_events_fam15h.h @@ -0,0 +1,2248 @@ +/* + * Copyright (c) 2010 Advanced Micro Devices, Inc. + * Contributed by Robert Richter <rob...@am...> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ */ + +/* History + * + * Dec 09 2010 -- Robert Richter, rob...@am...: + * + * Family 15h Microarchitecture performance monitor events + * + * Source: BIOS and Kernel Developer's Guide for the AMD Family 15h + * Processors, Rev 0.90, May 18, 2010 + */ + +static pme_amd64_entry_t amd64_fam15h_pe[]={ + +/* Family 15h */ + +/* 0 */{.pme_name = "DISPATCHED_FPU_OPS", + .pme_code = 0x00, + .pme_desc = "FPU Pipe Assignment", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "OPS_PIPE0", + .pme_udesc = "Total number uops assigned to Pipe 0", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "OPS_PIPE1", + .pme_udesc = "Total number uops assigned to Pipe 1", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "OPS_PIPE2", + .pme_udesc = "Total number uops assigned to Pipe 2", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "OPS_PIPE3", + .pme_udesc = "Total number uops assigned to Pipe 3", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "OPS_DUAL_PIPE0", + .pme_udesc = "Total number dual-pipe uops assigned to Pipe 0", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "OPS_DUAL_PIPE1", + .pme_udesc = "Total number dual-pipe uops assigned to Pipe 1", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "OPS_DUAL_PIPE2", + .pme_udesc = "Total number dual-pipe uops assigned to Pipe 2", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "OPS_DUAL_PIPE3", + .pme_udesc = "Total number dual-pipe uops assigned to Pipe 3", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 1 */{.pme_name = "CYCLES_FPU_EMPTY", + .pme_code = 0x01, + .pme_desc = "FP Scheduler Empty", + }, +/* 2 */{.pme_name = "RETIRED_SSE_OPS", + .pme_code = 0x03, + .pme_desc = "Retired SSE/BNI Ops", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "SINGLE_ADD_SUB_OPS", + .pme_udesc = "Single-precision add/subtract FLOPS", + .pme_ucode = 1 << 0, + }, + { .pme_uname = 
"SINGLE_MUL_OPS", + .pme_udesc = "Single-precision multiply FLOPS", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "SINGLE_DIV_OPS", + .pme_udesc = "Single-precision divide/square root FLOPS", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "SINGLE_MUL_ADD_OPS", + .pme_udesc = "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "DOUBLE_ADD_SUB_OPS", + .pme_udesc = "Double precision add/subtract FLOPS", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "DOUBLE_MUL_OPS", + .pme_udesc = "Double precision multiply FLOPS", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "DOUBLE_DIV_OPS", + .pme_udesc = "Double precision divide/square root FLOPS", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "DOUBLE_MUL_ADD_OPS", + .pme_udesc = "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 3 */{.pme_name = "MOVE_SCALAR_OPTIMIZATION", + .pme_code = 0x04, + .pme_desc = "Number of Move Elimination and Scalar Op Optimization", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 5, + .pme_umasks = { + { .pme_uname = "SSE_MOVE_OPS", + .pme_udesc = "Number of SSE Move Ops", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "SSE_MOVE_OPS_ELIM", + .pme_udesc = "Number of SSE Move Ops eliminated", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "OPT_CAND", + .pme_udesc = "Number of Ops that are candidates for optimization (Z-bit set or pass)", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "SCALAR_OPS_OPTIMIZED", + .pme_udesc = "Number of Scalar ops optimized", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x0F, + }, + }, + }, +/* 4 */{.pme_name = "RETIRED_SERIALIZING_OPS", + .pme_code = 0x05, + .pme_desc = "Retired Serializing Ops", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 5, + .pme_umasks = { + { .pme_uname = 
"SSE_RETIRED", + .pme_udesc = "SSE bottom-executing uops retired", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "SSE_MISPREDICTED", + .pme_udesc = "SSE control word mispredict traps due to mispredictions", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "X87_RETIRED", + .pme_udesc = "x87 bottom-executing uops retired", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "X87_MISPREDICTED", + .pme_udesc = "x87 control word mispredict traps due to mispredictions", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x0F, + }, + }, + }, +/* 5 */{.pme_name = "BOTTOM_EXECUTE_OP", + .pme_code = 0x06, + .pme_desc = "Number of Cycles that a Bottom-Execute uop is in the FP Scheduler", + }, +/* 6 */{.pme_name = "SEGMENT_REGISTER_LOADS", + .pme_code = 0x20, + .pme_desc = "Segment Register Loads", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 8, + .pme_umasks = { + { .pme_uname = "ES", + .pme_udesc = "ES", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "CS", + .pme_udesc = "CS", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "SS", + .pme_udesc = "SS", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "DS", + .pme_udesc = "DS", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "FS", + .pme_udesc = "FS", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "GS", + .pme_udesc = "GS", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "HS", + .pme_udesc = "HS", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x7F, + }, + }, + }, +/* 7 */{.pme_name = "PIPELINE_RESTART_DUE_TO_SELF_MODIFYING_CODE", + .pme_code = 0x21, + .pme_desc = "Pipeline Restart Due to Self-Modifying Code", + }, +/* 8 */{.pme_name = "PIPELINE_RESTART_DUE_TO_PROBE_HIT", + .pme_code = 0x22, + .pme_desc = "Pipeline Restart Due to Probe Hit", + }, +/* 9 */{.pme_name = "LOAD_Q_STORE_Q_FULL", + .pme_code = 0x23, + .pme_desc = "Load Queue/Store Queue Full", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 3, + 
.pme_umasks = { + { .pme_uname = "LOAD_QUEUE", + .pme_udesc = "The number of cycles that the load buffer is full", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "STORE_QUEUE", + .pme_udesc = "The number of cycles that the store buffer is full", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x03, + }, + }, + }, +/* 10 */{.pme_name = "LOCKED_OPS", + .pme_code = 0x24, + .pme_desc = "Locked Operations", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "EXECUTED", + .pme_udesc = "Number of locked instructions executed", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "CYCLES_NON_SPECULATIVE_PHASE", + .pme_udesc = "Number of cycles spent non-speculative phase (including cache miss penalty)", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "CYCLES_WAITING", + .pme_udesc = "Number of cycles waiting for a cache hit (cache miss penalty)", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x0D, + }, + }, + }, +/* 11 */{.pme_name = "RETIRED_CLFLUSH_INSTRUCTIONS", + .pme_code = 0x26, + .pme_desc = "Retired CLFLUSH Instructions", + }, +/* 12 */{.pme_name = "RETIRED_CPUID_INSTRUCTIONS", + .pme_code = 0x27, + .pme_desc = "Retired CPUID Instructions", + }, +/* 13 */{.pme_name = "CANCELLED_STORE_TO_LOAD", + .pme_code = 0x2A, + .pme_desc = "Canceled Store to Load Forward Operations", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 2, + .pme_umasks = { + { .pme_uname = "SIZE_ADDRESS_MISMATCHES", + .pme_udesc = "Store is smaller than load or different starting byte but partial overlap", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x01, + }, + }, + }, +/* 14 */{.pme_name = "SMIS_RECEIVED", + .pme_code = 0x2B, + .pme_desc = "SMIs Received", + }, +/* 15 */{.pme_name = "DATA_CACHE_ACCESSES", + .pme_code = 0x40, + .pme_desc = "Data Cache Accesses", + }, +/* 
16 */{.pme_name = "DATA_CACHE_MISSES", + .pme_code = 0x41, + .pme_desc = "Data Cache Misses", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 3, + .pme_umasks = { + { .pme_uname = "DC_MISS_STREAMING_STORE", + .pme_udesc = "First data cache miss or streaming store to a 64B cache line", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "STREAMING_STORE", + .pme_udesc = "First streaming store to a 64B cache line", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x03, + }, + }, + }, +/* 17 */{.pme_name = "DATA_CACHE_REFILLS_FROM_L2_OR_NORTHBRIDGE", + .pme_code = 0x42, + .pme_desc = "Data Cache Refills from L2 or System", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 5, + .pme_umasks = { + { .pme_uname = "GOOD", + .pme_udesc = "Fill with good data. (Final valid status is valid)", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "INVALID", + .pme_udesc = "Early valid status turned out to be invalid", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "POISON", + .pme_udesc = "Fill with poison data", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "READ_ERROR", + .pme_udesc = "Fill with read data error", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x0F, + }, + }, + }, +/* 18 */{.pme_name = "DATA_CACHE_REFILLS_FROM_NORTHBRIDGE", + .pme_code = 0x43, + .pme_desc = "Data Cache Refills from System", + }, +/* 19 */{.pme_name = "UNIFIED_TLB_HIT", + .pme_code = 0x45, + .pme_desc = "Unified TLB Hit", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 7, + .pme_umasks = { + { .pme_uname = "4K_DATA", + .pme_udesc = "4 KB unified TLB hit for data", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "2M_DATA", + .pme_udesc = "2 MB unified TLB hit for data", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "1G_DATA", + .pme_udesc = "1 GB unified TLB hit for data", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "4K_INST", + .pme_udesc = "4 KB unified TLB hit for 
instruction", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "2M_INST", + .pme_udesc = "2 MB unified TLB hit for instruction", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "1G_INST", + .pme_udesc = "1 GB unified TLB hit for instruction", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x77, + }, + }, + }, +/* 20 */{.pme_name = "UNIFIED_TLB_MISS", + .pme_code = 0x46, + .pme_desc = "Unified TLB Miss", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 7, + .pme_umasks = { + { .pme_uname = "4K_DATA", + .pme_udesc = "4 KB unified TLB miss for data", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "2M_DATA", + .pme_udesc = "2 MB unified TLB miss for data", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "1GB_DATA", + .pme_udesc = "1 GB unified TLB miss for data", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "4K_INST", + .pme_udesc = "4 KB unified TLB miss for instruction", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "2M_INST", + .pme_udesc = "2 MB unified TLB miss for instruction", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "1G_INST", + .pme_udesc = "1 GB unified TLB miss for instruction", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x77, + }, + }, + }, +/* 21 */{.pme_name = "MISALIGNED_ACCESSES", + .pme_code = 0x47, + .pme_desc = "Misaligned Accesses", + }, +/* 22 */{.pme_name = "PREFETCH_INSTRUCTIONS_DISPATCHED", + .pme_code = 0x4B, + .pme_desc = "Prefetch Instructions Dispatched", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "LOAD", + .pme_udesc = "Load (Prefetch, PrefetchT0/T1/T2)", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "STORE", + .pme_udesc = "Store (PrefetchW)", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "NTA", + .pme_udesc = "NTA (PrefetchNTA)", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x07, + }, + }, + }, +/* 
23 */{.pme_name = "INEFFECTIVE_SW_PREFETCHES", + .pme_code = 0x52, + .pme_desc = "Ineffective Software Prefetches", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 3, + .pme_umasks = { + { .pme_uname = "SW_PREFETCH_HIT_IN_L1", + .pme_udesc = "Software prefetch hit in the L1.", + .pme_ucode = 0x01, + }, + { .pme_uname = "SW_PREFETCH_HIT_IN_L2", + .pme_udesc = "Software prefetch hit in L2.", + .pme_ucode = 0x08, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x09, + }, + }, + }, +/* 24 */{.pme_name = "MEMORY_REQUESTS", + .pme_code = 0x65, + .pme_desc = "Memory Requests by Type", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "NON_CACHEABLE", + .pme_udesc = "Requests to non-cacheable (UC) memory", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "WRITE_COMBINING", + .pme_udesc = "Requests to non-cacheable (WC, but not WC+/SS) memory", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "STREAMING_STORE", + .pme_udesc = "Requests to non-cacheable (WC+/SS, but not WC) memory", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x83, + }, + }, + }, +/* 25 */{.pme_name = "DATA_PREFETCHER", + .pme_code = 0x67, + .pme_desc = "Data Prefetcher", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 2, + .pme_umasks = { + { .pme_uname = "ATTEMPTED", + .pme_udesc = "Prefetch attempts", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x02, + }, + }, + }, +/* 26 */{.pme_name = "MAB_REQS", + .pme_code = 0x68, + .pme_desc = "MAB Requests", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "BUFFER_BIT_0", + .pme_udesc = "Buffer entry index bit 0", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "BUFFER_BIT_1", + .pme_udesc = "Buffer entry index bit 1", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "BUFFER_BIT_2", + .pme_udesc = "Buffer 
entry index bit 2", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "BUFFER_BIT_3", + .pme_udesc = "Buffer entry index bit 3", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "BUFFER_BIT_4", + .pme_udesc = "Buffer entry index bit 4", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "BUFFER_BIT_5", + .pme_udesc = "Buffer entry index bit 5", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "BUFFER_BIT_6", + .pme_udesc = "Buffer entry index bit 6", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "BUFFER_BIT_7", + .pme_udesc = "Buffer entry index bit 7", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 27 */{.pme_name = "MAB_WAIT", + .pme_code = 0x69, + .pme_desc = "MAB Wait Cycles", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "BUFFER_BIT_0", + .pme_udesc = "Buffer entry index bit 0", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "BUFFER_BIT_1", + .pme_udesc = "Buffer entry index bit 1", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "BUFFER_BIT_2", + .pme_udesc = "Buffer entry index bit 2", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "BUFFER_BIT_3", + .pme_udesc = "Buffer entry index bit 3", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "BUFFER_BIT_4", + .pme_udesc = "Buffer entry index bit 4", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "BUFFER_BIT_5", + .pme_udesc = "Buffer entry index bit 5", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "BUFFER_BIT_6", + .pme_udesc = "Buffer entry index bit 6", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "BUFFER_BIT_7", + .pme_udesc = "Buffer entry index bit 7", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 28 */{.pme_name = "SYSTEM_READ_RESPONSES", + .pme_code = 0x6C, + .pme_desc = "Response From System on Cache Refills", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 7, + .pme_umasks = { + { .pme_uname = 
"EXCLUSIVE", + .pme_udesc = "Exclusive", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "MODIFIED", + .pme_udesc = "Modified (D18F0x68[ATMModeEn]==0), Modified written (D18F0x68[ATMModeEn]==1)", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "SHARED", + .pme_udesc = "Shared", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "OWNED", + .pme_udesc = "Owned", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "DATA_ERROR", + .pme_udesc = "Data Error", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "MODIFIED_UNWRITTEN", + .pme_udesc = "Modified unwritten", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x3F, + }, + }, + }, +/* 29 */{.pme_name = "OCTWORD_WRITE_TRANSFERS", + .pme_code = 0x6D, + .pme_desc = "Octwords Written to System", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 2, + .pme_umasks = { + { .pme_uname = "OCTWORD_WRITE_TRANSFER", + .pme_udesc = "OW write transfer", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x01, + }, + }, + }, +/* 30 */{.pme_name = "CPU_CLK_UNHALTED", + .pme_code = 0x76, + .pme_desc = "CPU Clocks not Halted", + }, +/* 31 */{.pme_name = "REQUESTS_TO_L2", + .pme_code = 0x7D, + .pme_desc = "Requests to L2 Cache", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 7, + .pme_umasks = { + { .pme_uname = "INSTRUCTIONS", + .pme_udesc = "IC fill", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "DATA", + .pme_udesc = "DC fill", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "TLB_WALK", + .pme_udesc = "TLB fill (page table walks)", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "SNOOP", + .pme_udesc = "NB probe request", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "CANCELLED", + .pme_udesc = "Canceled request", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "PREFETCHER", + .pme_udesc = "L2 cache prefetcher request", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x5F, 
+ }, + }, + }, +/* 32 */{.pme_name = "L2_CACHE_MISS", + .pme_code = 0x7E, + .pme_desc = "L2 Cache Misses", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 5, + .pme_umasks = { + { .pme_uname = "INSTRUCTIONS", + .pme_udesc = "IC fill", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "DATA", + .pme_udesc = "DC fill (includes possible replays, whereas PMCx041 does not)", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "TLB_WALK", + .pme_udesc = "TLB page table walk", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "PREFETCHER", + .pme_udesc = "L2 Cache Prefetcher request", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x17, + }, + }, + }, +/* 33 */{.pme_name = "L2_CACHE_FILL_WRITEBACK", + .pme_code = 0x7F, + .pme_desc = "L2 Fill/Writeback", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "L2_FILLS", + .pme_udesc = "L2 fills from system", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "L2_WRITEBACKS", + .pme_udesc = "L2 Writebacks to system (Clean and Dirty)", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "L2_WRITEBACKS_CLEAN", + .pme_udesc = "L2 Clean Writebacks to system", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x07, + }, + }, + }, +/* 34 */{.pme_name = "PAGE_SPLINTERING", + .pme_code = 0x165, + .pme_desc = "Page Splintering", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "GUEST_LARGER", + .pme_udesc = "Guest page size is larger than host page size when nested paging is enabled", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "MTRR_MISMATCH", + .pme_udesc = "Splintering due to MTRRs, IORRs, APIC, TOMs or other special address region", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "HOST_LARGER", + .pme_udesc = "Host page size is larger than the guest page size", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events 
selected", + .pme_ucode = 0x07, + }, + }, + }, +/* 35 */{.pme_name = "INSTRUCTION_CACHE_FETCHES", + .pme_code = 0x80, + .pme_desc = "Instruction Cache Fetches", + }, +/* 36 */{.pme_name = "INSTRUCTION_CACHE_MISSES", + .pme_code = 0x81, + .pme_desc = "Instruction Cache Misses", + }, +/* 37 */{.pme_name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", + .pme_code = 0x82, + .pme_desc = "Instruction Cache Refills from L2", + }, +/* 38 */{.pme_name = "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", + .pme_code = 0x83, + .pme_desc = "Instruction Cache Refills from System", + }, +/* 39 */{.pme_name = "L1_ITLB_MISS_AND_L2_ITLB_HIT", + .pme_code = 0x84, + .pme_desc = "L1 ITLB Miss, L2 ITLB Hit", + }, +/* 40 */{.pme_name = "L1_ITLB_MISS_AND_L2_ITLB_MISS", + .pme_code = 0x85, + .pme_desc = "L1 ITLB Miss, L2 ITLB Miss", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "4K_PAGE_FETCHES", + .pme_udesc = "Instruction fetches to a 4 KB page", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "2M_PAGE_FETCHES", + .pme_udesc = "Instruction fetches to a 2 MB page", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "1G_PAGE_FETCHES", + .pme_udesc = "Instruction fetches to a 1 GB page", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x07, + }, + }, + }, +/* 41 */{.pme_name = "PIPELINE_RESTART_DUE_TO_INSTRUCTION_STREAM_PROBE", + .pme_code = 0x86, + .pme_desc = "Pipeline Restart Due to Instruction Stream Probe", + }, +/* 42 */{.pme_name = "INSTRUCTION_FETCH_STALL", + .pme_code = 0x87, + .pme_desc = "Instruction Fetch Stall", + }, +/* 43 */{.pme_name = "RETURN_STACK_HITS", + .pme_code = 0x88, + .pme_desc = "Return Stack Hits", + }, +/* 44 */{.pme_name = "RETURN_STACK_OVERFLOWS", + .pme_code = 0x89, + .pme_desc = "Return Stack Overflows", + }, +/* 45 */{.pme_name = "INSTRUCTION_CACHE_VICTIMS", + .pme_code = 0x8B, + .pme_desc = "Instruction Cache Victims", + }, +/* 46 */{.pme_name = 
"INSTRUCTION_CACHE_INVALIDATED", + .pme_code = 0x8C, + .pme_desc = "Instruction Cache Lines Invalidated", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 5, + .pme_umasks = { + { .pme_uname = "NON_SMC_PROBE_MISS", + .pme_udesc = "Non-SMC invalidating probe that missed on in-flight instructions", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "NON_SMC_PROBE_HIT", + .pme_udesc = "Non-SMC invalidating probe that hit on in-flight instructions", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "SMC_PROBE_MISS", + .pme_udesc = "SMC invalidating probe that missed on in-flight instructions", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "SMC_PROBE_HIT", + .pme_udesc = "SMC invalidating probe that hit on in-flight instructions", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x0F, + }, + }, + }, +/* 47 */{.pme_name = "ITLB_RELOADS", + .pme_code = 0x99, + .pme_desc = "ITLB Reloads", + }, +/* 48 */{.pme_name = "ITLB_RELOADS_ABORTED", + .pme_code = 0x9A, + .pme_desc = "ITLB Reloads Aborted", + }, +/* 49 */{.pme_name = "RETIRED_INSTRUCTIONS", + .pme_code = 0xC0, + .pme_desc = "Retired Instructions", + }, +/* 50 */{.pme_name = "RETIRED_UOPS", + .pme_code = 0xC1, + .pme_desc = "Retired uops", + }, +/* 51 */{.pme_name = "RETIRED_BRANCH_INSTRUCTIONS", + .pme_code = 0xC2, + .pme_desc = "Retired Branch Instructions", + }, +/* 52 */{.pme_name = "RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS", + .pme_code = 0xC3, + .pme_desc = "Retired Mispredicted Branch Instructions", + }, +/* 53 */{.pme_name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", + .pme_code = 0xC4, + .pme_desc = "Retired Taken Branch Instructions", + }, +/* 54 */{.pme_name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", + .pme_code = 0xC5, + .pme_desc = "Retired Taken Branch Instructions Mispredicted", + }, +/* 55 */{.pme_name = "RETIRED_FAR_CONTROL_TRANSFERS", + .pme_code = 0xC6, + .pme_desc = "Retired Far Control Transfers", + }, +/* 56 */{.pme_name = 
"RETIRED_BRANCH_RESYNCS", + .pme_code = 0xC7, + .pme_desc = "Retired Branch Resyncs", + }, +/* 57 */{.pme_name = "RETIRED_NEAR_RETURNS", + .pme_code = 0xC8, + .pme_desc = "Retired Near Returns", + }, +/* 58 */{.pme_name = "RETIRED_NEAR_RETURNS_MISPREDICTED", + .pme_code = 0xC9, + .pme_desc = "Retired Near Returns Mispredicted", + }, +/* 59 */{.pme_name = "RETIRED_INDIRECT_BRANCHES_MISPREDICTED", + .pme_code = 0xCA, + .pme_desc = "Retired Indirect Branches Mispredicted", + }, +/* 60 */{.pme_name = "RETIRED_MMX_FP_INSTRUCTIONS", + .pme_code = 0xCB, + .pme_desc = "Retired MMX/FP Instructions", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "X87", + .pme_udesc = "x87 instructions", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "MMX", + .pme_udesc = "MMX(tm) instructions", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "SSE", + .pme_udesc = "SSE instructions (SSE,SSE2,SSE3,SSSE3,SSE4A,SSE4.1,SSE4.2,AVX,XOP,FMA4)", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x07, + }, + }, + }, +/* 61 */{.pme_name = "INTERRUPTS_MASKED_CYCLES", + .pme_code = 0xCD, + .pme_desc = "Interrupts-Masked Cycles", + }, +/* 62 */{.pme_name = "INTERRUPTS_MASKED_CYCLES_WITH_INTERRUPT_PENDING", + .pme_code = 0xCE, + .pme_desc = "Interrupts-Masked Cycles with Interrupt Pending", + }, +/* 63 */{.pme_name = "INTERRUPTS_TAKEN", + .pme_code = 0xCF, + .pme_desc = "Interrupts Taken", + }, +/* 64 */{.pme_name = "DECODER_EMPTY", + .pme_code = 0xD0, + .pme_desc = "Decoder Empty", + }, +/* 65 */{.pme_name = "DISPATCH_STALLS", + .pme_code = 0xD1, + .pme_desc = "Dispatch Stalls", + }, +/* 66 */{.pme_name = "DISPATCH_STALL_FOR_SERIALIZATION", + .pme_code = 0xD3, + .pme_desc = "Microsequencer Stall due to Serialization", + }, +/* 67 */{.pme_name = "DISPATCH_STALL_FOR_RETIRE_QUEUE_FULL", + .pme_code = 0xD5, + .pme_desc = "Dispatch Stall for Instruction Retire Q Full", + }, +/* 68 */{.pme_name = 
"DISPATCH_STALL_FOR_INT_SCHED_QUEUE_FULL", + .pme_code = 0xD6, + .pme_desc = "Dispatch Stall for Integer Scheduler Queue Full", + }, +/* 69 */{.pme_name = "DISPATCH_STALL_FOR_FPU_FULL", + .pme_code = 0xD7, + .pme_desc = "Dispatch Stall for FP Scheduler Queue Full", + }, +/* 70 */{.pme_name = "DISPATCH_STALL_FOR_LDQ_FULL", + .pme_code = 0xD8, + .pme_desc = "Dispatch Stall for LDQ Full", + }, +/* 71 */{.pme_name = "MICROSEQ_STALL_WAITING_FOR_ALL_QUIET", + .pme_code = 0xD9, + .pme_desc = "Microsequencer Stall Waiting for All Quiet", + }, +/* 72 */{.pme_name = "FPU_EXCEPTIONS", + .pme_code = 0xDB, + .pme_desc = "FPU Exceptions", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 6, + .pme_umasks = { + { .pme_uname = "TOTAL_FAULTS", + .pme_udesc = "Total microfaults", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "TOTAL_TRAPS", + .pme_udesc = "Total microtraps", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "INT2EXT_FAULTS", + .pme_udesc = "Int2Ext faults", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "EXT2INT_FAULTS", + .pme_udesc = "Ext2Int faults", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "BYPASS_FAULTS", + .pme_udesc = "Bypass faults", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x1F, + }, + }, + }, +/* 73 */{.pme_name = "DR0_BREAKPOINTS", + .pme_code = 0xDC, + .pme_desc = "DR0 Breakpoint Match", + }, +/* 74 */{.pme_name = "DR1_BREAKPOINTS", + .pme_code = 0xDD, + .pme_desc = "DR1 Breakpoint Match", + }, +/* 75 */{.pme_name = "DR2_BREAKPOINTS", + .pme_code = 0xDE, + .pme_desc = "DR2 Breakpoint Match", + }, +/* 76 */{.pme_name = "DR3_BREAKPOINTS", + .pme_code = 0xDF, + .pme_desc = "DR3 Breakpoint Match", + }, +/* 77 */{.pme_name = "IBS_OPS_TAGGED", + .pme_code = 0x1CF, + .pme_desc = "Tagged IBS Ops", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "TAGGED", + .pme_udesc = "Number of ops tagged by IBS", + .pme_ucode = 1 << 0, + }, + { .pme_uname = 
"RETIRED", + .pme_udesc = "Number of ops tagged by IBS that retired", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "IGNORED", + .pme_udesc = "Number of times op could not be tagged due to other tagged op active in pipe", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x07, + }, + }, + }, +/* 78 */{.pme_name = "DRAM_ACCESSES", + .pme_code = 0xE0, + .pme_desc = "DRAM Accesses", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 7, + .pme_umasks = { + { .pme_uname = "HIT", + .pme_udesc = "DCT0 Page hit", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "MISS", + .pme_udesc = "DCT0 Page Miss", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "CONFLICT", + .pme_udesc = "DCT0 Page Conflict", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "DCT1_PAGE_HIT", + .pme_udesc = "DCT1 Page hit", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "DCT1_PAGE_MISS", + .pme_udesc = "DCT1 Page Miss", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "DCT1_PAGE_CONFLICT", + .pme_udesc = "DCT1 Page Conflict", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x3F, + }, + }, + }, +/* 79 */{.pme_name = "MEMORY_CONTROLLER_PAGE_TABLE_OVERFLOWS", + .pme_code = 0xE1, + .pme_desc = "DRAM Controller Page Table Overflows", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 3, + .pme_umasks = { + { .pme_uname = "DCT0_PAGE_TABLE_OVERFLOW", + .pme_udesc = "DCT0 Page Table Overflow", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "DCT1_PAGE_TABLE_OVERFLOW", + .pme_udesc = "DCT1 Page Table Overflow", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x03, + }, + }, + }, +/* 80 */{.pme_name = "MEMORY_CONTROLLER_SLOT_MISSED", + .pme_code = 0xE2, + .pme_desc = "Memory Controller DRAM Command Slots Missed", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 3, + .pme_umasks = { + { .pme_uname = "DCT0_COMMAND_SLOTS_MISSED", + 
.pme_udesc = "DCT0 Command Slots Missed (in MemClks)", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "DCT1_COMMAND_SLOTS_MISSED", + .pme_udesc = "DCT1 Command Slots Missed (in MemClks)", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x03, + }, + }, + }, +/* 81 */{.pme_name = "MEMORY_CONTROLLER_TURNAROUNDS", + .pme_code = 0xE3, + .pme_desc = "Memory Controller Turnarounds", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 7, + .pme_umasks = { + { .pme_uname = "CHIP_SELECT", + .pme_udesc = "DCT0 DIMM (chip select) turnaround", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "READ_TO_WRITE", + .pme_udesc = "DCT0 Read to write turnaround", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "WRITE_TO_READ", + .pme_udesc = "DCT0 Write to read turnaround", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "DCT1_DIMM", + .pme_udesc = "DCT1 DIMM (chip select) turnaround", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "DCT1_READ_TO_WRITE_TURNAROUND", + .pme_udesc = "DCT1 Read to write turnaround", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "DCT1_WRITE_TO_READ_TURNAROUND", + .pme_udesc = "DCT1 Write to read turnaround", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x3F, + }, + }, + }, +/* 82 */{.pme_name = "MEMORY_CONTROLLER_BYPASS_COUNTER_SATURATION", + .pme_code = 0xE4, + .pme_desc = "Memory Controller Bypass Counter Saturation", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 5, + .pme_umasks = { + { .pme_uname = "HIGH_PRIORITY", + .pme_udesc = "Memory controller high priority bypass", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "MEDIUM_PRIORITY", + .pme_udesc = "Memory controller medium priority bypass", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "DCT0_DCQ", + .pme_udesc = "DCT0 DCQ bypass", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "DCT1_DCQ", + .pme_udesc = "DCT1 DCQ bypass", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ALL", + 
.pme_udesc = "All sub-events selected", + .pme_ucode = 0x0F, + }, + }, + }, +/* 83 */{.pme_name = "THERMAL_STATUS", + .pme_code = 0xE8, + .pme_desc = "Thermal Status", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "CLKS_DIE_TEMP_TOO_HIGH", + .pme_udesc = "Number of times the HTC trip point is crossed", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "CLOCKS_HTC_P_STATE_INACTIVE", + .pme_udesc = "Number of clocks HTC P-state is inactive", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "CLOCKS_HTC_P_STATE_ACTIVE", + .pme_udesc = "Number of clocks HTC P-state is active", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x64, + }, + }, + }, +/* 84 */{.pme_name = "CPU_IO_REQUESTS_TO_MEMORY_IO", + .pme_code = 0xE9, + .pme_desc = "CPU/IO Requests to Memory/IO", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "I_O_TO_I_O", + .pme_udesc = "IO to IO", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "I_O_TO_MEM", + .pme_udesc = "IO to Mem", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "CPU_TO_I_O", + .pme_udesc = "CPU to IO", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "CPU_TO_MEM", + .pme_udesc = "CPU to Mem", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "TO_REMOTE_NODE", + .pme_udesc = "To remote node", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "TO_LOCAL_NODE", + .pme_udesc = "To local node", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "FROM_REMOTE_NODE", + .pme_udesc = "From remote node", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "FROM_LOCAL_NODE", + .pme_udesc = "From local node", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 85 */{.pme_name = "CACHE_BLOCK_COMMANDS", + .pme_code = 0xEA, + .pme_desc = "Cache Block Commands", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 6, + .pme_umasks = { + { .pme_uname = 
"VICTIM_WRITEBACK", + .pme_udesc = "Victim Block (Writeback)", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "DCACHE_LOAD_MISS", + .pme_udesc = "Read Block (Dcache load miss refill)", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "SHARED_ICACHE_REFILL", + .pme_udesc = "Read Block Shared (Icache refill)", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "READ_BLOCK_MODIFIED", + .pme_udesc = "Read Block Modified (Dcache store miss refill)", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "CHANGE_TO_DIRTY", + .pme_udesc = "Change-to-Dirty (first store to clean block already in cache)", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x3D, + }, + }, + }, +/* 86 */{.pme_name = "SIZED_COMMANDS", + .pme_code = 0xEB, + .pme_desc = "Sized Commands", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 7, + .pme_umasks = { + { .pme_uname = "NON_POSTED_WRITE_BYTE", + .pme_udesc = "Non-Posted SzWr Byte (1-32 bytes)", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "NON_POSTED_WRITE_DWORD", + .pme_udesc = "Non-Posted SzWr DW (1-16 dwords)", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "POSTED_WRITE_BYTE", + .pme_udesc = "Posted SzWr Byte (1-32 bytes)", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "POSTED_WRITE_DWORD", + .pme_udesc = "Posted SzWr DW (1-16 dwords)", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "READ_BYTE", + .pme_udesc = "SzRd Byte (4 bytes)", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "READ_DWORD", + .pme_udesc = "SzRd DW (1-16 dwords)", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x3F, + }, + }, + }, +/* 87 */{.pme_name = "PROBE_RESPONSES_AND_UPSTREAM_REQUESTS", + .pme_code = 0xEC, + .pme_desc = "Probe Responses and Upstream Requests", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "MISS", + .pme_udesc = "Probe miss", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "HIT_CLEAN", + .pme_udesc = 
"Probe hit clean", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "HIT_DIRTY_NO_MEMORY_CANCEL", + .pme_udesc = "Probe hit dirty without memory cancel (probed by Sized Write or Change2Dirty)", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "HIT_DIRTY_WITH_MEMORY_CANCEL", + .pme_udesc = "Probe hit dirty with memory cancel (probed by DMA read/cache refill request)", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "UPSTREAM_DISPLAY_REFRESH_READS", + .pme_udesc = "Upstream display refresh/ISOC reads", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "UPSTREAM_NON_DISPLAY_REFRESH_READS", + .pme_udesc = "Upstream non-display refresh reads", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "UPSTREAM_WRITES", + .pme_udesc = "Upstream ISOC writes", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "UPSTREAM_NON_ISOC_WRITES", + .pme_udesc = "Upstream non-ISOC writes", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 88 */{.pme_name = "GART_EVENTS", + .pme_code = 0xEE, + .pme_desc = "GART Events", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 6, + .pme_umasks = { + { .pme_uname = "CPU_HIT", + .pme_udesc = "GART aperture hit on access from CPU", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "IO_HIT", + .pme_udesc = "GART aperture hit on access from IO", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "MISS", + .pme_udesc = "GART miss", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "TABLE_WALK", + .pme_udesc = "GART Request hit table walk in progress", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "MULTIPLE_TABLE_WALK", + .pme_udesc = "GART multiple table walk in progress", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x8F, + }, + }, + }, +/* 89 */{.pme_name = "HYPERTRANSPORT_LINK0_TRANSMIT_BANDWIDTH", + .pme_code = 0xF6, + .pme_desc = "HyperTransport(tm) Link 0 Transmit Bandwidth", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 8, + 
.pme_umasks = { + { .pme_uname = "COMMAND_DWORD_SENT", + .pme_udesc = "Command DWORD sent", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "DATA_DWORD_SENT", + .pme_udesc = "Data DWORD sent", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "BUFFER_RELEASE_DWORD_SENT", + .pme_udesc = "Buffer release DWORD sent", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "NOP_DWORD_SENT", + .pme_udesc = "Nop DW sent (idle)", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ADDRESS_EXT_DWORD_SENT", + .pme_udesc = "Address DWORD sent", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "PER_PACKET_CRC_SENT", + .pme_udesc = "Per packet CRC sent", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "SUBLINK_MASK", + .pme_udesc = "SubLink Mask", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xBF, + }, + }, + }, +/* 90 */{.pme_name = "HYPERTRANSPORT_LINK1_TRANSMIT_BANDWIDTH", + .pme_code = 0xF7, + .pme_desc = "HyperTransport(tm) Link 1 Transmit Bandwidth", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 8, + .pme_umasks = { + { .pme_uname = "COMMAND_DWORD_SENT", + .pme_udesc = "Command DWORD sent", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "DATA_DWORD_SENT", + .pme_udesc = "Data DWORD sent", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "BUFFER_RELEASE_DWORD_SENT", + .pme_udesc = "Buffer release DWORD sent", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "NOP_DWORD_SENT", + .pme_udesc = "Nop DW sent (idle)", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ADDRESS_EXT_DWORD_SENT", + .pme_udesc = "Address DWORD sent", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "PER_PACKET_CRC_SENT", + .pme_udesc = "Per packet CRC sent", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "SUBLINK_MASK", + .pme_udesc = "SubLink Mask", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xBF, + }, + }, + }, +/* 91 */{.pme_name = "HYPERTRANSPORT_LINK2_TRANSMIT_BANDWIDTH", + .pme_code = 0xF8, + .pme_desc = 
"HyperTransport(tm) Link 2 Transmit Bandwidth", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 8, + .pme_umasks = { + { .pme_uname = "COMMAND_DWORD_SENT", + .pme_udesc = "Command DWORD sent", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "DATA_DWORD_SENT", + .pme_udesc = "Data DWORD sent", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "BUFFER_RELEASE_DWORD_SENT", + .pme_udesc = "Buffer release DWORD sent", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "NOP_DWORD_SENT", + .pme_udesc = "Nop DW sent (idle)", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ADDRESS_EXT_DWORD_SENT", + .pme_udesc = "Address DWORD sent", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "PER_PACKET_CRC_SENT", + .pme_udesc = "Per packet CRC sent", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "SUBLINK_MASK", + .pme_udesc = "SubLink Mask", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xBF, + }, + }, + }, +/* 92 */{.pme_name = "HYPERTRANSPORT_LINK3_TRANSMIT_BANDWIDTH", + .pme_code = 0x1F9, + .pme_desc = "HyperTransport(tm) Link 3 Transmit Bandwidth", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 8, + .pme_umasks = { + { .pme_uname = "COMMAND_DWORD_SENT", + .pme_udesc = "Command DWORD sent", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "DATA_DWORD_SENT", + .pme_udesc = "Data DWORD sent", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "BUFFER_RELEASE_DWORD_SENT", + .pme_udesc = "Buffer release DWORD sent", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "NOP_DWORD_SENT", + .pme_udesc = "Nop DW sent (idle)", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "ADDRESS_EXT_DWORD_SENT", + .pme_udesc = "Address DWORD sent", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "PER_PACKET_CRC_SENT", + .pme_udesc = "Per packet CRC sent", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "SUBLINK_MASK", + .pme_udesc = "SubLink Mask", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xBF, + }, 
+ }, + }, +/* 93 */{.pme_name = "CPU_DRAM_REQUEST_TO_NODE", + .pme_code = 0x1E0, + .pme_desc = "CPU to DRAM Requests to Target Node", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "LOCAL_TO_0", + .pme_udesc = "From Local node to Node 0", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "LOCAL_TO_1", + .pme_udesc = "From Local node to Node 1", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "LOCAL_TO_2", + .pme_udesc = "From Local node to Node 2", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "LOCAL_TO_3", + .pme_udesc = "From Local node to Node 3", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "LOCAL_TO_4", + .pme_udesc = "From Local node to Node 4", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "LOCAL_TO_5", + .pme_udesc = "From Local node to Node 5", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "LOCAL_TO_6", + .pme_udesc = "From Local node to Node 6", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "LOCAL_TO_7", + .pme_udesc = "From Local node to Node 7", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 94 */{.pme_name = "IO_DRAM_REQUEST_TO_NODE", + .pme_code = 0x1E1, + .pme_desc = "IO to DRAM Requests to Target Node", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "LOCAL_TO_0", + .pme_udesc = "From Local node to Node 0", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "LOCAL_TO_1", + .pme_udesc = "From Local node to Node 1", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "LOCAL_TO_2", + .pme_udesc = "From Local node to Node 2", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "LOCAL_TO_3", + .pme_udesc = "From Local node to Node 3", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "LOCAL_TO_4", + .pme_udesc = "From Local node to Node 4", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "LOCAL_TO_5", + .pme_udesc = "From Local node to Node 5", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "LOCAL_TO_6", + 
.pme_udesc = "From Local node to Node 6", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "LOCAL_TO_7", + .pme_udesc = "From Local node to Node 7", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 95 */{.pme_name = "CPU_READ_COMMAND_LATENCY_NODE_0_3", + .pme_code = 0x1E2, + .pme_desc = "CPU Read Command Latency to Target Node 0-3", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "READ_BLOCK", + .pme_udesc = "Read block", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "READ_BLOCK_SHARED", + .pme_udesc = "Read block shared", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "READ_BLOCK_MODIFIED", + .pme_udesc = "Read block modified", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "CHANGE_TO_DIRTY", + .pme_udesc = "Change-to-Dirty", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "LOCAL_TO_0", + .pme_udesc = "From Local node to Node 0", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "LOCAL_TO_1", + .pme_udesc = "From Local node to Node 1", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "LOCAL_TO_2", + .pme_udesc = "From Local node to Node 2", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "LOCAL_TO_3", + .pme_udesc = "From Local node to Node 3", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 96 */{.pme_name = "CPU_READ_COMMAND_REQUEST_NODE_0_3", + .pme_code = 0x1E3, + .pme_desc = "CPU Read Command Requests to Target Node 0-3", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "READ_BLOCK", + .pme_udesc = "Read block", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "READ_BLOCK_SHARED", + .pme_udesc = "Read block shared", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "READ_BLOCK_MODIFIED", + .pme_udesc = "Read block modified", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "CHANGE_TO_DIRTY", + .pme_udesc = "Change-to-Dirty", + 
.pme_ucode = 1 << 3, + }, + { .pme_uname = "LOCAL_TO_0", + .pme_udesc = "From Local node to Node 0", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "LOCAL_TO_1", + .pme_udesc = "From Local node to Node 1", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "LOCAL_TO_2", + .pme_udesc = "From Local node to Node 2", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "LOCAL_TO_3", + .pme_udesc = "From Local node to Node 3", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 97 */{.pme_name = "CPU_READ_COMMAND_LATENCY_NODE_4_7", + .pme_code = 0x1E4, + .pme_desc = "CPU Read Command Latency to Target Node 4-7", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "READ_BLOCK", + .pme_udesc = "Read block", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "READ_BLOCK_SHARED", + .pme_udesc = "Read block shared", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "READ_BLOCK_MODIFIED", + .pme_udesc = "Read block modified", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "CHANGE_TO_DIRTY", + .pme_udesc = "Change-to-Dirty", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "LOCAL_TO_4", + .pme_udesc = "From Local node to Node 4", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "LOCAL_TO_5", + .pme_udesc = "From Local node to Node 5", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "LOCAL_TO_6", + .pme_udesc = "From Local node to Node 6", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "LOCAL_TO_7", + .pme_udesc = "From Local node to Node 7", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 98 */{.pme_name = "CPU_READ_COMMAND_REQUEST_NODE_4_7", + .pme_code = 0x1E5, + .pme_desc = "CPU Read Command Requests to Target Node 4-7", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "READ_BLOCK", + .pme_udesc = "Read block", + .pme_ucode = 1 << 0, + }, + { .pme_uname = 
"READ_BLOCK_SHARED", + .pme_udesc = "Read block shared", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "READ_BLOCK_MODIFIED", + .pme_udesc = "Read block modified", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "CHANGE_TO_DIRTY", + .pme_udesc = "Change-to-Dirty", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "LOCAL_TO_4", + .pme_udesc = "From Local node to Node 4", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "LOCAL_TO_5", + .pme_udesc = "From Local node to Node 5", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "LOCAL_TO_6", + .pme_udesc = "From Local node to Node 6", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "LOCAL_TO_7", + .pme_udesc = "From Local node to Node 7", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 99 */{.pme_name = "CPU_COMMAND_LATENCY_TARGET", + .pme_code = 0x1E6, + .pme_desc = "CPU Command Latency to Target Node 0-3/4-7", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "READ_SIZED", + .pme_udesc = "Read Sized", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "WRITE_SIZED", + .pme_udesc = "Write Sized", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "VICTIM_BLOCK", + .pme_udesc = "Victim Block", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "NODE_GROUP_SELECT", + .pme_udesc = "Node Group Select: 0=Nodes 0-3, 1= Nodes 4-7", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "LOCAL_TO_0_4", + .pme_udesc = "From Local node to Node 0/4", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "LOCAL_TO_1_5", + .pme_udesc = "From Local node to Node 1/5", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "LOCAL_TO_2_6", + .pme_udesc = "From Local node to Node 2/6", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "LOCAL_TO_3_7", + .pme_udesc = "From Local node to Node 3/7", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 100 */{.pme_name = "CPU_REQUEST_TARGET", + 
.pme_code = 0x1E7, + .pme_desc = "CPU Requests to Target Node 0-3/4-7", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "READ_SIZED", + .pme_udesc = "Read Sized", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "WRITE_SIZED", + .pme_udesc = "Write Sized", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "VICTIM_BLOCK", + .pme_udesc = "Victim Block", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "NODE_GROUP_SELECT", + .pme_udesc = "Node Group Select: 0=Nodes 0-3, 1= Nodes 4-7", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "LOCAL_TO_0_4", + .pme_udesc = "From Local node to Node 0/4", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "LOCAL_TO_1_5", + .pme_udesc = "From Local node to Node 1/5", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "LOCAL_TO_2_6", + .pme_udesc = "From Local node to Node 2/6", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "LOCAL_TO_3_7", + .pme_udesc = "From Local node to Node 3/7", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 101 */{.pme_name = "MEMORY_CONTROLLER_REQUESTS", + .pme_code = 0x1F0, + .pme_desc = "Memory Controller Requests", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "WRITE_REQUESTS", + .pme_udesc = "Write requests sent to the DCT", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "READ_REQUESTS", + .pme_udesc = "Read requests (including prefetch requests) sent to the DCT", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "PREFETCH_REQUESTS", + .pme_udesc = "Prefetch requests sent to the DCT", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "32_BYTES_WRITES", + .pme_udesc = "32 Bytes Sized Writes", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "64_BYTES_WRITES", + .pme_udesc = "64 Bytes Sized Writes", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "32_BYTES_READS", + .pme_udesc = "32 Bytes Sized Reads", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "64_BYTES_READS", + 
.pme_udesc = "64 Byte Sized Reads", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "READ_REQUESTS_WHILE_WRITES_REQUESTS", + .pme_udesc = "Read requests sent to the DCT while writes requests are pending in the DCT", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 102 */{.pme_name = "READ_REQUEST_L3_CACHE", + .pme_code = 0x4E0, + .pme_desc = "Read Request to L3 Cache", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 13, + .pme_umasks = { + { .pme_uname = "READ_BLOCK_EXCLUSIVE", + .pme_udesc = "Read Block Exclusive (Data cache read)", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "READ_BLOCK_SHARED", + .pme_udesc = "Read Block Shared (Instruction cache read)", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "READ_BLOCK_MODIFY", + .pme_udesc = "Read Block Modify", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "PREFETCH_ONLY", + .pme_udesc = "1=Count prefetch only, 0=Count prefetch and non-prefetch", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "CORE_0_SELECT", + .pme_udesc = "Core 0 Select", + .pme_ucode = 0x00, + }, + { .pme_uname = "CORE_1_SELECT", + .pme_udesc = "Core 1 Select", + .pme_ucode = 0x10, + }, + { .pme_uname = "CORE_2_SELECT", + .pme_udesc = "Core 2 Select", + .pme_ucode = 0x20, + }, + { .pme_uname = "CORE_3_SELECT", + .pme_udesc = "Core 3 Select", + .pme_ucode = 0x30, + }, + { .pme_uname = "CORE_4_SELECT", + .pme_udesc = "Core 4 Select", + .pme_ucode = 0x40, + }, + { .pme_uname = "CORE_5_SELECT", + .pme_udesc = "Core 5 Select", + .pme_ucode = 0x50, + }, + { .pme_uname = "CORE_6_SELECT", + .pme_udesc = "Core 6 Select", + .pme_ucode = 0x60, + }, + { .pme_uname = "CORE_7_SELECT", + .pme_udesc = "Core 7 Select", + .pme_ucode = 0x70, + }, + { .pme_uname = "ALL_CORES", + .pme_udesc = "All cores", + .pme_ucode = 0xF0, + }, + }, + }, +/* 103 */{.pme_name = "L3_CACHE_MISSES", + .pme_code = 0x4E1, + .pme_desc = "L3 Cache Misses", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + 
.pme_numasks = 13, + .pme_umasks = { + { .pme_uname = "READ_BLOCK_EXCLUSIVE", + .pme_udesc = "Read Block Exclusive (Data cache read)", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "READ_BLOCK_SHARED", + .pme_udesc = "Read Block Shared (Instruction cache read)", + .pme_ucode = 1 << 1, + ... [truncated message content] |
From: Robert R. <rob...@am...> - 2011-05-10 13:45:57
|
Updated event table for AMD Family 15h cpus from: Apr 29 2011 -- Robert Richter, rob...@am...: Source: BKDG for AMD Family 15h Models 00h-0Fh Processors, 42301, Rev 1.15, April 18, 2011 These events have been added for family 15h: PMCx029 LS Dispatch PMCx030 Executed CLFLUSH Instructions PMCx16C L2 Prefetcher Trigger Events PMCx1D8 Dispatch Stall for STQ Full NBPMCx1EA Request Cache Status 0 NBPMCx1EB Request Cache Status 1 NBPMCx4EF L3 Latency Signed-off-by: Robert Richter <rob...@am...> --- lib/amd64_events_fam15h.h | 187 ++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 177 insertions(+), 10 deletions(-) diff --git a/lib/amd64_events_fam15h.h b/lib/amd64_events_fam15h.h index 8ecf6d9..672f108 100644 --- a/lib/amd64_events_fam15h.h +++ b/lib/amd64_events_fam15h.h @@ -23,12 +23,16 @@ * applications on Linux. */ -/* History +/* + * Family 15h Microarchitecture performance monitor events * - * Dec 09 2010 -- Robert Richter, rob...@am...: + * History: * - * Family 15h Microarchitecture performance monitor events + * Apr 29 2011 -- Robert Richter, rob...@am...: + * Source: BKDG for AMD Family 15h Models 00h-0Fh Processors, + * 42301, Rev 1.15, April 18, 2011 * + * Dec 09 2010 -- Robert Richter, rob...@am...: * Source: BIOS and Kernel Developer's Guide for the AMD Family 15h * Processors, Rev 0.90, May 18, 2010 */ @@ -268,11 +272,11 @@ static pme_amd64_entry_t amd64_fam15h_pe[]={ .pme_ucode = 1 << 0, }, { .pme_uname = "CYCLES_NON_SPECULATIVE_PHASE", - .pme_udesc = "Number of cycles spent non-speculative phase (including cache miss penalty)", + .pme_udesc = "Number of cycles spent in non-speculative phase, excluding cache miss penalty", .pme_ucode = 1 << 2, }, { .pme_uname = "CYCLES_WAITING", - .pme_udesc = "Number of cycles waiting for a cache hit (cache miss penalty)", + .pme_udesc = "Number of cycles spent in non-speculative phase, including the cache miss penalty", .pme_ucode = 1 << 3, }, { .pme_uname = "ALL", @@ -472,12 +476,12 @@ static 
pme_amd64_entry_t amd64_fam15h_pe[]={ .pme_numasks = 3, .pme_umasks = { { .pme_uname = "SW_PREFETCH_HIT_IN_L1", - .pme_udesc = "Software prefetch hit in the L1.", - .pme_ucode = 0x01, + .pme_udesc = "Software prefetch hit in the L1", + .pme_ucode = 1 << 0, }, { .pme_uname = "SW_PREFETCH_HIT_IN_L2", - .pme_udesc = "Software prefetch hit in L2.", - .pme_ucode = 0x08, + .pme_udesc = "Software prefetch hit in the L2", + .pme_ucode = 1 << 3, }, { .pme_uname = "ALL", .pme_udesc = "All sub-events selected", @@ -1056,7 +1060,7 @@ static pme_amd64_entry_t amd64_fam15h_pe[]={ .pme_ucode = 1 << 1, }, { .pme_uname = "IGNORED", - .pme_udesc = "Number of times op could not be tagged due to other tagged op active in pipe", + .pme_udesc = "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired", .pme_ucode = 1 << 2, }, { .pme_uname = "ALL", @@ -2244,6 +2248,169 @@ static pme_amd64_entry_t amd64_fam15h_pe[]={ }, }, #endif +/* 107 */{.pme_name = "LS_DISPATCH", + .pme_code = 0x29, + .pme_desc = "LS Dispatch", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 4, + .pme_umasks = { + { .pme_uname = "LOADS", + .pme_udesc = "Loads", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "STORES", + .pme_udesc = "Stores", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "LOAD_OP_STORES", + .pme_udesc = "Load-op-Stores", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x07, + }, + }, + }, +/* 108 */{.pme_name = "EXECUTED_CLFLUSH_INSTRUCTIONS", + .pme_code = 0x30, + .pme_desc = "Executed CLFLUSH Instructions", + }, +/* 109 */{.pme_name = "L2_PREFETCHER_TRIGGER_EVENTS", + .pme_code = 0x16C, + .pme_desc = "L2 Prefetcher Trigger Events", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 3, + .pme_umasks = { + { .pme_uname = "LOAD_L1_MISS_SEEN_BY_PREFETCHER", + .pme_udesc = "Load L1 miss seen by prefetcher", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "STORE_L1_MISS_SEEN_BY_PREFETCHER", 
+ .pme_udesc = "Store L1 miss seen by prefetcher", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x03, + }, + }, + }, +/* 110 */{.pme_name = "DISPATCH_STALL_FOR_STQ_FULL", + .pme_code = 0x1D8, + .pme_desc = "Dispatch Stall for STQ Full", + }, +/* Northbridge events (.pme_code & 0x0E0) not yet supported by the kernel */ +#if 0 +/* 111 */{.pme_name = "REQUEST_CACHE_STATUS_0", + .pme_code = 0x1EA, + .pme_desc = "Request Cache Status 0", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "PROBE_HIT_S", + .pme_udesc = "Probe Hit S", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "PROBE_HIT_E", + .pme_udesc = "Probe Hit E", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "PROBE_HIT_MUW_OR_O", + .pme_udesc = "Probe Hit MuW or O", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "PROBE_HIT_M", + .pme_udesc = "Probe Hit M", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "PROBE_MISS", + .pme_udesc = "Probe Miss", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "DIRECTED_PROBE", + .pme_udesc = "Directed Probe", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "TRACK_CACHE_STAT_FOR_RDBLK", + .pme_udesc = "Track Cache Stat for RdBlk", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "TRACK_CACHE_STAT_FOR_RDBLKS", + .pme_udesc = "Track Cache Stat for RdBlkS", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 112 */{.pme_name = "REQUEST_CACHE_STATUS_1", + .pme_code = 0x1EB, + .pme_desc = "Request Cache Status 1", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 9, + .pme_umasks = { + { .pme_uname = "PROBE_HIT_S", + .pme_udesc = "Probe Hit S", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "PROBE_HIT_E", + .pme_udesc = "Probe Hit E", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "PROBE_HIT_MUW_OR_O", + .pme_udesc = "Probe Hit MuW or O", + .pme_ucode = 1 << 2, + }, + { .pme_uname = "PROBE_HIT_M", + .pme_udesc 
= "Probe Hit M", + .pme_ucode = 1 << 3, + }, + { .pme_uname = "PROBE_MISS", + .pme_udesc = "Probe Miss", + .pme_ucode = 1 << 4, + }, + { .pme_uname = "DIRECTED_PROBE", + .pme_udesc = "Directed Probe", + .pme_ucode = 1 << 5, + }, + { .pme_uname = "TRACK_CACHE_STAT_FOR_CHGTODIRTY", + .pme_udesc = "Track Cache Stat for ChgToDirty", + .pme_ucode = 1 << 6, + }, + { .pme_uname = "TRACK_CACHE_STAT_FOR_RDBLKM", + .pme_udesc = "Track Cache Stat for RdBlkM", + .pme_ucode = 1 << 7, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0xFF, + }, + }, + }, +/* 113 */{.pme_name = "L3_LATENCY", + .pme_code = 0x4EF, + .pme_desc = "L3 Latency", + .pme_flags = PFMLIB_AMD64_UMASK_COMBO, + .pme_numasks = 3, + .pme_umasks = { + { .pme_uname = "L3CYCCOUNT", + .pme_udesc = "L3CycCount. L3 Request cycle count", + .pme_ucode = 1 << 0, + }, + { .pme_uname = "L3REQCOUNT", + .pme_udesc = "L3ReqCount. L3 request count", + .pme_ucode = 1 << 1, + }, + { .pme_uname = "ALL", + .pme_udesc = "All sub-events selected", + .pme_ucode = 0x03, + }, + }, + }, +#endif }; #define PME_AMD64_FAM15H_EVENT_COUNT (sizeof(amd64_fam15h_pe)/sizeof(pme_amd64_entry_t)) -- 1.7.3.4 |
From: Robert R. <rob...@am...> - 2011-05-10 13:45:58
|
This patch modifies AMD family check macros in preparation for patches adding family 15h support. Signed-off-by: Robert Richter <rob...@am...> --- lib/pfmlib_amd64.c | 15 +++++++-------- lib/pfmlib_amd64_priv.h | 2 -- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c index 69d9921..76a9928 100644 --- a/lib/pfmlib_amd64.c +++ b/lib/pfmlib_amd64.c @@ -104,8 +104,8 @@ pfm_pmu_support_t amd64_support; #define amd64_model amd64_pmu.model #define amd64_stepping amd64_pmu.stepping -#define IS_FAMILY_10H() (amd64_pmu.revision >= AMD64_FAM10H) -#define HAS_IBS() IS_FAMILY_10H() +#define IS_FAMILY_10H() (amd64_pmu.family == 0x10) +#define HAS_IBS() (amd64_pmu.family >= 0x10) static amd64_rev_t amd64_get_revision(int family, int model, int stepping) @@ -188,17 +188,16 @@ pfm_amd64_setup(amd64_rev_t revision) amd64_support.pmc_count = PMU_AMD64_NUM_COUNTERS; amd64_support.pmd_count = PMU_AMD64_NUM_COUNTERS; - /* K7 */ - if (amd64_pmu.revision == AMD64_K7) { + switch (amd64_pmu.family) { + case 6: + /* K7 */ amd64_pmu.events = amd64_k7_table.events; amd64_support.pme_count = amd64_k7_table.num; amd64_pmu.cpu_clks = amd64_k7_table.cpu_clks; amd64_pmu.ret_inst = amd64_k7_table.ret_inst; return; - } - - /* Barcelona */ - if (IS_FAMILY_10H()) { + case 0x10: + /* Family 10h */ amd64_pmu.events = amd64_fam10h_table.events; amd64_support.pme_count = amd64_fam10h_table.num; amd64_pmu.cpu_clks = amd64_fam10h_table.cpu_clks; diff --git a/lib/pfmlib_amd64_priv.h b/lib/pfmlib_amd64_priv.h index 052a230..eeb4008 100644 --- a/lib/pfmlib_amd64_priv.h +++ b/lib/pfmlib_amd64_priv.h @@ -54,8 +54,6 @@ typedef struct { unsigned int pme_flags; /* flags */ } pme_amd64_entry_t; -#define AMD64_FAM10H AMD64_FAM10H_REV_B - typedef enum { AMD64_CPU_UN, AMD64_K7, -- 1.7.3.4 |
From: Robert R. <rob...@am...> - 2011-05-10 13:45:58
|
There is no kernel support for AMD family 15h northbridge events, so disable them in libpfm3 to avoid reporting them as available native events. Signed-off-by: Robert Richter <rob...@am...> --- lib/amd64_events_fam15h.h | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/lib/amd64_events_fam15h.h b/lib/amd64_events_fam15h.h index 20c7140..8ecf6d9 100644 --- a/lib/amd64_events_fam15h.h +++ b/lib/amd64_events_fam15h.h @@ -1065,6 +1065,8 @@ static pme_amd64_entry_t amd64_fam15h_pe[]={ }, }, }, +/* Northbridge events (.pme_code & 0x0E0) not yet supported by the kernel */ +#if 0 /* 78 */{.pme_name = "DRAM_ACCESSES", .pme_code = 0xE0, .pme_desc = "DRAM Accesses", @@ -2241,6 +2243,7 @@ static pme_amd64_entry_t amd64_fam15h_pe[]={ }, }, }, +#endif }; #define PME_AMD64_FAM15H_EVENT_COUNT (sizeof(amd64_fam15h_pe)/sizeof(pme_amd64_entry_t)) -- 1.7.3.4 |
From: Robert R. <rob...@am...> - 2011-05-10 13:46:01
|
Though Family 15h support has been added earlier, there was a limitation to use only 4 counters. This patch implements support for up to 6 counters. Signed-off-by: Robert Richter <rob...@am...> --- include/perfmon/pfmlib_amd64.h | 2 +- lib/pfmlib_amd64.c | 27 +++++++++++++++++---------- lib/pfmlib_amd64_priv.h | 31 +++++++++++++++++++++---------- 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/include/perfmon/pfmlib_amd64.h b/include/perfmon/pfmlib_amd64.h index 784911e..bee4821 100644 --- a/include/perfmon/pfmlib_amd64.h +++ b/include/perfmon/pfmlib_amd64.h @@ -40,7 +40,7 @@ extern "C" { #endif -#define PMU_AMD64_MAX_COUNTERS 4 /* total numbers of performance counters */ +#define PMU_AMD64_MAX_COUNTERS 6 /* total numbers of performance counters */ /* * AMD64 MSR definitions diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c index ab417e3..0194269 100644 --- a/lib/pfmlib_amd64.c +++ b/lib/pfmlib_amd64.c @@ -72,8 +72,10 @@ * 1 -> PMD1 -> PERCTR1 -> MSR @ 0xc0010005 * ... 
*/ -#define AMD64_SEL_BASE 0xc0010000 -#define AMD64_CTR_BASE 0xc0010004 +#define AMD64_SEL_BASE 0xc0010000 +#define AMD64_CTR_BASE 0xc0010004 +#define AMD64_SEL_BASE_F15H 0xc0010200 +#define AMD64_CTR_BASE_F15H 0xc0010201 static struct { amd64_rev_t revision; @@ -213,6 +215,7 @@ pfm_amd64_setup(amd64_rev_t revision) amd64_support.pme_count = amd64_fam15h_table.num; amd64_pmu.cpu_clks = amd64_fam15h_table.cpu_clks; amd64_pmu.ret_inst = amd64_fam15h_table.ret_inst; + amd64_support.num_cnt = PMU_AMD64_NUM_COUNTERS_F15H; amd64_support.pmc_count = PMU_AMD64_NUM_PERFSEL; amd64_support.pmd_count = PMU_AMD64_NUM_PERFCTR; return; @@ -522,15 +525,19 @@ pfm_amd64_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_amd64_input_param_ if ((IS_FAM10H_ONLY(reg)) && !IS_FAMILY_10H()) return PFMLIB_ERR_BADHOST; - pc[j].reg_value = reg.val; - pc[j].reg_addr = AMD64_SEL_BASE+assign[j]; - pc[j].reg_alt_addr = AMD64_SEL_BASE+assign[j]; + if (amd64_support.num_cnt == PMU_AMD64_NUM_COUNTERS_F15H) { + pc[j].reg_addr = AMD64_SEL_BASE_F15H + (assign[j] << 1); + pd[j].reg_addr = AMD64_CTR_BASE_F15H + (assign[j] << 1); + } else { + pc[j].reg_addr = AMD64_SEL_BASE + assign[j]; + pd[j].reg_addr = AMD64_CTR_BASE + assign[j]; + } + pc[j].reg_value = reg.val; + pc[j].reg_alt_addr = pc[j].reg_addr; pd[j].reg_num = assign[j]; - pd[j].reg_addr = AMD64_CTR_BASE+assign[j]; - /* index to use with RDPMC */ - pd[j].reg_alt_addr = assign[j]; + pd[j].reg_alt_addr = assign[j]; /* index to use with RDPMC */ __pfm_vbprintf("[PERFSEL%u(pmc%u)=0x%llx emask=0x%x umask=0x%x os=%d usr=%d inv=%d en=%d int=%d edge=%d cnt_mask=%d] %s\n", assign[j], @@ -671,7 +678,7 @@ pfm_amd64_dispatch_events( static int pfm_amd64_get_event_code(unsigned int i, unsigned int cnt, int *code) { - if (cnt != PFMLIB_CNT_FIRST && cnt > 3) + if (cnt != PFMLIB_CNT_FIRST && cnt >= amd64_support.num_cnt) return PFMLIB_ERR_INVAL; *code = pfm_amd64_get_event_entry(i)->pme_code; @@ -727,7 +734,7 @@ pfm_amd64_get_impl_counters(pfmlib_regmask_t 
*impl_counters) unsigned int i = 0; /* counting pmds are contiguous */ - for(i=0; i < 4; i++) + for(i=0; i < amd64_support.num_cnt; i++) pfm_regmask_set(impl_counters, i); } diff --git a/lib/pfmlib_amd64_priv.h b/lib/pfmlib_amd64_priv.h index 5def153..55a29ed 100644 --- a/lib/pfmlib_amd64_priv.h +++ b/lib/pfmlib_amd64_priv.h @@ -25,16 +25,27 @@ #ifndef __PFMLIB_AMD64_PRIV_H__ #define __PFMLIB_AMD64_PRIV_H__ -/* PERFSEL/PERFCTR include IBS registers of family 10h */ -#define PMU_AMD64_NUM_PERFSEL 6 /* total number of PMCs defined */ -#define PMU_AMD64_NUM_PERFCTR 14 /* total number of PMDs defined */ -#define PMU_AMD64_NUM_COUNTERS 4 /* total numbers of EvtSel/EvtCtr */ -#define PMU_AMD64_COUNTER_WIDTH 48 /* hardware counter bit width */ -#define PMU_AMD64_CNT_MASK_MAX 4 /* max cnt_mask value */ -#define PMU_AMD64_IBSFETCHCTL_PMC 4 /* IBS: fetch PMC base */ -#define PMU_AMD64_IBSFETCHCTL_PMD 4 /* IBS: fetch PMD base */ -#define PMU_AMD64_IBSOPCTL_PMC 5 /* IBS: op PMC base */ -#define PMU_AMD64_IBSOPCTL_PMD 7 /* IBS: op PMD base */ +/* + * PERFSEL/PERFCTR include IBS registers: + * + * PMCs PMDs + * + * PERFCTRS 6 6 + * IBS FETCH 1 3 + * IBS OP 1 7 + * + * total 8 16 + */ +#define PMU_AMD64_NUM_PERFSEL 8 /* number of PMCs defined */ +#define PMU_AMD64_NUM_PERFCTR 16 /* number of PMDs defined */ +#define PMU_AMD64_NUM_COUNTERS 4 /* number of EvtSel/EvtCtr */ +#define PMU_AMD64_NUM_COUNTERS_F15H 6 /* number of EvtSel/EvtCtr */ +#define PMU_AMD64_COUNTER_WIDTH 48 /* hw counter bit width */ +#define PMU_AMD64_CNT_MASK_MAX 4 /* max cnt_mask value */ +#define PMU_AMD64_IBSFETCHCTL_PMC 6 /* IBS: fetch PMC base */ +#define PMU_AMD64_IBSFETCHCTL_PMD 6 /* IBS: fetch PMD base */ +#define PMU_AMD64_IBSOPCTL_PMC 7 /* IBS: op PMC base */ +#define PMU_AMD64_IBSOPCTL_PMD 9 /* IBS: op PMD base */ #define PFMLIB_AMD64_MAX_UMASK 13 -- 1.7.3.4 |
From: Robert R. <rob...@am...> - 2011-05-10 13:46:05
|
This patch adds support for AMD Family 10h RevE cpus. There are no new events/unitmasks compared to RevD. It basically implements CPU detection that enables the RevD pmu behavior also for RevE systems. Signed-off-by: Robert Richter <rob...@am...> --- lib/pfmlib_amd64.c | 18 +++++++++--------- lib/pfmlib_amd64_priv.h | 4 +++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c index 7b4c2e5..69d9921 100644 --- a/lib/pfmlib_amd64.c +++ b/lib/pfmlib_amd64.c @@ -110,10 +110,10 @@ pfm_pmu_support_t amd64_support; static amd64_rev_t amd64_get_revision(int family, int model, int stepping) { - if (family == 6) + switch (family) { + case 6: return AMD64_K7; - - if (family == 15) { + case 0x0f: switch (model >> 4) { case 0: if (model == 5 && stepping < 2) @@ -134,10 +134,9 @@ amd64_get_revision(int family, int model, int stepping) case 7: case 8: return AMD64_K8_REV_G; - default: - return AMD64_K8_REV_B; } - } else if (family == 16) { + return AMD64_K8_REV_B; + case 0x10: switch (model) { case 4: case 5: @@ -146,9 +145,10 @@ amd64_get_revision(int family, int model, int stepping) case 8: case 9: return AMD64_FAM10H_REV_D; - default: - return AMD64_FAM10H_REV_B; + case 10: + return AMD64_FAM10H_REV_E; } + return AMD64_FAM10H_REV_B; } return AMD64_CPU_UN; @@ -231,7 +231,7 @@ pfm_amd64_detect(void) amd64_family += (a >> 20) & 0x000000ff; // Extended family amd64_model |= (a >> 12) & 0x000000f0; // Extended model } - amd64_stepping= a & 0x0000000f; // bits 3 - 0 + amd64_stepping = a & 0x0000000f; // bits 3 - 0 amd64_revision = amd64_get_revision(amd64_family, amd64_model, amd64_stepping); diff --git a/lib/pfmlib_amd64_priv.h b/lib/pfmlib_amd64_priv.h index 7b300f0..052a230 100644 --- a/lib/pfmlib_amd64_priv.h +++ b/lib/pfmlib_amd64_priv.h @@ -68,10 +68,11 @@ typedef enum { AMD64_FAM10H_REV_B, AMD64_FAM10H_REV_C, AMD64_FAM10H_REV_D, + AMD64_FAM10H_REV_E, } amd64_rev_t; static const char *amd64_rev_strs[]= { - "?", "?", "B", "C", 
"D", "E", "F", "G", "B", "C", "D" + "?", "?", "B", "C", "D", "E", "F", "G", "B", "C", "D", "E" }; static const char *amd64_cpu_strs[] = { @@ -86,6 +87,7 @@ static const char *amd64_cpu_strs[] = { "AMD64 (Family 10h RevB, Barcelona)", "AMD64 (Family 10h RevC, Shanghai)", "AMD64 (Family 10h RevD, Istanbul)", + "AMD64 (Family 10h RevE)", }; /* -- 1.7.3.4 |
From: Robert R. <rob...@am...> - 2011-05-10 13:47:55
|
AMD architectural PMU could not be detected for family 15h as there was a strict check for AMD family 10h. Enable it now for all families from 10h onwards. Signed-off-by: Robert Richter <rob...@am...> --- include/perfmon/pfmlib_amd64.h | 6 +++--- lib/pfmlib_amd64.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/perfmon/pfmlib_amd64.h b/include/perfmon/pfmlib_amd64.h index bee4821..b727891 100644 --- a/include/perfmon/pfmlib_amd64.h +++ b/include/perfmon/pfmlib_amd64.h @@ -60,10 +60,10 @@ typedef union { uint64_t sel_en:1; /* enable */ uint64_t sel_inv:1; /* invert counter mask */ uint64_t sel_cnt_mask:8; /* counter mask */ - uint64_t sel_event_mask2:4; /* 10h only: event mask [11:8] */ + uint64_t sel_event_mask2:4; /* from 10h: event mask [11:8] */ uint64_t sel_res2:4; /* reserved */ - uint64_t sel_guest:1; /* 10h only: guest only counter */ - uint64_t sel_host:1; /* 10h only: host only counter */ + uint64_t sel_guest:1; /* from 10h: guest only counter */ + uint64_t sel_host:1; /* from 10h: host only counter */ uint64_t sel_res3:22; /* reserved */ } perfsel; } pfm_amd64_sel_reg_t; /* MSR 0xc001000-0xc001003 */ diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c index 0194269..6e86ad4 100644 --- a/lib/pfmlib_amd64.c +++ b/lib/pfmlib_amd64.c @@ -51,7 +51,7 @@ #define sel_guest perfsel.sel_guest #define sel_host perfsel.sel_host -#define IS_FAM10H_ONLY(reg) \ +#define CHECK_AMD_ARCH(reg) \ ((reg).sel_event_mask2 || (reg).sel_guest || (reg).sel_host) #define PFMLIB_AMD64_HAS_COMBO(_e) \ @@ -106,8 +106,8 @@ pfm_pmu_support_t amd64_support; #define amd64_model amd64_pmu.model #define amd64_stepping amd64_pmu.stepping -#define IS_FAMILY_10H() (amd64_pmu.family == 0x10) -#define HAS_IBS() (amd64_pmu.family >= 0x10) +/* AMD architectural pmu features starts with family 10h */ +#define IS_AMD_ARCH() (amd64_pmu.family >= 0x10) static amd64_rev_t amd64_get_revision(int family, int model, int stepping) @@ -522,7 +522,7 @@ 
pfm_amd64_dispatch_counters(pfmlib_input_param_t *inp, pfmlib_amd64_input_param_ } pc[j].reg_num = assign[j]; - if ((IS_FAM10H_ONLY(reg)) && !IS_FAMILY_10H()) + if ((CHECK_AMD_ARCH(reg)) && !IS_AMD_ARCH()) return PFMLIB_ERR_BADHOST; if (amd64_support.num_cnt == PMU_AMD64_NUM_COUNTERS_F15H) { @@ -577,7 +577,7 @@ static int pfm_amd64_dispatch_ibs(pfmlib_input_param_t *inp, if (!inp_mod || !outp || !outp_mod) return PFMLIB_ERR_INVAL; - if (!HAS_IBS()) + if (!IS_AMD_ARCH()) return PFMLIB_ERR_BADHOST; /* IBS fetch profiling */ -- 1.7.3.4 |