From: Maynard J. <may...@us...> - 2013-05-15 15:35:36
|
Hello, Giles, I don't have a Pentium P4 to test this patch on, so could you please apply it to the latest oprofile source and test it? You can get the latest oprofile source with: git clone git://oprofile.git.sourceforge.net/gitroot/oprofile/oprofile Please compare the results with those from the opcontrol-based profiler. Thanks for your patience. -Maynard ---------------------------------------------------- Add support for Intel Netburst (e.g., Pentium P4) to operf The "legacy" oprofile kernel driver has special "p4" handling. There's a map of event codes to ESCR/CCCR values. Unfortunately, the P4 event codes (stored in events/i386/p4/events) that are used by the oprofile kernel driver don't match what perf_events kernel code expects. This patch adds some p4-specific event code handling to operf in order to generate the correct encoding to pass to perf_events kernel subsystem. Signed-off-by: Maynard Johnson <may...@us...> --- libop/Makefile.am | 4 +- libop/op_netburst.c | 1597 ++++++++++++++++++++++++++++++++++++++++++++++++ libop/op_netburst.h | 256 ++++++++ pe_profiling/operf.cpp | 7 + 4 files changed, 1863 insertions(+), 1 deletions(-) create mode 100644 libop/op_netburst.c create mode 100644 libop/op_netburst.h diff --git a/libop/Makefile.am b/libop/Makefile.am index 54ed1ea..3358dfb 100644 --- a/libop/Makefile.am +++ b/libop/Makefile.am @@ -25,4 +25,6 @@ libop_a_SOURCES = \ op_xml_events.h \ op_xml_out.c \ op_xml_out.h \ - op_hw_specific.h + op_hw_specific.h \ + op_netburst.c \ + op_netburst.h diff --git a/libop/op_netburst.c b/libop/op_netburst.c new file mode 100644 index 0000000..04198a2 --- /dev/null +++ b/libop/op_netburst.c @@ -0,0 +1,1597 @@ +/** + * @file libop/op_netburst.c + * Definitions of Netburst events and a function for obtaining an encoding + * for a given event/unit mask in perf_events format. 
+ * + * @remark Copyright 2013 OProfile authors + * @remark Read the file COPYING + * + * Created on: May 14, 2013 + * @author Maynard Johnson + * (C) Copyright IBM Corp. 2013 + * + */ + +#include "config.h" + +#include <string.h> + +#include "op_types.h" +#include "op_netburst.h" + +/** + * netburst_events + * + * Array of events that can be counted on Pentium4. + **/ +netburst_entry_t op_netburst_events[] = { + + /* 0 */ + {.name = "TC_DELIVER_MODE", + .desc = "The duration (in clock cycles) of the operating modes of " + "the trace cache and decode engine in the processor package", + .event_select = 0x1, + .escr_select = 0x1, + .allowed_escrs = { 9, 32 }, + .perf_code = P4_EVENT_TC_DELIVER_MODE, + .event_masks = { + {.name = "DD", + .desc = "Both logical CPUs in deliver mode", + .bit = 0, + }, + {.name = "DB", + .desc = "Logical CPU 0 in deliver mode and " + "logical CPU 1 in build mode", + .bit = 1, + }, + {.name = "DI", + .desc = "Logical CPU 0 in deliver mode and logical CPU 1 " + "either halted, under machine clear condition, or " + "transitioning to a long microcode flow", + .bit = 2, + }, + {.name = "BD", + .desc = "Logical CPU 0 in build mode and " + "logical CPU 1 is in deliver mode", + .bit = 3, + }, + {.name = "BB", + .desc = "Both logical CPUs in build mode", + .bit = 4, + }, + {.name = "BI", + .desc = "Logical CPU 0 in build mode and logical CPU 1 " + "either halted, under machine clear condition, or " + "transitioning to a long microcode flow", + .bit = 5, + }, + {.name = "ID", + .desc = "Logical CPU 0 either halted, under machine clear " + "condition, or transitioning to a long microcode " + "flow, and logical CPU 1 in deliver mode", + .bit = 6, + }, + {.name = "IB", + .desc = "Logical CPU 0 either halted, under machine clear " + "condition, or transitioning to a long microcode " + "flow, and logical CPU 1 in build mode", + .bit = 7, + }, + }, + }, + + /* 1 */ + {.name = "BPU_FETCH_REQUEST", + .desc = "Instruction fetch requests by the Branch 
Prediction Unit", + .event_select = 0x3, + .escr_select = 0x0, + .allowed_escrs = { 0, 23 }, + .perf_code = P4_EVENT_BPU_FETCH_REQUEST, + .event_masks = { + {.name = "TCMISS", + .desc = "Trace cache lookup miss", + .bit = 0, + .flags = NETBURST_FL_DFL, + }, + }, + }, + + /* 2 */ + {.name = "ITLB_REFERENCE", + .desc = "Translations using the Instruction " + "Translation Look-Aside Buffer", + .event_select = 0x18, + .escr_select = 0x3, + .allowed_escrs = { 3, 26 }, + .perf_code = P4_EVENT_ITLB_REFERENCE, + .event_masks = { + {.name = "HIT", + .desc = "ITLB hit", + .bit = 0, + }, + {.name = "MISS", + .desc = "ITLB miss", + .bit = 1, + }, + {.name = "HIT_UC", + .desc = "Uncacheable ITLB hit", + .bit = 2, + }, + }, + }, + + /* 3 */ + {.name = "MEMORY_CANCEL", + .desc = "Canceling of various types of requests in the " + "Data cache Address Control unit (DAC)", + .event_select = 0x2, + .escr_select = 0x5, + .allowed_escrs = { 15, 38 }, + .perf_code = P4_EVENT_MEMORY_CANCEL, + .event_masks = { + {.name = "ST_RB_FULL", + .desc = "Replayed because no store request " + "buffer is available", + .bit = 2, + }, + {.name = "64K_CONF", + .desc = "Conflicts due to 64K aliasing", + .bit = 3, + }, + }, + }, + + /* 4 */ + {.name = "MEMORY_COMPLETE", + .desc = "Completions of a load split, store split, " + "uncacheable (UC) split, or UC load", + .event_select = 0x8, + .escr_select = 0x2, + .allowed_escrs = { 13, 36 }, + .perf_code = P4_EVENT_MEMORY_COMPLETE, + .event_masks = { + {.name = "LSC", + .desc = "Load split completed, excluding UC/WC loads", + .bit = 0, + }, + {.name = "SSC", + .desc = "Any split stores completed", + .bit = 1, + }, + }, + }, + + /* 5 */ + {.name = "LOAD_PORT_REPLAY", + .desc = "Replayed events at the load port", + .event_select = 0x4, + .escr_select = 0x2, + .allowed_escrs = { 13, 36 }, + .perf_code = P4_EVENT_LOAD_PORT_REPLAY, + .event_masks = { + {.name = "SPLIT_LD", + .desc = "Split load", + .bit = 1, + .flags = NETBURST_FL_DFL, + }, + }, + }, + + /* 6 */ + 
{.name = "STORE_PORT_REPLAY", + .desc = "Replayed events at the store port", + .event_select = 0x5, + .escr_select = 0x2, + .allowed_escrs = { 13, 36 }, + .perf_code = P4_EVENT_STORE_PORT_REPLAY, + .event_masks = { + {.name = "SPLIT_ST", + .desc = "Split store", + .bit = 1, + .flags = NETBURST_FL_DFL, + }, + }, + }, + + /* 7 */ + {.name = "MOB_LOAD_REPLAY", + .desc = "Count of times the memory order buffer (MOB) " + "caused a load operation to be replayed", + .event_select = 0x3, + .escr_select = 0x2, + .allowed_escrs = { 2, 25 }, + .perf_code = P4_EVENT_MOB_LOAD_REPLAY, + .event_masks = { + {.name = "NO_STA", + .desc = "Replayed because of unknown store address", + .bit = 1, + }, + {.name = "NO_STD", + .desc = "Replayed because of unknown store data", + .bit = 3, + }, + {.name = "PARTIAL_DATA", + .desc = "Replayed because of partially overlapped data " + "access between the load and store operations", + .bit = 4, + }, + {.name = "UNALGN_ADDR", + .desc = "Replayed because the lower 4 bits of the " + "linear address do not match between the " + "load and store operations", + .bit = 5, + }, + }, + }, + + /* 8 */ + {.name = "PAGE_WALK_TYPE", + .desc = "Page walks that the page miss handler (PMH) performs", + .event_select = 0x1, + .escr_select = 0x4, + .allowed_escrs = { 4, 27 }, + .perf_code = P4_EVENT_PAGE_WALK_TYPE, + .event_masks = { + {.name = "DTMISS", + .desc = "Page walk for a data TLB miss (load or store)", + .bit = 0, + }, + {.name = "ITMISS", + .desc = "Page walk for an instruction TLB miss", + .bit = 1, + }, + }, + }, + + /* 9 */ + {.name = "BSQ_CACHE_REFERENCE", + .desc = "Cache references (2nd or 3rd level caches) as seen by the " + "bus unit. 
Read types include both load and RFO, and write " + "types include writebacks and evictions", + .event_select = 0xC, + .escr_select = 0x7, + .allowed_escrs = { 7, 30 }, + .perf_code = P4_EVENT_BSQ_CACHE_REFERENCE, + .event_masks = { + {.name = "RD_2ndL_HITS", + .desc = "Read 2nd level cache hit Shared", + .bit = 0, + }, + {.name = "RD_2ndL_HITE", + .desc = "Read 2nd level cache hit Exclusive", + .bit = 1, + }, + {.name = "RD_2ndL_HITM", + .desc = "Read 2nd level cache hit Modified", + .bit = 2, + }, + {.name = "RD_3rdL_HITS", + .desc = "Read 3rd level cache hit Shared", + .bit = 3, + }, + {.name = "RD_3rdL_HITE", + .desc = "Read 3rd level cache hit Exclusive", + .bit = 4, + }, + {.name = "RD_3rdL_HITM", + .desc = "Read 3rd level cache hit Modified", + .bit = 5, + }, + {.name = "RD_2ndL_MISS", + .desc = "Read 2nd level cache miss", + .bit = 8, + }, + {.name = "RD_3rdL_MISS", + .desc = "Read 3rd level cache miss", + .bit = 9, + }, + {.name = "WR_2ndL_MISS", + .desc = "A writeback lookup from DAC misses the 2nd " + "level cache (unlikely to happen)", + .bit = 10, + }, + }, + }, + + /* 10 */ + {.name = "IOQ_ALLOCATION", + .desc = "Count of various types of transactions on the bus. A count " + "is generated each time a transaction is allocated into the " + "IOQ that matches the specified mask bits. An allocated entry " + "can be a sector (64 bytes) or a chunk of 8 bytes. Requests " + "are counted once per retry. 
All 'TYPE_BIT*' event-masks " + "together are treated as a single 5-bit value", + .event_select = 0x3, + .escr_select = 0x6, + .allowed_escrs = { 6, 29 }, + .perf_code = P4_EVENT_IOQ_ALLOCATION, + .event_masks = { + {.name = "TYPE_BIT0", + .desc = "Bus request type (bit 0)", + .bit = 0, + }, + {.name = "TYPE_BIT1", + .desc = "Bus request type (bit 1)", + .bit = 1, + }, + {.name = "TYPE_BIT2", + .desc = "Bus request type (bit 2)", + .bit = 2, + }, + {.name = "TYPE_BIT3", + .desc = "Bus request type (bit 3)", + .bit = 3, + }, + {.name = "TYPE_BIT4", + .desc = "Bus request type (bit 4)", + .bit = 4, + }, + {.name = "ALL_READ", + .desc = "Count read entries", + .bit = 5, + }, + {.name = "ALL_WRITE", + .desc = "Count write entries", + .bit = 6, + }, + {.name = "MEM_UC", + .desc = "Count UC memory access entries", + .bit = 7, + }, + {.name = "MEM_WC", + .desc = "Count WC memory access entries", + .bit = 8, + }, + {.name = "MEM_WT", + .desc = "Count write-through (WT) memory access entries", + .bit = 9, + }, + {.name = "MEM_WP", + .desc = "Count write-protected (WP) memory access entries", + .bit = 10, + }, + {.name = "MEM_WB", + .desc = "Count WB memory access entries", + .bit = 11, + }, + {.name = "OWN", + .desc = "Count all store requests driven by processor, as " + "opposed to other processor or DMA", + .bit = 13, + }, + {.name = "OTHER", + .desc = "Count all requests driven by other " + "processors or DMA", + .bit = 14, + }, + {.name = "PREFETCH", + .desc = "Include HW and SW prefetch requests in the count", + .bit = 15, + }, + }, + }, + + /* 11 */ + {.name = "IOQ_ACTIVE_ENTRIES", + .desc = "Number of entries (clipped at 15) in the IOQ that are " + "active. An allocated entry can be a sector (64 bytes) " + "or a chunk of 8 bytes. This event must be programmed in " + "conjuction with IOQ_allocation. 
All 'TYPE_BIT*' event-masks " + "together are treated as a single 5-bit value", + .event_select = 0x1A, + .escr_select = 0x6, + .allowed_escrs = { 29, -1 }, + .perf_code = P4_EVENT_IOQ_ACTIVE_ENTRIES, + .event_masks = { + {.name = "TYPE_BIT0", + .desc = "Bus request type (bit 0)", + .bit = 0, + }, + {.name = "TYPE_BIT1", + .desc = "Bus request type (bit 1)", + .bit = 1, + }, + {.name = "TYPE_BIT2", + .desc = "Bus request type (bit 2)", + .bit = 2, + }, + {.name = "TYPE_BIT3", + .desc = "Bus request type (bit 3)", + .bit = 3, + }, + {.name = "TYPE_BIT4", + .desc = "Bus request type (bit 4)", + .bit = 4, + }, + {.name = "ALL_READ", + .desc = "Count read entries", + .bit = 5, + }, + {.name = "ALL_WRITE", + .desc = "Count write entries", + .bit = 6, + }, + {.name = "MEM_UC", + .desc = "Count UC memory access entries", + .bit = 7, + }, + {.name = "MEM_WC", + .desc = "Count WC memory access entries", + .bit = 8, + }, + {.name = "MEM_WT", + .desc = "Count write-through (WT) memory access entries", + .bit = 9, + }, + {.name = "MEM_WP", + .desc = "Count write-protected (WP) memory access entries", + .bit = 10, + }, + {.name = "MEM_WB", + .desc = "Count WB memory access entries", + .bit = 11, + }, + {.name = "OWN", + .desc = "Count all store requests driven by processor, as " + "opposed to other processor or DMA", + .bit = 13, + }, + {.name = "OTHER", + .desc = "Count all requests driven by other " + "processors or DMA", + .bit = 14, + }, + {.name = "PREFETCH", + .desc = "Include HW and SW prefetch requests in the count", + .bit = 15, + }, + }, + }, + + /* 12 */ + {.name = "FSB_DATA_ACTIVITY", + .desc = "Count of DRDY or DBSY events that " + "occur on the front side bus", + .event_select = 0x17, + .escr_select = 0x6, + .allowed_escrs = { 6, 29 }, + .perf_code = P4_EVENT_FSB_DATA_ACTIVITY, + .event_masks = { + {.name = "DRDY_DRV", + .desc = "Count when this processor drives data onto the bus. 
" + "Includes writes and implicit writebacks", + .bit = 0, + }, + {.name = "DRDY_OWN", + .desc = "Count when this processor reads data from the bus. " + "Includes loads and some PIC transactions. Count " + "DRDY events that we drive. Count DRDY events sampled " + "that we own", + .bit = 1, + }, + {.name = "DRDY_OTHER", + .desc = "Count when data is on the bus but not being sampled " + "by the processor. It may or may not be driven by " + "this processor", + .bit = 2, + }, + {.name = "DBSY_DRV", + .desc = "Count when this processor reserves the bus for use " + "in the next bus cycle in order to drive data", + .bit = 3, + }, + {.name = "DBSY_OWN", + .desc = "Count when some agent reserves the bus for use in " + "the next bus cycle to drive data that this processor " + "will sample", + .bit = 4, + }, + {.name = "DBSY_OTHER", + .desc = "Count when some agent reserves the bus for use in " + "the next bus cycle to drive data that this processor " + "will NOT sample. It may or may not be being driven " + "by this processor", + .bit = 5, + }, + }, + }, + + /* 13 */ + {.name = "BSQ_ALLOCATION", + .desc = "Allocations in the Bus Sequence Unit (BSQ). The event mask " + "bits consist of four sub-groups: request type, request " + "length, memory type, and a sub-group consisting mostly of " + "independent bits (5 through 10). 
Must specify a mask for " + "each sub-group", + .event_select = 0x5, + .escr_select = 0x7, + .allowed_escrs = { 7, -1 }, + .perf_code = P4_EVENT_BSQ_ALLOCATION, + .event_masks = { + {.name = "REQ_TYPE0", + .desc = "Along with REQ_TYPE1, request type encodings are: " + "0 - Read (excludes read invalidate), 1 - Read " + "invalidate, 2 - Write (other than writebacks), 3 - " + "Writeback (evicted from cache)", + .bit = 0, + }, + {.name = "REQ_TYPE1", + .desc = "Along with REQ_TYPE0, request type encodings are: " + "0 - Read (excludes read invalidate), 1 - Read " + "invalidate, 2 - Write (other than writebacks), 3 - " + "Writeback (evicted from cache)", + .bit = 1, + }, + {.name = "REQ_LEN0", + .desc = "Along with REQ_LEN1, request length encodings are: " + "0 - zero chunks, 1 - one chunk, 3 - eight chunks", + .bit = 2, + }, + {.name = "REQ_LEN1", + .desc = "Along with REQ_LEN0, request length encodings are: " + "0 - zero chunks, 1 - one chunk, 3 - eight chunks", + .bit = 3, + }, + {.name = "REQ_IO_TYPE", + .desc = "Request type is input or output", + .bit = 5, + }, + {.name = "REQ_LOCK_TYPE", + .desc = "Request type is bus lock", + .bit = 6, + }, + {.name = "REQ_CACHE_TYPE", + .desc = "Request type is cacheable", + .bit = 7, + }, + {.name = "REQ_SPLIT_TYPE", + .desc = "Request type is a bus 8-byte chunk split across " + "an 8-byte boundary", + .bit = 8, + }, + {.name = "REQ_DEM_TYPE", + .desc = "0: Request type is HW.SW prefetch. 
" + "1: Request type is a demand", + .bit = 9, + }, + {.name = "REQ_ORD_TYPE", + .desc = "Request is an ordered type", + .bit = 10, + }, + {.name = "MEM_TYPE0", + .desc = "Along with MEM_TYPE1 and MEM_TYPE2, " + "memory type encodings are: 0 - UC, " + "1 - USWC, 4- WT, 5 - WP, 6 - WB", + .bit = 11, + }, + {.name = "MEM_TYPE1", + .desc = "Along with MEM_TYPE0 and MEM_TYPE2, " + "memory type encodings are: 0 - UC, " + "1 - USWC, 4- WT, 5 - WP, 6 - WB", + .bit = 12, + }, + {.name = "MEM_TYPE2", + .desc = "Along with MEM_TYPE0 and MEM_TYPE1, " + "memory type encodings are: 0 - UC, " + "1 - USWC, 4- WT, 5 - WP, 6 - WB", + .bit = 13, + }, + }, + }, + + /* 14 */ + {.name = "BSQ_ACTIVE_ENTRIES", + .desc = "Number of BSQ entries (clipped at 15) currently active " + "(valid) which meet the subevent mask criteria during " + "allocation in the BSQ. Active request entries are allocated " + "on the BSQ until de-allocated. De-allocation of an entry " + "does not necessarily imply the request is filled. 
This " + "event must be programmed in conjunction with BSQ_allocation", + .event_select = 0x6, + .escr_select = 0x7, + .allowed_escrs = { 30, -1 }, + .perf_code = P4_EVENT_BSQ_ACTIVE_ENTRIES, + .event_masks = { + {.name = "REQ_TYPE0", + .desc = "Along with REQ_TYPE1, request type encodings are: " + "0 - Read (excludes read invalidate), 1 - Read " + "invalidate, 2 - Write (other than writebacks), 3 - " + "Writeback (evicted from cache)", + .bit = 0, + }, + {.name = "REQ_TYPE1", + .desc = "Along with REQ_TYPE0, request type encodings are: " + "0 - Read (excludes read invalidate), 1 - Read " + "invalidate, 2 - Write (other than writebacks), 3 - " + "Writeback (evicted from cache)", + .bit = 1, + }, + {.name = "REQ_LEN0", + .desc = "Along with REQ_LEN1, request length encodings are: " + "0 - zero chunks, 1 - one chunk, 3 - eight chunks", + .bit = 2, + }, + {.name = "REQ_LEN1", + .desc = "Along with REQ_LEN0, request length encodings are: " + "0 - zero chunks, 1 - one chunk, 3 - eight chunks", + .bit = 3, + }, + {.name = "REQ_IO_TYPE", + .desc = "Request type is input or output", + .bit = 5, + }, + {.name = "REQ_LOCK_TYPE", + .desc = "Request type is bus lock", + .bit = 6, + }, + {.name = "REQ_CACHE_TYPE", + .desc = "Request type is cacheable", + .bit = 7, + }, + {.name = "REQ_SPLIT_TYPE", + .desc = "Request type is a bus 8-byte chunk split across " + "an 8-byte boundary", + .bit = 8, + }, + {.name = "REQ_DEM_TYPE", + .desc = "0: Request type is HW.SW prefetch. 
" + "1: Request type is a demand", + .bit = 9, + }, + {.name = "REQ_ORD_TYPE", + .desc = "Request is an ordered type", + .bit = 10, + }, + {.name = "MEM_TYPE0", + .desc = "Along with MEM_TYPE1 and MEM_TYPE2, " + "memory type encodings are: 0 - UC, " + "1 - USWC, 4- WT, 5 - WP, 6 - WB", + .bit = 11, + }, + {.name = "MEM_TYPE1", + .desc = "Along with MEM_TYPE0 and MEM_TYPE2, " + "memory type encodings are: 0 - UC, " + "1 - USWC, 4- WT, 5 - WP, 6 - WB", + .bit = 12, + }, + {.name = "MEM_TYPE2", + .desc = "Along with MEM_TYPE0 and MEM_TYPE1, " + "memory type encodings are: 0 - UC, " + "1 - USWC, 4- WT, 5 - WP, 6 - WB", + .bit = 13, + }, + }, + }, + + /* 15 */ + {.name = "SSE_INPUT_ASSIST", + .desc = "Number of times an assist is requested to handle problems " + "with input operands for SSE/SSE2/SSE3 operations; most " + "notably denormal source operands when the DAZ bit isn't set", + .event_select = 0x34, + .escr_select = 0x1, + .allowed_escrs = { 12, 35 }, + .perf_code = P4_EVENT_SSE_INPUT_ASSIST, + .event_masks = { + {.name = "ALL", + .desc = "Count assists for SSE/SSE2/SSE3 uops", + .bit = 15, + .flags = NETBURST_FL_DFL, + }, + }, + }, + + /* 16 */ + {.name = "PACKED_SP_UOP", + .desc = "Number of packed single-precision uops", + .event_select = 0x8, + .escr_select = 0x1, + .perf_code = P4_EVENT_PACKED_SP_UOP, + .allowed_escrs = { 12, 35 }, + .event_masks = { + {.name = "ALL", + .desc = "Count all uops operating on packed " + "single-precisions operands", + .bit = 15, + .flags = NETBURST_FL_DFL, + }, + {.name = "TAG0", + .desc = "Tag this event with tag bit 0 " + "for retirement counting with execution_event", + .bit = 16, + }, + {.name = "TAG1", + .desc = "Tag this event with tag bit 1 " + "for retirement counting with execution_event", + .bit = 17, + }, + {.name = "TAG2", + .desc = "Tag this event with tag bit 2 " + "for retirement counting with execution_event", + .bit = 18, + }, + {.name = "TAG3", + .desc = "Tag this event with tag bit 3 " + "for retirement 
counting with execution_event", + .bit = 19, + }, + }, + }, + + /* 17 */ + {.name = "PACKED_DP_UOP", + .desc = "Number of packed double-precision uops", + .event_select = 0xC, + .escr_select = 0x1, + .allowed_escrs = { 12, 35 }, + .perf_code = P4_EVENT_PACKED_DP_UOP, + .event_masks = { + {.name = "ALL", + .desc = "Count all uops operating on packed " + "double-precisions operands", + .bit = 15, + .flags = NETBURST_FL_DFL, + }, + {.name = "TAG0", + .desc = "Tag this event with tag bit 0 " + "for retirement counting with execution_event", + .bit = 16, + }, + {.name = "TAG1", + .desc = "Tag this event with tag bit 1 " + "for retirement counting with execution_event", + .bit = 17, + }, + {.name = "TAG2", + .desc = "Tag this event with tag bit 2 " + "for retirement counting with execution_event", + .bit = 18, + }, + {.name = "TAG3", + .desc = "Tag this event with tag bit 3 " + "for retirement counting with execution_event", + .bit = 19, + }, + }, + }, + + /* 18 */ + {.name = "SCALAR_SP_UOP", + .desc = "Number of scalar single-precision uops", + .event_select = 0xA, + .escr_select = 0x1, + .allowed_escrs = { 12, 35 }, + .perf_code = P4_EVENT_SCALAR_SP_UOP, + .event_masks = { + {.name = "ALL", + .desc = "Count all uops operating on scalar " + "single-precisions operands", + .bit = 15, + .flags = NETBURST_FL_DFL, + }, + {.name = "TAG0", + .desc = "Tag this event with tag bit 0 " + "for retirement counting with execution_event", + .bit = 16, + }, + {.name = "TAG1", + .desc = "Tag this event with tag bit 1 " + "for retirement counting with execution_event", + .bit = 17, + }, + {.name = "TAG2", + .desc = "Tag this event with tag bit 2 " + "for retirement counting with execution_event", + .bit = 18, + }, + {.name = "TAG3", + .desc = "Tag this event with tag bit 3 " + "for retirement counting with execution_event", + .bit = 19, + }, + }, + }, + + /* 19 */ + {.name = "SCALAR_DP_UOP", + .desc = "Number of scalar double-precision uops", + .event_select = 0xE, + .escr_select = 0x1, 
+ .allowed_escrs = { 12, 35 }, + .perf_code = P4_EVENT_SCALAR_DP_UOP, + .event_masks = { + {.name = "ALL", + .desc = "Count all uops operating on scalar " + "double-precisions operands", + .bit = 15, + .flags = NETBURST_FL_DFL, + }, + {.name = "TAG0", + .desc = "Tag this event with tag bit 0 " + "for retirement counting with execution_event", + .bit = 16, + }, + {.name = "TAG1", + .desc = "Tag this event with tag bit 1 " + "for retirement counting with execution_event", + .bit = 17, + }, + {.name = "TAG2", + .desc = "Tag this event with tag bit 2 " + "for retirement counting with execution_event", + .bit = 18, + }, + {.name = "TAG3", + .desc = "Tag this event with tag bit 3 " + "for retirement counting with execution_event", + .bit = 19, + }, + }, + }, + + /* 20 */ + {.name = "64BIT_MMX_UOP", + .desc = "Number of MMX instructions which " + "operate on 64-bit SIMD operands", + .event_select = 0x2, + .escr_select = 0x1, + .allowed_escrs = { 12, 35 }, + .perf_code = P4_EVENT_64BIT_MMX_UOP, + .event_masks = { + {.name = "ALL", + .desc = "Count all uops operating on 64-bit SIMD integer " + "operands in memory or MMX registers", + .bit = 15, + .flags = NETBURST_FL_DFL, + }, + {.name = "TAG0", + .desc = "Tag this event with tag bit 0 " + "for retirement counting with execution_event", + .bit = 16, + }, + {.name = "TAG1", + .desc = "Tag this event with tag bit 1 " + "for retirement counting with execution_event", + .bit = 17, + }, + {.name = "TAG2", + .desc = "Tag this event with tag bit 2 " + "for retirement counting with execution_event", + .bit = 18, + }, + {.name = "TAG3", + .desc = "Tag this event with tag bit 3 " + "for retirement counting with execution_event", + .bit = 19, + }, + }, + }, + + /* 21 */ + {.name = "128BIT_MMX_UOP", + .desc = "Number of MMX instructions which " + "operate on 128-bit SIMD operands", + .event_select = 0x1A, + .escr_select = 0x1, + .allowed_escrs = { 12, 35 }, + .perf_code = P4_EVENT_128BIT_MMX_UOP, + .event_masks = { + {.name = "ALL", + 
.desc = "Count all uops operating on 128-bit SIMD integer " + "operands in memory or MMX registers", + .bit = 15, + .flags = NETBURST_FL_DFL, + }, + {.name = "TAG0", + .desc = "Tag this event with tag bit 0 " + "for retirement counting with execution_event", + .bit = 16, + }, + {.name = "TAG1", + .desc = "Tag this event with tag bit 1 " + "for retirement counting with execution_event", + .bit = 17, + }, + {.name = "TAG2", + .desc = "Tag this event with tag bit 2 " + "for retirement counting with execution_event", + .bit = 18, + }, + {.name = "TAG3", + .desc = "Tag this event with tag bit 3 " + "for retirement counting with execution_event", + .bit = 19, + }, + }, + }, + + /* 22 */ + {.name = "X87_FP_UOP", + .desc = "Number of x87 floating-point uops", + .event_select = 0x4, + .escr_select = 0x1, + .allowed_escrs = { 12, 35 }, + .perf_code = P4_EVENT_X87_FP_UOP, + .event_masks = { + {.name = "ALL", + .desc = "Count all x87 FP uops", + .bit = 15, + .flags = NETBURST_FL_DFL, + }, + {.name = "TAG0", + .desc = "Tag this event with tag bit 0 " + "for retirement counting with execution_event", + .bit = 16, + }, + {.name = "TAG1", + .desc = "Tag this event with tag bit 1 " + "for retirement counting with execution_event", + .bit = 17, + }, + {.name = "TAG2", + .desc = "Tag this event with tag bit 2 " + "for retirement counting with execution_event", + .bit = 18, + }, + {.name = "TAG3", + .desc = "Tag this event with tag bit 3 " + "for retirement counting with execution_event", + .bit = 19, + }, + }, + }, + + /* 23 */ + {.name = "TC_misc", + .desc = "Miscellaneous events detected by the TC. 
The counter will " + "count twice for each occurrence", + .event_select = 0x6, + .escr_select = 0x1, + .allowed_escrs = { 9, 32 }, + .perf_code = P4_EVENT_TC_MISC, + .event_masks = { + {.name = "FLUSH", + .desc = "Number of flushes", + .bit = 4, + .flags = NETBURST_FL_DFL, + }, + }, + }, + + /* 24 */ + {.name = "GLOBAL_POWER_EVENTS", + .desc = "Counts the time during which a processor is not stopped", + .event_select = 0x13, + .escr_select = 0x6, + .allowed_escrs = { 6, 29 }, + .perf_code = P4_EVENT_GLOBAL_POWER_EVENTS, + .event_masks = { + {.name = "RUNNING", + .desc = "The processor is active (includes the " + "handling of HLT STPCLK and throttling", + .bit = 0, + .flags = NETBURST_FL_DFL, + }, + }, + }, + + /* 25 */ + {.name = "TC_MS_XFER", + .desc = "Number of times that uop delivery changed from TC to MS ROM", + .event_select = 0x5, + .escr_select = 0x0, + .allowed_escrs = { 8, 31 }, + .perf_code = P4_EVENT_TC_MS_XFER, + .event_masks = { + {.name = "CISC", + .desc = "A TC to MS transfer occurred", + .bit = 0, + .flags = NETBURST_FL_DFL, + }, + }, + }, + + /* 26 */ + {.name = "UOP_QUEUE_WRITES", + .desc = "Number of valid uops written to the uop queue", + .event_select = 0x9, + .escr_select = 0x0, + .allowed_escrs = { 8, 31 }, + .perf_code = P4_EVENT_UOP_QUEUE_WRITES, + .event_masks = { + {.name = "FROM_TC_BUILD", + .desc = "The uops being written are from TC build mode", + .bit = 0, + }, + {.name = "FROM_TC_DELIVER", + .desc = "The uops being written are from TC deliver mode", + .bit = 1, + }, + {.name = "FROM_ROM", + .desc = "The uops being written are from microcode ROM", + .bit = 2, + }, + }, + }, + + /* 27 */ + {.name = "RETIRED_MISPRED_BRANCH_TYPE", + .desc = "Number of retiring mispredicted branches by type", + .event_select = 0x5, + .escr_select = 0x2, + .allowed_escrs = { 10, 33 }, + .perf_code = P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, + .event_masks = { + {.name = "CONDITIONAL", + .desc = "Conditional jumps", + .bit = 1, + }, + {.name = "CALL", + .desc 
= "Indirect call branches", + .bit = 2, + }, + {.name = "RETURN", + .desc = "Return branches", + .bit = 3, + }, + {.name = "INDIRECT", + .desc = "Returns, indirect calls, or indirect jumps", + .bit = 4, + }, + }, + }, + + /* 28 */ + {.name = "RETIRED_BRANCH_TYPE", + .desc = "Number of retiring branches by type", + .event_select = 0x4, + .escr_select = 0x2, + .allowed_escrs = { 10, 33 }, + .perf_code = P4_EVENT_RETIRED_BRANCH_TYPE, + .event_masks = { + {.name = "CONDITIONAL", + .desc = "Conditional jumps", + .bit = 1, + }, + {.name = "CALL", + .desc = "Indirect call branches", + .bit = 2, + }, + {.name = "RETURN", + .desc = "Return branches", + .bit = 3, + }, + {.name = "INDIRECT", + .desc = "Returns, indirect calls, or indirect jumps", + .bit = 4, + }, + }, + }, + + /* 29 */ + {.name = "resource_stall", + .desc = "Occurrences of latency or stalls in the Allocator", + .event_select = 0x1, + .escr_select = 0x1, + .allowed_escrs = { 17, 40 }, + .perf_code = P4_EVENT_RESOURCE_STALL, + .event_masks = { + {.name = "SBFULL", + .desc = "A stall due to lack of store buffers", + .bit = 5, + .flags = NETBURST_FL_DFL, + }, + }, + }, + + /* 30 */ + {.name = "WC_Buffer", + .desc = "Number of Write Combining Buffer operations", + .event_select = 0x5, + .escr_select = 0x5, + .allowed_escrs = { 15, 38 }, + .perf_code = P4_EVENT_WC_BUFFER, + .event_masks = { + {.name = "WCB_EVICTS", + .desc = "WC Buffer evictions of all causes", + .bit = 0, + }, + {.name = "WCB_FULL_EVICT", + .desc = "WC Buffer eviction; no WC buffer is available", + .bit = 1, + }, + }, + }, + + /* 31 */ + {.name = "b2b_cycles", + .desc = "Number of back-to-back bus cycles", + .event_select = 0x16, + .escr_select = 0x3, + .allowed_escrs = { 6, 29 }, + .perf_code = P4_EVENT_B2B_CYCLES, + .event_masks = { + {.name = "BIT1", + .desc = "bit 1", + .bit = 1, + }, + {.name = "BIT2", + .desc = "bit 2", + .bit = 2, + }, + {.name = "BIT3", + .desc = "bit 3", + .bit = 3, + }, + {.name = "BIT4", + .desc = "bit 4", + .bit = 4, + 
}, + {.name = "BIT5", + .desc = "bit 5", + .bit = 4, + }, + {.name = "BIT6", + .desc = "bit 6", + .bit = 4, + }, + }, + }, + /* 32 */ + {.name = "bnr", + .desc = "Number of bus-not-ready conditions", + .event_select = 0x8, + .escr_select = 0x3, + .allowed_escrs = { 6, 29 }, + .perf_code = P4_EVENT_BNR, + .event_masks = { + {.name = "BIT0", + .desc = "bit 0", + .bit = 0, + }, + {.name = "BIT1", + .desc = "bit 1", + .bit = 1, + }, + {.name = "BIT2", + .desc = "bit 2", + .bit = 2, + }, + }, + }, + + /* 33 */ + {.name = "snoop", + .desc = "Number of snoop hit modified bus traffic", + .event_select = 0x6, + .escr_select = 0x3, + .allowed_escrs = { 6, 29 }, + .perf_code = P4_EVENT_SNOOP, + .event_masks = { + {.name = "BIT2", + .desc = "bit 2", + .bit = 2, + }, + {.name = "BIT6", + .desc = "bit 6", + .bit = 6, + }, + {.name = "BIT7", + .desc = "bit 7", + .bit = 7, + }, + }, + }, + + /* 34 */ + {.name = "response", + .desc = "Count of different types of responses", + .event_select = 0x4, + .escr_select = 0x3, + .allowed_escrs = { 6, 29 }, + .perf_code = P4_EVENT_RESPONSE, + .event_masks = { + {.name = "BIT1", + .desc = "bit 1", + .bit = 1, + }, + {.name = "BIT2", + .desc = "bit 2", + .bit = 2, + }, + {.name = "BIT8", + .desc = "bit 8", + .bit = 8, + }, + {.name = "BIT9", + .desc = "bit 9", + .bit = 9, + }, + }, + }, + + /* 35 */ + {.name = "front_end_event", + .desc = "Number of retirements of tagged uops which are specified " + "through the front-end tagging mechanism", + .event_select = 0x8, + .escr_select = 0x5, + .allowed_escrs = { 21, 43 }, + .perf_code = P4_EVENT_FRONT_END_EVENT, + .event_masks = { + {.name = "NBOGUS", + .desc = "The marked uops are not bogus", + .bit = 0, + }, + {.name = "BOGUS", + .desc = "The marked uops are bogus", + .bit = 1, + }, + }, + }, + + /* 36 */ + {.name = "execution_event", + .desc = "Number of retirements of tagged uops which are specified " + "through the execution tagging mechanism. 
The event-mask " + "allows from one to four types of uops to be tagged", + .event_select = 0xC, + .escr_select = 0x5, + .allowed_escrs = { 21, 43 }, + .perf_code = P4_EVENT_EXECUTION_EVENT, + .event_masks = { + {.name = "NBOGUS0", + .desc = "The marked uops are not bogus", + .bit = 0, + }, + {.name = "NBOGUS1", + .desc = "The marked uops are not bogus", + .bit = 1, + }, + {.name = "NBOGUS2", + .desc = "The marked uops are not bogus", + .bit = 2, + }, + {.name = "NBOGUS3", + .desc = "The marked uops are not bogus", + .bit = 3, + }, + {.name = "BOGUS0", + .desc = "The marked uops are bogus", + .bit = 4, + }, + {.name = "BOGUS1", + .desc = "The marked uops are bogus", + .bit = 5, + }, + {.name = "BOGUS2", + .desc = "The marked uops are bogus", + .bit = 6, + }, + {.name = "BOGUS3", + .desc = "The marked uops are bogus", + .bit = 7, + }, + }, + }, + + /* 37 */ + {.name = "replay_event", + .desc = "Number of retirements of tagged uops which are specified " + "through the replay tagging mechanism", + .event_select = 0x9, + .escr_select = 0x5, + .allowed_escrs = { 21, 43 }, + .perf_code = P4_EVENT_REPLAY_EVENT, + .event_masks = { + {.name = "NBOGUS", + .desc = "The marked uops are not bogus", + .bit = 0, + }, + {.name = "BOGUS", + .desc = "The marked uops are bogus", + .bit = 1, + }, + {.name = "L1_LD_MISS", + .desc = "Virtual mask for L1 cache load miss replays", + .bit = 2, + }, + {.name = "L2_LD_MISS", + .desc = "Virtual mask for L2 cache load miss replays", + .bit = 3, + }, + {.name = "DTLB_LD_MISS", + .desc = "Virtual mask for DTLB load miss replays", + .bit = 4, + }, + {.name = "DTLB_ST_MISS", + .desc = "Virtual mask for DTLB store miss replays", + .bit = 5, + }, + {.name = "DTLB_ALL_MISS", + .desc = "Virtual mask for all DTLB miss replays", + .bit = 6, + }, + {.name = "BR_MSP", + .desc = "Virtual mask for tagged mispredicted branch replays", + .bit = 7, + }, + {.name = "MOB_LD_REPLAY", + .desc = "Virtual mask for MOB load replays", + .bit = 8, + }, + {.name = 
"SP_LD_RET", + .desc = "Virtual mask for split load replays. Use with load_port_replay event", + .bit = 9, + }, + {.name = "SP_ST_RET", + .desc = "Virtual mask for split store replays. Use with store_port_replay event", + .bit = 10, + }, + }, + }, + + /* 38 */ + {.name = "INSTR_RETIRED", + .desc = "Number of instructions retired during a clock cycle", + .event_select = 0x2, + .escr_select = 0x4, + .allowed_escrs = { 20, 42 }, + .perf_code = P4_EVENT_INSTR_RETIRED, + .event_masks = { + {.name = "NBOGUSNTAG", + .desc = "Non-bogus instructions that are not tagged", + .bit = 0, + }, + {.name = "NBOGUSTAG", + .desc = "Non-bogus instructions that are tagged", + .bit = 1, + }, + {.name = "BOGUSNTAG", + .desc = "Bogus instructions that are not tagged", + .bit = 2, + }, + {.name = "BOGUSTAG", + .desc = "Bogus instructions that are tagged", + .bit = 3, + }, + }, + }, + + /* 39 */ + {.name = "UOPS_RETIRED", + .desc = "Number of uops retired during a clock cycle", + .event_select = 0x1, + .escr_select = 0x4, + .allowed_escrs = { 20, 42 }, + .perf_code = P4_EVENT_UOPS_RETIRED, + .event_masks = { + {.name = "NBOGUS", + .desc = "The marked uops are not bogus", + .bit = 0, + }, + {.name = "BOGUS", + .desc = "The marked uops are bogus", + .bit = 1, + }, + }, + }, + + /* 40 */ + {.name = "UOP_TYPE", + .desc = "This event is used in conjunction with with the front-end " + "mechanism to tag load and store uops", + .event_select = 0x2, + .escr_select = 0x2, + .allowed_escrs = { 18, 41 }, + .perf_code = P4_EVENT_UOP_TYPE, + .event_masks = { + {.name = "TAGLOADS", + .desc = "The uop is a load operation", + .bit = 1, + }, + {.name = "TAGSTORES", + .desc = "The uop is a store operation", + .bit = 2, + }, + }, + }, + + /* 41 */ + {.name = "BRANCH_RETIRED", + .desc = "Number of retirements of a branch", + .event_select = 0x6, + .escr_select = 0x5, + .allowed_escrs = { 21, 43 }, + .perf_code = P4_EVENT_BRANCH_RETIRED, + .event_masks = { + {.name = "MMNP", + .desc = "Branch not-taken 
predicted", + .bit = 0, + }, + {.name = "MMNM", + .desc = "Branch not-taken mispredicted", + .bit = 1, + }, + {.name = "MMTP", + .desc = "Branch taken predicted", + .bit = 2, + }, + {.name = "MMTM", + .desc = "Branch taken mispredicted", + .bit = 3, + }, + }, + }, + + /* 42 */ + {.name = "MISPRED_BRANCH_RETIRED", + .desc = "Number of retirements of mispredicted " + "IA-32 branch instructions", + .event_select = 0x3, + .escr_select = 0x4, + .allowed_escrs = { 20, 42 }, + .perf_code = P4_EVENT_MISPRED_BRANCH_RETIRED, + .event_masks = { + {.name = "BOGUS", + .desc = "The retired instruction is not bogus", + .bit = 0, + .flags = NETBURST_FL_DFL, + }, + }, + }, + + /* 43 */ + {.name = "X87_ASSIST", + .desc = "Number of retirements of x87 instructions that required " + "special handling", + .event_select = 0x3, + .escr_select = 0x5, + .allowed_escrs = { 21, 43 }, + .perf_code = P4_EVENT_X87_ASSIST, + .event_masks = { + {.name = "FPSU", + .desc = "Handle FP stack underflow", + .bit = 0, + }, + {.name = "FPSO", + .desc = "Handle FP stack overflow", + .bit = 1, + }, + {.name = "POAO", + .desc = "Handle x87 output overflow", + .bit = 2, + }, + {.name = "POAU", + .desc = "Handle x87 output underflow", + .bit = 3, + }, + {.name = "PREA", + .desc = "Handle x87 input assist", + .bit = 4, + }, + }, + }, + + /* 44 */ + {.name = "MACHINE_CLEAR", + .desc = "Number of occurances when the entire " + "pipeline of the machine is cleared", + .event_select = 0x2, + .escr_select = 0x5, + .allowed_escrs = { 21, 43 }, + .perf_code = P4_EVENT_MACHINE_CLEAR, + .event_masks = { + {.name = "CLEAR", + .desc = "Counts for a portion of the many cycles while the " + "machine is cleared for any cause. 
Use edge-"
+			 "triggering for this bit only to get a count of "
+			 "occurances versus a duration",
+		 .bit = 0,
+		},
+		{.name = "MOCLEAR",
+		 .desc = "Increments each time the machine is cleared due to "
+			 "memory ordering issues",
+		 .bit = 2,
+		},
+		{.name = "SMCLEAR",
+		 .desc = "Increments each time the machine is cleared due to "
+			 "self-modifying code issues",
+		 .bit = 6,
+		},
+	 },
+	},
+
+	/* 45 */
+	{.name = "instr_completed",
+	 .desc = "Instructions that have completed and "
+		 "retired during a clock cycle (models 3, 4, 6 only)",
+	 .event_select = 0x7,
+	 .escr_select = 0x4,
+	 .allowed_escrs = { 21, 42 },
+	 .perf_code = P4_EVENT_INSTR_COMPLETED,
+	 .event_masks = {
+		{.name = "NBOGUS",
+		 .desc = "Non-bogus instructions",
+		 .bit = 0,
+		},
+		{.name = "BOGUS",
+		 .desc = "Bogus instructions",
+		 .bit = 1,
+		},
+	 },
+	},
+};
+
+#define NETBURST_EVENT_COUNT (sizeof(op_netburst_events)/sizeof(netburst_entry_t))
+
+/**
+ * op_netburst_get_perf_encoding
+ *
+ * Build the perf_events 'config' word for one Netburst (Pentium 4) event.
+ *
+ * @evt_name:  event name; must match a .name in op_netburst_events exactly
+ * @evt_um:    unit mask; must equal (1 << bit) of exactly one of the
+ *             event's masks (combined masks are not supported)
+ * @do_kernel: non-zero to set the T0/T1 OS bits of the ESCR
+ * @do_user:   non-zero to set the T0/T1 USR bits of the ESCR
+ * @config:    out: (ESCR << 32) | CCCR, where the ESCR event-select field
+ *             carries the event's perf_code instead of the hardware
+ *             event_select value
+ *
+ * Returns 0 on success; -1 if evt_name or config is NULL, the event name
+ * is unknown, or no mask of the event corresponds to evt_um.  *config is
+ * left untouched on failure.
+ **/
+int op_netburst_get_perf_encoding(const char * evt_name, unsigned long evt_um, int do_kernel,
+                                  int do_user, u64 * config)
+{
+	const netburst_entry_t * evt = NULL;
+	unsigned int i;
+	unsigned int bit = 0;
+	int um_found = 0;
+	netburst_escr_value_t escr;
+	netburst_cccr_value_t cccr;
+	u64 escr_val;
+
+	if (!evt_name || !config)
+		return -1;
+
+	/* Match up event name with its netburst table entry */
+	for (i = 0; i < NETBURST_EVENT_COUNT; i++) {
+		if (!strcmp(evt_name, op_netburst_events[i].name)) {
+			evt = &op_netburst_events[i];
+			break;
+		}
+	}
+	if (!evt)
+		return -1;
+
+	/* Find the unit mask whose bit corresponds to evt_um.  The scan is
+	 * bounded by EVENT_MASK_BITS because a fully populated mask table
+	 * has no NULL-name sentinel to stop on.
+	 */
+	for (i = 0; i < EVENT_MASK_BITS && evt->event_masks[i].name; i++) {
+		if (evt_um == (1UL << evt->event_masks[i].bit)) {
+			bit = evt->event_masks[i].bit;
+			um_found = 1;
+			break;
+		}
+	}
+	if (!um_found)
+		return -1;
+
+	/* Start from all-zero registers: only a handful of bit-fields are
+	 * assigned below, and both unions are read back through .val.
+	 */
+	escr.val = 0;
+	cccr.val = 0;
+
+	if (bit < EVENT_MASK_BITS) {
+		escr.bits.event_mask = 1U << bit;
+	} else {
+		/* Bits beyond the event-mask field select a tag value */
+		escr.bits.tag_value = 1U << (bit - EVENT_MASK_BITS);
+		escr.bits.tag_enable = 1;
+	}
+
+	if (do_user) {
+		escr.bits.t1_usr = 1;
+		escr.bits.t0_usr = 1;
+	}
+	if (do_kernel) {
+		escr.bits.t1_os = 1;
+		escr.bits.t0_os = 1;
+	}
+
+	cccr.bits.enable = 1;
+	cccr.bits.escr_select = evt->escr_select;
+	cccr.bits.active_thread = 3;
+
+	/* The event-select field (bits 30-25) carries the perf_events event
+	 * code rather than the hardware event_select value, so the latter
+	 * is never stored into the ESCR image.
+	 */
+	escr_val = escr.val & ~(0x3full << 25);
+	escr_val |= (u64)evt->perf_code << 25;
+	*config = (escr_val << 32) | cccr.val;
+
+	return 0;
+}
diff --git a/libop/op_netburst.h b/libop/op_netburst.h
new file mode 100644
index 0000000..809b550
--- /dev/null
+++ b/libop/op_netburst.h
@@ -0,0 +1,256 @@
+/*
+ * @file libop/op_netburst.h
+ * Definitions of structures and methods for handling Intel Netburst events.
+ *
+ * @remark Copyright 2013 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * Created on: May 14, 2013
+ * @author Maynard Johnson
+ * (C) Copyright IBM Corp. 2013
+ *
+ * NOTE: The code in this file was largely borrowed from a libpfm file,
+ * so we include below the Copyright and licensing information from that file.
+ */
+/*
+ * Copyright (c) 2006 IBM Corp.
+ * Contributed by Kevin Corry <kev...@us...>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * pfmlib_netburst_priv.h
+ *
+ * Structures and definitions for use in the Pentium4/Xeon/EM64T libpfm code.
+ */
+
+#ifndef OP_NETBURST_H_
+#define OP_NETBURST_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ESCR: Event Selection Control Register
+ *
+ * These registers are used to select which event to count along with options
+ * for that event. There are (up to) 45 ESCRs, but each data counter is
+ * restricted to a specific set of ESCRs.
+ */
+
+/**
+ * netburst_escr_value_t
+ *
+ * Bit-wise breakdown of the ESCR registers.
+ *
+ * Bits      Description
+ * -------   -----------
+ * 63 - 31   Reserved
+ * 30 - 25   Event Select
+ * 24 - 9    Event Mask
+ * 8 - 5     Tag Value
+ * 4         Tag Enable
+ * 3         T0 OS - Enable counting in kernel mode (thread 0)
+ * 2         T0 USR - Enable counting in user mode (thread 0)
+ * 1         T1 OS - Enable counting in kernel mode (thread 1)
+ * 0         T1 USR - Enable counting in user mode (thread 1)
+ *
+ * The bit-fields of 'bits' add up to 32 bits while 'val' is 64 bits wide,
+ * so assigning bit-fields alone does not define the whole of 'val'.
+ * NOTE(review): the field order below lines up with the table above only
+ * if the compiler allocates bit-fields starting at the least-significant
+ * bit -- confirm for any non-x86 build.
+ **/
+
+#define EVENT_MASK_BITS 16	/* width of the ESCR event_mask field; also the capacity of netburst_entry_t.event_masks */
+#define EVENT_SELECT_BITS 6	/* width of the ESCR event_select field */
+
+typedef union {
+	unsigned long long val;
+	struct {
+		unsigned long t1_usr:1;
+		unsigned long t1_os:1;
+		unsigned long t0_usr:1;
+		unsigned long t0_os:1;
+		unsigned long tag_enable:1;
+		unsigned long tag_value:4;
+		unsigned long event_mask:EVENT_MASK_BITS;
+		unsigned long event_select:EVENT_SELECT_BITS;
+		unsigned long reserved:1;
+	} bits;
+} netburst_escr_value_t;
+
+/* CCCR: Counter Configuration Control Register
+ *
+ * These registers are used to configure the data counters. There are 18
+ * CCCRs, one for each data counter.
+ */
+
+/**
+ * netburst_cccr_value_t
+ *
+ * Bit-wise breakdown of the CCCR registers.
+ *
+ * Bits      Description
+ * -------   -----------
+ * 63 - 32   Reserved
+ * 31        OVF - The data counter overflowed.
+ * 30        Cascade - Enable cascading of data counter when alternate
+ *           counter overflows.
+ * 29 - 28   Reserved
+ * 27        OVF_PMI_T1 - Generate interrupt for LP1 on counter overflow
+ * 26        OVF_PMI_T0 - Generate interrupt for LP0 on counter overflow
+ * 25        FORCE_OVF - Force interrupt on every counter increment
+ * 24        Edge - Enable rising edge detection of the threshold comparison
+ *           output for filtering event counts.
+ * 23 - 20   Threshold Value - Select the threshold value for comparing to
+ *           incoming event counts.
+ * 19        Complement - Select how incoming event count is compared with
+ *           the threshold value.
+ * 18        Compare - Enable filtering of event counts.
+ * 17 - 16   Active Thread - Only used with HT enabled.
+ *           00 - None: Count when neither LP is active.
+ *           01 - Single: Count when only one LP is active.
+ *           10 - Both: Count when both LPs are active.
+ *           11 - Any: Count when either LP is active.
+ * 15 - 13   ESCR Select - Select which ESCR to use for selecting the
+ *           event to count.
+ * 12        Enable - Turns the data counter on or off.
+ * 11 - 0    Reserved
+ *
+ * As with the ESCR union, the bit-fields of 'bits' add up to 32 bits while
+ * 'val' is 64 bits wide.
+ **/
+typedef union {
+	unsigned long long val;
+	struct {
+		unsigned long reserved1:12;
+		unsigned long enable:1;
+		unsigned long escr_select:3;
+		unsigned long active_thread:2;
+		unsigned long compare:1;
+		unsigned long complement:1;
+		unsigned long threshold:4;
+		unsigned long edge:1;
+		unsigned long force_ovf:1;
+		unsigned long ovf_pmi_t0:1;
+		unsigned long ovf_pmi_t1:1;
+		unsigned long reserved2:2;
+		unsigned long cascade:1;
+		unsigned long overflow:1;
+	} bits;
+} netburst_cccr_value_t;
+
+/**
+ * netburst_event_mask_t
+ *
+ * Defines one bit of the event-mask for one Pentium4 event.
+ *
+ * @name: Event mask name
+ * @desc: Event mask description
+ * @bit: The bit position within the event_mask field.
+ *       op_netburst_get_perf_encoding() treats a value of EVENT_MASK_BITS
+ *       or more as a tag-value bit (bit - EVENT_MASK_BITS) and turns on
+ *       tag_enable instead of setting an event_mask bit.
+ * @flags: NETBURST_FL_* attributes of this mask (see below)
+ **/
+typedef struct {
+	char *name;
+	char *desc;
+	unsigned int bit;
+	unsigned int flags;
+} netburst_event_mask_t;
+/*
+ * netburst_event_mask_t->flags
+ */
+#define NETBURST_FL_DFL 0x1 /* event mask is default */
+
+#define MAX_ESCRS_PER_EVENT 2	/* number of slots in netburst_entry_t.allowed_escrs */
+
+/*
+ * These are the unique event codes used by perf_events.
+ * They need to be encoded in the ESCR.event_select field when
+ * programming for perf_events.
+ * NOTE: Only 36 of the events specified below have counterparts
+ * in oprofile's p4 event list
+ *
+ * No enumerator has an explicit value, so each code is simply its position
+ * in the list (starting at 0); op_netburst_get_perf_encoding() shifts that
+ * value into bits 30-25 of the ESCR half of the config word.
+ * NOTE(review): presumably the order must stay in step with the kernel's
+ * own P4 event enumeration -- confirm before inserting or reordering.
+ */
+enum netburst_event_code {
+	P4_EVENT_TC_DELIVER_MODE,
+	P4_EVENT_BPU_FETCH_REQUEST,
+	P4_EVENT_ITLB_REFERENCE,
+	P4_EVENT_MEMORY_CANCEL,
+	P4_EVENT_MEMORY_COMPLETE,
+	P4_EVENT_LOAD_PORT_REPLAY,
+	P4_EVENT_STORE_PORT_REPLAY,
+	P4_EVENT_MOB_LOAD_REPLAY,
+	P4_EVENT_PAGE_WALK_TYPE,
+	P4_EVENT_BSQ_CACHE_REFERENCE,
+	P4_EVENT_IOQ_ALLOCATION,
+	P4_EVENT_IOQ_ACTIVE_ENTRIES,
+	P4_EVENT_FSB_DATA_ACTIVITY,
+	P4_EVENT_BSQ_ALLOCATION,
+	P4_EVENT_BSQ_ACTIVE_ENTRIES,
+	P4_EVENT_SSE_INPUT_ASSIST,
+	P4_EVENT_PACKED_SP_UOP,
+	P4_EVENT_PACKED_DP_UOP,
+	P4_EVENT_SCALAR_SP_UOP,
+	P4_EVENT_SCALAR_DP_UOP,
+	P4_EVENT_64BIT_MMX_UOP,
+	P4_EVENT_128BIT_MMX_UOP,
+	P4_EVENT_X87_FP_UOP,
+	P4_EVENT_TC_MISC,
+	P4_EVENT_GLOBAL_POWER_EVENTS,
+	P4_EVENT_TC_MS_XFER,
+	P4_EVENT_UOP_QUEUE_WRITES,
+	P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE,
+	P4_EVENT_RETIRED_BRANCH_TYPE,
+	P4_EVENT_RESOURCE_STALL,
+	P4_EVENT_WC_BUFFER,
+	P4_EVENT_B2B_CYCLES,
+	P4_EVENT_BNR,
+	P4_EVENT_SNOOP,
+	P4_EVENT_RESPONSE,
+	P4_EVENT_FRONT_END_EVENT,
+	P4_EVENT_EXECUTION_EVENT,
+	P4_EVENT_REPLAY_EVENT,
+	P4_EVENT_INSTR_RETIRED,
+	P4_EVENT_UOPS_RETIRED,
+	P4_EVENT_UOP_TYPE,
+	P4_EVENT_BRANCH_RETIRED,
+	P4_EVENT_MISPRED_BRANCH_RETIRED,
+	P4_EVENT_X87_ASSIST,
+	P4_EVENT_MACHINE_CLEAR,
+	P4_EVENT_INSTR_COMPLETED,
+};
+
+typedef struct {
+	char *name;	/* event name; compared with strcmp() by op_netburst_get_perf_encoding() */
+	char *desc;	/* human-readable description of the event */
+	unsigned int event_select;	/* hardware value for the ESCR event_select field */
+	unsigned int escr_select;	/* value stored in the CCCR escr_select field */
+	enum netburst_event_code perf_code; /* perf_event event code, enum netburst_event_code */
+	int allowed_escrs[MAX_ESCRS_PER_EVENT];	/* presumably the ESCR numbers this event may use -- TODO confirm; not read by op_netburst_get_perf_encoding() */
+	netburst_event_mask_t event_masks[EVENT_MASK_BITS];	/* unit masks; the first entry with a NULL name ends the list */
+} netburst_entry_t;
+
+#define NETBURST_ATTR_U 0
+#define NETBURST_ATTR_K 1
+#define NETBURST_ATTR_C 2
+#define NETBURST_ATTR_E 3
+#define NETBURST_ATTR_T 4
+
+#define _NETBURST_ATTR_U (1 << NETBURST_ATTR_U)
+#define _NETBURST_ATTR_K (1 << NETBURST_ATTR_K)
+
+#define P4_REPLAY_REAL_MASK 0x00000003	/* NOTE(review): presumably the two non-virtual replay_event mask bits (NBOGUS, BOGUS) -- confirm */
+
+int op_netburst_get_perf_encoding(const
char * evt_name, unsigned long evt_um, int do_kernel, + int do_user, u64 * config); + +#ifdef __cplusplus +} +#endif + +#endif // OP_NETBURST_H_ diff --git a/pe_profiling/operf.cpp b/pe_profiling/operf.cpp index 76aeae0..29ca117 100644 --- a/pe_profiling/operf.cpp +++ b/pe_profiling/operf.cpp @@ -47,6 +47,7 @@ #include "child_reader.h" #include "op_get_time.h" #include "operf_stats.h" +#include "op_netburst.h" using namespace std; @@ -1178,6 +1179,12 @@ static void _get_event_code(operf_event_t * event) #endif event->op_evt_code = base_code; + if (cpu_type == CPU_P4 || cpu_type == CPU_P4_HT2) { + if (op_netburst_get_perf_encoding(event->name, event->evt_um, 1, 1, &config)) { + cerr << "Unable to get event encoding for " << event->name << endl; + exit(EXIT_FAILURE); + } + } event->evt_code = config; } -- 1.7.1 |