From: <gr...@re...> - 2002-09-18 17:55:55
|
Hi, attached is a patch which alters the way event descriptions, unit masks, and unit mask descriptions are declared, hopefully to "future-proof" it a little as we bulk out the number of events supported. The patch removes most uses of cross-table indexing in libop/op_events*, using explicit pointers between events and unit masks, and merging the descriptions into the mask or event tables directly. The patch is against oprofile's CVS head. I have performed a clean apply / build / test of this on my P4 in RTC mode, including running op_help and oprofpp to see that they are at least reporting sensible values; it would be nice if someone with a PPro or Athlon could also check that the outputs are good. I've improved my testing setup a little since messing up the separation of items between patches yesterday; I'll go back over yesterday's x86-model-specific reorg patch one more time and resubmit when I'm sure it applies, builds and runs against CVS. Until then, of course, comments on the content would be appreciated (I'll fix the spacing too :) -graydon Index: libop/op_events.c =================================================================== RCS file: /cvsroot/oprofile/oprofile/libop/op_events.c,v retrieving revision 1.11 diff -u -r1.11 op_events.c --- libop/op_events.c 15 Sep 2002 19:47:47 -0000 1.11 +++ libop/op_events.c 18 Sep 2002 17:17:35 -0000 @@ -20,25 +20,78 @@ #include "op_events.h" -/* nr. 
unit mask values, unit mask type, default, unit mask choices */ -struct op_unit_mask op_unit_masks[] = { - /* reserved empty entry */ - { 0, utm_mandatory, 0x00, { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, }, - /* MESI counters */ - { 5, utm_bitmask, 0x0f, { 0x8, 0x4, 0x2, 0x1, 0xf, 0x0, 0x0 }, }, - /* EBL self/any default to any transitions */ - { 2, utm_exclusive, 0x20, { 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0 }, }, - /* MMX PII events */ - { 1, utm_mandatory, 0xf, { 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }, }, - { 7, utm_bitmask, 0x3f, { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x3f }, }, - { 2, utm_exclusive, 0x0, { 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 }, }, - { 5, utm_bitmask, 0x0f, { 0x1, 0x2, 0x4, 0x8, 0xf, 0x0, 0x0 }, }, - /* KNI PIII events */ - { 4, utm_exclusive, 0x0, { 0x0, 0x1, 0x2, 0x3, 0x0, 0x0, 0x0 }, }, - { 2, utm_bitmask, 0x1, { 0x0, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0 }, }, - /* Athlon MOESI cache events */ - { 6, utm_bitmask, 0x1f, { 0x10, 0x8, 0x4, 0x2, 0x1, 0x1f }, } -}; +/* reserved empty entry */ +static struct op_unit_mask um_empty = + { 0, utm_mandatory, 0x00, + { {0x0, 0x0} } }; + +/* Modified/Exclusive/Shared/Invalid (MESI) counters */ +static struct op_unit_mask um_mesi = + { 5, utm_bitmask, 0x0f, + { {0x8, "(M)odified cache state"}, + {0x4, "(E)xclusive cache state"}, + {0x2, "(S)hared cache state"}, + {0x1, "(I)nvalid cache state"}, + {0xf, "all MESI cache state"} } }; + +/* External Bus Logic (EBL) self/any default to any transitions */ +static struct op_unit_mask um_ebl = + { 2, utm_exclusive, 0x20, + { {0x0, "self-generated transactions"}, + {0x20, "any transactions"} } }; + +/* MMX PII events */ +static struct op_unit_mask um_mmx_uops = + { 1, utm_mandatory, 0xf, + { {0xf, "mandatory"} } }; + +static struct op_unit_mask um_mmx_instr_type_exec = + { 7, utm_bitmask, 0x3f, + { {0x1, "MMX packed multiplies"}, + {0x2, "MMX packed shifts"}, + {0x4, "MMX pack operations"}, + {0x8, "MMX unpack operations"}, + {0x10, "MMX packed logical"}, + {0x20, "MMX packed 
arithmetic"}, + {0x3f, "All the above"} } }; + +static struct op_unit_mask um_mmx_trans = + { 2, utm_exclusive, 0x0, + { {0x0, "MMX->float transitions"}, + {0x1, "float->MMX transitions"} } }; + +static struct op_unit_mask um_seg_rename = + { 5, utm_bitmask, 0x0f, + { {0x1, "ES register"}, + {0x2, "DS register"}, + {0x4, "FS register"}, + /* IA manual says this is actually FS again - no mention in errata */ + /* but test show that is really a typo error from IA manual */ + {0x8, "GS register"}, + {0xf, "ES,DS,FS,GS registers"} } }; + +/* KNI PIII events */ +static struct op_unit_mask um_kni_prefetch = + { 4, utm_exclusive, 0x0, + { {0x0, "prefetch NTA"}, + {0x1, "prefetch T1"}, + {0x2, "prefetch T2"}, + {0x3, "weakly ordered stores"} } }; + +static struct op_unit_mask um_kni_inst_retired = + { 2, utm_bitmask, 0x1, + { {0x0, "packed and scalar"}, + {0x1, "packed"} } }; + +/* Athlon MOESI cache events */ +static struct op_unit_mask um_moesi = + { 6, utm_bitmask, 0x1f, + { {0x10, "(M)odified cache state"}, + {0x8, "(O)wner cache state"}, + {0x4, "(E)xclusive cache state"}, + {0x2, "(S)hared cache state"}, + {0x1, "(I)nvalid cache state"}, + {0x1f, "all MOESI cache state"} } }; /* the following are just short cut for filling the table of event */ #define OP_RTC (1 << CPU_RTC) @@ -53,136 +106,246 @@ #define CTR_1 (1 << 1) /* ctr allowed, allowed cpus, Event #, unit mask, name, min event value */ + /* event name must be in one word */ struct op_event op_events[] = { + /* Clocks */ - { CTR_ALL, OP_IA_ALL, 0x79, 0, "CPU_CLK_UNHALTED", 6000 }, + { CTR_ALL, OP_IA_ALL, 0x79, &um_empty, "CPU_CLK_UNHALTED", + "clocks processor is not halted", 6000 }, + /* Data Cache Unit (DCU) */ - { CTR_ALL, OP_IA_ALL, 0x43, 0, "DATA_MEM_REFS", 500 }, - { CTR_ALL, OP_IA_ALL, 0x45, 0, "DCU_LINES_IN", 500 }, - { CTR_ALL, OP_IA_ALL, 0x46, 0, "DCU_M_LINES_IN", 500 }, - { CTR_ALL, OP_IA_ALL, 0x47, 0, "DCU_M_LINES_OUT", 500}, - { CTR_ALL, OP_IA_ALL, 0x48, 0, "DCU_MISS_OUTSTANDING", 500 }, + { 
CTR_ALL, OP_IA_ALL, 0x43, &um_empty, "DATA_MEM_REFS", + "all memory references, cachable and non", 500 }, + { CTR_ALL, OP_IA_ALL, 0x45, &um_empty, "DCU_LINES_IN", + "total lines allocated in the DCU", 500 }, + { CTR_ALL, OP_IA_ALL, 0x46, &um_empty, "DCU_M_LINES_IN", + "number of M state lines allocated in DCU", 500 }, + { CTR_ALL, OP_IA_ALL, 0x47, &um_empty, "DCU_M_LINES_OUT", + "number of M lines evicted from the DCU", 500}, + { CTR_ALL, OP_IA_ALL, 0x48, &um_empty, "DCU_MISS_OUTSTANDING", + "number of cycles while DCU miss outstanding", 500 }, /* Intruction Fetch Unit (IFU) */ - { CTR_ALL, OP_IA_ALL, 0x80, 0, "IFU_IFETCH", 500 }, - { CTR_ALL, OP_IA_ALL, 0x81, 0, "IFU_IFETCH_MISS", 500 }, - { CTR_ALL, OP_IA_ALL, 0x85, 0, "ITLB_MISS", 500}, - { CTR_ALL, OP_IA_ALL, 0x86, 0, "IFU_MEM_STALL", 500 }, - { CTR_ALL, OP_IA_ALL, 0x87, 0, "ILD_STALL", 500 }, + { CTR_ALL, OP_IA_ALL, 0x80, &um_empty, "IFU_IFETCH", + "number of non/cachable instruction fetches", 500 }, + { CTR_ALL, OP_IA_ALL, 0x81, &um_empty, "IFU_IFETCH_MISS", + "number of instruction fetch misses", 500 }, + { CTR_ALL, OP_IA_ALL, 0x85, &um_empty, "ITLB_MISS", + "number of ITLB misses" ,500}, + { CTR_ALL, OP_IA_ALL, 0x86, &um_empty, "IFU_MEM_STALL", + "cycles instruction fetch pipe is stalled", 500 }, + { CTR_ALL, OP_IA_ALL, 0x87, &um_empty, "ILD_STALL", + "cycles instruction length decoder is stalled", 500 }, /* L2 Cache */ - { CTR_ALL, OP_IA_ALL, 0x28, 1, "L2_IFETCH", 500 }, - { CTR_ALL, OP_IA_ALL, 0x29, 1, "L2_LD", 500 }, - { CTR_ALL, OP_IA_ALL, 0x2a, 1, "L2_ST", 500 }, - { CTR_ALL, OP_IA_ALL, 0x24, 0, "L2_LINES_IN", 500 }, - { CTR_ALL, OP_IA_ALL, 0x26, 0, "L2_LINES_OUT", 500 }, - { CTR_ALL, OP_IA_ALL, 0x25, 0, "L2_M_LINES_INM", 500 }, - { CTR_ALL, OP_IA_ALL, 0x27, 0, "L2_M_LINES_OUTM", 500 }, - { CTR_ALL, OP_IA_ALL, 0x2e, 1, "L2_RQSTS", 500 }, - { CTR_ALL, OP_IA_ALL, 0x21, 0, "L2_ADS", 500 }, - { CTR_ALL, OP_IA_ALL, 0x22, 0, "L2_DBUS_BUSY", 500 }, - { CTR_ALL, OP_IA_ALL, 0x23, 0, "L2_DMUS_BUSY_RD", 500 }, + 
{ CTR_ALL, OP_IA_ALL, 0x28, &um_mesi, "L2_IFETCH", + "number of L2 instruction fetches", 500 }, + { CTR_ALL, OP_IA_ALL, 0x29, &um_mesi, "L2_LD", + "number of L2 data loads", 500 }, + { CTR_ALL, OP_IA_ALL, 0x2a, &um_mesi, "L2_ST", + "number of L2 data stores", 500 }, + { CTR_ALL, OP_IA_ALL, 0x24, &um_empty, "L2_LINES_IN", + "number of allocated lines in L2", 500 }, + { CTR_ALL, OP_IA_ALL, 0x26, &um_empty, "L2_LINES_OUT", + "number of recovered lines from L2", 500 }, + { CTR_ALL, OP_IA_ALL, 0x25, &um_empty, "L2_M_LINES_INM", + "number of modified lines allocated in L2", 500 }, + { CTR_ALL, OP_IA_ALL, 0x27, &um_empty, "L2_M_LINES_OUTM", + "number of modified lines removed from L2", 500 }, + { CTR_ALL, OP_IA_ALL, 0x2e, &um_mesi, "L2_RQSTS", + "number of L2 requests", 500 }, + { CTR_ALL, OP_IA_ALL, 0x21, &um_empty, "L2_ADS", + "number of L2 address strobes", 500 }, + { CTR_ALL, OP_IA_ALL, 0x22, &um_empty, "L2_DBUS_BUSY", + "number of cycles data bus was busy", 500 }, + { CTR_ALL, OP_IA_ALL, 0x23, &um_empty, "L2_DMUS_BUSY_RD", + "cycles data bus was busy in xfer from L2 to CPU", 500 }, /* External Bus Logic (EBL) */ - { CTR_ALL, OP_IA_ALL, 0x62, 2, "BUS_DRDY_CLOCKS", 500 }, - { CTR_ALL, OP_IA_ALL, 0x63, 2, "BUS_LOCK_CLOCKS", 500 }, - { CTR_ALL, OP_IA_ALL, 0x60, 0, "BUS_REQ_OUTSTANDING", 500 }, - { CTR_ALL, OP_IA_ALL, 0x65, 2, "BUS_TRAN_BRD", 500 }, - { CTR_ALL, OP_IA_ALL, 0x66, 2, "BUS_TRAN_RFO", 500 }, - { CTR_ALL, OP_IA_ALL, 0x67, 2, "BUS_TRANS_WB", 500 }, - { CTR_ALL, OP_IA_ALL, 0x68, 2, "BUS_TRAN_IFETCH", 500 }, - { CTR_ALL, OP_IA_ALL, 0x69, 2, "BUS_TRAN_INVAL", 500 }, - { CTR_ALL, OP_IA_ALL, 0x6a, 2, "BUS_TRAN_PWR", 500 }, - { CTR_ALL, OP_IA_ALL, 0x6b, 2, "BUS_TRANS_P", 500 }, - { CTR_ALL, OP_IA_ALL, 0x6c, 2, "BUS_TRANS_IO", 500 }, - { CTR_ALL, OP_IA_ALL, 0x6d, 2, "BUS_TRANS_DEF", 500 }, - { CTR_ALL, OP_IA_ALL, 0x6e, 2, "BUS_TRAN_BURST", 500 }, - { CTR_ALL, OP_IA_ALL, 0x70, 2, "BUS_TRAN_ANY", 500 }, - { CTR_ALL, OP_IA_ALL, 0x6f, 2, "BUS_TRAN_MEM", 500 }, - { 
CTR_ALL, OP_IA_ALL, 0x64, 0, "BUS_DATA_RCV", 500 }, - { CTR_ALL, OP_IA_ALL, 0x61, 0, "BUS_BNR_DRV", 500 }, - { CTR_ALL, OP_IA_ALL, 0x7a, 0, "BUS_HIT_DRV", 500 }, - { CTR_ALL, OP_IA_ALL, 0x7b, 0, "BUS_HITM_DRV", 500 }, - { CTR_ALL, OP_IA_ALL, 0x7e, 0, "BUS_SNOOP_STALL", 500 }, + { CTR_ALL, OP_IA_ALL, 0x62, &um_ebl, "BUS_DRDY_CLOCKS", + "number of clocks DRDY is asserted", 500 }, + { CTR_ALL, OP_IA_ALL, 0x63, &um_ebl, "BUS_LOCK_CLOCKS", + "number of clocks LOCK is asserted", 500 }, + { CTR_ALL, OP_IA_ALL, 0x60, &um_empty, "BUS_REQ_OUTSTANDING", + "number of outstanding bus requests", 500 }, + { CTR_ALL, OP_IA_ALL, 0x65, &um_ebl, "BUS_TRAN_BRD", + "number of burst read transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x66, &um_ebl, "BUS_TRAN_RFO", + "number of read for ownership transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x67, &um_ebl, "BUS_TRANS_WB", + "number of write back transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x68, &um_ebl, "BUS_TRAN_IFETCH", + "number of instruction fetch transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x69, &um_ebl, "BUS_TRAN_INVAL", + "number of invalidate transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x6a, &um_ebl, "BUS_TRAN_PWR", + "number of partial write transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x6b, &um_ebl, "BUS_TRANS_P", + "number of partial transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x6c, &um_ebl, "BUS_TRANS_IO", + "number of I/O transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x6d, &um_ebl, "BUS_TRANS_DEF", + "number of deferred transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x6e, &um_ebl, "BUS_TRAN_BURST", + "number of burst transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x70, &um_ebl, "BUS_TRAN_ANY", + "number of all transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x6f, &um_ebl, "BUS_TRAN_MEM", + "number of memory transactions", 500 }, + { CTR_ALL, OP_IA_ALL, 0x64, &um_empty, "BUS_DATA_RCV", + "bus cycles this processor is receiving data", 500 }, + { CTR_ALL, OP_IA_ALL, 0x61, &um_empty, "BUS_BNR_DRV", + "bus cycles this 
processor is driving BNR pin", 500 }, + { CTR_ALL, OP_IA_ALL, 0x7a, &um_empty, "BUS_HIT_DRV", + "bus cycles this processor is driving HIT pin", 500 }, + { CTR_ALL, OP_IA_ALL, 0x7b, &um_empty, "BUS_HITM_DRV", + "bus cycles this processor is driving HITM pin", 500 }, + { CTR_ALL, OP_IA_ALL, 0x7e, &um_empty, "BUS_SNOOP_STALL", + "cycles during bus snoop stall", 500 }, /* Floating Point Unit (FPU) */ - { CTR_0, OP_IA_ALL, 0xc1, 0, "COMP_FLOP_RET", 3000 }, - { CTR_0, OP_IA_ALL, 0x10, 0, "FLOPS", 3000 }, - { CTR_1, OP_IA_ALL, 0x11, 0, "FP_ASSIST", 500 }, - { CTR_1, OP_IA_ALL, 0x12, 0, "MUL", 1000 }, - { CTR_1, OP_IA_ALL, 0x13, 0, "DIV", 500 }, - { CTR_0, OP_IA_ALL, 0x14, 0, "CYCLES_DIV_BUSY", 1000 }, + { CTR_0, OP_IA_ALL, 0xc1, &um_empty, "COMP_FLOP_RET", + "number of computational FP operations retired", 3000 }, + { CTR_0, OP_IA_ALL, 0x10, &um_empty, "FLOPS", + "number of computational FP operations executed", 3000 }, + { CTR_1, OP_IA_ALL, 0x11, &um_empty, "FP_ASSIST", + "number of FP exceptions handled by microcode", 500 }, + { CTR_1, OP_IA_ALL, 0x12, &um_empty, "MUL", + "number of multiplies", 1000 }, + { CTR_1, OP_IA_ALL, 0x13, &um_empty, "DIV", + "number of divides", 500 }, + { CTR_0, OP_IA_ALL, 0x14, &um_empty, "CYCLES_DIV_BUSY", + "cycles divider is busy", 1000 }, /* Memory Ordering */ - { CTR_ALL, OP_IA_ALL, 0x03, 0, "LD_BLOCKS", 500 }, - { CTR_ALL, OP_IA_ALL, 0x04, 0, "SB_DRAINS", 500 }, - { CTR_ALL, OP_IA_ALL, 0x05, 0, "MISALIGN_MEM_REF", 500 }, + { CTR_ALL, OP_IA_ALL, 0x03, &um_empty, "LD_BLOCKS", + "number of store buffer blocks", 500 }, + { CTR_ALL, OP_IA_ALL, 0x04, &um_empty, "SB_DRAINS", + "number of store buffer drain cycles", 500 }, + { CTR_ALL, OP_IA_ALL, 0x05, &um_empty, "MISALIGN_MEM_REF", + "number of misaligned data memory references", 500 }, /* PIII KNI */ - { CTR_ALL, OP_PIII, 0x07, 7, "EMON_KNI_PREF_DISPATCHED", 500 }, - { CTR_ALL, OP_PIII, 0x4b, 7, "EMON_KNI_PREF_MISS", 500 }, + { CTR_ALL, OP_PIII, 0x07, &um_kni_prefetch, 
"EMON_KNI_PREF_DISPATCHED", + "number of KNI pre-fetch/weakly ordered insns dispatched", 500 }, + { CTR_ALL, OP_PIII, 0x4b, &um_kni_prefetch, "EMON_KNI_PREF_MISS", + "number of KNI pre-fetch/weakly ordered insns that miss all caches", 500 }, /* Instruction Decoding and Retirement */ - { CTR_ALL, OP_IA_ALL, 0xc0, 0, "INST_RETIRED", 6000 }, - { CTR_ALL, OP_IA_ALL, 0xc2, 0, "UOPS_RETIRED", 6000 }, - { CTR_ALL, OP_IA_ALL, 0xd0, 0, "INST_DECODED", 6000 }, + { CTR_ALL, OP_IA_ALL, 0xc0, &um_empty, "INST_RETIRED", + "number of instructions retired", 6000 }, + { CTR_ALL, OP_IA_ALL, 0xc2, &um_empty, "UOPS_RETIRED", + "number of UOPs retired", 6000 }, + { CTR_ALL, OP_IA_ALL, 0xd0, &um_empty, "INST_DECODED", + "number of instructions decoded", 6000 }, /* PIII KNI */ - { CTR_ALL, OP_PIII, 0xd8, 8, "EMON_KNI_INST_RETIRED", 3000 }, - { CTR_ALL, OP_PIII, 0xd9, 8, "EMON_KNI_COMP_INST_RET", 3000 }, + { CTR_ALL, OP_PIII, 0xd8, &um_kni_inst_retired, "EMON_KNI_INST_RETIRED", + "number of KNI instructions retired", 3000 }, + { CTR_ALL, OP_PIII, 0xd9, &um_kni_inst_retired, "EMON_KNI_COMP_INST_RET", + "number of KNI computation instructions retired", 3000 }, /* Interrupts */ - { CTR_ALL, OP_IA_ALL, 0xc8, 0, "HW_INT_RX", 500 }, - { CTR_ALL, OP_IA_ALL, 0xc6, 0, "CYCLES_INT_MASKED", 500 }, - { CTR_ALL, OP_IA_ALL, 0xc7, 0, "CYCLES_INT_PENDING_AND_MASKED", 500 }, + { CTR_ALL, OP_IA_ALL, 0xc8, &um_empty, "HW_INT_RX", + "number of hardware interrupts received", 500 }, + { CTR_ALL, OP_IA_ALL, 0xc6, &um_empty, "CYCLES_INT_MASKED", + "cycles interrupts are disabled", 500 }, + { CTR_ALL, OP_IA_ALL, 0xc7, &um_empty, "CYCLES_INT_PENDING_AND_MASKED", + "cycles interrupts are disabled with pending interrupts", 500 }, /* Branches */ - { CTR_ALL, OP_IA_ALL, 0xc4, 0, "BR_INST_RETIRED", 500 }, - { CTR_ALL, OP_IA_ALL, 0xc5, 0, "BR_MISS_PRED_RETIRED", 500 }, - { CTR_ALL, OP_IA_ALL, 0xc9, 0, "BR_TAKEN_RETIRED", 500 }, - { CTR_ALL, OP_IA_ALL, 0xca, 0, "BR_MISS_PRED_TAKEN_RET", 500 }, - { CTR_ALL, OP_IA_ALL, 
0xe0, 0, "BR_INST_DECODED", 500 }, - { CTR_ALL, OP_IA_ALL, 0xe2, 0, "BTB_MISSES", 500 }, - { CTR_ALL, OP_IA_ALL, 0xe4, 0, "BR_BOGUS", 500 }, - { CTR_ALL, OP_IA_ALL, 0xe6, 0, "BACLEARS", 500 }, + { CTR_ALL, OP_IA_ALL, 0xc4, &um_empty, "BR_INST_RETIRED", + "number of branch instructions retired", 500 }, + { CTR_ALL, OP_IA_ALL, 0xc5, &um_empty, "BR_MISS_PRED_RETIRED", + "number of mispredicted branches retired", 500 }, + { CTR_ALL, OP_IA_ALL, 0xc9, &um_empty, "BR_TAKEN_RETIRED", + "number of taken branches retired", 500 }, + { CTR_ALL, OP_IA_ALL, 0xca, &um_empty, "BR_MISS_PRED_TAKEN_RET", + "number of taken mispredictions branches retired", 500 }, + { CTR_ALL, OP_IA_ALL, 0xe0, &um_empty, "BR_INST_DECODED", + "number of branch instructions decoded", 500 }, + { CTR_ALL, OP_IA_ALL, 0xe2, &um_empty, "BTB_MISSES", + "number of branches that miss the BTB", 500 }, + { CTR_ALL, OP_IA_ALL, 0xe4, &um_empty, "BR_BOGUS", + "number of bogus branches", 500 }, + { CTR_ALL, OP_IA_ALL, 0xe6, &um_empty, "BACLEARS", + "number of times BACLEAR is asserted", 500 }, /* Stalls */ - { CTR_ALL, OP_IA_ALL, 0xa2, 0, "RESOURCE_STALLS", 500 }, - { CTR_ALL, OP_IA_ALL, 0xd2, 0, "PARTIAL_RAT_STALLS", 500 }, + { CTR_ALL, OP_IA_ALL, 0xa2, &um_empty, "RESOURCE_STALLS", + "cycles during resource related stalls", 500 }, + { CTR_ALL, OP_IA_ALL, 0xd2, &um_empty, "PARTIAL_RAT_STALLS", + "cycles or events for partial stalls", 500 }, /* Segment Register Loads */ - { CTR_ALL, OP_IA_ALL, 0x06, 0, "SEGMENT_REG_LOADS", 500 }, + { CTR_ALL, OP_IA_ALL, 0x06, &um_empty, "SEGMENT_REG_LOADS", + "number of segment register loads", 500 }, /* MMX (Pentium II only) */ - { CTR_ALL, OP_PII, 0xb0, 0, "MMX_INSTR_EXEC", 3000 }, - { CTR_ALL, OP_PII_PIII, 0xb1, 0, "MMX_SAT_INSTR_EXEC", 3000 }, - { CTR_ALL, OP_PII_PIII, 0xb2, 3, "MMX_UOPS_EXEC", 3000 }, - { CTR_ALL, OP_PII_PIII, 0xb3, 4, "MMX_INSTR_TYPE_EXEC", 3000 }, - { CTR_ALL, OP_PII_PIII, 0xcc, 5, "FP_MMX_TRANS", 3000 }, - { CTR_ALL, OP_PII_PIII, 0xcd, 0, "MMX_ASSIST", 500 }, 
- { CTR_ALL, OP_PII_PIII, 0xce, 0, "MMX_INSTR_RET", 3000 }, + { CTR_ALL, OP_PII, 0xb0, &um_empty, "MMX_INSTR_EXEC", + "number of MMX instructions executed", 3000 }, + { CTR_ALL, OP_PII_PIII, 0xb1, &um_empty, "MMX_SAT_INSTR_EXEC", + "number of MMX saturating instructions executed", 3000 }, + { CTR_ALL, OP_PII_PIII, 0xb2, &um_mmx_uops, "MMX_UOPS_EXEC", + "number of MMX UOPS executed", 3000 }, + { CTR_ALL, OP_PII_PIII, 0xb3, &um_mmx_instr_type_exec, "MMX_INSTR_TYPE_EXEC", + "number of MMX packing instructions", 3000 }, + { CTR_ALL, OP_PII_PIII, 0xcc, &um_mmx_trans, "FP_MMX_TRANS", + "MMX-floating point transitions", 3000 }, + { CTR_ALL, OP_PII_PIII, 0xcd, &um_empty, "MMX_ASSIST", + "number of EMMS instructions executed", 500 }, + { CTR_ALL, OP_PII_PIII, 0xce, &um_empty, "MMX_INSTR_RET", + "number of MMX instructions retired", 3000 }, /* segment renaming (Pentium II only) */ - { CTR_ALL, OP_PII, 0xd4, 6, "SEG_RENAME_STALLS", 500 }, - { CTR_ALL, OP_PII, 0xd5, 6, "SEG_REG_RENAMES", 500 }, - { CTR_ALL, OP_PII, 0xd6, 0, "RET_SEG_RENAMES", 500 }, + { CTR_ALL, OP_PII, 0xd4, &um_seg_rename, "SEG_RENAME_STALLS", + "number of segment register renaming stalls", 500 }, + { CTR_ALL, OP_PII, 0xd5, &um_seg_rename, "SEG_REG_RENAMES", + "number of segment register renames", 500 }, + { CTR_ALL, OP_PII, 0xd6, &um_empty, "RET_SEG_RENAMES", + "number of segment register rename events retired", 500 }, /* athlon events */ - { CTR_ALL, OP_ATHLON, 0xc0, 0, "RETIRED_INSNS", 3000,}, - { CTR_ALL, OP_ATHLON, 0xc1, 0, "RETIRED_OPS", 500,}, - { CTR_ALL, OP_ATHLON, 0x80, 0, "ICACHE_FETCHES", 500,}, - { CTR_ALL, OP_ATHLON, 0x81, 0, "ICACHE_MISSES", 500,}, - { CTR_ALL, OP_ATHLON, 0x40, 0, "DATA_CACHE_ACCESSES", 500,}, - { CTR_ALL, OP_ATHLON, 0x41, 0, "DATA_CACHE_MISSES", 500,}, - { CTR_ALL, OP_ATHLON, 0x42, 9, "DATA_CACHE_REFILLS_FROM_L2", 500,}, - { CTR_ALL, OP_ATHLON, 0x43, 9, "DATA_CACHE_REFILLS_FROM_SYSTEM", 500,}, - { CTR_ALL, OP_ATHLON, 0x44, 9, "DATA_CACHE_WRITEBACKS", 500,}, - { CTR_ALL, 
OP_ATHLON, 0xc2, 0, "RETIRED_BRANCHES", 500,}, - { CTR_ALL, OP_ATHLON, 0xc3, 0, "RETIRED_BRANCHES_MISPREDICTED", 500,}, - { CTR_ALL, OP_ATHLON, 0xc4, 0, "RETIRED_TAKEN_BRANCHES", 500,}, - { CTR_ALL, OP_ATHLON, 0xc5, 0, "RETIRED_TAKEN_BRANCHES_MISPREDICTED", 500,}, - { CTR_ALL, OP_ATHLON, 0x45, 0, "L1_DTLB_MISSES_L2_DTLD_HITS", 500,}, - { CTR_ALL, OP_ATHLON, 0x46, 0, "L1_AND_L2_DTLB_MISSES", 500,}, - { CTR_ALL, OP_ATHLON, 0x47, 0, "MISALIGNED_DATA_REFS", 500,}, - { CTR_ALL, OP_ATHLON, 0x84, 0, "L1_ITLB_MISSES_L2_ITLB_HITS", 500,}, - { CTR_ALL, OP_ATHLON, 0x85, 0, "L1_AND_L2_ITLB_MISSES", 500,}, - { CTR_ALL, OP_ATHLON, 0xc6, 0, "RETIRED_FAR_CONTROL_TRANSFERS", 500,}, - { CTR_ALL, OP_ATHLON, 0xc7, 0, "RETIRED_RESYNC_BRANCHES", 500,}, - { CTR_ALL, OP_ATHLON, 0xcd, 0, "INTERRUPTS_MASKED", 500,}, - { CTR_ALL, OP_ATHLON, 0xce, 0, "INTERRUPTS_MASKED_PENDING", 500,}, - { CTR_ALL, OP_ATHLON, 0xcf, 0, "HARDWARE_INTERRUPTS", 10,}, + { CTR_ALL, OP_ATHLON, 0xc0, &um_empty, "RETIRED_INSNS", + "Retired instructions (includes exceptions, interrupts, resyncs)", 3000,}, + { CTR_ALL, OP_ATHLON, 0xc1, &um_empty, "RETIRED_OPS", + "Retired Ops", 500,}, + { CTR_ALL, OP_ATHLON, 0x80, &um_empty, "ICACHE_FETCHES", + "Instruction cache fetches", 500,}, + { CTR_ALL, OP_ATHLON, 0x81, &um_empty, "ICACHE_MISSES", + "Instruction cache misses", 500,}, + { CTR_ALL, OP_ATHLON, 0x40, &um_empty, "DATA_CACHE_ACCESSES", + "Data cache accesses", 500,}, + { CTR_ALL, OP_ATHLON, 0x41, &um_empty, "DATA_CACHE_MISSES", + "Data cache misses", 500,}, + { CTR_ALL, OP_ATHLON, 0x42, &um_moesi, "DATA_CACHE_REFILLS_FROM_L2", + "Data cache refills from L2", 500,}, + { CTR_ALL, OP_ATHLON, 0x43, &um_moesi, "DATA_CACHE_REFILLS_FROM_SYSTEM", + "Data cache refills from system", 500,}, + { CTR_ALL, OP_ATHLON, 0x44, &um_moesi, "DATA_CACHE_WRITEBACKS", + "Data cache write backs", 500,}, + { CTR_ALL, OP_ATHLON, 0xc2, &um_empty, "RETIRED_BRANCHES", + "Retired branches (conditional, unconditional, exceptions, interrupts)", 500,}, 
+ { CTR_ALL, OP_ATHLON, 0xc3, &um_empty, "RETIRED_BRANCHES_MISPREDICTED", + "Retired branches mispredicted", 500,}, + { CTR_ALL, OP_ATHLON, 0xc4, &um_empty, "RETIRED_TAKEN_BRANCHES", + "Retired taken branches", 500,}, + { CTR_ALL, OP_ATHLON, 0xc5, &um_empty, "RETIRED_TAKEN_BRANCHES_MISPREDICTED", + "Retired taken branches mispredicted", 500,}, + { CTR_ALL, OP_ATHLON, 0x45, &um_empty, "L1_DTLB_MISSES_L2_DTLD_HITS", + "L1 DTLB misses and L2 DTLB hits", 500,}, + { CTR_ALL, OP_ATHLON, 0x46, &um_empty, "L1_AND_L2_DTLB_MISSES", + "L1 and L2 DTLB misses", 500,}, + { CTR_ALL, OP_ATHLON, 0x47, &um_empty, "MISALIGNED_DATA_REFS", + "Misaligned data references", 500,}, + { CTR_ALL, OP_ATHLON, 0x84, &um_empty, "L1_ITLB_MISSES_L2_ITLB_HITS", + "L1 ITLB misses (and L2 ITLB hits)", 500,}, + { CTR_ALL, OP_ATHLON, 0x85, &um_empty, "L1_AND_L2_ITLB_MISSES", + "L1 and L2 ITLB misses", 500,}, + { CTR_ALL, OP_ATHLON, 0xc6, &um_empty, "RETIRED_FAR_CONTROL_TRANSFERS", + "Retired far control transfers", 500,}, + { CTR_ALL, OP_ATHLON, 0xc7, &um_empty, "RETIRED_RESYNC_BRANCHES", + "Retired resync branches (only non-control transfer branches counted)", 500,}, + { CTR_ALL, OP_ATHLON, 0xcd, &um_empty, "INTERRUPTS_MASKED", + "Interrupts masked cycles (IF=0)", 500,}, + { CTR_ALL, OP_ATHLON, 0xce, &um_empty, "INTERRUPTS_MASKED_PENDING", + "Interrupts masked while pending cycles (INTR while IF=0)", 500,}, + { CTR_ALL, OP_ATHLON, 0xcf, &um_empty, "HARDWARE_INTERRUPTS", + "Number of taken hardware interrupts", 10,}, /* other CPUs */ - { CTR_0, OP_RTC, 0xff, 0, "RTC_Interrupts", 2,}, + { CTR_0, OP_RTC, 0xff, &um_empty, "RTC_Interrupts", + "RTC interrupts/sec (rounded up to power of two)", 2,}, }; + /* the total number of events for all processor type */ u32 op_nr_events = (sizeof(op_events)/sizeof(op_events[0])); @@ -199,7 +362,7 @@ * > 0 otherwise, in this case allow->um[return value - 1] == um so the * caller can access to the description of the unit_mask. 
*/ -int op_check_unit_mask(struct op_unit_mask * allow, u8 um) +int op_check_unit_mask(struct op_unit_mask const* allow, u16 um) { u32 i, mask; @@ -207,7 +370,7 @@ case utm_exclusive: case utm_mandatory: for (i=0; i < allow->num; i++) { - if (allow->um[i] == um) + if (allow->um[i].value == um) return i + 1; } break; @@ -219,11 +382,11 @@ mask = 0; for (i=0; i < allow->num; i++) { - if (allow->um[i] == um) + if (allow->um[i].value == um) /* it is an exact match so return the index + 1 */ return i + 1; - mask |= allow->um[i]; + mask |= allow->um[i].value; } if ((mask & um) == um) @@ -274,12 +437,14 @@ * * 2 Pentium III * - * 3 AMD Athlon + * 3 Pentium 4 / Xeon + * + * 4 AMD Athlon * * The function returns bitmask of failure cause * 0 otherwise */ -int op_check_events(int ctr, u8 ctr_type, u8 ctr_um, op_cpu cpu_type) +int op_check_events(int ctr, u8 ctr_type, u16 ctr_um, op_cpu cpu_type) { int ret = OP_OK_EVENT; u32 i; @@ -292,7 +457,7 @@ ret |= OP_INVALID_COUNTER; if (op_events[i].unit && - op_check_unit_mask(&op_unit_masks[op_events[i].unit], ctr_um) < 0) + op_check_unit_mask(op_events[i].unit, ctr_um) < 0) ret |= OP_INVALID_UM; break; } Index: libop/op_events.h =================================================================== RCS file: /cvsroot/oprofile/oprofile/libop/op_events.h,v retrieving revision 1.9 diff -u -r1.9 op_events.h --- libop/op_events.h 7 Sep 2002 18:19:35 -0000 1.9 +++ libop/op_events.h 18 Sep 2002 17:17:35 -0000 @@ -27,15 +27,6 @@ OP_INVALID_COUNTER = 4, /**< event is not allowed for the given counter */ }; -/** Describe an event. */ -struct op_event { - u32 counter_mask; /**< bitmask of allowed counter */ - u16 cpu_mask; /**< bitmask of allowed cpu_type */ - u8 val; /**< event number */ - u8 unit; /**< which unit mask if any allowed */ - char const * name; /**< the event name */ - int min_count; /**< minimum counter value allowed */ -}; /** Describe an unit mask type. Events can optionnaly use a filter called * the unit mask. 
the mask type can be a bitmask or a discrete value */ @@ -49,8 +40,25 @@ struct op_unit_mask { u32 num; /**< number of possible unit masks */ enum unit_mask_type unit_type_mask; - u8 default_mask; /**< only the gui use it */ - u8 um[7]; /**< up to seven allowed unit masks */ + u16 default_mask; /**< only the gui use it */ + /**< up to sixteen allowed unit masks */ + struct op_described_um { + u16 value; + const char *desc; + } um[16]; +}; + + +/** Describe an event. */ +struct op_event { + u32 counter_mask; /**< bitmask of allowed counter */ + u16 cpu_mask; /**< bitmask of allowed cpu_type */ + u8 val; /**< event number */ + /**< which unit mask if any allowed */ + struct op_unit_mask const *unit; + const char * name; /**< the event name */ + const char * desc; /**< the event description */ + int min_count; /**< minimum counter value allowed */ }; /** @@ -74,7 +82,7 @@ * * \sa op_cpu, OP_EVENTS_OK */ -int op_check_events(int ctr, u8 ctr_type, u8 ctr_um, op_cpu cpu_type); +int op_check_events(int ctr, u8 ctr_type, u16 ctr_um, op_cpu cpu_type); /** * sanity check unit mask value @@ -92,7 +100,7 @@ * the unit_mask through op_unit_descs * \sa op_unit_descs */ -int op_check_unit_mask(struct op_unit_mask * allow, u8 um); +int op_check_unit_mask(struct op_unit_mask const* allow, u16 um); /** a special constant meaning this event is available for all counters */ #define CTR_ALL (~0u) Index: libop/op_events_desc.c =================================================================== RCS file: /cvsroot/oprofile/oprofile/libop/op_events_desc.c,v retrieving revision 1.10 diff -u -r1.10 op_events_desc.c --- libop/op_events_desc.c 7 Sep 2002 18:19:35 -0000 1.10 +++ libop/op_events_desc.c 18 Sep 2002 17:17:35 -0000 @@ -20,171 +20,6 @@ #include "op_events.h" #include "op_events_desc.h" -struct op_unit_desc op_unit_descs[] = { - { { NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, }, - { { "(M)odified cache state", - "(E)xclusive cache state", - "(S)hared cache state", - "(I)nvalid cache 
state", - "all MESI cache state", NULL, NULL, }, }, - { { "self-generated transactions", - "any transactions", NULL, NULL, NULL, NULL, NULL, }, }, - { { "mandatory", NULL, NULL, NULL, NULL, NULL, NULL, }, }, - { { "MMX packed multiplies", - "MMX packed shifts", - "MMX pack operations", - "MMX unpack operations", - "MMX packed logical", - "MMX packed arithmetic", - "All the above" }, }, - { { "MMX->float transitions", - "float->MMX transitions", - NULL, NULL, NULL, NULL, NULL, }, }, - { { "ES register", - "DS register", - "FS register", - /* IA manual says this is actually FS again - no mention in errata */ - /* but test show that is really a typo error from IA manual */ - "GS register", - "ES,DS,FS,GS registers", NULL, NULL }, }, - { { "prefetch NTA", - "prefetch T1", - "prefetch T2", - "weakly ordered stores", NULL, NULL, NULL, }, }, - { { "packed and scalar", "packed", NULL, NULL, NULL, NULL, NULL, }, }, - { { "(M)odified cache state", - "(O)wner cache state", - "(E)xclusive cache state", - "(S)hared cache state", - "(I)nvalid cache state", - "all MOESI cache state", NULL, }, }, -}; - -char * op_event_descs[] = { - "clocks processor is not halted", - /* Data Cache Unit (DCU) */ - "all memory references, cachable and non", - "total lines allocated in the DCU", - "number of M state lines allocated in DCU", - "number of M lines evicted from the DCU", - "number of cycles while DCU miss outstanding", - /* Intruction Fetch Unit (IFU) */ - "number of non/cachable instruction fetches", - "number of instruction fetch misses", - "number of ITLB misses", - "cycles instruction fetch pipe is stalled", - "cycles instruction length decoder is stalled", - /* L2 Cache */ - "number of L2 instruction fetches", - "number of L2 data loads", - "number of L2 data stores", - "number of allocated lines in L2", - "number of recovered lines from L2", - "number of modified lines allocated in L2", - "number of modified lines removed from L2", - "number of L2 requests", - "number of L2 
address strobes", - "number of cycles data bus was busy", - "cycles data bus was busy in xfer from L2 to CPU", - /* External Bus Logic (EBL) */ - "number of clocks DRDY is asserted", - "number of clocks LOCK is asserted", - "number of outstanding bus requests", - "number of burst read transactions", - "number of read for ownership transactions", - "number of write back transactions", - "number of instruction fetch transactions", - "number of invalidate transactions", - "number of partial write transactions", - "number of partial transactions", - "number of I/O transactions", - "number of deferred transactions", - "number of burst transactions", - "number of all transactions", - "number of memory transactions", - "bus cycles this processor is receiving data", - "bus cycles this processor is driving BNR pin", - "bus cycles this processor is driving HIT pin", - "bus cycles this processor is driving HITM pin", - "cycles during bus snoop stall", - /* Floating Point Unit (FPU) */ - "number of computational FP operations retired", - "number of computational FP operations executed", - "number of FP exceptions handled by microcode", - "number of multiplies", - "number of divides", - "cycles divider is busy", - /* Memory Ordering */ - "number of store buffer blocks", - "number of store buffer drain cycles", - "number of misaligned data memory references", - /* PIII KNI */ - "number of KNI pre-fetch/weakly ordered insns dispatched", - "number of KNI pre-fetch/weakly ordered insns that miss all caches", - /* Instruction Decoding and Retirement */ - "number of instructions retired", - "number of UOPs retired", - "number of instructions decoded", - /* PIII KNI */ - "number of KNI instructions retired", - "number of KNI computation instructions retired", - /* Interrupts */ - "number of hardware interrupts received", - "cycles interrupts are disabled", - "cycles interrupts are disabled with pending interrupts", - /* Branches */ - "number of branch instructions retired", - "number 
of mispredicted branches retired", - "number of taken branches retired", - "number of taken mispredictions branches retired", - "number of branch instructions decoded", - "number of branches that miss the BTB", - "number of bogus branches", - "number of times BACLEAR is asserted", - /* Stalls */ - "cycles during resource related stalls", - "cycles or events for partial stalls", - /* Segment Register Loads */ - "number of segment register loads", - /* MMX (Pentium II only) */ - "number of MMX instructions executed", - "number of MMX saturating instructions executed", - "number of MMX UOPS executed", - "number of MMX packing instructions", - "MMX-floating point transitions", - "number of EMMS instructions executed", - "number of MMX instructions retired", - /* segment renaming (Pentium II only) */ - "number of segment register renaming stalls", - "number of segment register renames", - "number of segment register rename events retired", - /* Athlon/Duron */ - "Retired instructions (includes exceptions, interrupts, resyncs)", - "Retired Ops", - "Instruction cache fetches)", - "Instruction cache misses)", - "Data cache accesses", - "Data cache misses", - "Data cache refills from L2", - "Data cache refills from system", - "Data cache write backs", - "Retired branches (conditional, unconditional, exceptions, interrupts)", - "Retired branches mispredicted", - "Retired taken branches", - "Retired taken branches mispredicted", - "L1 DTLB misses and L2 DTLB hits", - "L1 and L2 DTLB misses", - "Misaligned data references", - "L1 ITLB misses (and L2 ITLB hits)", - "L1 and L2 ITLB misses", - "Retired far control transfers", - "Retired resync branches (only non-control transfer branches counted)", - "Interrupts masked cycles (IF=0)", - "Interrupts masked while pending cycles (INTR while IF=0)", - "Number of taken hardware interrupts", - "RTC interrupts/sec (rounded up to power of two)", -}; - /** * op_get_um_desc - verify and get unit mask description * @param op_events_index 
the index of the events in op_events array @@ -197,16 +32,12 @@ * NULL if um is invalid. * This string is in text section so should not be freed. */ -static char * op_get_um_desc(u32 op_events_index, u8 um) +static const char * op_get_um_desc(u32 op_events_index, u16 um) { - struct op_unit_mask * op_um_mask; + struct op_unit_mask const * op_um_mask; int um_mask_desc_index; - u32 um_mask_index = op_events[op_events_index].unit; - - if (!um_mask_index) - return NULL; - op_um_mask = &op_unit_masks[um_mask_index]; + op_um_mask = op_events[op_events_index].unit; um_mask_desc_index = op_check_unit_mask(op_um_mask, um); if (um_mask_desc_index == -1) @@ -216,7 +47,7 @@ return "set with multiple units, check the documentation"; } - return op_unit_descs[um_mask_index].desc[um_mask_desc_index-1]; + return op_um_mask->um[um_mask_desc_index-1].desc; } /** @@ -238,8 +69,10 @@ * NULL when @um is invalid for the given @type value. * These strings are in text section so should not be freed. */ -void op_get_event_desc(op_cpu cpu_type, u8 type, u8 um, - char ** typenamep, char ** typedescp, char ** umdescp) +void op_get_event_desc(op_cpu cpu_type, u8 type, u16 um, + const char ** typenamep, + const char ** typedescp, + const char ** umdescp) { u32 i; int cpu_mask = 1 << cpu_type; @@ -248,8 +81,8 @@ for (i=0; i < op_nr_events; i++) { if (op_events[i].val == type && (op_events[i].cpu_mask & cpu_mask)) { - *typenamep = (char *)op_events[i].name; - *typedescp = op_event_descs[i]; + *typenamep = op_events[i].name; + *typedescp = op_events[i].desc; *umdescp = op_get_um_desc(i, um); break; Index: libop/op_events_desc.h =================================================================== RCS file: /cvsroot/oprofile/oprofile/libop/op_events_desc.h,v retrieving revision 1.9 diff -u -r1.9 op_events_desc.h --- libop/op_events_desc.h 7 Sep 2002 18:19:35 -0000 1.9 +++ libop/op_events_desc.h 18 Sep 2002 17:17:35 -0000 @@ -20,7 +20,7 @@ /** Human readable description for an unit mask. 
*/ struct op_unit_desc { - char * desc[7]; + char * desc[16]; }; /** @@ -41,15 +41,11 @@ * NULL when um is invalid for the given type value. * These strings are static and should not be freed. */ -void op_get_event_desc(op_cpu cpu_type, u8 type, u8 um, - char ** typenamep, char ** typedescp, char ** umdescp); +void op_get_event_desc(op_cpu cpu_type, u8 type, u16 um, + const char ** typenamep, + const char ** typedescp, + const char ** umdescp); -/** unit mask description */ -extern struct op_unit_mask op_unit_masks[]; -/** unit mask string description */ -extern struct op_unit_desc op_unit_descs[]; -/** events string description */ -extern char * op_event_descs[]; /** description of events for all processor type */ extern struct op_event op_events[]; /** the total number of events for all processor type, allowing to iterate Index: libop++/op_print_event.h =================================================================== RCS file: /cvsroot/oprofile/oprofile/libop++/op_print_event.h,v retrieving revision 1.6 diff -u -r1.6 op_print_event.h --- libop++/op_print_event.h 7 Sep 2002 18:19:36 -0000 1.6 +++ libop++/op_print_event.h 18 Sep 2002 17:17:35 -0000 @@ -23,6 +23,6 @@ * to the stream. 
*/ void op_print_event(std::ostream & out, int counter_nr, - op_cpu cpu_type, u8 type, u8 um, u32 count); + op_cpu cpu_type, u8 type, u16 um, u32 count); #endif // OP_PRINT_EVENT Index: libop++/op_print_event.cpp =================================================================== RCS file: /cvsroot/oprofile/oprofile/libop++/op_print_event.cpp,v retrieving revision 1.4 diff -u -r1.4 op_print_event.cpp --- libop++/op_print_event.cpp 7 Sep 2002 18:19:36 -0000 1.4 +++ libop++/op_print_event.cpp 18 Sep 2002 17:17:35 -0000 @@ -23,11 +23,11 @@ using std::setfill; void op_print_event(ostream & out, int counter_nr, op_cpu cpu_type, - u8 type, u8 um, u32 count) + u8 type, u16 um, u32 count) { - char * typenamep; - char * typedescp; - char * umdescp; + const char * typenamep; + const char * typedescp; + const char * umdescp; op_get_event_desc(cpu_type, type, um, &typenamep, &typedescp, &umdescp); Index: utils/op_help.c =================================================================== RCS file: /cvsroot/oprofile/oprofile/utils/op_help.c,v retrieving revision 1.7 diff -u -r1.7 op_help.c --- utils/op_help.c 7 Sep 2002 18:19:41 -0000 1.7 +++ utils/op_help.c 18 Sep 2002 17:17:35 -0000 @@ -62,18 +62,17 @@ } } - printf(")\n\t%s (min count: %d)\n", op_event_descs[i], op_events[i].min_count); + printf(")\n\t%s (min count: %d)\n", op_events[i].desc, op_events[i].min_count); if (op_events[i].unit) { - int unit_idx = op_events[i].unit; printf("\tUnit masks\n"); printf("\t----------\n"); - for (j=0; j < op_unit_masks[unit_idx].num; j++) { + for (j=0; j < op_events[i].unit->num; j++) { printf("\t%.2x: %s\n", - op_unit_masks[unit_idx].um[j], - op_unit_descs[unit_idx].desc[j]); + op_events[i].unit->um[j].value, + op_events[i].unit->um[j].desc); } } } |
From: John L. <le...@mo...> - 2002-09-18 18:07:59
|
On Wed, Sep 18, 2002 at 01:55:44PM -0400, gr...@re... wrote: > separation of items between patches yesterday; I'll go back over > yesterday's x86-model-specific reorg patch one more time and resubmit > when I'm sure it applies, builds and runs against cvs. > > until then, of course, comments on the content would be appreciated > (I'll fix the spacing too :) I have one problem that Phil raised. Currently your model patch uses model() in the NMI handler. The only reason this is done is to get the nr of physical counters. Seeing as it's crucial to oprofile's performance, it's a bit silly to have a switch and a dereference to get something that won't change. Instead, I would prefer to keep an "op_nr_counters" in op_nmi.c, and initialise this via model()->nr_counters just once. That way we don't need to use model() inside the NMI handler. Apart from that, your model-specific patch looks good to me. I haven't tested it though. regards john |
From: <gr...@re...> - 2002-09-18 18:57:59
|
At Wed, 18 Sep 2002 19:04:17 +0100, John Levon wrote: > I have one problem that Phil raised. Currently your model patch uses > model() in the NMI handler. The only reason this is done is to get > the nr of physical counters. no, it's done to switch into a different code path depending on the processor model. in fact, the number of counters is hard-coded into each model-specific code path (though the counter MSR addresses are not). > Seeing as it's crucial to oprofile's performance, it's a bit silly > to have a switch and a dereference to get something that won't > change. agreed; the only reason I have model() used at all, rather than set a static struct op_x86_model_spec * once and have it visible for the whole file, is that doing so required initializing the model_spec down in the pmc_add_syscalls function, since it is the first one called by the outside. This seemed a little ugly, and I wanted to make the patch a little robust in the face of initialization order changes, but you're right that it's performance critical. if you really want the NMI path to go fast, I should move the counter MSR addresses back out of the save/restore structures, into an array of their own (i.e. back to perfctr_msr) so they'll sit in a single cache line, as well as move the model pointer into a file-visible static variable. btw: what technique would you recommend for measuring the overhead of the module? I'd like to mechanically ensure our additions do not make it worse. -graydon |
From: Philippe E. <ph...@wa...> - 2002-09-18 19:24:59
|
gr...@re... wrote: > At Wed, 18 Sep 2002 19:04:17 +0100, > John Levon wrote: [...] > > btw: what technique would you recommend for measuring the overhead of > the module? I'd like to mechanically ensure our additions do not make > it worse. currently I use two tests: a kernel compile w/o profiling and with profiling at different rates, and a bzip2 test compressing and decompressing a 256 MB file (test file generated by tarring a huge source dir). Each test is run three times. For testing module overhead the bzip2 test is better, because a kernel compile involves a lot of process creation and NMI overhead has a tendency to be hidden by syscall/daemon overhead. regards, Phil |
From: William C. <wc...@nc...> - 2002-09-18 20:10:09
|
The nmi code is the one area that oprofile can't measure directly. However, one could get an idea of the overhead by looking at the sample rate. The sample rate for CPU_CLOCK_UNHALTED is nominally set to collect 2000 samples a second when using oprof_start. The difference between the expected number of samples (2000 * number of seconds running) and the number of samples reported by oprof_start gives an estimate of the overhead: 2000 samples/second would imply 0 overhead, and a shortfall of 200 samples/second would imply about a 10% overhead in the nmi. Another option for measuring the performance of the nmi is to reserve one of the performance monitoring registers and use it as a regular counter, taking the difference between the counter value on exit from the nmi routine and the counter value on entry to the nmi. This has overhead associated with it, and there may be some overhead before and after the nmi that may be missed. -Will gr...@re... wrote: > At Wed, 18 Sep 2002 19:04:17 +0100, > John Levon wrote: >>Seeing as it's crucial to oprofile's performance, it's a bit silly >>to have a switch and a dereference to get something that won't >>change. > > > agreed; the only reason I have model() used at all, rather than set a > static struct op_x86_model_spec * once and have it visible for the > whole file, is that doing so required initializing the model_spec down > in the pmc_add_syscalls function, since it is the first one called by > the outside. This seemed a little ugly, and I wanted to make the patch > a little robust in the face of initialization order changes, but > you're right that it's performance critical. > > if you really want the NMI path to go fast, I should move the counter > MSR addresses back out of the save/restore structures, into an array > of their own (i.e. back to perfctr_msr) so they'll sit in a single > cache line, as well as move the model pointer into a file-visible > static variable. > > btw: what technique would you recommend for measuring the overhead of > the module? 
I'd like to mechanically ensure our additions do not make > it worse. > > -graydon > > > ------------------------------------------------------- > This SF.NET email is sponsored by: AMD - Your access to the experts > on Hammer Technology! Open Source & Linux Developers, register now > for the AMD Developer Symposium. Code: EX8664 > http://www.developwithamd.com/developerlab > _______________________________________________ > oprofile-list mailing list > opr...@li... > https://lists.sourceforge.net/lists/listinfo/oprofile-list > |
From: John L. <le...@mo...> - 2002-09-18 18:26:38
|
On Wed, Sep 18, 2002 at 01:55:44PM -0400, gr...@re... wrote: > attached is a patch which alters the way event descriptions, unit > masks, and unit mask descriptions are declared, hopefully to > "future-proof" it a little as we bulk out the number of events > supported. The patch removes most uses of cross-table indexing in > libop/op_events*, using explicit pointers between events and unit > masks, and merging the descriptions into the mask or event tables > directly. Looks good to me, I think. We'll bloat the kernel module a little bit with the description strings, but who cares ? I see a little bit of P4 seepage with the change to u16 for the unit mask. Have you caught all the places correctly ? > + /**< up to sixteen allowed unit masks */ > + struct op_described_um { > + u16 value; > + const char *desc; > + } um[16]; Doxygen won't like this. Make it /** up to sixteen allowed unit masks */ instead. char const * desc; instead. > + /**< which unit mask if any allowed */ > + struct op_unit_mask const *unit; ditto Unfortunately you also broke the GUI. Phil, what do you think the best solution here is ? One minor thing from op_help : ITLB_MISS: (counter: all) (supported cpu: Pentium Pro, PII, PIII) number of ITLB misses (min count: 500) Unit masks ---------- ... We should not print out unit mask header if there are no unit masks. Finally, I need this change in op_check_events() from your patch : 459 if (op_events[i].unit != &um_empty && 460 op_check_unit_mask(op_events[i].unit, ctr_um) < 0) 461 ret |= OP_INVALID_UM; 462 break; .unit is always non-zero now. regards john |
From: Philippe E. <ph...@wa...> - 2002-09-18 19:58:46
|
John Levon wrote: > On Wed, Sep 18, 2002 at 01:55:44PM -0400, gr...@re... wrote: > > >>attached is a patch which alters the way event descriptions, unit >>masks, and unit mask descriptions are declared, hopefully to >>"future-proof" it a little as we bulk out the number of events >>supported. The patch removes most uses of cross-table indexing in >>libop/op_events*, using explicit pointers between events and unit >>masks, and merging the descriptions into the mask or event tables >>directly. > > > Looks good to me, I think. We'll bloat the kernel module a little bit > with the description strings, but who cares ? Me; is it not a problem in the long term? Each newly supported x86 CPU will add unswappable memory to the kernel module. Someone (Dave ?) proposed generating the data struct from a text file. We should use this idea to generate the data struct for the module w/o all the help stuff if we don't need it. Some sort of xml format would perhaps be a good idea, parsed with standard xml tools (?), putting the generated file in cvs. We can also probably generate the sourceforge oprofile events docs and reuse this file in doc/oprofile.xml. For now don't worry about that, we will see later. [...] > > Unfortunately you also broke the GUI. Phil, what do you think the best > solution here is ? Graydon, do you plan to fix the GUI, or do you prefer that *cough* I *cough* fix it ? regards, Phil |
From: <gr...@re...> - 2002-09-18 20:36:47
|
At Wed, 18 Sep 2002 22:05:03 +0200, Philippe Elie wrote: > Me, is it not a problem at long term? Each new x86 support > will add unswappable memory in kernel module. Someone (Dave ?) > propose to generate the data struct from a text file. it may not be my place to say it, but I'd be averse to over-engineering this too much. you can accomplish modest compile-time switchery like this using cpp macros, and then the build headache will be far less, plus you're not committing generated code to CVS. for example, here's a sketch of a solution: /* op_events.h */ struct op_kernel_event { u32 counter_mask; u16 cpu_mask; u8 val; struct op_unit_mask const * unit; int min_count; }; struct op_userspace_event { u32 counter_mask; u16 cpu_mask; u8 val; struct op_unit_mask const * unit; const char * name; const char * desc; int min_count; }; extern struct op_kernel_event op_kernel_events[]; extern struct op_userspace_event op_userspace_events[]; /* event_table.h */ struct PREFIX(event) PREFIX(events)[] = { EV( CTR_ALL, OP_IA_ALL, 0x43, &um_empty, "DATA_MEM_REFS", "all memory references, cachable and non", 500 ), ... }; /* events_kernel.c */ #define EV(ctr, cpu, ev, unit, name, desc, min) \ {(ctr), (cpu), (ev), (unit), (min)} #define PREFIX(sym) op_kernel_ ##sym #include "event_table.h" /* events_userspace.c */ #define EV(ctr, cpu, ev, unit, name, desc, min) \ {(ctr), (cpu), (ev), (unit), (name), (desc), (min)} #define PREFIX(sym) op_userspace_ ##sym #include "event_table.h" compile them separately and you get the same effect; you'll only link the events_kernel.o object from libop.a into the kernel module. > Graydon you planned to fix the GUI or you prefer *cough* I *cough* > fix it ? I am rebuilding QT presently (my net QT was built on g++ 2.95.x, and it doesn't seem to like my g++ 3.2) after which it will be added as part of my test routine, and I will fix it. probably this will take the rest of the day, since QT is so enormous. sorry about the breaks. -graydon |
From: John L. <le...@mo...> - 2002-09-19 20:04:26
|
On Wed, Sep 18, 2002 at 04:36:42PM -0400, gr...@re... wrote: > it may not be my place to say it, I'd be adverse to over-engineering > this too much. you can accomplish modest compile-time switchery like > this using cpp macros, and then the build headache will be far less, > plus you're not committing generated code to CVS. for example, here's > a sketch of a solution: This seems a bit hacky too. There are 3/4 things we want to generate : 1) stuff the module needs (I am a bit dubious about this tbh) 2) stuff userspace daemon needs 3) the gui (think about generating the relevant part of the UI) 4) the website list (well, would be nice) I reckon some simple bespoke format would be good. We need to bear in mind non-x86-alike architectures too regards john -- Support the project - http://www.gtonline.net/private/mapp/project/ |