From: Suravee S. <sur...@am...> - 2010-11-15 15:47:25
|
This patch implements changes for IBS in family12h/14h/15h: - Add checks for the maximum IBS count and the IBS extended count. - Add support for the IBS extended register (branch target address register). - Changes for the IBS derived performance event translation: * Enhance existing data translation logic and interface. * Add IBS Op memory access data translation. * Add IBS Op branch target address data translation. - Add a mechanism for logging performance data to output files in the session directory (/var/lib/oprofile/samples/): * Add OProfile extended deinit function. * IBS Op Memory Access Log (/var/lib/oprofile/samples/ibs_memaccess.log). * IBS Op Branch Target Address Log (/var/lib/oprofile/samples/ibs_bta.log) (family12h/14h/15h only). ChangeLog-ibs-extended | 17 daemon/init.c | 3 daemon/opd_extended.c | 14 daemon/opd_extended.h | 9 daemon/opd_ibs.c | 203 ++++++- daemon/opd_ibs.h | 14 daemon/opd_ibs_macro.h | 71 +- daemon/opd_ibs_trans.c | 1016 ++++++++++++++++++++------------------ daemon/opd_ibs_trans.h | 12 events/x86-64/family10/unit_masks | 1 utils/opcontrol | 59 ++ 11 files changed, 895 insertions(+), 524 deletions(-) ---- diff -paurN oprofile/ChangeLog oprofile-new/ChangeLog-ibs-extended --- oprofile/ChangeLog 2010-11-07 15:28:36.112419594 -0600 +++ oprofile-new/ChangeLog-ibs-extended 2010-11-07 16:11:36.651115844 -0600 @@ -1,3 +1,20 @@ +2010-11-7 Suravee Suthikulpanit <sur...@am...> + + * oprofile/utils/opcontrol: + * oprofile/daemon/init.c: + * oprofile/daemon/opd_extended.h: + * oprofile/daemon/opd_extended.c: + * oprofile/daemon/opd_ibs.h: + * oprofile/daemon/opd_ibs.c: + * oprofile/daemon/opd_ibs_trans.h: + * oprofile/daemon/opd_ibs_trans.c: + * oprofile/events/x86-64/family10/unit_masks: + * oprofile/daemon/opd_ibs_macro.h: Add new IBS support for + family12h/14h/15h, including: + - IBS Op branch target address log + - IBS Op memory access log + - IBS Op extended count bits + 2010-10-15 Roland Grunberg <rol...@gm...> * libop/op_xml_events.c: diff 
-paurN oprofile/utils/opcontrol oprofile-new/utils/opcontrol --- oprofile/utils/opcontrol 2010-11-07 15:28:36.322348154 -0600 +++ oprofile-new/utils/opcontrol 2010-11-07 12:36:39.491330457 -0600 @@ -1867,13 +1867,20 @@ verify_ibs() IBS_FETCH_COUNT=$IBS_COUNT IBS_FETCH_MASK=$IBS_MASK elif test "$IBS_FETCH_COUNT" != "$IBS_COUNT" ; then - echo "All IBS Fetch must have the same count." + echo "ERROR: All IBS Fetch must have the same count." exit 1 fi # Check IBS_MASK consistency if test "$IBS_FETCH_MASK" != "$IBS_MASK" ; then - echo "All IBS Fetch must have the same unitmask." + echo "ERROR: All IBS Fetch must have the same unitmask." + exit 1 + fi + + # Check IBS_FETCH_COUNT within range + if test "$IBS_FETCH_COUNT" -gt 1048575 ; then + echo "ERROR: IBS Fetch count is too large." + echo " The maximum IBS-fetch count is 1048575." exit 1 fi @@ -1892,6 +1899,28 @@ verify_ibs() echo "All IBS Op must have the same unitmask." exit 1 fi + + # Check IBS_OP_COUNT within range + case "$CPUTYPE" in + x86-64/family10) + if test "$IBS_OP_COUNT" -gt 1048575 ; then + echo "ERROR: IBS Op count is too large." + echo " The maximum IBS-fetch count is 1048575." + exit 1 + fi + ;; + + x86-64/family12h|\ + x86-64/family14h|\ + x86-64/family15h) + if test "$IBS_OP_COUNT" -gt 134217727 ; then + echo "ERROR: IBS Op count is too large." + echo " The maximum IBS-Op count is 134217727." + exit 1 + fi + ;; + *) + esac fi return @@ -1944,10 +1973,32 @@ do_param_setup_ibs() # NOTE: We default to use dispatched_op if available. # Some of the older family10 system does not have # dispatched_ops feature. 
- # dispatched op is enabled by bit 1 of the unitmask + # Dispatched op is enabled by bit 0 of the unitmask + IBS_OP_DISPATCHED_OP=$(( IBS_OP_UNITMASK & 0x1 )) if test -f $MOUNT/ibs_op/dispatched_ops ; then - IBS_OP_DISPATCHED_OP=$(( IBS_OP_UNITMASK & 0x1 )) set_param ibs_op/dispatched_ops $IBS_OP_DISPATCHED_OP + else + if test $IBS_OP_DISPATCHED_OP -eq 1 ; then + echo "ERROR: IBS Op dispatched ops is not supported." + exit 1 + fi + fi + + # NOTE: BTA is enabled by bit 2 of the unitmask + IBS_OP_BTA=$(( IBS_OP_UNITMASK & 0x4 )) + if test -f $MOUNT/ibs_op/branch_target; then + if [ "$IBS_OP_BTA" = "4" ] ; then + set_param ibs_op/branch_target 1 + else + set_param ibs_op/branch_target 0 + fi + + # TODO: Check if write successful + else + if test $IBS_OP_BTA -eq 1 ; then + echo "ERROR: IBS Op Branch Target Address is not supported." + exit 1 + fi fi else set_param ibs_op/enable 0 diff -paurN oprofile/daemon/init.c oprofile-new/daemon/init.c --- oprofile/daemon/init.c 2010-11-07 15:28:36.182420260 -0600 +++ oprofile-new/daemon/init.c 2010-11-05 15:56:56.641572462 -0500 @@ -24,6 +24,7 @@ #include "opd_anon.h" #include "opd_perfmon.h" #include "opd_printf.h" +#include "opd_extended.h" #include "op_version.h" #include "op_config.h" @@ -282,6 +283,8 @@ static void opd_sigterm(void) opd_do_jitdumps(); opd_print_stats(); printf("oprofiled stopped %s", op_get_time()); + opd_ext_deinitialize(); + exit(EXIT_FAILURE); } diff -paurN oprofile/daemon/opd_extended.h oprofile-new/daemon/opd_extended.h --- oprofile/daemon/opd_extended.h 2010-11-07 15:28:36.182420260 -0600 +++ oprofile-new/daemon/opd_extended.h 2010-11-05 15:54:49.290251600 -0500 @@ -35,6 +35,8 @@ struct opd_ext_feature { struct opd_ext_handlers { // Extended init int (*ext_init)(char const *); + // Extended deinit + int (*ext_deinit)(); // Extended statistics int (*ext_print_stats)(); // Extended sfile handlers @@ -61,6 +63,13 @@ struct opd_ext_sfile_handlers { extern int opd_ext_initialize(char const * value); /** + 
* Deinitialize the enabled extended feature via its ext_deinit handler + * + * @return 0 if no extended feature is enabled, else the handler's return value + */ +extern int opd_ext_deinitialize(); + +/** * Print out extended feature statistics in oprofiled.log file */ extern void opd_ext_print_stats(); diff -paurN oprofile/daemon/opd_extended.c oprofile-new/daemon/opd_extended.c --- oprofile/daemon/opd_extended.c 2010-11-07 15:28:36.182420260 -0600 +++ oprofile-new/daemon/opd_extended.c 2010-11-05 15:55:23.670338392 -0500 @@ -109,6 +109,20 @@ err_out: } +int opd_ext_deinitialize() +{ + int ret = EXIT_FAILURE; + + if(opd_ext_feat_index == -1) { + return 0; + } + + ret = ext_feature_table[opd_ext_feat_index].handlers->ext_deinit(); + + return ret; +} + + void opd_ext_print_stats() { if (is_ext_enabled() diff -paurN oprofile/daemon/opd_ibs.h oprofile-new/daemon/opd_ibs.h --- oprofile/daemon/opd_ibs.h 2010-11-07 15:28:36.192346762 -0600 +++ oprofile-new/daemon/opd_ibs.h 2010-11-05 15:53:48.980582133 -0500 @@ -2,7 +2,7 @@ * @file daemon/opd_ibs.h * AMD Family10h Instruction Based Sampling (IBS) handling. 
* - * @remark Copyright 2008 OProfile authors + * @remark Copyright 2008-2010 OProfile authors * @remark Read the file COPYING * * @author Jason Yeh <jas...@am...> @@ -70,18 +70,14 @@ struct ibs_op_sample { /* MSRC001_1037 IBS Op Data 3 Register */ unsigned int ibs_op_data3_low; unsigned int ibs_op_data3_high; + /* MSRC001_1038 IBS DC Linear Address */ unsigned int ibs_op_ldst_linaddr_low; unsigned int ibs_op_ldst_linaddr_high; + /* MSRC001_1039 IBS DC Physical Address */ unsigned int ibs_op_phys_addr_low; unsigned int ibs_op_phys_addr_high; -}; - - -enum IBSL1PAGESIZE { - L1TLB4K = 0, - L1TLB2M, - L1TLB1G, - L1TLB_INVALID + /* MSRC001_103B IBS Branch Target Address */ + unsigned long ibs_op_brtgt_addr; }; diff -paurN oprofile/daemon/opd_ibs.c oprofile-new/daemon/opd_ibs.c --- oprofile/daemon/opd_ibs.c 2010-11-07 15:28:36.192346762 -0600 +++ oprofile-new/daemon/opd_ibs.c 2010-11-06 12:23:46.571357565 -0500 @@ -2,7 +2,7 @@ * @file daemon/opd_ibs.c * AMD Family10h Instruction Based Sampling (IBS) handling. * - * @remark Copyright 2007 OProfile authors + * @remark Copyright 2007-2010 OProfile authors * @remark Read the file COPYING * * @author Jason Yeh <jas...@am...> @@ -32,22 +32,37 @@ #include <stdio.h> #include <errno.h> #include <string.h> +#include <limits.h> + +#if defined(__i386__) && defined(__PIC__) +/* %ebx may be the PIC register. 
*/ + #define __cpuid(level, a, b, c, d) \ + __asm__ ("xchgl\t%%ebx, %1\n\t" \ + "cpuid\n\t" \ + "xchgl\t%%ebx, %1\n\t" \ + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ + : "0" (level)) +#else + #define __cpuid(level, a, b, c, d) \ + __asm__ ("cpuid\n\t" \ + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ + : "0" (level)) +#endif extern op_cpu cpu_type; extern int no_event_ok; extern int sfile_equal(struct sfile const * sf, struct sfile const * sf2); extern void sfile_dup(struct sfile * to, struct sfile * from); +extern char * session_dir; -/* IBS Select Arrays/Counters */ +/* IBS Select Counters */ static unsigned int ibs_selected_size; + +/* These flags store the IBS-derived events selection. */ static unsigned int ibs_fetch_selected_flag; -static unsigned int ibs_fetch_selected_size; static unsigned int ibs_op_selected_flag; -static unsigned int ibs_op_selected_size; static unsigned int ibs_op_ls_selected_flag; -static unsigned int ibs_op_ls_selected_size; static unsigned int ibs_op_nb_selected_flag; -static unsigned int ibs_op_nb_selected_size; /* IBS Statistics */ static unsigned long ibs_fetch_sample_stats; @@ -64,6 +79,18 @@ struct opd_event ibs_vc[OP_MAX_IBS_COUNT /* IBS Virtual Counter Index(VCI) Map*/ unsigned int ibs_vci_map[OP_MAX_IBS_COUNTERS]; +/* CPUID information */ +unsigned int ibs_family; +unsigned int ibs_model; +unsigned int ibs_stepping; + +/* IBS Extended MSRs */ +static unsigned long ibs_bta_enabled; + +/* IBS log files */ +FILE * memaccess_log; +FILE * bta_log; + /** * This function converts IBS fetch event flags and values into * derived events. 
If the tagged (sampled) fetched caused a derived @@ -75,7 +102,7 @@ static void opd_log_ibs_fetch(struct tra if (!trans_fetch) return; - trans_ibs_fetch(trans, ibs_fetch_selected_flag, ibs_fetch_selected_size); + trans_ibs_fetch(trans, ibs_fetch_selected_flag); } @@ -89,9 +116,16 @@ static void opd_log_ibs_op(struct transi if (!trans_op) return; - trans_ibs_op(trans, ibs_op_selected_flag, ibs_op_selected_size); - trans_ibs_op_ls(trans, ibs_op_ls_selected_flag, ibs_op_ls_selected_size); - trans_ibs_op_nb(trans, ibs_op_nb_selected_flag, ibs_op_nb_selected_size); + trans_ibs_op_mask_reserved(ibs_family, trans); + + if (trans_ibs_op_rip_invalid(trans) != 0) + return; + + trans_ibs_op(trans, ibs_op_selected_flag); + trans_ibs_op_ls(trans, ibs_op_ls_selected_flag); + trans_ibs_op_nb(trans, ibs_op_nb_selected_flag); + trans_ibs_op_ls_memaccess(trans); + trans_ibs_op_bta(trans); } @@ -150,6 +184,26 @@ out: } +static void get_ibs_bta_status() +{ + FILE * fp = NULL; + char buf[PATH_MAX]; + + /* Default to disable */ + ibs_bta_enabled = 0; + + snprintf(buf, PATH_MAX, "/dev/oprofile/ibs_op/branch_target"); + fp = fopen(buf, "r"); + if (!fp) + return; + + while (fgets(buf, PATH_MAX, fp) != NULL) + ibs_bta_enabled = strtoul(buf, NULL, 10); + + fclose(fp); +} + + void code_ibs_fetch_sample(struct transient * trans) { struct ibs_fetch_sample * trans_fetch = NULL; @@ -169,12 +223,12 @@ void code_ibs_fetch_sample(struct transi trans_fetch->rip = pop_buffer_value(trans); - trans_fetch->ibs_fetch_lin_addr_low = pop_buffer_value(trans); - trans_fetch->ibs_fetch_lin_addr_high = pop_buffer_value(trans); + trans_fetch->ibs_fetch_lin_addr_low = pop_buffer_value(trans); + trans_fetch->ibs_fetch_lin_addr_high = pop_buffer_value(trans); - trans_fetch->ibs_fetch_ctl_low = pop_buffer_value(trans); - trans_fetch->ibs_fetch_ctl_high = pop_buffer_value(trans); - trans_fetch->ibs_fetch_phys_addr_low = pop_buffer_value(trans); + trans_fetch->ibs_fetch_ctl_low = pop_buffer_value(trans); + 
trans_fetch->ibs_fetch_ctl_high = pop_buffer_value(trans); + trans_fetch->ibs_fetch_phys_addr_low = pop_buffer_value(trans); trans_fetch->ibs_fetch_phys_addr_high = pop_buffer_value(trans); verbprintf(vsamples, @@ -200,6 +254,30 @@ void code_ibs_fetch_sample(struct transi } +static void get_ibs_op_bta_sample(struct transient * trans, + struct ibs_op_sample * trans_op) +{ + // Check remaining + if (!enough_remaining(trans, 2)) { + verbprintf(vext, "not enough remaining\n"); + trans->remaining = 0; + ibs_op_incomplete_stats++; + return; + } + + if (ibs_bta_enabled == 1) { + trans_op->ibs_op_brtgt_addr = pop_buffer_value(trans); + + // Check if branch target address is valid (MSRC001_1035[37] == 1] + if ((trans_op->ibs_op_data1_high & (0x00000001 << 5)) == 0) { + trans_op->ibs_op_brtgt_addr = 0; + } + } else { + trans_op->ibs_op_brtgt_addr = 0; + } +} + + void code_ibs_op_sample(struct transient * trans) { struct ibs_op_sample * trans_op= NULL; @@ -233,8 +311,10 @@ void code_ibs_op_sample(struct transient trans_op->ibs_op_phys_addr_low = pop_buffer_value(trans); trans_op->ibs_op_phys_addr_high = pop_buffer_value(trans); + get_ibs_op_bta_sample(trans, trans_op); + verbprintf(vsamples, - "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n", + "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n", trans->cpu, trans->tgid, trans_op->rip, @@ -339,16 +419,12 @@ static int ibs_parse_and_set_events(char // Grouping if (IS_IBS_FETCH(event->val)) { ibs_fetch_selected_flag |= 1 << IBS_FETCH_OFFSET(event->val); - ibs_fetch_selected_size++; } else if (IS_IBS_OP(event->val)) { ibs_op_selected_flag |= 1 << IBS_OP_OFFSET(event->val); - ibs_op_selected_size++; } else if (IS_IBS_OP_LS(event->val)) { ibs_op_ls_selected_flag |= 1 << IBS_OP_LS_OFFSET(event->val); - ibs_op_ls_selected_size++; } else if (IS_IBS_OP_NB(event->val)) { ibs_op_nb_selected_flag |= 1 << IBS_OP_NB_OFFSET(event->val); - 
ibs_op_nb_selected_size++; } else { return -1; } @@ -402,7 +478,6 @@ static int ibs_parse_and_set_um_fetch(ch } - static int ibs_parse_and_set_um_op(char const * str, unsigned long int * ibs_op_um) { char * end = NULL; @@ -418,6 +493,31 @@ static int ibs_parse_and_set_um_op(char } +static void check_cpuid_family_model_stepping() +{ + union { + unsigned eax; + struct { + unsigned stepping : 4; + unsigned model : 4; + unsigned family : 4; + unsigned res : 4; + unsigned ext_model : 4; + unsigned ext_family : 8; + unsigned res2 : 4; + }; + } v; + unsigned ebx, ecx, edx; + + /* CPUID Fn0000_0001_EAX Family, Model, Stepping */ + __cpuid(1, v.eax, ebx, ecx, edx); + + ibs_family = v.family + v.ext_family; + ibs_model = v.model + v.ext_model; + ibs_stepping = v.stepping; +} + + static int ibs_init(char const * argv) { char * tmp, * ptr, * tok1, * tok2 = NULL; @@ -532,15 +632,67 @@ static int ibs_init(char const * argv) // Allow no event no_event_ok = 1; + + check_cpuid_family_model_stepping(); + + get_ibs_bta_status(); + + /* Create IBS memory access log */ + memaccess_log = NULL; + if (ibs_op_um & 0x2) { + char filename[1024]; + strncpy(filename, session_dir, 1023); + strncat(filename, "/samples/ibs_memaccess.log", 1024); + if ((memaccess_log = fopen(filename, "w")) == NULL) { + verbprintf(vext, "Warning: Cannot create file %s\n", filename); + + } else { + fprintf (memaccess_log, "# IBS Memory Access Log\n\n"); + fprintf (memaccess_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address,\n"); + fprintf (memaccess_log, "# phy-hi:phy-low,lin-hi:lin-low,accese-type,latency\n\n"); + } + } + + // Create IBS Branch Target Address (BTA) log + bta_log = NULL; + if (ibs_bta_enabled) { + char filename[1024]; + strncpy(filename, session_dir, 1023); + strncat(filename, "/samples/ibs_bta.log", 1024); + if ((bta_log = fopen(filename, "w")) == NULL) { + verbprintf(vext, "Warning: Cannot create file %s\n", filename); + } else { + fprintf (bta_log, "# IBS Memory Access 
Log\n\n"); + fprintf (bta_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address\n\n"); + } + } + + return 0; +} + + +static int ibs_deinit() +{ + if (memaccess_log) { + fclose (memaccess_log); + memaccess_log = NULL; + } + + if (bta_log) { + fclose (bta_log); + bta_log = NULL; + } return 0; } static int ibs_print_stats() { - printf("Nr. IBS Fetch samples : %lu (%lu entries)\n", ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7)); + printf("Nr. IBS Fetch samples : %lu (%lu entries)\n", + ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7)); printf("Nr. IBS Fetch incompletes : %lu\n", ibs_fetch_incomplete_stats); - printf("Nr. IBS Op samples : %lu (%lu entries)\n", ibs_op_sample_stats, (ibs_op_sample_stats * 13)); + printf("Nr. IBS Op samples : %lu (%lu entries)\n", + ibs_op_sample_stats, (ibs_op_sample_stats * 13)); printf("Nr. IBS Op incompletes : %lu\n", ibs_op_incomplete_stats); printf("Nr. IBS derived events : %lu\n", ibs_derived_event_stats); return 0; @@ -686,7 +838,8 @@ struct opd_ext_sfile_handlers ibs_sfile_ struct opd_ext_handlers ibs_handlers = { - .ext_init = &ibs_init, + .ext_init = &ibs_init, + .ext_deinit = &ibs_deinit, .ext_print_stats = &ibs_print_stats, - .ext_sfile = &ibs_sfile_handlers + .ext_sfile = &ibs_sfile_handlers }; diff -paurN oprofile/daemon/opd_ibs_trans.h oprofile-new/daemon/opd_ibs_trans.h --- oprofile/daemon/opd_ibs_trans.h 2010-11-07 15:28:36.182420260 -0600 +++ oprofile-new/daemon/opd_ibs_trans.h 2010-11-06 10:59:05.492727144 -0500 @@ -24,8 +24,12 @@ struct ibs_translation_table { }; -extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size); -extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size); -extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size); -extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size); +extern void 
trans_ibs_fetch (struct transient * trans, unsigned int selected_flag); +extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag); +extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag); +extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag); +extern int trans_ibs_op_rip_invalid (struct transient * trans); +extern void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans); +extern void trans_ibs_op_ls_memaccess(struct transient * trans); +extern void trans_ibs_op_bta (struct transient * trans); #endif // OPD_IBS_TRANS_H diff -paurN oprofile/daemon/opd_ibs_trans.c oprofile-new/daemon/opd_ibs_trans.c --- oprofile/daemon/opd_ibs_trans.c 2010-11-07 15:28:36.192346762 -0600 +++ oprofile-new/daemon/opd_ibs_trans.c 2010-11-06 11:56:59.431773062 -0500 @@ -1,8 +1,8 @@ /** * @file daemon/opd_ibs_trans.c - * AMD Family10h Instruction Based Sampling (IBS) translation. + * AMD Instruction Based Sampling (IBS) translation. 
* - * @remark Copyright 2008 OProfile authors + * @remark Copyright 2008 - 2010 OProfile authors * @remark Read the file COPYING * * @author Jason Yeh <jas...@am...> @@ -20,205 +20,185 @@ #include <stdlib.h> #include <stdio.h> -#define MAX_EVENTS_PER_GROUP 32 +extern FILE * bta_log; +extern FILE * memaccess_log; /* - * --------------------- OP DERIVED FUNCTION + * --------------------- FETCH DERIVED FUNCTION */ -void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size) +void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag) { struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch; - unsigned int i, j, mask = 1; - for (i = IBS_FETCH_BASE, j =0 ; i <= IBS_FETCH_END && j < size ; i++, mask = mask << 1) { + if ((selected_flag) == 0) + return; - if ((selected_flag & mask) == 0) - continue; + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ALL) { + /* IBS all fetch samples (kills + attempts) */ + AGG_IBS_EVENT(DE_IBS_FETCH_ALL); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_KILLED) { + /* IBS killed fetches ("case 0") -- All interesting event + * flags are clear */ + if (IBS_FETCH_KILLED(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_FETCH_KILLED); + } - j++; + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ATTEMPTED) { + /* Any non-killed fetch is an attempted fetch */ + AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED); + } - switch (i) { - - case DE_IBS_FETCH_ALL: - /* IBS all fetch samples (kills + attempts) */ - AGG_IBS_EVENT(DE_IBS_FETCH_ALL); - break; - - case DE_IBS_FETCH_KILLED: - /* IBS killed fetches ("case 0") -- All interesting event - * flags are clear */ - if (IBS_FETCH_KILLED(trans_fetch)) - AGG_IBS_EVENT(DE_IBS_FETCH_KILLED); - break; - - case DE_IBS_FETCH_ATTEMPTED: - /* Any non-killed fetch is an attempted fetch */ - AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED); - break; - - case DE_IBS_FETCH_COMPLETED: - if (IBS_FETCH_FETCH_COMPLETION(trans_fetch)) - /* IBS Fetch Completed */ - 
AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED); - break; - - case DE_IBS_FETCH_ABORTED: - if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch)) - /* IBS Fetch Aborted */ - AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED); - break; - - case DE_IBS_L1_ITLB_HIT: - /* IBS L1 ITLB hit */ - if (IBS_FETCH_L1_TLB_HIT(trans_fetch)) - AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT); - break; - - case DE_IBS_ITLB_L1M_L2H: - /* IBS L1 ITLB miss and L2 ITLB hit */ - if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch)) - AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H); - break; - - case DE_IBS_ITLB_L1M_L2M: - /* IBS L1 & L2 ITLB miss; complete ITLB miss */ - if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch)) - AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M); - break; - - case DE_IBS_IC_MISS: - /* IBS instruction cache miss */ - if (IBS_FETCH_INST_CACHE_MISS(trans_fetch)) - AGG_IBS_EVENT(DE_IBS_IC_MISS); - break; - - case DE_IBS_IC_HIT: - /* IBS instruction cache hit */ - if (IBS_FETCH_INST_CACHE_HIT(trans_fetch)) - AGG_IBS_EVENT(DE_IBS_IC_HIT); - break; - - case DE_IBS_FETCH_4K_PAGE: - if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) - && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB4K) - AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE); - break; - - case DE_IBS_FETCH_2M_PAGE: - if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) - && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB2M) - AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE); - break; - - case DE_IBS_FETCH_1G_PAGE: - if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) - && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB1G) - AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE); - break; - - case DE_IBS_FETCH_XX_PAGE: - break; - - case DE_IBS_FETCH_LATENCY: - if (IBS_FETCH_FETCH_LATENCY(trans_fetch)) - AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY, - IBS_FETCH_FETCH_LATENCY(trans_fetch)); - break; - default: - break; - } + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_COMPLETED) { + if (IBS_FETCH_FETCH_COMPLETION(trans_fetch)) + /* IBS Fetch Completed */ + AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ABORTED) { + if 
(!IBS_FETCH_FETCH_COMPLETION(trans_fetch)) + /* IBS Fetch Aborted */ + AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_L1_ITLB_HIT) { + /* IBS L1 ITLB hit */ + if (IBS_FETCH_L1_TLB_HIT(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2H) { + /* IBS L1 ITLB miss and L2 ITLB hit */ + if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2M) { + /* IBS L1 & L2 ITLB miss; complete ITLB miss */ + if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_MISS) { + /* IBS instruction cache miss */ + if (IBS_FETCH_INST_CACHE_MISS(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_IC_MISS); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_HIT) { + /* IBS instruction cache hit */ + if (IBS_FETCH_INST_CACHE_HIT(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_IC_HIT); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_4K_PAGE) { + if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) + && IBS_FETCH_TLB_PAGE_SIZE_4K(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_2M_PAGE) { + if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) + && IBS_FETCH_TLB_PAGE_SIZE_2M(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_1G_PAGE) { + if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) + && IBS_FETCH_TLB_PAGE_SIZE_1G(trans_fetch)) + AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE); + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_XX_PAGE) { + } + + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_LATENCY) { + if (IBS_FETCH_FETCH_LATENCY(trans_fetch)) + AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY, + IBS_FETCH_FETCH_LATENCY(trans_fetch)); } } + /* * --------------------- OP DERIVED FUNCTION */ -void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size) +void trans_ibs_op (struct transient * trans, unsigned int selected_flag) { struct 
ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; - unsigned int i, j, mask = 1; - for (i = IBS_OP_BASE, j =0 ; i <= IBS_OP_END && j < size ; i++, mask = mask << 1) { + if ((selected_flag) == 0) + return; - if ((selected_flag & mask) == 0) - continue; + CHECK_OP_SELECTED_FLAG(DE_IBS_OP_ALL) { + /* All IBS op samples */ + AGG_IBS_EVENT(DE_IBS_OP_ALL); + } - j++; + CHECK_OP_SELECTED_FLAG(DE_IBS_OP_TAG_TO_RETIRE) { + /* Tally retire cycle counts for all sampled macro-ops + * IBS tag to retire cycles */ + if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)) + AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE, + IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)); + } - switch (i) { - - case DE_IBS_OP_ALL: - /* All IBS op samples */ - AGG_IBS_EVENT(DE_IBS_OP_ALL); - break; - - case DE_IBS_OP_TAG_TO_RETIRE: - /* Tally retire cycle counts for all sampled macro-ops - * IBS tag to retire cycles */ - if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)) - AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE, - IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)); - break; - - case DE_IBS_OP_COMP_TO_RETIRE: - /* IBS completion to retire cycles */ - if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)) - AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE, - IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)); - break; - - case DE_IBS_BRANCH_RETIRED: - if (IBS_OP_OP_BRANCH_RETIRED(trans_op)) - /* IBS Branch retired op */ - AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ; - break; - - case DE_IBS_BRANCH_MISP: - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) - /* Test branch-specific event flags */ - /* IBS mispredicted Branch op */ - && IBS_OP_OP_BRANCH_MISPREDICT(trans_op)) - AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ; - break; - - case DE_IBS_BRANCH_TAKEN: - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) - /* IBS taken Branch op */ - && IBS_OP_OP_BRANCH_TAKEN(trans_op)) - AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN); - break; - - case DE_IBS_BRANCH_MISP_TAKEN: - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) - /* IBS mispredicted taken branch op */ - && IBS_OP_OP_BRANCH_TAKEN(trans_op) - && 
IBS_OP_OP_BRANCH_MISPREDICT(trans_op)) - AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN); - break; - - case DE_IBS_RETURN: - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) - /* IBS return op */ - && IBS_OP_OP_RETURN(trans_op)) - AGG_IBS_EVENT(DE_IBS_RETURN); - break; - - case DE_IBS_RETURN_MISP: - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) - /* IBS mispredicted return op */ - && IBS_OP_OP_RETURN(trans_op) - && IBS_OP_OP_BRANCH_MISPREDICT(trans_op)) - AGG_IBS_EVENT(DE_IBS_RETURN_MISP); - break; - - case DE_IBS_RESYNC: - /* Test for a resync macro-op */ - if (IBS_OP_OP_BRANCH_RESYNC(trans_op)) - AGG_IBS_EVENT(DE_IBS_RESYNC); - break; - default: - break; - } + CHECK_OP_SELECTED_FLAG(DE_IBS_OP_COMP_TO_RETIRE) { + /* IBS completion to retire cycles */ + if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)) + AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE, + IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)); + } + + CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_RETIRED) { + if (IBS_OP_BRANCH_RETIRED(trans_op)) + /* IBS Branch retired op */ + AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ; + } + + CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP) { + if (IBS_OP_BRANCH_RETIRED(trans_op) + /* Test branch-specific event flags */ + /* IBS mispredicted Branch op */ + && IBS_OP_BRANCH_MISPREDICT(trans_op)) + AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ; + } + + CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_TAKEN) { + if (IBS_OP_BRANCH_RETIRED(trans_op) + /* IBS taken Branch op */ + && IBS_OP_BRANCH_TAKEN(trans_op)) + AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN); + } + + CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP_TAKEN) { + if (IBS_OP_BRANCH_RETIRED(trans_op) + /* IBS mispredicted taken branch op */ + && IBS_OP_BRANCH_TAKEN(trans_op) + && IBS_OP_BRANCH_MISPREDICT(trans_op)) + AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN); + } + + CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN) { + if (IBS_OP_BRANCH_RETIRED(trans_op) + /* IBS return op */ + && IBS_OP_RETURN(trans_op)) + AGG_IBS_EVENT(DE_IBS_RETURN); + } + + CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN_MISP) { + if (IBS_OP_BRANCH_RETIRED(trans_op) + /* IBS 
mispredicted return op */ + && IBS_OP_RETURN(trans_op) + && IBS_OP_BRANCH_MISPREDICT(trans_op)) + AGG_IBS_EVENT(DE_IBS_RETURN_MISP); + } + + CHECK_OP_SELECTED_FLAG(DE_IBS_RESYNC) { + /* Test for a resync macro-op */ + if (IBS_OP_BRANCH_RESYNC(trans_op)) + AGG_IBS_EVENT(DE_IBS_RESYNC); } } @@ -226,213 +206,201 @@ void trans_ibs_op (struct transient * tr /* * --------------------- OP LS DERIVED FUNCTION */ -void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size) +void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag) { struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; - unsigned int i, j, mask = 1; /* Preliminary check */ if (!IBS_OP_IBS_LD_OP(trans_op) && !IBS_OP_IBS_ST_OP(trans_op)) return; - for (i = IBS_OP_LS_BASE, j =0 ; i <= IBS_OP_LS_END && j < size ; i++, mask = mask << 1) { + if ((selected_flag) == 0) + return; + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_ALL_OP) { + /* Count the number of LS op samples */ + AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ; + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOAD_OP) { + if (IBS_OP_IBS_LD_OP(trans_op)) + /* TALLy an IBS load derived event */ + AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ; + } - if ((selected_flag & mask) == 0) - continue; + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STORE_OP) { + if (IBS_OP_IBS_ST_OP(trans_op)) + /* Count and handle store operations */ + AGG_IBS_EVENT(DE_IBS_LS_STORE_OP); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1H) { + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)) + /* L1 DTLB hit -- This is the most frequent case */ + AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H); + } - j++; + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2H) { + /* l2_translation_size = 1 */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) + /* L1 DTLB miss, L2 DTLB hit */ + AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H); + } - switch (i) { + 
CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2M) { + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) + /* L1 DTLB miss, L2 DTLB miss */ + AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M); + } - case DE_IBS_LS_ALL_OP: - /* Count the number of LS op samples */ - AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ; - break; - - case DE_IBS_LS_LOAD_OP: - if (IBS_OP_IBS_LD_OP(trans_op)) - /* TALLy an IBS load derived event */ - AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ; - break; - - case DE_IBS_LS_STORE_OP: - if (IBS_OP_IBS_ST_OP(trans_op)) - /* Count and handle store operations */ - AGG_IBS_EVENT(DE_IBS_LS_STORE_OP); - break; - - case DE_IBS_LS_DTLB_L1H: - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) - && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)) - /* L1 DTLB hit -- This is the most frequent case */ - AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H); - break; - - case DE_IBS_LS_DTLB_L1M_L2H: - /* l2_translation_size = 1 */ - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) - && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) - /* L1 DTLB miss, L2 DTLB hit */ - AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H); - break; - - case DE_IBS_LS_DTLB_L1M_L2M: - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) - && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) - /* L1 DTLB miss, L2 DTLB miss */ - AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M); - break; - - case DE_IBS_LS_DC_MISS: - if (IBS_OP_IBS_DC_MISS(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_DC_MISS); - break; - - case DE_IBS_LS_DC_HIT: - if (!IBS_OP_IBS_DC_MISS(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_DC_HIT); - break; - - case DE_IBS_LS_MISALIGNED: - if (IBS_OP_IBS_DC_MISS_ACC(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED); - break; - - case DE_IBS_LS_BNK_CONF_LOAD: - if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD); - break; - - case DE_IBS_LS_BNK_CONF_STORE: - if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE); - 
break; - - case DE_IBS_LS_STL_FORWARDED: - if (IBS_OP_IBS_LD_OP(trans_op) - /* Data forwarding info are valid only for load ops */ - && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ; - break; - - case DE_IBS_LS_STL_CANCELLED: - if (IBS_OP_IBS_LD_OP(trans_op)) - if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ; - break; - - case DE_IBS_LS_UC_MEM_ACCESS: - if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS); - break; - - case DE_IBS_LS_WC_MEM_ACCESS: - if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS); - break; - - case DE_IBS_LS_LOCKED_OP: - if (IBS_OP_IBS_LOCKED_OP(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP); - break; - - case DE_IBS_LS_MAB_HIT: - if (IBS_OP_IBS_DC_MAB_HIT(trans_op)) - AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT); - break; - - case DE_IBS_LS_L1_DTLB_4K: - /* l1_translation */ - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) - && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) - - && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) - && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) - /* This is the most common case, unfortunately */ - AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ; - break; - - case DE_IBS_LS_L1_DTLB_2M: - /* l1_translation */ - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) - && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) - - && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)) - /* 2M L1 DTLB page translation */ - AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M); - break; - - case DE_IBS_LS_L1_DTLB_1G: - /* l1_translation */ - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) - && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) - - && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) - && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) - /* 1G L1 DTLB page translation */ - AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G); - break; - - case DE_IBS_LS_L1_DTLB_RES: - break; - - case DE_IBS_LS_L2_DTLB_4K: - /* l2_translation_size = 1 */ - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) - && 
!IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) - - /* L2 DTLB page translation */ - && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) - && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) - /* 4K L2 DTLB page translation */ - AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K); - break; - - case DE_IBS_LS_L2_DTLB_2M: - /* l2_translation_size = 1 */ - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) - && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) - - /* L2 DTLB page translation */ - && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) - && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) - /* 2M L2 DTLB page translation */ - AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M); - break; - - case DE_IBS_LS_L2_DTLB_1G: - /* l2_translation_size = 1 */ - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) - && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) - - /* L2 DTLB page translation */ - && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) - && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) - /* 2M L2 DTLB page translation */ - AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G); - break; - - case DE_IBS_LS_L2_DTLB_RES2: - break; - - case DE_IBS_LS_DC_LOAD_LAT: - if (IBS_OP_IBS_LD_OP(trans_op) - /* If the load missed in DC, tally the DC load miss latency */ - && IBS_OP_IBS_DC_MISS(trans_op)) - /* DC load miss latency is only reliable for load ops */ - AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT, - IBS_OP_DC_MISS_LATENCY(trans_op)) ; - break; + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_MISS) { + if (IBS_OP_IBS_DC_MISS(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_DC_MISS); + } - default: - break; - } + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_HIT) { + if (!IBS_OP_IBS_DC_MISS(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_DC_HIT); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MISALIGNED) { + if (IBS_OP_IBS_DC_MISS_ACC(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_LOAD) { + if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD); + } + + 
CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_STORE) { + if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_FORWARDED) { + if (IBS_OP_IBS_LD_OP(trans_op) + /* Data forwarding info are valid only for load ops */ + && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ; + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_CANCELLED) { + if (IBS_OP_IBS_LD_OP(trans_op)) + if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ; + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_UC_MEM_ACCESS) { + if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_WC_MEM_ACCESS) { + if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOCKED_OP) { + if (IBS_OP_IBS_LOCKED_OP(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MAB_HIT) { + if (IBS_OP_IBS_DC_MAB_HIT(trans_op)) + AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_4K) { + /* l1_translation */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + + && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) + /* This is the most common case, unfortunately */ + AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ; + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_2M) { + /* l1_translation */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + + && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)) + /* 2M L1 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_1G) { + /* l1_translation */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + + && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) + && 
IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) + /* 1G L1 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_RES) { + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_4K) { + /* l2_translation_size = 1 */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) + + /* L2 DTLB page translation */ + && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) + /* 4K L2 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_2M) { + /* l2_translation_size = 1 */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) + + /* L2 DTLB page translation */ + && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) + /* 2M L2 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_1G) { + /* l2_translation_size = 1 */ + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) + + /* L2 DTLB page translation */ + && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) + && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) + /* 2M L2 DTLB page translation */ + AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G); + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_RES2) { + } + + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_LOAD_LAT) { + if (IBS_OP_IBS_LD_OP(trans_op) + /* If the load missed in DC, tally the DC load miss latency */ + && IBS_OP_IBS_DC_MISS(trans_op)) + /* DC load miss latency is only reliable for load ops */ + AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT, + IBS_OP_DC_MISS_LATENCY(trans_op)) ; } } @@ -443,12 +411,14 @@ void trans_ibs_op_ls (struct transient * * that miss in L1 and L2 cache. 
NB data arrives too late * to be reliable for store operations */ -void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size) +void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag) { struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; - unsigned int i, j, mask = 1; /* Preliminary check */ + if ((selected_flag) == 0) + return; + if (!IBS_OP_IBS_LD_OP(trans_op)) return; @@ -458,97 +428,219 @@ void trans_ibs_op_nb (struct transient * if (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0) return; - for (i = IBS_OP_NB_BASE, j =0 ; i <= IBS_OP_NB_END && j < size ; i++, mask = mask << 1) { + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL) { + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) + /* Request was serviced by local processor */ + AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ; + } - if ((selected_flag & mask) == 0) - continue; + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE) { + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) + /* Request was serviced by remote processor */ + AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ; + } - j++; + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_L3) { + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && IBS_OP_NB_IBS_REQ_SRC_01(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3); + } + + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_CACHE) { + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && IBS_OP_NB_IBS_REQ_SRC_02(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE); + } + + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_CACHE) { + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && IBS_OP_NB_IBS_REQ_SRC_02(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ; + } - switch (i) { + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_DRAM) { + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && IBS_OP_NB_IBS_REQ_SRC_03(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM); + } - case DE_IBS_NB_LOCAL: - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) - /* Request was serviced by local processor */ - AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ; - break; - - case DE_IBS_NB_REMOTE: 
- if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) - /* Request was serviced by remote processor */ - AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ; - break; - - case DE_IBS_NB_LOCAL_L3: - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x1)) - AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3); - break; - - case DE_IBS_NB_LOCAL_CACHE: - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)) - AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE); - break; - - case DE_IBS_NB_REMOTE_CACHE: - if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)) - AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ; - break; - - case DE_IBS_NB_LOCAL_DRAM: - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3)) - AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM); - break; - - case DE_IBS_NB_REMOTE_DRAM: - if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3)) - AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ; - break; - - case DE_IBS_NB_LOCAL_OTHER: - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7)) - AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER); - break; - - case DE_IBS_NB_REMOTE_OTHER: - if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7)) - AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ; - break; - - case DE_IBS_NB_CACHE_STATE_M: - if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2) - && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) - AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ; - break; - - case DE_IBS_NB_CACHE_STATE_O: - if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2) - && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) - AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ; - break; - - case DE_IBS_NB_LOCAL_LATENCY: - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) - /* Request was serviced by local processor */ - AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY, - IBS_OP_DC_MISS_LATENCY(trans_op)); - break; - - case DE_IBS_NB_REMOTE_LATENCY: - if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) - /* Request 
was serviced by remote processor */ - AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY, - IBS_OP_DC_MISS_LATENCY(trans_op)); - break; + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_DRAM) { + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && IBS_OP_NB_IBS_REQ_SRC_03(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ; + } - default: - break; + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_OTHER) { + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && IBS_OP_NB_IBS_REQ_SRC_07(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER); + } + + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_OTHER) { + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) + && IBS_OP_NB_IBS_REQ_SRC_07(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ; + } + + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_M) { + if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op) + && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ; + } + + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_O) { + if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op) + && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) + AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ; + } + + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_LATENCY) { + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) + /* Request was serviced by local processor */ + AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY, + IBS_OP_DC_MISS_LATENCY(trans_op)); + } + + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_LATENCY) { + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) + /* Request was serviced by remote processor */ + AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY, + IBS_OP_DC_MISS_LATENCY(trans_op)); + } +} + + +int trans_ibs_op_rip_invalid (struct transient * trans) +{ + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; + + if (IBS_OP_RIP_INVALID(trans_op)) + return 1; + + return 0; +} + + +void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans) +{ + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; + + switch (family) { + case 0x10: + /* Reserved IbsRipInvalid (MSRC001_1035[38])*/ + 
trans_op->ibs_op_data1_high &= ~MASK_RIP_INVALID; + break; + case 0x12: + /* Reserved NbIbsReqDstProc (MSRCC001_1036[4]) */ + trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC; + /* Reserved NbIbsReqCacheHitSt (MSRCC001_1036[5]) */ + trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE; + break; + case 0x14: + /* Reserved NbIbsReqDstProc (MSRCC001_1036[4]) */ + trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC; + /* Reserved NbIbsReqCacheHitSt (MSRCC001_1036[5]) */ + trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE; + /* Reserved IbsDcL1tlbHit1G (MSRC001_1037[5]) */ + trans_op->ibs_op_data3_low &= ~DC_MASK_L1_HIT_1G; + /* Reserved IbsDcLdBnkCon (MSRC001_1037[9]) */ + trans_op->ibs_op_data3_low &= ~DC_MASK_LD_BANK_CONFLICT; + /* Reserved IbsDcStBnkCon (MSRC001_1037[10]) */ + trans_op->ibs_op_data3_low &= ~DC_MASK_ST_BANK_CONFLICT; + /* Reserved IbsDcStToLdCan (MSRC001_1037[12]) */ + trans_op->ibs_op_data3_low &= ~DC_MASK_ST_TO_LD_CANCEL; + /* Reserved IbsDcL2tlbHit1G (MSRC001_1037[19]) */ + trans_op->ibs_op_data3_low &= ~DC_MASK_L2_HIT_1G; + + break; + case 0x15: + default: + break; + + } +} + + +void trans_ibs_op_bta(struct transient * trans) +{ + static cookie_t old_cookie = NO_COOKIE; + static cookie_t old_app_cookie = NO_COOKIE; + static char const * mod = NULL; + static char const * app = NULL; + const char vmlinux[10] = "vmlinux"; + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; + + if (!bta_log) + return; + + if (!trans_op->ibs_op_brtgt_addr) + return; + + if( old_app_cookie == INVALID_COOKIE + || old_app_cookie == NO_COOKIE + || old_app_cookie != trans->app_cookie) { + app = find_cookie(trans->app_cookie); + old_app_cookie = trans->cookie; + } + + if (trans->in_kernel == 1) { + mod = vmlinux; + old_cookie = NO_COOKIE; + } else { + if( old_cookie == INVALID_COOKIE + || old_cookie == NO_COOKIE + || old_cookie != trans->cookie) { + mod = find_cookie(trans->cookie); + old_cookie = trans->cookie; } } + + fprintf(bta_log, 
"0x%016llx,0x%016llx,%02lu %08u,%08u,0x%08x,0x%08lx\n", + trans->app_cookie, trans->cookie, trans->cpu, trans->tgid, trans->tid, (unsigned int)trans->pc, + trans_op->ibs_op_brtgt_addr); +} + + +void trans_ibs_op_ls_memaccess(struct transient * trans) +{ + static cookie_t old_cookie = NO_COOKIE; + static cookie_t old_app_cookie = NO_COOKIE; + static char const * mod = NULL; + static char const * app = NULL; + const char vmlinux[10] = "vmlinux"; + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; + + if (!memaccess_log) + return; + + if( old_app_cookie == INVALID_COOKIE + || old_app_cookie == NO_COOKIE + || old_app_cookie != trans->app_cookie) { + app = find_cookie(trans->app_cookie); + old_app_cookie = trans->cookie; + } + + if (trans->in_kernel == 1) { + mod = vmlinux; + old_cookie = NO_COOKIE; + } else { + if( old_cookie == INVALID_COOKIE + || old_cookie == NO_COOKIE + || old_cookie != trans->cookie) { + mod = find_cookie(trans->cookie); + old_cookie = trans->cookie; + } + } + + fprintf(memaccess_log, "0x%016llx,0x%016llx,%02lu,%08u,%08u,0x%08x,0x%08u:%08x,0x%08x:%08x,%s,%08u\n", + trans->app_cookie, +trans->cookie, +trans->cpu, +trans->tgid, +trans->tid, +(unsigned int)trans->pc, + trans_op->ibs_op_phys_addr_high, trans_op->ibs_op_phys_addr_low, + trans_op->ibs_op_ldst_linaddr_high, trans_op->ibs_op_ldst_linaddr_low, + (IBS_OP_IBS_LD_OP(trans_op))? 
"LD": "ST", + (unsigned int) IBS_OP_DC_MISS_LATENCY(trans_op)); } diff -paurN oprofile/events/x86-64/family10/unit_masks oprofile-new/events/x86-64/family10/unit_masks --- oprofile/events/x86-64/family10/unit_masks 2010-11-07 15:28:36.322348154 -0600 +++ oprofile-new/events/x86-64/family10/unit_masks 2010-11-06 12:02:21.021378226 -0500 @@ -363,6 +363,7 @@ name:retired_x87_fp type:bitmask default name:ibs_op type:bitmask default:0x01 0x00 Using IBS OP cycle count mode 0x01 Using IBS OP dispatch count mode + 0x02 Enable IBS OP Memory Access Log name:non_cancelled_l3_read_requests type:bitmask default:0xf7 0x01 RbBlk 0x02 RbBlkS diff -paurN oprofile/daemon/opd_ibs_macro.h oprofile-new/daemon/opd_ibs_macro.h --- oprofile/daemon/opd_ibs_macro.h 2010-11-07 15:28:36.192346762 -0600 +++ oprofile-new/daemon/opd_ibs_macro.h 2010-11-05 10:44:40.630276281 -0500 @@ -1,8 +1,8 @@ /** * @file daemon/opd_ibs_macro.h - * AMD Family10h Instruction Based Sampling (IBS) related macro. + * AMD Instruction Based Sampling (IBS) related macro. * - * @remark Copyright 2008 OProfile authors + * @remark Copyright 2008-2010 OProfile authors * @remark Read the file COPYING * * @author Jason Yeh <jas...@am...> @@ -16,7 +16,8 @@ /** * The following defines are bit masks that are used to select - * IBS fetch event flags and values at the MSR level. + * IBS fetch event flags and values at the + * MSRC001_1030 IBS Fetch Control Register (IbsFetchCtl) */ #define FETCH_MASK_LATENCY 0x0000ffff #define FETCH_MASK_COMPLETE 0x00040000 @@ -34,7 +35,10 @@ * The following defines are bit masks that are used to select * IBS op event flags and values at the MSR level. 
*/ + +/* MSRC001_1035 IBS Op Data Register (IbsOpData) */ #define BR_MASK_RETIRE 0x0000ffff +#define MASK_RIP_INVALID 0x00000040 #define BR_MASK_BRN_RET 0x00000020 #define BR_MASK_BRN_MISP 0x00000010 #define BR_MASK_BRN_TAKEN 0x00000008 @@ -42,17 +46,19 @@ #define BR_MASK_MISP_RETURN 0x00000002 #define BR_MASK_BRN_RESYNC 0x00000001 +/* MSRC001_1036 IBS Op Data Register (IbsOpData2) */ #define NB_MASK_L3_STATE 0x00000020 #define NB_MASK_REQ_DST_PROC 0x00000010 #define NB_MASK_REQ_DATA_SRC 0x00000007 +/* MSRC001_1037 IBS Op Data Register (IbsOpData3) */ #define DC_MASK_L2_HIT_1G 0x00080000 #define DC_MASK_PHY_ADDR_VALID 0x00040000 #define DC_MASK_LIN_ADDR_VALID 0x00020000 #define DC_MASK_MAB_HIT 0x00010000 #define DC_MASK_LOCKED_OP 0x00008000 -#define DC_MASK_WC_MEM_ACCESS 0x00004000 -#define DC_MASK_UC_MEM_ACCESS 0x00002000 +#define DC_MASK_UC_MEM_ACCESS 0x00004000 +#define DC_MASK_WC_MEM_ACCESS 0x00002000 #define DC_MASK_ST_TO_LD_CANCEL 0x00001000 #define DC_MASK_ST_TO_LD_FOR 0x00000800 #define DC_MASK_ST_BANK_CONFLICT 0x00000400 @@ -77,10 +83,9 @@ * at 0xf000. * * The definitions in this file *must* match definitions - * of IBS derived events in gh-events.xml and in the - * oprofile AMD Family 10h events file. More information + * of IBS derived events. More information * about IBS derived events is given in the Software Oprimization - * Guide for AMD Family 10h Processors. + * Guide. 
*/ /** @@ -108,6 +113,8 @@ #define IBS_FETCH_MAX (IBS_FETCH_END - IBS_FETCH_BASE + 1) #define IS_IBS_FETCH(x) (IBS_FETCH_BASE <= x && x <= IBS_FETCH_END) #define IBS_FETCH_OFFSET(x) (x - IBS_FETCH_BASE) +#define CHECK_FETCH_SELECTED_FLAG(x) if ( selected_flag & (1 << IBS_FETCH_OFFSET(x))) + /** * The following defines associate a 16-bit select value with an IBS @@ -129,6 +136,8 @@ #define IBS_OP_MAX (IBS_OP_END - IBS_OP_BASE + 1) #define IS_IBS_OP(x) (IBS_OP_BASE <= x && x <= IBS_OP_END) #define IBS_OP_OFFSET(x) (x - IBS_OP_BASE) +#define CHECK_OP_SELECTED_FLAG(x) if ( selected_flag & (1 << IBS_OP_OFFSET(x))) + /** * The following defines associate a 16-bit select value with an IBS @@ -166,6 +175,7 @@ #define IBS_OP_LS_MAX (IBS_OP_LS_END - IBS_OP_LS_BASE + 1) #define IS_IBS_OP_LS(x) (IBS_OP_LS_BASE <= x && x <= IBS_OP_LS_END) #define IBS_OP_LS_OFFSET(x) (x - IBS_OP_LS_BASE) +#define CHECK_OP_LS_SELECTED_FLAG(x) if ( selected_flag & (1 << IBS_OP_LS_OFFSET(x))) /** @@ -191,6 +201,7 @@ #define IBS_OP_NB_MAX (IBS_OP_NB_END - IBS_OP_NB_BASE + 1) #define IS_IBS_OP_NB(x) (IBS_OP_NB_BASE <= x && x <= IBS_OP_NB_END) #define IBS_OP_NB_OFFSET(x) (x - IBS_OP_NB_BASE) +#define CHECK_OP_NB_SELECTED_FLAG(x) if ( selected_flag & (1 << IBS_OP_NB_OFFSET(x))) #define OP_MAX_IBS_COUNTERS (IBS_FETCH_MAX + IBS_OP_MAX + IBS_OP_LS_MAX + IBS_OP_NB_MAX) @@ -215,8 +226,18 @@ /** Bit 52 IbsPhyAddrValid: instruction fetch physical address valid. */ #define IBS_FETCH_PHYS_ADDR_VALID(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_PHY_ADDR) != 0) +enum IBSL1PAGESIZE { + L1TLB4K = 0, + L1TLB2M, + L1TLB1G, + L1TLB_INVALID +}; + /** Bits 54:53 IbsL1TlbPgSz: instruction cache L1TLB page size. 
*/ #define IBS_FETCH_TLB_PAGE_SIZE(x) ((unsigned short)((x->ibs_fetch_ctl_high >> 21) & 0x3)) +#define IBS_FETCH_TLB_PAGE_SIZE_4K(x) (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB4K) +#define IBS_FETCH_TLB_PAGE_SIZE_2M(x) (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB2M) +#define IBS_FETCH_TLB_PAGE_SIZE_1G(x) (IBS_FETCH_TLB_PAGE_SIZE(x) == L1TLB1G) /** Bit 55 IbsL1TlbMiss: instruction cache L1TLB miss. */ #define IBS_FETCH_M_L1_TLB_MISS(x) ((x->ibs_fetch_ctl_high & FETCH_MASK_L1_MISS) != 0) @@ -252,22 +273,25 @@ #define IBS_OP_TAG_TO_RETIRE_CYCLES(x) ((unsigned short)((x->ibs_op_data1_low >> 16) & BR_MASK_RETIRE)) /** 32 op_branch_resync : resync macro-op. */ -#define IBS_OP_OP_BRANCH_RESYNC(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RESYNC) != 0) +#define IBS_OP_BRANCH_RESYNC(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RESYNC) != 0) /** 33 op_mispredict_return : mispredicted return macro-op. */ -#define IBS_OP_OP_MISPREDICT_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_MISP_RETURN) != 0) +#define IBS_OP_MISPREDICT_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_MISP_RETURN) != 0) /** 34 IbsOpReturn: return macro-op. */ -#define IBS_OP_OP_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_RETURN) != 0) +#define IBS_OP_RETURN(x) ((x->ibs_op_data1_high & BR_MASK_RETURN) != 0) /** 35 IbsOpBrnTaken: taken branch macro-op. */ -#define IBS_OP_OP_BRANCH_TAKEN(x) ((x->ibs_op_data1_high & BR_MASK_BRN_TAKEN) != 0) +#define IBS_OP_BRANCH_TAKEN(x) ((x->ibs_op_data1_high & BR_MASK_BRN_TAKEN) != 0) /** 36 IbsOpBrnMisp: mispredicted branch macro-op. */ -#define IBS_OP_OP_BRANCH_MISPREDICT(x) ((x->ibs_op_data1_high & BR_MASK_BRN_MISP) != 0) +#define IBS_OP_BRANCH_MISPREDICT(x) ((x->ibs_op_data1_high & BR_MASK_BRN_MISP) != 0) /** 37 IbsOpBrnRet: branch macro-op retired. */ -#define IBS_OP_OP_BRANCH_RETIRED(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RET) != 0) +#define IBS_OP_BRANCH_RETIRED(x) ((x->ibs_op_data1_high & BR_MASK_BRN_RET) != 0) + +/** 38 IbsRipInvalid: RIP invalid. 
*/ +#define IBS_OP_RIP_INVALID(x) ((x->ibs_op_data1_high & MASK_RIP_INVALID) != 0) /** * MSRC001_1036 IBS Op Data 2 Register (IbsOpData2) @@ -282,10 +306,18 @@ /** 2:0 NbIbsReqSrc: Northbridge IBS request data source */ #define IBS_OP_NB_IBS_REQ_SRC(x) ((unsigne... [truncated message content] |
From: Robert R. <rob...@am...> - 2010-11-22 16:43:56
|
On 15.11.10 10:46:58, Suravee Suthikulpanit wrote:
>
> This patch implements changes for IBS in family12h/14h/15h :
> - Add check for Maximum counts for IBS and IBS extended count.
> - Add support for IBS extended register (Branch target address register)
> - Changes for the IBS derived performance event translation:
>   * Enhance existing data translation logic and interface.
>   * Add IBS OP memory access data translation.
>   * Add IBS Op branch target address data translation.
> - Add performance data logging mechanism to an output file in session directory.
>   (/var/lib/oprofile/samples/ directory):
>   * Add OProfile extended deinit function
>   * IBS OP Memory Access Log (/var/lib/oprofile/samples/ibs_memaccess.log)
>   * IBS OP Branch Target Address Log (/var/lib/oprofile/samples/ibs_bta.log)(only family12h/14h/15h).
>
> ChangeLog-ibs-extended | 17
> daemon/init.c | 3
> daemon/opd_extended.c | 14
> daemon/opd_extended.h | 9
> daemon/opd_ibs.c | 203 ++++++-
> daemon/opd_ibs.h | 14
> daemon/opd_ibs_macro.h | 71 +-
> daemon/opd_ibs_trans.c | 1016 ++++++++++++++++++++------------------
> daemon/opd_ibs_trans.h | 12
> events/x86-64/family10/unit_masks | 1
> utils/opcontrol | 59 ++
> 11 files changed, 895 insertions(+), 524 deletions(-)

Support for new IBS features is in v2.6.37.

Acked-by: Robert Richter <rob...@am...>

-Robert

--
Advanced Micro Devices, Inc.
Operating System Research Center |
From: Maynard J. <may...@us...> - 2010-12-08 23:02:41
|
Suravee Suthikulpanit wrote:
>
> This patch implements changes for IBS in family12h/14h/15h :

Suravee,
I was a bit late in checking out this code that you committed for IBS support. Unfortunately, it doesn't compile on ppc64. I get the following error:

********************************
make[3]: Entering directory `/home/mpj/oprofile/daemon'
gcc -DHAVE_CONFIG_H -I. -I.. -I ../libabi -I ../libutil -I ../libop -I ../libdb -W -Wall -fno-common -Wdeclaration-after-statement -Werror -fno-omit-frame-pointer -g -O2 -MT opd_ibs.o -MD -MP -MF .deps/opd_ibs.Tpo -c -o opd_ibs.o opd_ibs.c
opd_ibs.c: In function 'ibs_init':
opd_ibs.c:513: error: impossible constraint in 'asm'
********************************

The error involves use of the cpuid instruction, which doesn't exist on ppc64 (and other) architectures. Please fix as soon as possible.

Thanks.
-Maynard

> - Add check for Maximum counts for IBS and IBS extended count.
> - Add support for IBS extended register (Branch target address register)
> - Changes for the IBS derived performance event translation:
>   * Enhance existing data translation logic and interface.
>   * Add IBS OP memory access data translation.
>   * Add IBS Op branch target address data translation.
> - Add performance data logging mechanism to an output file in session directory.
>   (/var/lib/oprofile/samples/ directory):
>   * Add OProfile extended deinit function
>   * IBS OP Memory Access Log (/var/lib/oprofile/samples/ibs_memaccess.log)
>   * IBS OP Branch Target Address Log (/var/lib/oprofile/samples/ibs_bta.log)(only family12h/14h/15h).
> > ChangeLog-ibs-extended | 17 > daemon/init.c | 3 > daemon/opd_extended.c | 14 > daemon/opd_extended.h | 9 > daemon/opd_ibs.c | 203 ++++++- > daemon/opd_ibs.h | 14 > daemon/opd_ibs_macro.h | 71 +- > daemon/opd_ibs_trans.c | 1016 ++++++++++++++++++++------------------ > daemon/opd_ibs_trans.h | 12 > events/x86-64/family10/unit_masks | 1 > utils/opcontrol | 59 ++ > 11 files changed, 895 insertions(+), 524 deletions(-) > > ---- > diff -paurN oprofile/ChangeLog oprofile-new/ChangeLog-ibs-extended > --- oprofile/ChangeLog 2010-11-07 15:28:36.112419594 -0600 > +++ oprofile-new/ChangeLog-ibs-extended 2010-11-07 16:11:36.651115844 -0600 > @@ -1,3 +1,20 @@ > +2010-11-7 Suravee Suthikulpanit <sur...@am...> > + > + * oprofile/utils/opcontrol: > + * oprofile/daemon/init.c: > + * oprofile/daemon/opd_extended.h: > + * oprofile/daemon/opd_extended.c: > + * oprofile/daemon/opd_ibs.h: > + * oprofile/daemon/opd_ibs.c: > + * oprofile/daemon/opd_ibs_trans.h: > + * oprofile/daemon/opd_ibs_trans.c: > + * oprofile/events/x86-64/family10/unit_masks: > + * oprofile/daemon/opd_ibs_macro.h: Add the new IBS supports for > + family12/14/15h includeding: > + - IBS Op branch target address log > + - IBS Op memory access log > + - IBS Op extended count bits > + > 2010-10-15 Roland Grunberg <rol...@gm...> > > * libop/op_xml_events.c: > diff -paurN oprofile/utils/opcontrol oprofile-new/utils/opcontrol > --- oprofile/utils/opcontrol 2010-11-07 15:28:36.322348154 -0600 > +++ oprofile-new/utils/opcontrol 2010-11-07 12:36:39.491330457 -0600 > @@ -1867,13 +1867,20 @@ verify_ibs() > IBS_FETCH_COUNT=$IBS_COUNT > IBS_FETCH_MASK=$IBS_MASK > elif test "$IBS_FETCH_COUNT" != "$IBS_COUNT" ; then > - echo "All IBS Fetch must have the same count." > + echo "ERROR: All IBS Fetch must have the same count." > exit 1 > fi > > # Check IBS_MASK consistency > if test "$IBS_FETCH_MASK" != "$IBS_MASK" ; then > - echo "All IBS Fetch must have the same unitmask." 
> + echo "ERROR: All IBS Fetch must have the same unitmask." > + exit 1 > + fi > + > + # Check IBS_FETCH_COUNT within range > + if test "$IBS_FETCH_COUNT" -gt 1048575 ; then > + echo "ERROR: IBS Fetch count is too large." > + echo " The maximum IBS-fetch count is 1048575." > exit 1 > fi > > @@ -1892,6 +1899,28 @@ verify_ibs() > echo "All IBS Op must have the same unitmask." > exit 1 > fi > + > + # Check IBS_OP_COUNT within range > + case "$CPUTYPE" in > + x86-64/family10) > + if test "$IBS_OP_COUNT" -gt 1048575 ; then > + echo "ERROR: IBS Op count is too large." > + echo " The maximum IBS-fetch count is 1048575." > + exit 1 > + fi > + ;; > + > + x86-64/family12h|\ > + x86-64/family14h|\ > + x86-64/family15h) > + if test "$IBS_OP_COUNT" -gt 134217727 ; then > + echo "ERROR: IBS Op count is too large." > + echo " The maximum IBS-Op count is 134217727." > + exit 1 > + fi > + ;; > + *) > + esac > fi > > return > @@ -1944,10 +1973,32 @@ do_param_setup_ibs() > # NOTE: We default to use dispatched_op if available. > # Some of the older family10 system does not have > # dispatched_ops feature. > - # dispatched op is enabled by bit 1 of the unitmask > + # Dispatched op is enabled by bit 0 of the unitmask > + IBS_OP_DISPATCHED_OP=$(( IBS_OP_UNITMASK & 0x1 )) > if test -f $MOUNT/ibs_op/dispatched_ops ; then > - IBS_OP_DISPATCHED_OP=$(( IBS_OP_UNITMASK & 0x1 )) > set_param ibs_op/dispatched_ops $IBS_OP_DISPATCHED_OP > + else > + if test $IBS_OP_DISPATCHED_OP -eq 1 ; then > + echo "ERROR: IBS Op dispatched ops is not supported." > + exit 1 > + fi > + fi > + > + # NOTE: BTA is enabled by bit 2 of the unitmask > + IBS_OP_BTA=$(( IBS_OP_UNITMASK & 0x4 )) > + if test -f $MOUNT/ibs_op/branch_target; then > + if [ "$IBS_OP_BTA" = "4" ] ; then > + set_param ibs_op/branch_target 1 > + else > + set_param ibs_op/branch_target 0 > + fi > + > + # TODO: Check if write successful > + else > + if test $IBS_OP_BTA -eq 1 ; then > + echo "ERROR: IBS Op Branch Target Address is not supported." 
> + exit 1 > + fi > fi > else > set_param ibs_op/enable 0 > diff -paurN oprofile/daemon/init.c oprofile-new/daemon/init.c > --- oprofile/daemon/init.c 2010-11-07 15:28:36.182420260 -0600 > +++ oprofile-new/daemon/init.c 2010-11-05 15:56:56.641572462 -0500 > @@ -24,6 +24,7 @@ > #include "opd_anon.h" > #include "opd_perfmon.h" > #include "opd_printf.h" > +#include "opd_extended.h" > > #include "op_version.h" > #include "op_config.h" > @@ -282,6 +283,8 @@ static void opd_sigterm(void) > opd_do_jitdumps(); > opd_print_stats(); > printf("oprofiled stopped %s", op_get_time()); > + opd_ext_deinitialize(); > + > exit(EXIT_FAILURE); > } > > diff -paurN oprofile/daemon/opd_extended.h oprofile-new/daemon/opd_extended.h > --- oprofile/daemon/opd_extended.h 2010-11-07 15:28:36.182420260 -0600 > +++ oprofile-new/daemon/opd_extended.h 2010-11-05 15:54:49.290251600 -0500 > @@ -35,6 +35,8 @@ struct opd_ext_feature { > struct opd_ext_handlers { > // Extended init > int (*ext_init)(char const *); > + // Extended deinit > + int (*ext_deinit)(); > // Extended statistics > int (*ext_print_stats)(); > // Extended sfile handlers > @@ -61,6 +63,13 @@ struct opd_ext_sfile_handlers { > extern int opd_ext_initialize(char const * value); > > /** > + * @param value: commandline input option string > + * > + * Deinitialize > + */ > +extern int opd_ext_deinitialize(); > + > +/** > * Print out extended feature statistics in oprofiled.log file > */ > extern void opd_ext_print_stats(); > diff -paurN oprofile/daemon/opd_extended.c oprofile-new/daemon/opd_extended.c > --- oprofile/daemon/opd_extended.c 2010-11-07 15:28:36.182420260 -0600 > +++ oprofile-new/daemon/opd_extended.c 2010-11-05 15:55:23.670338392 -0500 > @@ -109,6 +109,20 @@ err_out: > } > > > +int opd_ext_deinitialize() > +{ > + int ret = EXIT_FAILURE; > + > + if(opd_ext_feat_index == -1) { > + return 0; > + } > + > + ret = ext_feature_table[opd_ext_feat_index].handlers->ext_deinit(); > + > + return ret; > +} > + > + > void 
opd_ext_print_stats() > { > if (is_ext_enabled() > diff -paurN oprofile/daemon/opd_ibs.h oprofile-new/daemon/opd_ibs.h > --- oprofile/daemon/opd_ibs.h 2010-11-07 15:28:36.192346762 -0600 > +++ oprofile-new/daemon/opd_ibs.h 2010-11-05 15:53:48.980582133 -0500 > @@ -2,7 +2,7 @@ > * @file daemon/opd_ibs.h > * AMD Family10h Instruction Based Sampling (IBS) handling. > * > - * @remark Copyright 2008 OProfile authors > + * @remark Copyright 2008-2010 OProfile authors > * @remark Read the file COPYING > * > * @author Jason Yeh <jas...@am...> > @@ -70,18 +70,14 @@ struct ibs_op_sample { > /* MSRC001_1037 IBS Op Data 3 Register */ > unsigned int ibs_op_data3_low; > unsigned int ibs_op_data3_high; > + /* MSRC001_1038 IBS DC Linear Address */ > unsigned int ibs_op_ldst_linaddr_low; > unsigned int ibs_op_ldst_linaddr_high; > + /* MSRC001_1039 IBS DC Physical Address */ > unsigned int ibs_op_phys_addr_low; > unsigned int ibs_op_phys_addr_high; > -}; > - > - > -enum IBSL1PAGESIZE { > - L1TLB4K = 0, > - L1TLB2M, > - L1TLB1G, > - L1TLB_INVALID > + /* MSRC001_103B IBS Branch Target Address */ > + unsigned long ibs_op_brtgt_addr; > }; > > > diff -paurN oprofile/daemon/opd_ibs.c oprofile-new/daemon/opd_ibs.c > --- oprofile/daemon/opd_ibs.c 2010-11-07 15:28:36.192346762 -0600 > +++ oprofile-new/daemon/opd_ibs.c 2010-11-06 12:23:46.571357565 -0500 > @@ -2,7 +2,7 @@ > * @file daemon/opd_ibs.c > * AMD Family10h Instruction Based Sampling (IBS) handling. > * > - * @remark Copyright 2007 OProfile authors > + * @remark Copyright 2007-2010 OProfile authors > * @remark Read the file COPYING > * > * @author Jason Yeh <jas...@am...> > @@ -32,22 +32,37 @@ > #include <stdio.h> > #include <errno.h> > #include <string.h> > +#include <limits.h> > + > +#if defined(__i386__) && defined(__PIC__) > +/* %ebx may be the PIC register. 
*/ > + #define __cpuid(level, a, b, c, d) \ > + __asm__ ("xchgl\t%%ebx, %1\n\t" \ > + "cpuid\n\t" \ > + "xchgl\t%%ebx, %1\n\t" \ > + : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ > + : "0" (level)) > +#else > + #define __cpuid(level, a, b, c, d) \ > + __asm__ ("cpuid\n\t" \ > + : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ > + : "0" (level)) > +#endif > > extern op_cpu cpu_type; > extern int no_event_ok; > extern int sfile_equal(struct sfile const * sf, struct sfile const * sf2); > extern void sfile_dup(struct sfile * to, struct sfile * from); > +extern char * session_dir; > > -/* IBS Select Arrays/Counters */ > +/* IBS Select Counters */ > static unsigned int ibs_selected_size; > + > +/* These flags store the IBS-derived events selection. */ > static unsigned int ibs_fetch_selected_flag; > -static unsigned int ibs_fetch_selected_size; > static unsigned int ibs_op_selected_flag; > -static unsigned int ibs_op_selected_size; > static unsigned int ibs_op_ls_selected_flag; > -static unsigned int ibs_op_ls_selected_size; > static unsigned int ibs_op_nb_selected_flag; > -static unsigned int ibs_op_nb_selected_size; > > /* IBS Statistics */ > static unsigned long ibs_fetch_sample_stats; > @@ -64,6 +79,18 @@ struct opd_event ibs_vc[OP_MAX_IBS_COUNT > /* IBS Virtual Counter Index(VCI) Map*/ > unsigned int ibs_vci_map[OP_MAX_IBS_COUNTERS]; > > +/* CPUID information */ > +unsigned int ibs_family; > +unsigned int ibs_model; > +unsigned int ibs_stepping; > + > +/* IBS Extended MSRs */ > +static unsigned long ibs_bta_enabled; > + > +/* IBS log files */ > +FILE * memaccess_log; > +FILE * bta_log; > + > /** > * This function converts IBS fetch event flags and values into > * derived events. 
If the tagged (sampled) fetched caused a derived > @@ -75,7 +102,7 @@ static void opd_log_ibs_fetch(struct tra > if (!trans_fetch) > return; > > - trans_ibs_fetch(trans, ibs_fetch_selected_flag, ibs_fetch_selected_size); > + trans_ibs_fetch(trans, ibs_fetch_selected_flag); > } > > > @@ -89,9 +116,16 @@ static void opd_log_ibs_op(struct transi > if (!trans_op) > return; > > - trans_ibs_op(trans, ibs_op_selected_flag, ibs_op_selected_size); > - trans_ibs_op_ls(trans, ibs_op_ls_selected_flag, ibs_op_ls_selected_size); > - trans_ibs_op_nb(trans, ibs_op_nb_selected_flag, ibs_op_nb_selected_size); > + trans_ibs_op_mask_reserved(ibs_family, trans); > + > + if (trans_ibs_op_rip_invalid(trans) != 0) > + return; > + > + trans_ibs_op(trans, ibs_op_selected_flag); > + trans_ibs_op_ls(trans, ibs_op_ls_selected_flag); > + trans_ibs_op_nb(trans, ibs_op_nb_selected_flag); > + trans_ibs_op_ls_memaccess(trans); > + trans_ibs_op_bta(trans); > } > > > @@ -150,6 +184,26 @@ out: > } > > > +static void get_ibs_bta_status() > +{ > + FILE * fp = NULL; > + char buf[PATH_MAX]; > + > + /* Default to disable */ > + ibs_bta_enabled = 0; > + > + snprintf(buf, PATH_MAX, "/dev/oprofile/ibs_op/branch_target"); > + fp = fopen(buf, "r"); > + if (!fp) > + return; > + > + while (fgets(buf, PATH_MAX, fp) != NULL) > + ibs_bta_enabled = strtoul(buf, NULL, 10); > + > + fclose(fp); > +} > + > + > void code_ibs_fetch_sample(struct transient * trans) > { > struct ibs_fetch_sample * trans_fetch = NULL; > @@ -169,12 +223,12 @@ void code_ibs_fetch_sample(struct transi > > trans_fetch->rip = pop_buffer_value(trans); > > - trans_fetch->ibs_fetch_lin_addr_low = pop_buffer_value(trans); > - trans_fetch->ibs_fetch_lin_addr_high = pop_buffer_value(trans); > + trans_fetch->ibs_fetch_lin_addr_low = pop_buffer_value(trans); > + trans_fetch->ibs_fetch_lin_addr_high = pop_buffer_value(trans); > > - trans_fetch->ibs_fetch_ctl_low = pop_buffer_value(trans); > - trans_fetch->ibs_fetch_ctl_high = pop_buffer_value(trans); > - 
trans_fetch->ibs_fetch_phys_addr_low = pop_buffer_value(trans); > + trans_fetch->ibs_fetch_ctl_low = pop_buffer_value(trans); > + trans_fetch->ibs_fetch_ctl_high = pop_buffer_value(trans); > + trans_fetch->ibs_fetch_phys_addr_low = pop_buffer_value(trans); > trans_fetch->ibs_fetch_phys_addr_high = pop_buffer_value(trans); > > verbprintf(vsamples, > @@ -200,6 +254,30 @@ void code_ibs_fetch_sample(struct transi > } > > > +static void get_ibs_op_bta_sample(struct transient * trans, > + struct ibs_op_sample * trans_op) > +{ > + // Check remaining > + if (!enough_remaining(trans, 2)) { > + verbprintf(vext, "not enough remaining\n"); > + trans->remaining = 0; > + ibs_op_incomplete_stats++; > + return; > + } > + > + if (ibs_bta_enabled == 1) { > + trans_op->ibs_op_brtgt_addr = pop_buffer_value(trans); > + > + // Check if branch target address is valid (MSRC001_1035[37] == 1) > + if ((trans_op->ibs_op_data1_high & (0x00000001 << 5)) == 0) { > + trans_op->ibs_op_brtgt_addr = 0; > + } > + } else { > + trans_op->ibs_op_brtgt_addr = 0; > + } > +} > + > + > void code_ibs_op_sample(struct transient * trans) > { > struct ibs_op_sample * trans_op= NULL; > @@ -233,8 +311,10 @@ void code_ibs_op_sample(struct transient > trans_op->ibs_op_phys_addr_low = pop_buffer_value(trans); > trans_op->ibs_op_phys_addr_high = pop_buffer_value(trans); > > + get_ibs_op_bta_sample(trans, trans_op); > + > verbprintf(vsamples, > - "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n", > + "IBS_OP_X CPU:%ld PID:%d RIP:%lx D1HI:%x D1LO:%x D2LO:%x D3HI:%x D3LO:%x L_LO:%x P_LO:%x\n", > trans->cpu, > trans->tgid, > trans_op->rip, > @@ -339,16 +419,12 @@ static int ibs_parse_and_set_events(char > // Grouping > if (IS_IBS_FETCH(event->val)) { > ibs_fetch_selected_flag |= 1 << IBS_FETCH_OFFSET(event->val); > - ibs_fetch_selected_size++; > } else if (IS_IBS_OP(event->val)) { > ibs_op_selected_flag |= 1 << IBS_OP_OFFSET(event->val); > - ibs_op_selected_size++; > } else if
(IS_IBS_OP_LS(event->val)) { > ibs_op_ls_selected_flag |= 1 << IBS_OP_LS_OFFSET(event->val); > - ibs_op_ls_selected_size++; > } else if (IS_IBS_OP_NB(event->val)) { > ibs_op_nb_selected_flag |= 1 << IBS_OP_NB_OFFSET(event->val); > - ibs_op_nb_selected_size++; > } else { > return -1; > } > @@ -402,7 +478,6 @@ static int ibs_parse_and_set_um_fetch(ch > } > > > - > static int ibs_parse_and_set_um_op(char const * str, unsigned long int * ibs_op_um) > { > char * end = NULL; > @@ -418,6 +493,31 @@ static int ibs_parse_and_set_um_op(char > } > > > +static void check_cpuid_family_model_stepping() > +{ > + union { > + unsigned eax; > + struct { > + unsigned stepping : 4; > + unsigned model : 4; > + unsigned family : 4; > + unsigned res : 4; > + unsigned ext_model : 4; > + unsigned ext_family : 8; > + unsigned res2 : 4; > + }; > + } v; > + unsigned ebx, ecx, edx; > + > + /* CPUID Fn0000_0001_EAX Family, Model, Stepping */ > + __cpuid(1, v.eax, ebx, ecx, edx); > + > + ibs_family = v.family + v.ext_family; > + ibs_model = v.model + v.ext_model; > + ibs_stepping = v.stepping; > +} > + > + > static int ibs_init(char const * argv) > { > char * tmp, * ptr, * tok1, * tok2 = NULL; > @@ -532,15 +632,67 @@ static int ibs_init(char const * argv) > > // Allow no event > no_event_ok = 1; > + > + check_cpuid_family_model_stepping(); > + > + get_ibs_bta_status(); > + > + /* Create IBS memory access log */ > + memaccess_log = NULL; > + if (ibs_op_um & 0x2) { > + char filename[1024]; > + strncpy(filename, session_dir, 1023); > + strncat(filename, "/samples/ibs_memaccess.log", 1024); > + if ((memaccess_log = fopen(filename, "w")) == NULL) { > + verbprintf(vext, "Warning: Cannot create file %s\n", filename); > + > + } else { > + fprintf (memaccess_log, "# IBS Memory Access Log\n\n"); > + fprintf (memaccess_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address,\n"); > + fprintf (memaccess_log, "# phy-hi:phy-low,lin-hi:lin-low,accese-type,latency\n\n"); > + } > + } > + > + // 
Create IBS Branch Target Address (BTA) log > + bta_log = NULL; > + if (ibs_bta_enabled) { > + char filename[1024]; > + strncpy(filename, session_dir, 1023); > + strncat(filename, "/samples/ibs_bta.log", 1024); > + if ((bta_log = fopen(filename, "w")) == NULL) { > + verbprintf(vext, "Warning: Cannot create file %s\n", filename); > + } else { > + fprintf (bta_log, "# IBS Memory Access Log\n\n"); > + fprintf (bta_log, "# Format: app_cookie,cookie,cpu,tgid,tid,pc,branch-target-address\n\n"); > + } > + } > + > + return 0; > +} > + > + > +static int ibs_deinit() > +{ > + if (memaccess_log) { > + fclose (memaccess_log); > + memaccess_log = NULL; > + } > + > + if (bta_log) { > + fclose (bta_log); > + bta_log = NULL; > + } > return 0; > } > > > static int ibs_print_stats() > { > - printf("Nr. IBS Fetch samples : %lu (%lu entries)\n", ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7)); > + printf("Nr. IBS Fetch samples : %lu (%lu entries)\n", > + ibs_fetch_sample_stats, (ibs_fetch_sample_stats * 7)); > printf("Nr. IBS Fetch incompletes : %lu\n", ibs_fetch_incomplete_stats); > - printf("Nr. IBS Op samples : %lu (%lu entries)\n", ibs_op_sample_stats, (ibs_op_sample_stats * 13)); > + printf("Nr. IBS Op samples : %lu (%lu entries)\n", > + ibs_op_sample_stats, (ibs_op_sample_stats * 13)); > printf("Nr. IBS Op incompletes : %lu\n", ibs_op_incomplete_stats); > printf("Nr. 
IBS derived events : %lu\n", ibs_derived_event_stats); > return 0; > @@ -686,7 +838,8 @@ struct opd_ext_sfile_handlers ibs_sfile_ > > struct opd_ext_handlers ibs_handlers = > { > - .ext_init = &ibs_init, > + .ext_init = &ibs_init, > + .ext_deinit = &ibs_deinit, > .ext_print_stats = &ibs_print_stats, > - .ext_sfile = &ibs_sfile_handlers > + .ext_sfile = &ibs_sfile_handlers > }; > diff -paurN oprofile/daemon/opd_ibs_trans.h oprofile-new/daemon/opd_ibs_trans.h > --- oprofile/daemon/opd_ibs_trans.h 2010-11-07 15:28:36.182420260 -0600 > +++ oprofile-new/daemon/opd_ibs_trans.h 2010-11-06 10:59:05.492727144 -0500 > @@ -24,8 +24,12 @@ struct ibs_translation_table { > }; > > > -extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size); > -extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size); > -extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size); > -extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size); > +extern void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag); > +extern void trans_ibs_op (struct transient * trans, unsigned int selected_flag); > +extern void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag); > +extern void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag); > +extern int trans_ibs_op_rip_invalid (struct transient * trans); > +extern void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans); > +extern void trans_ibs_op_ls_memaccess(struct transient * trans); > +extern void trans_ibs_op_bta (struct transient * trans); > #endif // OPD_IBS_TRANS_H > diff -paurN oprofile/daemon/opd_ibs_trans.c oprofile-new/daemon/opd_ibs_trans.c > --- oprofile/daemon/opd_ibs_trans.c 2010-11-07 15:28:36.192346762 -0600 > +++ oprofile-new/daemon/opd_ibs_trans.c 2010-11-06 11:56:59.431773062 -0500 > @@ -1,8 
+1,8 @@ > /** > * @file daemon/opd_ibs_trans.c > - * AMD Family10h Instruction Based Sampling (IBS) translation. > + * AMD Instruction Based Sampling (IBS) translation. > * > - * @remark Copyright 2008 OProfile authors > + * @remark Copyright 2008 - 2010 OProfile authors > * @remark Read the file COPYING > * > * @author Jason Yeh <jas...@am...> > @@ -20,205 +20,185 @@ > #include <stdlib.h> > #include <stdio.h> > > -#define MAX_EVENTS_PER_GROUP 32 > +extern FILE * bta_log; > +extern FILE * memaccess_log; > > /* > - * --------------------- OP DERIVED FUNCTION > + * --------------------- FETCH DERIVED FUNCTION > */ > -void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag, unsigned int size) > +void trans_ibs_fetch (struct transient * trans, unsigned int selected_flag) > { > struct ibs_fetch_sample * trans_fetch = ((struct ibs_sample*)(trans->ext))->fetch; > - unsigned int i, j, mask = 1; > > - for (i = IBS_FETCH_BASE, j =0 ; i <= IBS_FETCH_END && j < size ; i++, mask = mask << 1) { > + if ((selected_flag) == 0) > + return; > > - if ((selected_flag & mask) == 0) > - continue; > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ALL) { > + /* IBS all fetch samples (kills + attempts) */ > + AGG_IBS_EVENT(DE_IBS_FETCH_ALL); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_KILLED) { > + /* IBS killed fetches ("case 0") -- All interesting event > + * flags are clear */ > + if (IBS_FETCH_KILLED(trans_fetch)) > + AGG_IBS_EVENT(DE_IBS_FETCH_KILLED); > + } > > - j++; > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ATTEMPTED) { > + /* Any non-killed fetch is an attempted fetch */ > + AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED); > + } > > - switch (i) { > - > - case DE_IBS_FETCH_ALL: > - /* IBS all fetch samples (kills + attempts) */ > - AGG_IBS_EVENT(DE_IBS_FETCH_ALL); > - break; > - > - case DE_IBS_FETCH_KILLED: > - /* IBS killed fetches ("case 0") -- All interesting event > - * flags are clear */ > - if (IBS_FETCH_KILLED(trans_fetch)) > - 
AGG_IBS_EVENT(DE_IBS_FETCH_KILLED); > - break; > - > - case DE_IBS_FETCH_ATTEMPTED: > - /* Any non-killed fetch is an attempted fetch */ > - AGG_IBS_EVENT(DE_IBS_FETCH_ATTEMPTED); > - break; > - > - case DE_IBS_FETCH_COMPLETED: > - if (IBS_FETCH_FETCH_COMPLETION(trans_fetch)) > - /* IBS Fetch Completed */ > - AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED); > - break; > - > - case DE_IBS_FETCH_ABORTED: > - if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch)) > - /* IBS Fetch Aborted */ > - AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED); > - break; > - > - case DE_IBS_L1_ITLB_HIT: > - /* IBS L1 ITLB hit */ > - if (IBS_FETCH_L1_TLB_HIT(trans_fetch)) > - AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT); > - break; > - > - case DE_IBS_ITLB_L1M_L2H: > - /* IBS L1 ITLB miss and L2 ITLB hit */ > - if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch)) > - AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H); > - break; > - > - case DE_IBS_ITLB_L1M_L2M: > - /* IBS L1 & L2 ITLB miss; complete ITLB miss */ > - if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch)) > - AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M); > - break; > - > - case DE_IBS_IC_MISS: > - /* IBS instruction cache miss */ > - if (IBS_FETCH_INST_CACHE_MISS(trans_fetch)) > - AGG_IBS_EVENT(DE_IBS_IC_MISS); > - break; > - > - case DE_IBS_IC_HIT: > - /* IBS instruction cache hit */ > - if (IBS_FETCH_INST_CACHE_HIT(trans_fetch)) > - AGG_IBS_EVENT(DE_IBS_IC_HIT); > - break; > - > - case DE_IBS_FETCH_4K_PAGE: > - if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) > - && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB4K) > - AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE); > - break; > - > - case DE_IBS_FETCH_2M_PAGE: > - if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) > - && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB2M) > - AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE); > - break; > - > - case DE_IBS_FETCH_1G_PAGE: > - if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) > - && IBS_FETCH_TLB_PAGE_SIZE(trans_fetch) == L1TLB1G) > - AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE); > - break; > - > - case DE_IBS_FETCH_XX_PAGE: > - break; > - > - case DE_IBS_FETCH_LATENCY: 
> - if (IBS_FETCH_FETCH_LATENCY(trans_fetch)) > - AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY, > - IBS_FETCH_FETCH_LATENCY(trans_fetch)); > - break; > - default: > - break; > - } > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_COMPLETED) { > + if (IBS_FETCH_FETCH_COMPLETION(trans_fetch)) > + /* IBS Fetch Completed */ > + AGG_IBS_EVENT(DE_IBS_FETCH_COMPLETED); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_ABORTED) { > + if (!IBS_FETCH_FETCH_COMPLETION(trans_fetch)) > + /* IBS Fetch Aborted */ > + AGG_IBS_EVENT(DE_IBS_FETCH_ABORTED); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_L1_ITLB_HIT) { > + /* IBS L1 ITLB hit */ > + if (IBS_FETCH_L1_TLB_HIT(trans_fetch)) > + AGG_IBS_EVENT(DE_IBS_L1_ITLB_HIT); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2H) { > + /* IBS L1 ITLB miss and L2 ITLB hit */ > + if (IBS_FETCH_ITLB_L1M_L2H(trans_fetch)) > + AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2H); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_ITLB_L1M_L2M) { > + /* IBS L1 & L2 ITLB miss; complete ITLB miss */ > + if (IBS_FETCH_ITLB_L1M_L2M(trans_fetch)) > + AGG_IBS_EVENT(DE_IBS_ITLB_L1M_L2M); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_MISS) { > + /* IBS instruction cache miss */ > + if (IBS_FETCH_INST_CACHE_MISS(trans_fetch)) > + AGG_IBS_EVENT(DE_IBS_IC_MISS); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_IC_HIT) { > + /* IBS instruction cache hit */ > + if (IBS_FETCH_INST_CACHE_HIT(trans_fetch)) > + AGG_IBS_EVENT(DE_IBS_IC_HIT); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_4K_PAGE) { > + if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) > + && IBS_FETCH_TLB_PAGE_SIZE_4K(trans_fetch)) > + AGG_IBS_EVENT(DE_IBS_FETCH_4K_PAGE); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_2M_PAGE) { > + if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) > + && IBS_FETCH_TLB_PAGE_SIZE_2M(trans_fetch)) > + AGG_IBS_EVENT(DE_IBS_FETCH_2M_PAGE); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_1G_PAGE) { > + if (IBS_FETCH_PHYS_ADDR_VALID(trans_fetch) > + && 
IBS_FETCH_TLB_PAGE_SIZE_1G(trans_fetch)) > + AGG_IBS_EVENT(DE_IBS_FETCH_1G_PAGE); > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_XX_PAGE) { > + } > + > + CHECK_FETCH_SELECTED_FLAG(DE_IBS_FETCH_LATENCY) { > + if (IBS_FETCH_FETCH_LATENCY(trans_fetch)) > + AGG_IBS_COUNT(DE_IBS_FETCH_LATENCY, > + IBS_FETCH_FETCH_LATENCY(trans_fetch)); > } > } > > + > /* > * --------------------- OP DERIVED FUNCTION > */ > -void trans_ibs_op (struct transient * trans, unsigned int selected_flag, unsigned int size) > +void trans_ibs_op (struct transient * trans, unsigned int selected_flag) > { > struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; > - unsigned int i, j, mask = 1; > > - for (i = IBS_OP_BASE, j =0 ; i <= IBS_OP_END && j < size ; i++, mask = mask << 1) { > + if ((selected_flag) == 0) > + return; > > - if ((selected_flag & mask) == 0) > - continue; > + CHECK_OP_SELECTED_FLAG(DE_IBS_OP_ALL) { > + /* All IBS op samples */ > + AGG_IBS_EVENT(DE_IBS_OP_ALL); > + } > > - j++; > + CHECK_OP_SELECTED_FLAG(DE_IBS_OP_TAG_TO_RETIRE) { > + /* Tally retire cycle counts for all sampled macro-ops > + * IBS tag to retire cycles */ > + if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)) > + AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE, > + IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)); > + } > > - switch (i) { > - > - case DE_IBS_OP_ALL: > - /* All IBS op samples */ > - AGG_IBS_EVENT(DE_IBS_OP_ALL); > - break; > - > - case DE_IBS_OP_TAG_TO_RETIRE: > - /* Tally retire cycle counts for all sampled macro-ops > - * IBS tag to retire cycles */ > - if (IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)) > - AGG_IBS_COUNT(DE_IBS_OP_TAG_TO_RETIRE, > - IBS_OP_TAG_TO_RETIRE_CYCLES(trans_op)); > - break; > - > - case DE_IBS_OP_COMP_TO_RETIRE: > - /* IBS completion to retire cycles */ > - if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)) > - AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE, > - IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)); > - break; > - > - case DE_IBS_BRANCH_RETIRED: > - if (IBS_OP_OP_BRANCH_RETIRED(trans_op)) > - /* 
IBS Branch retired op */ > - AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ; > - break; > - > - case DE_IBS_BRANCH_MISP: > - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) > - /* Test branch-specific event flags */ > - /* IBS mispredicted Branch op */ > - && IBS_OP_OP_BRANCH_MISPREDICT(trans_op)) > - AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ; > - break; > - > - case DE_IBS_BRANCH_TAKEN: > - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) > - /* IBS taken Branch op */ > - && IBS_OP_OP_BRANCH_TAKEN(trans_op)) > - AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN); > - break; > - > - case DE_IBS_BRANCH_MISP_TAKEN: > - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) > - /* IBS mispredicted taken branch op */ > - && IBS_OP_OP_BRANCH_TAKEN(trans_op) > - && IBS_OP_OP_BRANCH_MISPREDICT(trans_op)) > - AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN); > - break; > - > - case DE_IBS_RETURN: > - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) > - /* IBS return op */ > - && IBS_OP_OP_RETURN(trans_op)) > - AGG_IBS_EVENT(DE_IBS_RETURN); > - break; > - > - case DE_IBS_RETURN_MISP: > - if (IBS_OP_OP_BRANCH_RETIRED(trans_op) > - /* IBS mispredicted return op */ > - && IBS_OP_OP_RETURN(trans_op) > - && IBS_OP_OP_BRANCH_MISPREDICT(trans_op)) > - AGG_IBS_EVENT(DE_IBS_RETURN_MISP); > - break; > - > - case DE_IBS_RESYNC: > - /* Test for a resync macro-op */ > - if (IBS_OP_OP_BRANCH_RESYNC(trans_op)) > - AGG_IBS_EVENT(DE_IBS_RESYNC); > - break; > - default: > - break; > - } > + CHECK_OP_SELECTED_FLAG(DE_IBS_OP_COMP_TO_RETIRE) { > + /* IBS completion to retire cycles */ > + if (IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)) > + AGG_IBS_COUNT(DE_IBS_OP_COMP_TO_RETIRE, > + IBS_OP_COM_TO_RETIRE_CYCLES(trans_op)); > + } > + > + CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_RETIRED) { > + if (IBS_OP_BRANCH_RETIRED(trans_op)) > + /* IBS Branch retired op */ > + AGG_IBS_EVENT(DE_IBS_BRANCH_RETIRED) ; > + } > + > + CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP) { > + if (IBS_OP_BRANCH_RETIRED(trans_op) > + /* Test branch-specific event flags */ > + /* IBS mispredicted Branch op */ > + && 
IBS_OP_BRANCH_MISPREDICT(trans_op)) > + AGG_IBS_EVENT(DE_IBS_BRANCH_MISP) ; > + } > + > + CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_TAKEN) { > + if (IBS_OP_BRANCH_RETIRED(trans_op) > + /* IBS taken Branch op */ > + && IBS_OP_BRANCH_TAKEN(trans_op)) > + AGG_IBS_EVENT(DE_IBS_BRANCH_TAKEN); > + } > + > + CHECK_OP_SELECTED_FLAG(DE_IBS_BRANCH_MISP_TAKEN) { > + if (IBS_OP_BRANCH_RETIRED(trans_op) > + /* IBS mispredicted taken branch op */ > + && IBS_OP_BRANCH_TAKEN(trans_op) > + && IBS_OP_BRANCH_MISPREDICT(trans_op)) > + AGG_IBS_EVENT(DE_IBS_BRANCH_MISP_TAKEN); > + } > + > + CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN) { > + if (IBS_OP_BRANCH_RETIRED(trans_op) > + /* IBS return op */ > + && IBS_OP_RETURN(trans_op)) > + AGG_IBS_EVENT(DE_IBS_RETURN); > + } > + > + CHECK_OP_SELECTED_FLAG(DE_IBS_RETURN_MISP) { > + if (IBS_OP_BRANCH_RETIRED(trans_op) > + /* IBS mispredicted return op */ > + && IBS_OP_RETURN(trans_op) > + && IBS_OP_BRANCH_MISPREDICT(trans_op)) > + AGG_IBS_EVENT(DE_IBS_RETURN_MISP); > + } > + > + CHECK_OP_SELECTED_FLAG(DE_IBS_RESYNC) { > + /* Test for a resync macro-op */ > + if (IBS_OP_BRANCH_RESYNC(trans_op)) > + AGG_IBS_EVENT(DE_IBS_RESYNC); > } > } > > @@ -226,213 +206,201 @@ void trans_ibs_op (struct transient * tr > /* > * --------------------- OP LS DERIVED FUNCTION > */ > -void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag, unsigned int size) > +void trans_ibs_op_ls (struct transient * trans, unsigned int selected_flag) > { > struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; > - unsigned int i, j, mask = 1; > > /* Preliminary check */ > if (!IBS_OP_IBS_LD_OP(trans_op) && !IBS_OP_IBS_ST_OP(trans_op)) > return; > > > - for (i = IBS_OP_LS_BASE, j =0 ; i <= IBS_OP_LS_END && j < size ; i++, mask = mask << 1) { > + if ((selected_flag) == 0) > + return; > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_ALL_OP) { > + /* Count the number of LS op samples */ > + AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ; > + } > + > + 
CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOAD_OP) { > + if (IBS_OP_IBS_LD_OP(trans_op)) > + /* TALLy an IBS load derived event */ > + AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ; > + } > > - if ((selected_flag & mask) == 0) > - continue; > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STORE_OP) { > + if (IBS_OP_IBS_ST_OP(trans_op)) > + /* Count and handle store operations */ > + AGG_IBS_EVENT(DE_IBS_LS_STORE_OP); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1H) { > + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)) > + /* L1 DTLB hit -- This is the most frequent case */ > + AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H); > + } > > - j++; > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2H) { > + /* l2_translation_size = 1 */ > + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) > + /* L1 DTLB miss, L2 DTLB hit */ > + AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H); > + } > > - switch (i) { > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DTLB_L1M_L2M) { > + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > + && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) > + /* L1 DTLB miss, L2 DTLB miss */ > + AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M); > + } > > - case DE_IBS_LS_ALL_OP: > - /* Count the number of LS op samples */ > - AGG_IBS_EVENT(DE_IBS_LS_ALL_OP) ; > - break; > - > - case DE_IBS_LS_LOAD_OP: > - if (IBS_OP_IBS_LD_OP(trans_op)) > - /* TALLy an IBS load derived event */ > - AGG_IBS_EVENT(DE_IBS_LS_LOAD_OP) ; > - break; > - > - case DE_IBS_LS_STORE_OP: > - if (IBS_OP_IBS_ST_OP(trans_op)) > - /* Count and handle store operations */ > - AGG_IBS_EVENT(DE_IBS_LS_STORE_OP); > - break; > - > - case DE_IBS_LS_DTLB_L1H: > - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > - && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op)) > - /* L1 DTLB hit -- This is the most frequent case */ > - AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1H); > - break; > - > - case DE_IBS_LS_DTLB_L1M_L2H: > - /* 
l2_translation_size = 1 */ > - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > - && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) > - /* L1 DTLB miss, L2 DTLB hit */ > - AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2H); > - break; > - > - case DE_IBS_LS_DTLB_L1M_L2M: > - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > - && IBS_OP_IBS_DC_L2_TLB_MISS(trans_op)) > - /* L1 DTLB miss, L2 DTLB miss */ > - AGG_IBS_EVENT(DE_IBS_LS_DTLB_L1M_L2M); > - break; > - > - case DE_IBS_LS_DC_MISS: > - if (IBS_OP_IBS_DC_MISS(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_DC_MISS); > - break; > - > - case DE_IBS_LS_DC_HIT: > - if (!IBS_OP_IBS_DC_MISS(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_DC_HIT); > - break; > - > - case DE_IBS_LS_MISALIGNED: > - if (IBS_OP_IBS_DC_MISS_ACC(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED); > - break; > - > - case DE_IBS_LS_BNK_CONF_LOAD: > - if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD); > - break; > - > - case DE_IBS_LS_BNK_CONF_STORE: > - if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE); > - break; > - > - case DE_IBS_LS_STL_FORWARDED: > - if (IBS_OP_IBS_LD_OP(trans_op) > - /* Data forwarding info are valid only for load ops */ > - && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ; > - break; > - > - case DE_IBS_LS_STL_CANCELLED: > - if (IBS_OP_IBS_LD_OP(trans_op)) > - if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ; > - break; > - > - case DE_IBS_LS_UC_MEM_ACCESS: > - if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS); > - break; > - > - case DE_IBS_LS_WC_MEM_ACCESS: > - if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS); > - break; > - > - case DE_IBS_LS_LOCKED_OP: > - if (IBS_OP_IBS_LOCKED_OP(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP); > - break; > - > - case DE_IBS_LS_MAB_HIT: > - 
if (IBS_OP_IBS_DC_MAB_HIT(trans_op)) > - AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT); > - break; > - > - case DE_IBS_LS_L1_DTLB_4K: > - /* l1_translation */ > - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > - && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > - > - && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) > - && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) > - /* This is the most common case, unfortunately */ > - AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ; > - break; > - > - case DE_IBS_LS_L1_DTLB_2M: > - /* l1_translation */ > - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > - && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > - > - && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)) > - /* 2M L1 DTLB page translation */ > - AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M); > - break; > - > - case DE_IBS_LS_L1_DTLB_1G: > - /* l1_translation */ > - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > - && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > - > - && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) > - && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) > - /* 1G L1 DTLB page translation */ > - AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G); > - break; > - > - case DE_IBS_LS_L1_DTLB_RES: > - break; > - > - case DE_IBS_LS_L2_DTLB_4K: > - /* l2_translation_size = 1 */ > - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > - && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) > - > - /* L2 DTLB page translation */ > - && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) > - && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) > - /* 4K L2 DTLB page translation */ > - AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K); > - break; > - > - case DE_IBS_LS_L2_DTLB_2M: > - /* l2_translation_size = 1 */ > - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > - && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) > - > - /* L2 DTLB page translation */ > - && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) > - && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) > - /* 2M L2 DTLB page translation */ > - AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M); > - break; > - > - case 
DE_IBS_LS_L2_DTLB_1G: > - /* l2_translation_size = 1 */ > - if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > - && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > - && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) > - > - /* L2 DTLB page translation */ > - && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) > - && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) > - /* 2M L2 DTLB page translation */ > - AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G); > - break; > - > - case DE_IBS_LS_L2_DTLB_RES2: > - break; > - > - case DE_IBS_LS_DC_LOAD_LAT: > - if (IBS_OP_IBS_LD_OP(trans_op) > - /* If the load missed in DC, tally the DC load miss latency */ > - && IBS_OP_IBS_DC_MISS(trans_op)) > - /* DC load miss latency is only reliable for load ops */ > - AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT, > - IBS_OP_DC_MISS_LATENCY(trans_op)) ; > - break; > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_MISS) { > + if (IBS_OP_IBS_DC_MISS(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_DC_MISS); > + } > > - default: > - break; > - } > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_HIT) { > + if (!IBS_OP_IBS_DC_MISS(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_DC_HIT); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MISALIGNED) { > + if (IBS_OP_IBS_DC_MISS_ACC(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_MISALIGNED); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_LOAD) { > + if (IBS_OP_IBS_DC_LD_BNK_CON(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_LOAD); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_BNK_CONF_STORE) { > + if (IBS_OP_IBS_DC_ST_BNK_CON(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_BNK_CONF_STORE); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_FORWARDED) { > + if (IBS_OP_IBS_LD_OP(trans_op) > + /* Data forwarding info are valid only for load ops */ > + && IBS_OP_IBS_DC_ST_TO_LD_FWD(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_STL_FORWARDED) ; > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_STL_CANCELLED) { > + if (IBS_OP_IBS_LD_OP(trans_op)) > + if (IBS_OP_IBS_DC_ST_TO_LD_CAN(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_STL_CANCELLED) ; > + } > + > + 
CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_UC_MEM_ACCESS) { > + if (IBS_OP_IBS_DC_UC_MEM_ACC(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_UC_MEM_ACCESS); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_WC_MEM_ACCESS) { > + if (IBS_OP_IBS_DC_WC_MEM_ACC(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_WC_MEM_ACCESS); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_LOCKED_OP) { > + if (IBS_OP_IBS_LOCKED_OP(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_LOCKED_OP); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_MAB_HIT) { > + if (IBS_OP_IBS_DC_MAB_HIT(trans_op)) > + AGG_IBS_EVENT(DE_IBS_LS_MAB_HIT); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_4K) { > + /* l1_translation */ > + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > + > + && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) > + && !IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) > + /* This is the most common case, unfortunately */ > + AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_4K) ; > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_2M) { > + /* l1_translation */ > + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > + > + && IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op)) > + /* 2M L1 DTLB page translation */ > + AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_2M); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_1G) { > + /* l1_translation */ > + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > + && !IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > + > + && !IBS_OP_IBS_DC_L1_TLB_HIT_2MB(trans_op) > + && IBS_OP_IBS_DC_L1_TLB_HIT_1GB(trans_op)) > + /* 1G L1 DTLB page translation */ > + AGG_IBS_EVENT(DE_IBS_LS_L1_DTLB_1G); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L1_DTLB_RES) { > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_4K) { > + /* l2_translation_size = 1 */ > + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) > + > + /* L2 DTLB page translation */ > + && 
!IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) > + && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) > + /* 4K L2 DTLB page translation */ > + AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_4K); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_2M) { > + /* l2_translation_size = 1 */ > + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) > + > + /* L2 DTLB page translation */ > + && IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) > + && !IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) > + /* 2M L2 DTLB page translation */ > + AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_2M); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_1G) { > + /* l2_translation_size = 1 */ > + if (IBS_OP_IBS_DC_LIN_ADDR_VALID(trans_op) > + && IBS_OP_IBS_DC_L1_TLB_MISS(trans_op) > + && !IBS_OP_IBS_DC_L2_TLB_MISS(trans_op) > + > + /* L2 DTLB page translation */ > + && !IBS_OP_IBS_DC_L2_TLB_HIT_2MB(trans_op) > + && IBS_OP_IBS_DC_L2_TLB_HIT_1GB(trans_op)) > + /* 1G L2 DTLB page translation */ > + AGG_IBS_EVENT(DE_IBS_LS_L2_DTLB_1G); > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_L2_DTLB_RES2) { > + } > + > + CHECK_OP_LS_SELECTED_FLAG(DE_IBS_LS_DC_LOAD_LAT) { > + if (IBS_OP_IBS_LD_OP(trans_op) > + /* If the load missed in DC, tally the DC load miss latency */ > + && IBS_OP_IBS_DC_MISS(trans_op)) > + /* DC load miss latency is only reliable for load ops */ > + AGG_IBS_COUNT(DE_IBS_LS_DC_LOAD_LAT, > + IBS_OP_DC_MISS_LATENCY(trans_op)) ; > } > } > > @@ -443,12 +411,14 @@ void trans_ibs_op_ls (struct transient * > * that miss in L1 and L2 cache. 
NB data arrives too late > * to be reliable for store operations > */ > -void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag, unsigned int size) > +void trans_ibs_op_nb (struct transient * trans, unsigned int selected_flag) > { > struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; > - unsigned int i, j, mask = 1; > > /* Preliminary check */ > + if ((selected_flag) == 0) > + return; > + > if (!IBS_OP_IBS_LD_OP(trans_op)) > return; > > @@ -458,97 +428,219 @@ void trans_ibs_op_nb (struct transient * > if (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0) > return; > > - for (i = IBS_OP_NB_BASE, j =0 ; i <= IBS_OP_NB_END && j < size ; i++, mask = mask << 1) { > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL) { > + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) > + /* Request was serviced by local processor */ > + AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ; > + } > > - if ((selected_flag & mask) == 0) > - continue; > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE) { > + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) > + /* Request was serviced by remote processor */ > + AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ; > + } > > - j++; > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_L3) { > + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > + && IBS_OP_NB_IBS_REQ_SRC_01(trans_op)) > + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3); > + } > + > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_CACHE) { > + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > + && IBS_OP_NB_IBS_REQ_SRC_02(trans_op)) > + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE); > + } > + > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_CACHE) { > + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > + && IBS_OP_NB_IBS_REQ_SRC_02(trans_op)) > + AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ; > + } > > - switch (i) { > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_DRAM) { > + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > + && IBS_OP_NB_IBS_REQ_SRC_03(trans_op)) > + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM); > + } > > - case DE_IBS_NB_LOCAL: > - if 
(!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) > - /* Request was serviced by local processor */ > - AGG_IBS_EVENT(DE_IBS_NB_LOCAL) ; > - break; > - > - case DE_IBS_NB_REMOTE: > - if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) > - /* Request was serviced by remote processor */ > - AGG_IBS_EVENT(DE_IBS_NB_REMOTE) ; > - break; > - > - case DE_IBS_NB_LOCAL_L3: > - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x1)) > - AGG_IBS_EVENT(DE_IBS_NB_LOCAL_L3); > - break; > - > - case DE_IBS_NB_LOCAL_CACHE: > - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)) > - AGG_IBS_EVENT(DE_IBS_NB_LOCAL_CACHE); > - break; > - > - case DE_IBS_NB_REMOTE_CACHE: > - if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2)) > - AGG_IBS_EVENT(DE_IBS_NB_REMOTE_CACHE) ; > - break; > - > - case DE_IBS_NB_LOCAL_DRAM: > - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3)) > - AGG_IBS_EVENT(DE_IBS_NB_LOCAL_DRAM); > - break; > - > - case DE_IBS_NB_REMOTE_DRAM: > - if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x3)) > - AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ; > - break; > - > - case DE_IBS_NB_LOCAL_OTHER: > - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7)) > - AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER); > - break; > - > - case DE_IBS_NB_REMOTE_OTHER: > - if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > - && (IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x7)) > - AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ; > - break; > - > - case DE_IBS_NB_CACHE_STATE_M: > - if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2) > - && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) > - AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ; > - break; > - > - case DE_IBS_NB_CACHE_STATE_O: > - if ((IBS_OP_NB_IBS_REQ_SRC(trans_op) == 0x2) > - && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) > - AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ; > - break; > - > - case 
DE_IBS_NB_LOCAL_LATENCY: > - if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) > - /* Request was serviced by local processor */ > - AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY, > - IBS_OP_DC_MISS_LATENCY(trans_op)); > - break; > - > - case DE_IBS_NB_REMOTE_LATENCY: > - if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) > - /* Request was serviced by remote processor */ > - AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY, > - IBS_OP_DC_MISS_LATENCY(trans_op)); > - break; > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_DRAM) { > + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > + && IBS_OP_NB_IBS_REQ_SRC_03(trans_op)) > + AGG_IBS_EVENT(DE_IBS_NB_REMOTE_DRAM) ; > + } > > - default: > - break; > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_OTHER) { > + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > + && IBS_OP_NB_IBS_REQ_SRC_07(trans_op)) > + AGG_IBS_EVENT(DE_IBS_NB_LOCAL_OTHER); > + } > + > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_OTHER) { > + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op) > + && IBS_OP_NB_IBS_REQ_SRC_07(trans_op)) > + AGG_IBS_EVENT(DE_IBS_NB_REMOTE_OTHER) ; > + } > + > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_M) { > + if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op) > + && !IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) > + AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_M) ; > + } > + > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_CACHE_STATE_O) { > + if (IBS_OP_NB_IBS_REQ_SRC_02(trans_op) > + && IBS_OP_NB_IBS_CACHE_HIT_ST(trans_op)) > + AGG_IBS_EVENT(DE_IBS_NB_CACHE_STATE_O) ; > + } > + > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_LOCAL_LATENCY) { > + if (!IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) > + /* Request was serviced by local processor */ > + AGG_IBS_COUNT(DE_IBS_NB_LOCAL_LATENCY, > + IBS_OP_DC_MISS_LATENCY(trans_op)); > + } > + > + CHECK_OP_NB_SELECTED_FLAG(DE_IBS_NB_REMOTE_LATENCY) { > + if (IBS_OP_NB_IBS_REQ_DST_PROC(trans_op)) > + /* Request was serviced by remote processor */ > + AGG_IBS_COUNT(DE_IBS_NB_REMOTE_LATENCY, > + IBS_OP_DC_MISS_LATENCY(trans_op)); > + } > +} > + > + > +int trans_ibs_op_rip_invalid 
(struct transient * trans) > +{ > + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; > + > + if (IBS_OP_RIP_INVALID(trans_op)) > + return 1; > + > + return 0; > +} > + > + > +void trans_ibs_op_mask_reserved (unsigned int family, struct transient * trans) > +{ > + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; > + > + switch (family) { > + case 0x10: > + /* Reserved IbsRipInvalid (MSRC001_1035[38])*/ > + trans_op->ibs_op_data1_high &= ~MASK_RIP_INVALID; > + break; > + case 0x12: > + /* Reserved NbIbsReqDstProc (MSRC001_1036[4]) */ > + trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC; > + /* Reserved NbIbsReqCacheHitSt (MSRC001_1036[5]) */ > + trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE; > + break; > + case 0x14: > + /* Reserved NbIbsReqDstProc (MSRC001_1036[4]) */ > + trans_op->ibs_op_data2_low &= ~NB_MASK_REQ_DST_PROC; > + /* Reserved NbIbsReqCacheHitSt (MSRC001_1036[5]) */ > + trans_op->ibs_op_data2_low &= ~NB_MASK_L3_STATE; > + /* Reserved IbsDcL1tlbHit1G (MSRC001_1037[5]) */ > + trans_op->ibs_op_data3_low &= ~DC_MASK_L1_HIT_1G; > + /* Reserved IbsDcLdBnkCon (MSRC001_1037[9]) */ > + trans_op->ibs_op_data3_low &= ~DC_MASK_LD_BANK_CONFLICT; > + /* Reserved IbsDcStBnkCon (MSRC001_1037[10]) */ > + trans_op->ibs_op_data3_low &= ~DC_MASK_ST_BANK_CONFLICT; > + /* Reserved IbsDcStToLdCan (MSRC001_1037[12]) */ > + trans_op->ibs_op_data3_low &= ~DC_MASK_ST_TO_LD_CANCEL; > + /* Reserved IbsDcL2tlbHit1G (MSRC001_1037[19]) */ > + trans_op->ibs_op_data3_low &= ~DC_MASK_L2_HIT_1G; > + > + break; > + case 0x15: > + default: > + break; > + > + } > +} > + > + > +void trans_ibs_op_bta(struct transient * trans) > +{ > + static cookie_t old_cookie = NO_COOKIE; > + static cookie_t old_app_cookie = NO_COOKIE; > + static char const * mod = NULL; > + static char const * app = NULL; > + const char vmlinux[10] = "vmlinux"; > + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; > + > + if (!bta_log) > + 
return; > + > + if (!trans_op->ibs_op_brtgt_addr) > + return; > + > + if( old_app_cookie == INVALID_COOKIE > + || old_app_cookie == NO_COOKIE > + || old_app_cookie != trans->app_cookie) { > + app = find_cookie(trans->app_cookie); > + old_app_cookie = trans->app_cookie; > + } > + > + if (trans->in_kernel == 1) { > + mod = vmlinux; > + old_cookie = NO_COOKIE; > + } else { > + if( old_cookie == INVALID_COOKIE > + || old_cookie == NO_COOKIE > + || old_cookie != trans->cookie) { > + mod = find_cookie(trans->cookie); > + old_cookie = trans->cookie; > } > } > + > + fprintf(bta_log, "0x%016llx,0x%016llx,%02lu %08u,%08u,0x%08x,0x%08lx\n", > + trans->app_cookie, trans->cookie, trans->cpu, trans->tgid, trans->tid, (unsigned int)trans->pc, > + trans_op->ibs_op_brtgt_addr); > +} > + > + > +void trans_ibs_op_ls_memaccess(struct transient * trans) > +{ > + static cookie_t old_cookie = NO_COOKIE; > + static cookie_t old_app_cookie = NO_COOKIE; > + static char const * mod = NULL; > + static char const * app = NULL; > + const char vmlinux[10] = "vmlinux"; > + struct ibs_op_sample * trans_op = ((struct ibs_sample*)(trans->ext))->op; > + > + if (!memaccess_log) > + return; > + > + if( old_app_cookie == INVALID_COOKIE > + || old_app_cookie == NO_COOKIE > + || old_app_cookie != trans->app_cookie) { > + app = find_cookie(trans->app_cookie); > + old_app_cookie = trans->app_cookie; > + } > + > + if (trans->in_kernel == 1) { > + mod = vmlinux; > + old_cookie = NO_COOKIE; > + } else { > + if( old_cookie == INVALID_COOKIE > + || old_cookie == NO_COOKIE > + || old_cookie != trans->cookie) { > + mod = find_cookie(trans->cookie); > + old_cookie = trans->cookie; > + } > + } > + > + fprintf(memaccess_log, "0x%016llx,0x%016llx,%02lu,%08u,%08u,0x%08x,0x%08x:%08x,0x%08x:%08x,%s,%08u\n", > + trans->app_cookie, > +trans->cookie, > +trans->cpu, > +trans->tgid, > +trans->tid, > +(unsigned int)trans->pc, > + trans_op->ibs_op_phys_addr_high, trans_op->ibs_op_phys_addr_low, > + trans_op->ibs_op_ldst_linaddr_high, 
trans_op->ibs_op_ldst_linaddr_low, > + (IBS_OP_IBS_LD_OP(trans_op))? "LD": "ST", > + (unsigned int) IBS_OP_DC_MISS_LATENCY(trans_op)); > } > diff -paurN oprofile/events/x86-64/family10/unit_masks oprofile-new/events/x86-64/family10/unit_masks > --- oprofile/events/x86-64/family10/unit_masks 2010-11-07 15:28:36.322348154 -0600 > +++ oprofile-new/events/x86-64/family10/unit_masks 2010-11-06 12:02:21.021378226 -0500 > @@ -363,6 +363,7 @@ name:retired_x87_fp type:bitmask default > name:ibs_op type:bitmask default:0x01 > 0x00 Using IBS OP cycle count mode > 0x01 Using IBS OP dispatch count mode > + 0x02 Enable IBS OP Memory Access Log > name:non_cancelled_l3_read_requests type:bitmask default:0xf7 > 0x01 RbBlk > 0x02 RbBlkS > diff -paurN oprofile/daemon/opd_ibs_macro.h oprofile-new/daemon/opd_ibs_macro.h > --- oprofile/daemon/opd_ibs_macro.h 2010-11-07 15:28:36.192346762 -0600 > +++ oprofile-new/daemon/opd_ibs_macro.h 2010-11-05 10:44:40.630276281 -0500 > @@ -1,8 +1,8 @@ > /** > * @file daemon/opd_ibs_macro.h > - * AMD Family10h Instruction Based Sampling (IBS) related macro. > + * AMD Instruction Based Sampling (IBS) related macro. > * > - * @remark Copyright 2008 OProfile authors > + * @remark Copyright 2008-2010 OProfile authors > * @remark Read the file COPYING > * > * @author Jason Yeh <jas...@am...> > @@ -16,7 +16,8 @@ > > /** > * The following defines are bit masks that are used to select > - * IBS fetch event flags and values at the MSR level. > + * IBS fetch event flags and values at the > + * MSRC001_1030 IBS Fetch Control Register (IbsFetchCtl) > */ > #define FETCH_MASK_LATENCY 0x0000ffff > #define FETCH_MASK_COMPLETE 0x00040000 > @@ -34,7 +35,10 @@ > * The following defines are bit masks that are used to select > * IBS op event flags and values at the MSR level. 
> */ > + > +/* MSRC001_1035 IBS Op Data Register (IbsOpData) */ > #define BR_MASK_RETIRE 0x0000ffff > +#define MASK_RIP_INVALID 0x00000040 > #define BR_MASK_BRN_RET 0x00000020 > #define BR_MASK_BRN_MISP 0x00000010 > #define BR_MASK_BRN_TAKEN 0x00000008 > @@ -42,17 +46,19 @@ > #define BR_MASK_MISP_RETURN 0x00000002 > #define BR_MASK_BRN_RESYNC 0x00000001 > > +/* MSRC001_1036 IBS Op Data Register (IbsOpData2) */ > #define NB_MASK_L3_STATE 0x00000020 > #define NB_MASK_REQ_DST_PROC 0x00000010 > #define NB_MASK_REQ_DATA_SRC 0x00000007 > > +/* MSRC001_1037 IBS Op Data Register (IbsOpData3) */ > #define DC_MASK_L2_HIT_1G 0x00080000 > #define DC_MASK_PHY_ADDR_VALID 0x00040000 > #define DC_MASK_LIN_ADDR_VALID 0x00020000 > #define DC_MASK_MAB_HIT 0x00010000 > #define DC_MASK_LOCKED_OP 0x00008000 > -#define DC_MASK_WC_MEM_ACCESS 0x00004000 > -#define DC_MASK_UC_MEM_ACCESS 0x00002000 > +#define DC_MASK_UC_MEM_ACCESS 0x00004000 > +#define DC_MASK_WC_MEM_ACCESS 0x00002000 > #define DC_MASK_ST_TO_LD_CANCEL 0x00001000 > #define DC_MASK_ST_TO_LD_FOR 0x00000800 > #define DC_MASK_ST_BANK_CONFLICT 0x00000400 > @@ -77,10 +83,9 @@ > * at 0xf000. > * > * The definitions in this file *must* match definitions > - * of IBS derived events in gh-events.xml and in the > - * oprofile AMD Family 10h events file. More information > + * of IBS derived events. More information > * about IBS derived events is given in the Software Oprimization > - * Guide for AMD Family 10h Processors. > + * Guide. > */ > > /** > @@ -108,6 +113,8 @@ > #define IBS_FETCH_MAX (IBS_FETCH_END... [truncated message content] |