Diff of /docs/intel-corei7-events [9766c7] .. [28a31d] Maximize Restore

  Switch to side-by-side view

--- a/docs/intel-corei7-events
+++ b/docs/intel-corei7-events
@@ -1,102 +1,102 @@
-<tr><td>CPU_CLK_UNHALTED</td><td>	Clock cycles when not halted </td><td> 0, 1, 2, 3</td><td>
-</td>
-
-</tr>
-
-<tr><td>UNHALTED_REFERENCE_CYCLES</td><td>	Unhalted reference cycles </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>LLC_MISSES</td><td>	Last level cache demand requests from this core that missed the LLC </td><td> 0, 1, 2, 3</td><td>
+<tr><td>CPU_CLK_UNHALTED</td><td>	Clock cycles when not halted </td><td> all</td><td>
+</td>
+
+</tr>
+
+<tr><td>UNHALTED_REFERENCE_CYCLES</td><td>	Unhalted reference cycles </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>LLC_MISSES</td><td>	Last level cache demand requests from this core that missed the LLC </td><td> all</td><td>
 	0x41: No unit mask
  <br />
 </td>
 
 </tr>
 
-<tr><td>LLC_REFS</td><td>	Last level cache demand requests from this core </td><td> 0, 1, 2, 3</td><td>
+<tr><td>LLC_REFS</td><td>	Last level cache demand requests from this core </td><td> all</td><td>
 	0x4f: No unit mask
  <br />
 </td>
 
 </tr>
 
-<tr><td>INST_RETIRED</td><td>	number of instructions retired </td><td> 0, 1, 2, 3</td><td>
-	0x01: any_p instructions retired
- <br />
-	0x02: x87 Counts the number of floating point computational operations retired: floating point computational operations executed by the assist handler and sub-operations of complex floating point instructions like transcendental instructions
- <br />
-</td>
-
-</tr>
-
-<tr><td>BR_INST_RETIRED</td><td>	number of branch instructions retired </td><td> 0, 1, 2, 3</td><td>
-	0x00: all_branches See Table A-1
- <br />
-	0x01: conditional Counts the number of conditional branch instructions retired
- <br />
-	0x02: near_call Counts the number of direct & indirect near unconditional calls retired
- <br />
-	0x04: all_branches Counts the number of branch instructions retired
- <br />
-</td>
-
-</tr>
-
-<tr><td>BR_MISS_PRED_RETIRED</td><td>	number of mispredicted branches retired (precise) </td><td> 0, 1, 2, 3</td><td>
-	0x00: all_branches See Table A-1
- <br />
-	0x02: near_call Counts mispredicted direct & indirect near unconditional retired calls
- <br />
-</td>
-
-</tr>
-
-<tr><td>SB_FORWARD</td><td>	Counts the number of store forwards. </td><td> 0, 1, 2, 3</td><td>
-	0x01: any Counts the number of store forwards
- <br />
-</td>
-
-</tr>
-
-<tr><td>LOAD_BLOCK</td><td>	Counts the number of loads blocked </td><td> 0, 1, 2, 3</td><td>
-	0x01: std Counts the number of loads blocked by a preceding store with unknown data
- <br />
-	0x04: address_offset Counts the number of loads blocked by a preceding store address
- <br />
-</td>
-
-</tr>
-
-<tr><td>SB_DRAIN</td><td>	Counts the cycles of store buffer drains. </td><td> 0, 1, 2, 3</td><td>
-	0x01: cycles Counts the cycles of store buffer drains
- <br />
-</td>
-
-</tr>
-
-<tr><td>MISALIGN_MEM_REF</td><td>	Counts the number of misaligned load references </td><td> 0, 1, 2, 3</td><td>
-	0x01: load Counts the number of misaligned load references
- <br />
-	0x02: store Counts the number of misaligned store references
- <br />
-	0x03: any Counts the number of misaligned memory references
- <br />
-</td>
-
-</tr>
-
-<tr><td>STORE_BLOCKS</td><td>	This event counts the number of load operations delayed caused by preceding stores. </td><td> 0, 1, 2, 3</td><td>
-	0x01: not_sta This event counts the number of load operations delayed caused by preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflict with the load but which incompletely overlap the load
- <br />
-	0x02: sta This event counts load operations delayed caused by preceding stores whose addresses are unknown (STA block)
- <br />
-	0x04: at_ret Counts number of loads delayed with at-Retirement block code
- <br />
-	0x08: l1d_block Cacheable loads delayed with L1D block code
+<tr><td>INST_RETIRED</td><td>	number of instructions retired </td><td> all</td><td>
+	0x01: (name=any_p) instructions retired
+ <br />
+	0x02: (name=x87) Counts the number of floating point computational operations retired: floating point computational operations executed by the assist handler and sub-operations of complex floating point instructions like transcendental instructions
+ <br />
+</td>
+
+</tr>
+
+<tr><td>BR_INST_RETIRED</td><td>	number of branch instructions retired </td><td> all</td><td>
+	0x00: (name=all_branches) See Table A-1
+ <br />
+	0x01: (name=conditional) Counts the number of conditional branch instructions retired
+ <br />
+	0x02: (name=near_call) Counts the number of direct & indirect near unconditional calls retired
+ <br />
+	0x04: (name=all_branches) Counts the number of branch instructions retired
+ <br />
+</td>
+
+</tr>
+
+<tr><td>BR_MISS_PRED_RETIRED</td><td>	number of mispredicted branches retired (precise) </td><td> all</td><td>
+	0x00: (name=all_branches) See Table A-1
+ <br />
+	0x02: (name=near_call) Counts mispredicted direct & indirect near unconditional retired calls
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SB_FORWARD</td><td>	Counts the number of store forwards. </td><td> all</td><td>
+	0x01: (name=any) Counts the number of store forwards
+ <br />
+</td>
+
+</tr>
+
+<tr><td>LOAD_BLOCK</td><td>	Counts the number of loads blocked </td><td> all</td><td>
+	0x01: (name=std) Counts the number of loads blocked by a preceding store with unknown data
+ <br />
+	0x04: (name=address_offset) Counts the number of loads blocked by a preceding store address
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SB_DRAIN</td><td>	Counts the cycles of store buffer drains. </td><td> all</td><td>
+	0x01: (name=cycles) Counts the cycles of store buffer drains
+ <br />
+</td>
+
+</tr>
+
+<tr><td>MISALIGN_MEM_REF</td><td>	Counts the number of misaligned load references </td><td> all</td><td>
+	0x01: (name=load) Counts the number of misaligned load references
+ <br />
+	0x02: (name=store) Counts the number of misaligned store references
+ <br />
+	0x03: (name=any) Counts the number of misaligned memory references
+ <br />
+</td>
+
+</tr>
+
+<tr><td>STORE_BLOCKS</td><td>	This event counts the number of load operations delayed caused by preceding stores. </td><td> all</td><td>
+	0x01: (name=not_sta) This event counts the number of load operations delayed caused by preceding stores whose addresses are known but whose data is unknown, and preceding stores that conflict with the load but which incompletely overlap the load
+ <br />
+	0x02: (name=sta) This event counts load operations delayed caused by preceding stores whose addresses are unknown (STA block)
+ <br />
+	0x04: (name=at_ret) Counts number of loads delayed with at-Retirement block code
+ <br />
+	0x08: (name=l1d_block) Cacheable loads delayed with L1D block code
  <br />
 	0x0f: any All loads delayed due to store blocks
  <br />
@@ -104,206 +104,206 @@
 
 </tr>
 
-<tr><td>PARTIAL_ADDRESS_ALIAS</td><td>	Counts false dependency due to partial address aliasing </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>DTLB_LOAD_MISSES</td><td>	Counts dtlb page walks </td><td> 0, 1, 2, 3</td><td>
-	0x01: any Counts all load misses that cause a page walk
- <br />
-	0x02: walk_completed Counts number of completed page walks due to load miss in the STLB
- <br />
-	0x10: stlb_hit Number of cache load STLB hits
- <br />
-	0x20: pde_miss Number of DTLB cache load misses where the low part of the linear to physical address translation was missed
- <br />
-	0x40: pdp_miss Number of DTLB cache load misses where the high part of the linear to physical address translation was missed
- <br />
-	0x80: large_walk_completed Counts number of completed large page walks due to load miss in the STLB
- <br />
-</td>
-
-</tr>
-
-<tr><td>MEMORY_DISAMBIGURATION</td><td>	Counts memory disambiguration events </td><td> 0, 1, 2, 3</td><td>
-	0x01: reset Counts memory disambiguration reset cycles
- <br />
-	0x02: success Counts the number of loads that memory disambiguration succeeded
- <br />
-	0x04: watchdog Counts the number of times the memory disambiguration watchdog kicked in
- <br />
-	0x08: watch_cycles Counts the cycles that the memory disambiguration watchdog is active
- <br />
-</td>
-
-</tr>
-
-<tr><td>MEM_INST_RETIRED</td><td>	Counts the number of instructions with an architecturally-visible load/store retired on the architected path. </td><td> 0, 1, 2, 3</td><td>
-	0x01: loads Counts the number of instructions with an architecturally-visible store retired on the architected path
- <br />
-	0x02: stores Counts the number of instructions with an architecturally-visible store retired on the architected path
- <br />
-</td>
-
-</tr>
-
-<tr><td>MEM_STORE_RETIRED</td><td>	The event counts the number of retired stores that missed the DTLB. The DTLB miss is not counted if the store operation causes a fault. Does not count prefetches. Counts both primary and secondary misses to the TLB </td><td> 0, 1, 2, 3</td><td>
-	0x01: dtlb_miss The event counts the number of retired stores that missed the DTLB
- <br />
-</td>
-
-</tr>
-
-<tr><td>UOPS_ISSUED</td><td>	Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i.e. the UOPs issued from the front end to the back end. </td><td> 0, 1, 2, 3</td><td>
-	0x01: any Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i
- <br />
-	0x01: stalled_cycles Counts the number of cycles no Uops issued by the Register Allocation Table to the Reservation Station, i
- <br />
-	0x02: fused Counts the number of fused Uops that were issued from the Register Allocation Table to the Reservation Station
- <br />
-</td>
-
-</tr>
-
-<tr><td>MEM_UNCORE_RETIRED</td><td>	Counts number of memory load instructions retired where the memory reference hit modified data in another core </td><td> 0, 1, 2, 3</td><td>
-	0x02: other_core_l2_hitm Counts number of memory load instructions retired where the memory reference hit modified data in a sibling core residing on the same socket
- <br />
-	0x08: remote_cache_local_home_hit Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and HIT in a remote socket's cache
- <br />
-	0x10: remote_dram Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and was remotely homed
- <br />
-	0x20: local_dram Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and required a local socket memory reference
- <br />
-</td>
-
-</tr>
-
-<tr><td>FP_COMP_OPS_EXE</td><td>	Counts the number of FP Computational Uops Executed. </td><td> 0, 1, 2, 3</td><td>
-	0x01: x87 Counts the number of FP Computational Uops Executed
- <br />
-	0x02: mmx Counts number of MMX Uops executed
- <br />
-	0x04: sse_fp Counts number of SSE and SSE2 FP uops executed
- <br />
-	0x08: sse2_integer Counts number of SSE2 integer uops executed
- <br />
-	0x10: sse_fp_packed Counts number of SSE FP packed uops executed
- <br />
-	0x20: sse_fp_scalar Counts number of SSE FP scalar uops executed
- <br />
-	0x40: sse_single_precision Counts number of SSE* FP single precision uops executed
- <br />
-	0x80: sse_double_precision Counts number of SSE* FP double precision uops executed
- <br />
-</td>
-
-</tr>
-
-<tr><td>SIMD_INT_128</td><td>	Counts number of 128 bit SIMD integer operations. </td><td> 0, 1, 2, 3</td><td>
-	0x01: packed_mpy Counts number of 128 bit SIMD integer multiply operations
- <br />
-	0x02: packed_shift Counts number of 128 bit SIMD integer shift operations
- <br />
-	0x04: pack Counts number of 128 bit SIMD integer pack operations
- <br />
-	0x08: unpack Counts number of 128 bit SIMD integer unpack operations
- <br />
-	0x10: packed_logical Counts number of 128 bit SIMD integer logical operations
- <br />
-	0x20: packed_arith Counts number of 128 bit SIMD integer arithmetic operations
- <br />
-	0x40: shuffle_move Counts number of 128 bit SIMD integer shuffle and move operations
- <br />
-</td>
-
-</tr>
-
-<tr><td>LOAD_DISPATCH</td><td>	Counts number of loads dispatched from the Reservation Station that bypass. </td><td> 0, 1, 2, 3</td><td>
-	0x01: rs Counts number of loads dispatched from the Reservation Station that bypass the Memory Order Buffer
- <br />
-	0x02: rs_delayed Counts the number of delayed RS dispatches at the stage latch
- <br />
-	0x04: mob Counts the number of loads dispatched from the Reservation Station to the Memory Order Buffer
- <br />
-	0x07: any Counts all loads dispatched from the Reservation Station
- <br />
-</td>
-
-</tr>
-
-<tr><td>ARITH</td><td>	Counts division cycles and number of multiplies. Includes integer and FP, but excludes DPPS/MPSAD. </td><td> 0, 1, 2, 3</td><td>
-	0x01: cycles_div_busy Counts the number of cycles the divider is busy executing divide or square root operations
- <br />
-	0x02: mul Counts the number of multiply operations executed
- <br />
-</td>
-
-</tr>
-
-<tr><td>INST_QUEUE_WRITES</td><td>	Counts the number of instructions written into the instruction queue every cycle. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>INST_DECODED</td><td>	Counts number of instructions that require decoder 0 to be decoded. Usually, this means that the instruction maps to more than 1 uop </td><td> 0, 1, 2, 3</td><td>
-	0x01: dec0 Counts number of instructions that require decoder 0 to be decoded
- <br />
-</td>
-
-</tr>
-
-<tr><td>TWO_UOP_INSTS_DECODED</td><td>	An instruction that generates two uops was decoded </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>HW_INT</td><td>	Counts hardware interrupt events. </td><td> 0, 1, 2, 3</td><td>
-	0x01: rcv Number of interrupt received
- <br />
-	0x02: cycles_masked Number of cycles interrupt are masked
- <br />
-	0x04: cycles_pending_and_masked Number of cycles interrupts are pending and masked
- <br />
-</td>
-
-</tr>
-
-<tr><td>INST_QUEUE_WRITE_CYCLES</td><td>	This event counts the number of cycles during which instructions are written to the instruction queue. Dividing this counter by the number of instructions written to the instruction queue (INST_QUEUE_WRITES) yields the average number of instructions decoded each cycle. If this number is less than four and the pipe stalls, this indicates that the decoder is failing to decode enough instructions per cycle to sustain the 4-wide pipeline. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>L2_RQSTS</td><td>	Counts number of L2 data loads </td><td> 0, 1, 2, 3</td><td>
-	0x01: ld_hit Counts number of loads that hit the L2 cache
- <br />
-	0x02: ld_miss Counts the number of loads that miss the L2 cache
- <br />
-	0x03: loads Counts all L2 load requests
- <br />
-	0x04: rfo_hit Counts the number of store RFO requests that hit the L2 cache
- <br />
-	0x08: rfo_miss Counts the number of store RFO requests that miss the L2 cache
+<tr><td>PARTIAL_ADDRESS_ALIAS</td><td>	Counts false dependency due to partial address aliasing </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>DTLB_LOAD_MISSES</td><td>	Counts dtlb page walks </td><td> all</td><td>
+	0x01: (name=any) Counts all load misses that cause a page walk
+ <br />
+	0x02: (name=walk_completed) Counts number of completed page walks due to load miss in the STLB
+ <br />
+	0x10: (name=stlb_hit) Number of cache load STLB hits
+ <br />
+	0x20: (name=pde_miss) Number of DTLB cache load misses where the low part of the linear to physical address translation was missed
+ <br />
+	0x40: (name=pdp_miss) Number of DTLB cache load misses where the high part of the linear to physical address translation was missed
+ <br />
+	0x80: (name=large_walk_completed) Counts number of completed large page walks due to load miss in the STLB
+ <br />
+</td>
+
+</tr>
+
+<tr><td>MEMORY_DISAMBIGURATION</td><td>	Counts memory disambiguation events </td><td> all</td><td>
+	0x01: (name=reset) Counts memory disambiguation reset cycles
+ <br />
+	0x02: (name=success) Counts the number of loads for which memory disambiguation succeeded
+ <br />
+	0x04: (name=watchdog) Counts the number of times the memory disambiguation watchdog kicked in
+ <br />
+	0x08: (name=watch_cycles) Counts the cycles that the memory disambiguation watchdog is active
+ <br />
+</td>
+
+</tr>
+
+<tr><td>MEM_INST_RETIRED</td><td>	Counts the number of instructions with an architecturally-visible load/store retired on the architected path. </td><td> all</td><td>
+	0x01: (name=loads) Counts the number of instructions with an architecturally-visible load retired on the architected path
+ <br />
+	0x02: (name=stores) Counts the number of instructions with an architecturally-visible store retired on the architected path
+ <br />
+</td>
+
+</tr>
+
+<tr><td>MEM_STORE_RETIRED</td><td>	The event counts the number of retired stores that missed the DTLB. The DTLB miss is not counted if the store operation causes a fault. Does not count prefetches. Counts both primary and secondary misses to the TLB </td><td> all</td><td>
+	0x01: (name=dtlb_miss) The event counts the number of retired stores that missed the DTLB
+ <br />
+</td>
+
+</tr>
+
+<tr><td>UOPS_ISSUED</td><td>	Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i.e. the UOPs issued from the front end to the back end. </td><td> all</td><td>
+	0x01: (name=any) Counts the number of Uops issued by the Register Allocation Table to the Reservation Station, i.e. the UOPs issued from the front end to the back end
+ <br />
+	0x01: (name=stalled_cycles) Counts the number of cycles no Uops issued by the Register Allocation Table to the Reservation Station, i.e. from the front end to the back end
+ <br />
+	0x02: (name=fused) Counts the number of fused Uops that were issued from the Register Allocation Table to the Reservation Station
+ <br />
+</td>
+
+</tr>
+
+<tr><td>MEM_UNCORE_RETIRED</td><td>	Counts number of memory load instructions retired where the memory reference hit modified data in another core </td><td> all</td><td>
+	0x02: (name=other_core_l2_hitm) Counts number of memory load instructions retired where the memory reference hit modified data in a sibling core residing on the same socket
+ <br />
+	0x08: (name=remote_cache_local_home_hit) Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and HIT in a remote socket's cache
+ <br />
+	0x10: (name=remote_dram) Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and was remotely homed
+ <br />
+	0x20: (name=local_dram) Counts number of memory load instructions retired where the memory reference missed the L1, L2 and LLC caches and required a local socket memory reference
+ <br />
+</td>
+
+</tr>
+
+<tr><td>FP_COMP_OPS_EXE</td><td>	Counts the number of FP Computational Uops Executed. </td><td> all</td><td>
+	0x01: (name=x87) Counts the number of FP Computational Uops Executed
+ <br />
+	0x02: (name=mmx) Counts number of MMX Uops executed
+ <br />
+	0x04: (name=sse_fp) Counts number of SSE and SSE2 FP uops executed
+ <br />
+	0x08: (name=sse2_integer) Counts number of SSE2 integer uops executed
+ <br />
+	0x10: (name=sse_fp_packed) Counts number of SSE FP packed uops executed
+ <br />
+	0x20: (name=sse_fp_scalar) Counts number of SSE FP scalar uops executed
+ <br />
+	0x40: (name=sse_single_precision) Counts number of SSE* FP single precision uops executed
+ <br />
+	0x80: (name=sse_double_precision) Counts number of SSE* FP double precision uops executed
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SIMD_INT_128</td><td>	Counts number of 128 bit SIMD integer operations. </td><td> all</td><td>
+	0x01: (name=packed_mpy) Counts number of 128 bit SIMD integer multiply operations
+ <br />
+	0x02: (name=packed_shift) Counts number of 128 bit SIMD integer shift operations
+ <br />
+	0x04: (name=pack) Counts number of 128 bit SIMD integer pack operations
+ <br />
+	0x08: (name=unpack) Counts number of 128 bit SIMD integer unpack operations
+ <br />
+	0x10: (name=packed_logical) Counts number of 128 bit SIMD integer logical operations
+ <br />
+	0x20: (name=packed_arith) Counts number of 128 bit SIMD integer arithmetic operations
+ <br />
+	0x40: (name=shuffle_move) Counts number of 128 bit SIMD integer shuffle and move operations
+ <br />
+</td>
+
+</tr>
+
+<tr><td>LOAD_DISPATCH</td><td>	Counts number of loads dispatched from the Reservation Station. </td><td> all</td><td>
+	0x01: (name=rs) Counts number of loads dispatched from the Reservation Station that bypass the Memory Order Buffer
+ <br />
+	0x02: (name=rs_delayed) Counts the number of delayed RS dispatches at the stage latch
+ <br />
+	0x04: (name=mob) Counts the number of loads dispatched from the Reservation Station to the Memory Order Buffer
+ <br />
+	0x07: (name=any) Counts all loads dispatched from the Reservation Station
+ <br />
+</td>
+
+</tr>
+
+<tr><td>ARITH</td><td>	Counts division cycles and number of multiplies. Includes integer and FP, but excludes DPPS/MPSAD. </td><td> all</td><td>
+	0x01: (name=cycles_div_busy) Counts the number of cycles the divider is busy executing divide or square root operations
+ <br />
+	0x02: (name=mul) Counts the number of multiply operations executed
+ <br />
+</td>
+
+</tr>
+
+<tr><td>INST_QUEUE_WRITES</td><td>	Counts the number of instructions written into the instruction queue every cycle. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>INST_DECODED</td><td>	Counts number of instructions that require decoder 0 to be decoded. Usually, this means that the instruction maps to more than 1 uop </td><td> all</td><td>
+	0x01: (name=dec0) Counts number of instructions that require decoder 0 to be decoded
+ <br />
+</td>
+
+</tr>
+
+<tr><td>TWO_UOP_INSTS_DECODED</td><td>	An instruction that generates two uops was decoded </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>HW_INT</td><td>	Counts hardware interrupt events. </td><td> all</td><td>
+	0x01: (name=rcv) Number of interrupts received
+ <br />
+	0x02: (name=cycles_masked) Number of cycles interrupts are masked
+ <br />
+	0x04: (name=cycles_pending_and_masked) Number of cycles interrupts are pending and masked
+ <br />
+</td>
+
+</tr>
+
+<tr><td>INST_QUEUE_WRITE_CYCLES</td><td>	This event counts the number of cycles during which instructions are written to the instruction queue. Dividing this counter by the number of instructions written to the instruction queue (INST_QUEUE_WRITES) yields the average number of instructions decoded each cycle. If this number is less than four and the pipe stalls, this indicates that the decoder is failing to decode enough instructions per cycle to sustain the 4-wide pipeline. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L2_RQSTS</td><td>	Counts number of L2 data loads </td><td> all</td><td>
+	0x01: (name=ld_hit) Counts number of loads that hit the L2 cache
+ <br />
+	0x02: (name=ld_miss) Counts the number of loads that miss the L2 cache
+ <br />
+	0x03: (name=loads) Counts all L2 load requests
+ <br />
+	0x04: (name=rfo_hit) Counts the number of store RFO requests that hit the L2 cache
+ <br />
+	0x08: (name=rfo_miss) Counts the number of store RFO requests that miss the L2 cache
  <br />
 	0x0c: rfos Counts all L2 store RFO requests
  <br />
-	0x10: ifetch_hit Counts number of instruction fetches that hit the L2 cache
- <br />
-	0x20: ifetch_miss Counts number of instruction fetches that miss the L2 cache
- <br />
-	0x30: ifetches Counts all instruction fetches
- <br />
-	0x40: prefetch_hit Counts L2 prefetch hits for both code and data
- <br />
-	0x80: prefetch_miss Counts L2 prefetch misses for both code and data
+	0x10: (name=ifetch_hit) Counts number of instruction fetches that hit the L2 cache
+ <br />
+	0x20: (name=ifetch_miss) Counts number of instruction fetches that miss the L2 cache
+ <br />
+	0x30: (name=ifetches) Counts all instruction fetches
+ <br />
+	0x40: (name=prefetch_hit) Counts L2 prefetch hits for both code and data
+ <br />
+	0x80: (name=prefetch_miss) Counts L2 prefetch misses for both code and data
  <br />
 	0xc0: prefetches Counts all L2 prefetches for both code and data
  <br />
@@ -315,24 +315,24 @@
 
 </tr>
 
-<tr><td>L2_DATA_RQSTS</td><td>	More L2 data loads. </td><td> 0, 1, 2, 3</td><td>
-	0x01: i_state Counts number of L2 data demand loads where the cache line to be loaded is in the I (invalid) state, i
- <br />
-	0x02: s_state Counts number of L2 data demand loads where the cache line to be loaded is in the S (shared) state
- <br />
-	0x04: e_state Counts number of L2 data demand loads where the cache line to be loaded is in the E (exclusive) state
- <br />
-	0x08: m_state Counts number of L2 data demand loads where the cache line to be loaded is in the M (modified) state
+<tr><td>L2_DATA_RQSTS</td><td>	More L2 data loads. </td><td> all</td><td>
+	0x01: (name=i_state) Counts number of L2 data demand loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss
+ <br />
+	0x02: (name=s_state) Counts number of L2 data demand loads where the cache line to be loaded is in the S (shared) state
+ <br />
+	0x04: (name=e_state) Counts number of L2 data demand loads where the cache line to be loaded is in the E (exclusive) state
+ <br />
+	0x08: (name=m_state) Counts number of L2 data demand loads where the cache line to be loaded is in the M (modified) state
  <br />
 	0x0f: mesi Counts all L2 data demand requests
  <br />
-	0x10: i_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the I (invalid) state, i
- <br />
-	0x20: s_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the S (shared) state
- <br />
-	0x40: e_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the E (exclusive) state
- <br />
-	0x80: m_state Counts number of L2 prefetch data loads where the cache line to be loaded is in the M (modified) state
+	0x10: (name=i_state) Counts number of L2 prefetch data loads where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss
+ <br />
+	0x20: (name=s_state) Counts number of L2 prefetch data loads where the cache line to be loaded is in the S (shared) state
+ <br />
+	0x40: (name=e_state) Counts number of L2 prefetch data loads where the cache line to be loaded is in the E (exclusive) state
+ <br />
+	0x80: (name=m_state) Counts number of L2 prefetch data loads where the cache line to be loaded is in the M (modified) state
  <br />
 	0xf0: mesi Counts all L2 prefetch requests
  <br />
@@ -342,26 +342,26 @@
 
 </tr>
 
-<tr><td>L2_WRITE</td><td>	Counts number of L2 writes </td><td> 0, 1, 2, 3</td><td>
-	0x01: i_state Counts number of L2 demand store RFO requests where the cache line to be loaded is in the I (invalid) state, i
- <br />
-	0x02: s_state Counts number of L2 store RFO requests where the cache line to be loaded is in the S (shared) state
- <br />
-	0x04: e_state Counts number of L2 store RFO requests where the cache line to be loaded is in the E (exclusive) state
- <br />
-	0x08: m_state Counts number of L2 store RFO requests where the cache line to be loaded is in the M (modified) state
+<tr><td>L2_WRITE</td><td>	Counts number of L2 writes </td><td> all</td><td>
+	0x01: (name=i_state) Counts number of L2 demand store RFO requests where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss
+ <br />
+	0x02: (name=s_state) Counts number of L2 store RFO requests where the cache line to be loaded is in the S (shared) state
+ <br />
+	0x04: (name=e_state) Counts number of L2 store RFO requests where the cache line to be loaded is in the E (exclusive) state
+ <br />
+	0x08: (name=m_state) Counts number of L2 store RFO requests where the cache line to be loaded is in the M (modified) state
  <br />
 	0x0e: hit Counts number of L2 store RFO requests where the cache line to be loaded is in either the S, E or M states
  <br />
 	0x0f: mesi Counts all L2 store RFO requests
  <br />
-	0x10: i_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the I (invalid) state, i
- <br />
-	0x20: s_state Counts number of L2 lock RFO requests where the cache line to be loaded is in the S (shared) state
- <br />
-	0x40: e_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the E (exclusive) state
- <br />
-	0x80: m_state Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the M (modified) state
+	0x10: (name=i_state) Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the I (invalid) state, i.e. a cache miss
+ <br />
+	0x20: (name=s_state) Counts number of L2 lock RFO requests where the cache line to be loaded is in the S (shared) state
+ <br />
+	0x40: (name=e_state) Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the E (exclusive) state
+ <br />
+	0x80: (name=m_state) Counts number of L2 demand lock RFO requests where the cache line to be loaded is in the M (modified) state
  <br />
 	0xe0: hit Counts number of L2 demand lock RFO requests where the cache line to be loaded is in either the S, E, or M state
  <br />
@@ -371,14 +371,14 @@
 
 </tr>
 
-<tr><td>L1D_WB_L2</td><td>	Counts number of L1 writebacks to the L2. </td><td> 0, 1, 2, 3</td><td>
-	0x01: i_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the I (invalid) state, i
- <br />
-	0x02: s_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the S state
- <br />
-	0x04: e_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the E (exclusive) state
- <br />
-	0x08: m_state Counts number of L1 writebacks to the L2 where the cache line to be written is in the M (modified) state
+<tr><td>L1D_WB_L2</td><td>	Counts number of L1 writebacks to the L2. </td><td> all</td><td>
+	0x01: (name=i_state) Counts number of L1 writebacks to the L2 where the cache line to be written is in the I (invalid) state, i.e. a cache miss
+ <br />
+	0x02: (name=s_state) Counts number of L1 writebacks to the L2 where the cache line to be written is in the S state
+ <br />
+	0x04: (name=e_state) Counts number of L1 writebacks to the L2 where the cache line to be written is in the E (exclusive) state
+ <br />
+	0x08: (name=m_state) Counts number of L1 writebacks to the L2 where the cache line to be written is in the M (modified) state
  <br />
 	0x0f: mesi Counts all L1 writebacks to the L2
  <br />
@@ -386,39 +386,39 @@
 
 </tr>
 
-<tr><td>LONGEST_LAT_CACHE</td><td>	Count LLC cache reference latencies. </td><td> 0, 1, 2, 3</td><td>
+<tr><td>LONGEST_LAT_CACHE</td><td>	Count LLC cache reference latencies. </td><td> all</td><td>
 	0x4f: reference This event counts requests originating from the core that reference a cache line in the last level cache
  <br />
-	0x41: miss This event counts each cache miss condition for references to the last level cache
- <br />
-</td>
-
-</tr>
-
-<tr><td>CPU_CLK_UNHALTED</td><td>	Counts the number of thread cycles while the thread is not in a halt state. </td><td> 0, 1, 2, 3</td><td>
-	0x00: thread_p Counts the number of thread cycles while the thread is not in a halt state
- <br />
-	0x01: ref_p Increments at the frequency of a slower reference clock when not halted
- <br />
-</td>
-
-</tr>
-
-<tr><td>UOPS_DECODED_DEC0</td><td>	Counts micro-ops decoded by decoder 0. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>L1D_CACHE_LD</td><td>	Counts L1 data cache read requests. </td><td> all</td><td>
-	0x01: i_state Counts L1 data cache read requests where the cache line to be loaded is in the I (invalid) state, i
- <br />
-	0x02: s_state Counts L1 data cache read requests where the cache line to be loaded is in the S (shared) state
- <br />
-	0x04: e_state Counts L1 data cache read requests where the cache line to be loaded is in the E (exclusive) state
- <br />
-	0x08: m_state Counts L1 data cache read requests where the cache line to be loaded is in the M (modified) state
+	0x41: (name=miss) This event counts each cache miss condition for references to the last level cache
+ <br />
+</td>
+
+</tr>
+
+<tr><td>CPU_CLK_UNHALTED</td><td>	Counts the number of thread cycles while the thread is not in a halt state. </td><td> all</td><td>
+	0x00: (name=thread_p) Counts the number of thread cycles while the thread is not in a halt state
+ <br />
+	0x01: (name=ref_p) Increments at the frequency of a slower reference clock when not halted
+ <br />
+</td>
+
+</tr>
+
+<tr><td>UOPS_DECODED_DEC0</td><td>	Counts micro-ops decoded by decoder 0. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L1D_CACHE_LD</td><td>	Counts L1 data cache read requests. </td><td> 0, 1</td><td>
+	0x01: (name=i_state) Counts L1 data cache read requests where the cache line to be loaded is in the I (invalid) state, i
+ <br />
+	0x02: (name=s_state) Counts L1 data cache read requests where the cache line to be loaded is in the S (shared) state
+ <br />
+	0x04: (name=e_state) Counts L1 data cache read requests where the cache line to be loaded is in the E (exclusive) state
+ <br />
+	0x08: (name=m_state) Counts L1 data cache read requests where the cache line to be loaded is in the M (modified) state
  <br />
 	0x0f: mesi Counts L1 data cache read requests
  <br />
@@ -426,14 +426,14 @@
 
 </tr>
 
-<tr><td>L1D_CACHE_ST</td><td>	Counts L1 data cache stores. </td><td> all</td><td>
-	0x01: i_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the I state
- <br />
-	0x02: s_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the S (shared) state
- <br />
-	0x04: e_state Counts L1 data cache store RFO requests where the cache line to be loaded is in the E (exclusive) state
- <br />
-	0x08: m_state Counts L1 data cache store RFO requests where cache line to be loaded is in the M (modified) state
+<tr><td>L1D_CACHE_ST</td><td>	Counts L1 data cache stores. </td><td> 0, 1</td><td>
+	0x01: (name=i_state) Counts L1 data cache store RFO requests where the cache line to be loaded is in the I state
+ <br />
+	0x02: (name=s_state) Counts L1 data cache store RFO requests where the cache line to be loaded is in the S (shared) state
+ <br />
+	0x04: (name=e_state) Counts L1 data cache store RFO requests where the cache line to be loaded is in the E (exclusive) state
+ <br />
+	0x08: (name=m_state) Counts L1 data cache store RFO requests where cache line to be loaded is in the M (modified) state
  <br />
 	0x0f: mesi Counts L1 data cache store RFO requests
  <br />
@@ -441,211 +441,211 @@
 
 </tr>
 
-<tr><td>L1D_CACHE_LOCK</td><td>	Counts retired load locks in the L1D cache. </td><td> all</td><td>
-	0x01: hit Counts retired load locks that hit in the L1 data cache or hit in an already allocated fill buffer
- <br />
-	0x02: s_state Counts L1 data cache retired load locks that hit the target cache line in the shared state
- <br />
-	0x04: e_state Counts L1 data cache retired load locks that hit the target cache line in the exclusive state
- <br />
-	0x08: m_state Counts L1 data cache retired load locks that hit the target cache line in the modified state
- <br />
-</td>
-
-</tr>
-
-<tr><td>L1D_ALL_REF</td><td>	Counts all references to the L1 data cache, </td><td> all</td><td>
-	0x01: any Counts all references (uncached, speculated and retired) to the L1 data cache, including all loads and stores with any memory types
- <br />
-	0x02: cacheable Counts all data reads and writes (speculated and retired) from cacheable memory, including locked operations
- <br />
-</td>
-
-</tr>
-
-<tr><td>DTLB_MISSES</td><td>	Counts the number of misses in the STLB </td><td> 0, 1, 2, 3</td><td>
-	0x01: any Counts the number of misses in the STLB which causes a page walk
- <br />
-	0x02: walk_completed Counts number of misses in the STLB which resulted in a completed page walk
- <br />
-	0x10: stlb_hit Counts the number of DTLB first level misses that hit in the second level TLB
- <br />
-	0x20: pde_miss Number of DTLB cache misses where the low part of the linear to physical address translation was missed
- <br />
-	0x40: pdp_miss Number of DTLB misses where the high part of the linear to physical address translation was missed
- <br />
-	0x80: large_walk_completed Counts number of completed large page walks due to misses in the STLB
- <br />
-</td>
-
-</tr>
-
-<tr><td>SSE_MEM_EXEC</td><td>	Counts number of SSE instructions which missed the L1 data cache. </td><td> 0, 1, 2, 3</td><td>
-	0x01: nta Counts number of SSE NTA prefetch/weakly-ordered instructions which missed the L1 data cache
- <br />
-	0x08: streaming_stores Counts number of SSE nontemporal stores
- <br />
-</td>
-
-</tr>
-
-<tr><td>LOAD_HIT_PRE</td><td>	Counts load operations sent to the L1 data cache while a previous SSE prefetch instruction to the same cache line has started prefetching but has not yet finished. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>SFENCE_CYCLES</td><td>	Counts store fence cycles </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>L1D_PREFETCH</td><td>	Counts number of hardware prefetch requests. </td><td> 0, 1, 2, 3</td><td>
-	0x01: requests Counts number of hardware prefetch requests dispatched out of the prefetch FIFO
- <br />
-	0x02: miss Counts number of hardware prefetch requests that miss the L1D
- <br />
-	0x04: triggers Counts number of prefetch requests triggered by the Finite State Machine and pushed into the prefetch FIFO
- <br />
-</td>
-
-</tr>
-
-<tr><td>EPT</td><td>	Counts Extended Page Directory Entry accesses. The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches. </td><td> 0, 1, 2, 3</td><td>
-	0x02: epde_miss Counts Extended Page Directory Entry misses
- <br />
-	0x04: epdpe_hit Counts Extended Page Directory Pointer Entry hits
- <br />
-	0x08: epdpe_miss Counts Extended Page Directory Pointer Entry misses
- <br />
-</td>
-
-</tr>
-
-<tr><td>L1D</td><td>	Counts the number of lines brought from/to the L1 data cache. </td><td> all</td><td>
-	0x01: repl Counts the number of lines brought into the L1 data cache
- <br />
-	0x02: m_repl Counts the number of modified lines brought into the L1 data cache
- <br />
-	0x04: m_evict Counts the number of modified lines evicted from the L1 data cache due to replacement
- <br />
-	0x08: m_snoop_evict Counts the number of modified lines evicted from the L1 data cache due to snoop HITM intervention
- <br />
-</td>
-
-</tr>
-
-<tr><td>L1D_CACHE_PREFETCH_LOCK_FB_HIT</td><td>	Counts the number of cacheable load lock speculated instructions accepted into the fill buffer. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>L1D_CACHE_LOCK_FB_HIT</td><td>	Counts the number of cacheable load lock speculated or retired instructions accepted into the fill buffer. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>OFFCORE_REQUESTS_OUTSTANDING</td><td>	Counts weighted cycles of offcore requests. </td><td> 0, 1, 2, 3</td><td>
-	0x01: read_data Counts weighted cycles of offcore demand data read requests
- <br />
-	0x02: read_code Counts weighted cycles of offcore demand code read requests
- <br />
-	0x04: rfo Counts weighted cycles of offcore demand RFO requests
- <br />
-	0x08: read Counts weighted cycles of offcore read requests of any kind
- <br />
-</td>
-
-</tr>
-
-<tr><td>CACHE_LOCK_CYCLES</td><td>	Cycle count during which the L1/L2 caches are locked. A lock is asserted when there is a locked memory access, due to uncacheable memory, a locked operation that spans two cache lines, or a page walk from an uncacheable page table. </td><td> all</td><td>
-	0x01: l1d_l2 Cycle count during which the L1D and L2 are locked
- <br />
-	0x02: l1d Counts the number of cycles that cacheline in the L1 data cache unit is locked
- <br />
-</td>
-
-</tr>
-
-<tr><td>IO_TRANSACTIONS</td><td>	Counts the number of completed I/O transactions. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>L1I</td><td>	Counts L1i instruction cache accesses. </td><td> 0, 1, 2, 3</td><td>
-	0x01: hits Counts all instruction fetches that hit the L1 instruction cache
- <br />
-	0x02: misses Counts all instruction fetches that miss the L1I cache
- <br />
-	0x03: reads Counts all instruction fetches, including uncacheable fetches that bypass the L1I
- <br />
-	0x04: cycles_stalled Cycle counts for which an instruction fetch stalls due to a L1I cache miss, ITLB miss or ITLB fault
- <br />
-</td>
-
-</tr>
-
-<tr><td>IFU_IVC</td><td>	Instruction Fetch unit events </td><td> 0, 1, 2, 3</td><td>
-	0x01: full Instruction Fetche unit victim cache full
- <br />
-	0x02: l1i_eviction L1 Instruction cache evictions
- <br />
-</td>
-
-</tr>
-
-<tr><td>LARGE_ITLB</td><td>	Counts number of large ITLB accesses </td><td> 0, 1, 2, 3</td><td>
-	0x01: hit Counts number of large ITLB hits
- <br />
-</td>
-
-</tr>
-
-<tr><td>L1I_OPPORTUNISTIC_HITS</td><td>	Opportunistic hits in streaming. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>ITLB_MISSES</td><td>	Counts the number of ITLB misses in various variants </td><td> 0, 1, 2, 3</td><td>
-	0x01: any Counts the number of misses in all levels of the ITLB which causes a page walk
- <br />
-	0x02: walk_completed Counts number of misses in all levels of the ITLB which resulted in a completed page walk
- <br />
-	0x04: walk_cycles Counts ITLB miss page walk cycles
- <br />
-	0x04: pmh_busy_cycles Counts PMH busy cycles
- <br />
-	0x10: stlb_hit Counts the number of ITLB misses that hit in the second level TLB
- <br />
-	0x20: pde_miss Number of ITLB misses where the low part of the linear to physical address translation was missed
- <br />
-	0x40: pdp_miss Number of ITLB misses where the high part of the linear to physical address translation was missed
- <br />
-	0x80: large_walk_completed Counts number of completed large page walks due to misses in the STLB
- <br />
-</td>
-
-</tr>
-
-<tr><td>ILD_STALL</td><td>	Cycles Instruction Length Decoder stalls </td><td> 0, 1, 2, 3</td><td>
-	0x01: lcp Cycles Instruction Length Decoder stalls due to length changing prefixes: 66, 67 or REX
- <br />
-	0x02: mru Instruction Length Decoder stall cycles due to Brand Prediction Unit (PBU) Most Recently Used (MRU) bypass
- <br />
-	0x04: iq_full Stall cycles due to a full instruction queue
- <br />
-	0x08: regen Counts the number of regen stalls
+<tr><td>L1D_CACHE_LOCK</td><td>	Counts retired load locks in the L1D cache. </td><td> 0, 1</td><td>
+	0x01: (name=hit) Counts retired load locks that hit in the L1 data cache or hit in an already allocated fill buffer
+ <br />
+	0x02: (name=s_state) Counts L1 data cache retired load locks that hit the target cache line in the shared state
+ <br />
+	0x04: (name=e_state) Counts L1 data cache retired load locks that hit the target cache line in the exclusive state
+ <br />
+	0x08: (name=m_state) Counts L1 data cache retired load locks that hit the target cache line in the modified state
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L1D_ALL_REF</td><td>	Counts all references to the L1 data cache, </td><td> 0, 1</td><td>
+	0x01: (name=any) Counts all references (uncached, speculated and retired) to the L1 data cache, including all loads and stores with any memory types
+ <br />
+	0x02: (name=cacheable) Counts all data reads and writes (speculated and retired) from cacheable memory, including locked operations
+ <br />
+</td>
+
+</tr>
+
+<tr><td>DTLB_MISSES</td><td>	Counts the number of misses in the STLB </td><td> all</td><td>
+	0x01: (name=any) Counts the number of misses in the STLB which causes a page walk
+ <br />
+	0x02: (name=walk_completed) Counts number of misses in the STLB which resulted in a completed page walk
+ <br />
+	0x10: (name=stlb_hit) Counts the number of DTLB first level misses that hit in the second level TLB
+ <br />
+	0x20: (name=pde_miss) Number of DTLB cache misses where the low part of the linear to physical address translation was missed
+ <br />
+	0x40: (name=pdp_miss) Number of DTLB misses where the high part of the linear to physical address translation was missed
+ <br />
+	0x80: (name=large_walk_completed) Counts number of completed large page walks due to misses in the STLB
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SSE_MEM_EXEC</td><td>	Counts number of SSE instructions which missed the L1 data cache. </td><td> all</td><td>
+	0x01: (name=nta) Counts number of SSE NTA prefetch/weakly-ordered instructions which missed the L1 data cache
+ <br />
+	0x08: (name=streaming_stores) Counts number of SSE nontemporal stores
+ <br />
+</td>
+
+</tr>
+
+<tr><td>LOAD_HIT_PRE</td><td>	Counts load operations sent to the L1 data cache while a previous SSE prefetch instruction to the same cache line has started prefetching but has not yet finished. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SFENCE_CYCLES</td><td>	Counts store fence cycles </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L1D_PREFETCH</td><td>	Counts number of hardware prefetch requests. </td><td> all</td><td>
+	0x01: (name=requests) Counts number of hardware prefetch requests dispatched out of the prefetch FIFO
+ <br />
+	0x02: (name=miss) Counts number of hardware prefetch requests that miss the L1D
+ <br />
+	0x04: (name=triggers) Counts number of prefetch requests triggered by the Finite State Machine and pushed into the prefetch FIFO
+ <br />
+</td>
+
+</tr>
+
+<tr><td>EPT</td><td>	Counts Extended Page Directory Entry accesses. The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches. </td><td> all</td><td>
+	0x02: (name=epde_miss) Counts Extended Page Directory Entry misses
+ <br />
+	0x04: (name=epdpe_hit) Counts Extended Page Directory Pointer Entry hits
+ <br />
+	0x08: (name=epdpe_miss) Counts Extended Page Directory Pointer Entry misses
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L1D</td><td>	Counts the number of lines brought from/to the L1 data cache. </td><td> 0, 1</td><td>
+	0x01: (name=repl) Counts the number of lines brought into the L1 data cache
+ <br />
+	0x02: (name=m_repl) Counts the number of modified lines brought into the L1 data cache
+ <br />
+	0x04: (name=m_evict) Counts the number of modified lines evicted from the L1 data cache due to replacement
+ <br />
+	0x08: (name=m_snoop_evict) Counts the number of modified lines evicted from the L1 data cache due to snoop HITM intervention
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L1D_CACHE_PREFETCH_LOCK_FB_HIT</td><td>	Counts the number of cacheable load lock speculated instructions accepted into the fill buffer. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L1D_CACHE_LOCK_FB_HIT</td><td>	Counts the number of cacheable load lock speculated or retired instructions accepted into the fill buffer. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>OFFCORE_REQUESTS_OUTSTANDING</td><td>	Counts weighted cycles of offcore requests. </td><td> all</td><td>
+	0x01: (name=read_data) Counts weighted cycles of offcore demand data read requests
+ <br />
+	0x02: (name=read_code) Counts weighted cycles of offcore demand code read requests
+ <br />
+	0x04: (name=rfo) Counts weighted cycles of offcore demand RFO requests
+ <br />
+	0x08: (name=read) Counts weighted cycles of offcore read requests of any kind
+ <br />
+</td>
+
+</tr>
+
+<tr><td>CACHE_LOCK_CYCLES</td><td>	Cycle count during which the L1/L2 caches are locked. A lock is asserted when there is a locked memory access, due to uncacheable memory, a locked operation that spans two cache lines, or a page walk from an uncacheable page table. </td><td> 0, 1</td><td>
+	0x01: (name=l1d_l2) Cycle count during which the L1D and L2 are locked
+ <br />
+	0x02: (name=l1d) Counts the number of cycles that cacheline in the L1 data cache unit is locked
+ <br />
+</td>
+
+</tr>
+
+<tr><td>IO_TRANSACTIONS</td><td>	Counts the number of completed I/O transactions. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L1I</td><td>	Counts L1i instruction cache accesses. </td><td> all</td><td>
+	0x01: (name=hits) Counts all instruction fetches that hit the L1 instruction cache
+ <br />
+	0x02: (name=misses) Counts all instruction fetches that miss the L1I cache
+ <br />
+	0x03: (name=reads) Counts all instruction fetches, including uncacheable fetches that bypass the L1I
+ <br />
+	0x04: (name=cycles_stalled) Cycle counts for which an instruction fetch stalls due to a L1I cache miss, ITLB miss or ITLB fault
+ <br />
+</td>
+
+</tr>
+
+<tr><td>IFU_IVC</td><td>	Instruction Fetch unit events </td><td> all</td><td>
+	0x01: (name=full) Instruction Fetch unit victim cache full
+ <br />
+	0x02: (name=l1i_eviction) L1 Instruction cache evictions
+ <br />
+</td>
+
+</tr>
+
+<tr><td>LARGE_ITLB</td><td>	Counts number of large ITLB accesses </td><td> all</td><td>
+	0x01: (name=hit) Counts number of large ITLB hits
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L1I_OPPORTUNISTIC_HITS</td><td>	Opportunistic hits in streaming. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>ITLB_MISSES</td><td>	Counts the number of ITLB misses in various variants </td><td> all</td><td>
+	0x01: (name=any) Counts the number of misses in all levels of the ITLB which causes a page walk
+ <br />
+	0x02: (name=walk_completed) Counts number of misses in all levels of the ITLB which resulted in a completed page walk
+ <br />
+	0x04: (name=walk_cycles) Counts ITLB miss page walk cycles
+ <br />
+	0x04: (name=pmh_busy_cycles) Counts PMH busy cycles
+ <br />
+	0x10: (name=stlb_hit) Counts the number of ITLB misses that hit in the second level TLB
+ <br />
+	0x20: (name=pde_miss) Number of ITLB misses where the low part of the linear to physical address translation was missed
+ <br />
+	0x40: (name=pdp_miss) Number of ITLB misses where the high part of the linear to physical address translation was missed
+ <br />
+	0x80: (name=large_walk_completed) Counts number of completed large page walks due to misses in the STLB
+ <br />
+</td>
+
+</tr>
+
+<tr><td>ILD_STALL</td><td>	Cycles Instruction Length Decoder stalls </td><td> all</td><td>
+	0x01: (name=lcp) Cycles Instruction Length Decoder stalls due to length changing prefixes: 66, 67 or REX
+ <br />
+	0x02: (name=mru) Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU) Most Recently Used (MRU) bypass
+ <br />
+	0x04: (name=iq_full) Stall cycles due to a full instruction queue
+ <br />
+	0x08: (name=regen) Counts the number of regen stalls
  <br />
 	0x0f: any Counts any cycles the Instruction Length Decoder is stalled
  <br />
@@ -653,24 +653,24 @@
 
 </tr>
 
-<tr><td>BR_INST_EXEC</td><td>	Counts the number of near branch instructions executed, but not necessarily retired. </td><td> 0, 1, 2, 3</td><td>
-	0x01: cond Counts the number of conditional near branch instructions executed, but not necessarily retired
- <br />
-	0x02: direct Counts all unconditional near branch instructions excluding calls and indirect branches
- <br />
-	0x04: indirect_non_call Counts the number of executed indirect near branch instructions that are not calls
- <br />
-	0x07: non_calls Counts all non call near branch instructions executed, but not necessarily retired
- <br />
-	0x08: return_near Counts indirect near branches that have a return mnemonic
- <br />
-	0x10: direct_near_call Counts unconditional near call branch instructions, excluding non call branch, executed
- <br />
-	0x20: indirect_near_call Counts indirect near calls, including both register and memory indirect, executed
- <br />
-	0x30: near_calls Counts all near call branches executed, but not necessarily retired
- <br />
-	0x40: taken Counts taken near branches executed, but not necessarily retired
+<tr><td>BR_INST_EXEC</td><td>	Counts the number of near branch instructions executed, but not necessarily retired. </td><td> all</td><td>
+	0x01: (name=cond) Counts the number of conditional near branch instructions executed, but not necessarily retired
+ <br />
+	0x02: (name=direct) Counts all unconditional near branch instructions excluding calls and indirect branches
+ <br />
+	0x04: (name=indirect_non_call) Counts the number of executed indirect near branch instructions that are not calls
+ <br />
+	0x07: (name=non_calls) Counts all non call near branch instructions executed, but not necessarily retired
+ <br />
+	0x08: (name=return_near) Counts indirect near branches that have a return mnemonic
+ <br />
+	0x10: (name=direct_near_call) Counts unconditional near call branch instructions, excluding non call branch, executed
+ <br />
+	0x20: (name=indirect_near_call) Counts indirect near calls, including both register and memory indirect, executed
+ <br />
+	0x30: (name=near_calls) Counts all near call branches executed, but not necessarily retired
+ <br />
+	0x40: (name=taken) Counts taken near branches executed, but not necessarily retired
  <br />
 	0x7f: any Counts all near executed branches (not necessarily retired)
  <br />
@@ -678,24 +678,24 @@
 
 </tr>
 
-<tr><td>BR_MISP_EXEC</td><td>	Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired. </td><td> 0, 1, 2, 3</td><td>
-	0x01: cond Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired
- <br />
-	0x02: direct Counts mispredicted macro unconditional near branch instructions, excluding calls and indirect branches (should always be 0)
- <br />
-	0x04: indirect_non_call Counts the number of executed mispredicted indirect near branch instructions that are not calls
- <br />
-	0x07: non_calls Counts mispredicted non call near branches executed, but not necessarily retired
- <br />
-	0x08: return_near Counts mispredicted indirect branches that have a rear return mnemonic
- <br />
-	0x10: direct_near_call Counts mispredicted non-indirect near calls executed, (should always be 0)
- <br />
-	0x20: indirect_near_call Counts mispredicted indirect near calls exeucted, including both register and memory indirect
- <br />
-	0x30: near_calls Counts all mispredicted near call branches executed, but not necessarily retired
- <br />
-	0x40: taken Counts executed mispredicted near branches that are taken, but not necessarily retired
+<tr><td>BR_MISP_EXEC</td><td>	Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired. </td><td> all</td><td>
+	0x01: (name=cond) Counts the number of mispredicted conditional near branch instructions executed, but not necessarily retired
+ <br />
+	0x02: (name=direct) Counts mispredicted macro unconditional near branch instructions, excluding calls and indirect branches (should always be 0)
+ <br />
+	0x04: (name=indirect_non_call) Counts the number of executed mispredicted indirect near branch instructions that are not calls
+ <br />
+	0x07: (name=non_calls) Counts mispredicted non call near branches executed, but not necessarily retired
+ <br />
+	0x08: (name=return_near) Counts mispredicted indirect branches that have a near return mnemonic
+ <br />
+	0x10: (name=direct_near_call) Counts mispredicted non-indirect near calls executed, (should always be 0)
+ <br />
+	0x20: (name=indirect_near_call) Counts mispredicted indirect near calls executed, including both register and memory indirect
+ <br />
+	0x30: (name=near_calls) Counts all mispredicted near call branches executed, but not necessarily retired
+ <br />
+	0x40: (name=taken) Counts executed mispredicted near branches that are taken, but not necessarily retired
  <br />
 	0x7f: any Counts the number of mispredicted near branch instructions that were executed, but not necessarily retired
  <br />
@@ -703,244 +703,244 @@
 
 </tr>
 
-<tr><td>RESOURCE_STALLS</td><td>	Counts the number of Allocator resource related stalls. Includes register renaming buffer entries, memory buffer entries. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations. </td><td> 0, 1, 2, 3</td><td>
-	0x01: any Counts the number of Allocator resource related stalls
- <br />
-	0x02: load Counts the cycles of stall due to lack of load buffer for load operation
- <br />
-	0x04: rs_full This event counts the number of cycles when the number of instructions in the pipeline waiting for execution reaches the limit the processor can handle
- <br />
-	0x08: store This event counts the number of cycles that a resource related stall will occur due to the number of store instructions reaching the limit of the pipeline, (i
- <br />
-	0x10: rob_full Counts the cycles of stall due to reorder buffer full
- <br />
-	0x20: fpcw Counts the number of cycles while execution was stalled due to writing the floating-point unit (FPU) control word
- <br />
-	0x40: mxcsr Stalls due to the MXCSR register rename occurring to close to a previous MXCSR rename
- <br />
-	0x80: other Counts the number of cycles while execution was stalled due to other resource issues
- <br />
-</td>
-
-</tr>
-
-<tr><td>MACRO_INSTS</td><td>	Counts the number of instructions decoded that are macro-fused but not necessarily executed or retired. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>BACLEAR_FORCE_IQ</td><td>	Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ is also responsible for providing conditional branch prediciton direction based on a static scheme and dynamic data provided by the L2 Branch Prediction Unit. If the conditional branch target is not found in the Target Array and the IQ predicts that the branch is taken, then the IQ will force the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>LSD</td><td>	Counts the number of micro-ops delivered by loop stream detector </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>ITLB_FLUSH</td><td>	Counts the number of ITLB flushes </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>OFFCORE_REQUESTS</td><td>	Counts number of offcore data requests. </td><td> 0, 1, 2, 3</td><td>
-	0x01: demand_read_data Counts number of offcore demand data read requests
- <br />
-	0x02: demand_read_code Counts number of offcore demand code read requests
- <br />
-	0x04: demand_rfo Counts number of offcore demand RFO requests
- <br />
-	0x08: any_read Counts number of offcore read requests
- <br />
-	0x10: any_rfo Counts number of offcore RFO requests
- <br />
-	0x20: uncached_mem Counts number of offcore uncached memory requests
- <br />
-	0x40: l1d_writeback Counts number of L1D writebacks to the uncore
- <br />
-	0x80: any Counts all offcore requests
- <br />
-</td>
-
-</tr>
-
-<tr><td>UOPS_EXECUTED</td><td>	Counts number of Uops executed that were issued on various ports </td><td> 0, 1, 2, 3</td><td>
-	0x01: port0 Counts number of Uops executed that were issued on port 0
- <br />
-	0x02: port1 Counts number of Uops executed that were issued on port 1
- <br />
-	0x04: port2_core Counts number of Uops executed that were issued on port 2
- <br />
-	0x08: port3_core Counts number of Uops executed that were issued on port 3
- <br />
-	0x10: port4_core Counts number of Uops executed that where issued on port 4
- <br />
-	0x20: port5 Counts number of Uops executed that where issued on port 5
- <br />
-	0x40: port015 Counts number of Uops executed that where issued on port 0, 1, or 5
- <br />
-	0x80: port234 Counts number of Uops executed that where issued on port 2, 3, or 4
- <br />
-</td>
-
-</tr>
-
-<tr><td>OFFCORE_REQUESTS_SQ_FULL</td><td>	Counts number of cycles the SQ is full to handle off-core requests. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>SNOOPQ_REQUESTS_OUTSTANDING</td><td>	Counts weighted cycles of snoopq requests. </td><td> 0, 1, 2, 3</td><td>
-	0x01: data Counts weighted cycles of snoopq requests for data
- <br />
-	0x02: invalidate Counts weighted cycles of snoopq invalidate requests
- <br />
-	0x04: code Counts weighted cycles of snoopq requests for code
- <br />
-</td>
-
-</tr>
-
-<tr><td>OOF_CORE_RESPONSE_0</td><td>	Off-core Response Performance Monitoring in the Processor Core. Requires special setup. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>SNOOP_RESPONSE</td><td>	Counts HIT snoop response sent by this thread in response to a snoop request. </td><td> 0, 1, 2, 3</td><td>
-	0x01: hit Counts HIT snoop response sent by this thread in response to a snoop request
- <br />
-	0x02: hite Counts HIT E snoop response sent by this thread in response to a snoop request
- <br />
-	0x04: hitm Counts HIT M snoop response sent by this thread in response to a snoop request
- <br />
-</td>
-
-</tr>
-
-<tr><td>PIC_ACCESSES</td><td>	Counts number of TPR accesses </td><td> 0, 1, 2, 3</td><td>
-	0x01: tpr_reads Counts number of TPR reads
- <br />
-	0x02: tpr_writes Counts number of TPR writes
- <br />
-</td>
-
-</tr>
-
-<tr><td>UOPS_RETIRED</td><td>	Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2, others=1; maximum count of 8 per cycle). Most instructions are composed of one or two microops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists </td><td> 0, 1, 2, 3</td><td>
-	0x01: any Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2, others=1; maximum count of 8 per cycle)
- <br />
-	0x02: retire_slots Counts the number of retirement slots used each cycle
- <br />
-	0x04: macro_fused Counts number of macro-fused uops retired
- <br />
-</td>
-
-</tr>
-
-<tr><td>MACHINE_CLEARS</td><td>	Counts the cycles machine clear is asserted. </td><td> 0, 1, 2, 3</td><td>
-	0x01: cycles Counts the cycles machine clear is asserted
- <br />
-	0x02: mem_order Counts the number of machine clears due to memory order conflicts
- <br />
-	0x04: smc Counts the number of times that a program writes to a code section
- <br />
-	0x10: fusion_assist Counts the number of macro-fusion assists
- <br />
-</td>
-
-</tr>
-
-<tr><td>SSEX_UOPS_RETIRED</td><td>	Counts SIMD packed single-precision floating point Uops retired. </td><td> 0, 1, 2, 3</td><td>
-	0x01: packed_single Counts SIMD packed single-precision floating point Uops retired
- <br />
-	0x02: scalar_single Counts SIMD calar single-precision floating point Uops retired
- <br />
-	0x04: packed_double Counts SIMD packed double-precision floating point Uops retired
- <br />
-	0x08: scalar_double Counts SIMD scalar double-precision floating point Uops retired
- <br />
-	0x10: vector_integer Counts 128-bit SIMD vector integer Uops retired
- <br />
-</td>
-
-</tr>
-
-<tr><td>ITLB_MISS_RETIRED</td><td>	Counts the number of retired instructions that missed the ITLB when the instruction was fetched. </td><td> 0, 1, 2, 3</td><td>
+<tr><td>RESOURCE_STALLS</td><td>	Counts the number of Allocator resource related stalls. Includes register renaming buffer entries, memory buffer entries. In addition to resource related stalls, this event counts some other events. Includes stalls arising during branch misprediction recovery, such as if retirement of the mispredicted branch is delayed and stalls arising while store buffer is draining from synchronizing operations. </td><td> all</td><td>
+	0x01: (name=any) Counts the number of Allocator resource related stalls
+ <br />
+	0x02: (name=load) Counts the cycles of stall due to lack of load buffer for load operation
+ <br />
+	0x04: (name=rs_full) This event counts the number of cycles when the number of instructions in the pipeline waiting for execution reaches the limit the processor can handle
+ <br />
+	0x08: (name=store) This event counts the number of cycles that a resource related stall will occur due to the number of store instructions reaching the limit of the pipeline (i.e. all store buffers are used)
+ <br />
+	0x10: (name=rob_full) Counts the cycles of stall due to reorder buffer full
+ <br />
+	0x20: (name=fpcw) Counts the number of cycles while execution was stalled due to writing the floating-point unit (FPU) control word
+ <br />
+	0x40: (name=mxcsr) Stalls due to the MXCSR register rename occurring too close to a previous MXCSR rename
+ <br />
+	0x80: (name=other) Counts the number of cycles while execution was stalled due to other resource issues
+ <br />
+</td>
+
+</tr>
+
+<tr><td>MACRO_INSTS</td><td>	Counts the number of instructions decoded that are macro-fused but not necessarily executed or retired. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>BACLEAR_FORCE_IQ</td><td>	Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ is also responsible for providing conditional branch prediction direction based on a static scheme and dynamic data provided by the L2 Branch Prediction Unit. If the conditional branch target is not found in the Target Array and the IQ predicts that the branch is taken, then the IQ will force the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble in the instruction fetch pipeline. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>LSD</td><td>	Counts the number of micro-ops delivered by loop stream detector </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>ITLB_FLUSH</td><td>	Counts the number of ITLB flushes </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>OFFCORE_REQUESTS</td><td>	Counts number of offcore data requests. </td><td> all</td><td>
+	0x01: (name=demand_read_data) Counts number of offcore demand data read requests
+ <br />
+	0x02: (name=demand_read_code) Counts number of offcore demand code read requests
+ <br />
+	0x04: (name=demand_rfo) Counts number of offcore demand RFO requests
+ <br />
+	0x08: (name=any_read) Counts number of offcore read requests
+ <br />
+	0x10: (name=any_rfo) Counts number of offcore RFO requests
+ <br />
+	0x20: (name=uncached_mem) Counts number of offcore uncached memory requests
+ <br />
+	0x40: (name=l1d_writeback) Counts number of L1D writebacks to the uncore
+ <br />
+	0x80: (name=any) Counts all offcore requests
+ <br />
+</td>
+
+</tr>
+
+<tr><td>UOPS_EXECUTED</td><td>	Counts number of Uops executed that were issued on various ports </td><td> all</td><td>
+	0x01: (name=port0) Counts number of Uops executed that were issued on port 0
+ <br />
+	0x02: (name=port1) Counts number of Uops executed that were issued on port 1
+ <br />
+	0x04: (name=port2_core) Counts number of Uops executed that were issued on port 2
+ <br />
+	0x08: (name=port3_core) Counts number of Uops executed that were issued on port 3
+ <br />
+	0x10: (name=port4_core) Counts number of Uops executed that were issued on port 4
+ <br />
+	0x20: (name=port5) Counts number of Uops executed that were issued on port 5
+ <br />
+	0x40: (name=port015) Counts number of Uops executed that were issued on port 0, 1, or 5
+ <br />
+	0x80: (name=port234) Counts number of Uops executed that were issued on port 2, 3, or 4
+ <br />
+</td>
+
+</tr>
+
+<tr><td>OFFCORE_REQUESTS_SQ_FULL</td><td>	Counts number of cycles the SQ is full to handle off-core requests. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SNOOPQ_REQUESTS_OUTSTANDING</td><td>	Counts weighted cycles of snoopq requests. </td><td> all</td><td>
+	0x01: (name=data) Counts weighted cycles of snoopq requests for data
+ <br />
+	0x02: (name=invalidate) Counts weighted cycles of snoopq invalidate requests
+ <br />
+	0x04: (name=code) Counts weighted cycles of snoopq requests for code
+ <br />
+</td>
+
+</tr>
+
+<tr><td>OOF_CORE_RESPONSE_0</td><td>	Off-core Response Performance Monitoring in the Processor Core. Requires special setup. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SNOOP_RESPONSE</td><td>	Counts HIT snoop response sent by this thread in response to a snoop request. </td><td> all</td><td>
+	0x01: (name=hit) Counts HIT snoop response sent by this thread in response to a snoop request
+ <br />
+	0x02: (name=hite) Counts HIT E snoop response sent by this thread in response to a snoop request
+ <br />
+	0x04: (name=hitm) Counts HIT M snoop response sent by this thread in response to a snoop request
+ <br />
+</td>
+
+</tr>
+
+<tr><td>PIC_ACCESSES</td><td>	Counts number of TPR accesses </td><td> all</td><td>
+	0x01: (name=tpr_reads) Counts number of TPR reads
+ <br />
+	0x02: (name=tpr_writes) Counts number of TPR writes
+ <br />
+</td>
+
+</tr>
+
+<tr><td>UOPS_RETIRED</td><td>	Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2, others=1; maximum count of 8 per cycle). Most instructions are composed of one or two microops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists </td><td> all</td><td>
+	0x01: (name=any) Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2, others=1; maximum count of 8 per cycle)
+ <br />
+	0x02: (name=retire_slots) Counts the number of retirement slots used each cycle
+ <br />
+	0x04: (name=macro_fused) Counts number of macro-fused uops retired
+ <br />
+</td>
+
+</tr>
+
+<tr><td>MACHINE_CLEARS</td><td>	Counts the cycles machine clear is asserted. </td><td> all</td><td>
+	0x01: (name=cycles) Counts the cycles machine clear is asserted
+ <br />
+	0x02: (name=mem_order) Counts the number of machine clears due to memory order conflicts
+ <br />
+	0x04: (name=smc) Counts the number of times that a program writes to a code section
+ <br />
+	0x10: (name=fusion_assist) Counts the number of macro-fusion assists
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SSEX_UOPS_RETIRED</td><td>	Counts SIMD packed single-precision floating point Uops retired. </td><td> all</td><td>
+	0x01: (name=packed_single) Counts SIMD packed single-precision floating point Uops retired
+ <br />
+	0x02: (name=scalar_single) Counts SIMD scalar single-precision floating point Uops retired
+ <br />
+	0x04: (name=packed_double) Counts SIMD packed double-precision floating point Uops retired
+ <br />
+	0x08: (name=scalar_double) Counts SIMD scalar double-precision floating point Uops retired
+ <br />
+	0x10: (name=vector_integer) Counts 128-bit SIMD vector integer Uops retired
+ <br />
+</td>
+
+</tr>
+
+<tr><td>ITLB_MISS_RETIRED</td><td>	Counts the number of retired instructions that missed the ITLB when the instruction was fetched. </td><td> all</td><td>
 	0x20: No unit mask
  <br />
 </td>
 
 </tr>
 
-<tr><td>MEM_LOAD_RETIRED</td><td>	Counts number of retired loads. </td><td> 0, 1, 2, 3</td><td>
-	0x01: l1d_hit Counts number of retired loads that hit the L1 data cache
- <br />
-	0x02: l2_hit Counts number of retired loads that hit the L2 data cache
- <br />
-	0x04: llc_unshared_hit Counts number of retired loads that hit their own, unshared lines in the LLC cache
- <br />
-	0x08: other_core_l2_hit_hitm Counts number of retired loads that hit in a sibling core's L2 (on die core)
- <br />
-	0x10: llc_miss Counts number of retired loads that miss the LLC cache
- <br />
-	0x40: hit_lfb Counts number of retired loads that miss the L1D and the address is located in an allocated line fill buffer and will soon be committed to cache
- <br />
-	0x80: dtlb_miss Counts the number of retired loads that missed the DTLB
- <br />
-</td>
-
-</tr>
-
-<tr><td>FP_MMX_TRANS</td><td>	Counts transitions between MMX and x87 state. </td><td> 0, 1, 2, 3</td><td>
-	0x01: to_fp Counts the first floating-point instruction following any MMX instruction
- <br />
-	0x02: to_mmx Counts the first MMX instruction following a floating-point instruction
- <br />
-	0x03: any Counts all transitions from floating point to MMX instructions and from MMX instructions to floating point instructions
- <br />
-</td>
-
-</tr>
-
-<tr><td>MACRO_INSTS</td><td>	Counts the number of instructions decoded, (but not necessarily executed or retired). </td><td> 0, 1, 2, 3</td><td>
-	0x01: decoded Counts the number of instructions decoded, (but not necessarily executed or retired)
- <br />
-</td>
-
-</tr>
-
-<tr><td>UOPS_DECODED</td><td>	Counts the number of Uops decoded by various subsystems. </td><td> 0, 1, 2, 3</td><td>
-	0x02: ms Counts the number of Uops decoded by the Microcode Sequencer, MS
- <br />
-	0x04: esp_folding Counts number of stack pointer (ESP) instructions decoded: push , pop , call , ret, etc
- <br />
-	0x08: esp_sync Counts number of stack pointer (ESP) sync operations where an ESP instruction is corrected by adding the ESP offset register to the current value of the ESP register
- <br />
-</td>
-
-</tr>
-
-<tr><td>RAT_STALLS</td><td>	Counts the number of cycles during which execution stalled due to several reason </td><td> 0, 1, 2, 3</td><td>
-	0x01: flags Counts the number of cycles during which execution stalled due to several reasons, one of which is a partial flag register stall
- <br />
-	0x02: registers This event counts the number of cycles instruction execution latency became longer than the defined latency because the instruction used a register that was partially written by previous instruction
- <br />
-	0x04: rob_read_port Counts the number of cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the out-of-order pipeline
- <br />
-	0x08: scoreboard Counts the cycles where we stall due to microarchitecturally required serialization
+<tr><td>MEM_LOAD_RETIRED</td><td>	Counts number of retired loads. </td><td> all</td><td>
+	0x01: (name=l1d_hit) Counts number of retired loads that hit the L1 data cache
+ <br />
+	0x02: (name=l2_hit) Counts number of retired loads that hit the L2 data cache
+ <br />
+	0x04: (name=llc_unshared_hit) Counts number of retired loads that hit their own, unshared lines in the LLC cache
+ <br />
+	0x08: (name=other_core_l2_hit_hitm) Counts number of retired loads that hit in a sibling core's L2 (on die core)
+ <br />
+	0x10: (name=llc_miss) Counts number of retired loads that miss the LLC cache
+ <br />
+	0x40: (name=hit_lfb) Counts number of retired loads that miss the L1D and the address is located in an allocated line fill buffer and will soon be committed to cache
+ <br />
+	0x80: (name=dtlb_miss) Counts the number of retired loads that missed the DTLB
+ <br />
+</td>
+
+</tr>
+
+<tr><td>FP_MMX_TRANS</td><td>	Counts transitions between MMX and x87 state. </td><td> all</td><td>
+	0x01: (name=to_fp) Counts the first floating-point instruction following any MMX instruction
+ <br />
+	0x02: (name=to_mmx) Counts the first MMX instruction following a floating-point instruction
+ <br />
+	0x03: (name=any) Counts all transitions from floating point to MMX instructions and from MMX instructions to floating point instructions
+ <br />
+</td>
+
+</tr>
+
+<tr><td>MACRO_INSTS</td><td>	Counts the number of instructions decoded, (but not necessarily executed or retired). </td><td> all</td><td>
+	0x01: (name=decoded) Counts the number of instructions decoded, (but not necessarily executed or retired)
+ <br />
+</td>
+
+</tr>
+
+<tr><td>UOPS_DECODED</td><td>	Counts the number of Uops decoded by various subsystems. </td><td> all</td><td>
+	0x02: (name=ms) Counts the number of Uops decoded by the Microcode Sequencer, MS
+ <br />
+	0x04: (name=esp_folding) Counts number of stack pointer (ESP) instructions decoded: push , pop , call , ret, etc
+ <br />
+	0x08: (name=esp_sync) Counts number of stack pointer (ESP) sync operations where an ESP instruction is corrected by adding the ESP offset register to the current value of the ESP register
+ <br />
+</td>
+
+</tr>
+
+<tr><td>RAT_STALLS</td><td>	Counts the number of cycles during which execution stalled due to several reasons </td><td> all</td><td>
+	0x01: (name=flags) Counts the number of cycles during which execution stalled due to several reasons, one of which is a partial flag register stall
+ <br />
+	0x02: (name=registers) This event counts the number of cycles instruction execution latency became longer than the defined latency because the instruction used a register that was partially written by previous instruction
+ <br />
+	0x04: (name=rob_read_port) Counts the number of cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the out-of-order pipeline
+ <br />
+	0x08: (name=scoreboard) Counts the cycles where we stall due to microarchitecturally required serialization
  <br />
 	0x0f: any Counts all Register Allocation Table stall cycles due to: Cycles when ROB read port stalls occurred, which did not allow new micro-ops to enter the execution pipe
  <br />
@@ -948,108 +948,108 @@
 
 </tr>
 
-<tr><td>SEG_RENAME_STALLS</td><td>	Counts the number of stall cycles due to the lack of renaming resources for the ES, DS, FS, and GS segment registers. If a segment is renamed but not retired and a second update to the same segment occurs, a stall occurs in the front-end of the pipeline until the renamed segment retires. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>ES_REG_RENAMES</td><td>	Counts the number of times the ES segment register is renamed. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>UOP_UNFUSION</td><td>	Counts unfusion events due to floating point exception to a fused uop. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>BR_INST_DECODED</td><td>	Counts the number of branch instructions decoded. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>BOGUS_BR</td><td>	Counts the number of bogus branches. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>BPU_MISSED_CALL_RET</td><td>	Counts number of times the Branch Prediciton Unit missed predicting a call or return branch. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>BACLEAR</td><td>	Counts the number of times the front end is resteered, </td><td> 0, 1, 2, 3</td><td>
-	0x01: clear Counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end
- <br />
-	0x02: bad_target Counts number of Branch Address Calculator clears (BACLEAR) asserted due to conditional branch instructions in which there was a target hit but the direction was wrong
- <br />
-</td>
-
-</tr>
-
-<tr><td>BPU_CLEARS</td><td>	Counts Branch Prediction Unit clears. </td><td> 0, 1, 2, 3</td><td>
-	0x01: early Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken branch after incorrectly assuming that it was not taken
- <br />
-	0x02: late Counts late Branch Prediction Unit clears due to Most Recently Used conflicts
- <br />
-	0x03: any Counts all BPU clears
- <br />
-</td>
-
-</tr>
-
-<tr><td>L2_TRANSACTIONS</td><td>	Counts L2 transactions </td><td> 0, 1, 2, 3</td><td>
-	0x01: load Counts L2 load operations due to HW prefetch or demand loads
- <br />
-	0x02: rfo Counts L2 RFO operations due to HW prefetch or demand RFOs
- <br />
-	0x04: ifetch Counts L2 instruction fetch operations due to HW prefetch or demand ifetch
- <br />
-	0x08: prefetch Counts L2 prefetch operations
- <br />
-	0x10: l1d_wb Counts L1D writeback operations to the L2
- <br />
-	0x20: fill Counts L2 cache line fill operations due to load, RFO, L1D writeback or prefetch
- <br />
-	0x40: wb Counts L2 writeback operations to the LLC
- <br />
-	0x80: any Counts all L2 cache operations
- <br />
-</td>
-
-</tr>
-
-<tr><td>L2_LINES_IN</td><td>	Counts the number of cache lines allocated in the L2 cache in various states. </td><td> 0, 1, 2, 3</td><td>
-	0x02: s_state Counts the number of cache lines allocated in the L2 cache in the S (shared) state
- <br />
-	0x04: e_state Counts the number of cache lines allocated in the L2 cache in the E (exclusive) state
- <br />
-	0x07: any Counts the number of cache lines allocated in the L2 cache
- <br />
-</td>
-
-</tr>
-
-<tr><td>L2_LINES_OUT</td><td>	Counts L2 cache lines evicted. </td><td> 0, 1, 2, 3</td><td>
-	0x01: demand_clean Counts L2 clean cache lines evicted by a demand request
- <br />
-	0x02: demand_dirty Counts L2 dirty (modified) cache lines evicted by a demand request
- <br />
-	0x04: prefetch_clean Counts L2 clean cache line evicted by a prefetch request
- <br />
-	0x08: prefetch_dirty Counts L2 modified cache line evicted by a prefetch request
+<tr><td>SEG_RENAME_STALLS</td><td>	Counts the number of stall cycles due to the lack of renaming resources for the ES, DS, FS, and GS segment registers. If a segment is renamed but not retired and a second update to the same segment occurs, a stall occurs in the front-end of the pipeline until the renamed segment retires. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>ES_REG_RENAMES</td><td>	Counts the number of times the ES segment register is renamed. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>UOP_UNFUSION</td><td>	Counts unfusion events due to floating point exception to a fused uop. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>BR_INST_DECODED</td><td>	Counts the number of branch instructions decoded. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>BOGUS_BR</td><td>	Counts the number of bogus branches. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>BPU_MISSED_CALL_RET</td><td>	Counts number of times the Branch Prediction Unit missed predicting a call or return branch. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>BACLEAR</td><td>	Counts the number of times the front end is resteered, </td><td> all</td><td>
+	0x01: (name=clear) Counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end
+ <br />
+	0x02: (name=bad_target) Counts number of Branch Address Calculator clears (BACLEAR) asserted due to conditional branch instructions in which there was a target hit but the direction was wrong
+ <br />
+</td>
+
+</tr>
+
+<tr><td>BPU_CLEARS</td><td>	Counts Branch Prediction Unit clears. </td><td> all</td><td>
+	0x01: (name=early) Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken branch after incorrectly assuming that it was not taken
+ <br />
+	0x02: (name=late) Counts late Branch Prediction Unit clears due to Most Recently Used conflicts
+ <br />
+	0x03: (name=any) Counts all BPU clears
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L2_TRANSACTIONS</td><td>	Counts L2 transactions </td><td> all</td><td>
+	0x01: (name=load) Counts L2 load operations due to HW prefetch or demand loads
+ <br />
+	0x02: (name=rfo) Counts L2 RFO operations due to HW prefetch or demand RFOs
+ <br />
+	0x04: (name=ifetch) Counts L2 instruction fetch operations due to HW prefetch or demand ifetch
+ <br />
+	0x08: (name=prefetch) Counts L2 prefetch operations
+ <br />
+	0x10: (name=l1d_wb) Counts L1D writeback operations to the L2
+ <br />
+	0x20: (name=fill) Counts L2 cache line fill operations due to load, RFO, L1D writeback or prefetch
+ <br />
+	0x40: (name=wb) Counts L2 writeback operations to the LLC
+ <br />
+	0x80: (name=any) Counts all L2 cache operations
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L2_LINES_IN</td><td>	Counts the number of cache lines allocated in the L2 cache in various states. </td><td> all</td><td>
+	0x02: (name=s_state) Counts the number of cache lines allocated in the L2 cache in the S (shared) state
+ <br />
+	0x04: (name=e_state) Counts the number of cache lines allocated in the L2 cache in the E (exclusive) state
+ <br />
+	0x07: (name=any) Counts the number of cache lines allocated in the L2 cache
+ <br />
+</td>
+
+</tr>
+
+<tr><td>L2_LINES_OUT</td><td>	Counts L2 cache lines evicted. </td><td> all</td><td>
+	0x01: (name=demand_clean) Counts L2 clean cache lines evicted by a demand request
+ <br />
+	0x02: (name=demand_dirty) Counts L2 dirty (modified) cache lines evicted by a demand request
+ <br />
+	0x04: (name=prefetch_clean) Counts L2 clean cache line evicted by a prefetch request
+ <br />
+	0x08: (name=prefetch_dirty) Counts L2 modified cache line evicted by a prefetch request
  <br />
 	0x0f: any Counts all L2 cache lines evicted for any reason
  <br />
@@ -1057,79 +1057,79 @@
 
 </tr>
 
-<tr><td>L2_HW_PREFETCH</td><td>	Count L2 HW prefetcher events </td><td> 0, 1, 2, 3</td><td>
-	0x01: hit Count L2 HW prefetcher detector hits
- <br />
-	0x02: alloc Count L2 HW prefetcher allocations
- <br />
-	0x04: data_trigger Count L2 HW data prefetcher triggered
- <br />
-	0x08: code_trigger Count L2 HW code prefetcher triggered
- <br />
-	0x10: dca_trigger Count L2 HW DCA prefetcher triggered
- <br />
-	0x20: kick_start Count L2 HW prefetcher kick started
- <br />
-</td>
-
-</tr>
-
-<tr><td>SQ_MISC</td><td>	Counts events in the Super Queue below the L2. </td><td> 0, 1, 2, 3</td><td>
-	0x01: promotion Counts the number of L2 secondary misses that hit the Super Queue
- <br />
-	0x02: promotion_post_go Counts the number of L2 secondary misses during the Super Queue filling L2
- <br />
-	0x04: lru_hints Counts number of Super Queue LRU hints sent to L3
- <br />
-	0x08: fill_dropped Counts the number of SQ L2 fills dropped due to L2 busy
- <br />
-	0x10: split_lock Counts the number of SQ lock splits across a cache line
- <br />
-</td>
-
-</tr>
-
-<tr><td>SQ_FULL_STALL_CYCLES</td><td>	Counts cycles the Super Queue is full. Neither of the threads on this core will be able to access the uncore. </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>FP_ASSIST</td><td>	Counts the number of floating point operations executed that required micro-code assist intervention. </td><td> 0, 1, 2, 3</td><td>
-	0x01: all Counts the number of floating point operations executed that required micro-code assist intervention
- <br />
-	0x02: output Counts number of floating point micro-code assist when the output value (destination register) is invalid
- <br />
-	0x04: input Counts number of floating point micro-code assist when the input value (one of the source operands to an FP instruction) is invalid
- <br />
-</td>
-
-</tr>
-
-<tr><td>SEGMENT_REG_LOADS</td><td>	Counts number of segment register loads </td><td> 0, 1, 2, 3</td><td>
-	0x01: No unit mask
- <br />
-</td>
-
-</tr>
-
-<tr><td>SIMD_INT_64</td><td>	Counts number of SID integer 64 bit packed multiply operations. </td><td> 0, 1, 2, 3</td><td>
-	0x01: packed_mpy Counts number of SID integer 64 bit packed multiply operations
- <br />
-	0x02: packed_shift Counts number of SID integer 64 bit packed shift operations
- <br />
-	0x04: pack Counts number of SID integer 64 bit pack operations
- <br />
-	0x08: unpack Counts number of SID integer 64 bit unpack operations
- <br />
-	0x10: packed_logical Counts number of SID integer 64 bit logical operations
- <br />
-	0x20: packed_arith Counts number of SID integer 64 bit arithmetic operations
- <br />
-	0x40: shuffle_move Counts number of SID integer 64 bit shift or move operations
- <br />
-</td>
-
-</tr>
-
+<tr><td>L2_HW_PREFETCH</td><td>	Count L2 HW prefetcher events </td><td> all</td><td>
+	0x01: (name=hit) Count L2 HW prefetcher detector hits
+ <br />
+	0x02: (name=alloc) Count L2 HW prefetcher allocations
+ <br />
+	0x04: (name=data_trigger) Count L2 HW data prefetcher triggered
+ <br />
+	0x08: (name=code_trigger) Count L2 HW code prefetcher triggered
+ <br />
+	0x10: (name=dca_trigger) Count L2 HW DCA prefetcher triggered
+ <br />
+	0x20: (name=kick_start) Count L2 HW prefetcher kick started
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SQ_MISC</td><td>	Counts events in the Super Queue below the L2. </td><td> all</td><td>
+	0x01: (name=promotion) Counts the number of L2 secondary misses that hit the Super Queue
+ <br />
+	0x02: (name=promotion_post_go) Counts the number of L2 secondary misses during the Super Queue filling L2
+ <br />
+	0x04: (name=lru_hints) Counts number of Super Queue LRU hints sent to L3
+ <br />
+	0x08: (name=fill_dropped) Counts the number of SQ L2 fills dropped due to L2 busy
+ <br />
+	0x10: (name=split_lock) Counts the number of SQ lock splits across a cache line
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SQ_FULL_STALL_CYCLES</td><td>	Counts cycles the Super Queue is full. Neither of the threads on this core will be able to access the uncore. </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>FP_ASSIST</td><td>	Counts the number of floating point operations executed that required micro-code assist intervention. </td><td> all</td><td>
+	0x01: (name=all) Counts the number of floating point operations executed that required micro-code assist intervention
+ <br />
+	0x02: (name=output) Counts number of floating point micro-code assist when the output value (destination register) is invalid
+ <br />
+	0x04: (name=input) Counts number of floating point micro-code assist when the input value (one of the source operands to an FP instruction) is invalid
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SEGMENT_REG_LOADS</td><td>	Counts number of segment register loads </td><td> all</td><td>
+	0x01: No unit mask
+ <br />
+</td>
+
+</tr>
+
+<tr><td>SIMD_INT_64</td><td>	Counts number of SIMD integer 64 bit packed multiply operations. </td><td> all</td><td>
+	0x01: (name=packed_mpy) Counts number of SIMD integer 64 bit packed multiply operations
+ <br />
+	0x02: (name=packed_shift) Counts number of SIMD integer 64 bit packed shift operations
+ <br />
+	0x04: (name=pack) Counts number of SIMD integer 64 bit pack operations
+ <br />
+	0x08: (name=unpack) Counts number of SIMD integer 64 bit unpack operations
+ <br />
+	0x10: (name=packed_logical) Counts number of SIMD integer 64 bit logical operations
+ <br />
+	0x20: (name=packed_arith) Counts number of SIMD integer 64 bit arithmetic operations
+ <br />
+	0x40: (name=shuffle_move) Counts number of SIMD integer 64 bit shift or move operations
+ <br />
+</td>
+
+</tr>
+