diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/adln-metrics.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/adln-metrics.json new file mode 100644 index 000000000000..535acee4ab7f --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/adln-metrics.json @@ -0,0 +1,583 @@ +[ + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.ALL / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_frontend_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / SLOTS", + "MetricGroup": "TopdownL2;tma_frontend_bound_group", + "MetricName": "tma_frontend_latency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_icache", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", + "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_itlb", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_branch_detect", + "PublicDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_latency_group", + "MetricName": "tma_branch_resteer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / SLOTS", + "MetricGroup": "TopdownL2;tma_frontend_bound_group", + "MetricName": "tma_frontend_bandwidth", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", + "MetricExpr": "TOPDOWN_FE_BOUND.CISC / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_cisc", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", + "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_decode", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", + "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_predecode", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", + "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / SLOTS", + "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group", + "MetricName": "tma_other_fb", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear", + "MetricExpr": "(SLOTS - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_bad_speculation", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / SLOTS", + "MetricGroup": "TopdownL2;tma_bad_speculation_group", + "MetricName": "tma_branch_mispredicts", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / SLOTS", + "MetricGroup": "TopdownL2;tma_bad_speculation_group", + "MetricName": "tma_machine_clears", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / SLOTS", + "MetricGroup": "TopdownL3;tma_machine_clears_group", + "MetricName": "tma_nuke", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to SMC. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.SMC / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_smc", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory ordering. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_memory_ordering", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to FP assists. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.FP_ASSIST / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_fp_assist", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory disambiguation. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.DISAMBIGUATION / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_disambiguation", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to page faults. ", + "MetricExpr": "tma_nuke * (MACHINE_CLEARS.PAGE_FAULT / MACHINE_CLEARS.SLOW)", + "MetricGroup": "TopdownL4;tma_nuke_group", + "MetricName": "tma_page_fault", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.", + "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / SLOTS", + "MetricGroup": "TopdownL3;tma_machine_clears_group", + "MetricName": "tma_fast_nuke", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "TOPDOWN_BE_BOUND.ALL / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_backend_bound", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound. The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles due to backend bound stalls that are core execution bound and not attributed to outstanding demand load or store stalls. ", + "MetricExpr": "max(0, tma_backend_bound - tma_load_store_bound)", + "MetricGroup": "TopdownL2;tma_backend_bound_group", + "MetricName": "tma_core_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads. ", + "MetricExpr": "min((TOPDOWN_BE_BOUND.ALL / SLOTS), (LD_HEAD.ANY_AT_RET / CLKS) + tma_store_bound)", + "MetricGroup": "TopdownL2;tma_backend_bound_group", + "MetricName": "tma_load_store_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full.", + "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_store_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.", + "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / CLKS", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_l1_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", + "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_store_fwd", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.", + "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_stlb_hit", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.", + "MetricExpr": "LD_HEAD.PGWALK_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_stlb_miss", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.", + "MetricExpr": "LD_HEAD.OTHER_AT_RET / CLKS", + "MetricGroup": "TopdownL4;tma_l1_bound_group", + "MetricName": "tma_other_l1", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", + "MetricExpr": "(MEM_BOUND_STALLS.LOAD_L2_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_l2_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "MetricExpr": "(MEM_BOUND_STALLS.LOAD_LLC_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_l3_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", + "MetricExpr": "(MEM_BOUND_STALLS.LOAD_DRAM_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD)", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_dram_bound", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hits in the L2, LLC, DRAM or MMIO (Non-DRAM) but could not be correctly attributed or cycles in which the load miss is waiting on a request buffer.", + "MetricExpr": "max(0, tma_load_store_bound - (tma_store_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_dram_bound))", + "MetricGroup": "TopdownL3;tma_load_store_bound_group", + "MetricName": "tma_other_load_store", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "tma_backend_bound", + "MetricGroup": "TopdownL1", + "MetricName": "tma_backend_bound_aux", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that UOPS must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. All of these subevents count backend stalls, in slots, due to a resource limitation. These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based. These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation. ", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", + "MetricExpr": "tma_backend_bound", + "MetricGroup": "TopdownL2;tma_backend_bound_aux_group", + "MetricName": "tma_resource_bound", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls. Note that uops must be available for consumption in order for this event to count. If a uop is not available (IQ is empty), this event will not count. ", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", + "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_mem_scheduler", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to store buffer full", + "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)", + "MetricGroup": "TopdownL4;tma_mem_scheduler_group", + "MetricName": "tma_st_buffer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to load buffer full", + "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.LD_BUF / MEM_SCHEDULER_BLOCK.ALL", + "MetricGroup": "TopdownL4;tma_mem_scheduler_group", + "MetricName": "tma_ld_buffer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to RSV full relative ", + "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.RSV / MEM_SCHEDULER_BLOCK.ALL", + "MetricGroup": "TopdownL4;tma_mem_scheduler_group", + "MetricName": "tma_rsv", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", + "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_non_mem_scheduler", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", + "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_register", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", + "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_reorder_buffer", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.", + "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_alloc_restriction", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", + "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / SLOTS", + "MetricGroup": "TopdownL3;tma_resource_bound_group", + "MetricName": "tma_serialization", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the numer of issue slots that result in retirement slots. ", + "MetricExpr": "TOPDOWN_RETIRING.ALL / SLOTS", + "MetricGroup": "TopdownL1", + "MetricName": "tma_retiring", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of uops that are not from the microsequencer. ", + "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / SLOTS", + "MetricGroup": "TopdownL2;tma_retiring_group", + "MetricName": "tma_base", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of floating point operations per uop with all default weighting.", + "MetricExpr": "UOPS_RETIRED.FPDIV / SLOTS", + "MetricGroup": "TopdownL3;tma_base_group", + "MetricName": "tma_fp_uops", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.", + "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / SLOTS", + "MetricGroup": "TopdownL3;tma_base_group", + "MetricName": "tma_other_ret", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)", + "MetricExpr": "UOPS_RETIRED.MS / SLOTS", + "MetricGroup": "TopdownL2;tma_retiring_group", + "MetricName": "tma_ms_uops", + "PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "", + "MetricExpr": "CPU_CLK_UNHALTED.CORE", + "MetricName": "CLKS" + }, + { + "BriefDescription": "", + "MetricExpr": "CPU_CLK_UNHALTED.CORE_P", + "MetricName": "CLKS_P" + }, + { + "BriefDescription": "", + "MetricExpr": "5 * CLKS", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Instructions Per Cycle", + "MetricExpr": "INST_RETIRED.ANY / CLKS", + "MetricName": "IPC" + }, + { + "BriefDescription": "Cycles Per Instruction", + "MetricExpr": "CLKS / INST_RETIRED.ANY", + "MetricName": "CPI" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY", + "MetricName": "UPI" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block", + "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "Store_Fwd_Blocks" + }, + { + "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block", + "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "Address_Alias_Blocks" + }, + { + "BriefDescription": "Percentage of total non-speculative loads that are splits", + "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "Load_Splits" + }, + { + "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "IpBranch" + }, + { + "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)", + "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL", + "MetricName": "IpCall" + }, + { + "BriefDescription": "Instructions per Load", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", + "MetricName": "IpLoad" + }, + { + "BriefDescription": "Instructions per Store", + "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", + "MetricName": "IpStore" + }, + { + "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction", + "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricName": "IpMispredict" + }, + { + "BriefDescription": "Instructions per Far Branch", + "MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)", + "MetricName": "IpFarBranch" + }, + { + "BriefDescription": "Ratio of all branches which mispredict", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", + "MetricName": "Branch_Mispredict_Ratio" + }, + { + "BriefDescription": "Ratio between Mispredicted branches and unknown branches", + "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", + "MetricName": "Branch_Mispredict_to_Unknown_Branch_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are ucode ops", + "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL", + "MetricName": "Microcode_Uop_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are FPDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL", + "MetricName": "FPDiv_Uop_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are IDiv uops", + "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL", + "MetricName": "IDiv_Uop_Ratio" + }, + { + "BriefDescription": "Percentage of all uops which are x87 uops", + "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL", + "MetricName": "X87_Uop_Ratio" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CLKS / CPU_CLK_UNHALTED.REF_TSC", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Cycle cost per L2 hit", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT", + "MetricName": "Cycles_per_Demand_Load_L2_Hit" + }, + { + "BriefDescription": "Cycle cost per LLC hit", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT", + "MetricName": "Cycles_per_Demand_Load_L3_Hit" + }, + { + "BriefDescription": "Cycle cost per DRAM hit", + "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "MetricName": "Cycles_per_Demand_Load_DRAM_Hit" + }, + { + "BriefDescription": "Percent of instruction miss cost that hit in the L2", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / (MEM_BOUND_STALLS.IFETCH)", + "MetricName": "Inst_Miss_Cost_L2Hit_Percent" + }, + { + "BriefDescription": "Percent of instruction miss cost that hit in the L3", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / (MEM_BOUND_STALLS.IFETCH)", + "MetricName": "Inst_Miss_Cost_L3Hit_Percent" + }, + { + "BriefDescription": "Percent of instruction miss cost that hit in DRAM", + "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / (MEM_BOUND_STALLS.IFETCH)", + "MetricName": "Inst_Miss_Cost_DRAMHit_Percent" + }, + { + "BriefDescription": "load ops retired per 1000 instruction", + "MetricExpr": "1000 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY", + "MetricName": "MemLoadPKI" + }, + { + "BriefDescription": "C1 residency percent per core", + "MetricExpr": "cstate_core@c1\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C1_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "cstate_core@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "cstate_core@c7\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C8 residency percent per package", + "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C8_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C9 residency percent per package", + "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C9_Pkg_Residency", + "ScaleUnit": "100%" + }, + { + "BriefDescription": "C10 residency percent per package", + "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC", + "MetricGroup": "Power", + "MetricName": "C10_Pkg_Residency", + "ScaleUnit": "100%" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/cache.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/cache.json new file mode 100644 index 000000000000..043445ae14a8 --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/cache.json @@ -0,0 +1,330 @@ +[ + { + "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "200003", + "UMask": "0x41" + }, + { + "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.", + "EventCode": "0x2e", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.", + "SampleAfterValue": "200003", + "UMask": "0x4f" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "SampleAfterValue": "200003", + "UMask": "0x38" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in DRAM or MMIO (non-DRAM).", + "SampleAfterValue": "200003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "SampleAfterValue": "200003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD", + "SampleAfterValue": "200003", + "UMask": "0x7" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT", + "SampleAfterValue": "200003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.", + "EventCode": "0x34", + "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT", + "PublicDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in DRAM.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.", + "Data_LA": "1", + "EventCode": "0xd1", + "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons: load buffer, store buffer or RSV full.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ALL", + "SampleAfterValue": "20003", + "UMask": "0x7" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF", + "SampleAfterValue": "20003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.RSV", + "SampleAfterValue": "20003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.", + "EventCode": "0x04", + "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF", + "SampleAfterValue": "20003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of load uops retired.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "PEBS": "1", + "PublicDescription": "Counts the total number of load uops retired.", + "SampleAfterValue": "200003", + "UMask": "0x81" + }, + { + "BriefDescription": "Counts the number of store uops retired.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "PEBS": "1", + "PublicDescription": "Counts the total number of store uops retired.", + "SampleAfterValue": "200003", + "UMask": "0x82" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128", + "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4", + "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Counts the number of retired split load uops.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x41" + }, + { + "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.", + "Data_LA": "1", + "EventCode": "0xd0", + "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY", + "PEBS": "2", + "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.", + "SampleAfterValue": "1000003", + "UMask": "0x6" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x4003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x8003C0001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_HIT", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ICACHE", + "SampleAfterValue": "1000003", + "UMask": "0x20" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/floating-point.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/floating-point.json new file mode 100644 index 000000000000..30e8ca3c1485 --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/floating-point.json @@ -0,0 +1,18 @@ +[ + { + "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.FP_ASSIST", + "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.", + "SampleAfterValue": "20003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.FPDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x8" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/frontend.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/frontend.json new file mode 100644 index 000000000000..36898bab2bba --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/frontend.json @@ -0,0 +1,26 @@ +[ + { + "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "EventCode": "0xe6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.", + "EventCode": "0x80", + "EventName": "ICACHE.ACCESSES", + "PublicDescription": "Counts the total number of requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of instruction cache misses.", + "EventCode": "0x80", + "EventName": "ICACHE.MISSES", + "PublicDescription": "Counts the number of missed requests to the instruction cache. The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one. Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.", + "SampleAfterValue": "200003", + "UMask": "0x2" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/memory.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/memory.json new file mode 100644 index 000000000000..f84bf8c43495 --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/memory.json @@ -0,0 +1,81 @@ +[ + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.", + "EventCode": "0x05", + "EventName": "LD_HEAD.ANY_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xff" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.", + "EventCode": "0x05", + "EventName": "LD_HEAD.L1_BOUND_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xf4" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.", + "EventCode": "0x05", + "EventName": "LD_HEAD.OTHER_AT_RET", + "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.", + "SampleAfterValue": "1000003", + "UMask": "0xc0" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.", + "EventCode": "0x05", + "EventName": "LD_HEAD.PGWALK_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0xa0" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.", + "EventCode": "0x05", + "EventName": "LD_HEAD.ST_ADDR_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x84" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "SampleAfterValue": "20003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F84400002", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/other.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/other.json new file mode 100644 index 000000000000..6336de61f628 --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/other.json @@ -0,0 +1,38 @@ +[ + { + "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.COREWB_M.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10008", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10001", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10002", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts streaming stores that have any type of response.", + "EventCode": "0xB7", + "EventName": "OCR.STREAMING_WR.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10800", + "SampleAfterValue": "100003", + "UMask": "0x1" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/pipeline.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/pipeline.json new file mode 100644 index 000000000000..fa53ff11a509 --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/pipeline.json @@ -0,0 +1,533 @@ +[ + { + "BriefDescription": "Counts the total number of branch instructions retired for all branch types.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.", + "SampleAfterValue": "200003" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf9" + }, + { + "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_TAKEN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xbf" + }, + { + "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "Counts the number of near CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf9" + }, + { + "BriefDescription": "Counts the number of near RET branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of near relative CALL branch instructions retired.", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.REL_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfd" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PEBS": "1", + "PublicDescription": "Counts the total number of mispredicted branch instructions retired. All branch type instructions are accounted for. Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP. A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.", + "SampleAfterValue": "200003" + }, + { + "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.COND_TAKEN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.INDIRECT_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.IND_CALL", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfb" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0x7e" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xeb" + }, + { + "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.RETURN", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xf7" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN", + "Deprecated": "1", + "EventCode": "0xc5", + "EventName": "BR_MISP_RETIRED.TAKEN_JCC", + "PEBS": "1", + "SampleAfterValue": "200003", + "UMask": "0xfe" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.CORE", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.", + "SampleAfterValue": "2000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.REF_TSC_P", + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of unhalted core clock cycles.", + "EventCode": "0x3c", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Counts the total number of instructions retired. (Fixed event)", + "EventName": "INST_RETIRED.ANY", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the total number of instructions retired.", + "EventCode": "0xc0", + "EventName": "INST_RETIRED.ANY_P", + "PEBS": "1", + "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS", + "Deprecated": "1", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.4K_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.ADDRESS_ALIAS", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "PEBS": "1", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.DISAMBIGUATION", + "SampleAfterValue": "20003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of machines clears due to memory renaming.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.MRN_NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of machine clears due to a page fault. Counts both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation occurs.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.PAGE_FAULT", + "SampleAfterValue": "20003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SLOW", + "SampleAfterValue": "20003", + "UMask": "0x6f" + }, + { + "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.", + "EventCode": "0xc3", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "20003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.", + "EventCode": "0x75", + "EventName": "SERIALIZATION.NON_C01_MS_SCB", + "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. The most commonly executed instruction with an MS scoreboard is PAUSE.", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.ALL", + "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ) even if an FE_bound event occurs during this period. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS", + "SampleAfterValue": "1000003", + "UMask": "0x3" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).", + "EventCode": "0x73", + "EventName": "TOPDOWN_BAD_SPECULATION.NUKE", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALL", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REGISTER", + "SampleAfterValue": "1000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER", + "SampleAfterValue": "1000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", + "EventCode": "0x74", + "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION", + "SampleAfterValue": "1000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ALL", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.", + "SampleAfterValue": "1000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", + "SampleAfterValue": "1000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.CISC", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.DECODE", + "SampleAfterValue": "1000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH", + "SampleAfterValue": "1000003", + "UMask": "0x8d" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY", + "SampleAfterValue": "1000003", + "UMask": "0x72" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.ITLB", + "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", + "SampleAfterValue": "1000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.OTHER", + "SampleAfterValue": "1000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.", + "EventCode": "0x71", + "EventName": "TOPDOWN_FE_BOUND.PREDECODE", + "SampleAfterValue": "1000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the total number of consumed retirement slots.", + "EventCode": "0xc2", + "EventName": "TOPDOWN_RETIRING.ALL", + "PEBS": "1", + "SampleAfterValue": "1000003" + }, + { + "BriefDescription": "Counts the total number of uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.ALL", + "PEBS": "1", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Counts the number of integer divide uops retired.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.IDIV", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.MS", + "PEBS": "1", + "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.", + "EventCode": "0xc2", + "EventName": "UOPS_RETIRED.X87", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/uncore-memory.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/uncore-memory.json new file mode 100644 index 000000000000..2ccd9cf96957 --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/uncore-memory.json @@ -0,0 +1,175 @@ +[ + { + "BriefDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", + "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels).", + "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN", + "PerPkg": "1", + "PublicDescription": "Counts every 64B read entering the Memory Controller 1 to DRAM (sum of all channels).", + "Unit": "iMC" + }, + { + "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.", + "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command for a read request sent to DRAM", + "EventCode": "0x24", + "EventName": "UNC_M_ACT_COUNT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command sent to DRAM", + "EventCode": "0x26", + "EventName": "UNC_M_ACT_COUNT_TOTAL", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "ACT command for a write request sent to DRAM", + "EventCode": "0x25", + "EventName": "UNC_M_ACT_COUNT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Read CAS command sent to DRAM", + "EventCode": "0x22", + "EventName": "UNC_M_CAS_COUNT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Write CAS command sent to DRAM", + "EventCode": "0x23", + "EventName": "UNC_M_CAS_COUNT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Number of clocks", + "EventCode": "0x01", + "EventName": "UNC_M_CLOCKTICKS", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Empty", + "EventCode": "0x1D", + "EventName": "UNC_M_DRAM_PAGE_EMPTY_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Empty", + "EventCode": "0x20", + "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Hit", + "EventCode": "0x1C", + "EventName": "UNC_M_DRAM_PAGE_HIT_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Hit", + "EventCode": "0x1F", + "EventName": "UNC_M_DRAM_PAGE_HIT_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming read request page status is Page Miss", + "EventCode": "0x1E", + "EventName": "UNC_M_DRAM_PAGE_MISS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "incoming write request page status is Page Miss", + "EventCode": "0x21", + "EventName": "UNC_M_DRAM_PAGE_MISS_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Any Rank at Hot state", + "EventCode": "0x19", + "EventName": "UNC_M_DRAM_THERMAL_HOT", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Any Rank at Warm state", + "EventCode": "0x1A", + "EventName": "UNC_M_DRAM_THERMAL_WARM", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming read prefetch request from IA.", + "EventCode": "0x0A", + "EventName": "UNC_M_PREFETCH_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration", + "EventCode": "0x28", + "EventName": "UNC_M_PRE_COUNT_IDLE", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "PRE command sent to DRAM for a read/write request", + "EventCode": "0x27", + "EventName": "UNC_M_PRE_COUNT_PAGE_MISS", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC0 read request", + "EventCode": "0x02", + "EventName": "UNC_M_VC0_REQUESTS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC0 write request", + "EventCode": "0x03", + "EventName": "UNC_M_VC0_REQUESTS_WR", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC1 read request", + "EventCode": "0x04", + "EventName": "UNC_M_VC1_REQUESTS_RD", + "PerPkg": "1", + "Unit": "iMC" + }, + { + "BriefDescription": "Incoming VC1 write request", + "EventCode": "0x05", + "EventName": "UNC_M_VC1_REQUESTS_WR", + "PerPkg": "1", + "Unit": "iMC" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/uncore-other.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/uncore-other.json new file mode 100644 index 000000000000..f9e7777cd2be --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/uncore-other.json @@ -0,0 +1,33 @@ +[ + { + "BriefDescription": "Number of requests allocated in Coherency Tracker.", + "EventCode": "0x84", + "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic.", + "EventCode": "0x80", + "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.", + "EventCode": "0x81", + "EventName": "UNC_ARB_TRK_REQUESTS.ALL", + "PerPkg": "1", + "UMask": "0x1", + "Unit": "ARB" + }, + { + "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.", + "EventCode": "0xff", + "EventName": "UNC_CLOCK.SOCKET", + "PerPkg": "1", + "Unit": "CLOCK" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/alderlaken/virtual-memory.json b/lib/libpmc/pmu-events/arch/x86/alderlaken/virtual-memory.json new file mode 100644 index 000000000000..67fd640f790e --- /dev/null +++ b/lib/libpmc/pmu-events/arch/x86/alderlaken/virtual-memory.json @@ -0,0 +1,47 @@ +[ + { + "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "2000003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSED_WALK", + "SampleAfterValue": "1000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.PDE_CACHE_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.", + "SampleAfterValue": "200003", + "UMask": "0xe" + }, + { + "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.", + "EventCode": "0x05", + "EventName": "LD_HEAD.DTLB_MISS_AT_RET", + "SampleAfterValue": "1000003", + "UMask": "0x90" + } +] diff --git a/lib/libpmc/pmu-events/arch/x86/mapfile.csv b/lib/libpmc/pmu-events/arch/x86/mapfile.csv index 7056e02d68a9..87a68de4a909 100644 --- a/lib/libpmc/pmu-events/arch/x86/mapfile.csv +++ b/lib/libpmc/pmu-events/arch/x86/mapfile.csv @@ -1,57 +1,58 @@ Family-model,Version,Filename,EventType GenuineIntel-6-56,v5,broadwellde,core GenuineIntel-6-3D,v17,broadwell,core GenuineIntel-6-47,v17,broadwell,core GenuineIntel-6-4F,v10,broadwellx,core GenuineIntel-6-1C,v4,bonnell,core GenuineIntel-6-26,v4,bonnell,core GenuineIntel-6-27,v4,bonnell,core GenuineIntel-6-36,v4,bonnell,core GenuineIntel-6-35,v4,bonnell,core GenuineIntel-6-5C,v8,goldmont,core GenuineIntel-6-5F,v8,goldmont,core GenuineIntel-6-7A,v1,goldmontplus,core GenuineIntel-6-CF,v1,emeraldrapids,core GenuineIntel-6-3C,v24,haswell,core GenuineIntel-6-45,v24,haswell,core GenuineIntel-6-46,v24,haswell,core GenuineIntel-6-3F,v17,haswellx,core GenuineIntel-6-3A,v18,ivybridge,core GenuineIntel-6-3E,v19,ivytown,core GenuineIntel-6-2D,v20,jaketown,core GenuineIntel-6-57,v9,knightslanding,core GenuineIntel-6-85,v9,knightslanding,core GenuineIntel-6-1E,v2,nehalemep,core GenuineIntel-6-1F,v2,nehalemep,core GenuineIntel-6-1A,v2,nehalemep,core GenuineIntel-6-2E,v2,nehalemex,core GenuineIntel-6-[4589]E,v24,skylake,core GenuineIntel-6-A[56],v24,skylake,core GenuineIntel-6-37,v13,silvermont,core GenuineIntel-6-4D,v13,silvermont,core GenuineIntel-6-4C,v13,silvermont,core GenuineIntel-6-2A,v15,sandybridge,core GenuineIntel-6-2C,v2,westmereep-dp,core GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-2F,v2,westmereex,core GenuineIntel-6-55-[01234],v1,skylakex,core GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-8[CD],v1,tigerlake,core GenuineIntel-6-A7,v1,icelake,core GenuineIntel-6-6A,v1,icelakex,core GenuineIntel-6-6C,v1,icelakex,core GenuineIntel-6-86,v1,tremontx,core GenuineIntel-6-96,v1,elkhartlake,core GenuineIntel-6-97,v1,alderlake,core GenuineIntel-6-9A,v1,alderlake,core GenuineIntel-6-B7,v1,alderlake,core GenuineIntel-6-BA,v1,alderlake,core GenuineIntel-6-BF,v1,alderlake,core GenuineIntel-6-8F,v1,sapphirerapids,core +GenuineIntel-6-BE,v1,alderlaken,core AuthenticAMD-23-[012][0-9A-F],v2,amdzen1,core AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core AuthenticAMD-25-[0245][[:xdigit:]],v1,amdzen3,core AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen4,core HygonGenuine-24-00,v2,amdzen1,core diff --git a/sys/dev/hwpmc/hwpmc_intel.c b/sys/dev/hwpmc/hwpmc_intel.c index c5073d07944a..6d8b5c5de6b6 100644 --- a/sys/dev/hwpmc/hwpmc_intel.c +++ b/sys/dev/hwpmc/hwpmc_intel.c @@ -1,350 +1,354 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2008 Joseph Koshy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Common code for handling Intel CPUs. */ #include #include #include #include #include #include #include #include static int intel_switch_in(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; PMCDBG3(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp, pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS); /* allow the RDPMC instruction if needed */ if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) load_cr4(rcr4() | CR4_PCE); PMCDBG1(MDP,SWI,1, "cr4=0x%jx", (uintmax_t) rcr4()); return 0; } static int intel_switch_out(struct pmc_cpu *pc, struct pmc_process *pp) { (void) pc; (void) pp; /* can be NULL */ PMCDBG3(MDP,SWO,1, "pc=%p pp=%p cr4=0x%jx", pc, pp, (uintmax_t) rcr4()); /* always turn off the RDPMC instruction */ load_cr4(rcr4() & ~CR4_PCE); return 0; } struct pmc_mdep * pmc_intel_initialize(void) { struct pmc_mdep *pmc_mdep; enum pmc_cputype cputype; int error, family, model, nclasses, ncpus, stepping, verov; KASSERT(cpu_vendor_id == CPU_VENDOR_INTEL, ("[intel,%d] Initializing non-intel processor", __LINE__)); PMCDBG1(MDP,INI,0, "intel-initialize cpuid=0x%x", cpu_id); cputype = -1; nclasses = 2; error = 0; verov = 0; family = CPUID_TO_FAMILY(cpu_id); model = CPUID_TO_MODEL(cpu_id); stepping = CPUID_TO_STEPPING(cpu_id); snprintf(pmc_cpuid, sizeof(pmc_cpuid), "GenuineIntel-%d-%02X-%X", family, model, stepping); switch (cpu_id & 0xF00) { case 0x600: switch (model) { case 0xE: cputype = PMC_CPU_INTEL_CORE; break; case 0xF: /* Per Intel document 315338-020. */ if (stepping == 0x7) { cputype = PMC_CPU_INTEL_CORE; verov = 1; } else { cputype = PMC_CPU_INTEL_CORE2; nclasses = 3; } break; case 0x17: cputype = PMC_CPU_INTEL_CORE2EXTREME; nclasses = 3; break; case 0x1A: case 0x1E: /* * Per Intel document 253669-032 9/2009, * pages A-2 and A-57 */ case 0x1F: /* * Per Intel document 253669-032 9/2009, * pages A-2 and A-57 */ cputype = PMC_CPU_INTEL_COREI7; nclasses = 5; break; case 0x2E: cputype = PMC_CPU_INTEL_NEHALEM_EX; nclasses = 3; break; case 0x25: /* Per Intel document 253669-033US 12/2009. */ case 0x2C: /* Per Intel document 253669-033US 12/2009. */ cputype = PMC_CPU_INTEL_WESTMERE; nclasses = 5; break; case 0x2F: /* Westmere-EX, seen in wild */ cputype = PMC_CPU_INTEL_WESTMERE_EX; nclasses = 3; break; case 0x2A: /* Per Intel document 253669-039US 05/2011. */ cputype = PMC_CPU_INTEL_SANDYBRIDGE; nclasses = 3; break; case 0x2D: /* Per Intel document 253669-044US 08/2012. */ cputype = PMC_CPU_INTEL_SANDYBRIDGE_XEON; nclasses = 3; break; case 0x3A: /* Per Intel document 253669-043US 05/2012. */ cputype = PMC_CPU_INTEL_IVYBRIDGE; nclasses = 3; break; case 0x3E: /* Per Intel document 325462-045US 01/2013. */ cputype = PMC_CPU_INTEL_IVYBRIDGE_XEON; nclasses = 3; break; case 0x3D: case 0x47: cputype = PMC_CPU_INTEL_BROADWELL; nclasses = 3; break; case 0x4f: case 0x56: cputype = PMC_CPU_INTEL_BROADWELL_XEON; nclasses = 3; break; case 0x3C: /* Per Intel document 325462-045US 01/2013. */ case 0x45: /* Per Intel document 325462-045US 09/2014. */ cputype = PMC_CPU_INTEL_HASWELL; nclasses = 3; break; case 0x3F: /* Per Intel document 325462-045US 09/2014. */ case 0x46: /* Per Intel document 325462-045US 09/2014. */ /* Should 46 be XEON. probably its own? */ cputype = PMC_CPU_INTEL_HASWELL_XEON; nclasses = 3; break; /* Skylake */ case 0x4e: case 0x5e: /* Kabylake */ case 0x8E: /* Per Intel document 325462-063US July 2017. */ case 0x9E: /* Per Intel document 325462-063US July 2017. */ /* Cometlake */ case 0xA5: case 0xA6: cputype = PMC_CPU_INTEL_SKYLAKE; nclasses = 3; break; case 0x55: /* SDM rev 63 */ cputype = PMC_CPU_INTEL_SKYLAKE_XEON; nclasses = 3; break; /* Icelake */ case 0x7D: case 0x7E: /* Tigerlake */ case 0x8C: case 0x8D: /* Rocketlake */ case 0xA7: cputype = PMC_CPU_INTEL_ICELAKE; nclasses = 3; break; case 0x6A: case 0x6C: cputype = PMC_CPU_INTEL_ICELAKE_XEON; nclasses = 3; break; case 0x97: case 0x9A: case 0xB7: case 0xBA: case 0xBF: cputype = PMC_CPU_INTEL_ALDERLAKE; nclasses = 3; break; case 0x1C: /* Per Intel document 320047-002. */ case 0x26: case 0x27: case 0x35: case 0x36: cputype = PMC_CPU_INTEL_ATOM; nclasses = 3; break; case 0x37: case 0x4A: case 0x4D: /* Per Intel document 330061-001 01/2014. */ case 0x5A: case 0x5D: cputype = PMC_CPU_INTEL_ATOM_SILVERMONT; nclasses = 3; break; case 0x5C: /* Per Intel document 325462-071US 10/2019. */ case 0x5F: cputype = PMC_CPU_INTEL_ATOM_GOLDMONT; nclasses = 3; break; case 0x7A: cputype = PMC_CPU_INTEL_ATOM_GOLDMONT_P; nclasses = 3; break; case 0x86: case 0x96: cputype = PMC_CPU_INTEL_ATOM_TREMONT; nclasses = 3; break; + case 0xBE: + cputype = PMC_CPU_INTEL_ALDERLAKEN; + nclasses = 3; + break; case 0xCF: cputype = PMC_CPU_INTEL_EMERALD_RAPIDS; nclasses = 3; break; } break; } if ((int) cputype == -1) { printf("pmc: Unknown Intel CPU.\n"); return (NULL); } /* Allocate base class and initialize machine dependent struct */ pmc_mdep = pmc_mdep_alloc(nclasses); pmc_mdep->pmd_cputype = cputype; pmc_mdep->pmd_switch_in = intel_switch_in; pmc_mdep->pmd_switch_out = intel_switch_out; ncpus = pmc_cpu_max(); error = pmc_tsc_initialize(pmc_mdep, ncpus); if (error) goto error; MPASS(nclasses >= PMC_MDEP_CLASS_INDEX_IAF); error = pmc_core_initialize(pmc_mdep, ncpus, verov); if (error) { pmc_tsc_finalize(pmc_mdep); goto error; } /* * Init the uncore class. */ switch (cputype) { /* * Intel Corei7 and Westmere processors. */ case PMC_CPU_INTEL_COREI7: case PMC_CPU_INTEL_WESTMERE: #ifdef notyet /* * TODO: re-enable uncore class on these processors. * * The uncore unit was reworked beginning with Sandy Bridge, including * the MSRs required to program it. In particular, we need to: * - Parse the MSR_UNC_CBO_CONFIG MSR for number of C-box units in the * system * - Support reading and writing to ARB and C-box units, depending on * the requested event * - Create some kind of mapping between C-box <--> CPU * * Also TODO: support other later changes to these interfaces, to * enable the uncore class on generations newer than Broadwell. * Skylake+ appears to use newer addresses for the uncore MSRs. */ case PMC_CPU_INTEL_HASWELL: case PMC_CPU_INTEL_BROADWELL: case PMC_CPU_INTEL_SANDYBRIDGE: #endif MPASS(nclasses >= PMC_MDEP_CLASS_INDEX_UCF); error = pmc_uncore_initialize(pmc_mdep, ncpus); break; default: break; } error: if (error) { pmc_mdep_free(pmc_mdep); pmc_mdep = NULL; } return (pmc_mdep); } void pmc_intel_finalize(struct pmc_mdep *md) { pmc_tsc_finalize(md); pmc_core_finalize(md); /* * Uncore. */ switch (md->pmd_cputype) { case PMC_CPU_INTEL_COREI7: case PMC_CPU_INTEL_WESTMERE: #ifdef notyet case PMC_CPU_INTEL_HASWELL: case PMC_CPU_INTEL_BROADWELL: case PMC_CPU_INTEL_SANDYBRIDGE: #endif pmc_uncore_finalize(md); break; default: break; } } diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h index 68cf43d0b10f..a9853e5f7d10 100644 --- a/sys/sys/pmc.h +++ b/sys/sys/pmc.h @@ -1,1239 +1,1240 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2003-2008, Joseph Koshy * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by A. Joseph Koshy under * sponsorship from the FreeBSD Foundation and Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_PMC_H_ #define _SYS_PMC_H_ #include #include #include #include #include #ifdef _KERNEL #include #include #endif #define PMC_MODULE_NAME "hwpmc" #define PMC_NAME_MAX 64 /* HW counter name size */ #define PMC_CLASS_MAX 8 /* max #classes of PMCs per-system */ /* * Kernel<->userland API version number [MMmmpppp] * * Major numbers are to be incremented when an incompatible change to * the ABI occurs that older clients will not be able to handle. * * Minor numbers are incremented when a backwards compatible change * occurs that allows older correct programs to run unchanged. For * example, when support for a new PMC type is added. * * The patch version is incremented for every bug fix. */ #define PMC_VERSION_MAJOR 0x0A #define PMC_VERSION_MINOR 0x00 #define PMC_VERSION_PATCH 0x0000 #define PMC_VERSION (PMC_VERSION_MAJOR << 24 | \ PMC_VERSION_MINOR << 16 | PMC_VERSION_PATCH) #define PMC_CPUID_LEN 64 /* cpu model name for pmu lookup */ extern char pmc_cpuid[PMC_CPUID_LEN]; /* * Kinds of CPUs known. * * We keep track of CPU variants that need to be distinguished in * some way for PMC operations. CPU names are grouped by manufacturer * and numbered sparsely in order to minimize changes to the ABI involved * when new CPUs are added. * * Please keep the pmc(3) manual page in sync with this list. */ #define __PMC_CPUS() \ __PMC_CPU(AMD_K8, 0x01, "AMD K8") \ __PMC_CPU(INTEL_CORE, 0x87, "Intel Core Solo/Duo") \ __PMC_CPU(INTEL_CORE2, 0x88, "Intel Core2") \ __PMC_CPU(INTEL_CORE2EXTREME, 0x89, "Intel Core2 Extreme") \ __PMC_CPU(INTEL_ATOM, 0x8A, "Intel Atom") \ __PMC_CPU(INTEL_COREI7, 0x8B, "Intel Core i7") \ __PMC_CPU(INTEL_WESTMERE, 0x8C, "Intel Westmere") \ __PMC_CPU(INTEL_SANDYBRIDGE, 0x8D, "Intel Sandy Bridge") \ __PMC_CPU(INTEL_IVYBRIDGE, 0x8E, "Intel Ivy Bridge") \ __PMC_CPU(INTEL_SANDYBRIDGE_XEON, 0x8F, "Intel Sandy Bridge Xeon") \ __PMC_CPU(INTEL_IVYBRIDGE_XEON, 0x90, "Intel Ivy Bridge Xeon") \ __PMC_CPU(INTEL_HASWELL, 0x91, "Intel Haswell") \ __PMC_CPU(INTEL_ATOM_SILVERMONT, 0x92, "Intel Atom Silvermont") \ __PMC_CPU(INTEL_NEHALEM_EX, 0x93, "Intel Nehalem Xeon 7500") \ __PMC_CPU(INTEL_WESTMERE_EX, 0x94, "Intel Westmere Xeon E7") \ __PMC_CPU(INTEL_HASWELL_XEON, 0x95, "Intel Haswell Xeon E5 v3") \ __PMC_CPU(INTEL_BROADWELL, 0x96, "Intel Broadwell") \ __PMC_CPU(INTEL_BROADWELL_XEON, 0x97, "Intel Broadwell Xeon") \ __PMC_CPU(INTEL_SKYLAKE, 0x98, "Intel Skylake") \ __PMC_CPU(INTEL_SKYLAKE_XEON, 0x99, "Intel Skylake Xeon") \ __PMC_CPU(INTEL_ATOM_GOLDMONT, 0x9A, "Intel Atom Goldmont") \ __PMC_CPU(INTEL_ICELAKE, 0x9B, "Intel Icelake") \ __PMC_CPU(INTEL_ICELAKE_XEON, 0x9C, "Intel Icelake Xeon") \ __PMC_CPU(INTEL_ALDERLAKE, 0x9D, "Intel Alderlake") \ __PMC_CPU(INTEL_ATOM_GOLDMONT_P, 0x9E, "Intel Atom Goldmont Plus") \ __PMC_CPU(INTEL_ATOM_TREMONT, 0x9F, "Intel Atom Tremont") \ __PMC_CPU(INTEL_EMERALD_RAPIDS, 0xA0, "Intel Emerald Rapids") \ + __PMC_CPU(INTEL_ALDERLAKEN, 0xA1, "Intel AlderlakeN") \ __PMC_CPU(INTEL_XSCALE, 0x100, "Intel XScale") \ __PMC_CPU(PPC_7450, 0x300, "PowerPC MPC7450") \ __PMC_CPU(PPC_E500, 0x340, "PowerPC e500 Core") \ __PMC_CPU(PPC_970, 0x380, "IBM PowerPC 970") \ __PMC_CPU(PPC_POWER8, 0x390, "IBM POWER8") \ __PMC_CPU(GENERIC, 0x400, "Generic") \ __PMC_CPU(ARMV7_CORTEX_A5, 0x500, "ARMv7 Cortex A5") \ __PMC_CPU(ARMV7_CORTEX_A7, 0x501, "ARMv7 Cortex A7") \ __PMC_CPU(ARMV7_CORTEX_A8, 0x502, "ARMv7 Cortex A8") \ __PMC_CPU(ARMV7_CORTEX_A9, 0x503, "ARMv7 Cortex A9") \ __PMC_CPU(ARMV7_CORTEX_A15, 0x504, "ARMv7 Cortex A15") \ __PMC_CPU(ARMV7_CORTEX_A17, 0x505, "ARMv7 Cortex A17") \ __PMC_CPU(ARMV8_CORTEX_A53, 0x600, "ARMv8 Cortex A53") \ __PMC_CPU(ARMV8_CORTEX_A57, 0x601, "ARMv8 Cortex A57") \ __PMC_CPU(ARMV8_CORTEX_A76, 0x602, "ARMv8 Cortex A76") enum pmc_cputype { #undef __PMC_CPU #define __PMC_CPU(S,V,D) PMC_CPU_##S = V, __PMC_CPUS() }; #define PMC_CPU_FIRST PMC_CPU_AMD_K8 #define PMC_CPU_LAST PMC_CPU_ARMV8_CORTEX_A76 /* * Classes of PMCs */ #define __PMC_CLASSES() \ __PMC_CLASS(TSC, 0x00, "CPU Timestamp counter") \ __PMC_CLASS(K8, 0x02, "AMD K8 performance counters") \ __PMC_CLASS(IAF, 0x06, "Intel Core2/Atom, fixed function") \ __PMC_CLASS(IAP, 0x07, "Intel Core...Atom, programmable") \ __PMC_CLASS(UCF, 0x08, "Intel Uncore fixed function") \ __PMC_CLASS(UCP, 0x09, "Intel Uncore programmable") \ __PMC_CLASS(XSCALE, 0x0A, "Intel XScale counters") \ __PMC_CLASS(PPC7450, 0x0D, "Motorola MPC7450 class") \ __PMC_CLASS(PPC970, 0x0E, "IBM PowerPC 970 class") \ __PMC_CLASS(SOFT, 0x0F, "Software events") \ __PMC_CLASS(ARMV7, 0x10, "ARMv7") \ __PMC_CLASS(ARMV8, 0x11, "ARMv8") \ __PMC_CLASS(E500, 0x13, "Freescale e500 class") \ __PMC_CLASS(POWER8, 0x15, "IBM POWER8 class") \ __PMC_CLASS(DMC620_PMU_CD2, 0x16, "ARM DMC620 Memory Controller PMU CLKDIV2") \ __PMC_CLASS(DMC620_PMU_C, 0x17, "ARM DMC620 Memory Controller PMU CLK") \ __PMC_CLASS(CMN600_PMU, 0x18, "Arm CoreLink CMN600 Coherent Mesh Network PMU") enum pmc_class { #undef __PMC_CLASS #define __PMC_CLASS(S,V,D) PMC_CLASS_##S = V, __PMC_CLASSES() }; #define PMC_CLASS_FIRST PMC_CLASS_TSC #define PMC_CLASS_LAST PMC_CLASS_CMN600_PMU /* * A PMC can be in the following states: * * Hardware states: * DISABLED -- administratively prohibited from being used. * FREE -- HW available for use * Software states: * ALLOCATED -- allocated * STOPPED -- allocated, but not counting events * RUNNING -- allocated, and in operation; 'pm_runcount' * holds the number of CPUs using this PMC at * a given instant * DELETED -- being destroyed */ #define __PMC_HWSTATES() \ __PMC_STATE(DISABLED) \ __PMC_STATE(FREE) #define __PMC_SWSTATES() \ __PMC_STATE(ALLOCATED) \ __PMC_STATE(STOPPED) \ __PMC_STATE(RUNNING) \ __PMC_STATE(DELETED) #define __PMC_STATES() \ __PMC_HWSTATES() \ __PMC_SWSTATES() enum pmc_state { #undef __PMC_STATE #define __PMC_STATE(S) PMC_STATE_##S, __PMC_STATES() __PMC_STATE(MAX) }; #define PMC_STATE_FIRST PMC_STATE_DISABLED #define PMC_STATE_LAST PMC_STATE_DELETED /* * An allocated PMC may used as a 'global' counter or as a * 'thread-private' one. Each such mode of use can be in either * statistical sampling mode or in counting mode. Thus a PMC in use * * SS i.e., SYSTEM STATISTICAL -- system-wide statistical profiling * SC i.e., SYSTEM COUNTER -- system-wide counting mode * TS i.e., THREAD STATISTICAL -- thread virtual, statistical profiling * TC i.e., THREAD COUNTER -- thread virtual, counting mode * * Statistical profiling modes rely on the PMC periodically delivering * a interrupt to the CPU (when the configured number of events have * been measured), so the PMC must have the ability to generate * interrupts. * * In counting modes, the PMC counts its configured events, with the * value of the PMC being read whenever needed by its owner process. * * The thread specific modes "virtualize" the PMCs -- the PMCs appear * to be thread private and count events only when the profiled thread * actually executes on the CPU. * * The system-wide "global" modes keep the PMCs running all the time * and are used to measure the behaviour of the whole system. */ #define __PMC_MODES() \ __PMC_MODE(SS, 0) \ __PMC_MODE(SC, 1) \ __PMC_MODE(TS, 2) \ __PMC_MODE(TC, 3) enum pmc_mode { #undef __PMC_MODE #define __PMC_MODE(M,N) PMC_MODE_##M = N, __PMC_MODES() }; #define PMC_MODE_FIRST PMC_MODE_SS #define PMC_MODE_LAST PMC_MODE_TC #define PMC_IS_COUNTING_MODE(mode) \ ((mode) == PMC_MODE_SC || (mode) == PMC_MODE_TC) #define PMC_IS_SYSTEM_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_SC) #define PMC_IS_SAMPLING_MODE(mode) \ ((mode) == PMC_MODE_SS || (mode) == PMC_MODE_TS) #define PMC_IS_VIRTUAL_MODE(mode) \ ((mode) == PMC_MODE_TS || (mode) == PMC_MODE_TC) /* * PMC row disposition */ #define __PMC_DISPOSITIONS(N) \ __PMC_DISP(STANDALONE) /* global/disabled counters */ \ __PMC_DISP(FREE) /* free/available */ \ __PMC_DISP(THREAD) /* thread-virtual PMCs */ \ __PMC_DISP(UNKNOWN) /* sentinel */ enum pmc_disp { #undef __PMC_DISP #define __PMC_DISP(D) PMC_DISP_##D , __PMC_DISPOSITIONS() }; #define PMC_DISP_FIRST PMC_DISP_STANDALONE #define PMC_DISP_LAST PMC_DISP_THREAD /* * Counter capabilities * * __PMC_CAPS(NAME, VALUE, DESCRIPTION) */ #define __PMC_CAPS() \ __PMC_CAP(INTERRUPT, 0, "generate interrupts") \ __PMC_CAP(USER, 1, "count user-mode events") \ __PMC_CAP(SYSTEM, 2, "count system-mode events") \ __PMC_CAP(EDGE, 3, "do edge detection of events") \ __PMC_CAP(THRESHOLD, 4, "ignore events below a threshold") \ __PMC_CAP(READ, 5, "read PMC counter") \ __PMC_CAP(WRITE, 6, "reprogram PMC counter") \ __PMC_CAP(INVERT, 7, "invert comparison sense") \ __PMC_CAP(QUALIFIER, 8, "further qualify monitored events") \ __PMC_CAP(PRECISE, 9, "perform precise sampling") \ __PMC_CAP(TAGGING, 10, "tag upstream events") \ __PMC_CAP(CASCADE, 11, "cascade counters") \ __PMC_CAP(SYSWIDE, 12, "system wide counter") \ __PMC_CAP(DOMWIDE, 13, "NUMA domain wide counter") enum pmc_caps { #undef __PMC_CAP #define __PMC_CAP(NAME, VALUE, DESCR) PMC_CAP_##NAME = (1 << VALUE) , __PMC_CAPS() }; #define PMC_CAP_FIRST PMC_CAP_INTERRUPT #define PMC_CAP_LAST PMC_CAP_DOMWIDE /* * PMC Event Numbers * * These are generated from the definitions in "dev/hwpmc/pmc_events.h". */ enum pmc_event { #undef __PMC_EV #undef __PMC_EV_BLOCK #define __PMC_EV_BLOCK(C,V) PMC_EV_ ## C ## __BLOCK_START = (V) - 1 , #define __PMC_EV(C,N) PMC_EV_ ## C ## _ ## N , __PMC_EVENTS() }; /* * PMC SYSCALL INTERFACE */ /* * "PMC_OPS" -- these are the commands recognized by the kernel * module, and are used when performing a system call from userland. */ #define __PMC_OPS() \ __PMC_OP(CONFIGURELOG, "Set log file") \ __PMC_OP(FLUSHLOG, "Flush log file") \ __PMC_OP(GETCPUINFO, "Get system CPU information") \ __PMC_OP(GETDRIVERSTATS, "Get driver statistics") \ __PMC_OP(GETMODULEVERSION, "Get module version") \ __PMC_OP(GETPMCINFO, "Get per-cpu PMC information") \ __PMC_OP(PMCADMIN, "Set PMC state") \ __PMC_OP(PMCALLOCATE, "Allocate and configure a PMC") \ __PMC_OP(PMCATTACH, "Attach a PMC to a process") \ __PMC_OP(PMCDETACH, "Detach a PMC from a process") \ __PMC_OP(PMCGETMSR, "Get a PMC's hardware address") \ __PMC_OP(PMCRELEASE, "Release a PMC") \ __PMC_OP(PMCRW, "Read/Set a PMC") \ __PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate") \ __PMC_OP(PMCSTART, "Start a PMC") \ __PMC_OP(PMCSTOP, "Stop a PMC") \ __PMC_OP(WRITELOG, "Write a cookie to the log file") \ __PMC_OP(CLOSELOG, "Close log file") \ __PMC_OP(GETDYNEVENTINFO, "Get dynamic events list") enum pmc_ops { #undef __PMC_OP #define __PMC_OP(N, D) PMC_OP_##N, __PMC_OPS() }; /* * Flags used in operations on PMCs. */ #define PMC_F_UNUSED1 0x00000001 /* unused */ #define PMC_F_DESCENDANTS 0x00000002 /*OP ALLOCATE track descendants */ #define PMC_F_LOG_PROCCSW 0x00000004 /*OP ALLOCATE track ctx switches */ #define PMC_F_LOG_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */ #define PMC_F_NEWVALUE 0x00000010 /*OP RW write new value */ #define PMC_F_OLDVALUE 0x00000020 /*OP RW get old value */ /* V2 API */ #define PMC_F_CALLCHAIN 0x00000080 /*OP ALLOCATE capture callchains */ #define PMC_F_USERCALLCHAIN 0x00000100 /*OP ALLOCATE use userspace stack */ /* V10 API */ #define PMC_F_EV_PMU 0x00000200 /* * OP ALLOCATE: pm_ev has special * userspace meaning; counter * configuration is communicated * through class-dependent fields */ /* internal flags */ #define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/ #define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */ #define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */ #define PMC_CALLCHAIN_DEPTH_MAX 512 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ /* * Cookies used to denote allocated PMCs, and the values of PMCs. */ typedef uint32_t pmc_id_t; typedef uint64_t pmc_value_t; #define PMC_ID_INVALID (~ (pmc_id_t) 0) /* * PMC IDs have the following format: * * +-----------------------+-------+-----------+ * | CPU | PMC MODE | CLASS | ROW INDEX | * +-----------------------+-------+-----------+ * * where CPU is 12 bits, MODE 4, CLASS 8, and ROW INDEX 8 Field 'CPU' * is set to the requested CPU for system-wide PMCs or PMC_CPU_ANY for * process-mode PMCs. Field 'PMC MODE' is the allocated PMC mode. * Field 'PMC CLASS' is the class of the PMC. Field 'ROW INDEX' is the * row index for the PMC. * * The 'ROW INDEX' ranges over 0..NWPMCS where NHWPMCS is the total * number of hardware PMCs on this cpu. */ #define PMC_ID_TO_ROWINDEX(ID) ((ID) & 0xFF) #define PMC_ID_TO_CLASS(ID) (((ID) & 0xFF00) >> 8) #define PMC_ID_TO_MODE(ID) (((ID) & 0xF0000) >> 16) #define PMC_ID_TO_CPU(ID) (((ID) & 0xFFF00000) >> 20) #define PMC_ID_MAKE_ID(CPU,MODE,CLASS,ROWINDEX) \ ((((CPU) & 0xFFF) << 20) | (((MODE) & 0xF) << 16) | \ (((CLASS) & 0xFF) << 8) | ((ROWINDEX) & 0xFF)) /* * Data structures for system calls supported by the pmc driver. */ /* * OP PMCALLOCATE * * Allocate a PMC on the named CPU. */ #define PMC_CPU_ANY ~0 struct pmc_op_pmcallocate { uint32_t pm_caps; /* PMC_CAP_* */ uint32_t pm_cpu; /* CPU number or PMC_CPU_ANY */ enum pmc_class pm_class; /* class of PMC desired */ enum pmc_event pm_ev; /* [enum pmc_event] desired */ uint32_t pm_flags; /* additional modifiers PMC_F_* */ enum pmc_mode pm_mode; /* desired mode */ pmc_id_t pm_pmcid; /* [return] process pmc id */ pmc_value_t pm_count; /* initial/sample count */ union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */ }; /* * OP PMCADMIN * * Set the administrative state (i.e., whether enabled or disabled) of * a PMC 'pm_pmc' on CPU 'pm_cpu'. Note that 'pm_pmc' specifies an * absolute PMC number and need not have been first allocated by the * calling process. */ struct pmc_op_pmcadmin { int pm_cpu; /* CPU# */ uint32_t pm_flags; /* flags */ int pm_pmc; /* PMC# */ enum pmc_state pm_state; /* desired state */ }; /* * OP PMCATTACH / OP PMCDETACH * * Attach/detach a PMC and a process. */ struct pmc_op_pmcattach { pmc_id_t pm_pmc; /* PMC to attach to */ pid_t pm_pid; /* target process */ }; /* * OP PMCSETCOUNT * * Set the sampling rate (i.e., the reload count) for statistical counters. * 'pm_pmcid' need to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcsetcount { pmc_value_t pm_count; /* initial/sample count */ pmc_id_t pm_pmcid; /* PMC id to set */ }; /* * OP PMCRW * * Read the value of a PMC named by 'pm_pmcid'. 'pm_pmcid' needs * to have been previously allocated using PMCALLOCATE. */ struct pmc_op_pmcrw { uint32_t pm_flags; /* PMC_F_{OLD,NEW}VALUE*/ pmc_id_t pm_pmcid; /* pmc id */ pmc_value_t pm_value; /* new&returned value */ }; /* * OP GETPMCINFO * * retrieve PMC state for a named CPU. The caller is expected to * allocate 'npmc' * 'struct pmc_info' bytes of space for the return * values. */ struct pmc_info { char pm_name[PMC_NAME_MAX]; /* pmc name */ enum pmc_class pm_class; /* enum pmc_class */ int pm_enabled; /* whether enabled */ enum pmc_disp pm_rowdisp; /* FREE, THREAD or STANDLONE */ pid_t pm_ownerpid; /* owner, or -1 */ enum pmc_mode pm_mode; /* current mode [enum pmc_mode] */ enum pmc_event pm_event; /* current event */ uint32_t pm_flags; /* current flags */ pmc_value_t pm_reloadcount; /* sampling counters only */ }; struct pmc_op_getpmcinfo { int32_t pm_cpu; /* 0 <= cpu < mp_maxid */ struct pmc_info pm_pmcs[]; /* space for 'npmc' structures */ }; /* * OP GETCPUINFO * * Retrieve system CPU information. */ struct pmc_classinfo { enum pmc_class pm_class; /* class id */ uint32_t pm_caps; /* counter capabilities */ uint32_t pm_width; /* width of the PMC */ uint32_t pm_num; /* number of PMCs in class */ }; struct pmc_op_getcpuinfo { enum pmc_cputype pm_cputype; /* what kind of CPU */ uint32_t pm_ncpu; /* max CPU number */ uint32_t pm_npmc; /* #PMCs per CPU */ uint32_t pm_nclass; /* #classes of PMCs */ struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; }; /* * OP CONFIGURELOG * * Configure a log file for writing system-wide statistics to. */ struct pmc_op_configurelog { int pm_flags; int pm_logfd; /* logfile fd (or -1) */ }; /* * OP GETDRIVERSTATS * * Retrieve pmc(4) driver-wide statistics. */ #ifdef _KERNEL struct pmc_driverstats { counter_u64_t pm_intr_ignored; /* #interrupts ignored */ counter_u64_t pm_intr_processed; /* #interrupts processed */ counter_u64_t pm_intr_bufferfull; /* #interrupts with ENOSPC */ counter_u64_t pm_syscalls; /* #syscalls */ counter_u64_t pm_syscall_errors; /* #syscalls with errors */ counter_u64_t pm_buffer_requests; /* #buffer requests */ counter_u64_t pm_buffer_requests_failed; /* #failed buffer requests */ counter_u64_t pm_log_sweeps; /* #sample buffer processing passes */ counter_u64_t pm_merges; /* merged k+u */ counter_u64_t pm_overwrites; /* UR overwrites */ }; #endif struct pmc_op_getdriverstats { unsigned int pm_intr_ignored; /* #interrupts ignored */ unsigned int pm_intr_processed; /* #interrupts processed */ unsigned int pm_intr_bufferfull; /* #interrupts with ENOSPC */ unsigned int pm_syscalls; /* #syscalls */ unsigned int pm_syscall_errors; /* #syscalls with errors */ unsigned int pm_buffer_requests; /* #buffer requests */ unsigned int pm_buffer_requests_failed; /* #failed buffer requests */ unsigned int pm_log_sweeps; /* #sample buffer processing passes */ }; /* * OP RELEASE / OP START / OP STOP * * Simple operations on a PMC id. */ struct pmc_op_simple { pmc_id_t pm_pmcid; }; /* * OP WRITELOG * * Flush the current log buffer and write 4 bytes of user data to it. */ struct pmc_op_writelog { uint32_t pm_userdata; }; /* * OP GETMSR * * Retrieve the machine specific address associated with the allocated * PMC. This number can be used subsequently with a read-performance-counter * instruction. */ struct pmc_op_getmsr { uint32_t pm_msr; /* machine specific address */ pmc_id_t pm_pmcid; /* allocated pmc id */ }; /* * OP GETDYNEVENTINFO * * Retrieve a PMC dynamic class events list. */ struct pmc_dyn_event_descr { char pm_ev_name[PMC_NAME_MAX]; enum pmc_event pm_ev_code; }; struct pmc_op_getdyneventinfo { enum pmc_class pm_class; unsigned int pm_nevent; struct pmc_dyn_event_descr pm_events[PMC_EV_DYN_COUNT]; }; #ifdef _KERNEL #include #include #include #include #define PMC_HASH_SIZE 1024 #define PMC_MTXPOOL_SIZE 2048 #define PMC_LOG_BUFFER_SIZE 256 #define PMC_NLOGBUFFERS_PCPU 32 #define PMC_NSAMPLES 256 #define PMC_CALLCHAIN_DEPTH 128 #define PMC_THREADLIST_MAX 128 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "." /* * Locking keys * * (b) - pmc_bufferlist_mtx (spin lock) * (k) - pmc_kthread_mtx (sleep lock) * (o) - po->po_mtx (spin lock) * (g) - global_epoch_preempt (epoch) * (p) - pmc_sx (sx) */ /* * PMC commands */ struct pmc_syscall_args { register_t pmop_code; /* one of PMC_OP_* */ void *pmop_data; /* syscall parameter */ }; /* * Interface to processor specific s1tuff */ /* * struct pmc_descr * * Machine independent (i.e., the common parts) of a human readable * PMC description. */ struct pmc_descr { char pd_name[PMC_NAME_MAX]; /* name */ uint32_t pd_caps; /* capabilities */ enum pmc_class pd_class; /* class of the PMC */ uint32_t pd_width; /* width in bits */ }; /* * struct pmc_target * * This structure records all the target processes associated with a * PMC. */ struct pmc_target { LIST_ENTRY(pmc_target) pt_next; struct pmc_process *pt_process; /* target descriptor */ }; /* * struct pmc * * Describes each allocated PMC. * * Each PMC has precisely one owner, namely the process that allocated * the PMC. * * A PMC may be attached to multiple target processes. The * 'pm_targets' field links all the target processes being monitored * by this PMC. * * The 'pm_savedvalue' field is protected by a mutex. * * On a multi-cpu machine, multiple target threads associated with a * process-virtual PMC could be concurrently executing on different * CPUs. The 'pm_runcount' field is atomically incremented every time * the PMC gets scheduled on a CPU and atomically decremented when it * get descheduled. Deletion of a PMC is only permitted when this * field is '0'. * */ struct pmc_pcpu_state { uint32_t pps_overflowcnt; /* count overflow interrupts */ uint8_t pps_stalled; uint8_t pps_cpustate; } __aligned(CACHE_LINE_SIZE); struct pmc { LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */ LIST_ENTRY(pmc) pm_next; /* owner's list */ /* * System-wide PMCs are allocated on a CPU and are not moved * around. For system-wide PMCs we record the CPU the PMC was * allocated on in the 'CPU' field of the pmc ID. * * Virtual PMCs run on whichever CPU is currently executing * their targets' threads. For these PMCs we need to save * their current PMC counter values when they are taken off * CPU. */ union { pmc_value_t pm_savedvalue; /* Virtual PMCS */ } pm_gv; /* * For sampling mode PMCs, we keep track of the PMC's "reload * count", which is the counter value to be loaded in when * arming the PMC for the next counting session. For counting * modes on PMCs that are read-only (e.g., the x86 TSC), we * keep track of the initial value at the start of * counting-mode operation. */ union { pmc_value_t pm_reloadcount; /* sampling PMC modes */ pmc_value_t pm_initial; /* counting PMC modes */ } pm_sc; struct pmc_pcpu_state *pm_pcpu_state; volatile cpuset_t pm_cpustate; /* CPUs where PMC should be active */ uint32_t pm_caps; /* PMC capabilities */ enum pmc_event pm_event; /* event being measured */ uint32_t pm_flags; /* additional flags PMC_F_... */ struct pmc_owner *pm_owner; /* owner thread state */ counter_u64_t pm_runcount; /* #cpus currently on */ enum pmc_state pm_state; /* current PMC state */ /* * The PMC ID field encodes the row-index for the PMC, its * mode, class and the CPU# associated with the PMC. */ pmc_id_t pm_id; /* allocated PMC id */ enum pmc_class pm_class; /* md extensions */ union pmc_md_pmc pm_md; }; /* * Accessor macros for 'struct pmc' */ #define PMC_TO_MODE(P) PMC_ID_TO_MODE((P)->pm_id) #define PMC_TO_CLASS(P) PMC_ID_TO_CLASS((P)->pm_id) #define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id) #define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id) /* * struct pmc_threadpmcstate * * Record per-PMC, per-thread state. */ struct pmc_threadpmcstate { pmc_value_t pt_pmcval; /* per-thread reload count */ }; /* * struct pmc_thread * * Record a 'target' thread being profiled. */ struct pmc_thread { LIST_ENTRY(pmc_thread) pt_next; /* linked list */ struct thread *pt_td; /* target thread */ struct pmc_threadpmcstate pt_pmcs[]; /* per-PMC state */ }; /* * struct pmc_process * * Record a 'target' process being profiled. * * The target process being profiled could be different from the owner * process which allocated the PMCs. Each target process descriptor * is associated with NHWPMC 'struct pmc *' pointers. Each PMC at a * given hardware row-index 'n' will use slot 'n' of the 'pp_pmcs[]' * array. The size of this structure is thus PMC architecture * dependent. * */ struct pmc_targetstate { struct pmc *pp_pmc; /* target PMC */ pmc_value_t pp_pmcval; /* per-process value */ }; struct pmc_process { LIST_ENTRY(pmc_process) pp_next; /* hash chain */ LIST_HEAD(,pmc_thread) pp_tds; /* list of threads */ struct mtx *pp_tdslock; /* lock on pp_tds thread list */ int pp_refcnt; /* reference count */ uint32_t pp_flags; /* flags PMC_PP_* */ struct proc *pp_proc; /* target process */ struct pmc_targetstate pp_pmcs[]; /* NHWPMCs */ }; #define PMC_PP_ENABLE_MSR_ACCESS 0x00000001 /* * struct pmc_owner * * We associate a PMC with an 'owner' process. * * A process can be associated with 0..NCPUS*NHWPMC PMCs during its * lifetime, where NCPUS is the numbers of CPUS in the system and * NHWPMC is the number of hardware PMCs per CPU. These are * maintained in the list headed by the 'po_pmcs' to save on space. * */ struct pmc_owner { LIST_ENTRY(pmc_owner) po_next; /* hash chain */ CK_LIST_ENTRY(pmc_owner) po_ssnext; /* (g/p) list of SS PMC owners */ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */ struct mtx po_mtx; /* spin lock for (o) */ struct proc *po_owner; /* owner proc */ uint32_t po_flags; /* (k) flags PMC_PO_* */ struct proc *po_kthread; /* (k) helper kthread */ struct file *po_file; /* file reference */ int po_error; /* recorded error */ short po_sscount; /* # SS PMCs owned */ short po_logprocmaps; /* global mappings done */ struct pmclog_buffer *po_curbuf[MAXCPU]; /* current log buffer */ }; #define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */ #define PMC_PO_SHUTDOWN 0x00000010 /* in the process of shutdown */ #define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020 /* * struct pmc_hw -- describe the state of the PMC hardware * * When in use, a HW PMC is associated with one allocated 'struct pmc' * pointed to by field 'phw_pmc'. When inactive, this field is NULL. * * On an SMP box, one or more HW PMC's in process virtual mode with * the same 'phw_pmc' could be executing on different CPUs. In order * to handle this case correctly, we need to ensure that only * incremental counts get added to the saved value in the associated * 'struct pmc'. The 'phw_save' field is used to keep the saved PMC * value at the time the hardware is started during this context * switch (i.e., the difference between the new (hardware) count and * the saved count is atomically added to the count field in 'struct * pmc' at context switch time). * */ struct pmc_hw { uint32_t phw_state; /* see PHW_* macros below */ struct pmc *phw_pmc; /* current thread PMC */ }; #define PMC_PHW_RI_MASK 0x000000FF #define PMC_PHW_CPU_SHIFT 8 #define PMC_PHW_CPU_MASK 0x0000FF00 #define PMC_PHW_FLAGS_SHIFT 16 #define PMC_PHW_FLAGS_MASK 0xFFFF0000 #define PMC_PHW_INDEX_TO_STATE(ri) ((ri) & PMC_PHW_RI_MASK) #define PMC_PHW_STATE_TO_INDEX(state) ((state) & PMC_PHW_RI_MASK) #define PMC_PHW_CPU_TO_STATE(cpu) (((cpu) << PMC_PHW_CPU_SHIFT) & \ PMC_PHW_CPU_MASK) #define PMC_PHW_STATE_TO_CPU(state) (((state) & PMC_PHW_CPU_MASK) >> \ PMC_PHW_CPU_SHIFT) #define PMC_PHW_FLAGS_TO_STATE(flags) (((flags) << PMC_PHW_FLAGS_SHIFT) & \ PMC_PHW_FLAGS_MASK) #define PMC_PHW_STATE_TO_FLAGS(state) (((state) & PMC_PHW_FLAGS_MASK) >> \ PMC_PHW_FLAGS_SHIFT) #define PMC_PHW_FLAG_IS_ENABLED (PMC_PHW_FLAGS_TO_STATE(0x01)) #define PMC_PHW_FLAG_IS_SHAREABLE (PMC_PHW_FLAGS_TO_STATE(0x02)) /* * struct pmc_sample * * Space for N (tunable) PC samples and associated control data. */ struct pmc_sample { uint16_t ps_nsamples; /* callchain depth */ uint16_t ps_nsamples_actual; uint16_t ps_cpu; /* cpu number */ uint16_t ps_flags; /* other flags */ lwpid_t ps_tid; /* thread id */ pid_t ps_pid; /* process PID or -1 */ int ps_ticks; /* ticks at sample time */ /* pad */ struct thread *ps_td; /* which thread */ struct pmc *ps_pmc; /* interrupting PMC */ uintptr_t *ps_pc; /* (const) callchain start */ uint64_t ps_tsc; /* tsc value */ }; #define PMC_SAMPLE_FREE ((uint16_t) 0) #define PMC_USER_CALLCHAIN_PENDING ((uint16_t) 0xFFFF) struct pmc_samplebuffer { volatile uint64_t ps_prodidx; /* producer index */ volatile uint64_t ps_considx; /* consumer index */ uintptr_t *ps_callchains; /* all saved call chains */ struct pmc_sample ps_samples[]; /* array of sample entries */ }; #define PMC_CONS_SAMPLE(psb) \ (&(psb)->ps_samples[(psb)->ps_considx & pmc_sample_mask]) #define PMC_CONS_SAMPLE_OFF(psb, off) \ (&(psb)->ps_samples[(off) & pmc_sample_mask]) #define PMC_PROD_SAMPLE(psb) \ (&(psb)->ps_samples[(psb)->ps_prodidx & pmc_sample_mask]) /* * struct pmc_cpustate * * A CPU is modelled as a collection of HW PMCs with space for additional * flags. */ struct pmc_cpu { uint32_t pc_state; /* physical cpu number + flags */ struct pmc_samplebuffer *pc_sb[3]; /* space for samples */ struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */ }; #define PMC_PCPU_CPU_MASK 0x000000FF #define PMC_PCPU_FLAGS_MASK 0xFFFFFF00 #define PMC_PCPU_FLAGS_SHIFT 8 #define PMC_PCPU_STATE_TO_CPU(S) ((S) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_STATE_TO_FLAGS(S) (((S) & PMC_PCPU_FLAGS_MASK) >> PMC_PCPU_FLAGS_SHIFT) #define PMC_PCPU_FLAGS_TO_STATE(F) (((F) << PMC_PCPU_FLAGS_SHIFT) & PMC_PCPU_FLAGS_MASK) #define PMC_PCPU_CPU_TO_STATE(C) ((C) & PMC_PCPU_CPU_MASK) #define PMC_PCPU_FLAG_HTT (PMC_PCPU_FLAGS_TO_STATE(0x1)) /* * struct pmc_binding * * CPU binding information. */ struct pmc_binding { int pb_bound; /* is bound? */ int pb_cpu; /* if so, to which CPU */ u_char pb_priority; /* Thread active priority. */ }; struct pmc_mdep; /* * struct pmc_classdep * * PMC class-dependent operations. */ struct pmc_classdep { uint32_t pcd_caps; /* class capabilities */ enum pmc_class pcd_class; /* class id */ int pcd_num; /* number of PMCs */ int pcd_ri; /* row index of the first PMC in class */ int pcd_width; /* width of the PMC */ /* configuring/reading/writing the hardware PMCs */ int (*pcd_config_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_get_config)(int _cpu, int _ri, struct pmc **_ppm); int (*pcd_read_pmc)(int _cpu, int _ri, struct pmc *_pm, pmc_value_t *_value); int (*pcd_write_pmc)(int _cpu, int _ri, struct pmc *_pm, pmc_value_t _value); /* pmc allocation/release */ int (*pcd_allocate_pmc)(int _cpu, int _ri, struct pmc *_t, const struct pmc_op_pmcallocate *_a); int (*pcd_release_pmc)(int _cpu, int _ri, struct pmc *_pm); /* starting and stopping PMCs */ int (*pcd_start_pmc)(int _cpu, int _ri, struct pmc *_pm); int (*pcd_stop_pmc)(int _cpu, int _ri, struct pmc *_pm); /* description */ int (*pcd_describe)(int _cpu, int _ri, struct pmc_info *_pi, struct pmc **_ppmc); /* class-dependent initialization & finalization */ int (*pcd_pcpu_init)(struct pmc_mdep *_md, int _cpu); int (*pcd_pcpu_fini)(struct pmc_mdep *_md, int _cpu); /* machine-specific interface */ int (*pcd_get_msr)(int _ri, uint32_t *_msr); }; /* * struct pmc_mdep * * Machine dependent bits needed per CPU type. */ struct pmc_mdep { uint32_t pmd_cputype; /* from enum pmc_cputype */ uint32_t pmd_npmc; /* number of PMCs per CPU */ uint32_t pmd_nclass; /* number of PMC classes present */ /* * Machine dependent methods. */ /* thread context switch in/out */ int (*pmd_switch_in)(struct pmc_cpu *_p, struct pmc_process *_pp); int (*pmd_switch_out)(struct pmc_cpu *_p, struct pmc_process *_pp); /* handle a PMC interrupt */ int (*pmd_intr)(struct trapframe *_tf); /* * PMC class dependent information. */ struct pmc_classdep pmd_classdep[]; }; /* * Per-CPU state. This is an array of 'mp_ncpu' pointers * to struct pmc_cpu descriptors. */ extern struct pmc_cpu **pmc_pcpu; /* driver statistics */ extern struct pmc_driverstats pmc_stats; #if defined(HWPMC_DEBUG) /* HWPMC_DEBUG without KTR will compile but is a no-op. */ #if !defined(KTR) || !defined(KTR_COMPILE) || ((KTR_COMPILE & KTR_SUBSYS) == 0) #error "HWPMC_DEBUG requires KTR and KTR_COMPILE=KTR_SUBSYS -- see ktr(4)" #endif #include #define __pmcdbg_used /* unused variable annotation */ /* * Debug flags, major flag groups. * * Please keep the DEBUGGING section of the hwpmc(4) man page in sync. */ struct pmc_debugflags { int pdb_CPU; int pdb_CSW; int pdb_LOG; int pdb_MDP; int pdb_MOD; int pdb_OWN; int pdb_PMC; int pdb_PRC; int pdb_SAM; }; extern struct pmc_debugflags pmc_debugflags; #define KTR_PMC KTR_SUBSYS #define PMC_DEBUG_STRSIZE 128 #define PMC_DEBUG_DEFAULT_FLAGS { 0, 0, 0, 0, 0, 0, 0, 0, 0 } #define PMCDBG0(M, N, L, F) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR0(KTR_PMC, #M ":" #N ":" #L ": " F); \ } while (0) #define PMCDBG1(M, N, L, F, p1) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR1(KTR_PMC, #M ":" #N ":" #L ": " F, p1); \ } while (0) #define PMCDBG2(M, N, L, F, p1, p2) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR2(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2); \ } while (0) #define PMCDBG3(M, N, L, F, p1, p2, p3) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR3(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3); \ } while (0) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR4(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4);\ } while (0) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR5(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5); \ } while (0) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) do { \ if (pmc_debugflags.pdb_ ## M & (1 << PMC_DEBUG_MIN_ ## N)) \ CTR6(KTR_PMC, #M ":" #N ":" #L ": " F, p1, p2, p3, p4, \ p5, p6); \ } while (0) /* Major numbers */ #define PMC_DEBUG_MAJ_CPU 0 /* cpu switches */ #define PMC_DEBUG_MAJ_CSW 1 /* context switches */ #define PMC_DEBUG_MAJ_LOG 2 /* logging */ #define PMC_DEBUG_MAJ_MDP 3 /* machine dependent */ #define PMC_DEBUG_MAJ_MOD 4 /* misc module infrastructure */ #define PMC_DEBUG_MAJ_OWN 5 /* owner */ #define PMC_DEBUG_MAJ_PMC 6 /* pmc management */ #define PMC_DEBUG_MAJ_PRC 7 /* processes */ #define PMC_DEBUG_MAJ_SAM 8 /* sampling */ /* Minor numbers */ /* Common (8 bits) */ #define PMC_DEBUG_MIN_ALL 0 /* allocation */ #define PMC_DEBUG_MIN_REL 1 /* release */ #define PMC_DEBUG_MIN_OPS 2 /* ops: start, stop, ... */ #define PMC_DEBUG_MIN_INI 3 /* init */ #define PMC_DEBUG_MIN_FND 4 /* find */ /* MODULE */ #define PMC_DEBUG_MIN_PMH 14 /* pmc_hook */ #define PMC_DEBUG_MIN_PMS 15 /* pmc_syscall */ /* OWN */ #define PMC_DEBUG_MIN_ORM 8 /* owner remove */ #define PMC_DEBUG_MIN_OMR 9 /* owner maybe remove */ /* PROCESSES */ #define PMC_DEBUG_MIN_TLK 8 /* link target */ #define PMC_DEBUG_MIN_TUL 9 /* unlink target */ #define PMC_DEBUG_MIN_EXT 10 /* process exit */ #define PMC_DEBUG_MIN_EXC 11 /* process exec */ #define PMC_DEBUG_MIN_FRK 12 /* process fork */ #define PMC_DEBUG_MIN_ATT 13 /* attach/detach */ #define PMC_DEBUG_MIN_SIG 14 /* signalling */ /* CONTEXT SWITCHES */ #define PMC_DEBUG_MIN_SWI 8 /* switch in */ #define PMC_DEBUG_MIN_SWO 9 /* switch out */ /* PMC */ #define PMC_DEBUG_MIN_REG 8 /* pmc register */ #define PMC_DEBUG_MIN_ALR 9 /* allocate row */ /* MACHINE DEPENDENT LAYER */ #define PMC_DEBUG_MIN_REA 8 /* read */ #define PMC_DEBUG_MIN_WRI 9 /* write */ #define PMC_DEBUG_MIN_CFG 10 /* config */ #define PMC_DEBUG_MIN_STA 11 /* start */ #define PMC_DEBUG_MIN_STO 12 /* stop */ #define PMC_DEBUG_MIN_INT 13 /* interrupts */ /* CPU */ #define PMC_DEBUG_MIN_BND 8 /* bind */ #define PMC_DEBUG_MIN_SEL 9 /* select */ /* LOG */ #define PMC_DEBUG_MIN_GTB 8 /* get buf */ #define PMC_DEBUG_MIN_SIO 9 /* schedule i/o */ #define PMC_DEBUG_MIN_FLS 10 /* flush */ #define PMC_DEBUG_MIN_SAM 11 /* sample */ #define PMC_DEBUG_MIN_CLO 12 /* close */ #else #define __pmcdbg_used __unused #define PMCDBG0(M, N, L, F) /* nothing */ #define PMCDBG1(M, N, L, F, p1) #define PMCDBG2(M, N, L, F, p1, p2) #define PMCDBG3(M, N, L, F, p1, p2, p3) #define PMCDBG4(M, N, L, F, p1, p2, p3, p4) #define PMCDBG5(M, N, L, F, p1, p2, p3, p4, p5) #define PMCDBG6(M, N, L, F, p1, p2, p3, p4, p5, p6) #endif /* declare a dedicated memory pool */ MALLOC_DECLARE(M_PMC); /* * Functions */ struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); int pmc_process_interrupt(int _ring, struct pmc *_pm, struct trapframe *_tf); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); void pmc_restore_cpu_binding(struct pmc_binding *pb); void pmc_save_cpu_binding(struct pmc_binding *pb); void pmc_select_cpu(int cpu); struct pmc_mdep *pmc_mdep_alloc(int nclasses); void pmc_mdep_free(struct pmc_mdep *md); uint64_t pmc_rdtsc(void); #endif /* _KERNEL */ #endif /* _SYS_PMC_H_ */