D46397.id142307.diff

Index: sys/amd64/amd64/trap.c
===================================================================
--- sys/amd64/amd64/trap.c
+++ sys/amd64/amd64/trap.c
@@ -45,6 +45,7 @@
#include "opt_clock.h"
#include "opt_cpu.h"
#include "opt_hwpmc_hooks.h"
+#include "opt_hwt_hooks.h"
#include "opt_isa.h"
#include "opt_kdb.h"
@@ -74,6 +75,10 @@
PMC_SOFT_DEFINE( , , page_fault, write);
#endif
+#ifdef HWT_HOOKS
+#include <dev/hwt/hwt_intr.h>
+#endif
+
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
@@ -202,6 +207,32 @@
},
};
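+/*
+ * Dispatch a performance counter interrupt (delivered as an NMI) to the
+ * registered consumers. Both the hwt(4) and hwpmc(4) handlers are polled,
+ * since they share the local APIC LVT PCINT entry.
+ */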
+static __inline int
+nmi_handle_pcint(struct trapframe *frame)
+{
+ int handled = 0;
+
+#ifdef HWT_HOOKS
+ /*
+ * Call the hwt(4) backend interrupt handler (e.g. the Intel PT
+ * ToPA PMI handler), if one is installed.
+ */
+ if (hwt_intr != NULL)
+ handled |= !!(*hwt_intr)(frame);
+#endif
+
+#ifdef HWPMC_HOOKS
+ /*
+ * CPU PMCs interrupt using an NMI. If the PMC module is
+ * active, pass the trap frame to the PMC module's interrupt
+ * handler. A non-zero return value from the handler means that
+ * the NMI was consumed by it and we can return immediately.
+ */
+ if (pmc_intr != NULL)
+ handled |= !!(*pmc_intr)(frame);
+#endif
+
+ return (handled);
+}
+
/*
* Exception, fault, and trap interface to the FreeBSD kernel.
* This common code is called from assembly language IDT gate entry
@@ -249,17 +280,8 @@
}
if (type == T_NMI) {
-#ifdef HWPMC_HOOKS
- /*
- * CPU PMCs interrupt using an NMI. If the PMC module is
- * active, pass the 'rip' value to the PMC module's interrupt
- * handler. A non-zero return value from the handler means that
- * the NMI was consumed by it and we can return immediately.
- */
- if (pmc_intr != NULL &&
- (*pmc_intr)(frame) != 0)
+ if (nmi_handle_pcint(frame) != 0)
return;
-#endif
}
if ((frame->tf_rflags & PSL_I) == 0) {
Index: sys/amd64/pt/pt.h
===================================================================
--- /dev/null
+++ sys/amd64/pt/pt.h
@@ -0,0 +1,76 @@
+/*-
+ * Copyright (c) 2023 Bojan Novković <bnovkov@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _AMD64_PT_PT_H_
+#define _AMD64_PT_PT_H_
+
+#include <sys/types.h>
+
+#include <x86/include/specialreg.h>
+
+#define IP_FILTER_MAX_RANGES (4) /* Intel SDM Vol. 3C, 33-29 */
+
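+/*
+ * Userspace-supplied tracing configuration, handed to the backend through
+ * the hwt(4) configuration interface: requested RTIT_CTL bits, an optional
+ * CR3 filter value, up to IP_FILTER_MAX_RANGES instruction pointer
+ * filtering ranges, and MTC/CYC/PSB packet frequency settings.
+ */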
+struct pt_cpu_config {
+ uint64_t rtit_ctl;
+ register_t cr3_filter;
+ int nranges;
+ struct ipf_range {
+ vm_offset_t start;
+ vm_offset_t end;
+ } ip_ranges[IP_FILTER_MAX_RANGES];
+ uint32_t mtc_freq;
+ uint32_t cyc_thresh;
+ uint32_t psb_freq;
+};
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+
+#define PT_CPUID 0x14
+#define PT_SUPPORTED_FLAGS \
+ (RTIT_CTL_MTCEN | RTIT_CTL_CR3FILTER | RTIT_CTL_DIS_TNT)
+
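+/*
+ * Parts of the XSAVES/XRSTORS save area used for PT state: the 64-byte
+ * XSAVE header and the PT state component, which mirrors the IA32_RTIT_*
+ * MSRs loaded and saved by the hardware.
+ */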
+struct xsave_header {
+ uint64_t xsave_bv;
+ uint64_t xcomp_bv;
+ uint8_t reserved[48];
+};
+
+struct pt_ext_area {
+ uint64_t rtit_ctl;
+ uint64_t rtit_output_base;
+ uint64_t rtit_output_mask_ptrs;
+ uint64_t rtit_status;
+ uint64_t rtit_cr3_match;
+ uint64_t rtit_addr0_a;
+ uint64_t rtit_addr0_b;
+ uint64_t rtit_addr1_a;
+ uint64_t rtit_addr1_b;
+};
+
+MALLOC_DECLARE(M_PT);
+#endif /* _KERNEL */
+#endif /* !_AMD64_PT_PT_H_ */
Index: sys/amd64/pt/pt.c
===================================================================
--- /dev/null
+++ sys/amd64/pt/pt.c
@@ -0,0 +1,975 @@
+/*-
+ * Copyright (c) 2023 Bojan Novković <bnovkov@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Intel Processor Trace (PT) backend
+ *
+ * Driver Design Overview
+ *
+ * - Since PT is configured on a per-core basis, the driver uses
+ * 'smp_rendezvous' to start and disable tracing on each target core.
+ * - PT-specific resources are stored in a 'struct pt_ctx' context structure for
+ * each traced CPU core or thread. Upon initialization, a ToPA configuration
+ * is generated for each 'pt_ctx' structure using the HWT tracing buffers.
+ * The HWT tracing buffer is split into 4K ToPA entries. Currently, each
+ * 4K ToPA entry is configured to trigger an interrupt after it is filled.
+ * - The PT driver uses the XSAVE/XRSTOR PT extensions to load and save all
+ * relevant PT registers. Every time a traced thread is switched
+ * out or in, its state will be saved to or loaded from its corresponding
+ * 'pt_ctx' context.
+ * - When tracing starts, the PT hardware will start writing data into the
+ * tracing buffer. When a TOPA_INT entry is filled, it will trigger an
+ * interrupt before continuing. The interrupt handler will then fetch the
+ * last valid tracing buffer offset and enqueue a HWT_RECORD_BUFFER record.
+ * The driver is currently configured to use the NMI interrupt line.
+ * - The userspace PT backend waits for incoming HWT_RECORD_BUFFER records
+ * and uses the offsets to decode data from the tracing buffer.
+ *
+ * Future improvements and limitations
+ *
+ * - We currently configure the PT hardware to trigger an interrupt whenever
+ * a 4K ToPA entry is filled. While this is fine when tracing smaller
+ * functions or infrequent code paths, this will generate too much interrupt
+ * traffic when tracing hotter functions. A proper solution for this issue
+ * should estimate the amount of data generated by the current configuration
+ * and use it to determine interrupt frequency.
+ *
+ * - Support for more tracing options and PT features.
+ *
+ */
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/event.h>
+#include <sys/hwt.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sdt.h>
+#include <sys/smp.h>
+#include <sys/lock.h>
+#include <sys/errno.h>
+#include <sys/taskqueue.h>
+#include <sys/domainset.h>
+#include <sys/sleepqueue.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+#include <machine/cpufunc.h>
+#include <machine/param.h>
+#include <machine/atomic.h>
+#include <machine/smp.h>
+#include <machine/specialreg.h>
+
+#include <x86/apicvar.h>
+#include <x86/x86_var.h>
+
+#include <dev/hwt/hwt_vm.h>
+#include <dev/hwt/hwt_thread.h>
+#include <dev/hwt/hwt_cpu.h>
+#include <dev/hwt/hwt_config.h>
+#include <dev/hwt/hwt_context.h>
+#include <dev/hwt/hwt_backend.h>
+#include <dev/hwt/hwt_hook.h>
+#include <dev/hwt/hwt_intr.h>
+#include <dev/hwt/hwt_record.h>
+
+#include "pt.h"
+
+#ifdef PT_DEBUG
+#define dprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
+#else
+#define dprintf(fmt, ...)
+#endif
+
+#define PT_XSAVE_MASK (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)
+
+MALLOC_DEFINE(M_PT, "pt", "Intel Processor Trace");
+
+SDT_PROVIDER_DEFINE(pt);
+SDT_PROBE_DEFINE(pt, , , topa__intr);
+
+TASKQUEUE_FAST_DEFINE_THREAD(pt);
+
+static void pt_send_buffer_record(void *arg, int pending __unused);
+static int pt_topa_intr(struct trapframe *tf);
+
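+/*
+ * Save area image consumed by XSAVES/XRSTORS: the 512-byte legacy region,
+ * the XSAVE header, and the PT state component in compacted format. The
+ * area must be 64-byte aligned.
+ */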
+struct pt_save_area {
+ uint8_t legacy_state[512];
+ struct xsave_header header;
+ struct pt_ext_area pt_ext_area;
+} __aligned(64);
+
+struct pt_buffer {
+ uint64_t *topa_hw; /* ToPA table entries. */
+ size_t size;
+ struct mtx lock; /* Lock for fields below. */
+ vm_offset_t offset;
+ uint64_t wrap_count;
+ int curpage;
+};
+
+struct pt_ctx {
+ struct pt_buffer buf; /* ToPA buffer metadata */
+ struct task task; /* ToPA buffer notification task */
+ struct hwt_context *hwt_ctx;
+ struct pt_save_area save_area; /* PT XSAVE area */
+ int id;
+};
+
+/* PT tracing contexts used for CPU mode. */
+static struct pt_ctx *pt_pcpu_ctx;
+
+enum pt_cpu_state {
+ PT_DISABLED = 0,
+ PT_STOPPED,
+ PT_TERMINATING,
+ PT_ACTIVE
+};
+
+static struct pt_cpu {
+ struct pt_ctx *ctx; /* active PT tracing context */
+ enum pt_cpu_state state; /* used as part of trace stop protocol */
+} *pt_pcpu;
+
+/*
+ * PT-related CPUID bits.
+ */
+static struct pt_cpu_info {
+ uint32_t l0_eax;
+ uint32_t l0_ebx;
+ uint32_t l0_ecx;
+ uint32_t l1_eax;
+ uint32_t l1_ebx;
+} pt_info;
+
+static bool initialized = false;
+
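+/*
+ * XSAVES/XRSTORS wrappers for the PT supervisor state component. The PT
+ * bit in MSR_IA32_XSS must be set before these are executed;
+ * pt_cpu_toggle_local() takes care of that.
+ */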
+static __inline void
+xrstors(char *addr, uint64_t mask)
+{
+ uint32_t low, hi;
+
+ low = mask;
+ hi = mask >> 32;
+ __asm __volatile("xrstors %0" : : "m"(*addr), "a"(low), "d"(hi));
+}
+
+static __inline void
+xsaves(char *addr, uint64_t mask)
+{
+ uint32_t low, hi;
+
+ low = mask;
+ hi = mask >> 32;
+ __asm __volatile("xsaves %0"
+ : "=m"(*addr)
+ : "a"(low), "d"(hi)
+ : "memory");
+}
+
+static __inline enum pt_cpu_state
+pt_cpu_get_state(int cpu_id)
+{
+ return (pt_pcpu[cpu_id].state);
+}
+
+static __inline void
+pt_cpu_set_state(int cpu_id, enum pt_cpu_state state)
+{
+ pt_pcpu[cpu_id].state = state;
+}
+
+static __inline void
+pt_update_buffer(struct pt_buffer *buf)
+{
+ uint64_t reg;
+ int curpage;
+
+ /* Update buffer offset. */
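+ /*
+ * MSR_IA32_RTIT_OUTPUT_MASK_PTRS layout: bits 31:7 identify the current
+ * ToPA table entry (one entry per 4K page here), bits 63:32 hold the
+ * offset into the current output region.
+ */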
+ reg = rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS);
+ curpage = (reg & 0xffffff80) >> 7;
+ mtx_lock_spin(&buf->lock);
+ /* Check if the output wrapped. */
+ if (buf->curpage > curpage)
+ buf->wrap_count++;
+ buf->curpage = curpage;
+ buf->offset = reg >> 32;
+ mtx_unlock_spin(&buf->lock);
+
+ dprintf("%s: wrap_cnt: %lu, curpage: %d, offset: %zu\n", __func__,
+ buf->wrap_count, buf->curpage, buf->offset);
+}
+
+static __inline void
+pt_fill_buffer_record(int id, struct pt_buffer *buf,
+ struct hwt_record_entry *rec)
+{
+ rec->record_type = HWT_RECORD_BUFFER;
+ rec->buf_id = id;
+ rec->curpage = buf->curpage;
+ rec->offset = buf->offset + (buf->wrap_count * buf->size);
+}
+
+/*
+ * Enables or disables tracing on curcpu.
+ */
+static void
+pt_cpu_toggle_local(struct pt_save_area *save_area, bool enable)
+{
+ u_long xcr0, cr0;
+ u_long xss;
+
+ KASSERT((curthread)->td_critnest >= 1,
+ ("%s: not in critical section", __func__));
+
+ cr0 = rcr0();
+ if (cr0 & CR0_TS)
+ clts();
+ xcr0 = rxcr(XCR0);
+ if ((xcr0 & PT_XSAVE_MASK) != PT_XSAVE_MASK)
+ load_xcr(XCR0, xcr0 | PT_XSAVE_MASK);
+ xss = rdmsr(MSR_IA32_XSS);
+ wrmsr(MSR_IA32_XSS, xss | XFEATURE_ENABLED_PT);
+
+ if (!enable) {
+ KASSERT((rdmsr(MSR_IA32_RTIT_CTL) & RTIT_CTL_TRACEEN) != 0,
+ ("%s: PT is disabled", __func__));
+ xsaves((char *)save_area, XFEATURE_ENABLED_PT);
+ } else {
+ KASSERT((rdmsr(MSR_IA32_RTIT_CTL) & RTIT_CTL_TRACEEN) == 0,
+ ("%s: PT is enabled", __func__));
+ xrstors((char *)save_area, XFEATURE_ENABLED_PT);
+ }
+ wrmsr(MSR_IA32_XSS, xss);
+ if ((xcr0 & PT_XSAVE_MASK) != PT_XSAVE_MASK)
+ load_xcr(XCR0, xcr0);
+ if (cr0 & CR0_TS)
+ load_cr0(cr0);
+}
+
+/*
+ * Dumps contents of PT-related registers.
+ */
+static void
+pt_cpu_dump(int cpu_id)
+{
+#ifdef PT_DEBUG
+ struct pt_save_area *area = &pt_pcpu[cpu_id].ctx->save_area;
+
+ printf("dumping PT info for cpu %d\n", cpu_id);
+
+ printf("rtit_ctl: 0x%zx\n", area->pt_ext_area.rtit_ctl);
+ printf("rtit_addr0_a: 0x%zx\n", area->pt_ext_area.rtit_addr0_a);
+ printf("rtit_addr0_b: 0x%zx\n", area->pt_ext_area.rtit_addr0_b);
+
+ printf("xsave_bv: 0x%zx\n", area->header.xsave_bv);
+ printf("xcomp_bv: 0x%zx\n", area->header.xcomp_bv);
+
+ printf("rtit_status MSR: 0x%zx\n", rdmsr(MSR_IA32_RTIT_STATUS));
+ printf("rtit_ctl MSR: 0x%zx\n", rdmsr(MSR_IA32_RTIT_CTL));
+ printf("rtit output base MSR: 0x%zx\n",
+ rdmsr(MSR_IA32_RTIT_OUTPUT_BASE));
+ printf("rtit mask_ptrs MSR: 0x%zx\n",
+ rdmsr(MSR_IA32_RTIT_OUTPUT_MASK_PTRS));
+ printf("rtit_addr0_a MSR: 0x%zx\n", rdmsr(MSR_IA32_RTIT_ADDR0_A));
+
+ lapic_dump("");
+#endif
+}
+
+/*
+ * CPU mode helper routines.
+ */
+static void
+pt_cpu_start(void *dummy)
+{
+ struct pt_cpu *cpu = &pt_pcpu[curcpu];
+
+ MPASS(cpu->ctx != NULL);
+ dprintf("%s: curcpu %d\n", __func__, curcpu);
+
+ /* Enable XSAVE. */
+ load_cr4(rcr4() | CR4_XSAVE);
+ /* Clear PMI status. */
+ wrmsr(MSR_IA32_RTIT_STATUS, 0);
+ /* Start tracing. */
+ pt_cpu_toggle_local(&cpu->ctx->save_area, true);
+ pt_cpu_set_state(curcpu, PT_ACTIVE);
+
+ pt_cpu_dump(curcpu);
+}
+
+static void
+pt_cpu_stop(void *dummy)
+{
+ struct pt_cpu *cpu;
+ struct pt_ctx *ctx;
+
+ /* Shutdown may occur before PT gets properly configured on curcpu. */
+ if (pt_cpu_get_state(curcpu) == PT_DISABLED)
+ return;
+
+ cpu = &pt_pcpu[curcpu];
+ ctx = cpu->ctx;
+ MPASS(ctx != NULL);
+ dprintf("%s: curcpu %d\n", __func__, curcpu);
+
+ /* Stop tracing. */
+ pt_cpu_toggle_local(&cpu->ctx->save_area, false);
+ pt_cpu_set_state(curcpu, PT_STOPPED);
+ /* Update buffer offset. */
+ pt_update_buffer(&ctx->buf);
+
+ pt_cpu_dump(curcpu);
+}
+
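+/*
+ * Prepare a ToPA table covering the HWT tracing buffer: one 8-byte entry
+ * per 4K buffer page, each marked with TOPA_INT so that a PMI is raised
+ * when the page fills up, plus a final TOPA_END entry pointing back to the
+ * start of the table to make the output circular.
+ */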
+static int
+pt_topa_prepare(struct pt_ctx *ctx, struct hwt_vm *vm)
+{
+ struct pt_buffer *buf;
+ size_t topa_size;
+ int i;
+
+ topa_size = TOPA_SIZE_4K; /* 4K only for now */
+ buf = &ctx->buf;
+
+ KASSERT(buf->topa_hw == NULL,
+ ("%s: ToPA info already exists", __func__));
+
+ /* Allocate array of TOPA entries. */
+ buf->topa_hw = malloc((vm->npages + 1) * sizeof(uint64_t), M_PT,
+ M_NOWAIT | M_ZERO);
+ if (buf->topa_hw == NULL)
+ return (ENOMEM);
+
+ dprintf("%s: ToPA virt addr %p\n", __func__, buf->topa_hw);
+ buf->size = vm->npages * PAGE_SIZE;
+ for (i = 0; i < vm->npages; i++) {
+ buf->topa_hw[i] = VM_PAGE_TO_PHYS(vm->pages[i]) | topa_size;
+ /*
+ * XXX: TOPA_INT should ideally be set according to
+ * expected amount of incoming trace data. Too few TOPA_INT
+ * entries will not trigger interrupts often enough when tracing
+ * smaller functions.
+ */
+ /* Raise interrupt when entry is filled. */
+ buf->topa_hw[i] |= TOPA_INT;
+ }
+ /* Circular buffer - point last entry to first */
+ buf->topa_hw[vm->npages] = (uint64_t)vtophys(buf->topa_hw) | TOPA_END;
+
+ return (0);
+}
+
+static int
+pt_configure_ranges(struct pt_ctx *ctx, struct pt_cpu_config *cfg)
+{
+ struct pt_ext_area *pt_ext;
+ struct pt_save_area *save_area;
+ int nranges_supp, n, error = 0;
+
+ save_area = &ctx->save_area;
+ pt_ext = &save_area->pt_ext_area;
+
+ if (pt_info.l0_ebx & CPUPT_IPF) {
+ /* How many IP ranges does the CPU support? */
+ nranges_supp = (pt_info.l1_eax & CPUPT_NADDR_M) >>
+ CPUPT_NADDR_S;
+
+ /* xsave/xrstor supports two ranges only. */
+ if (nranges_supp > 2)
+ nranges_supp = 2;
+ n = cfg->nranges;
+ if (n > nranges_supp) {
+ printf("%s: %d IP filtering ranges requested, CPU "
+ "supports %d, truncating\n",
+ __func__, n, nranges_supp);
+ n = nranges_supp;
+ }
+
+ switch (n) {
+ case 2:
+ pt_ext->rtit_ctl |= (1UL << RTIT_CTL_ADDR_CFG_S(1));
+ pt_ext->rtit_addr1_a = cfg->ip_ranges[1].start;
+ pt_ext->rtit_addr1_b = cfg->ip_ranges[1].end;
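+ /* FALLTHROUGH */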
+ case 1:
+ pt_ext->rtit_ctl |= (1UL << RTIT_CTL_ADDR_CFG_S(0));
+ pt_ext->rtit_addr0_a = cfg->ip_ranges[0].start;
+ pt_ext->rtit_addr0_b = cfg->ip_ranges[0].end;
+ break;
+ default:
+ error = (EINVAL);
+ break;
+ }
+ } else
+ error = (ENXIO);
+
+ return (error);
+}
+
+static int
+pt_init_ctx(struct pt_ctx *pt_ctx, struct hwt_vm *vm, int ctx_id)
+{
+
+ dprintf("%s: ctx id %d\n", __func__, ctx_id);
+
+ KASSERT(pt_ctx->buf.topa_hw == NULL,
+ ("%s: active ToPA buffer in context %p\n", __func__, pt_ctx));
+
+ memset(pt_ctx, 0, sizeof(struct pt_ctx));
+ mtx_init(&pt_ctx->buf.lock, "pttopa", NULL, MTX_SPIN);
+ dprintf("%s: preparing ToPA buffer\n", __func__);
+ if (pt_topa_prepare(pt_ctx, vm) != 0) {
+ dprintf("%s: failed to prepare ToPA buffer\n", __func__);
+ return (ENOMEM);
+ }
+
+ pt_ctx->id = ctx_id;
+ TASK_INIT(&pt_ctx->task, 0, pt_send_buffer_record, pt_ctx);
+
+ return (0);
+}
+
+static void
+pt_deinit_ctx(struct pt_ctx *pt_ctx)
+{
+
+ if (pt_ctx->buf.topa_hw != NULL)
+ free(pt_ctx->buf.topa_hw, M_PT);
+ memset(pt_ctx, 0, sizeof(*pt_ctx));
+ pt_ctx->buf.topa_hw = NULL;
+}
+
+static int
+pt_backend_configure(struct hwt_context *ctx, int cpu_id, int thread_id)
+{
+ struct hwt_cpu *hwt_cpu;
+ struct hwt_thread *thr;
+ struct pt_ctx *pt_ctx;
+ struct pt_cpu_config *cfg;
+ struct pt_ext_area *pt_ext;
+ struct xsave_header *hdr;
+ int error;
+
+ dprintf("%s\n", __func__);
+
+ cfg = (struct pt_cpu_config *)ctx->config;
+ pt_ctx = NULL;
+
+ /* Sanitize input. */
+ // cfg->rtit_ctl &= PT_SUPPORTED_FLAGS;
+
+ /* Validate user configuration */
+ if (cfg->rtit_ctl & RTIT_CTL_MTCEN) {
+ if ((pt_info.l0_ebx & CPUPT_MTC) == 0) {
+ printf("%s: CPU does not support generating MTC "
+ "packets\n", __func__);
+ return (ENXIO);
+ }
+ }
+
+ if (cfg->rtit_ctl & RTIT_CTL_CR3FILTER) {
+ if ((pt_info.l0_ebx & CPUPT_CR3) == 0) {
+ printf("%s: CPU does not support CR3 filtering\n",
+ __func__);
+ return (ENXIO);
+ }
+ }
+
+ if (cfg->rtit_ctl & RTIT_CTL_DIS_TNT) {
+ if ((pt_info.l0_ebx & CPUPT_DIS_TNT) == 0) {
+ printf("%s: CPU does not support TNT\n", __func__);
+ return (ENXIO);
+ }
+ }
+ /* TODO: support for more config bits. */
+
+ if (ctx->mode == HWT_MODE_CPU) {
+ TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) {
+ if (hwt_cpu->cpu_id != cpu_id)
+ continue;
+ pt_ctx = &pt_pcpu_ctx[cpu_id];
+ break;
+ }
+ } else {
+ TAILQ_FOREACH(thr, &ctx->threads, next) {
+ if (thr->thread_id != thread_id)
+ continue;
+ KASSERT(thr->private != NULL,
+ ("%s: hwt thread private"
+ " not set, thr %p",
+ __func__, thr));
+ pt_ctx = (struct pt_ctx *)thr->private;
+ break;
+ }
+ }
+ if (pt_ctx == NULL)
+ return (ENOENT);
+
+ dprintf("%s: preparing MSRs\n", __func__);
+ pt_ext = &pt_ctx->save_area.pt_ext_area;
+ hdr = &pt_ctx->save_area.header;
+
+ pt_ext->rtit_ctl |= cfg->rtit_ctl;
+ if (cfg->nranges != 0) {
+ dprintf("%s: preparing IPF ranges\n", __func__);
+ if ((error = pt_configure_ranges(pt_ctx, cfg)) != 0)
+ return (error);
+ }
+ /* Save hwt context for buffer records. */
+ pt_ctx->hwt_ctx = ctx;
+ /* Prepare ToPA MSR values. */
+ pt_ext->rtit_ctl |= RTIT_CTL_TOPA;
+ pt_ext->rtit_output_base = (uint64_t)vtophys(pt_ctx->buf.topa_hw);
+ pt_ext->rtit_output_mask_ptrs = 0x7f;
+ /* Init header and enable compaction. */
+ hdr->xsave_bv = XFEATURE_ENABLED_PT;
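+ /* Bit 63 of XCOMP_BV selects the compacted XSAVE format. */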
+ hdr->xcomp_bv = XFEATURE_ENABLED_PT |
+ (1ULL << 63);
+ /* Enable tracing. */
+ pt_ext->rtit_ctl |= RTIT_CTL_TRACEEN;
+
+ pt_pcpu[cpu_id].ctx = pt_ctx;
+ /* Mark CPU as initialized. */
+ pt_cpu_set_state(cpu_id, PT_STOPPED);
+
+ return (0);
+}
+
+/*
+ * hwt backend trace start operation. CPU affine.
+ */
+static void
+pt_backend_enable(struct hwt_context *ctx, int cpu_id)
+{
+
+ KASSERT(curcpu == cpu_id,
+ ("%s: attempting to start PT on another cpu", __func__));
+ pt_cpu_start(NULL);
+ CPU_SET(cpu_id, &ctx->cpu_map);
+}
+
+/*
+ * hwt backend trace stop operation. CPU affine.
+ */
+static void
+pt_backend_disable(struct hwt_context *ctx, int cpu_id)
+{
+ struct pt_cpu *cpu;
+
+ KASSERT(curcpu == cpu_id,
+ ("%s: attempting to disable PT on another cpu", __func__));
+ pt_cpu_stop(NULL);
+ CPU_CLR(cpu_id, &ctx->cpu_map);
+ cpu = &pt_pcpu[cpu_id];
+ /* Disable current context. */
+ cpu->ctx = NULL;
+}
+
+/*
+ * hwt backend trace start operation for remote CPUs.
+ */
+static void
+pt_backend_enable_smp(struct hwt_context *ctx)
+{
+
+ dprintf("%s\n", __func__);
+ KASSERT(ctx->mode == HWT_MODE_CPU,
+ ("%s: this should only be used for CPU mode", __func__));
+ smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_start, NULL, NULL);
+}
+
+/*
+ * hwt backend trace stop operation for remote CPUs.
+ */
+static void
+pt_backend_disable_smp(struct hwt_context *ctx)
+{
+
+ dprintf("%s\n", __func__);
+ if (CPU_EMPTY(&ctx->cpu_map)) {
+ dprintf("%s: empty cpu map\n", __func__);
+ return;
+ }
+ smp_rendezvous_cpus(ctx->cpu_map, NULL, pt_cpu_stop, NULL, NULL);
+}
+
+static int
+pt_backend_init(struct hwt_context *ctx)
+{
+ struct hwt_cpu *hwt_cpu;
+ int error;
+
+ dprintf("%s\n", __func__);
+ /* Install ToPA PMI handler. */
+ KASSERT(hwt_intr == NULL,
+ ("%s: ToPA PMI handler already present", __func__));
+ hwt_intr = pt_topa_intr;
+ wmb();
+
+ /*
+ * Initialize per-cpu MODE_CPU contexts.
+ * MODE_THREAD contexts get initialized during thread creation.
+ */
+ if (ctx->mode == HWT_MODE_CPU) {
+ TAILQ_FOREACH(hwt_cpu, &ctx->cpus, next) {
+ error = pt_init_ctx(&pt_pcpu_ctx[hwt_cpu->cpu_id],
+ hwt_cpu->vm, hwt_cpu->cpu_id);
+ if (error)
+ return (error);
+ }
+ }
+
+ return (0);
+}
+
+static void
+pt_backend_deinit(struct hwt_context *ctx)
+{
+ struct pt_ctx *pt_ctx;
+ struct hwt_thread *thr;
+ int cpu_id;
+
+ dprintf("%s\n", __func__);
+
+ /* Remove ToPA PMI handler. */
+ hwt_intr = NULL;
+ wmb();
+
+ /* Stop tracing on all active CPUs */
+ pt_backend_disable_smp(ctx);
+ if (ctx->mode == HWT_MODE_THREAD) {
+ TAILQ_FOREACH(thr, &ctx->threads, next) {
+ KASSERT(thr->private != NULL,
+ ("%s: thr->private not set", __func__));
+ pt_ctx = (struct pt_ctx *)thr->private;
+ /* Free ToPA table. */
+ pt_deinit_ctx(pt_ctx);
+ }
+ } else {
+ CPU_FOREACH(cpu_id) {
+ if (!CPU_ISSET(cpu_id, &ctx->cpu_map))
+ continue;
+ if (pt_pcpu[cpu_id].ctx != NULL) {
+ KASSERT(pt_pcpu[cpu_id].ctx ==
+ &pt_pcpu_ctx[cpu_id],
+ ("%s: CPU mode tracing with non-cpu mode PT"
+ "context active",
+ __func__));
+ pt_pcpu[cpu_id].ctx = NULL;
+ }
+ pt_ctx = &pt_pcpu_ctx[cpu_id];
+ /* Free ToPA table. */
+ pt_deinit_ctx(pt_ctx);
+ /* Reset pcpu entry. */
+ memset(&pt_pcpu[cpu_id], 0, sizeof(struct pt_cpu));
+ }
+ }
+}
+
+/*
+ * Fetches current offset into the tracing buffer.
+ */
+static int
+pt_backend_read(struct hwt_vm *vm, int *curpage, vm_offset_t *curpage_offset,
+ uint64_t *data)
+{
+ struct pt_buffer *buf;
+
+ if (vm->ctx->mode == HWT_MODE_THREAD)
+ buf = &((struct pt_ctx *)vm->thr->private)->buf;
+ else
+ buf = &pt_pcpu[vm->cpu->cpu_id].ctx->buf;
+ mtx_lock_spin(&buf->lock);
+ *curpage = buf->curpage;
+ *curpage_offset = buf->offset + (buf->wrap_count * vm->ctx->bufsize);
+ mtx_unlock_spin(&buf->lock);
+
+ return (0);
+}
+
+static int
+pt_backend_alloc_thread(struct hwt_thread *thr)
+{
+ struct pt_ctx *pt_ctx;
+ int error;
+
+ /* Omit M_WAITOK; this may be invoked from a non-sleepable context. */
+ pt_ctx = malloc(sizeof(*pt_ctx), M_PT, M_NOWAIT | M_ZERO);
+ if (pt_ctx == NULL)
+ return (ENOMEM);
+
+ error = pt_init_ctx(pt_ctx, thr->vm, thr->thread_id);
+ if (error) {
+ free(pt_ctx, M_PT);
+ return (error);
+ }
+
+ thr->private = pt_ctx;
+ return (0);
+}
+
+static void
+pt_backend_free_thread(struct hwt_thread *thr)
+{
+ struct pt_ctx *ctx;
+
+ ctx = (struct pt_ctx *)thr->private;
+
+ pt_deinit_ctx(ctx);
+ free(ctx, M_PT);
+}
+
+static void
+pt_backend_dump(int cpu_id)
+{
+}
+
+static struct hwt_backend_ops pt_ops = {
+ .hwt_backend_init = pt_backend_init,
+ .hwt_backend_deinit = pt_backend_deinit,
+
+ .hwt_backend_configure = pt_backend_configure,
+
+ .hwt_backend_enable = pt_backend_enable,
+ .hwt_backend_disable = pt_backend_disable,
+
+#ifdef SMP
+ .hwt_backend_enable_smp = pt_backend_enable_smp,
+ .hwt_backend_disable_smp = pt_backend_disable_smp,
+#endif
+
+ .hwt_backend_read = pt_backend_read,
+ .hwt_backend_dump = pt_backend_dump,
+
+ .hwt_backend_thread_alloc = pt_backend_alloc_thread,
+ .hwt_backend_thread_free = pt_backend_free_thread,
+};
+
+static struct hwt_backend backend = {
+ .ops = &pt_ops,
+ .name = "pt",
+ .kva_req = 1,
+};
+
+static void
+pt_send_buffer_record(void *arg, int pending __unused)
+{
+ struct hwt_record_entry record;
+ struct pt_ctx *ctx = (struct pt_ctx *)arg;
+
+ /* Prepare buffer record. */
+ mtx_lock_spin(&ctx->buf.lock);
+ pt_fill_buffer_record(ctx->id, &ctx->buf, &record);
+ mtx_unlock_spin(&ctx->buf.lock);
+ hwt_record_ctx(ctx->hwt_ctx, &record, M_ZERO | M_NOWAIT);
+}
+
+/*
+ * ToPA PMI handler.
+ */
+static int
+pt_topa_intr(struct trapframe *tf)
+{
+ struct pt_buffer *buf;
+ struct pt_ctx *ctx;
+ uint64_t reg;
+
+ SDT_PROBE0(pt, , , topa__intr);
+ /* TODO: handle possible double entry */
+ /* Check ToPA PMI status on curcpu. */
+ reg = rdmsr(MSR_IA_GLOBAL_STATUS);
+ if ((reg & GLOBAL_STATUS_FLAG_TRACETOPAPMI) == 0)
+ return (0);
+
+ /* Ignore spurious PMI interrupts. */
+ if (pt_cpu_get_state(curcpu) != PT_ACTIVE)
+ return (1);
+
+ /* Disable preemption. */
+ critical_enter();
+ /* Fetch active trace context. */
+ ctx = pt_pcpu[curcpu].ctx;
+ buf = &ctx->buf;
+ KASSERT(buf->topa_hw != NULL,
+ ("%s: ToPA PMI interrupt with invalid buffer", __func__));
+
+ /* Disable tracing so we don't trace the PMI handler. */
+ pt_cpu_toggle_local(&ctx->save_area, false);
+ pt_update_buffer(buf);
+ /* Clear ToPA PMI status. */
+ reg = rdmsr(MSR_IA_GLOBAL_STATUS_RESET);
+ reg &= ~GLOBAL_STATUS_FLAG_TRACETOPAPMI;
+ reg |= GLOBAL_STATUS_FLAG_TRACETOPAPMI;
+ wrmsr(MSR_IA_GLOBAL_STATUS_RESET, reg);
+
+ /* Notify userspace. */
+ taskqueue_enqueue_flags(taskqueue_pt, &ctx->task,
+ TASKQUEUE_FAIL_IF_PENDING);
+
+ /* Don't re-enable ToPA PMI if trace stop was requested. */
+ if (pt_cpu_get_state(curcpu) != PT_TERMINATING) {
+ lapic_reenable_pcint();
+ /* Re-enable tracing. */
+ pt_cpu_toggle_local(&ctx->save_area, true);
+ }
+ critical_exit();
+
+ return (1);
+}
+
+static int
+pt_init(void)
+{
+ u_int cp[4];
+ int error;
+
+ dprintf("pt: Enumerating part 1\n");
+ cpuid_count(PT_CPUID, 0, cp);
+ dprintf("pt: Maximum valid sub-leaf Index: %x\n", cp[0]);
+ dprintf("pt: ebx %x\n", cp[1]);
+ dprintf("pt: ecx %x\n", cp[2]);
+
+ /* Save relevant cpuid info. */
+ pt_info.l0_eax = cp[0];
+ pt_info.l0_ebx = cp[1];
+ pt_info.l0_ecx = cp[2];
+
+ dprintf("pt: Enumerating part 2\n");
+ cpuid_count(PT_CPUID, 1, cp);
+ dprintf("pt: eax %x\n", cp[0]);
+ dprintf("pt: ebx %x\n", cp[1]);
+
+ pt_info.l1_eax = cp[0];
+ pt_info.l1_ebx = cp[1];
+
+ error = hwt_backend_register(&backend);
+ if (error != 0) {
+ printf("pt: unable to register hwt backend, error %d\n", error);
+ return (error);
+ }
+ pt_pcpu = malloc(sizeof(struct pt_cpu) * mp_ncpus, M_PT,
+ M_ZERO | M_WAITOK);
+ pt_pcpu_ctx = malloc(sizeof(struct pt_ctx) * mp_ncpus, M_PT,
+ M_ZERO | M_WAITOK);
+ /* Enable ToPA interrupts */
+ lapic_enable_pcint();
+
+ initialized = true;
+
+ return (0);
+}
+
+static bool
+pt_supported(void)
+{
+ u_int cp[4];
+
+ /* Intel SDM Vol. 3C, 33-30 */
+ if ((cpu_stdext_feature & CPUID_STDEXT_PROCTRACE) == 0) {
+ printf("pt: CPU does not support Intel Processor Trace\n");
+ return (false);
+ }
+
+ /* Require XSAVE support. */
+ if ((cpu_feature2 & CPUID2_XSAVE) == 0) {
+ printf("pt: XSAVE is not supported\n");
+ return (false);
+ }
+
+ cpuid_count(0xd, 0x0, cp);
+ if ((cp[0] & PT_XSAVE_MASK) != PT_XSAVE_MASK) {
+ printf("pt: CPU does not support X87 or SSE: %x", cp[0]);
+ return (false);
+ }
+
+ cpuid_count(0xd, 0x1, cp);
+ if ((cp[0] & (1 << 0)) == 0) {
+ printf("pt: XSAVE compaction is not supported\n");
+ return (false);
+ }
+ if ((cp[0] & (1 << 3)) == 0) {
+ printf("pt: XSAVES/XRSTORS are not supported\n");
+ return (false);
+ }
+
+ /* Require ToPA support. */
+ cpuid_count(PT_CPUID, 0, cp);
+ if ((cp[2] & CPUPT_TOPA) == 0) {
+ printf("pt: ToPA is not supported\n");
+ return (false);
+ }
+ if ((cp[2] & CPUPT_TOPA_MULTI) == 0) {
+ printf("pt: multiple ToPA outputs are not supported\n");
+ return (false);
+ }
+
+ return (true);
+}
+
+static void
+pt_deinit(void)
+{
+ if (!initialized)
+ return;
+
+ hwt_backend_unregister(&backend);
+ hwt_intr = NULL;
+ free(pt_pcpu, M_PT);
+ free(pt_pcpu_ctx, M_PT);
+ pt_pcpu = NULL;
+ pt_pcpu_ctx = NULL;
+ lapic_disable_pcint();
+}
+
+static int
+pt_modevent(module_t mod, int type, void *data)
+{
+ switch (type) {
+ case MOD_LOAD:
+ if (!pt_supported() || pt_init() != 0)
+ return (ENXIO);
+ break;
+ case MOD_UNLOAD:
+ pt_deinit();
+ break;
+ default:
+ break;
+ }
+
+ return (0);
+}
+
+static moduledata_t pt_mod = { "intel_pt", pt_modevent, NULL };
+
+DECLARE_MODULE(intel_pt, pt_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST);
+MODULE_DEPEND(intel_pt, hwt, 1, 1, 1);
+MODULE_VERSION(intel_pt, 1);
Index: sys/dev/hwpmc/hwpmc_core.c
===================================================================
--- sys/dev/hwpmc/hwpmc_core.c
+++ sys/dev/hwpmc/hwpmc_core.c
@@ -1051,7 +1051,7 @@
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
if (found_interrupt)
- lapic_reenable_pmc();
+ lapic_reenable_pcint();
return (found_interrupt);
}
@@ -1150,7 +1150,7 @@
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
if (found_interrupt)
- lapic_reenable_pmc();
+ lapic_reenable_pcint();
/*
* Reenable all non-stalled PMCs.
Index: sys/dev/hwpmc/hwpmc_x86.c
===================================================================
--- sys/dev/hwpmc/hwpmc_x86.c
+++ sys/dev/hwpmc/hwpmc_x86.c
@@ -242,7 +242,7 @@
return (NULL);
/* disallow sampling if we do not have an LAPIC */
- if (md != NULL && !lapic_enable_pmc())
+ if (md != NULL && !lapic_enable_pcint())
for (i = 0; i < md->pmd_nclass; i++) {
if (i == PMC_CLASS_INDEX_SOFT)
continue;
@@ -256,7 +256,7 @@
pmc_md_finalize(struct pmc_mdep *md)
{
- lapic_disable_pmc();
+ lapic_disable_pcint();
if (cpu_vendor_id == CPU_VENDOR_AMD ||
cpu_vendor_id == CPU_VENDOR_HYGON)
pmc_amd_finalize(md);
Index: sys/modules/Makefile
===================================================================
--- sys/modules/Makefile
+++ sys/modules/Makefile
@@ -324,6 +324,7 @@
proto \
pseudofs \
${_pst} \
+ ${_pt} \
pty \
puc \
pwm \
@@ -835,6 +836,7 @@
_ioat= ioat
_ixl= ixl
_nvdimm= nvdimm
+_pt= pt
_pms= pms
_qat= qat
.if ${MK_SOURCELESS_UCODE} != "no"
Index: sys/modules/pt/Makefile
===================================================================
--- /dev/null
+++ sys/modules/pt/Makefile
@@ -0,0 +1,8 @@
+
+.PATH: ${SRCTOP}/sys/amd64/pt
+
+KMOD= pt
+SRCS= pt.c pt.h device_if.h bus_if.h
+SRCS+= opt_hwpmc_hooks.h opt_kstack_pages.h
+
+.include <bsd.kmod.mk>
Index: sys/x86/include/apicvar.h
===================================================================
--- sys/x86/include/apicvar.h
+++ sys/x86/include/apicvar.h
@@ -229,9 +229,9 @@
void apic_disable_vector(u_int apic_id, u_int vector);
void apic_free_vector(u_int apic_id, u_int vector, u_int irq);
void lapic_calibrate_timer(void);
-int lapic_enable_pmc(void);
-void lapic_disable_pmc(void);
-void lapic_reenable_pmc(void);
+int lapic_enable_pcint(void);
+void lapic_disable_pcint(void);
+void lapic_reenable_pcint(void);
void lapic_enable_cmc(void);
int lapic_enable_mca_elvt(void);
void lapic_ipi_raw(register_t icrlo, u_int dest);
Index: sys/x86/include/specialreg.h
===================================================================
--- sys/x86/include/specialreg.h
+++ sys/x86/include/specialreg.h
@@ -105,6 +105,7 @@
#define XFEATURE_ENABLED_OPMASK 0x00000020
#define XFEATURE_ENABLED_ZMM_HI256 0x00000040
#define XFEATURE_ENABLED_HI16_ZMM 0x00000080
+#define XFEATURE_ENABLED_PT 0x00000100
#define XFEATURE_ENABLED_PKRU 0x00000200
#define XFEATURE_ENABLED_TILECONFIG 0x00020000
#define XFEATURE_ENABLED_TILEDATA 0x00040000
@@ -195,6 +196,7 @@
#define CPUPT_MTC (1 << 3) /* MTC Supported */
#define CPUPT_PRW (1 << 4) /* PTWRITE Supported */
#define CPUPT_PWR (1 << 5) /* Power Event Trace Supported */
+#define CPUPT_DIS_TNT (1 << 8) /* TNT disable supported */
/* Leaf 0 ecx. */
#define CPUPT_TOPA (1 << 0) /* ToPA Output Supported */
@@ -622,6 +624,12 @@
#define MSR_PAT 0x277
#define MSR_MC0_CTL2 0x280
#define MSR_MTRRdefType 0x2ff
+#define MSR_IA_GLOBAL_STATUS 0x38E
+#define MSR_IA_GLOBAL_CTRL 0x38F
+#define MSR_IA_GLOBAL_OVF_CTRL 0x390
+#define MSR_IA_GLOBAL_STATUS_RESET 0x390
+#define MSR_IA_GLOBAL_STATUS_SET 0x391
+#define GLOBAL_STATUS_FLAG_TRACETOPAPMI (1ULL << 55)
#define MSR_MC0_CTL 0x400
#define MSR_MC0_STATUS 0x401
#define MSR_MC0_ADDR 0x402
@@ -749,6 +757,7 @@
#define RTIT_CTL_ADDR2_CFG_M (0xfULL << RTIT_CTL_ADDR2_CFG_S)
#define RTIT_CTL_ADDR3_CFG_S 44
#define RTIT_CTL_ADDR3_CFG_M (0xfULL << RTIT_CTL_ADDR3_CFG_S)
+#define RTIT_CTL_DIS_TNT (1ULL << 55)
#define MSR_IA32_RTIT_STATUS 0x571 /* Tracing Status Register (R/W) */
#define RTIT_STATUS_FILTEREN (1 << 0)
#define RTIT_STATUS_CONTEXTEN (1 << 1)
Index: sys/x86/x86/local_apic.c
===================================================================
--- sys/x86/x86/local_apic.c
+++ sys/x86/x86/local_apic.c
@@ -36,6 +36,7 @@
#include <sys/cdefs.h>
#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"
+#include "opt_hwt_hooks.h"
#include "opt_ddb.h"
@@ -50,6 +51,7 @@
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
+#include <sys/refcount.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
@@ -206,6 +208,7 @@
static int __read_mostly lapic_ds_idle_timeout = 1000000;
#endif
unsigned int max_apic_id;
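+/*
+ * Number of consumers (hwpmc(4), hwt(4) backends) that currently have the
+ * local APIC performance counter interrupt (LVT PCINT) enabled.
+ */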
+static u_int pcint_refcnt = 0;
SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"APIC options");
@@ -809,20 +812,22 @@
SYSINIT(lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, lapic_intrcnt, NULL);
void
-lapic_reenable_pmc(void)
+lapic_reenable_pcint(void)
{
-#ifdef HWPMC_HOOKS
+#if defined(HWPMC_HOOKS) || defined(HWT_HOOKS)
uint32_t value;
+ if (refcount_load(&pcint_refcnt) == 0)
+ return;
value = lapic_read32(LAPIC_LVT_PCINT);
value &= ~APIC_LVT_M;
lapic_write32(LAPIC_LVT_PCINT, value);
#endif
}
-#ifdef HWPMC_HOOKS
+#if defined(HWPMC_HOOKS) || defined(HWT_HOOKS)
static void
-lapic_update_pmc(void *dummy)
+lapic_update_pcint(void *dummy)
{
struct lapic *la;
@@ -858,9 +863,9 @@
}
int
-lapic_enable_pmc(void)
+lapic_enable_pcint(void)
{
-#ifdef HWPMC_HOOKS
+#if defined(HWPMC_HOOKS) || defined(HWT_HOOKS)
u_int32_t maxlvt;
#ifdef DEV_ATPIC
@@ -873,11 +878,12 @@
maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
if (maxlvt < APIC_LVT_PMC)
return (0);
-
+ if (refcount_acquire(&pcint_refcnt) > 0)
+ return (1);
lvts[APIC_LVT_PMC].lvt_masked = 0;
MPASS(mp_ncpus == 1 || smp_started);
- smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
+ smp_rendezvous(NULL, lapic_update_pcint, NULL, NULL);
return (1);
#else
return (0);
@@ -885,9 +891,9 @@
}
void
-lapic_disable_pmc(void)
+lapic_disable_pcint(void)
{
-#ifdef HWPMC_HOOKS
+#if defined(HWPMC_HOOKS) || defined(HWT_HOOKS)
u_int32_t maxlvt;
#ifdef DEV_ATPIC
@@ -900,14 +906,16 @@
maxlvt = (lapic_read32(LAPIC_VERSION) & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
if (maxlvt < APIC_LVT_PMC)
return;
-
+ if (!refcount_release_if_not_last(&pcint_refcnt) &&
+ refcount_release(&pcint_refcnt) != 0)
+ return;
lvts[APIC_LVT_PMC].lvt_masked = 1;
#ifdef SMP
/* The APs should always be started when hwpmc is unloaded. */
KASSERT(mp_ncpus == 1 || smp_started, ("hwpmc unloaded too early"));
#endif
- smp_rendezvous(NULL, lapic_update_pmc, NULL, NULL);
+ smp_rendezvous(NULL, lapic_update_pcint, NULL, NULL);
#endif
}
