Index: head/sys/dev/hwpmc/hwpmc_amd.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_amd.c
+++ head/sys/dev/hwpmc/hwpmc_amd.c
@@ -694,8 +694,10 @@
 		wrmsr(evsel, config);
 	}
 
-	atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (retval)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
 	PMCDBG1(MDP,INT,2, "retval=%d", retval);
 	return (retval);
Index: head/sys/dev/hwpmc/hwpmc_core.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_core.c
+++ head/sys/dev/hwpmc/hwpmc_core.c
@@ -2831,8 +2831,10 @@
 	if (found_interrupt)
 		lapic_reenable_pmc();
 
-	atomic_add_int(found_interrupt ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (found_interrupt)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
 	return (found_interrupt);
 }
@@ -2896,6 +2898,7 @@
 
 		error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
 		    TRAPF_USERMODE(tf));
+
 		if (error)
 			intrenable &= ~flag;
 
@@ -2955,8 +2958,10 @@
 	if (found_interrupt)
 		lapic_reenable_pmc();
 
-	atomic_add_int(found_interrupt ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (found_interrupt)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
 	return (found_interrupt);
 }
Index: head/sys/dev/hwpmc/hwpmc_e500.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_e500.c
+++ head/sys/dev/hwpmc/hwpmc_e500.c
@@ -616,8 +616,10 @@
 		e500_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount);
 	}
 
-	atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (retval)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
 	/* Re-enable PERF exceptions. */
 	if (retval)
Index: head/sys/dev/hwpmc/hwpmc_intel.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_intel.c
+++ head/sys/dev/hwpmc/hwpmc_intel.c
@@ -94,6 +94,8 @@
 	model = ((cpu_id & 0xF0000) >> 12) | ((cpu_id & 0xF0) >> 4);
 	stepping = cpu_id & 0xF;
 
+	snprintf(pmc_cpuid, sizeof(pmc_cpuid), "GenuineIntel-%d-%02X",
+	    (cpu_id & 0xF00) >> 8, model);
 	switch (cpu_id & 0xF00) {
 #if	defined(__i386__)
 	case 0x500:		/* Pentium family processors */
Index: head/sys/dev/hwpmc/hwpmc_logging.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_logging.c
+++ head/sys/dev/hwpmc/hwpmc_logging.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2005-2007 Joseph Koshy
  * Copyright (c) 2007 The FreeBSD Foundation
+ * Copyright (c) 2018 Matthew Macy
  * All rights reserved.
  *
  * Portions of this software were developed by A. Joseph Koshy under
@@ -50,7 +51,9 @@
 #include
 #include
 #include
+#include
 #include
+#include
 #include
 #include
 #include
@@ -79,31 +82,28 @@
  * kern.hwpmc.nbuffer -- number of global log buffers
  */
 
-static int pmc_nlogbuffers = PMC_NLOGBUFFERS;
+static int pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
 #if (__FreeBSD_version < 1100000)
-TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers);
+TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers_pcpu);
 #endif
-SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers, CTLFLAG_RDTUN,
-    &pmc_nlogbuffers, 0, "number of global log buffers");
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers_pcpu, CTLFLAG_RDTUN,
+    &pmc_nlogbuffers_pcpu, 0, "number of log buffers per cpu");
 
 /*
  * Global log buffer list and associated spin lock.
  */
 
-TAILQ_HEAD(, pmclog_buffer) pmc_bufferlist =
-	TAILQ_HEAD_INITIALIZER(pmc_bufferlist);
-static struct mtx pmc_bufferlist_mtx;	/* spin lock */
 static struct mtx pmc_kthread_mtx;	/* sleep lock */
 
-#define	PMCLOG_INIT_BUFFER_DESCRIPTOR(D) do {				\
-		const int __roundup = roundup(sizeof(*D),		\
-			sizeof(uint32_t));				\
-		(D)->plb_fence = ((char *) (D)) +			\
-			 1024*pmclog_buffer_size;			\
-		(D)->plb_base  = (D)->plb_ptr = ((char *) (D)) +	\
-			__roundup;					\
+#define	PMCLOG_INIT_BUFFER_DESCRIPTOR(D, buf, domain) do {		\
+		(D)->plb_fence = ((char *) (buf)) + 1024*pmclog_buffer_size;\
+		(D)->plb_base  = (D)->plb_ptr = ((char *) (buf));	\
+		(D)->plb_domain = domain;				\
 	} while (0)
 
+#define	PMCLOG_RESET_BUFFER_DESCRIPTOR(D) do {				\
+		(D)->plb_ptr  = (D)->plb_base;				\
+	} while (0)
+
 /*
  * Log file record constructors.
@@ -114,15 +114,29 @@
 	((L) & 0xFFFF))
 
 /* reserve LEN bytes of space and initialize the entry header */
-#define	_PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do {			\
+#define	_PMCLOG_RESERVE_SAFE(PO,TYPE,LEN,ACTION) do {			\
 		uint32_t *_le;						\
-		int _len = roundup((LEN), sizeof(uint32_t));		\
+		int _len = roundup((LEN), sizeof(uint32_t));	\
 		if ((_le = pmclog_reserve((PO), _len)) == NULL) {	\
 			ACTION;						\
 		}							\
 		*_le = _PMCLOG_TO_HEADER(TYPE,_len);			\
 		_le += 3	/* skip over timestamp */
 
+/* reserve LEN bytes of space and initialize the entry header */
+#define	_PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do {			\
+		uint32_t *_le;						\
+		int _len = roundup((LEN), sizeof(uint32_t));	\
+		spinlock_enter();					\
+		if ((_le = pmclog_reserve((PO), _len)) == NULL) {	\
+			spinlock_exit();				\
+			ACTION;						\
+		}							\
+		*_le = _PMCLOG_TO_HEADER(TYPE,_len);			\
+		_le += 3	/* skip over timestamp */
+
+
+#define	PMCLOG_RESERVE_SAFE(P,T,L)	_PMCLOG_RESERVE_SAFE(P,T,L,return)
 #define	PMCLOG_RESERVE(P,T,L)		_PMCLOG_RESERVE(P,T,L,return)
 #define	PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L,	\
 	error=ENOMEM;goto error)
@@ -138,11 +152,21 @@
 #define	PMCLOG_EMITSTRING(S,L)	do { bcopy((S), _le, (L)); } while (0)
 #define	PMCLOG_EMITNULLSTRING(L) do { bzero(_le, (L)); } while (0)
 
-#define	PMCLOG_DESPATCH(PO)						\
-	    pmclog_release((PO));					\
+#define	PMCLOG_DESPATCH_SAFE(PO)					\
+	    pmclog_release((PO));					\
 	} while (0)
 
+#define	PMCLOG_DESPATCH(PO)						\
+	    pmclog_release((PO));					\
+	    spinlock_exit();						\
+	} while (0)
+
+#define	PMCLOG_DESPATCH_SYNC(PO)					\
+	    pmclog_schedule_io((PO));					\
+	    spinlock_exit();						\
+	} while (0)
+
+
 /*
  * Assertions about the log file format.
  */
@@ -180,8 +204,19 @@
 	char 		*plb_base;
 	char		*plb_ptr;
 	char 		*plb_fence;
-};
+	uint16_t	 plb_domain;
+} __aligned(CACHE_LINE_SIZE);
+struct pmc_domain_buffer_header {
+	struct mtx pdbh_mtx;
+	TAILQ_HEAD(, pmclog_buffer) pdbh_head;
+	struct pmclog_buffer *pdbh_plbs;
+	int pdbh_ncpus;
+} __aligned(CACHE_LINE_SIZE);
 
+struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
+
+
 /*
  * Prototypes
  */
@@ -191,12 +226,28 @@
 static void pmclog_release(struct pmc_owner *po);
 static uint32_t *pmclog_reserve(struct pmc_owner *po, int length);
 static void pmclog_schedule_io(struct pmc_owner *po);
+static void pmclog_schedule_all(struct pmc_owner *po);
 static void pmclog_stop_kthread(struct pmc_owner *po);
 
 /*
  * Helper functions
  */
 
+static inline void
+pmc_plb_rele_unlocked(struct pmclog_buffer *plb)
+{
+	TAILQ_INSERT_HEAD(&pmc_dom_hdrs[plb->plb_domain]->pdbh_head, plb, plb_next);
+}
+
+static inline void
+pmc_plb_rele(struct pmclog_buffer *plb)
+{
+	mtx_lock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx);
+	pmc_plb_rele_unlocked(plb);
+	mtx_unlock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx);
+}
+
+
 /*
  * Get a log buffer
  */
@@ -205,16 +256,16 @@
 pmclog_get_buffer(struct pmc_owner *po)
 {
 	struct pmclog_buffer *plb;
+	int domain;
 
-	mtx_assert(&po->po_mtx, MA_OWNED);
-
-	KASSERT(po->po_curbuf == NULL,
+	KASSERT(po->po_curbuf[curcpu] == NULL,
 	    ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po));
 
-	mtx_lock_spin(&pmc_bufferlist_mtx);
-	if ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL)
-		TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
-	mtx_unlock_spin(&pmc_bufferlist_mtx);
+	domain = PCPU_GET(domain);
+	mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx);
+	if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL)
+		TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
+	mtx_unlock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx);
 
 	PMCDBG2(LOG,GTB,1, "po=%p plb=%p", po, plb);
 
@@ -227,12 +278,12 @@
 		    plb->plb_base, plb->plb_fence));
 #endif
 
-	po->po_curbuf = plb;
+	po->po_curbuf[curcpu] = plb;
 
 	/* update stats */
-	atomic_add_int(&pmc_stats.pm_buffer_requests, 1);
+	counter_u64_add(pmc_stats.pm_buffer_requests, 1);
 	if (plb == NULL)
-		atomic_add_int(&pmc_stats.pm_buffer_requests_failed, 1);
+		counter_u64_add(pmc_stats.pm_buffer_requests_failed, 1);
 
 	return (plb ? 0 : ENOMEM);
 }
@@ -421,12 +472,9 @@
 			mtx_lock(&pmc_kthread_mtx);
 
 			/* put the used buffer back into the global pool */
-			PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
+			PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
 
-			mtx_lock_spin(&pmc_bufferlist_mtx);
-			TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
-			mtx_unlock_spin(&pmc_bufferlist_mtx);
-
+			pmc_plb_rele(lb);
 			lb = NULL;
 		}
 
@@ -437,11 +485,9 @@
 
 	/* return the current I/O buffer to the global pool */
 	if (lb) {
-		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
+		PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
 
-		mtx_lock_spin(&pmc_bufferlist_mtx);
-		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
-		mtx_unlock_spin(&pmc_bufferlist_mtx);
+		pmc_plb_rele(lb);
 	}
 
 	/*
@@ -460,19 +506,20 @@
 static void
 pmclog_release(struct pmc_owner *po)
 {
-	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
+	struct pmclog_buffer *plb;
+
+	plb = po->po_curbuf[curcpu];
+	KASSERT(plb->plb_ptr >= plb->plb_base,
 	    ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
-		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
-	KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+		po, plb->plb_ptr, plb->plb_base));
+	KASSERT(plb->plb_ptr <= plb->plb_fence,
 	    ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__,
-		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
+		po, plb->plb_ptr, plb->plb_fence));
 
 	/* schedule an I/O if we've filled a buffer */
-	if (po->po_curbuf->plb_ptr >= po->po_curbuf->plb_fence)
+	if (plb->plb_ptr >= plb->plb_fence)
 		pmclog_schedule_io(po);
 
-	mtx_unlock_spin(&po->po_mtx);
-
 	PMCDBG1(LOG,REL,1, "po=%p", po);
 }
 
@@ -492,36 +539,32 @@
 	uintptr_t newptr, oldptr;
 	uint32_t *lh;
 	struct timespec ts;
+	struct pmclog_buffer *plb, **pplb;
 
 	PMCDBG2(LOG,ALL,1, "po=%p len=%d", po, length);
 
 	KASSERT(length % sizeof(uint32_t) == 0,
 	    ("[pmclog,%d] length not a multiple of word size", __LINE__));
 
-	mtx_lock_spin(&po->po_mtx);
-
 	/* No more data when shutdown in progress. */
-	if (po->po_flags & PMC_PO_SHUTDOWN) {
-		mtx_unlock_spin(&po->po_mtx);
+	if (po->po_flags & PMC_PO_SHUTDOWN)
 		return (NULL);
-	}
 
-	if (po->po_curbuf == NULL)
-		if (pmclog_get_buffer(po) != 0) {
-			mtx_unlock_spin(&po->po_mtx);
-			return (NULL);
-		}
+	pplb = &po->po_curbuf[curcpu];
+	if (*pplb == NULL && pmclog_get_buffer(po) != 0)
+		goto fail;
 
-	KASSERT(po->po_curbuf != NULL,
+	KASSERT(*pplb != NULL,
 	    ("[pmclog,%d] po=%p no current buffer", __LINE__, po));
 
-	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base &&
-	    po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+	plb = *pplb;
+	KASSERT(plb->plb_ptr >= plb->plb_base &&
+	    plb->plb_ptr <= plb->plb_fence,
 	    ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
-		__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
-		po->po_curbuf->plb_fence));
+		__LINE__, po, plb->plb_ptr, plb->plb_base,
+		plb->plb_fence));
 
-	oldptr = (uintptr_t) po->po_curbuf->plb_ptr;
+	oldptr = (uintptr_t) plb->plb_ptr;
 	newptr = oldptr + length;
 
 	KASSERT(oldptr != (uintptr_t) NULL,
@@ -531,8 +574,8 @@
 	 * If we have space in the current buffer, return a pointer to
 	 * available space with the PO structure locked.
 	 */
-	if (newptr <= (uintptr_t) po->po_curbuf->plb_fence) {
-		po->po_curbuf->plb_ptr = (char *) newptr;
+	if (newptr <= (uintptr_t) plb->plb_fence) {
+		plb->plb_ptr = (char *) newptr;
 		goto done;
 	}
 
@@ -542,24 +585,23 @@
 	 */
 	pmclog_schedule_io(po);
 
-	if (pmclog_get_buffer(po) != 0) {
-		mtx_unlock_spin(&po->po_mtx);
-		return (NULL);
-	}
+	if (pmclog_get_buffer(po) != 0)
+		goto fail;
 
-	KASSERT(po->po_curbuf != NULL,
+	plb = *pplb;
+	KASSERT(plb != NULL,
 	    ("[pmclog,%d] po=%p no current buffer", __LINE__, po));
 
-	KASSERT(po->po_curbuf->plb_ptr != NULL,
+	KASSERT(plb->plb_ptr != NULL,
 	    ("[pmclog,%d] null return from pmc_get_log_buffer", __LINE__));
 
-	KASSERT(po->po_curbuf->plb_ptr == po->po_curbuf->plb_base &&
-	    po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+	KASSERT(plb->plb_ptr == plb->plb_base &&
+	    plb->plb_ptr <= plb->plb_fence,
 	    ("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
-		__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
-		po->po_curbuf->plb_fence));
+		__LINE__, po, plb->plb_ptr, plb->plb_base,
+		plb->plb_fence));
 
-	oldptr = (uintptr_t) po->po_curbuf->plb_ptr;
+	oldptr = (uintptr_t) plb->plb_ptr;
 
  done:
 	lh = (uint32_t *) oldptr;
@@ -568,6 +610,8 @@
 	*lh++ = ts.tv_sec & 0xFFFFFFFF;
 	*lh++ = ts.tv_nsec & 0xFFFFFFF;
 	return ((uint32_t *) oldptr);
+ fail:
+	return (NULL);
 }
 
 /*
@@ -579,26 +623,28 @@
 static void
 pmclog_schedule_io(struct pmc_owner *po)
 {
-	KASSERT(po->po_curbuf != NULL,
-	    ("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po));
+	struct pmclog_buffer *plb;
 
-	KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
+	plb = po->po_curbuf[curcpu];
+	po->po_curbuf[curcpu] = NULL;
+	KASSERT(plb != NULL,
+	    ("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po));
+	KASSERT(plb->plb_ptr >= plb->plb_base,
 	    ("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
-		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
-	KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+		po, plb->plb_ptr, plb->plb_base));
+	KASSERT(plb->plb_ptr <= plb->plb_fence,
 	    ("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__,
-		po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
+		po, plb->plb_ptr, plb->plb_fence));
 
 	PMCDBG1(LOG,SIO, 1, "po=%p", po);
 
-	mtx_assert(&po->po_mtx, MA_OWNED);
-
 	/*
 	 * Add the current buffer to the tail of the buffer list and
 	 * wakeup the helper.
 	 */
-	TAILQ_INSERT_TAIL(&po->po_logbuffers, po->po_curbuf, plb_next);
-	po->po_curbuf = NULL;
+	mtx_lock_spin(&po->po_mtx);
+	TAILQ_INSERT_TAIL(&po->po_logbuffers, plb, plb_next);
+	mtx_unlock_spin(&po->po_mtx);
 	wakeup_one(po);
 }
 
@@ -671,7 +717,7 @@
 	    sizeof(struct pmclog_initialize));
 	PMCLOG_EMIT32(PMC_VERSION);
 	PMCLOG_EMIT32(md->pmd_cputype);
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SYNC(po);
 
 	return (0);
 
@@ -719,19 +765,22 @@
 	/* return all queued log buffers to the global pool */
 	while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) {
 		TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
-		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
-		mtx_lock_spin(&pmc_bufferlist_mtx);
-		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
-		mtx_unlock_spin(&pmc_bufferlist_mtx);
+		PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
+		pmc_plb_rele(lb);
 	}
-
-	/* return the 'current' buffer to the global pool */
-	if ((lb = po->po_curbuf) != NULL) {
-		PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
-		mtx_lock_spin(&pmc_bufferlist_mtx);
-		TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
-		mtx_unlock_spin(&pmc_bufferlist_mtx);
+	for (int i = 0; i < mp_ncpus; i++) {
+		thread_lock(curthread);
+		sched_bind(curthread, i);
+		thread_unlock(curthread);
+		/* return the 'current' buffer to the global pool */
+		if ((lb = po->po_curbuf[curcpu]) != NULL) {
+			PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
+			pmc_plb_rele(lb);
+		}
 	}
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
 
 	/* drop a reference to the fd */
 	if (po->po_file != NULL) {
@@ -752,7 +801,6 @@
 pmclog_flush(struct pmc_owner *po)
 {
 	int error;
-	struct pmclog_buffer *lb;
 
 	PMCDBG1(LOG,FLS,1, "po=%p", po);
 
@@ -774,23 +822,45 @@
 		goto error;
 	}
 
-	/*
-	 * Schedule the current buffer if any and not empty.
-	 */
-	mtx_lock_spin(&po->po_mtx);
-	lb = po->po_curbuf;
-	if (lb && lb->plb_ptr != lb->plb_base) {
-		pmclog_schedule_io(po);
-	} else
-		error = ENOBUFS;
-	mtx_unlock_spin(&po->po_mtx);
-
+	pmclog_schedule_all(po);
  error:
 	mtx_unlock(&pmc_kthread_mtx);
 
 	return (error);
 }
 
+static void
+pmclog_schedule_one_cond(void *arg)
+{
+	struct pmc_owner *po = arg;
+	struct pmclog_buffer *plb;
+
+	spinlock_enter();
+	/* tell hardclock not to run again */
+	DPCPU_SET(pmc_sampled, 0);
+	plb = po->po_curbuf[curcpu];
+	if (plb && plb->plb_ptr != plb->plb_base)
+		pmclog_schedule_io(po);
+	spinlock_exit();
+}
+
+static void
+pmclog_schedule_all(struct pmc_owner *po)
+{
+	/*
+	 * Schedule the current buffer if any and not empty.
+	 */
+	for (int i = 0; i < mp_ncpus; i++) {
+		thread_lock(curthread);
+		sched_bind(curthread, i);
+		thread_unlock(curthread);
+		pmclog_schedule_one_cond(po);
+	}
+	thread_lock(curthread);
+	sched_unbind(curthread);
+	thread_unlock(curthread);
+}
+
 int
 pmclog_close(struct pmc_owner *po)
 {
@@ -804,19 +874,14 @@
 	/*
 	 * Schedule the current buffer.
 	 */
-	mtx_lock_spin(&po->po_mtx);
-	if (po->po_curbuf)
-		pmclog_schedule_io(po);
-	else
-		wakeup_one(po);
-	mtx_unlock_spin(&po->po_mtx);
+	pmclog_schedule_all(po);
+	wakeup_one(po);
 
 	/*
 	 * Initiate shutdown: no new data queued,
 	 * thread will close file on last block.
 	 */
 	po->po_flags |= PMC_PO_SHUTDOWN;
-
 	mtx_unlock(&pmc_kthread_mtx);
 
 	return (0);
@@ -836,20 +901,20 @@
 	    ps->ps_nsamples * sizeof(uintfptr_t);
 	po = pm->pm_owner;
 	flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags);
-	PMCLOG_RESERVE(po, CALLCHAIN, recordlen);
+	PMCLOG_RESERVE_SAFE(po, CALLCHAIN, recordlen);
 	PMCLOG_EMIT32(ps->ps_pid);
 	PMCLOG_EMIT32(pm->pm_id);
 	PMCLOG_EMIT32(flags);
 	for (n = 0; n < ps->ps_nsamples; n++)
 		PMCLOG_EMITADDR(ps->ps_pc[n]);
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SAFE(po);
 }
 
 void
 pmclog_process_closelog(struct pmc_owner *po)
 {
 	PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog));
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SYNC(po);
 }
 
 void
@@ -913,14 +978,14 @@
 		else
 			PMCLOG_EMITNULLSTRING(PMC_NAME_MAX);
 		pmc_soft_ev_release(ps);
-		PMCLOG_DESPATCH(po);
+		PMCLOG_DESPATCH_SYNC(po);
 	} else {
 		PMCLOG_RESERVE(po, PMCALLOCATE,
 		    sizeof(struct pmclog_pmcallocate));
 		PMCLOG_EMIT32(pm->pm_id);
 		PMCLOG_EMIT32(pm->pm_event);
 		PMCLOG_EMIT32(pm->pm_flags);
-		PMCLOG_DESPATCH(po);
+		PMCLOG_DESPATCH_SYNC(po);
 	}
 }
 
@@ -941,7 +1006,7 @@
 	PMCLOG_EMIT32(pm->pm_id);
 	PMCLOG_EMIT32(pid);
 	PMCLOG_EMITSTRING(path, pathlen);
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SYNC(po);
 }
 
 void
@@ -956,7 +1021,7 @@
 	PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach));
 	PMCLOG_EMIT32(pm->pm_id);
 	PMCLOG_EMIT32(pid);
-	PMCLOG_DESPATCH(po);
+	PMCLOG_DESPATCH_SYNC(po);
 }
 
 /*
@@ -1081,30 +1146,57 @@
 void
 pmclog_initialize()
 {
-	int n;
+	int domain, cpu;
+	struct pcpu *pc;
 	struct pmclog_buffer *plb;
 
-	if (pmclog_buffer_size <= 0) {
+	if (pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) {
 		(void) printf("hwpmc: tunable logbuffersize=%d must be "
-		    "greater than zero.\n", pmclog_buffer_size);
+		    "greater than zero and less than or equal to 16MB.\n",
+		    pmclog_buffer_size);
 		pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
 	}
 
-	if (pmc_nlogbuffers <= 0) {
+	if (pmc_nlogbuffers_pcpu <= 0) {
 		(void) printf("hwpmc: tunable nlogbuffers=%d must be greater "
-		    "than zero.\n", pmc_nlogbuffers);
-		pmc_nlogbuffers = PMC_NLOGBUFFERS;
+		    "than zero.\n", pmc_nlogbuffers_pcpu);
+		pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
 	}
 
-	/* create global pool of log buffers */
-	for (n = 0; n < pmc_nlogbuffers; n++) {
-		plb = malloc(1024 * pmclog_buffer_size, M_PMC,
-		    M_WAITOK|M_ZERO);
-		PMCLOG_INIT_BUFFER_DESCRIPTOR(plb);
-		TAILQ_INSERT_HEAD(&pmc_bufferlist, plb, plb_next);
+	if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) {
+		(void) printf("hwpmc: memory allocated pcpu must be less than 32MB (is %dK).\n",
+		    pmc_nlogbuffers_pcpu*pmclog_buffer_size);
+		pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
+		pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
 	}
-	mtx_init(&pmc_bufferlist_mtx, "pmc-buffer-list", "pmc-leaf",
-	    MTX_SPIN);
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct pmc_domain_buffer_header), M_PMC, domain,
+		    M_WAITOK|M_ZERO);
+		mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN);
+		TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
+	}
+	CPU_FOREACH(cpu) {
+		if (CPU_ABSENT(cpu))
+			continue;
+		pc = pcpu_find(cpu);
+		domain = pc->pc_domain;
+		pmc_dom_hdrs[domain]->pdbh_ncpus++;
+	}
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus;
+		int total = ncpus*pmc_nlogbuffers_pcpu;
+
+		plb = malloc_domain(sizeof(struct pmclog_buffer)*total, M_PMC, domain, M_WAITOK|M_ZERO);
+		pmc_dom_hdrs[domain]->pdbh_plbs = plb;
+		for (int i = 0; i < total; i++, plb++) {
+			void *buf;
+
+			buf = malloc_domain(1024 * pmclog_buffer_size, M_PMC, domain,
			    M_WAITOK|M_ZERO);
+			PMCLOG_INIT_BUFFER_DESCRIPTOR(plb, buf, domain);
+			pmc_plb_rele_unlocked(plb);
+		}
+	}
 	mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc-sleep", MTX_DEF);
 }
 
@@ -1118,12 +1210,17 @@
 pmclog_shutdown()
 {
 	struct pmclog_buffer *plb;
+	int domain;
 
 	mtx_destroy(&pmc_kthread_mtx);
-	mtx_destroy(&pmc_bufferlist_mtx);
 
-	while ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL) {
-		TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
-		free(plb, M_PMC);
+	for (domain = 0; domain < vm_ndomains; domain++) {
+		mtx_destroy(&pmc_dom_hdrs[domain]->pdbh_mtx);
+		while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) {
+			TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
+			free(plb->plb_base, M_PMC);
+		}
+		free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC);
+		free(pmc_dom_hdrs[domain], M_PMC);
 	}
 }
Index: head/sys/dev/hwpmc/hwpmc_mod.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c
+++ head/sys/dev/hwpmc/hwpmc_mod.c
@@ -3,6 +3,7 @@
  *
  * Copyright (c) 2003-2008 Joseph Koshy
  * Copyright (c) 2007 The FreeBSD Foundation
+ * Copyright (c) 2018 Matthew Macy
 * All rights reserved.
 *
 * Portions of this software were developed by A. Joseph Koshy under
@@ -138,8 +139,9 @@
	pmc_kld_unload_tag;
 
 /* Module statistics */
-struct pmc_op_getdriverstats pmc_stats;
+struct pmc_driverstats pmc_stats;
+
 
 /* Machine/processor dependent operations */
 static	struct pmc_mdep  *md;
@@ -235,11 +237,34 @@
 */
 
 SYSCTL_DECL(_kern_hwpmc);
+SYSCTL_NODE(_kern_hwpmc, OID_AUTO, stats, CTLFLAG_RW, 0, "HWPMC stats");
+
+/* Stats. */
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_ignored, CTLFLAG_RW,
+    &pmc_stats.pm_intr_ignored, "# of interrupts ignored");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_processed, CTLFLAG_RW,
+    &pmc_stats.pm_intr_processed, "# of interrupts processed");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_bufferfull, CTLFLAG_RW,
+    &pmc_stats.pm_intr_bufferfull, "# of interrupts where buffer was full");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscalls, CTLFLAG_RW,
+    &pmc_stats.pm_syscalls, "# of syscalls");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscall_errors, CTLFLAG_RW,
+    &pmc_stats.pm_syscall_errors, "# of syscall_errors");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests, CTLFLAG_RW,
+    &pmc_stats.pm_buffer_requests, "# of buffer requests");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests_failed, CTLFLAG_RW,
+    &pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed");
+SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW,
+    &pmc_stats.pm_log_sweeps, "# of ?");
+
 
 static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
 
 SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN,
     &pmc_callchaindepth, 0, "depth of call chain records");
 
+char pmc_cpuid[64];
+SYSCTL_STRING(_kern_hwpmc, OID_AUTO, cpuid, CTLFLAG_RD,
+    pmc_cpuid, 0, "cpu version string");
 #ifdef HWPMC_DEBUG
 struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
 char	pmc_debugstr[PMC_DEBUG_STRSIZE];
@@ -250,6 +275,7 @@
     0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags");
 #endif
 
+
 /*
 * kern.hwpmc.hashrows -- determines the number of rows in the
 * of the hash table used to look up threads
@@ -1260,7 +1286,7 @@
			continue;
 
		/* increment PMC runcount */
-		atomic_add_rel_int(&pm->pm_runcount, 1);
+		counter_u64_add(pm->pm_runcount, 1);
 
		/* configure the HWPMC we are going to use. */
		pcd = pmc_ri_to_classdep(md, ri, &adjri);
@@ -1311,10 +1337,10 @@
 
		/* If a sampling mode PMC, reset stalled state. */
		if (PMC_TO_MODE(pm) == PMC_MODE_TS)
-			CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
+			pm->pm_pcpu_state[cpu].pps_stalled = 0;
 
		/* Indicate that we desire this to run. */
-		CPU_SET_ATOMIC(cpu, &pm->pm_cpustate);
+		pm->pm_pcpu_state[cpu].pps_cpustate = 1;
 
		/* Start the PMC. */
		pcd->pcd_start_pmc(cpu, adjri);
@@ -1417,12 +1443,12 @@
		 * an interrupt re-enables the PMC after this code has
		 * already checked the pm_stalled flag.
		 */
-		CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
-		if (!CPU_ISSET(cpu, &pm->pm_stalled))
+		pm->pm_pcpu_state[cpu].pps_cpustate = 0;
+		if (pm->pm_pcpu_state[cpu].pps_stalled == 0)
			pcd->pcd_stop_pmc(cpu, adjri);
 
		/* reduce this PMC's runcount */
-		atomic_subtract_rel_int(&pm->pm_runcount, 1);
+		counter_u64_add(pm->pm_runcount, -1);
 
		/*
		 * If this PMC is associated with this process,
@@ -1537,7 +1563,7 @@
	/* Inform owners of all system-wide sampling PMCs. */
	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
-		pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
+			pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
 
	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
		goto done;
@@ -1993,7 +2019,7 @@
		 * had already processed the interrupt).  We don't
		 * lose the interrupt sample.
		 */
-		CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask);
+		DPCPU_SET(pmc_sampled, 0);
		pmc_process_samples(PCPU_GET(cpuid), PMC_HR);
		pmc_process_samples(PCPU_GET(cpuid), PMC_SR);
		break;
@@ -2191,7 +2217,8 @@
	struct pmc *pmc;
 
	pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO);
-
+	pmc->pm_runcount = counter_u64_alloc(M_WAITOK);
+	pmc->pm_pcpu_state = malloc(sizeof(struct pmc_pcpu_state)*mp_ncpus, M_PMC, M_WAITOK|M_ZERO);
	PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc);
 
	return pmc;
@@ -2212,10 +2239,12 @@
	    ("[pmc,%d] destroying pmc with targets", __LINE__));
	KASSERT(pm->pm_owner == NULL,
	    ("[pmc,%d] destroying pmc attached to an owner", __LINE__));
-	KASSERT(pm->pm_runcount == 0,
-	    ("[pmc,%d] pmc has non-zero run count %d", __LINE__,
-		pm->pm_runcount));
+	KASSERT(counter_u64_fetch(pm->pm_runcount) == 0,
+	    ("[pmc,%d] pmc has non-zero run count %ld", __LINE__,
+		 (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 
+	counter_u64_free(pm->pm_runcount);
+	free(pm->pm_pcpu_state, M_PMC);
	free(pm, M_PMC);
 }
 
@@ -2231,13 +2260,13 @@
	 * Loop (with a forced context switch) till the PMC's runcount
	 * comes down to zero.
	 */
-	while (atomic_load_acq_32(&pm->pm_runcount) > 0) {
+	while (counter_u64_fetch(pm->pm_runcount) > 0) {
 #ifdef HWPMC_DEBUG
		maxloop--;
		KASSERT(maxloop > 0,
-		    ("[pmc,%d] (ri%d, rc%d) waiting too long for "
+		    ("[pmc,%d] (ri%d, rc%ld) waiting too long for "
		     "pmc to be free", __LINE__,
-			 PMC_TO_ROWINDEX(pm), pm->pm_runcount));
+			 PMC_TO_ROWINDEX(pm), (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 #endif
		pmc_force_context_switch();
	}
@@ -2295,9 +2324,9 @@
		pmc_select_cpu(cpu);
 
		/* switch off non-stalled CPUs */
-		CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
+		pm->pm_pcpu_state[cpu].pps_cpustate = 0;
		if (pm->pm_state == PMC_STATE_RUNNING &&
-		    !CPU_ISSET(cpu, &pm->pm_stalled)) {
+			pm->pm_pcpu_state[cpu].pps_stalled == 0) {
 
			phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
 
@@ -2735,10 +2764,10 @@
		    pm->pm_sc.pm_initial)) == 0) {
			/* If a sampling mode PMC, reset stalled state. */
			if (PMC_IS_SAMPLING_MODE(mode))
-				CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
+				pm->pm_pcpu_state[cpu].pps_stalled = 0;
 
			/* Indicate that we desire this to run. Start it. */
-			CPU_SET_ATOMIC(cpu, &pm->pm_cpustate);
+			pm->pm_pcpu_state[cpu].pps_cpustate = 1;
			error = pcd->pcd_start_pmc(cpu, adjri);
		}
	critical_exit();
@@ -2802,7 +2831,7 @@
	ri = PMC_TO_ROWINDEX(pm);
	pcd = pmc_ri_to_classdep(md, ri, &adjri);
 
-	CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
+	pm->pm_pcpu_state[cpu].pps_cpustate = 0;
	critical_enter();
	if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0)
		error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial);
@@ -2884,7 +2913,7 @@
	    pmc_op_to_name[op], arg);
 
	error = 0;
-	atomic_add_int(&pmc_stats.pm_syscalls, 1);
+	counter_u64_add(pmc_stats.pm_syscalls, 1);
 
	switch (op) {
 
@@ -3063,8 +3092,16 @@
	case PMC_OP_GETDRIVERSTATS:
	{
		struct pmc_op_getdriverstats gms;
-
-		bcopy(&pmc_stats, &gms, sizeof(gms));
+#define CFETCH(a, b, field) a.field = counter_u64_fetch(b.field)
+		CFETCH(gms, pmc_stats, pm_intr_ignored);
+		CFETCH(gms, pmc_stats, pm_intr_processed);
+		CFETCH(gms, pmc_stats, pm_intr_bufferfull);
+		CFETCH(gms, pmc_stats, pm_syscalls);
+		CFETCH(gms, pmc_stats, pm_syscall_errors);
+		CFETCH(gms, pmc_stats, pm_buffer_requests);
+		CFETCH(gms, pmc_stats, pm_buffer_requests_failed);
+		CFETCH(gms, pmc_stats, pm_log_sweeps);
+#undef CFETCH
		error = copyout(&gms, arg, sizeof(gms));
	}
	break;
@@ -4040,7 +4077,7 @@
	sx_xunlock(&pmc_sx);
 done_syscall:
	if (error)
-		atomic_add_int(&pmc_stats.pm_syscall_errors, 1);
+		counter_u64_add(pmc_stats.pm_syscall_errors, 1);
 
	return (error);
 }
@@ -4115,8 +4152,8 @@
	ps = psb->ps_write;
	if (ps->ps_nsamples) {	/* in use, reader hasn't caught up */
-		CPU_SET_ATOMIC(cpu, &pm->pm_stalled);
-		atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1);
+		pm->pm_pcpu_state[cpu].pps_stalled = 1;
+		counter_u64_add(pmc_stats.pm_intr_bufferfull, 1);
		PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
		    cpu, pm, (void *) tf, inuserspace,
		    (int) (psb->ps_write - psb->ps_samples),
@@ -4133,11 +4170,11 @@
	    (int) (psb->ps_write - psb->ps_samples),
	    (int) (psb->ps_read - psb->ps_samples));
 
-	KASSERT(pm->pm_runcount >= 0,
-	    ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm,
-		pm->pm_runcount));
+	KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
+	    ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+		 (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 
-	atomic_add_rel_int(&pm->pm_runcount, 1);	/* hold onto PMC */
+	counter_u64_add(pm->pm_runcount, 1);	/* hold onto PMC */
 
	ps->ps_pmc = pm;
	if ((td = curthread) && td->td_proc)
@@ -4180,7 +4217,7 @@
 done:
	/* mark CPU as needing processing */
	if (callchaindepth != PMC_SAMPLE_INUSE)
-		CPU_SET_ATOMIC(cpu, &pmc_cpumask);
+		DPCPU_SET(pmc_sampled, 1);
 
	return (error);
 }
@@ -4244,8 +4281,8 @@
	    ("[pmc,%d] Retrieving callchain for PMC that doesn't "
		"want it", __LINE__));
 
-	KASSERT(pm->pm_runcount > 0,
-	    ("[pmc,%d] runcount %d", __LINE__, pm->pm_runcount));
+	KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
+	    ("[pmc,%d] runcount %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 
	/*
	 * Retrieve the callchain and mark the sample buffer
@@ -4275,9 +4312,7 @@
	sched_unpin();	/* Can migrate safely now. */
 
	/* mark CPU as needing processing */
-	CPU_SET_ATOMIC(cpu, &pmc_cpumask);
-
-	return;
+	DPCPU_SET(pmc_sampled, 1);
 }
 
 /*
@@ -4309,9 +4344,9 @@
 
		pm = ps->ps_pmc;
 
-		KASSERT(pm->pm_runcount > 0,
-		    ("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm,
-			pm->pm_runcount));
+		KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
+		    ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+			 (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 
		po = pm->pm_owner;
 
@@ -4326,7 +4361,7 @@
		/* If there is a pending AST wait for completion */
		if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
			/* Need a rescan at a later time. */
-			CPU_SET_ATOMIC(cpu, &pmc_cpumask);
+			DPCPU_SET(pmc_sampled, 1);
			break;
		}
 
@@ -4359,7 +4394,7 @@
 entrydone:
		ps->ps_nsamples = 0; /* mark entry as free */
-		atomic_subtract_rel_int(&pm->pm_runcount, 1);
+		counter_u64_add(pm->pm_runcount, -1);
 
		/* increment read pointer, modulo sample size */
		if (++ps == psb->ps_fence)
@@ -4368,7 +4403,7 @@
		psb->ps_read = ps;
	}
 
-	atomic_add_int(&pmc_stats.pm_log_sweeps, 1);
+	counter_u64_add(pmc_stats.pm_log_sweeps, 1);
 
	/* Do not re-enable stalled PMCs if we failed to process any samples */
	if (n == 0)
@@ -4390,11 +4425,11 @@
		if (pm == NULL ||			 /* !cfg'ed */
		    pm->pm_state != PMC_STATE_RUNNING || /* !active */
		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */
-		    !CPU_ISSET(cpu, &pm->pm_cpustate)  || /* !desired */
-		    !CPU_ISSET(cpu, &pm->pm_stalled)) /* !stalled */
+			!pm->pm_pcpu_state[cpu].pps_cpustate  || /* !desired */
+			!pm->pm_pcpu_state[cpu].pps_stalled) /* !stalled */
			continue;
 
-		CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
+		pm->pm_pcpu_state[cpu].pps_stalled = 0;
		(*pcd->pcd_start_pmc)(cpu, adjri);
	}
 }
@@ -4513,9 +4548,9 @@
			    ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__,
				pm, ri, pp->pp_pmcs[ri].pp_pmc));
 
-			KASSERT(pm->pm_runcount > 0,
-			    ("[pmc,%d] bad runcount ri %d rc %d",
-				__LINE__, ri, pm->pm_runcount));
+			KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
+			    ("[pmc,%d] bad runcount ri %d rc %ld",
+				 __LINE__, ri, (unsigned long)counter_u64_fetch(pm->pm_runcount)));
 
			/*
			 * Change desired state, and then stop if not
@@ -4524,9 +4559,9 @@
			 * the PMC after this code has already checked
			 * the pm_stalled flag.
			 */
-			if (CPU_ISSET(cpu, &pm->pm_cpustate)) {
-				CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
-				if (!CPU_ISSET(cpu, &pm->pm_stalled)) {
+			if (pm->pm_pcpu_state[cpu].pps_cpustate) {
+				pm->pm_pcpu_state[cpu].pps_cpustate = 0;
+				if (!pm->pm_pcpu_state[cpu].pps_stalled) {
					(void) pcd->pcd_stop_pmc(cpu, adjri);
 
					pcd->pcd_read_pmc(cpu, adjri, &newvalue);
@@ -4540,9 +4575,9 @@
				}
			}
 
-			atomic_subtract_rel_int(&pm->pm_runcount,1);
+			counter_u64_add(pm->pm_runcount, -1);
 
-			KASSERT((int) pm->pm_runcount >= 0,
+			KASSERT((int) counter_u64_fetch(pm->pm_runcount) >= 0,
			    ("[pmc,%d] runcount is %d", __LINE__, ri));
 
			(void) pcd->pcd_config_pmc(cpu, adjri, NULL);
@@ -4811,7 +4846,8 @@
 pmc_initialize(void)
 {
	int c, cpu, error, n, ri;
-	unsigned int maxcpu;
+	unsigned int maxcpu, domain;
+	struct pcpu *pc;
	struct pmc_binding pb;
	struct pmc_sample *ps;
	struct pmc_classdep *pcd;
@@ -4820,6 +4856,15 @@
	md = NULL;
	error = 0;
 
+	pmc_stats.pm_intr_ignored = counter_u64_alloc(M_WAITOK);
+	pmc_stats.pm_intr_processed = counter_u64_alloc(M_WAITOK);
+	pmc_stats.pm_intr_bufferfull = counter_u64_alloc(M_WAITOK);
+	pmc_stats.pm_syscalls = counter_u64_alloc(M_WAITOK);
+	pmc_stats.pm_syscall_errors = counter_u64_alloc(M_WAITOK);
+	pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK);
+	pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK);
+	pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK);
+
 #ifdef HWPMC_DEBUG
	/* parse debug flags first */
	if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags",
@@ -4927,9 +4972,10 @@
	for (cpu = 0; cpu < maxcpu; cpu++) {
		if (!pmc_cpu_is_active(cpu))
			continue;
-
-		sb = malloc(sizeof(struct pmc_samplebuffer) +
-		    pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
+		pc = pcpu_find(cpu);
+		domain = pc->pc_domain;
+		sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
+		    pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
		    M_WAITOK|M_ZERO);
		sb->ps_read = sb->ps_write = sb->ps_samples;
		sb->ps_fence = sb->ps_samples + pmc_nsamples;
@@ -4937,8 +4983,8 @@
		KASSERT(pmc_pcpu[cpu] != NULL,
		    ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
 
-		sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples *
-		    sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO);
+		sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples *
+		    sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO);
 
		for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
			ps->ps_pc = sb->ps_callchains +
@@ -4946,8 +4992,8 @@
 
		pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb;
 
-		sb = malloc(sizeof(struct pmc_samplebuffer) +
-		    pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
+		sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
+		    pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
		    M_WAITOK|M_ZERO);
		sb->ps_read = sb->ps_write = sb->ps_samples;
		sb->ps_fence = sb->ps_samples + pmc_nsamples;
@@ -4955,8 +5001,8 @@
		KASSERT(pmc_pcpu[cpu] != NULL,
		    ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
 
-		sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples *
-		    sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO);
+		sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples *
+		    sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO);
 
		for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
			ps->ps_pc = sb->ps_callchains +
@@ -5048,7 +5094,8 @@
	PMCDBG0(MOD,INI,0, "cleanup");
 
	/* switch off sampling */
-	CPU_ZERO(&pmc_cpumask);
+	CPU_FOREACH(cpu)
+		DPCPU_ID_SET(cpu, pmc_sampled, 0);
	pmc_intr = NULL;
 
	sx_xlock(&pmc_sx);
@@ -5157,11 +5204,11 @@
		KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL,
		    ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__,
			cpu));
-		free(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC);
-		free(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC);
-		free(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC);
-		free(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC);
-		free(pmc_pcpu[cpu], M_PMC);
+		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC);
+		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC);
+		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC);
+		free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC);
+		free_domain(pmc_pcpu[cpu], M_PMC);
	}
 
	free(pmc_pcpu, M_PMC);
@@ -5181,7 +5228,14 @@
	}
 
	pmclog_shutdown();
-
+	counter_u64_free(pmc_stats.pm_intr_ignored);
+	counter_u64_free(pmc_stats.pm_intr_processed);
+	counter_u64_free(pmc_stats.pm_intr_bufferfull);
+	counter_u64_free(pmc_stats.pm_syscalls);
+	counter_u64_free(pmc_stats.pm_syscall_errors);
+	counter_u64_free(pmc_stats.pm_buffer_requests);
+	counter_u64_free(pmc_stats.pm_buffer_requests_failed);
+	counter_u64_free(pmc_stats.pm_log_sweeps);
	sx_xunlock(&pmc_sx); 	/* we are done */
 }
Index: head/sys/dev/hwpmc/hwpmc_mpc7xxx.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_mpc7xxx.c
+++ head/sys/dev/hwpmc/hwpmc_mpc7xxx.c
@@ -710,9 +710,10 @@
		/* reload count. */
		mpc7xxx_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount);
	}
-
-	atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (retval)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
	/* Re-enable PERF exceptions. */
	if (retval)
Index: head/sys/dev/hwpmc/hwpmc_piv.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_piv.c
+++ head/sys/dev/hwpmc/hwpmc_piv.c
@@ -1545,8 +1545,10 @@
	if (did_interrupt)
		lapic_reenable_pmc();
 
-	atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (did_interrupt)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
	return (did_interrupt);
 }
Index: head/sys/dev/hwpmc/hwpmc_ppc970.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_ppc970.c
+++ head/sys/dev/hwpmc/hwpmc_ppc970.c
@@ -528,8 +528,10 @@
		ppc970_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount);
	}
 
-	atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (retval)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
	/* Re-enable PERF exceptions. */
	if (retval)
Index: head/sys/dev/hwpmc/hwpmc_ppro.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_ppro.c
+++ head/sys/dev/hwpmc/hwpmc_ppro.c
@@ -725,8 +725,10 @@
	if (retval)
		lapic_reenable_pmc();
 
-	atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (retval)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
	/* restart counters that can be restarted */
	P6_SYNC_CTR_STATE(pc);
Index: head/sys/dev/hwpmc/hwpmc_soft.c
===================================================================
--- head/sys/dev/hwpmc/hwpmc_soft.c
+++ head/sys/dev/hwpmc/hwpmc_soft.c
@@ -441,9 +441,10 @@
		} else
			pc->soft_values[ri]++;
	}
-
-	atomic_add_int(processed ? &pmc_stats.pm_intr_processed :
-	    &pmc_stats.pm_intr_ignored, 1);
+	if (processed)
+		counter_u64_add(pmc_stats.pm_intr_processed, 1);
+	else
+		counter_u64_add(pmc_stats.pm_intr_ignored, 1);
 
	return (processed);
 }
Index: head/sys/kern/kern_pmc.c
===================================================================
--- head/sys/kern/kern_pmc.c
+++ head/sys/kern/kern_pmc.c
@@ -66,8 +66,7 @@
 /* Interrupt handler */
 int __read_mostly (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;
 
-/* Bitmask of CPUs requiring servicing at hardclock time */
-volatile cpuset_t pmc_cpumask;
+DPCPU_DEFINE(uint8_t, pmc_sampled);
 
 /*
 * A global count of SS mode PMCs.  When non-zero, this means that
Index: head/sys/sys/pmc.h
===================================================================
--- head/sys/sys/pmc.h
+++ head/sys/sys/pmc.h
@@ -36,7 +36,8 @@
 #define	_SYS_PMC_H_
 
 #include
-
+#include
+#include
 #include
 #include
 
@@ -552,6 +553,19 @@
 *
 * Retrieve pmc(4) driver-wide statistics.
 */
+#ifdef _KERNEL
+struct pmc_driverstats {
+	counter_u64_t	pm_intr_ignored;	/* #interrupts ignored */
+	counter_u64_t	pm_intr_processed;	/* #interrupts processed */
+	counter_u64_t	pm_intr_bufferfull;	/* #interrupts with ENOSPC */
+	counter_u64_t	pm_syscalls;		/* #syscalls */
+	counter_u64_t	pm_syscall_errors;	/* #syscalls with errors */
+	counter_u64_t	pm_buffer_requests;	/* #buffer requests */
+	counter_u64_t	pm_buffer_requests_failed; /* #failed buffer requests */
+	counter_u64_t	pm_log_sweeps;		/* #sample buffer processing
						   passes */
+};
+#endif
 
 struct pmc_op_getdriverstats {
	unsigned int	pm_intr_ignored;	/* #interrupts ignored */
@@ -625,9 +639,9 @@
 
 #define	PMC_HASH_SIZE				1024
 #define	PMC_MTXPOOL_SIZE			2048
-#define	PMC_LOG_BUFFER_SIZE			4
-#define	PMC_NLOGBUFFERS				1024
-#define	PMC_NSAMPLES				1024
+#define	PMC_LOG_BUFFER_SIZE			128
+#define	PMC_NLOGBUFFERS_PCPU			8
+#define	PMC_NSAMPLES				64
 #define	PMC_CALLCHAIN_DEPTH			32
 
 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
@@ -701,7 +715,10 @@
 *	field is '0'.
 *
 */
-
+struct pmc_pcpu_state {
+	uint8_t pps_stalled;
+	uint8_t pps_cpustate;
+} __aligned(CACHE_LINE_SIZE);
 struct pmc {
	LIST_HEAD(,pmc_target)	pm_targets;	/* list of target processes */
	LIST_ENTRY(pmc)		pm_next;	/* owner's list */
@@ -735,13 +752,13 @@
		pmc_value_t	pm_initial;	/* counting PMC modes */
	} pm_sc;
 
-	volatile cpuset_t pm_stalled;	/* marks stalled sampling PMCs */
+	struct pmc_pcpu_state *pm_pcpu_state;
	volatile cpuset_t pm_cpustate;	/* CPUs where PMC should be active */
	uint32_t	pm_caps;	/* PMC capabilities */
	enum pmc_event	pm_event;	/* event being measured */
	uint32_t	pm_flags;	/* additional flags PMC_F_... */
	struct pmc_owner *pm_owner;	/* owner thread state */
-	int		pm_runcount;	/* #cpus currently on */
+	counter_u64_t		pm_runcount;	/* #cpus currently on */
	enum pmc_state	pm_state;	/* current PMC state */
	uint32_t	pm_overflowcnt;	/* count overflow interrupts */
 
@@ -816,11 +833,11 @@
	struct proc		*po_owner;	/* owner proc */
	uint32_t		po_flags;	/* (k) flags PMC_PO_* */
	struct proc		*po_kthread;	/* (k) helper kthread */
-	struct pmclog_buffer	*po_curbuf;	/* current log buffer */
	struct file		*po_file;	/* file reference */
	int			po_error;	/* recorded error */
	short			po_sscount;	/* # SS PMCs owned */
	short			po_logprocmaps;	/* global mappings done */
+	struct pmclog_buffer	*po_curbuf[MAXCPU];	/* current log buffer */
 };
 
 #define	PMC_PO_OWNS_LOGFILE		0x00000001 /* has a log file */
@@ -1012,7 +1029,10 @@
 extern struct pmc_cpu **pmc_pcpu;
 
 /* driver statistics */
-extern struct pmc_op_getdriverstats pmc_stats;
+extern struct pmc_driverstats pmc_stats;
+
+/* cpu model name for pmu lookup */
+extern char pmc_cpuid[64];
 
 #if	defined(HWPMC_DEBUG)
 #include
Index: head/sys/sys/pmckern.h
===================================================================
--- head/sys/sys/pmckern.h
+++ head/sys/sys/pmckern.h
@@ -165,7 +165,7 @@
 extern struct sx pmc_sx;
 
 /* Per-cpu flags indicating availability of sampling data */
-extern volatile cpuset_t pmc_cpumask;
+DPCPU_DECLARE(uint8_t, pmc_sampled);
 
 /* Count of system-wide sampling PMCs in existence */
 extern volatile int pmc_ss_count;
@@ -220,7 +220,7 @@
 #define	PMC_SYSTEM_SAMPLING_ACTIVE()		(pmc_ss_count > 0)
 
 /* Check if a CPU has recorded samples. */
-#define	PMC_CPU_HAS_SAMPLES(C)	(__predict_false(CPU_ISSET(C, &pmc_cpumask)))
+#define	PMC_CPU_HAS_SAMPLES(C)	(__predict_false(DPCPU_ID_GET((C), pmc_sampled)))
 
 /*
 * Helper functions.