Index: sys/dev/hwpmc/hwpmc_logging.c
===================================================================
--- sys/dev/hwpmc/hwpmc_logging.c
+++ sys/dev/hwpmc/hwpmc_logging.c
@@ -235,7 +235,7 @@
 static void	pmclog_release(struct pmc_owner *po);
 static uint32_t	*pmclog_reserve(struct pmc_owner *po, int length);
 static void	pmclog_schedule_io(struct pmc_owner *po, int wakeup);
-static void	pmclog_schedule_all(struct pmc_owner *po);
+static void	pmclog_schedule_all(struct pmc_owner *po, int force);
 static void	pmclog_stop_kthread(struct pmc_owner *po);

 /*
@@ -843,7 +843,7 @@
 		goto error;
 	}

-	pmclog_schedule_all(po);
+	pmclog_schedule_all(po, force);
 error:
 	mtx_unlock(&pmc_kthread_mtx);
@@ -851,7 +851,7 @@
 }

 static void
-pmclog_schedule_one_cond(struct pmc_owner *po)
+pmclog_schedule_one_cond(struct pmc_owner *po, int force)
 {
 	struct pmclog_buffer *plb;
 	int cpu;
@@ -861,7 +861,8 @@
 	/* tell hardclock not to run again */
 	if (PMC_CPU_HAS_SAMPLES(cpu))
 		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
-
+	if (force)
+		pmc_flush_samples(cpu);
 	plb = po->po_curbuf[cpu];
 	if (plb && plb->plb_ptr != plb->plb_base)
 		pmclog_schedule_io(po, 1);
@@ -869,7 +870,7 @@
 }

 static void
-pmclog_schedule_all(struct pmc_owner *po)
+pmclog_schedule_all(struct pmc_owner *po, int force)
 {
 	/*
 	 * Schedule the current buffer if any and not empty.
@@ -878,7 +879,7 @@
 		thread_lock(curthread);
 		sched_bind(curthread, i);
 		thread_unlock(curthread);
-		pmclog_schedule_one_cond(po);
+		pmclog_schedule_one_cond(po, force);
 	}
 	thread_lock(curthread);
 	sched_unbind(curthread);
@@ -905,7 +906,7 @@
 	/*
 	 * Schedule the current buffer.
 	 */
-	pmclog_schedule_all(po);
+	pmclog_schedule_all(po, 0);
 	wakeup_one(po);
 	mtx_unlock(&pmc_kthread_mtx);

Index: sys/dev/hwpmc/hwpmc_mod.c
===================================================================
--- sys/dev/hwpmc/hwpmc_mod.c
+++ sys/dev/hwpmc/hwpmc_mod.c
@@ -204,7 +204,7 @@
 #endif

 static int	load(struct module *module, int cmd, void *arg);
-static int	pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf);
+static int	pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf);
 static void	pmc_add_thread_descriptors_from_proc(struct proc *p,
 		    struct pmc_process *pp);
 static int	pmc_attach_process(struct proc *p, struct pmc *pm);
@@ -242,7 +242,7 @@
 static void	pmc_process_exit(void *arg, struct proc *p);
 static void	pmc_process_fork(void *arg, struct proc *p1,
 		    struct proc *p2, int n);
-static void	pmc_process_samples(int cpu, ring_type_t soft);
+static void	pmc_process_samples(int cpu, int soft);
 static void	pmc_release_pmc_descriptor(struct pmc *pmc);
 static void	pmc_process_thread_add(struct thread *td);
 static void	pmc_process_thread_delete(struct thread *td);
@@ -335,7 +335,6 @@
 SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN,
     &pmc_nsamples, 0, "number of PC samples per CPU");

-static uint64_t pmc_sample_mask = PMC_NSAMPLES-1;

 /*
  * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
@@ -1396,10 +1395,6 @@
 		if (pm->pm_state != PMC_STATE_RUNNING)
 			continue;

-		KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
-		    ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
-		    (unsigned long)counter_u64_fetch(pm->pm_runcount)));
-
 		/* increment PMC runcount */
 		counter_u64_add(pm->pm_runcount, 1);

@@ -1594,10 +1589,6 @@
 		if (pm->pm_pcpu_state[cpu].pps_stalled == 0)
 			pcd->pcd_stop_pmc(cpu, adjri);

-		KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
-		    ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
-		    (unsigned long)counter_u64_fetch(pm->pm_runcount)));
-
 		/* reduce this PMC's runcount */
 		counter_u64_add(pm->pm_runcount, -1);

@@ -2727,7 +2718,7 @@
 static void
 pmc_wait_for_pmc_idle(struct pmc *pm)
 {
-#ifdef INVARIANTS
+#ifdef HWPMC_DEBUG
 	volatile int maxloop;

 	maxloop = 100 * pmc_cpu_max();
@@ -2739,7 +2730,7 @@
 	pmclog_flush(pm->pm_owner, 1);
 	while (counter_u64_fetch(pm->pm_runcount) > 0) {
 		pmclog_flush(pm->pm_owner, 1);
-#ifdef INVARIANTS
+#ifdef HWPMC_DEBUG
 		maxloop--;
 		KASSERT(maxloop > 0,
 		    ("[pmc,%d] (ri%d, rc%ld) waiting too long for "
@@ -4660,7 +4651,7 @@
  */

 static int
-pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf)
+pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf)
 {
 	int error, cpu, callchaindepth, inuserspace;
 	struct thread *td;
@@ -4675,15 +4666,18 @@
 	cpu = curcpu;
 	psb = pmc_pcpu[cpu]->pc_sb[ring];
 	inuserspace = TRAPF_USERMODE(tf);
-	ps = PMC_PROD_SAMPLE(psb);
-	if (psb->ps_considx != psb->ps_prodidx &&
-	    ps->ps_nsamples) {	/* in use, reader hasn't caught up */
+	ps = psb->ps_write;
+	if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
+		counter_u64_add(ps->ps_pmc->pm_runcount, -1);
+		counter_u64_add(pmc_stats.pm_overwrites, 1);
+		ps->ps_nsamples = 0;
+	} else if (ps->ps_nsamples) {	/* in use, reader hasn't caught up */
 		pm->pm_pcpu_state[cpu].pps_stalled = 1;
 		counter_u64_add(pmc_stats.pm_intr_bufferfull, 1);
 		PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
 		    cpu, pm, (void *) tf, inuserspace,
-		    (int) (psb->ps_prodidx & pmc_sample_mask),
-		    (int) (psb->ps_considx & pmc_sample_mask));
+		    (int) (psb->ps_write - psb->ps_samples),
+		    (int) (psb->ps_read - psb->ps_samples));
 		callchaindepth = 1;
 		error = ENOMEM;
 		goto done;
@@ -4692,8 +4686,14 @@
 	/* Fill in entry. */
 	PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
 	    (void *) tf, inuserspace,
-	    (int) (psb->ps_prodidx & pmc_sample_mask),
-	    (int) (psb->ps_considx & pmc_sample_mask));
+	    (int) (psb->ps_write - psb->ps_samples),
+	    (int) (psb->ps_read - psb->ps_samples));
+
+	KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
+	    ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+	    (unsigned long)counter_u64_fetch(pm->pm_runcount)));
+
+	counter_u64_add(pm->pm_runcount, 1);	/* hold onto PMC */

 	td = curthread;
 	ps->ps_pmc = pm;
@@ -4701,14 +4701,13 @@
 	ps->ps_pid = td->td_proc->p_pid;
 	ps->ps_tid = td->td_tid;
 	ps->ps_tsc = pmc_rdtsc();
-	ps->ps_ticks = ticks;
+
 	ps->ps_cpu = cpu;
 	ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;

 	callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
 	    pmc_callchaindepth : 1;

-	MPASS(ps->ps_pc != NULL);
 	if (callchaindepth == 1)
 		ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
 	else {
@@ -4722,27 +4721,26 @@
 			    callchaindepth, tf);
 		} else {
 			pmc_post_callchain_callback();
-			callchaindepth = PMC_USER_CALLCHAIN_PENDING;
+			callchaindepth = PMC_SAMPLE_INUSE;
 		}
 	}

 	ps->ps_nsamples = callchaindepth;	/* mark entry as in use */
 	if (ring == PMC_UR) {
 		ps->ps_nsamples_actual = callchaindepth;	/* mark entry as in use */
-		ps->ps_nsamples = PMC_USER_CALLCHAIN_PENDING;
+		ps->ps_nsamples = PMC_SAMPLE_INUSE;
 	} else
 		ps->ps_nsamples = callchaindepth;	/* mark entry as in use */
+	/* increment write pointer, modulo ring buffer size */
+	ps++;
+	if (ps == psb->ps_fence)
+		psb->ps_write = psb->ps_samples;
+	else
+		psb->ps_write = ps;

-	KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
-	    ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
-	    (unsigned long)counter_u64_fetch(pm->pm_runcount)));
-
-	counter_u64_add(pm->pm_runcount, 1);	/* hold onto PMC */
-	/* increment write pointer */
-	psb->ps_prodidx++;
 done:
 	/* mark CPU as needing processing */
-	if (callchaindepth != PMC_USER_CALLCHAIN_PENDING)
+	if (callchaindepth != PMC_SAMPLE_INUSE)
 		DPCPU_SET(pmc_sampled, 1);

 	return (error);
@@ -4781,15 +4779,14 @@
 {
 	struct pmc *pm;
 	struct thread *td;
-	struct pmc_sample *ps;
+	struct pmc_sample *ps, *ps_end;
 	struct pmc_samplebuffer *psb;
-	uint64_t considx, prodidx;
-	int nsamples, nrecords, pass, iter;
+	int nsamples, nrecords, pass;
 #ifdef INVARIANTS
 	int ncallchains;
 	int nfree;
-	int start_ticks = ticks;
 #endif
+
 	psb = pmc_pcpu[cpu]->pc_sb[ring];
 	td = curthread;
@@ -4807,30 +4804,29 @@
 	if (ring == PMC_UR)
 		nrecords = atomic_readandclear_32(&td->td_pmcpend);

-	for (iter = 0, considx = psb->ps_considx, prodidx = psb->ps_prodidx;
-	    considx < prodidx && iter < pmc_nsamples; considx++, iter++) {
-		ps = PMC_CONS_SAMPLE_OFF(psb, considx);
-
 	/*
 	 * Iterate through all deferred callchain requests.
 	 * Walk from the current read pointer to the current
 	 * write pointer.
 	 */
+	ps = psb->ps_read;
+	ps_end = psb->ps_write;
+	do {
 #ifdef INVARIANTS
 		if (ps->ps_nsamples == PMC_SAMPLE_FREE) {
 			nfree++;
-			continue;
+			goto next;
 		}
 		if ((ps->ps_pmc == NULL) ||
 		    (ps->ps_pmc->pm_state != PMC_STATE_RUNNING))
 			nfree++;
 #endif
-		if (ps->ps_td != td ||
-		    ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING ||
-		    ps->ps_pmc->pm_state != PMC_STATE_RUNNING)
-			continue;
+		if (ps->ps_nsamples != PMC_SAMPLE_INUSE)
+			goto next;
+		if (ps->ps_td != td)
+			goto next;

 		KASSERT(ps->ps_cpu == cpu,
 		    ("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__,
@@ -4863,28 +4859,15 @@
 		if (__predict_true(nsamples < pmc_callchaindepth - 1))
 			nsamples += pmc_save_user_callchain(ps->ps_pc + nsamples,
 			    pmc_callchaindepth - nsamples - 1, tf);
-
-		/*
-		 * We have to prevent hardclock from potentially overwriting
-		 * this sample between when we read the value and when we set
-		 * it
-		 */
-		spinlock_enter();
-		/*
-		 * Verify that the sample hasn't been dropped in the meantime
-		 */
-		if (ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING) {
-			ps->ps_nsamples = nsamples;
-			/*
-			 * If we couldn't get a sample, simply drop the reference
-			 */
-			if (nsamples == 0)
-				counter_u64_add(pm->pm_runcount, -1);
-		}
-		spinlock_exit();
+		wmb();
+		ps->ps_nsamples = nsamples;
 		if (nrecords-- == 1)
 			break;
-	}
+next:
+		/* increment the pointer, modulo sample ring size */
+		if (++ps == psb->ps_fence)
+			ps = psb->ps_samples;
+	} while (ps != ps_end);

 	if (__predict_false(ring == PMC_UR && td->td_pmcpend)) {
 		if (pass == 0) {
 			pass = 1;
@@ -4895,20 +4878,60 @@
 	}

 #ifdef INVARIANTS
-	if ((ticks - start_ticks) > hz)
-		log(LOG_ERR, "%s took %d ticks\n", __func__, (ticks - start_ticks));
+	if (ring == PMC_HR)
+		KASSERT(ncallchains > 0 || nfree > 0,
+		    ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__,
+		    cpu));
 #endif

 	/* mark CPU as needing processing */
 	DPCPU_SET(pmc_sampled, 1);
 }

+
+static void
+pmc_flush_ring(int cpu, int ring)
+{
+	struct pmc *pm;
+	struct pmc_sample *ps;
+	struct pmc_samplebuffer *psb;
+	int n;
+
+	psb = pmc_pcpu[cpu]->pc_sb[ring];
+
+	for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
+
+		ps = psb->ps_read;
+		if (ps->ps_nsamples == PMC_SAMPLE_FREE)
+			goto next;
+		pm = ps->ps_pmc;
+		counter_u64_add(pm->pm_runcount, -1);
+		ps->ps_nsamples = PMC_SAMPLE_FREE;
+		/* increment read pointer, modulo sample size */
+	next:
+		if (++ps == psb->ps_fence)
+			psb->ps_read = psb->ps_samples;
+		else
+			psb->ps_read = ps;
+	}
+}
+
+void
+pmc_flush_samples(int cpu)
+{
+	int n;
+
+	for (n = 0; n < PMC_NUM_SR; n++)
+		pmc_flush_ring(cpu, n);
+}
+
+
 /*
  * Process saved PC samples.
  */

static void
-pmc_process_samples(int cpu, ring_type_t ring)
+pmc_process_samples(int cpu, int ring)
 {
 	struct pmc *pm;
 	int adjri, n;
@@ -4917,25 +4940,20 @@
 	struct pmc_sample *ps;
 	struct pmc_classdep *pcd;
 	struct pmc_samplebuffer *psb;
-	uint64_t delta;

 	KASSERT(PCPU_GET(cpuid) == cpu,
 	    ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__,
 		PCPU_GET(cpuid), cpu));

 	psb = pmc_pcpu[cpu]->pc_sb[ring];
-	delta = psb->ps_prodidx - psb->ps_considx;
-	MPASS(delta <= pmc_nsamples);
-	MPASS(psb->ps_considx <= psb->ps_prodidx);
-	for (n = 0; psb->ps_considx < psb->ps_prodidx; psb->ps_considx++, n++) {
-		ps = PMC_CONS_SAMPLE(psb);
-		if (__predict_false(ps->ps_nsamples == PMC_SAMPLE_FREE))
-			continue;
+	for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
+
+		ps = psb->ps_read;
+		if (ps->ps_nsamples == PMC_SAMPLE_FREE)
+			break;
+
 		pm = ps->ps_pmc;
-		/* skip non-running samples */
-		if (pm->pm_state != PMC_STATE_RUNNING)
-			goto entrydone;

 		KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
 		    ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
@@ -4947,22 +4965,12 @@
 		    ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__,
 			pm, PMC_TO_MODE(pm)));

+		/* Ignore PMCs that have been switched off */
+		if (pm->pm_state != PMC_STATE_RUNNING)
+			goto entrydone;

 		/* If there is a pending AST wait for completion */
-		if (ps->ps_nsamples == PMC_USER_CALLCHAIN_PENDING) {
-			/* if we've been waiting more than 1 tick to
-			 * collect a callchain for this record then
-			 * drop it and move on.
-			 */
-			if (ticks - ps->ps_ticks > 1) {
-				/*
-				 * track how often we hit this as it will
-				 * preferentially lose user samples
-				 * for long running system calls
-				 */
-				counter_u64_add(pmc_stats.pm_overwrites, 1);
-				goto entrydone;
-			}
+		if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
 			/* Need a rescan at a later time. */
 			DPCPU_SET(pmc_sampled, 1);
 			break;
@@ -4970,8 +4978,8 @@

 		PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu,
 		    pm, ps->ps_nsamples, ps->ps_flags,
-		    (int) (psb->ps_prodidx & pmc_sample_mask),
-		    (int) (psb->ps_considx & pmc_sample_mask));
+		    (int) (psb->ps_write - psb->ps_samples),
+		    (int) (psb->ps_read - psb->ps_samples));

 		/*
 		 * If this is a process-mode PMC that is attached to
@@ -4994,11 +5002,13 @@
 	entrydone:
 		ps->ps_nsamples = 0; /* mark entry as free */
-		KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
-		    ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
-		    (unsigned long)counter_u64_fetch(pm->pm_runcount)));
-
 		counter_u64_add(pm->pm_runcount, -1);
+
+		/* increment read pointer, modulo sample size */
+		if (++ps == psb->ps_fence)
+			psb->ps_read = psb->ps_samples;
+		else
+			psb->ps_read = ps;
 	}

 	counter_u64_add(pmc_stats.pm_log_sweeps, 1);
@@ -5181,11 +5191,11 @@
 		}
 	}

-	KASSERT((int64_t) counter_u64_fetch(pm->pm_runcount) > 0,
-	    ("[pmc,%d] runcount is %d", __LINE__, ri));
-
 	counter_u64_add(pm->pm_runcount, -1);
+
+	KASSERT((int) counter_u64_fetch(pm->pm_runcount) >= 0,
+	    ("[pmc,%d] runcount is %d", __LINE__, ri));
+
 	(void) pcd->pcd_config_pmc(cpu, adjri, NULL);
 }

@@ -5567,7 +5577,6 @@
 		    "range.\n", pmc_nsamples);
 		pmc_nsamples = PMC_NSAMPLES;
 	}
-	pmc_sample_mask = pmc_nsamples-1;

 	if (pmc_callchaindepth <= 0 ||
 	    pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
@@ -5640,16 +5649,19 @@
 			continue;
 		pc = pcpu_find(cpu);
 		domain = pc->pc_domain;
+
 		sb = malloc_domainset(sizeof(struct pmc_samplebuffer) +
 		    pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
-		    DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
+		    DOMAINSET_PREF(domain), M_WAITOK|M_ZERO);
+		sb->ps_read = sb->ps_write = sb->ps_samples;
+		sb->ps_fence = sb->ps_samples + pmc_nsamples;

 		KASSERT(pmc_pcpu[cpu] != NULL,
 		    ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));

 		sb->ps_callchains = malloc_domainset(pmc_callchaindepth *
-		    pmc_nsamples * sizeof(uintptr_t), M_PMC,
-		    DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
+		    pmc_nsamples * sizeof(uintptr_t), M_PMC, DOMAINSET_PREF(domain),
+		    M_WAITOK|M_ZERO);

 		for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
 			ps->ps_pc = sb->ps_callchains +
@@ -5659,11 +5671,17 @@

 		sb = malloc_domainset(sizeof(struct pmc_samplebuffer) +
 		    pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
-		    DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
+		    DOMAINSET_PREF(domain), M_WAITOK|M_ZERO);
+		sb->ps_read = sb->ps_write = sb->ps_samples;
+		sb->ps_fence = sb->ps_samples + pmc_nsamples;
+
+		KASSERT(pmc_pcpu[cpu] != NULL,
+		    ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));

 		sb->ps_callchains = malloc_domainset(pmc_callchaindepth *
 		    pmc_nsamples * sizeof(uintptr_t), M_PMC,
-		    DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
+		    DOMAINSET_PREF(domain), M_WAITOK|M_ZERO);
+
 		for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
 			ps->ps_pc = sb->ps_callchains +
 			    (n * pmc_callchaindepth);
@@ -5672,12 +5690,20 @@

 		sb = malloc_domainset(sizeof(struct pmc_samplebuffer) +
 		    pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
-		    DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
+		    DOMAINSET_PREF(domain), M_WAITOK|M_ZERO);
+		sb->ps_read = sb->ps_write = sb->ps_samples;
+		sb->ps_fence = sb->ps_samples + pmc_nsamples;
+
+		KASSERT(pmc_pcpu[cpu] != NULL,
+		    ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
+
 		sb->ps_callchains = malloc_domainset(pmc_callchaindepth *
 		    pmc_nsamples * sizeof(uintptr_t), M_PMC,
-		    DOMAINSET_PREF(domain), M_WAITOK | M_ZERO);
+		    DOMAINSET_PREF(domain), M_WAITOK|M_ZERO);
+
 		for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
-			ps->ps_pc = sb->ps_callchains + n * pmc_callchaindepth;
+			ps->ps_pc = sb->ps_callchains +
+			    (n * pmc_callchaindepth);

 		pmc_pcpu[cpu]->pc_sb[PMC_UR] = sb;
 	}

Index: sys/sys/pmc.h
===================================================================
--- sys/sys/pmc.h
+++ sys/sys/pmc.h
@@ -936,8 +936,6 @@
 	uint16_t		ps_flags;	/* other flags */
 	lwpid_t			ps_tid;		/* thread id */
 	pid_t			ps_pid;		/* process PID or -1 */
-	int			ps_ticks;	/* ticks at sample time */
-	/* pad */
 	struct thread		*ps_td;		/* which thread */
 	struct pmc		*ps_pmc;	/* interrupting PMC */
 	uintptr_t		*ps_pc;		/* (const) callchain start */
@@ -945,23 +943,16 @@
 };

 #define	PMC_SAMPLE_FREE		((uint16_t) 0)
-#define	PMC_USER_CALLCHAIN_PENDING ((uint16_t) 0xFFFF)
+#define	PMC_SAMPLE_INUSE	((uint16_t) 0xFFFF)

 struct pmc_samplebuffer {
-	volatile uint64_t	ps_prodidx;	/* producer index */
-	volatile uint64_t	ps_considx;	/* consumer index */
+	struct pmc_sample * volatile ps_read;	/* read pointer */
+	struct pmc_sample * volatile ps_write;	/* write pointer */
 	uintptr_t		*ps_callchains;	/* all saved call chains */
+	struct pmc_sample	*ps_fence;	/* one beyond ps_samples[] */
 	struct pmc_sample	ps_samples[];	/* array of sample entries */
 };

-#define	PMC_CONS_SAMPLE(psb) \
-	(&(psb)->ps_samples[(psb)->ps_considx & pmc_sample_mask])
-
-#define	PMC_CONS_SAMPLE_OFF(psb, off) \
-	(&(psb)->ps_samples[(off) & pmc_sample_mask])
-
-#define	PMC_PROD_SAMPLE(psb) \
-	(&(psb)->ps_samples[(psb)->ps_prodidx & pmc_sample_mask])

 /*
  * struct pmc_cpustate
@@ -1225,6 +1216,7 @@
 		    struct trapframe *_tf);
 struct pmc_mdep *pmc_mdep_alloc(int nclasses);
 void	pmc_mdep_free(struct pmc_mdep *md);
+void	pmc_flush_samples(int cpu);
 uint64_t pmc_rdtsc(void);
 #endif /* _KERNEL */
 #endif /* _SYS_PMC_H_ */
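The standalone program below is a minimal sketch (not part of the patch) of the produce/consume
discipline the hunks above restore: ps_write claims the next slot and advances modulo the ring via
the ps_fence sentinel, while ps_read drains completed entries and stops at a PMC_SAMPLE_INUSE entry
whose user callchain is still pending. It is simplified — it omits the runcount accounting and the
producer's overwrite of stale INUSE entries — and NSAMPLES, the pared-down structures, and main()
are illustrative stand-ins for the kernel's pmc_nsamples and per-CPU buffers.

/*
 * Userland sketch of the pointer-based sample ring: read/write pointers
 * chase each other through ps_samples[], wrapping at ps_fence.
 */
#include <stdio.h>
#include <stdint.h>

#define NSAMPLES		8	/* stand-in for pmc_nsamples */
#define PMC_SAMPLE_FREE		((uint16_t) 0)
#define PMC_SAMPLE_INUSE	((uint16_t) 0xFFFF)

struct pmc_sample {
	uint16_t ps_nsamples;		/* depth, FREE, or INUSE */
};

struct pmc_samplebuffer {
	struct pmc_sample *ps_read;	/* consumer pointer */
	struct pmc_sample *ps_write;	/* producer pointer */
	struct pmc_sample *ps_fence;	/* one beyond ps_samples[] */
	struct pmc_sample ps_samples[NSAMPLES];
};

/* Producer: claim the slot under ps_write, as pmc_add_sample() does. */
static int
ring_produce(struct pmc_samplebuffer *psb, uint16_t depth)
{
	struct pmc_sample *ps = psb->ps_write;

	if (ps->ps_nsamples != PMC_SAMPLE_FREE)
		return (-1);		/* reader hasn't caught up: drop */
	ps->ps_nsamples = depth;	/* mark entry as in use */
	/* increment write pointer, modulo ring buffer size */
	if (++ps == psb->ps_fence)
		psb->ps_write = psb->ps_samples;
	else
		psb->ps_write = ps;
	return (0);
}

/*
 * Consumer: drain filled slots, stopping at a FREE entry (ring empty) or
 * an INUSE one (callchain pending), like pmc_process_samples().
 */
static int
ring_consume(struct pmc_samplebuffer *psb)
{
	struct pmc_sample *ps;
	int n, drained = 0;

	for (n = 0; n < NSAMPLES; n++) {	/* bound on #iterations */
		ps = psb->ps_read;
		if (ps->ps_nsamples == PMC_SAMPLE_FREE ||
		    ps->ps_nsamples == PMC_SAMPLE_INUSE)
			break;
		ps->ps_nsamples = PMC_SAMPLE_FREE; /* mark entry as free */
		drained++;
		/* increment read pointer, modulo ring buffer size */
		if (++ps == psb->ps_fence)
			psb->ps_read = psb->ps_samples;
		else
			psb->ps_read = ps;
	}
	return (drained);
}

int
main(void)
{
	struct pmc_samplebuffer psb = { 0 };

	psb.ps_read = psb.ps_write = psb.ps_samples;
	psb.ps_fence = psb.ps_samples + NSAMPLES;

	for (int i = 0; i < 5; i++)
		(void) ring_produce(&psb, 1);
	printf("drained %d samples\n", ring_consume(&psb));	/* prints 5 */
	return (0);
}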