Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F137615712
D17011.id48641.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
D17011.id48641.diff
View Options
Index: sys/dev/hwpmc/hwpmc_logging.c
===================================================================
--- sys/dev/hwpmc/hwpmc_logging.c
+++ sys/dev/hwpmc/hwpmc_logging.c
@@ -234,7 +234,7 @@
static void pmclog_release(struct pmc_owner *po);
static uint32_t *pmclog_reserve(struct pmc_owner *po, int length);
static void pmclog_schedule_io(struct pmc_owner *po, int wakeup);
-static void pmclog_schedule_all(struct pmc_owner *po, int force);
+static void pmclog_schedule_all(struct pmc_owner *po);
static void pmclog_stop_kthread(struct pmc_owner *po);
/*
@@ -842,7 +842,7 @@
goto error;
}
- pmclog_schedule_all(po, force);
+ pmclog_schedule_all(po);
error:
mtx_unlock(&pmc_kthread_mtx);
@@ -850,7 +850,7 @@
}
static void
-pmclog_schedule_one_cond(struct pmc_owner *po, int force)
+pmclog_schedule_one_cond(struct pmc_owner *po)
{
struct pmclog_buffer *plb;
int cpu;
@@ -860,8 +860,7 @@
/* tell hardclock not to run again */
if (PMC_CPU_HAS_SAMPLES(cpu))
PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
- if (force)
- pmc_flush_samples(cpu);
+
plb = po->po_curbuf[cpu];
if (plb && plb->plb_ptr != plb->plb_base)
pmclog_schedule_io(po, 1);
@@ -869,7 +868,7 @@
}
static void
-pmclog_schedule_all(struct pmc_owner *po, int force)
+pmclog_schedule_all(struct pmc_owner *po)
{
/*
* Schedule the current buffer if any and not empty.
@@ -878,7 +877,7 @@
thread_lock(curthread);
sched_bind(curthread, i);
thread_unlock(curthread);
- pmclog_schedule_one_cond(po, force);
+ pmclog_schedule_one_cond(po);
}
thread_lock(curthread);
sched_unbind(curthread);
@@ -905,7 +904,7 @@
/*
* Schedule the current buffer.
*/
- pmclog_schedule_all(po, 0);
+ pmclog_schedule_all(po);
wakeup_one(po);
mtx_unlock(&pmc_kthread_mtx);
Index: sys/dev/hwpmc/hwpmc_mod.c
===================================================================
--- sys/dev/hwpmc/hwpmc_mod.c
+++ sys/dev/hwpmc/hwpmc_mod.c
@@ -211,7 +211,7 @@
#endif
static int load(struct module *module, int cmd, void *arg);
-static int pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf);
+static int pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf);
static void pmc_add_thread_descriptors_from_proc(struct proc *p,
struct pmc_process *pp);
static int pmc_attach_process(struct proc *p, struct pmc *pm);
@@ -249,7 +249,7 @@
static void pmc_process_exit(void *arg, struct proc *p);
static void pmc_process_fork(void *arg, struct proc *p1,
struct proc *p2, int n);
-static void pmc_process_samples(int cpu, int soft);
+static void pmc_process_samples(int cpu, ring_type_t soft);
static void pmc_release_pmc_descriptor(struct pmc *pmc);
static void pmc_process_thread_add(struct thread *td);
static void pmc_process_thread_delete(struct thread *td);
@@ -342,6 +342,7 @@
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_RDTUN,
&pmc_nsamples, 0, "number of PC samples per CPU");
+static uint64_t pmc_sample_mask = PMC_NSAMPLES-1;
/*
* kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
@@ -1402,6 +1403,10 @@
if (pm->pm_state != PMC_STATE_RUNNING)
continue;
+ KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
+ ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+ (unsigned long)counter_u64_fetch(pm->pm_runcount)));
+
/* increment PMC runcount */
counter_u64_add(pm->pm_runcount, 1);
@@ -1596,6 +1601,10 @@
if (pm->pm_pcpu_state[cpu].pps_stalled == 0)
pcd->pcd_stop_pmc(cpu, adjri);
+ KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
+ ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+ (unsigned long)counter_u64_fetch(pm->pm_runcount)));
+
/* reduce this PMC's runcount */
counter_u64_add(pm->pm_runcount, -1);
@@ -2725,7 +2734,7 @@
static void
pmc_wait_for_pmc_idle(struct pmc *pm)
{
-#ifdef HWPMC_DEBUG
+#ifdef INVARIANTS
volatile int maxloop;
maxloop = 100 * pmc_cpu_max();
@@ -2737,7 +2746,7 @@
pmclog_flush(pm->pm_owner, 1);
while (counter_u64_fetch(pm->pm_runcount) > 0) {
pmclog_flush(pm->pm_owner, 1);
-#ifdef HWPMC_DEBUG
+#ifdef INVARIANTS
maxloop--;
KASSERT(maxloop > 0,
("[pmc,%d] (ri%d, rc%ld) waiting too long for "
@@ -4657,7 +4666,7 @@
*/
static int
-pmc_add_sample(int ring, struct pmc *pm, struct trapframe *tf)
+pmc_add_sample(ring_type_t ring, struct pmc *pm, struct trapframe *tf)
{
int error, cpu, callchaindepth, inuserspace;
struct thread *td;
@@ -4672,19 +4681,20 @@
cpu = curcpu;
psb = pmc_pcpu[cpu]->pc_sb[ring];
inuserspace = TRAPF_USERMODE(tf);
- ps = psb->ps_write;
- if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
- counter_u64_add(ps->ps_pmc->pm_runcount, -1);
- counter_u64_add(pmc_stats.pm_overwrites, 1);
- ps->ps_nsamples = 0;
- } else if (ps->ps_nsamples) { /* in use, reader hasn't caught up */
+ ps = PMC_PROD_SAMPLE(psb);
+ if (psb->ps_considx != psb->ps_prodidx &&
+ ps->ps_nsamples) { /* in use, reader hasn't caught up */
pm->pm_pcpu_state[cpu].pps_stalled = 1;
counter_u64_add(pmc_stats.pm_intr_bufferfull, 1);
PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
cpu, pm, (void *) tf, inuserspace,
- (int) (psb->ps_write - psb->ps_samples),
- (int) (psb->ps_read - psb->ps_samples));
+ (int) (psb->ps_prodidx & pmc_sample_mask),
+ (int) (psb->ps_considx & pmc_sample_mask));
callchaindepth = 1;
+ KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
+ ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+ (unsigned long)counter_u64_fetch(pm->pm_runcount)));
+
error = ENOMEM;
goto done;
}
@@ -4692,14 +4702,8 @@
/* Fill in entry. */
PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
(void *) tf, inuserspace,
- (int) (psb->ps_write - psb->ps_samples),
- (int) (psb->ps_read - psb->ps_samples));
-
- KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
- ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
- (unsigned long)counter_u64_fetch(pm->pm_runcount)));
-
- counter_u64_add(pm->pm_runcount, 1); /* hold onto PMC */
+ (int) (psb->ps_prodidx & pmc_sample_mask),
+ (int) (psb->ps_considx & pmc_sample_mask));
td = curthread;
ps->ps_pmc = pm;
@@ -4707,13 +4711,14 @@
ps->ps_pid = td->td_proc->p_pid;
ps->ps_tid = td->td_tid;
ps->ps_tsc = pmc_rdtsc();
-
+ ps->ps_ticks = ticks;
ps->ps_cpu = cpu;
ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
pmc_callchaindepth : 1;
+ MPASS(ps->ps_pc != NULL);
if (callchaindepth == 1)
ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
else {
@@ -4737,13 +4742,14 @@
ps->ps_nsamples = PMC_SAMPLE_INUSE;
} else
ps->ps_nsamples = callchaindepth; /* mark entry as in use */
- /* increment write pointer, modulo ring buffer size */
- ps++;
- if (ps == psb->ps_fence)
- psb->ps_write = psb->ps_samples;
- else
- psb->ps_write = ps;
+ KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
+ ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+ (unsigned long)counter_u64_fetch(pm->pm_runcount)));
+
+ counter_u64_add(pm->pm_runcount, 1); /* hold onto PMC */
+ /* increment write pointer */
+ psb->ps_prodidx++;
done:
/* mark CPU as needing processing */
if (callchaindepth != PMC_SAMPLE_INUSE)
@@ -4785,14 +4791,15 @@
{
struct pmc *pm;
struct thread *td;
- struct pmc_sample *ps, *ps_end;
+ struct pmc_sample *ps;
struct pmc_samplebuffer *psb;
- int nsamples, nrecords, pass;
+ uint64_t considx, prodidx;
+ int nsamples, nrecords, pass, iter;
#ifdef INVARIANTS
int ncallchains;
int nfree;
+ int start_ticks = ticks;
#endif
-
psb = pmc_pcpu[cpu]->pc_sb[ring];
td = curthread;
@@ -4810,29 +4817,30 @@
if (ring == PMC_UR)
nrecords = atomic_readandclear_32(&td->td_pmcpend);
+ for (iter = 0, considx = psb->ps_considx, prodidx = psb->ps_prodidx;
+ considx < prodidx && iter < pmc_nsamples; considx++, iter++) {
+ ps = PMC_CONS_SAMPLE_OFF(psb, considx);
+
/*
* Iterate through all deferred callchain requests.
* Walk from the current read pointer to the current
* write pointer.
*/
- ps = psb->ps_read;
- ps_end = psb->ps_write;
- do {
#ifdef INVARIANTS
if (ps->ps_nsamples == PMC_SAMPLE_FREE) {
nfree++;
- goto next;
+ continue;
}
if ((ps->ps_pmc == NULL) ||
(ps->ps_pmc->pm_state != PMC_STATE_RUNNING))
nfree++;
#endif
- if (ps->ps_nsamples != PMC_SAMPLE_INUSE)
- goto next;
- if (ps->ps_td != td)
- goto next;
+ if (ps->ps_td != td ||
+ ps->ps_nsamples == PMC_SAMPLE_INUSE ||
+ ps->ps_pmc->pm_state != PMC_STATE_RUNNING)
+ continue;
KASSERT(ps->ps_cpu == cpu,
("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__,
@@ -4865,15 +4873,28 @@
if (__predict_true(nsamples < pmc_callchaindepth - 1))
nsamples += pmc_save_user_callchain(ps->ps_pc + nsamples,
pmc_callchaindepth - nsamples - 1, tf);
- wmb();
- ps->ps_nsamples = nsamples;
+
+ /*
+ * We have to prevent hardclock from potentially overwriting
+ * this sample between when we read the value and when we set
+ * it
+ */
+ spinlock_enter();
+ /*
+ * Verify that the sample hasn't been dropped in the meantime
+ */
+ if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
+ ps->ps_nsamples = nsamples;
+ /*
+ * If we couldn't get a sample, simply drop the reference
+ */
+ if (nsamples == 0)
+ counter_u64_add(pm->pm_runcount, -1);
+ }
+ spinlock_exit();
if (nrecords-- == 1)
break;
-next:
- /* increment the pointer, modulo sample ring size */
- if (++ps == psb->ps_fence)
- ps = psb->ps_samples;
- } while (ps != ps_end);
+ }
if (__predict_false(ring == PMC_UR && td->td_pmcpend)) {
if (pass == 0) {
pass = 1;
@@ -4884,60 +4905,20 @@
}
#ifdef INVARIANTS
- if (ring == PMC_HR)
- KASSERT(ncallchains > 0 || nfree > 0,
- ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__,
- cpu));
+ if ((ticks - start_ticks) > hz)
+ log(LOG_ERR, "%s took %d ticks\n", __func__, (ticks - start_ticks));
#endif
/* mark CPU as needing processing */
DPCPU_SET(pmc_sampled, 1);
}
-
-static void
-pmc_flush_ring(int cpu, int ring)
-{
- struct pmc *pm;
- struct pmc_sample *ps;
- struct pmc_samplebuffer *psb;
- int n;
-
- psb = pmc_pcpu[cpu]->pc_sb[ring];
-
- for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
-
- ps = psb->ps_read;
- if (ps->ps_nsamples == PMC_SAMPLE_FREE)
- goto next;
- pm = ps->ps_pmc;
- counter_u64_add(pm->pm_runcount, -1);
- ps->ps_nsamples = PMC_SAMPLE_FREE;
- /* increment read pointer, modulo sample size */
- next:
- if (++ps == psb->ps_fence)
- psb->ps_read = psb->ps_samples;
- else
- psb->ps_read = ps;
- }
-}
-
-void
-pmc_flush_samples(int cpu)
-{
- int n;
-
- for (n = 0; n < PMC_NUM_SR; n++)
- pmc_flush_ring(cpu, n);
-}
-
-
/*
* Process saved PC samples.
*/
static void
-pmc_process_samples(int cpu, int ring)
+pmc_process_samples(int cpu, ring_type_t ring)
{
struct pmc *pm;
int adjri, n;
@@ -4946,20 +4927,25 @@
struct pmc_sample *ps;
struct pmc_classdep *pcd;
struct pmc_samplebuffer *psb;
+ uint64_t delta;
KASSERT(PCPU_GET(cpuid) == cpu,
("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__,
PCPU_GET(cpuid), cpu));
psb = pmc_pcpu[cpu]->pc_sb[ring];
+ delta = psb->ps_prodidx - psb->ps_considx;
+ MPASS(delta <= pmc_nsamples);
+ MPASS(psb->ps_considx <= psb->ps_prodidx);
+ for (n = 0; psb->ps_considx < psb->ps_prodidx; psb->ps_considx++, n++) {
+ ps = PMC_CONS_SAMPLE(psb);
- for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
-
- ps = psb->ps_read;
- if (ps->ps_nsamples == PMC_SAMPLE_FREE)
- break;
-
+ if (__predict_false(ps->ps_nsamples == PMC_SAMPLE_FREE))
+ continue;
pm = ps->ps_pmc;
+ /* skip non-running samples */
+ if (pm->pm_state != PMC_STATE_RUNNING)
+ goto entrydone;
KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
@@ -4971,12 +4957,16 @@
("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__,
pm, PMC_TO_MODE(pm)));
- /* Ignore PMCs that have been switched off */
- if (pm->pm_state != PMC_STATE_RUNNING)
- goto entrydone;
/* If there is a pending AST wait for completion */
if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
+ /*
+ * XXX track how often we hit this as it will preferentially lose user samples
+ * for long running system calls
+ */
+ /* if sample is more than 65 ms old, drop it */
+ if (ticks - ps->ps_ticks > (hz >> 4))
+ goto entrydone;
/* Need a rescan at a later time. */
DPCPU_SET(pmc_sampled, 1);
break;
@@ -4984,8 +4974,8 @@
PMCDBG6(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu,
pm, ps->ps_nsamples, ps->ps_flags,
- (int) (psb->ps_write - psb->ps_samples),
- (int) (psb->ps_read - psb->ps_samples));
+ (int) (psb->ps_prodidx & pmc_sample_mask),
+ (int) (psb->ps_considx & pmc_sample_mask));
/*
* If this is a process-mode PMC that is attached to
@@ -5008,13 +4998,11 @@
entrydone:
ps->ps_nsamples = 0; /* mark entry as free */
- counter_u64_add(pm->pm_runcount, -1);
+ KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
+ ("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
+ (unsigned long)counter_u64_fetch(pm->pm_runcount)));
- /* increment read pointer, modulo sample size */
- if (++ps == psb->ps_fence)
- psb->ps_read = psb->ps_samples;
- else
- psb->ps_read = ps;
+ counter_u64_add(pm->pm_runcount, -1);
}
counter_u64_add(pmc_stats.pm_log_sweeps, 1);
@@ -5197,11 +5185,11 @@
}
}
- counter_u64_add(pm->pm_runcount, -1);
-
- KASSERT((int) counter_u64_fetch(pm->pm_runcount) >= 0,
+ KASSERT((int) counter_u64_fetch(pm->pm_runcount) > 0,
("[pmc,%d] runcount is %d", __LINE__, ri));
+ counter_u64_add(pm->pm_runcount, -1);
+
(void) pcd->pcd_config_pmc(cpu, adjri, NULL);
}
@@ -5582,7 +5570,9 @@
(void) printf("hwpmc: tunable \"nsamples\"=%d out of "
"range.\n", pmc_nsamples);
pmc_nsamples = PMC_NSAMPLES;
- }
+ pmc_sample_mask = PMC_NSAMPLES-1;
+ } else
+ pmc_sample_mask = pmc_nsamples-1;
if (pmc_callchaindepth <= 0 ||
pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
@@ -5658,8 +5648,6 @@
sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
M_WAITOK|M_ZERO);
- sb->ps_read = sb->ps_write = sb->ps_samples;
- sb->ps_fence = sb->ps_samples + pmc_nsamples;
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
@@ -5676,8 +5664,6 @@
sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
M_WAITOK|M_ZERO);
- sb->ps_read = sb->ps_write = sb->ps_samples;
- sb->ps_fence = sb->ps_samples + pmc_nsamples;
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
@@ -5694,8 +5680,6 @@
sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
M_WAITOK|M_ZERO);
- sb->ps_read = sb->ps_write = sb->ps_samples;
- sb->ps_fence = sb->ps_samples + pmc_nsamples;
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
Index: sys/sys/pmc.h
===================================================================
--- sys/sys/pmc.h
+++ sys/sys/pmc.h
@@ -936,6 +936,8 @@
uint16_t ps_flags; /* other flags */
lwpid_t ps_tid; /* thread id */
pid_t ps_pid; /* process PID or -1 */
+ int ps_ticks; /* ticks at sample time */
+ /* pad */
struct thread *ps_td; /* which thread */
struct pmc *ps_pmc; /* interrupting PMC */
uintptr_t *ps_pc; /* (const) callchain start */
@@ -946,13 +948,20 @@
#define PMC_SAMPLE_INUSE ((uint16_t) 0xFFFF)
struct pmc_samplebuffer {
- struct pmc_sample * volatile ps_read; /* read pointer */
- struct pmc_sample * volatile ps_write; /* write pointer */
+ volatile uint64_t ps_prodidx; /* producer index */
+ volatile uint64_t ps_considx; /* consumer index */
uintptr_t *ps_callchains; /* all saved call chains */
- struct pmc_sample *ps_fence; /* one beyond ps_samples[] */
struct pmc_sample ps_samples[]; /* array of sample entries */
};
+#define PMC_CONS_SAMPLE(psb) \
+ (&(psb)->ps_samples[(psb)->ps_considx & pmc_sample_mask])
+
+#define PMC_CONS_SAMPLE_OFF(psb, off) \
+ (&(psb)->ps_samples[(off) & pmc_sample_mask])
+
+#define PMC_PROD_SAMPLE(psb) \
+ (&(psb)->ps_samples[(psb)->ps_prodidx & pmc_sample_mask])
/*
* struct pmc_cpustate
@@ -1216,7 +1225,6 @@
struct trapframe *_tf);
struct pmc_mdep *pmc_mdep_alloc(int nclasses);
void pmc_mdep_free(struct pmc_mdep *md);
-void pmc_flush_samples(int cpu);
uint64_t pmc_rdtsc(void);
#endif /* _KERNEL */
#endif /* _SYS_PMC_H_ */
Index: sys/sys/pmckern.h
===================================================================
--- sys/sys/pmckern.h
+++ sys/sys/pmckern.h
@@ -67,11 +67,13 @@
#define PMC_FN_THR_EXIT_LOG 16
#define PMC_FN_PROC_CREATE_LOG 17
-#define PMC_HR 0 /* Hardware ring buffer */
-#define PMC_SR 1 /* Software ring buffer */
-#define PMC_UR 2 /* userret ring buffer */
-#define PMC_NUM_SR (PMC_UR+1)
-
+typedef enum ring_type {
+ PMC_HR = 0, /* Hardware ring buffer */
+ PMC_SR = 1, /* Software ring buffer */
+ PMC_UR = 2, /* userret ring buffer */
+ PMC_NUM_SR = PMC_UR+1
+} ring_type_t;
+
struct pmckern_procexec {
int pm_credentialschanged;
uintfptr_t pm_entryaddr;
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Nov 25, 3:22 PM (9 h, 8 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
26144508
Default Alt Text
D17011.id48641.diff (17 KB)
Attached To
Mode
D17011: hwpmc: Refactor sample ring buffer handling
Attached
Detach File
Event Timeline
Log In to Comment