Changeset View
Standalone View
sys/kern/kern_intr.c
Show First 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | |||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/priv.h> | #include <sys/priv.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/epoch.h> | #include <sys/epoch.h> | ||||
#include <sys/random.h> | #include <sys/random.h> | ||||
#include <sys/resourcevar.h> | #include <sys/resourcevar.h> | ||||
#include <sys/sched.h> | #include <sys/sched.h> | ||||
#include <sys/sbuf.h> | |||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
#include <sys/sx.h> | #include <sys/sx.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/syslog.h> | #include <sys/syslog.h> | ||||
#include <sys/unistd.h> | #include <sys/unistd.h> | ||||
#include <sys/vmmeter.h> | #include <sys/vmmeter.h> | ||||
#include <machine/atomic.h> | #include <machine/atomic.h> | ||||
#include <machine/cpu.h> | #include <machine/cpu.h> | ||||
▲ Show 20 Lines • Show All 1,308 Lines • ▼ Show 20 Lines | intr_event_handle(struct intr_event *ie, struct trapframe *frame) | ||||
struct trapframe *oldframe; | struct trapframe *oldframe; | ||||
struct thread *td; | struct thread *td; | ||||
int phase; | int phase; | ||||
int ret; | int ret; | ||||
bool filter, thread; | bool filter, thread; | ||||
td = curthread; | td = curthread; | ||||
#ifdef KSTACK_USAGE_PROF | #ifdef KSTACK_USAGE_PROF | ||||
intr_prof_stack_use(td, frame); | intr_prof_stack_use(td, frame); | ||||
ehem_freebsd_m5p.com: Key advantage here. Moving the counters to `struct intr_event` causes them to be widely spaced… | |||||
Done Inline ActionsThat is a very bold statement. Do you have some technical evidence (TRM, measurements or whatever) that supports this thesis on all supported platforms? mmel: That is a very bold statement. Do you have some technical evidence (TRM, measurements or… | |||||
Done Inline ActionsNo, I haven't done any benchmarking, though I suspect I was overrating the value of this. When the topic came up, the justification I was given for isrc_increment_count() using atomic_add_long() only for IPI interrupts was: atomic_add_long() could be quite expensive on some architectures if another processor was writing to the same cache line. The intr_event structure looks to be roughly 128 bytes, which is 2 complete cache lines on many processors, as a result this makes the counters unlikely to share their cache line and thus makes atomic increment reasonable for all cases. ehem_freebsd_m5p.com: No, I haven't done any benchmarking, though I suspect I was overrating the value of this.
When… | |||||
Not Done Inline ActionsAs I mentioned in another review atomics are incredibly costly are should be avoided if possible. Counting stuff is a prime example. Bouncing cachelines to update things is also costly, which is why normally there is cpu-private fields guranteed to not false share with other cpus. mjg: As I mentioned in another review atomics are incredibly costly are should be avoided if… | |||||
#endif | #endif | ||||
/* An interrupt with no event is a stray interrupt. */ | /* An interrupt with no event is a stray interrupt. */ | ||||
if (ie == NULL) | if (ie == NULL) | ||||
return (~0UL); | return (~0UL); | ||||
/* Increment the interrupt counter. */ | /* Increment the interrupt counter. */ | ||||
if (__predict_false(ie->ie_flags & IE_MULTIPROC)) | if (__predict_false(ie->ie_flags & IE_MULTIPROC)) | ||||
▲ Show 20 Lines • Show All 256 Lines • ▼ Show 20 Lines | |||||
start_softintr(void *dummy) | start_softintr(void *dummy) | ||||
{ | { | ||||
if (swi_add(&clk_intr_event, "clk", NULL, NULL, SWI_CLOCK, | if (swi_add(&clk_intr_event, "clk", NULL, NULL, SWI_CLOCK, | ||||
INTR_MPSAFE, NULL)) | INTR_MPSAFE, NULL)) | ||||
panic("died while creating clk swi ithread"); | panic("died while creating clk swi ithread"); | ||||
} | } | ||||
SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, | SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, | ||||
NULL); | NULL); | ||||
Done Inline ActionsGuess I should also note this improvement. Avoiding this significant temporary memory allocation. ehem_freebsd_m5p.com: Guess I should also note this improvement. Avoiding this significant temporary memory… | |||||
Done Inline Actionscp = intrnames; at initialization, then cp += strlen(cp) + 1; as an increment each time through the loop. This is clearly interpreting "intrnames" as a packed series of strings separated by null-terminators. ehem_freebsd_m5p.com: `cp = intrnames;` at initialization, then `cp += strlen(cp) + 1;` as an increment each time… | |||||
/*
 * Sysctls used by systat and others: hw.intrnames and hw.intrcnt.
 * The data for this is machine independent.
 */
int | |||||
intr_event_sysctl_intrnames(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
struct sbuf sbuf; | |||||
struct intr_event *ie; | |||||
int error = 0; | |||||
sbuf_new_for_sysctl(&sbuf, NULL, 4096, req); | |||||
sx_slock(&event_lock); | |||||
TAILQ_FOREACH(ie, &event_list, ie_list) { | |||||
Done Inline ActionsI believe this is unnecessary as ie_fullname is always supposed to include a '\0', but I wasn't quite 100% certain and thus presently have the safe option. ehem_freebsd_m5p.com: I believe this is unnecessary as `ie_fullname` is always supposed to include a '\0', but I… | |||||
if (ie->ie_flags & IE_MULTIPROC) { | |||||
u_int proc; | |||||
Done Inline Actionswhile strictly speaking it is legal to hold sx locks across copyin/copyout, it is a bad idea user memory has to be assumed to be backed with devices/filesystems with indefinite latency. what you normally want to do is either malloc to have something to copy out (if small enough) or wire user pages, eagain before you take the event_lock. mjg: while strictly speaking it is legal to hold sx locks across copyin/copyout, it is a bad idea… | |||||
Done Inline ActionsI concede the copyout could be very slow. Good news is the lock is only needed exclusively for intr_event_create() and intr_event_destroy(). Most systems those will only be called during system start/shutdown when it isn't too likely for users to be present to try to retrieve this data. The type of system most likely to see those called during normal operation are VM control systems, which better not have many logged in users (as they are a sensitive area). I halfway wondered whether it might be safe not to have the lock during this operation, but it isn't. The concern is of the entry being copied out being the one targeted for destruction or a torn read during the list modification. The primary defense here is all uses being fairly rare. ehem_freebsd_m5p.com: I concede the copyout could be very slow. Good news is the lock is only needed exclusively for… | |||||
Not Done Inline Actionsit's not about being fast or slow, but the fact that it can hang for an indefinite amount of time. trivial example: a funny user mmaps() an nfs-backed file and invokes the sysctl with that area as output. if, say, network goes down, this is going to sit there. Again, this is a non-starter and is not hard to avoid. mjg: it's not about being fast or slow, but the fact that it can hang for an indefinite amount of… | |||||
Done Inline ActionsI've got no fundamental objections to either proposed alternative. Right now though I'm looking at the issue of updating vmstat to continue working its magic. Ultimately this is a case of which failure mode is preferred. The interrupt list could be long (there was a mention of a board with 90K interrupts). Though we're no longer in the era when 256MB of memory was huge, thus now 4MB temporary isn't that awful. ehem_freebsd_m5p.com: I've got no fundamental objections to either proposed alternative. Right now though I'm… | |||||
Done Inline ActionsI should also point out intr_event_create() already has a malloc(,, M_WAITOK); so it can already hang for an indefinite amount of time. ehem_freebsd_m5p.com: I should also point out `intr_event_create()` already has a `malloc(,, M_WAITOK);` so it can… | |||||
Done Inline ActionsI note this is fairly similar to the x86's implementation of sysctl("hw.intrs") (in sys/x86/x86/intr_machdep.c, function sysctl_hw_intrs()). That implementation uses a shared-exclusive lock in the exact same way. That also inspired a bit of change here to take care of a concern I already had. ehem_freebsd_m5p.com: I note this is fairly similar to the x86's implementation of `sysctl("hw.intrs")` (in… | |||||
for (proc = 0; proc < mp_ncpus; ++proc) { | |||||
error = sbuf_printf(&sbuf, "cpu%u:%s%c", proc, | |||||
ie->ie_fullname, 0); | |||||
if (error != 0) | |||||
goto out; | |||||
} | |||||
} else { | |||||
error = sbuf_printf(&sbuf, "%s%c", ie->ie_fullname, 0); | |||||
if (error != 0) | |||||
goto out; | |||||
} | |||||
error = sbuf_printf(&sbuf, "stray %s%c", ie->ie_fullname, 0); | |||||
if (error != 0) | |||||
goto out; | |||||
} | |||||
out: | |||||
sx_sunlock(&event_lock); | |||||
error = sbuf_finish(&sbuf); | |||||
sbuf_delete(&sbuf); | |||||
return (error); | |||||
} | |||||
int | |||||
intr_event_sysctl_intrcnt(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
struct sbuf sbuf; | |||||
struct intr_event *ie; | |||||
int error = 0; | |||||
int sz = sizeof(ie->ie_intrcnt); | |||||
u_long val; | |||||
void *arg = &val; | |||||
#ifdef SCTL_MASK32 | |||||
uint32_t val32; | |||||
if (req->flags & SCTL_MASK32) { | |||||
sz = sizeof(val32); | |||||
Done Inline ActionsOne item here, does this actually need atomic_load_long()? The counter could nominally be changing rapidly, so returning a snapshot is nice; however, the snapshot is immediately out of date. ehem_freebsd_m5p.com: One item here, does this actually need `atomic_load_long()`? The counter could nominally be… | |||||
arg = &val32; | |||||
} | |||||
#endif | |||||
sbuf_new_for_sysctl(&sbuf, NULL, 4096, req); | |||||
sx_slock(&event_lock); | |||||
TAILQ_FOREACH(ie, &event_list, ie_list) { | |||||
if (ie->ie_flags & IE_MULTIPROC) { | |||||
u_int proc; | |||||
Done Inline ActionsSame question as above. ehem_freebsd_m5p.com: Same question as above. | |||||
for (proc = 0; proc < mp_ncpus; ++proc) { | |||||
#ifdef SCTL_MASK32 | |||||
val32 = | |||||
#endif | |||||
val = ie->ie_intrcnt[proc]; | |||||
error = sbuf_bcat(&sbuf, arg, sz); | |||||
if (error != 0) | |||||
goto out; | |||||
} | |||||
} else { | |||||
#ifdef SCTL_MASK32 | |||||
val32 = | |||||
#endif | |||||
val = ie->ie_intrcnt[0]; | |||||
error = sbuf_bcat(&sbuf, arg, sz); | |||||
if (error != 0) | |||||
goto out; | |||||
} | |||||
#ifdef SCTL_MASK32 | |||||
val32 = | |||||
#endif | |||||
val = ie->ie_stray; | |||||
error = sbuf_bcat(&sbuf, arg, sz); | |||||
if (error != 0) | |||||
goto out; | |||||
} | |||||
out: | |||||
sx_sunlock(&event_lock); | |||||
error = sbuf_finish(&sbuf); | |||||
sbuf_delete(&sbuf); | |||||
return (error); | |||||
} | |||||
#ifdef DDB
/*
 * DDB command to dump the interrupt statistics.  Honors the pager's
 * quit flag at every print so the operator can abort a long listing.
 */
DB_SHOW_COMMAND_FLAGS(intrcnt, db_show_intrcnt, DB_CMD_MEMSAFE)
{
	struct intr_event *ie;

	TAILQ_FOREACH(ie, &event_list, ie_list) {
		if (db_pager_quit)
			break;
		if (ie->ie_flags & IE_MULTIPROC) {
			u_int cpu;

			/* Per-CPU counters: one line per CPU, then stray. */
			for (cpu = 0; cpu < mp_ncpus; ++cpu) {
				if (db_pager_quit)
					break;
				db_printf("cpu%u:%s\t%lu\n", cpu,
				    ie->ie_fullname, ie->ie_intrcnt[cpu]);
			}
			if (!db_pager_quit)
				db_printf("stray %s\t%lu\n", ie->ie_fullname,
				    ie->ie_stray);
		} else if (ie->ie_intrcnt[0] != 0) {
			/* Single counter; events that never fired are skipped. */
			db_printf("%s\t%lu\nstray %s\t%lu\n", ie->ie_fullname,
			    ie->ie_intrcnt[0], ie->ie_fullname, ie->ie_stray);
		}
	}
}
#endif
Key advantage here. Moving the counters to struct intr_event causes them to be widely spaced and unlikely to have multiple on the same cache line (aside from count and stray). Thus it becomes reasonable to always use atomic add.