Changeset View
Standalone View
sys/kern/kern_intr.c
Show First 20 Lines • Show All 332 Lines • ▼ Show 20 Lines | if ((flags & ~IE_SOFT) != 0) | ||||
return (EINVAL); | return (EINVAL); | ||||
ie->ie_source = source; | ie->ie_source = source; | ||||
ie->ie_pre_ithread = pre_ithread; | ie->ie_pre_ithread = pre_ithread; | ||||
ie->ie_post_ithread = post_ithread; | ie->ie_post_ithread = post_ithread; | ||||
ie->ie_post_filter = post_filter; | ie->ie_post_filter = post_filter; | ||||
ie->ie_assign_cpu = assign_cpu; | ie->ie_assign_cpu = assign_cpu; | ||||
ie->ie_flags = flags; | ie->ie_flags = flags; | ||||
ie->ie_cpu = NOCPU; | ie->ie_cpu = NOCPU; | ||||
ie->ie_stray = 0; | |||||
ie->ie_intrcnt = 0; | |||||
CK_SLIST_INIT(&ie->ie_handlers); | CK_SLIST_INIT(&ie->ie_handlers); | ||||
mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF); | mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF); | ||||
vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap); | vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap); | ||||
strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname)); | strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname)); | ||||
mtx_lock(&event_lock); | mtx_lock(&event_lock); | ||||
TAILQ_INSERT_TAIL(&event_list, ie, ie_list); | TAILQ_INSERT_TAIL(&event_list, ie, ie_list); | ||||
mtx_unlock(&event_lock); | mtx_unlock(&event_lock); | ||||
▲ Show 20 Lines • Show All 1,032 Lines • ▼ Show 20 Lines | intr_event_handle(struct intr_event *ie, struct trapframe *frame) | ||||
struct trapframe *oldframe; | struct trapframe *oldframe; | ||||
struct thread *td; | struct thread *td; | ||||
int phase; | int phase; | ||||
int ret; | int ret; | ||||
bool filter, thread; | bool filter, thread; | ||||
td = curthread; | td = curthread; | ||||
#ifdef KSTACK_USAGE_PROF | #ifdef KSTACK_USAGE_PROF | ||||
intr_prof_stack_use(td, frame); | intr_prof_stack_use(td, frame); | ||||
ehem_freebsd_m5p.com: Key advantage here. Moving the counters to `struct intr_event` causes them to be widely spaced… | |||||
Done Inline ActionsThat is a very bold statement. Do you have some technical evidence (TRM, measurements or whatever) that supports this thesis on all supported platforms? mmel: That is a very bold statement. Do you have some technical evidence (TRM, measurements or… | |||||
Done Inline ActionsNo, I haven't done any benchmarking, though I suspect I was overrating the value of this. When the topic came up, the justification I was given for isrc_increment_count() using atomic_add_long() only for IPI interrupts was: atomic_add_long() could be quite expensive on some architectures if another processor was writing to the same cache line. The intr_event structure looks to be roughly 128 bytes, which is 2 complete cache lines on many processors; as a result the counters are unlikely to share their cache line, and thus atomic increment becomes reasonable for all cases. ehem_freebsd_m5p.com: No, I haven't done any benchmarking, though I suspect I was overrating the value of this.
When… | |||||
Not Done Inline ActionsAs I mentioned in another review, atomics are incredibly costly and should be avoided if possible. Counting stuff is a prime example. Bouncing cachelines to update things is also costly, which is why normally there are cpu-private fields guaranteed to not false share with other cpus. mjg: As I mentioned in another review atomics are incredibly costly and should be avoided if… | |||||
#endif | #endif | ||||
/* An interrupt with no event or handlers is a stray interrupt. */ | /* An interrupt with no event or handlers is a stray interrupt. */ | ||||
if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers)) | if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers)) | ||||
return (EINVAL); | return (EINVAL); | ||||
/* | /* | ||||
* Execute fast interrupt handlers directly. | * Execute fast interrupt handlers directly. | ||||
▲ Show 20 Lines • Show All 246 Lines • ▼ Show 20 Lines | if (swi_add(&clk_intr_event, "clk", NULL, NULL, SWI_CLOCK, | ||||
INTR_MPSAFE, NULL)) | INTR_MPSAFE, NULL)) | ||||
panic("died while creating clk swi ithread"); | panic("died while creating clk swi ithread"); | ||||
} | } | ||||
SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, | SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, | ||||
NULL); | NULL); | ||||
/* | /* | ||||
* Sysctls used by systat and others: hw.intrnames and hw.intrcnt. | * Sysctls used by systat and others: hw.intrnames and hw.intrcnt. | ||||
* The data for this is machine dependent, and the declarations are in machine | * The data for this is machine independent. | ||||
* dependent code. The layout of intrnames and intrcnt however is machine | |||||
* independent. | |||||
* | |||||
* We do not know the length of intrcnt and intrnames at compile time, so | |||||
* calculate things at run time. | |||||
*/ | */ | ||||
static int | static int | ||||
sysctl_intrnames(SYSCTL_HANDLER_ARGS) | sysctl_intrnames(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
return (sysctl_handle_opaque(oidp, intrnames, sintrnames, req)); | struct intr_event *ie; | ||||
const char straystr[] = "stray ", padstr[sizeof(straystr)] = ""; | |||||
int error; | |||||
TAILQ_FOREACH(ie, &event_list, ie_list) { | |||||
error = SYSCTL_OUT(req, ie->ie_fullname, | |||||
sizeof(ie->ie_fullname)); | |||||
if (error != 0) | |||||
return (error); | |||||
Done Inline Actionswhile strictly speaking it is legal to hold sx locks across copyin/copyout, it is a bad idea user memory has to be assumed to be backed with devices/filesystems with indefinite latency. what you normally want to do is either malloc to have something to copy out (if small enough) or wire user pages, eagain before you take the event_lock. mjg: while strictly speaking it is legal to hold sx locks across copyin/copyout, it is a bad idea… | |||||
Done Inline ActionsI concede the copyout could be very slow. Good news is the lock is only needed exclusively for intr_event_create() and intr_event_destroy(). On most systems those will only be called during system start/shutdown, when it isn't too likely for users to be present to try to retrieve this data. The type of system most likely to see those called during normal operation is a VM control system, which had better not have many logged-in users (as they are a sensitive area). I halfway wondered whether it might be safe not to have the lock during this operation, but it isn't. The concern is the entry being copied out being the one targeted for destruction, or a torn read during the list modification. The primary defense here is all uses being fairly rare. ehem_freebsd_m5p.com: I concede the copyout could be very slow. Good news is the lock is only needed exclusively for… | |||||
Not Done Inline Actionsit's not about being fast or slow, but the fact that it can hang for an indefinite amount of time. trivial example: a funny user mmaps() an nfs-backed file and invokes the sysctl with that area as output. if, say, network goes down, this is going to sit there. Again, this is a non-starter and is not hard to avoid. mjg: it's not about being fast or slow, but the fact that it can hang for an indefinite amount of… | |||||
Done Inline ActionsI've got no fundamental objections to either proposed alternative. Right now though I'm looking at the issue of updating vmstat to continue working its magic. Ultimately this is a case of which failure mode is preferred. The interrupt list could be long (there was a mention of a board with 90K interrupts). We're no longer in the era when 256MB of memory was huge, though, so now a 4MB temporary isn't that awful. ehem_freebsd_m5p.com: I've got no fundamental objections to either proposed alternative. Right now though I'm… | |||||
Done Inline ActionsI should also point out intr_event_create() already has a malloc(,, M_WAITOK); so it can already hang for an indefinite amount of time. ehem_freebsd_m5p.com: I should also point out `intr_event_create()` already has a `malloc(,, M_WAITOK);` so it can… | |||||
Done Inline ActionsI note this is fairly similar to the x86's implementation of sysctl("hw.intrs") (in sys/x86/x86/intr_machdep.c, function sysctl_hw_intrs()). That implementation uses a shared-exclusive lock in the exact same way. That also inspired a bit of change here to take care of a concern I already had. ehem_freebsd_m5p.com: I note this is fairly similar to the x86's implementation of `sysctl("hw.intrs")` (in… | |||||
error = SYSCTL_OUT(req, padstr, sizeof(padstr) - 1); | |||||
if (error != 0) | |||||
return (error); | |||||
error = SYSCTL_OUT(req, straystr, sizeof(straystr) - 1); | |||||
if (error != 0) | |||||
return (error); | |||||
Done Inline ActionsI believe this is unnecessary as ie_fullname is always supposed to include a '\0', but I wasn't quite 100% certain and thus presently have the safe option. ehem_freebsd_m5p.com: I believe this is unnecessary as `ie_fullname` is always supposed to include a '\0', but I… | |||||
error = SYSCTL_OUT(req, ie->ie_fullname, | |||||
sizeof(ie->ie_fullname)); | |||||
if (error != 0) | |||||
return (error); | |||||
} | } | ||||
return (0); | |||||
} | |||||
SYSCTL_PROC(_hw, OID_AUTO, intrnames, | SYSCTL_PROC(_hw, OID_AUTO, intrnames, | ||||
CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, | CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, | ||||
sysctl_intrnames, "", | sysctl_intrnames, "", | ||||
"Interrupt Names"); | "Interrupt Names"); | ||||
static int | static int | ||||
sysctl_intrcnt(SYSCTL_HANDLER_ARGS) | sysctl_intrcnt(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
#ifdef SCTL_MASK32 | struct intr_event *ie; | ||||
uint32_t *intrcnt32; | |||||
unsigned i; | |||||
int error; | int error; | ||||
int sz = sizeof(ie->ie_intrcnt); | |||||
u_long val; | |||||
void *arg = &val; | |||||
#ifdef SCTL_MASK32 | |||||
uint32_t val32; | |||||
if (req->flags & SCTL_MASK32) { | if (req->flags & SCTL_MASK32) { | ||||
if (!req->oldptr) | sz = sizeof(val32); | ||||
return (sysctl_handle_opaque(oidp, NULL, sintrcnt / 2, req)); | arg = &val32; | ||||
intrcnt32 = malloc(sintrcnt / 2, M_TEMP, M_NOWAIT); | |||||
if (intrcnt32 == NULL) | |||||
return (ENOMEM); | |||||
for (i = 0; i < sintrcnt / sizeof (u_long); i++) | |||||
intrcnt32[i] = intrcnt[i]; | |||||
error = sysctl_handle_opaque(oidp, intrcnt32, sintrcnt / 2, req); | |||||
free(intrcnt32, M_TEMP); | |||||
Done Inline ActionsGuess I should also note this improvement. Avoiding this significant temporary memory allocation. ehem_freebsd_m5p.com: Guess I should also note this improvement. Avoiding this significant temporary memory… | |||||
return (error); | |||||
} | } | ||||
#endif | #endif | ||||
return (sysctl_handle_opaque(oidp, intrcnt, sintrcnt, req)); | |||||
TAILQ_FOREACH(ie, &event_list, ie_list) { | |||||
Done Inline ActionsOne item here, does this actually need atomic_load_long()? The counter could nominally be changing rapidly, so returning a snapshot is nice; however, the snapshot is immediately out of date. ehem_freebsd_m5p.com: One item here, does this actually need `atomic_load_long()`? The counter could nominally be… | |||||
#ifdef SCTL_MASK32 | |||||
val32 = | |||||
#endif | |||||
val = ie->ie_intrcnt; | |||||
error = SYSCTL_OUT(req, arg, sz); | |||||
if (error != 0) | |||||
return (error); | |||||
#ifdef SCTL_MASK32 | |||||
val32 = | |||||
Done Inline ActionsSame question as above. ehem_freebsd_m5p.com: Same question as above. | |||||
#endif | |||||
val = ie->ie_stray; | |||||
error = SYSCTL_OUT(req, arg, sz); | |||||
if (error != 0) | |||||
return (error); | |||||
} | } | ||||
return (0); | |||||
} | |||||
SYSCTL_PROC(_hw, OID_AUTO, intrcnt, | SYSCTL_PROC(_hw, OID_AUTO, intrcnt, | ||||
CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, | CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, | ||||
sysctl_intrcnt, "", | sysctl_intrcnt, "", | ||||
"Interrupt Counts"); | "Interrupt Counts"); | ||||
#ifdef DDB | #ifdef DDB | ||||
/* | /* | ||||
* DDB command to dump the interrupt statistics. | * DDB command to dump the interrupt statistics. | ||||
*/ | */ | ||||
DB_SHOW_COMMAND_FLAGS(intrcnt, db_show_intrcnt, DB_CMD_MEMSAFE) | DB_SHOW_COMMAND_FLAGS(intrcnt, db_show_intrcnt, DB_CMD_MEMSAFE) | ||||
{ | { | ||||
u_long *i; | struct intr_event *ie; | ||||
char *cp; | |||||
u_int j; | |||||
cp = intrnames; | for (ie = TAILQ_FIRST(&event_list); ie && !db_pager_quit; | ||||
j = 0; | ie = TAILQ_NEXT(ie, ie_list)) | ||||
for (i = intrcnt; j < (sintrcnt / sizeof(u_long)) && !db_pager_quit; | db_printf("%s\t%lu\nstray %s\t%lu\n", ie->ie_fullname, | ||||
i++, j++) { | ie->ie_intrcnt, ie->ie_fullname, ie->ie_stray); | ||||
if (*cp == '\0') | |||||
break; | |||||
if (*i != 0) | |||||
db_printf("%s\t%lu\n", cp, *i); | |||||
cp += strlen(cp) + 1; | |||||
Done Inline Actionscp = intrnames; at initialization, then cp += strlen(cp) + 1; as an increment each time through the loop. This is clearly interpreting "intrnames" as a packed series of strings separated by null-terminators. ehem_freebsd_m5p.com: `cp = intrnames;` at initialization, then `cp += strlen(cp) + 1;` as an increment each time… | |||||
} | |||||
} | } | ||||
#endif | #endif |
Key advantage here. Moving the counters to struct intr_event causes them to be widely spaced and unlikely to have multiple on the same cache line (aside from count and stray). Thus it becomes reasonable to always use atomic add.