Index: sys/dev/netmap/netmap.c =================================================================== --- sys/dev/netmap/netmap.c +++ sys/dev/netmap/netmap.c @@ -830,6 +830,7 @@ struct netmap_kring *kring; u_int n[NR_TXRX]; enum txrx t; + int err = 0; if (na->tx_rings != NULL) { if (netmap_debug & NM_DEBUG_ON) @@ -869,7 +870,6 @@ for (i = 0; i < n[t]; i++) { kring = NMR(na, t)[i]; bzero(kring, sizeof(*kring)); - kring->na = na; kring->notify_na = na; kring->ring_id = i; kring->tx = t; @@ -895,13 +895,21 @@ nm_txrx2str(t), i); ND("ktx %s h %d c %d t %d", kring->name, kring->rhead, kring->rcur, kring->rtail); + err = nm_os_selinfo_init(&kring->si, kring->name); + if (err) { + netmap_krings_delete(na); + return err; + } mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF); - nm_os_selinfo_init(&kring->si); + kring->na = na; /* setting this field marks the mutex as initialized */ } - nm_os_selinfo_init(&na->si[t]); + err = nm_os_selinfo_init(&na->si[t], na->name); + if (err) { + netmap_krings_delete(na); + return err; + } } - return 0; } @@ -925,7 +933,8 @@ /* we rely on the krings layout described above */ for ( ; kring != na->tailroom; kring++) { - mtx_destroy(&(*kring)->q_lock); + if ((*kring)->na != NULL) + mtx_destroy(&(*kring)->q_lock); nm_os_selinfo_uninit(&(*kring)->si); } nm_os_free(na->tx_rings); Index: sys/dev/netmap/netmap_freebsd.c =================================================================== --- sys/dev/netmap/netmap_freebsd.c +++ sys/dev/netmap/netmap_freebsd.c @@ -58,6 +58,7 @@ #include /* RFNOWAIT */ #include /* sched_bind() */ #include /* mp_maxid */ +#include /* taskqueue_enqueue(), taskqueue_create(), ... */ #include #include #include /* IFT_ETHER */ @@ -75,16 +76,48 @@ /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ -void nm_os_selinfo_init(NM_SELINFO_T *si) { - struct mtx *m = &si->m; - mtx_init(m, "nm_kn_lock", NULL, MTX_DEF); - knlist_init_mtx(&si->si.si_note, m); +static void +nm_kqueue_notify(void *opaque, int pending) +{ + struct nm_selinfo *si = opaque; + + /* We use a non-zero hint to distinguish this notification call + * from the call done in kqueue_scan(), which uses hint=0. + */ + KNOTE_UNLOCKED(&si->si.si_note, /*hint=*/0x100); } +int nm_os_selinfo_init(NM_SELINFO_T *si, const char *name) { + int err; + + TASK_INIT(&si->ntfytask, 0, nm_kqueue_notify, si); + si->ntfytq = taskqueue_create(name, M_NOWAIT, + taskqueue_thread_enqueue, &si->ntfytq); + if (si->ntfytq == NULL) + return -ENOMEM; + err = taskqueue_start_threads(&si->ntfytq, 1, PI_NET, "tq %s", name); + if (err) { + taskqueue_free(si->ntfytq); + si->ntfytq = NULL; + return err; + } + + snprintf(si->mtxname, sizeof(si->mtxname), "nmkl%s", name); + mtx_init(&si->m, si->mtxname, NULL, MTX_DEF); + knlist_init_mtx(&si->si.si_note, &si->m); + + return (0); +} + void nm_os_selinfo_uninit(NM_SELINFO_T *si) { - /* XXX kqueue(9) needed; these will mirror knlist_init. */ + if (si->ntfytq == NULL) { + return; /* si was not initialized */ + } + taskqueue_drain(si->ntfytq, &si->ntfytask); + taskqueue_free(si->ntfytq); + si->ntfytq = NULL; knlist_delete(&si->si.si_note, curthread, /*islocked=*/0); knlist_destroy(&si->si.si_note); /* now we don't need the mutex anymore */ @@ -1292,13 +1325,18 @@ /* * In addition to calling selwakeuppri(), nm_os_selwakeup() also - * needs to call KNOTE to wake up kqueue listeners. - * We use a non-zero 'hint' argument to inform the netmap_knrw() - * function that it is being called from 'nm_os_selwakeup'; this - * is necessary because when netmap_knrw() is called by the kevent - * subsystem (i.e. kevent_scan()) we also need to call netmap_poll(). - * The knote uses a private mutex associated to the 'si' (see struct - * selinfo, struct nm_selinfo, and nm_os_selinfo_init). + * needs to call knote() to wake up kqueue listeners. + * This operation is deferred to a taskqueue in order to avoid possible + * lock order reversals; these may happen because knote() grabs a + * private lock associated to the 'si' (see struct selinfo, + * struct nm_selinfo, and nm_os_selinfo_init), and nm_os_selwakeup() + * can be called while holding the lock associated to a different + * 'si'. + * When calling knote() we use a non-zero 'hint' argument to inform + * the netmap_knrw() function that it is being called from + * 'nm_os_selwakeup'; this is necessary because when netmap_knrw() is + * called by the kevent subsystem (i.e. kevent_scan()) we also need to + * call netmap_poll(). * * The netmap_kqfilter() function registers one or another f_event * depending on read or write mode. A pointer to the struct @@ -1315,11 +1353,7 @@ if (netmap_verbose) nm_prinf("on knote %p", &si->si.si_note); selwakeuppri(&si->si, PI_NET); - /* We use a non-zero hint to distinguish this notification call - * from the call done in kqueue_scan(), which uses hint=0. - */ - KNOTE(&si->si.si_note, /*hint=*/0x100, - mtx_owned(&si->m) ? KNF_LISTLOCKED : 0); + taskqueue_enqueue(si->ntfytq, &si->ntfytask); } void Index: sys/dev/netmap/netmap_kern.h =================================================================== --- sys/dev/netmap/netmap_kern.h +++ sys/dev/netmap/netmap_kern.h @@ -133,7 +133,10 @@ struct nm_selinfo { struct selinfo si; + struct taskqueue *ntfytq; + struct task ntfytask; struct mtx m; + char mtxname[32]; }; @@ -295,7 +298,7 @@ struct nm_bdg_args; /* os-specific NM_SELINFO_T initialzation/destruction functions */ -void nm_os_selinfo_init(NM_SELINFO_T *); +int nm_os_selinfo_init(NM_SELINFO_T *, const char *name); void nm_os_selinfo_uninit(NM_SELINFO_T *); const char *nm_dump_buf(char *p, int len, int lim, char *dst);