Index: sys/kern/subr_witness.c =================================================================== --- sys/kern/subr_witness.c +++ sys/kern/subr_witness.c @@ -571,7 +571,7 @@ */ { "bpf global lock", &lock_class_mtx_sleep }, { "bpf interface lock", &lock_class_rw }, - { "bpf cdev lock", &lock_class_mtx_sleep }, + { "bpf cdev lock", &lock_class_rw }, { NULL, NULL }, /* * NFS server Index: sys/net/bpf.c =================================================================== --- sys/net/bpf.c +++ sys/net/bpf.c @@ -174,8 +174,11 @@ static void bpf_detachd(struct bpf_d *); static void bpf_detachd_locked(struct bpf_d *); static void bpf_freed(struct bpf_d *); -static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **, - struct sockaddr *, int *, struct bpf_d *); +static int bpf_fill_hdrtype(int, struct sockaddr *, int *); +static int bpf_movein(struct uio *, struct mbuf **); +static int bpf_sendmbuf(struct bpf_d *, struct mbuf *); +static int bpf_prepare_mbuf_locked(struct bpf_d *, struct mbuf *, + struct sockaddr *, int *, struct mbuf **); static int bpf_setif(struct bpf_d *, struct ifreq *); static void bpf_timed_out(void *); static __inline void @@ -463,17 +466,14 @@ /* * General BPF functions. */ + +/* + * Fills in L2 header length and AF based on bpf DLT. + */ static int -bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp, - struct sockaddr *sockp, int *hdrlen, struct bpf_d *d) +bpf_fill_hdrtype(int linktype, struct sockaddr *sockp, int *hdrlen) { - const struct ieee80211_bpf_params *p; - struct ether_header *eh; - struct mbuf *m; - int error; - int len; int hlen; - int slen; /* * Build a sockaddr based on the data link layer type. @@ -546,30 +546,79 @@ return (EIO); } + *hdrlen = hlen; + return (0); +} + +/* + * Copies a frame from userspace into a newly created mbuf. + * Returns 0 and saves the mbuf into @mp on success. 
+ */ +static int +bpf_movein(struct uio *uio, struct mbuf **mp) +{ + int error, len; + struct mbuf *m; + len = uio->uio_resid; - if (len < hlen || len - hlen > ifp->if_mtu) + if (len > MJUMPAGESIZE) return (EMSGSIZE); m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR); if (m == NULL) return (EIO); m->m_pkthdr.len = m->m_len = len; - *mp = m; error = uiomove(mtod(m, u_char *), len, uio); - if (error) - goto bad; - - slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len); - if (slen == 0) { - error = EPERM; - goto bad; + if (error != 0) { + m_freem(m); + return (error); } - /* Check for multicast destination */ + *mp = m; + return (0); +} + +/* + * Prepares mbuf for sending by finalizing link-layer header, + * filling in mbuf flags and performing sanity checks. + */ +static int +bpf_prepare_mbuf_locked(struct bpf_d *d, struct mbuf *m, struct sockaddr *dst, + int *phlen, struct mbuf **pmc) +{ + const struct ieee80211_bpf_params *p; + struct ether_header *eh; + struct ifnet *ifp; + struct mbuf *mc; + int error, hlen, len, linktype; + + BPFD_LOCK_ASSERT(d); + + if (d->bd_bif == NULL) + return (ENXIO); + + ifp = d->bd_bif->bif_ifp; + if ((ifp->if_flags & IFF_UP) == 0) + return (ENETDOWN); + + linktype = d->bd_bif->bif_dlt; + error = bpf_fill_hdrtype(linktype, dst, &hlen); + if (error != 0) + return (error); + + len = m->m_len; + if (len < hlen || len - hlen > ifp->if_mtu) + return (EMSGSIZE); + + if (bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len) == 0) + return (EPERM); + counter_u64_add(d->bd_wfcount, 1); + switch (linktype) { case DLT_EN10MB: eh = mtod(m, struct ether_header *); + /* Set mbuf flags for broadcast/multicast destinations */ if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost, ETHER_ADDR_LEN) == 0) @@ -588,7 +637,7 @@ * Make room for link header, and copy it to sockaddr */ if (hlen != 0) { - if (sockp->sa_family == AF_IEEE80211) { + if (dst->sa_family == AF_IEEE80211) { /* * Collect true length from the 
parameter header * NB: sockp is known to be zero'd so if we do a @@ -600,19 +649,40 @@ */ p = mtod(m, const struct ieee80211_bpf_params *); hlen = p->ibp_len; - if (hlen > sizeof(sockp->sa_data)) { - error = EINVAL; - goto bad; - } + if (hlen > sizeof(dst->sa_data)) + return (EINVAL); } - bcopy(mtod(m, const void *), sockp->sa_data, hlen); + bcopy(mtod(m, const void *), dst->sa_data, hlen); } - *hdrlen = hlen; + if (d->bd_hdrcmplt) + dst->sa_family = pseudo_AF_HDRCMPLT; + + if (d->bd_feedback) { + mc = m_dup(m, M_NOWAIT); + if (mc != NULL) + mc->m_pkthdr.rcvif = ifp; + /* Set M_PROMISC for outgoing packets to be discarded. */ + if (d->bd_direction == BPF_D_INOUT) + m->m_flags |= M_PROMISC; + } else + mc = NULL; + + m->m_pkthdr.len -= hlen; + m->m_len -= hlen; + m->m_data += hlen; /* XXX */ + +#ifdef MAC + CURVNET_SET(ifp->if_vnet); + mac_bpfdesc_create_mbuf(d, m); + if (mc != NULL) + mac_bpfdesc_create_mbuf(d, mc); + CURVNET_RESTORE(); +#endif + *phlen = hlen; + *pmc = mc; + return (0); -bad: - m_freem(m); - return (error); } /* @@ -912,9 +982,9 @@ mac_bpfdesc_init(d); mac_bpfdesc_create(td->td_ucred, d); #endif - mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF); - callout_init_mtx(&d->bd_callout, &d->bd_lock, 0); - knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock); + rw_init(&d->bd_lock, "bpf cdev lock"); + callout_init_rw(&d->bd_callout, &d->bd_lock, 0); + knlist_init_rw_reader(&d->bd_sel.si_note, &d->bd_lock); return (0); } @@ -998,7 +1068,7 @@ BPFD_UNLOCK(d); return (EWOULDBLOCK); } - error = msleep(d, &d->bd_lock, PRINET|PCATCH, + error = rw_sleep(d, &d->bd_lock, PRINET|PCATCH, "bpf", d->bd_rtout); if (error == EINTR || error == ERESTART) { BPFD_UNLOCK(d); @@ -1109,11 +1179,8 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag) { struct bpf_d *d; - struct ifnet *ifp; - struct mbuf *m, *mc; - struct sockaddr dst; - struct route ro; - int error, hlen; + struct mbuf *m; + int error; error = devfs_get_cdevpriv((void **)&d); if (error != 0) @@ 
-1121,61 +1188,53 @@ BPF_PID_REFRESH_CUR(d); counter_u64_add(d->bd_wcount, 1); - /* XXX: locking required */ - if (d->bd_bif == NULL) { - counter_u64_add(d->bd_wdcount, 1); - return (ENXIO); - } - ifp = d->bd_bif->bif_ifp; - - if ((ifp->if_flags & IFF_UP) == 0) { - counter_u64_add(d->bd_wdcount, 1); - return (ENETDOWN); - } - if (uio->uio_resid == 0) { counter_u64_add(d->bd_wdcount, 1); return (0); } - bzero(&dst, sizeof(dst)); m = NULL; - hlen = 0; - /* XXX: bpf_movein() can sleep */ - error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp, - &m, &dst, &hlen, d); - if (error) { + error = bpf_movein(uio, &m); + if (error != 0) { counter_u64_add(d->bd_wdcount, 1); return (error); } - counter_u64_add(d->bd_wfcount, 1); - if (d->bd_hdrcmplt) - dst.sa_family = pseudo_AF_HDRCMPLT; - if (d->bd_feedback) { - mc = m_dup(m, M_NOWAIT); - if (mc != NULL) - mc->m_pkthdr.rcvif = ifp; - /* Set M_PROMISC for outgoing packets to be discarded. */ - if (d->bd_direction == BPF_D_INOUT) - m->m_flags |= M_PROMISC; - } else - mc = NULL; + error = bpf_sendmbuf(d, m); + return (error); +} - m->m_pkthdr.len -= hlen; - m->m_len -= hlen; - m->m_data += hlen; /* XXX */ +/* + * Sends mbuf @m to the interface specified by @d. + * Frees @m in case of error. + * Returns 0 on success. 
+ */ +static int +bpf_sendmbuf(struct bpf_d *d, struct mbuf *m) +{ + struct route ro; + struct sockaddr dst; + struct mbuf *mc; + struct ifnet *ifp; + int error, hlen; - CURVNET_SET(ifp->if_vnet); -#ifdef MAC - BPFD_LOCK(d); - mac_bpfdesc_create_mbuf(d, m); - if (mc != NULL) - mac_bpfdesc_create_mbuf(d, mc); - BPFD_UNLOCK(d); -#endif + ifp = NULL; + bzero(&dst, sizeof(dst)); + hlen = 0; + mc = NULL; + BPFD_RLOCK(d); + error = bpf_prepare_mbuf_locked(d, m, &dst, &hlen, &mc); + if (error == 0) + ifp = d->bd_bif->bif_ifp; + BPFD_RUNLOCK(d); + if (error != 0) { + counter_u64_add(d->bd_wdcount, 1); + m_freem(m); + return (error); + } + bzero(&ro, sizeof(ro)); if (hlen != 0) { ro.ro_prepend = (u_char *)&dst.sa_data; @@ -1183,8 +1242,9 @@ ro.ro_flags = RT_HAS_HEADER; } + CURVNET_SET(ifp->if_vnet); error = (*ifp->if_output)(ifp, m, &dst, &ro); - if (error) + if (error != 0) counter_u64_add(d->bd_wdcount, 1); if (mc != NULL) { @@ -1356,7 +1416,7 @@ BPFD_LOCK(d); n = d->bd_slen; while (d->bd_hbuf_in_use) - mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, + rw_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET, "bd_hbuf", 0); if (d->bd_hbuf) n += d->bd_hlen; @@ -1513,13 +1573,15 @@ * by the time we get here. 
*/ alloc_buf = 0; + size = 0; BPFD_LOCK(d); if (d->bd_bufmode == BPF_BUFMODE_BUFFER && - d->bd_sbuf == NULL) + d->bd_sbuf == NULL) { alloc_buf = 1; + size = d->bd_bufsize; + } BPFD_UNLOCK(d); if (alloc_buf) { - size = d->bd_bufsize; error = bpf_buffer_ioctl_sblen(d, &size); if (error != 0) break; @@ -2542,7 +2604,7 @@ } if (d->bd_wfilter != NULL) free((caddr_t)d->bd_wfilter, M_BPF); - mtx_destroy(&d->bd_lock); + rw_destroy(&d->bd_lock); counter_u64_free(d->bd_rcount); counter_u64_free(d->bd_dcount); Index: sys/net/bpf_buffer.c =================================================================== --- sys/net/bpf_buffer.c +++ sys/net/bpf_buffer.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include #include Index: sys/net/bpf_zerocopy.c =================================================================== --- sys/net/bpf_zerocopy.c +++ sys/net/bpf_zerocopy.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include Index: sys/net/bpfdesc.h =================================================================== --- sys/net/bpfdesc.h +++ sys/net/bpfdesc.h @@ -92,7 +92,7 @@ int bd_sig; /* signal to send upon packet reception */ struct sigio * bd_sigio; /* information for async I/O */ struct selinfo bd_sel; /* bsd select info */ - struct mtx bd_lock; /* per-descriptor lock */ + struct rwlock bd_lock; /* per-descriptor lock */ struct callout bd_callout; /* for BPF timeouts with select */ struct label *bd_label; /* MAC label for descriptor */ counter_u64_t bd_fcount; /* number of packets which matched filter */ @@ -111,9 +111,15 @@ #define BPF_WAITING 1 /* waiting for read timeout in select */ #define BPF_TIMED_OUT 2 /* read timeout has expired in select */ -#define BPFD_LOCK(bd) mtx_lock(&(bd)->bd_lock) -#define BPFD_UNLOCK(bd) mtx_unlock(&(bd)->bd_lock) -#define BPFD_LOCK_ASSERT(bd) mtx_assert(&(bd)->bd_lock, MA_OWNED) +#define BPFD_LOCK(bd) BPFD_WLOCK(bd) +#define BPFD_UNLOCK(bd) BPFD_WUNLOCK(bd) +#define BPFD_RLOCK(bd) 
rw_rlock(&(bd)->bd_lock) +#define BPFD_RUNLOCK(bd) rw_runlock(&(bd)->bd_lock) +#define BPFD_WLOCK(bd) rw_wlock(&(bd)->bd_lock) +#define BPFD_WUNLOCK(bd) rw_wunlock(&(bd)->bd_lock) +#define BPFD_LOCK_ASSERT(bd) rw_assert(&(bd)->bd_lock, RA_LOCKED) +#define BPFD_RLOCK_ASSERT(bd) rw_assert(&(bd)->bd_lock, RA_RLOCKED) +#define BPFD_WLOCK_ASSERT(bd) rw_assert(&(bd)->bd_lock, RA_WLOCKED) #define BPF_PID_REFRESH(bd, td) (bd)->bd_pid = (td)->td_proc->p_pid #define BPF_PID_REFRESH_CUR(bd) (bd)->bd_pid = curthread->td_proc->p_pid