D20224.diff

Index: head/sys/net/bpf.h
===================================================================
--- head/sys/net/bpf.h
+++ head/sys/net/bpf.h
@@ -42,6 +42,9 @@
#ifndef _NET_BPF_H_
#define _NET_BPF_H_
+#include <sys/ck.h>
+#include <net/dlt.h>
+
/* BSD style release date */
#define BPF_RELEASE 199606
@@ -233,9 +236,6 @@
u_int _bzh_pad[5];
};
-/* Pull in data-link level type codes. */
-#include <net/dlt.h>
-
/*
* The instruction encodings.
*
@@ -409,10 +409,11 @@
* bpf_peers_present() calls.
*/
struct bpf_if;
+CK_LIST_HEAD(bpfd_list, bpf_d);
struct bpf_if_ext {
- LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
- LIST_HEAD(, bpf_d) bif_dlist; /* descriptor list */
+ CK_LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
+ struct bpfd_list bif_dlist; /* descriptor list */
};
void bpf_bufheld(struct bpf_d *d);
@@ -436,7 +437,7 @@
struct bpf_if_ext *ext;
ext = (struct bpf_if_ext *)bpf;
- if (!LIST_EMPTY(&ext->bif_dlist))
+ if (!CK_LIST_EMPTY(&ext->bif_dlist))
return (1);
return (0);
}
Index: head/sys/net/bpf.c
===================================================================
--- head/sys/net/bpf.c
+++ head/sys/net/bpf.c
@@ -3,6 +3,7 @@
*
* Copyright (c) 1990, 1991, 1993
* The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org>
*
* This code is derived from the Stanford/CMU enet packet filter,
* (net/enet.c) distributed as part of 4.3BSD, and code contributed
@@ -46,7 +47,6 @@
#include <sys/types.h>
#include <sys/param.h>
#include <sys/lock.h>
-#include <sys/rwlock.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
@@ -99,7 +99,7 @@
MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
static struct bpf_if_ext dead_bpf_if = {
- .bif_dlist = LIST_HEAD_INITIALIZER()
+ .bif_dlist = CK_LIST_HEAD_INITIALIZER()
};
struct bpf_if {
@@ -108,19 +108,22 @@
struct bpf_if_ext bif_ext; /* public members */
u_int bif_dlt; /* link layer type */
u_int bif_hdrlen; /* length of link header */
+ struct bpfd_list bif_wlist; /* writer-only list */
struct ifnet *bif_ifp; /* corresponding interface */
- struct rwlock bif_lock; /* interface lock */
- LIST_HEAD(, bpf_d) bif_wlist; /* writer-only list */
- int bif_flags; /* Interface flags */
struct bpf_if **bif_bpf; /* Pointer to pointer to us */
+ volatile u_int bif_refcnt;
+ struct epoch_context epoch_ctx;
};
CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
-#define BPFIF_RLOCK(bif) rw_rlock(&(bif)->bif_lock)
-#define BPFIF_RUNLOCK(bif) rw_runlock(&(bif)->bif_lock)
-#define BPFIF_WLOCK(bif) rw_wlock(&(bif)->bif_lock)
-#define BPFIF_WUNLOCK(bif) rw_wunlock(&(bif)->bif_lock)
+struct bpf_program_buffer {
+ struct epoch_context epoch_ctx;
+#ifdef BPF_JITTER
+ bpf_jit_filter *func;
+#endif
+ void *buffer[0];
+};
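
For orientation, here is a small sketch (not part of the diff) of how this buffer is meant to be filled: the filter instructions live directly behind the header in a single allocation, so consumers keep a plain struct bpf_insn pointer while the header can later be recovered with __containerof(). It assumes the bpf_program_buffer_alloc() helper introduced further down and a non-empty program; the example_ name is hypothetical.

/*
 * Sketch: copy a filter program from userland into a bpf_program_buffer.
 *
 *   +---------------------------+--------------------------------+
 *   | struct bpf_program_buffer | flen * sizeof(struct bpf_insn) |
 *   | (epoch_ctx, [jit func])   | <- returned filter pointer     |
 *   +---------------------------+--------------------------------+
 */
static struct bpf_insn *
example_copyin_filter(const struct bpf_program *fp, u_int flen)
{
	struct bpf_program_buffer *buf;
	struct bpf_insn *filter;
	size_t size;

	size = flen * sizeof(*fp->bf_insns);
	buf = bpf_program_buffer_alloc(size, M_WAITOK);
	filter = (struct bpf_insn *)buf->buffer;
	if (copyin(fp->bf_insns, filter, size) != 0 ||
	    !bpf_validate(filter, flen)) {
		free(buf, M_BPF);
		return (NULL);
	}
	/* The header is found again later via __containerof() on free. */
	return (filter);
}
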
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
@@ -173,18 +176,24 @@
#define BPF_LOCK_ASSERT() sx_assert(&bpf_sx, SA_XLOCKED)
/*
* bpf_iflist is a list of BPF interface structures, each corresponding to a
- * specific DLT. The same network interface might have several BPF interface
+ * specific DLT. The same network interface might have several BPF interface
* structures registered by different layers in the stack (i.e., 802.11
* frames, ethernet frames, etc).
*/
-static LIST_HEAD(, bpf_if) bpf_iflist, bpf_freelist;
+CK_LIST_HEAD(bpf_iflist, bpf_if);
+static struct bpf_iflist bpf_iflist;
static struct sx bpf_sx; /* bpf global lock */
static int bpf_bpfd_cnt;
+static void bpfif_ref(struct bpf_if *);
+static void bpfif_rele(struct bpf_if *);
+
+static void bpfd_ref(struct bpf_d *);
+static void bpfd_rele(struct bpf_d *);
static void bpf_attachd(struct bpf_d *, struct bpf_if *);
static void bpf_detachd(struct bpf_d *);
-static void bpf_detachd_locked(struct bpf_d *);
-static void bpf_freed(struct bpf_d *);
+static void bpf_detachd_locked(struct bpf_d *, bool);
+static void bpfd_free(epoch_context_t);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
struct sockaddr *, int *, struct bpf_d *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
@@ -243,37 +252,106 @@
.f_event = filt_bpfread,
};
-eventhandler_tag bpf_ifdetach_cookie = NULL;
-
/*
- * LOCKING MODEL USED BY BPF:
+ * LOCKING MODEL USED BY BPF
+ *
* Locks:
- * 1) global lock (BPF_LOCK). Mutex, used to protect interface addition/removal,
- * some global counters and every bpf_if reference.
- * 2) Interface lock. Rwlock, used to protect list of BPF descriptors and their filters.
- * 3) Descriptor lock. Mutex, used to protect BPF buffers and various structure fields
- * used by bpf_mtap code.
+ * 1) global lock (BPF_LOCK). An sx lock, used to protect some global
+ * counters and all bpf_iflist changes, and to serialize ioctl access
+ * to bpf descriptors.
+ * 2) Descriptor lock. Mutex, used to protect BPF buffers and various
+ * structure fields used by bpf_*tap* code.
*
- * Lock order:
+ * Lock order: global lock, then descriptor lock.
*
- * Global lock, interface lock, descriptor lock
+ * There are several possible consumers:
*
- * We have to acquire interface lock before descriptor main lock due to BPF_MTAP[2]
- * working model. In many places (like bpf_detachd) we start with BPF descriptor
- * (and we need to at least rlock it to get reliable interface pointer). This
- * gives us potential LOR. As a result, we use global lock to protect from bpf_if
- * change in every such place.
+ * 1. The kernel registers an interface pointer with bpfattach().
+ * Each call allocates a new bpf_if structure, references the ifnet
+ * pointer and links the bpf_if into the bpf_iflist chain. This is
+ * protected by the global lock.
*
- * Changing d->bd_bif is protected by 1) global lock, 2) interface lock and
- * 3) descriptor main wlock.
- * Reading bd_bif can be protected by any of these locks, typically global lock.
+ * 2. A userland application issues ioctl() calls on a bpf_d descriptor.
+ * All such calls are serialized with the global lock. BPF filters can
+ * be changed, but the pointer to the old filter is freed using
+ * epoch_call(). Thus it is safe for the bpf_tap/bpf_mtap* code to
+ * access filter pointers, even if a change happens during bpf_tap
+ * execution. Destruction of a bpf_d descriptor is also done using
+ * epoch_call().
*
- * Changing read/write BPF filter is protected by the same three locks,
- * the same applies for reading.
+ * 3. A userland application can write packets into a bpf_d descriptor.
+ * Here we need to be sure that the ifnet won't disappear during
+ * bpfwrite().
*
- * Sleeping in global lock is not allowed due to bpfdetach() using it.
+ * 4. The kernel invokes the bpf_tap/bpf_mtap* functions. Access to
+ * bif_dlist is protected by a net_epoch_preempt section, so it is
+ * safe to access a bpf_d descriptor inside the section.
+ *
+ * 5. The kernel invokes bpfdetach() on interface destruction. All lists
+ * are modified with the global lock held and the actual free() is done
+ * using epoch_call().
*/
+static void
+bpfif_free(epoch_context_t ctx)
+{
+ struct bpf_if *bp;
+
+ bp = __containerof(ctx, struct bpf_if, epoch_ctx);
+ if_rele(bp->bif_ifp);
+ free(bp, M_BPF);
+}
+
+static void
+bpfif_ref(struct bpf_if *bp)
+{
+
+ refcount_acquire(&bp->bif_refcnt);
+}
+
+static void
+bpfif_rele(struct bpf_if *bp)
+{
+
+ if (!refcount_release(&bp->bif_refcnt))
+ return;
+ epoch_call(net_epoch_preempt, &bp->epoch_ctx, bpfif_free);
+}
+
+static void
+bpfd_ref(struct bpf_d *d)
+{
+
+ refcount_acquire(&d->bd_refcnt);
+}
+
+static void
+bpfd_rele(struct bpf_d *d)
+{
+
+ if (!refcount_release(&d->bd_refcnt))
+ return;
+ epoch_call(net_epoch_preempt, &d->epoch_ctx, bpfd_free);
+}
+
+static struct bpf_program_buffer*
+bpf_program_buffer_alloc(size_t size, int flags)
+{
+
+ return (malloc(sizeof(struct bpf_program_buffer) + size,
+ M_BPF, flags));
+}
+
+static void
+bpf_program_buffer_free(epoch_context_t ctx)
+{
+ struct bpf_program_buffer *ptr;
+
+ ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
+#ifdef BPF_JITTER
+ if (ptr->func != NULL)
+ bpf_destroy_jit_filter(ptr->func);
+#endif
+ free(ptr, M_BPF);
+}
+
/*
* Wrapper functions for various buffering methods. If the set of buffer
* modes expands, we will probably want to introduce a switch data structure
@@ -627,7 +705,8 @@
}
/*
- * Attach file to the bpf interface, i.e. make d listen on bp.
+ * Attach descriptor to the bpf interface, i.e. make d listen on bp,
+ * then reset its buffers and counters with reset_d().
*/
static void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
@@ -643,7 +722,7 @@
op_w = V_bpf_optimize_writers || d->bd_writer;
if (d->bd_bif != NULL)
- bpf_detachd_locked(d);
+ bpf_detachd_locked(d, false);
/*
* Point d at bp, and add d to the interface's list.
* Since there are many applications using BPF for
@@ -652,26 +731,27 @@
* some filter is configured.
*/
- BPFIF_WLOCK(bp);
BPFD_LOCK(d);
-
+ /*
+ * Hold a reference to the bpf_if while the descriptor uses this interface.
+ */
+ bpfif_ref(bp);
d->bd_bif = bp;
-
if (op_w != 0) {
/* Add to writers-only list */
- LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
+ CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
/*
* We decrement bd_writer on every filter set operation.
* First BIOCSETF is done by pcap_open_live() to set up
- * snap length. After that appliation usually sets its own filter
+ * snap length. After that the application usually sets its own
+ * filter.
*/
d->bd_writer = 2;
} else
- LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+ CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
+ reset_d(d);
BPFD_UNLOCK(d);
- BPFIF_WUNLOCK(bp);
-
bpf_bpfd_cnt++;
CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
@@ -685,7 +765,8 @@
* Check if we need to upgrade our descriptor @d from write-only mode.
*/
static int
-bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode, int flen)
+bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
+ int flen)
{
int is_snap, need_upgrade;
@@ -705,7 +786,8 @@
* we'd prefer to treat k=0 (deny ALL) case the same way: e.g.
* do not consider upgrading immediately
*/
- if (cmd == BIOCSETF && flen == 1 && fcode[0].code == (BPF_RET | BPF_K))
+ if (cmd == BIOCSETF && flen == 1 &&
+ fcode[0].code == (BPF_RET | BPF_K))
is_snap = 1;
else
is_snap = 0;
@@ -743,88 +825,45 @@
}
/*
- * Add d to the list of active bp filters.
- * Requires bpf_attachd() to be called before.
- */
-static void
-bpf_upgraded(struct bpf_d *d)
-{
- struct bpf_if *bp;
-
- BPF_LOCK_ASSERT();
-
- bp = d->bd_bif;
-
- /*
- * Filter can be set several times without specifying interface.
- * Mark d as reader and exit.
- */
- if (bp == NULL) {
- BPFD_LOCK(d);
- d->bd_writer = 0;
- BPFD_UNLOCK(d);
- return;
- }
-
- BPFIF_WLOCK(bp);
- BPFD_LOCK(d);
-
- /* Remove from writers-only list */
- LIST_REMOVE(d, bd_next);
- LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
- /* Mark d as reader */
- d->bd_writer = 0;
-
- BPFD_UNLOCK(d);
- BPFIF_WUNLOCK(bp);
-
- CTR2(KTR_NET, "%s: upgrade required by pid %d", __func__, d->bd_pid);
-
- EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
-}
-
-/*
* Detach a file from its interface.
*/
static void
bpf_detachd(struct bpf_d *d)
{
BPF_LOCK();
- bpf_detachd_locked(d);
+ bpf_detachd_locked(d, false);
BPF_UNLOCK();
}
static void
-bpf_detachd_locked(struct bpf_d *d)
+bpf_detachd_locked(struct bpf_d *d, bool detached_ifp)
{
- int error;
struct bpf_if *bp;
struct ifnet *ifp;
+ int error;
+ BPF_LOCK_ASSERT();
CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
- BPF_LOCK_ASSERT();
-
/* Check if descriptor is attached */
if ((bp = d->bd_bif) == NULL)
return;
- BPFIF_WLOCK(bp);
BPFD_LOCK(d);
-
+ /* Remove d from the interface's descriptor list. */
+ CK_LIST_REMOVE(d, bd_next);
/* Save bd_writer value */
error = d->bd_writer;
-
- /*
- * Remove d from the interface's descriptor list.
- */
- LIST_REMOVE(d, bd_next);
-
ifp = bp->bif_ifp;
d->bd_bif = NULL;
+ if (detached_ifp) {
+ /*
+ * Notify the descriptor that it is detached, so that any
+ * sleepers wake up and get ENXIO.
+ */
+ bpf_wakeup(d);
+ }
BPFD_UNLOCK(d);
- BPFIF_WUNLOCK(bp);
-
bpf_bpfd_cnt--;
/* Call event handler iff d is attached */
@@ -833,9 +872,9 @@
/*
* Check if this descriptor had requested promiscuous mode.
- * If so, turn it off.
+ * If so and ifnet is not detached, turn it off.
*/
- if (d->bd_promisc) {
+ if (d->bd_promisc && !detached_ifp) {
d->bd_promisc = 0;
CURVNET_SET(ifp->if_vnet);
error = ifpromisc(ifp, 0);
@@ -851,6 +890,7 @@
"bpf_detach: ifpromisc failed (%d)\n", error);
}
}
+ bpfif_rele(bp);
}
/*
@@ -875,8 +915,7 @@
seldrain(&d->bd_sel);
knlist_destroy(&d->bd_sel.si_note);
callout_drain(&d->bd_callout);
- bpf_freed(d);
- free(d, M_BPF);
+ bpfd_rele(d);
}
/*
@@ -918,6 +957,7 @@
d->bd_bufmode = BPF_BUFMODE_BUFFER;
d->bd_sig = SIGIO;
d->bd_direction = BPF_D_INOUT;
+ d->bd_refcnt = 1;
BPF_PID_REFRESH(d, td);
#ifdef MAC
mac_bpfdesc_init(d);
@@ -1093,7 +1133,8 @@
BPFD_LOCK_ASSERT(d);
- if (callout_pending(&d->bd_callout) || !callout_active(&d->bd_callout))
+ if (callout_pending(&d->bd_callout) ||
+ !callout_active(&d->bd_callout))
return;
if (d->bd_state == BPF_WAITING) {
d->bd_state = BPF_TIMED_OUT;
@@ -1119,47 +1160,71 @@
static int
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
{
+ struct route ro;
+ struct sockaddr dst;
+ struct epoch_tracker et;
+ struct bpf_if *bp;
struct bpf_d *d;
struct ifnet *ifp;
struct mbuf *m, *mc;
- struct sockaddr dst;
- struct route ro;
int error, hlen;
error = devfs_get_cdevpriv((void **)&d);
if (error != 0)
return (error);
+ NET_EPOCH_ENTER(et);
+ BPFD_LOCK(d);
BPF_PID_REFRESH_CUR(d);
counter_u64_add(d->bd_wcount, 1);
- /* XXX: locking required */
- if (d->bd_bif == NULL) {
- counter_u64_add(d->bd_wdcount, 1);
- return (ENXIO);
+ if ((bp = d->bd_bif) == NULL) {
+ error = ENXIO;
+ goto out_locked;
}
- ifp = d->bd_bif->bif_ifp;
-
+ ifp = bp->bif_ifp;
if ((ifp->if_flags & IFF_UP) == 0) {
- counter_u64_add(d->bd_wdcount, 1);
- return (ENETDOWN);
+ error = ENETDOWN;
+ goto out_locked;
}
- if (uio->uio_resid == 0) {
- counter_u64_add(d->bd_wdcount, 1);
- return (0);
- }
+ if (uio->uio_resid == 0)
+ goto out_locked;
bzero(&dst, sizeof(dst));
m = NULL;
hlen = 0;
- /* XXX: bpf_movein() can sleep */
- error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
+
+ /*
+ * Take an extra reference, then exit the epoch section and unlock d,
+ * since bpf_movein() can sleep.
+ */
+ bpfd_ref(d);
+ NET_EPOCH_EXIT(et);
+ BPFD_UNLOCK(d);
+
+ error = bpf_movein(uio, (int)bp->bif_dlt, ifp,
&m, &dst, &hlen, d);
- if (error) {
+
+ if (error != 0) {
counter_u64_add(d->bd_wdcount, 1);
+ bpfd_rele(d);
return (error);
}
+
+ BPFD_LOCK(d);
+ /*
+ * Check that the descriptor is still attached to the interface.
+ * It can be detached by bpfdetach(). To avoid accessing a detached
+ * ifnet, free the mbuf and return ENXIO.
+ */
+ if (d->bd_bif == NULL) {
+ counter_u64_add(d->bd_wdcount, 1);
+ BPFD_UNLOCK(d);
+ bpfd_rele(d);
+ m_freem(m);
+ return (ENXIO);
+ }
counter_u64_add(d->bd_wfcount, 1);
if (d->bd_hdrcmplt)
dst.sa_family = pseudo_AF_HDRCMPLT;
@@ -1180,11 +1245,9 @@
CURVNET_SET(ifp->if_vnet);
#ifdef MAC
- BPFD_LOCK(d);
mac_bpfdesc_create_mbuf(d, m);
if (mc != NULL)
mac_bpfdesc_create_mbuf(d, mc);
- BPFD_UNLOCK(d);
#endif
bzero(&ro, sizeof(ro));
@@ -1205,7 +1268,14 @@
m_freem(mc);
}
CURVNET_RESTORE();
+ BPFD_UNLOCK(d);
+ bpfd_rele(d);
+ return (error);
+out_locked:
+ counter_u64_add(d->bd_wdcount, 1);
+ NET_EPOCH_EXIT(et);
+ BPFD_UNLOCK(d);
return (error);
}
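
The rewritten bpfwrite() above is one instance of a general shape: anything that may sleep (here bpf_movein(), which copies the payload in from userspace) must run outside both the epoch section and the descriptor mutex, so the code takes a temporary reference first and re-checks the attachment afterwards. A stripped-down sketch of that shape follows; pause() merely stands in for the sleeping work and the example_ name is hypothetical.

static int
example_write_path(struct bpf_d *d)
{
	struct epoch_tracker et;
	int error;

	error = 0;
	NET_EPOCH_ENTER(et);
	BPFD_LOCK(d);
	if (d->bd_bif == NULL) {		/* not attached to an interface */
		NET_EPOCH_EXIT(et);
		BPFD_UNLOCK(d);
		return (ENXIO);
	}
	/* Keep d alive while we are outside the epoch section. */
	bpfd_ref(d);
	NET_EPOCH_EXIT(et);
	BPFD_UNLOCK(d);

	/* Sleeping work goes here, e.g. bpf_movein(). */
	pause("bpfex", 1);

	BPFD_LOCK(d);
	if (d->bd_bif == NULL)			/* detached while we slept */
		error = ENXIO;
	BPFD_UNLOCK(d);
	bpfd_rele(d);
	return (error);
}
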
@@ -1830,16 +1900,11 @@
}
/*
- * Set d's packet filter program to fp. If this file already has a filter,
- * free it and replace it. Returns EINVAL for bogus requests.
+ * Set d's packet filter program to fp. If this file already has a filter,
+ * free it and replace it. Returns EINVAL for bogus requests.
*
- * Note we need global lock here to serialize bpf_setf() and bpf_setif() calls
- * since reading d->bd_bif can't be protected by d or interface lock due to
- * lock order.
- *
- * Additionally, we have to acquire interface write lock due to bpf_mtap() uses
- * interface read lock to read all filers.
- *
+ * Note we use global lock here to serialize bpf_setf() and bpf_setif()
+ * calls.
*/
static int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
@@ -1848,13 +1913,14 @@
struct bpf_program fp_swab;
struct bpf_program32 *fp32;
#endif
- struct bpf_insn *fcode, *old;
+ struct bpf_program_buffer *fcode;
+ struct bpf_insn *filter;
#ifdef BPF_JITTER
- bpf_jit_filter *jfunc, *ofunc;
+ bpf_jit_filter *jfunc;
#endif
size_t size;
u_int flen;
- int need_upgrade;
+ bool track_event;
#ifdef COMPAT_FREEBSD32
switch (cmd) {
@@ -1863,7 +1929,8 @@
case BIOCSETFNR32:
fp32 = (struct bpf_program32 *)fp;
fp_swab.bf_len = fp32->bf_len;
- fp_swab.bf_insns = (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
+ fp_swab.bf_insns =
+ (struct bpf_insn *)(uintptr_t)fp32->bf_insns;
fp = &fp_swab;
switch (cmd) {
case BIOCSETF32:
@@ -1877,12 +1944,10 @@
}
#endif
- fcode = NULL;
+ filter = NULL;
#ifdef BPF_JITTER
- jfunc = ofunc = NULL;
+ jfunc = NULL;
#endif
- need_upgrade = 0;
-
/*
* Check new filter validness before acquiring any locks.
* Allocate memory for new filter, if needed.
@@ -1892,10 +1957,11 @@
return (EINVAL);
size = flen * sizeof(*fp->bf_insns);
if (size > 0) {
- /* We're setting up new filter. Copy and check actual data. */
- fcode = malloc(size, M_BPF, M_WAITOK);
- if (copyin(fp->bf_insns, fcode, size) != 0 ||
- !bpf_validate(fcode, flen)) {
+ /* We're setting up new filter. Copy and check actual data. */
+ fcode = bpf_program_buffer_alloc(size, M_WAITOK);
+ filter = (struct bpf_insn *)fcode->buffer;
+ if (copyin(fp->bf_insns, filter, size) != 0 ||
+ !bpf_validate(filter, flen)) {
free(fcode, M_BPF);
return (EINVAL);
}
@@ -1905,50 +1971,73 @@
* Filter is copied inside fcode and is
* perfectly valid.
*/
- jfunc = bpf_jitter(fcode, flen);
+ jfunc = bpf_jitter(filter, flen);
}
#endif
}
- BPF_LOCK();
+ track_event = false;
+ fcode = NULL;
- /*
- * Set up new filter.
- * Protect filter change by interface lock.
- * Additionally, we are protected by global lock here.
- */
- if (d->bd_bif != NULL)
- BPFIF_WLOCK(d->bd_bif);
+ BPF_LOCK();
BPFD_LOCK(d);
+ /* Set up new filter. */
if (cmd == BIOCSETWF) {
- old = d->bd_wfilter;
- d->bd_wfilter = fcode;
+ if (d->bd_wfilter != NULL) {
+ fcode = __containerof((void *)d->bd_wfilter,
+ struct bpf_program_buffer, buffer);
+#ifdef BPF_JITTER
+ fcode->func = NULL;
+#endif
+ }
+ d->bd_wfilter = filter;
} else {
- old = d->bd_rfilter;
- d->bd_rfilter = fcode;
+ if (d->bd_rfilter != NULL) {
+ fcode = __containerof((void *)d->bd_rfilter,
+ struct bpf_program_buffer, buffer);
#ifdef BPF_JITTER
- ofunc = d->bd_bfilter;
+ fcode->func = d->bd_bfilter;
+#endif
+ }
+ d->bd_rfilter = filter;
+#ifdef BPF_JITTER
d->bd_bfilter = jfunc;
#endif
if (cmd == BIOCSETF)
reset_d(d);
- need_upgrade = bpf_check_upgrade(cmd, d, fcode, flen);
+ if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
+ /*
+ * Filter can be set several times without
+ * specifying interface. In this case just mark d
+ * as reader.
+ */
+ d->bd_writer = 0;
+ if (d->bd_bif != NULL) {
+ /*
+ * Remove descriptor from writers-only list
+ * and add it to active readers list.
+ */
+ CK_LIST_REMOVE(d, bd_next);
+ CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
+ d, bd_next);
+ CTR2(KTR_NET,
+ "%s: upgrade required by pid %d",
+ __func__, d->bd_pid);
+ track_event = true;
+ }
+ }
}
BPFD_UNLOCK(d);
- if (d->bd_bif != NULL)
- BPFIF_WUNLOCK(d->bd_bif);
- if (old != NULL)
- free(old, M_BPF);
-#ifdef BPF_JITTER
- if (ofunc != NULL)
- bpf_destroy_jit_filter(ofunc);
-#endif
- /* Move d to active readers list. */
- if (need_upgrade != 0)
- bpf_upgraded(d);
+ if (fcode != NULL)
+ epoch_call(net_epoch_preempt, &fcode->epoch_ctx,
+ bpf_program_buffer_free);
+ if (track_event)
+ EVENTHANDLER_INVOKE(bpf_track,
+ d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);
+
BPF_UNLOCK();
return (0);
}
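
Viewed from a distance, the update path above is a read-copy-update style replacement: the new filter is published while holding the descriptor lock, and the old buffer is handed to epoch_call() so that a tap routine still inside an epoch section can finish with it. A condensed sketch, with the JIT bookkeeping and the writer-upgrade logic omitted and a hypothetical helper name:

static void
example_replace_rfilter(struct bpf_d *d, struct bpf_insn *newf)
{
	struct bpf_program_buffer *old;

	old = NULL;
	BPF_LOCK();
	BPFD_LOCK(d);
	if (d->bd_rfilter != NULL)
		old = __containerof((void *)d->bd_rfilter,
		    struct bpf_program_buffer, buffer);
	/* Readers now see either the old or the new filter, never junk. */
	d->bd_rfilter = newf;
	BPFD_UNLOCK(d);
	if (old != NULL)
		epoch_call(net_epoch_preempt, &old->epoch_ctx,
		    bpf_program_buffer_free);
	BPF_UNLOCK();
}
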
@@ -1971,15 +2060,6 @@
return (ENXIO);
bp = theywant->if_bpf;
-
- /* Check if interface is not being detached from BPF */
- BPFIF_RLOCK(bp);
- if (bp->bif_flags & BPFIF_FLAG_DYING) {
- BPFIF_RUNLOCK(bp);
- return (ENXIO);
- }
- BPFIF_RUNLOCK(bp);
-
/*
* At this point, we expect the buffer is already allocated. If not,
* return an error.
@@ -1996,9 +2076,11 @@
}
if (bp != d->bd_bif)
bpf_attachd(d, bp);
- BPFD_LOCK(d);
- reset_d(d);
- BPFD_UNLOCK(d);
+ else {
+ BPFD_LOCK(d);
+ reset_d(d);
+ BPFD_UNLOCK(d);
+ }
return (0);
}
@@ -2150,6 +2232,7 @@
void
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
{
+ struct epoch_tracker et;
struct bintime bt;
struct bpf_d *d;
#ifdef BPF_JITTER
@@ -2159,24 +2242,14 @@
int gottime;
gottime = BPF_TSTAMP_NONE;
-
- BPFIF_RLOCK(bp);
-
- LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
- /*
- * We are not using any locks for d here because:
- * 1) any filter change is protected by interface
- * write lock
- * 2) destroying/detaching d is protected by interface
- * write lock, too
- */
-
+ NET_EPOCH_ENTER(et);
+ CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
counter_u64_add(d->bd_rcount, 1);
/*
- * NB: We dont call BPF_CHECK_DIRECTION() here since there is no
- * way for the caller to indiciate to us whether this packet
- * is inbound or outbound. In the bpf_mtap() routines, we use
- * the interface pointers on the mbuf to figure it out.
+ * NB: We don't call BPF_CHECK_DIRECTION() here since there
+ * is no way for the caller to indicate to us whether this
+ * packet is inbound or outbound. In the bpf_mtap() routines,
+ * we use the interface pointers on the mbuf to figure it out.
*/
#ifdef BPF_JITTER
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
@@ -2190,10 +2263,10 @@
* Filter matches. Let's to acquire write lock.
*/
BPFD_LOCK(d);
-
counter_u64_add(d->bd_fcount, 1);
if (gottime < bpf_ts_quality(d->bd_tstamp))
- gottime = bpf_gettime(&bt, d->bd_tstamp, NULL);
+ gottime = bpf_gettime(&bt, d->bd_tstamp,
+ NULL);
#ifdef MAC
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
#endif
@@ -2202,7 +2275,7 @@
BPFD_UNLOCK(d);
}
}
- BPFIF_RUNLOCK(bp);
+ NET_EPOCH_EXIT(et);
}
#define BPF_CHECK_DIRECTION(d, r, i) \
@@ -2216,6 +2289,7 @@
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
+ struct epoch_tracker et;
struct bintime bt;
struct bpf_d *d;
#ifdef BPF_JITTER
@@ -2233,9 +2307,8 @@
pktlen = m_length(m, NULL);
gottime = BPF_TSTAMP_NONE;
- BPFIF_RLOCK(bp);
-
- LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
+ NET_EPOCH_ENTER(et);
+ CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
continue;
counter_u64_add(d->bd_rcount, 1);
@@ -2243,7 +2316,8 @@
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
/* XXX We cannot handle multiple mbufs. */
if (bf != NULL && m->m_next == NULL)
- slen = (*(bf->func))(mtod(m, u_char *), pktlen, pktlen);
+ slen = (*(bf->func))(mtod(m, u_char *), pktlen,
+ pktlen);
else
#endif
slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
@@ -2261,7 +2335,7 @@
BPFD_UNLOCK(d);
}
}
- BPFIF_RUNLOCK(bp);
+ NET_EPOCH_EXIT(et);
}
/*
@@ -2271,6 +2345,7 @@
void
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
{
+ struct epoch_tracker et;
struct bintime bt;
struct mbuf mb;
struct bpf_d *d;
@@ -2296,9 +2371,8 @@
gottime = BPF_TSTAMP_NONE;
- BPFIF_RLOCK(bp);
-
- LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
+ NET_EPOCH_ENTER(et);
+ CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
continue;
counter_u64_add(d->bd_rcount, 1);
@@ -2317,11 +2391,10 @@
BPFD_UNLOCK(d);
}
}
- BPFIF_RUNLOCK(bp);
+ NET_EPOCH_EXIT(et);
}
#undef BPF_CHECK_DIRECTION
-
#undef BPF_TSTAMP_NONE
#undef BPF_TSTAMP_FAST
#undef BPF_TSTAMP_NORMAL
@@ -2540,26 +2613,30 @@
* Called on close.
*/
static void
-bpf_freed(struct bpf_d *d)
+bpfd_free(epoch_context_t ctx)
{
+ struct bpf_d *d;
+ struct bpf_program_buffer *p;
/*
* We don't need to lock out interrupts since this descriptor has
* been detached from its interface and it yet hasn't been marked
* free.
*/
+ d = __containerof(ctx, struct bpf_d, epoch_ctx);
bpf_free(d);
if (d->bd_rfilter != NULL) {
- free((caddr_t)d->bd_rfilter, M_BPF);
-#ifdef BPF_JITTER
- if (d->bd_bfilter != NULL)
- bpf_destroy_jit_filter(d->bd_bfilter);
-#endif
+ p = __containerof((void *)d->bd_rfilter,
+ struct bpf_program_buffer, buffer);
+ bpf_program_buffer_free(&p->epoch_ctx);
}
- if (d->bd_wfilter != NULL)
- free((caddr_t)d->bd_wfilter, M_BPF);
- mtx_destroy(&d->bd_lock);
+ if (d->bd_wfilter != NULL) {
+ p = __containerof((void *)d->bd_wfilter,
+ struct bpf_program_buffer, buffer);
+ bpf_program_buffer_free(&p->epoch_ctx);
+ }
+ mtx_destroy(&d->bd_lock);
counter_u64_free(d->bd_rcount);
counter_u64_free(d->bd_dcount);
counter_u64_free(d->bd_fcount);
@@ -2567,7 +2644,7 @@
counter_u64_free(d->bd_wfcount);
counter_u64_free(d->bd_wdcount);
counter_u64_free(d->bd_zcopy);
-
+ free(d, M_BPF);
}
/*
@@ -2588,25 +2665,31 @@
* headers are not yet supporrted).
*/
void
-bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
+bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen,
+ struct bpf_if **driverp)
{
struct bpf_if *bp;
- KASSERT(*driverp == NULL, ("bpfattach2: driverp already initialized"));
+ KASSERT(*driverp == NULL,
+ ("bpfattach2: driverp already initialized"));
bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
- rw_init(&bp->bif_lock, "bpf interface lock");
- LIST_INIT(&bp->bif_dlist);
- LIST_INIT(&bp->bif_wlist);
+ CK_LIST_INIT(&bp->bif_dlist);
+ CK_LIST_INIT(&bp->bif_wlist);
bp->bif_ifp = ifp;
bp->bif_dlt = dlt;
bp->bif_hdrlen = hdrlen;
bp->bif_bpf = driverp;
+ bp->bif_refcnt = 1;
*driverp = bp;
-
+ /*
+ * Reference the ifnet pointer so it won't be freed until
+ * we release it.
+ */
+ if_ref(ifp);
BPF_LOCK();
- LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
+ CK_LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
BPF_UNLOCK();
if (bootverbose && IS_DEFAULT_VNET(curvnet))
@@ -2647,103 +2730,37 @@
void
bpfdetach(struct ifnet *ifp)
{
- struct bpf_if *bp, *bp_temp;
- struct bpf_d *d;
- int ndetached;
+ struct bpf_if *bp, *bp_temp;
+ struct bpf_d *d;
- ndetached = 0;
-
BPF_LOCK();
/* Find all bpf_if struct's which reference ifp and detach them. */
- LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
+ CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
if (ifp != bp->bif_ifp)
continue;
- LIST_REMOVE(bp, bif_next);
- /* Add to to-be-freed list */
- LIST_INSERT_HEAD(&bpf_freelist, bp, bif_next);
-
- ndetached++;
- /*
- * Delay freeing bp till interface is detached
- * and all routes through this interface are removed.
- * Mark bp as detached to restrict new consumers.
- */
- BPFIF_WLOCK(bp);
- bp->bif_flags |= BPFIF_FLAG_DYING;
+ CK_LIST_REMOVE(bp, bif_next);
*bp->bif_bpf = (struct bpf_if *)&dead_bpf_if;
- BPFIF_WUNLOCK(bp);
- CTR4(KTR_NET, "%s: sheduling free for encap %d (%p) for if %p",
+ CTR4(KTR_NET,
+ "%s: sheduling free for encap %d (%p) for if %p",
__func__, bp->bif_dlt, bp, ifp);
- /* Free common descriptors */
- while ((d = LIST_FIRST(&bp->bif_dlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
+ /* Detach common descriptors */
+ while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
+ bpf_detachd_locked(d, true);
}
- /* Free writer-only descriptors */
- while ((d = LIST_FIRST(&bp->bif_wlist)) != NULL) {
- bpf_detachd_locked(d);
- BPFD_LOCK(d);
- bpf_wakeup(d);
- BPFD_UNLOCK(d);
+ /* Detach writer-only descriptors */
+ while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
+ bpf_detachd_locked(d, true);
}
+ bpfif_rele(bp);
}
BPF_UNLOCK();
-
-#ifdef INVARIANTS
- if (ndetached == 0)
- printf("bpfdetach: %s was not attached\n", ifp->if_xname);
-#endif
}
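
Since teardown is now spread across bpfdetach(), bpf_detachd_locked() and the reference-counted free path, a compact sketch of the resulting order of events may help. Everything called below exists in the patch above; only the wrapper function is illustrative, and the writer-only list is handled the same way as bif_dlist and is omitted here.

/* Sketch: order of events when an interface's BPF state is torn down. */
static void
example_ifnet_teardown(struct ifnet *ifp)
{
	struct bpf_if *bp, *bp_temp;
	struct bpf_d *d;

	BPF_LOCK();
	CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
		if (bp->bif_ifp != ifp)
			continue;
		/* 1. Unlink bp and point the driver at dead_bpf_if. */
		CK_LIST_REMOVE(bp, bif_next);
		*bp->bif_bpf = (struct bpf_if *)&dead_bpf_if;
		/*
		 * 2. Detach every descriptor; each one wakes its sleepers
		 *    (they will see ENXIO) and drops its bpfif_ref() on bp.
		 */
		while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL)
			bpf_detachd_locked(d, true);
		/*
		 * 3. Drop bpfattach2()'s reference.  When the count reaches
		 *    zero, bpfif_free() runs via epoch_call() and only then
		 *    calls if_rele() and free().
		 */
		bpfif_rele(bp);
	}
	BPF_UNLOCK();
}
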
/*
- * Interface departure handler.
- * Note departure event does not guarantee interface is going down.
- * Interface renaming is currently done via departure/arrival event set.
- *
- * Departure handled is called after all routes pointing to
- * given interface are removed and interface is in down state
- * restricting any packets to be sent/received. We assume it is now safe
- * to free data allocated by BPF.
- */
-static void
-bpf_ifdetach(void *arg __unused, struct ifnet *ifp)
-{
- struct bpf_if *bp, *bp_temp;
- int nmatched = 0;
-
- /* Ignore ifnet renaming. */
- if (ifp->if_flags & IFF_RENAMING)
- return;
-
- BPF_LOCK();
- /*
- * Find matching entries in free list.
- * Nothing should be found if bpfdetach() was not called.
- */
- LIST_FOREACH_SAFE(bp, &bpf_freelist, bif_next, bp_temp) {
- if (ifp != bp->bif_ifp)
- continue;
-
- CTR3(KTR_NET, "%s: freeing BPF instance %p for interface %p",
- __func__, bp, ifp);
-
- LIST_REMOVE(bp, bif_next);
-
- rw_destroy(&bp->bif_lock);
- free(bp, M_BPF);
-
- nmatched++;
- }
- BPF_UNLOCK();
-}
-
-/*
* Get a list of available data link type of the interface.
*/
static int
@@ -2757,9 +2774,8 @@
BPF_LOCK_ASSERT();
ifp = d->bd_bif->bif_ifp;
-again:
n1 = 0;
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp == ifp)
n1++;
}
@@ -2769,24 +2785,16 @@
}
if (n1 > bfl->bfl_len)
return (ENOMEM);
- BPF_UNLOCK();
+
lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
n = 0;
- BPF_LOCK();
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp != ifp)
continue;
- if (n >= n1) {
- free(lst, M_TEMP);
- goto again;
- }
- lst[n] = bp->bif_dlt;
- n++;
+ lst[n++] = bp->bif_dlt;
}
- BPF_UNLOCK();
error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
free(lst, M_TEMP);
- BPF_LOCK();
bfl->bfl_len = n;
return (error);
}
@@ -2802,33 +2810,34 @@
struct bpf_if *bp;
BPF_LOCK_ASSERT();
+ MPASS(d->bd_bif != NULL);
+ /*
+ * It is safe to check bd_bif without BPFD_LOCK, since it cannot
+ * be changed while we hold the global lock.
+ */
if (d->bd_bif->bif_dlt == dlt)
return (0);
- ifp = d->bd_bif->bif_ifp;
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ ifp = d->bd_bif->bif_ifp;
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
break;
}
+ if (bp == NULL)
+ return (EINVAL);
- if (bp != NULL) {
- opromisc = d->bd_promisc;
- bpf_attachd(d, bp);
- BPFD_LOCK(d);
- reset_d(d);
- BPFD_UNLOCK(d);
- if (opromisc) {
- error = ifpromisc(bp->bif_ifp, 1);
- if (error)
- if_printf(bp->bif_ifp,
- "bpf_setdlt: ifpromisc failed (%d)\n",
- error);
- else
- d->bd_promisc = 1;
- }
+ opromisc = d->bd_promisc;
+ bpf_attachd(d, bp);
+ if (opromisc) {
+ error = ifpromisc(bp->bif_ifp, 1);
+ if (error)
+ if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n",
+ __func__, error);
+ else
+ d->bd_promisc = 1;
}
- return (bp == NULL ? EINVAL : 0);
+ return (0);
}
static void
@@ -2837,17 +2846,11 @@
struct cdev *dev;
sx_init(&bpf_sx, "bpf global lock");
- LIST_INIT(&bpf_iflist);
- LIST_INIT(&bpf_freelist);
+ CK_LIST_INIT(&bpf_iflist);
dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
/* For compatibility */
make_dev_alias(dev, "bpf0");
-
- /* Register interface departure handler */
- bpf_ifdetach_cookie = EVENTHANDLER_REGISTER(
- ifnet_departure_event, bpf_ifdetach, NULL,
- EVENTHANDLER_PRI_ANY);
}
/*
@@ -2862,19 +2865,19 @@
struct bpf_d *bd;
BPF_LOCK();
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- BPFIF_RLOCK(bp);
- LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
- BPFD_LOCK(bd);
+ /*
+ * We are protected by the global lock here; interfaces and
+ * descriptors cannot be deleted while we hold it.
+ */
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
+ CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
counter_u64_zero(bd->bd_rcount);
counter_u64_zero(bd->bd_dcount);
counter_u64_zero(bd->bd_fcount);
counter_u64_zero(bd->bd_wcount);
counter_u64_zero(bd->bd_wfcount);
counter_u64_zero(bd->bd_zcopy);
- BPFD_UNLOCK(bd);
}
- BPFIF_RUNLOCK(bp);
}
BPF_UNLOCK();
}
@@ -2886,10 +2889,9 @@
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
{
+ BPF_LOCK_ASSERT();
bzero(d, sizeof(*d));
- BPFD_LOCK_ASSERT(bd);
d->bd_structsize = sizeof(*d);
- /* XXX: reading should be protected by global lock */
d->bd_immediate = bd->bd_immediate;
d->bd_promisc = bd->bd_promisc;
d->bd_hdrcmplt = bd->bd_hdrcmplt;
@@ -2964,22 +2966,16 @@
return (ENOMEM);
}
index = 0;
- LIST_FOREACH(bp, &bpf_iflist, bif_next) {
- BPFIF_RLOCK(bp);
+ CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
/* Send writers-only first */
- LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
+ CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
xbd = &xbdbuf[index++];
- BPFD_LOCK(bd);
bpfstats_fill_xbpf(xbd, bd);
- BPFD_UNLOCK(bd);
}
- LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
+ CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
xbd = &xbdbuf[index++];
- BPFD_LOCK(bd);
bpfstats_fill_xbpf(xbd, bd);
- BPFD_UNLOCK(bd);
}
- BPFIF_RUNLOCK(bp);
}
BPF_UNLOCK();
error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
@@ -3059,10 +3055,10 @@
/* bif_ext.bif_dlist */
BPF_DB_PRINTF("%#x", bif_dlt);
BPF_DB_PRINTF("%u", bif_hdrlen);
- BPF_DB_PRINTF("%p", bif_ifp);
- /* bif_lock */
/* bif_wlist */
- BPF_DB_PRINTF("%#x", bif_flags);
+ BPF_DB_PRINTF("%p", bif_ifp);
+ BPF_DB_PRINTF("%p", bif_bpf);
+ BPF_DB_PRINTF("%u", bif_refcnt);
}
DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
Index: head/sys/net/bpfdesc.h
===================================================================
--- head/sys/net/bpfdesc.h
+++ head/sys/net/bpfdesc.h
@@ -43,9 +43,10 @@
#include <sys/callout.h>
#include <sys/selinfo.h>
-#include <sys/queue.h>
+#include <sys/ck.h>
#include <sys/conf.h>
#include <sys/counter.h>
+#include <sys/epoch.h>
#include <net/if.h>
/*
@@ -53,7 +54,7 @@
*/
struct zbuf;
struct bpf_d {
- LIST_ENTRY(bpf_d) bd_next; /* Linked list of descriptors */
+ CK_LIST_ENTRY(bpf_d) bd_next; /* Linked list of descriptors */
/*
* Buffer slots: two memory buffers store the incoming packets.
* The model has three slots. Sbuf is always occupied.
@@ -104,6 +105,9 @@
counter_u64_t bd_wdcount; /* number of packets dropped during a write */
counter_u64_t bd_zcopy; /* number of zero copy operations */
u_char bd_compat32; /* 32-bit stream on LP64 system */
+
+ volatile u_int bd_refcnt;
+ struct epoch_context epoch_ctx;
};
/* Values for bd_state */
