Page Menu | Home | FreeBSD

D22197.id63806.diff
No One | Temporary

D22197.id63806.diff

Index: sys/dev/cxgbe/cxgbei/cxgbei.c
===================================================================
--- sys/dev/cxgbe/cxgbei/cxgbei.c
+++ sys/dev/cxgbe/cxgbei/cxgbei.c
@@ -412,12 +412,12 @@
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
icl_cxgbei_conn_pdu_free(NULL, ip);
#ifdef INVARIANTS
Index: sys/dev/cxgbe/tom/t4_connect.c
===================================================================
--- sys/dev/cxgbe/tom/t4_connect.c
+++ sys/dev/cxgbe/tom/t4_connect.c
@@ -124,12 +124,12 @@
CURVNET_SET(toep->vnet);
if (status != EAGAIN)
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
toe_connect_failed(tod, inp, status);
final_cpl_received(toep); /* unlocks inp */
if (status != EAGAIN)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
}
Index: sys/dev/cxgbe/tom/t4_cpl_io.c
===================================================================
--- sys/dev/cxgbe/tom/t4_cpl_io.c
+++ sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -1214,7 +1214,7 @@
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1260,7 +1260,7 @@
case TCPS_FIN_WAIT_2:
tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
INP_WLOCK(inp);
@@ -1273,7 +1273,7 @@
}
done:
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
}
@@ -1303,7 +1303,7 @@
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1321,7 +1321,7 @@
tcp_twstart(tp);
release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
INP_WLOCK(inp);
@@ -1346,7 +1346,7 @@
}
done:
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
}
@@ -1423,7 +1423,7 @@
inp = toep->inp;
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for tcp_close */
+ NET_EPOCH_ENTER(et); /* for tcp_close */
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1457,7 +1457,7 @@
final_cpl_received(toep);
done:
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
return (0);
@@ -1572,12 +1572,12 @@
INP_WUNLOCK(inp);
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
Index: sys/dev/cxgbe/tom/t4_listen.c
===================================================================
--- sys/dev/cxgbe/tom/t4_listen.c
+++ sys/dev/cxgbe/tom/t4_listen.c
@@ -949,7 +949,7 @@
#endif
struct toepcb *toep = synqe->toep;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
+ NET_EPOCH_ASSERT(); /* prevents bad race with accept() */
INP_WLOCK_ASSERT(inp);
KASSERT(synqe->flags & TPF_SYNQE,
("%s: %p not a synq_entry?", __func__, arg));
@@ -1242,12 +1242,12 @@
REJECT_PASS_ACCEPT_REQ(true);
/* Don't offload if the 4-tuple is already in use */
- INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for 4-tuple check */
+ NET_EPOCH_ENTER(et); /* for 4-tuple check */
if (toe_4tuple_check(&inc, &th, ifp) != 0) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
REJECT_PASS_ACCEPT_REQ(false);
}
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
inp = lctx->inp; /* listening socket, not owned by TOE */
INP_WLOCK(inp);
@@ -1396,7 +1396,7 @@
("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
CURVNET_SET(lctx->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for syncache_expand */
+ NET_EPOCH_ENTER(et); /* for syncache_expand */
INP_WLOCK(inp);
CTR6(KTR_CXGBE,
@@ -1412,7 +1412,7 @@
reset:
send_reset_synqe(TOEDEV(ifp), synqe);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
}
@@ -1471,7 +1471,7 @@
inp = release_synqe(sc, synqe);
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
Index: sys/dev/cxgbe/tom/t4_tls.c
===================================================================
--- sys/dev/cxgbe/tom/t4_tls.c
+++ sys/dev/cxgbe/tom/t4_tls.c
@@ -2125,12 +2125,12 @@
INP_WUNLOCK(inp);
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
Index: sys/kern/uipc_ktls.c
===================================================================
--- sys/kern/uipc_ktls.c
+++ sys/kern/uipc_ktls.c
@@ -1137,7 +1137,7 @@
* the send tag is fixed or just rely on timers?
*/
} else {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
if (!in_pcbrele_wlocked(inp)) {
if (!(inp->inp_flags & INP_TIMEWAIT) &&
@@ -1150,7 +1150,7 @@
} else
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
counter_u64_add(ktls_ifnet_reset_failed, 1);
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -586,6 +586,7 @@
#define INP_TRY_WLOCK(inp) rw_try_wlock(&(inp)->inp_lock)
#define INP_RUNLOCK(inp) rw_runlock(&(inp)->inp_lock)
#define INP_WUNLOCK(inp) rw_wunlock(&(inp)->inp_lock)
+#define INP_UNLOCK(inp) rw_unlock(&(inp)->inp_lock)
#define INP_TRY_UPGRADE(inp) rw_try_upgrade(&(inp)->inp_lock)
#define INP_DOWNGRADE(inp) rw_downgrade(&(inp)->inp_lock)
#define INP_WLOCKED(inp) rw_wowned(&(inp)->inp_lock)
@@ -628,19 +629,14 @@
#define INP_INFO_LOCK_INIT(ipi, d) \
mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE)
#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock)
-#define INP_INFO_RLOCK_ET(ipi, et) NET_EPOCH_ENTER((et))
#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock)
#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
-#define INP_INFO_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT((et))
-#define INP_INFO_RUNLOCK_TP(ipi, tp) NET_EPOCH_EXIT(*(tp)->t_inpcb->inp_et)
#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock))
-#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt))
#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
#define INP_INFO_WUNLOCK_ASSERT(ipi) \
mtx_assert(&(ipi)->ipi_lock, MA_NOTOWNED)
-#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch(net_epoch_preempt) && !mtx_owned(&(ipi)->ipi_lock))
#define INP_LIST_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
@@ -663,11 +659,7 @@
#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF)
#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RLOCK(ipi) struct epoch_tracker inp_hash_et; epoch_enter_preempt(net_epoch_preempt, &inp_hash_et)
-#define INP_HASH_RLOCK_ET(ipi, et) epoch_enter_preempt(net_epoch_preempt, &(et))
#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT(inp_hash_et)
-#define INP_HASH_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT((et))
#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock))
#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED);
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -515,7 +515,7 @@
#ifdef INVARIANTS
if (pcbinfo == &V_tcbinfo) {
- INP_INFO_RLOCK_ASSERT(pcbinfo);
+ NET_EPOCH_ASSERT();
} else {
INP_INFO_WLOCK_ASSERT(pcbinfo);
}
@@ -2252,12 +2252,10 @@
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
-#ifdef INVARIANTS
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
- if (!mtx_owned(&pcbinfo->ipi_hash_lock))
- MPASS(in_epoch_verbose(net_epoch_preempt, 1));
-#endif
+ INP_HASH_LOCK_ASSERT(pcbinfo);
+
/*
* First look for an exact match.
*/
@@ -2384,7 +2382,6 @@
{
struct inpcb *inp;
- INP_HASH_RLOCK(pcbinfo);
inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
(lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
if (inp != NULL) {
@@ -2411,7 +2408,7 @@
}
#endif
}
- INP_HASH_RUNLOCK(pcbinfo);
+
return (inp);
}
@@ -2657,7 +2654,7 @@
#ifdef INVARIANTS
if (pcbinfo == &V_tcbinfo) {
- INP_INFO_RLOCK_ASSERT(pcbinfo);
+ NET_EPOCH_ASSERT();
} else {
INP_INFO_WLOCK_ASSERT(pcbinfo);
}
Index: sys/netinet/ip_divert.c
===================================================================
--- sys/netinet/ip_divert.c
+++ sys/netinet/ip_divert.c
@@ -192,7 +192,8 @@
u_int16_t nport;
struct sockaddr_in divsrc;
struct m_tag *mtag;
- struct epoch_tracker et;
+
+ NET_EPOCH_ASSERT();
mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
if (mtag == NULL) {
@@ -231,7 +232,6 @@
/* Sanity check */
M_ASSERTPKTHDR(m);
- NET_EPOCH_ASSERT();
/* Find IP address for receive interface */
ifp = m->m_pkthdr.rcvif;
@@ -272,7 +272,6 @@
/* Put packet on socket queue, if any */
sa = NULL;
nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info));
- INP_INFO_RLOCK_ET(&V_divcbinfo, et);
CK_LIST_FOREACH(inp, &V_divcb, inp_list) {
/* XXX why does only one socket match? */
if (inp->inp_lport == nport) {
@@ -290,7 +289,6 @@
break;
}
}
- INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
if (sa == NULL) {
m_freem(m);
KMOD_IPSTAT_INC(ips_noproto);
@@ -631,71 +629,41 @@
static int
div_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ int error;
+
+ if (req->newptr != 0)
+ return EPERM;
- /*
- * The process of preparing the TCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
if (req->oldptr == 0) {
+ int n;
+
n = V_divcbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return 0;
}
- if (req->newptr != 0)
- return EPERM;
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_INFO_WLOCK(&V_divcbinfo);
- gencnt = V_divcbinfo.ipi_gencnt;
- n = V_divcbinfo.ipi_count;
- INP_INFO_WUNLOCK(&V_divcbinfo);
-
- error = sysctl_wire_old_buffer(req,
- 2 * sizeof(xig) + n*sizeof(struct xinpcb));
- if (error != 0)
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_divcbinfo.ipi_count;
+ xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return error;
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == NULL)
- return ENOMEM;
-
- INP_INFO_RLOCK_ET(&V_divcbinfo, et);
- for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt &&
- cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ if (inp->inp_gencnt <= xig.xig_gen) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
@@ -704,17 +672,9 @@
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_divcbinfo);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_divcbinfo);
+ NET_EPOCH_EXIT(et);
if (!error) {
- struct epoch_tracker et;
/*
* Give the user an updated idea of our state.
* If the generation differs from what we told
@@ -722,15 +682,13 @@
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_INFO_RLOCK_ET(&V_divcbinfo, et);
xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_divcbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
- return error;
+
+ return (error);
}
#ifdef SYSCTL_NODE
Index: sys/netinet/raw_ip.c
===================================================================
--- sys/netinet/raw_ip.c
+++ sys/netinet/raw_ip.c
@@ -284,9 +284,10 @@
struct ip *ip = mtod(m, struct ip *);
struct inpcb *inp, *last;
struct sockaddr_in ripsrc;
- struct epoch_tracker et;
int hash;
+ NET_EPOCH_ASSERT();
+
*mp = NULL;
bzero(&ripsrc, sizeof(ripsrc));
@@ -299,7 +300,6 @@
hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
if (inp->inp_ip_p != proto)
continue;
@@ -422,7 +422,6 @@
skip_2:
INP_RUNLOCK(inp);
}
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
if (rip_append(last, ip, m, &ripsrc) != 0)
IPSTAT_INC(ips_delivered);
@@ -1068,97 +1067,67 @@
static int
rip_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ int error;
+
+ if (req->newptr != 0)
+ return (EPERM);
- /*
- * The process of preparing the TCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
if (req->oldptr == 0) {
+ int n;
+
n = V_ripcbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
}
- if (req->newptr != 0)
- return (EPERM);
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_INFO_WLOCK(&V_ripcbinfo);
- gencnt = V_ripcbinfo.ipi_gencnt;
- n = V_ripcbinfo.ipi_count;
- INP_INFO_WUNLOCK(&V_ripcbinfo);
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
+ return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_ripcbinfo.ipi_count;
+ xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
-
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
- for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt &&
- cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ if (inp->inp_gencnt <= xig.xig_gen &&
+ cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
+ if (error)
+ break;
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_ripcbinfo);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_ripcbinfo);
+ NET_EPOCH_EXIT(et);
if (!error) {
- struct epoch_tracker et;
/*
* Give the user an updated idea of our state. If the
* generation differs from what we told her before, she knows
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_ripcbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
+
return (error);
}
Index: sys/netinet/tcp_hpts.c
===================================================================
--- sys/netinet/tcp_hpts.c
+++ sys/netinet/tcp_hpts.c
@@ -1245,12 +1245,10 @@
int16_t set_cpu;
uint32_t did_prefetch = 0;
int dropped;
- struct epoch_tracker et;
HPTS_MTX_ASSERT(hpts);
-#ifndef VIMAGE
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
-#endif
+ NET_EPOCH_ASSERT();
+
while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) {
HPTS_MTX_ASSERT(hpts);
hpts_sane_input_remove(hpts, inp, 0);
@@ -1266,7 +1264,6 @@
INP_WLOCK(inp);
#ifdef VIMAGE
CURVNET_SET(inp->inp_vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
#endif
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
(inp->inp_flags2 & INP_FREED)) {
@@ -1276,7 +1273,6 @@
INP_WUNLOCK(inp);
}
#ifdef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
#endif
mtx_lock(&hpts->p_mtx);
@@ -1296,7 +1292,6 @@
if (in_pcbrele_wlocked(inp) == 0)
INP_WUNLOCK(inp);
#ifdef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
#endif
mtx_lock(&hpts->p_mtx);
@@ -1349,22 +1344,16 @@
INP_WUNLOCK(inp);
INP_UNLOCK_ASSERT(inp);
#ifdef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
#endif
mtx_lock(&hpts->p_mtx);
hpts->p_inp = NULL;
}
-#ifndef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
-#endif
}
static void
tcp_hptsi(struct tcp_hpts_entry *hpts)
{
- struct epoch_tracker et;
struct tcpcb *tp;
struct inpcb *inp = NULL, *ninp;
struct timeval tv;
@@ -1378,6 +1367,8 @@
int16_t set_cpu;
HPTS_MTX_ASSERT(hpts);
+ NET_EPOCH_ASSERT();
+
/* record previous info for any logging */
hpts->saved_lasttick = hpts->p_lasttick;
hpts->saved_curtick = hpts->p_curtick;
@@ -1469,9 +1460,6 @@
goto no_one;
}
HPTS_MTX_ASSERT(hpts);
-#ifndef VIMAGE
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
-#endif
for (i = 0; i < ticks_to_run; i++) {
/*
* Calculate our delay, if there are no extra ticks there
@@ -1586,7 +1574,6 @@
}
#ifdef VIMAGE
CURVNET_SET(inp->inp_vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
#endif
/* Lets do any logging that we might want to */
if (hpts_does_tp_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) {
@@ -1658,7 +1645,6 @@
INP_WUNLOCK(inp);
skip_pacing:
#ifdef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
#endif
INP_UNLOCK_ASSERT(inp);
@@ -1678,9 +1664,6 @@
hpts->p_runningtick = 0;
}
}
-#ifndef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
-#endif
no_one:
HPTS_MTX_ASSERT(hpts);
hpts->p_delayed_by = 0;
@@ -1820,6 +1803,7 @@
tcp_hpts_thread(void *ctx)
{
struct tcp_hpts_entry *hpts;
+ struct epoch_tracker et;
struct timeval tv;
sbintime_t sb;
@@ -1839,7 +1823,9 @@
}
hpts->p_hpts_wake_scheduled = 0;
hpts->p_hpts_active = 1;
+ NET_EPOCH_ENTER(et);
tcp_hptsi(hpts);
+ NET_EPOCH_EXIT(et);
HPTS_MTX_ASSERT(hpts);
tv.tv_sec = 0;
tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_USEC;
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -562,7 +562,6 @@
int rstreason = 0; /* For badport_bandlim accounting purposes */
uint8_t iptos;
struct m_tag *fwd_tag = NULL;
- struct epoch_tracker et;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int isipv6;
@@ -571,7 +570,6 @@
#endif /* INET6 */
struct tcpopt to; /* options in this segment */
char *s = NULL; /* address and port logging */
- int ti_locked;
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -582,6 +580,8 @@
short ostate = 0;
#endif
+ NET_EPOCH_ASSERT();
+
#ifdef INET6
isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
#endif
@@ -747,19 +747,6 @@
*/
drop_hdrlen = off0 + off;
- /*
- * Locate pcb for segment; if we're likely to add or remove a
- * connection then first acquire pcbinfo lock. There are three cases
- * where we might discover later we need a write lock despite the
- * flags: ACKs moving a connection out of the syncache, ACKs for a
- * connection in TIMEWAIT and SYNs not targeting a listening socket.
- */
- if ((thflags & (TH_FIN | TH_RST)) != 0) {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_RLOCKED;
- } else
- ti_locked = TI_UNLOCKED;
-
/*
* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
*/
@@ -777,13 +764,6 @@
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
findpcb:
-#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- } else {
- INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
#ifdef INET6
if (isipv6 && fwd_tag != NULL) {
struct sockaddr_in6 *next_hop6;
@@ -943,12 +923,6 @@
* XXXRW: It may be time to rethink timewait locking.
*/
if (inp->inp_flags & INP_TIMEWAIT) {
- if (ti_locked == TI_UNLOCKED) {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_RLOCKED;
- }
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-
if (thflags & TH_SYN)
tcp_dooptions(&to, optp, optlen, TO_SYN);
/*
@@ -956,7 +930,6 @@
*/
if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb;
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -978,27 +951,6 @@
}
#endif
- /*
- * We've identified a valid inpcb, but it could be that we need an
- * inpcbinfo write lock but don't hold it. In this case, attempt to
- * acquire using the same strategy as the TIMEWAIT case above. If we
- * relock, we have to jump back to 'relocked' as the connection might
- * now be in TIMEWAIT.
- */
-#ifdef INVARIANTS
- if ((thflags & (TH_FIN | TH_RST)) != 0)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-#endif
- if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
- (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
- !IS_FASTOPEN(tp->t_flags)))) {
- if (ti_locked == TI_UNLOCKED) {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_RLOCKED;
- }
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- }
-
#ifdef MAC
INP_WLOCK_ASSERT(inp);
if (mac_inpcb_check_deliver(inp, m))
@@ -1053,7 +1005,6 @@
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@@ -1131,8 +1082,6 @@
TCP_PROBE5(receive, NULL, tp, m, tp, th);
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
iptos);
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -1335,10 +1284,6 @@
* Entry added to syncache and mbuf consumed.
* Only the listen socket is unlocked by syncache_add().
*/
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_UNLOCKED;
- }
INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
} else if (tp->t_state == TCPS_LISTEN) {
@@ -1371,25 +1316,11 @@
* the inpcb, and unlocks pcbinfo.
*/
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos);
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
dropwithreset:
TCP_PROBE5(receive, NULL, tp, m, tp, th);
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_UNLOCKED;
- }
-#ifdef INVARIANTS
- else {
- KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset "
- "ti_locked: %d", __func__, ti_locked));
- INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
-
if (inp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason);
INP_WUNLOCK(inp);
@@ -1402,18 +1333,6 @@
if (m != NULL)
TCP_PROBE5(receive, NULL, tp, m, tp, th);
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_UNLOCKED;
- }
-#ifdef INVARIANTS
- else {
- KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock "
- "ti_locked: %d", __func__, ti_locked));
- INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
-
if (inp != NULL)
INP_WUNLOCK(inp);
@@ -1502,7 +1421,7 @@
struct mbuf *mfree;
struct tcpopt to;
int tfo_syn;
-
+
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -1517,16 +1436,8 @@
tp->sackhint.last_sack_ack = 0;
sack_changed = 0;
nsegs = max(1, m->m_pkthdr.lro_nsegs);
- /*
- * If this is either a state-changing packet or current state isn't
- * established, we require a write lock on tcbinfo. Otherwise, we
- * allow the tcbinfo to be in either alocked or unlocked, as the
- * caller may have unnecessarily acquired a write lock due to a race.
- */
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tp->t_state != TCPS_ESTABLISHED) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- }
+
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
__func__));
@@ -2048,7 +1959,6 @@
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2121,7 +2031,6 @@
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp));
@@ -2164,8 +2073,6 @@
*/
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
tp->t_state != TCPS_SYN_RECEIVED) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-
TCPSTAT_INC(tcps_badsyn);
if (V_tcp_insecure_syn &&
SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
@@ -2289,8 +2196,6 @@
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-
if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
"after socket was closed, "
@@ -2876,7 +2781,6 @@
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
m_freem(m);
return;
@@ -2891,7 +2795,6 @@
*/
case TCPS_LAST_ACK:
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
goto drop;
}
@@ -3140,8 +3043,6 @@
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-
tcp_twstart(tp);
return;
}
Index: sys/netinet/tcp_lro.c
===================================================================
--- sys/netinet/tcp_lro.c
+++ sys/netinet/tcp_lro.c
@@ -884,7 +884,7 @@
*/
if ((tcplro_stacks_wanting_mbufq == 0) || (le->m_head->m_flags & M_VLANTAG))
goto skip_lookup;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
switch (le->eh_type) {
#ifdef INET6
case ETHERTYPE_IPV6:
@@ -903,7 +903,7 @@
break;
#endif
}
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
if (inp && ((inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT)) ||
(inp->inp_flags2 & INP_FREED))) {
/* We don't want this guy */
Index: sys/netinet/tcp_stacks/bbr.c
===================================================================
--- sys/netinet/tcp_stacks/bbr.c
+++ sys/netinet/tcp_stacks/bbr.c
@@ -8618,7 +8618,6 @@
bbr->rc_timer_first = 1;
bbr_timer_cancel(bbr,
__LINE__, bbr->r_ctl.rc_rcvtime);
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
tcp_twstart(tp);
return (1);
@@ -9619,7 +9618,6 @@
struct tcpcb *tp, int32_t * tlen, struct tcphdr *th, struct socket *so)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if (bbr->rc_allow_data_af_clo == 0) {
close_now:
tp = tcp_close(tp);
@@ -9861,7 +9859,6 @@
return (ret_val);
}
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
m_freem(m);
return (1);
@@ -9974,7 +9971,6 @@
return (ret_val);
}
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
ctf_do_drop(m, tp);
return (1);
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -5875,7 +5875,6 @@
case TCPS_FIN_WAIT_2:
rack_timer_cancel(tp, rack,
rack->r_ctl.rc_rcvtime, __LINE__);
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
return (1);
}
@@ -6353,7 +6352,6 @@
tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
* Advance th->th_seq to correspond to first data byte. If data,
@@ -6847,7 +6845,6 @@
{
struct tcp_rack *rack;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
rack = (struct tcp_rack *)tp->t_fb_ptr;
if (rack->rc_allow_data_af_clo == 0) {
close_now:
@@ -7079,7 +7076,6 @@
return (ret_val);
}
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
m_freem(m);
return (1);
@@ -7187,7 +7183,6 @@
return (ret_val);
}
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
ctf_do_drop(m, tp);
return (1);
@@ -7650,16 +7645,8 @@
kern_prefetch(rack, &prev_state);
prev_state = 0;
thflags = th->th_flags;
- /*
- * If this is either a state-changing packet or current state isn't
- * established, we require a read lock on tcbinfo. Otherwise, we
- * allow the tcbinfo to be in either locked or unlocked, as the
- * caller may have unnecessarily acquired a lock due to a race.
- */
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tp->t_state != TCPS_ESTABLISHED) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- }
+
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
__func__));
Index: sys/netinet/tcp_stacks/rack_bbr_common.c
===================================================================
--- sys/netinet/tcp_stacks/rack_bbr_common.c
+++ sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -253,7 +253,6 @@
*/
struct mbuf *m_save;
struct ether_header *eh;
- struct epoch_tracker et;
struct tcphdr *th;
#ifdef INET6
struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */
@@ -268,14 +267,8 @@
uint16_t drop_hdrlen;
uint8_t iptos, no_vn=0, bpf_req=0;
- /*
- * This is a bit deceptive, we get the
- * "info epoch" which is really the network
- * epoch. This covers us on both any INP
- * type change but also if the ifp goes
- * away it covers us as well.
- */
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ASSERT();
+
if (m && m->m_pkthdr.rcvif)
ifp = m->m_pkthdr.rcvif;
else
@@ -445,7 +438,6 @@
}
if (no_vn == 0)
CURVNET_RESTORE();
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return(retval);
}
skipped_pkt:
@@ -453,7 +445,6 @@
}
if (no_vn == 0)
CURVNET_RESTORE();
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return(retval);
}
@@ -680,7 +671,6 @@
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp));
@@ -732,7 +722,8 @@
void
ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ NET_EPOCH_ASSERT();
TCPSTAT_INC(tcps_badsyn);
if (V_tcp_insecure_syn &&
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -1941,7 +1941,7 @@
tp = (struct tcpcb *)ptp;
CURVNET_SET(tp->t_vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
__func__, tp));
@@ -1961,13 +1961,13 @@
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return;
}
}
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
}
@@ -2127,17 +2127,17 @@
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, m, n, pcb_count;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
- struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ struct xinpgen xig;
+ int error;
+
+ if (req->newptr != NULL)
+ return (EPERM);
- /*
- * The process of preparing the TCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
if (req->oldptr == NULL) {
+ int n;
+
n = V_tcbinfo.ipi_count +
counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
n += imax(n / 8, 10);
@@ -2145,44 +2145,29 @@
return (0);
}
- if (req->newptr != NULL)
- return (EPERM);
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_LIST_RLOCK(&V_tcbinfo);
- gencnt = V_tcbinfo.ipi_gencnt;
- n = V_tcbinfo.ipi_count;
- INP_LIST_RUNLOCK(&V_tcbinfo);
-
- m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
-
- error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
- + (n + m) * sizeof(struct xtcpcb));
- if (error != 0)
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n + m;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_tcbinfo.ipi_count +
+ counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
+ xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- error = syncache_pcblist(req, m, &pcb_count);
+ error = syncache_pcblist(req);
if (error)
return (error);
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
-
- INP_INFO_WLOCK(&V_tcbinfo);
- for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
- inp != NULL && i < n; inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
+ INP_RLOCK(inp);
+ if (inp->inp_gencnt <= xig.xig_gen) {
/*
* XXX: This use of cr_cansee(), introduced with
* TCP state changes, is not quite right, but for
@@ -2197,36 +2182,25 @@
} else
error = cr_canseeinpcb(req->td->td_ucred, inp);
if (error == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_tcbinfo);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
- struct xtcpcb xt;
+ struct xtcpcb xt;
- tcp_inptoxtp(inp, &xt);
- INP_RUNLOCK(inp);
- error = SYSCTL_OUT(req, &xt, sizeof xt);
+ tcp_inptoxtp(inp, &xt);
+ INP_RUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xt, sizeof xt);
+ if (error)
+ break;
+ } else {
+ /*
+ * Skipped inp: unlock it, and do not let the
+ * visibility error end the whole request.
+ */
+ INP_RUNLOCK(inp);
+ error = 0;
+ }
} else
INP_RUNLOCK(inp);
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
if (!error) {
/*
@@ -2236,14 +2203,13 @@
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_LIST_RLOCK(&V_tcbinfo);
xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
- xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
- INP_LIST_RUNLOCK(&V_tcbinfo);
+ xig.xig_count = V_tcbinfo.ipi_count +
+ counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
+
return (error);
}
@@ -2257,6 +2223,7 @@
{
struct xucred xuc;
struct sockaddr_in addrs[2];
+ struct epoch_tracker et;
struct inpcb *inp;
int error;
@@ -2266,8 +2233,10 @@
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
+ NET_EPOCH_ENTER(et);
inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
if (inp->inp_socket == NULL)
error = ENOENT;
@@ -2292,6 +2261,7 @@
static int
tcp6_getcred(SYSCTL_HANDLER_ARGS)
{
+ struct epoch_tracker et;
struct xucred xuc;
struct sockaddr_in6 addrs[2];
struct inpcb *inp;
@@ -2319,6 +2289,7 @@
return (EINVAL);
}
+ NET_EPOCH_ENTER(et);
#ifdef INET
if (mapped == 1)
inp = in_pcblookup(&V_tcbinfo,
@@ -2332,6 +2303,7 @@
&addrs[1].sin6_addr, addrs[1].sin6_port,
&addrs[0].sin6_addr, addrs[0].sin6_port,
INPLOOKUP_RLOCKPCB, NULL);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
if (inp->inp_socket == NULL)
error = ENOENT;
@@ -2365,7 +2337,6 @@
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
struct icmp *icp;
struct in_conninfo inc;
- struct epoch_tracker et;
tcp_seq icmp_tcp_seq;
int mtu;
@@ -2397,7 +2368,6 @@
icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@@ -2462,7 +2432,6 @@
out:
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#endif /* INET */
@@ -2480,7 +2449,6 @@
struct ip6ctlparam *ip6cp = NULL;
const struct sockaddr_in6 *sa6_src = NULL;
struct in_conninfo inc;
- struct epoch_tracker et;
struct tcp_ports {
uint16_t th_sport;
uint16_t th_dport;
@@ -2542,7 +2510,6 @@
}
bzero(&t_ports, sizeof(struct tcp_ports));
m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport,
&ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@@ -2614,7 +2581,6 @@
out:
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#endif /* INET6 */
@@ -2770,7 +2736,7 @@
{
struct tcpcb *tp;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -3042,7 +3008,7 @@
default:
return (EINVAL);
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -3081,7 +3047,7 @@
INP_WUNLOCK(inp);
} else
error = ESRCH;
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -3157,7 +3123,7 @@
default:
return (EINVAL);
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -3173,7 +3139,7 @@
break;
#endif
}
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0 ||
inp->inp_socket == NULL) {
Index: sys/netinet/tcp_syncache.h
===================================================================
--- sys/netinet/tcp_syncache.h
+++ sys/netinet/tcp_syncache.h
@@ -48,7 +48,7 @@
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *);
void syncache_badack(struct in_conninfo *);
-int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
+int syncache_pcblist(struct sysctl_req *);
struct syncache {
TAILQ_ENTRY(syncache) sc_hash;
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -771,7 +771,7 @@
int error;
char *s;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
/*
* Ok, create the full blown connection, and set things up
@@ -1091,11 +1091,7 @@
char *s;
bool locked;
- /*
- * Global TCP locks are held because we manipulate the PCB lists
- * and create a new socket.
- */
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
("%s: can handle only ACK", __func__));
@@ -1331,11 +1327,7 @@
struct tcpcb *tp;
unsigned int *pending_counter;
- /*
- * Global TCP locks are held because we manipulate the PCB lists
- * and create a new socket.
- */
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
*lsop = syncache_socket(sc, *lsop, m);
@@ -2460,46 +2452,43 @@
* amount of space the caller allocated for this function to use.
*/
int
-syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
+syncache_pcblist(struct sysctl_req *req)
{
struct xtcpcb xt;
struct syncache *sc;
struct syncache_head *sch;
- int count, error, i;
+ int error, i;
+
+ /* Fields that are the same for every entry are filled in once. */
+ bzero(&xt, sizeof(xt));
+ xt.xt_len = sizeof(xt);
+ xt.t_state = TCPS_SYN_RECEIVED;
+ xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
+ xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket);
+ xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
+ xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
- for (count = 0, error = 0, i = 0; i < V_tcp_syncache.hashsize; i++) {
+ for (i = 0; i < V_tcp_syncache.hashsize; i++) {
sch = &V_tcp_syncache.hashbase[i];
SCH_LOCK(sch);
TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
- if (count >= max_pcbs) {
- SCH_UNLOCK(sch);
- goto exit;
- }
if (cr_cansee(req->td->td_ucred, sc->sc_cred) != 0)
continue;
- bzero(&xt, sizeof(xt));
- xt.xt_len = sizeof(xt);
if (sc->sc_inc.inc_flags & INC_ISIPV6)
xt.xt_inp.inp_vflag = INP_IPV6;
else
xt.xt_inp.inp_vflag = INP_IPV4;
bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc,
sizeof (struct in_conninfo));
- xt.t_state = TCPS_SYN_RECEIVED;
- xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
- xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket);
- xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
- xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
error = SYSCTL_OUT(req, &xt, sizeof xt);
if (error) {
SCH_UNLOCK(sch);
- goto exit;
+ /* Hand the copy-out failure back to the caller. */
+ return (error);
}
- count++;
}
SCH_UNLOCK(sch);
}
-exit:
- *pcbs_exported = count;
- return error;
+
+ return (0);
}
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -339,9 +339,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_close(tp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
} else {
@@ -353,9 +353,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_close(tp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -478,7 +478,7 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);
#ifdef TCPDEBUG
@@ -487,7 +487,7 @@
PRU_SLOWTIMO);
#endif
TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
out:
CURVNET_RESTORE();
@@ -542,9 +542,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -559,9 +559,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -628,9 +628,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -209,10 +209,10 @@
struct tcptw *tw;
struct epoch_tracker et;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
tcp_twclose(tw, 0);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
TW_LOCK_DESTROY(V_tw_lock);
uma_zdestroy(V_tcptw_zone);
@@ -236,7 +236,7 @@
bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
#endif
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
/* A dropped inp should never transition to TIME_WAIT state. */
@@ -382,7 +382,7 @@
int thflags;
tcp_seq seq;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
/*
@@ -488,7 +488,7 @@
inp = tw->tw_inpcb;
KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* in_pcbfree() */
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
tcp_tw_2msl_stop(tw, reuse);
@@ -644,7 +644,7 @@
tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tw->tw_inpcb);
TW_WLOCK(V_tw_lock);
@@ -662,7 +662,7 @@
struct inpcb *inp;
int released __unused;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
TW_WLOCK(V_tw_lock);
inp = tw->tw_inpcb;
@@ -689,25 +689,8 @@
{
struct tcptw *tw;
struct inpcb *inp;
- struct epoch_tracker et;
-#ifdef INVARIANTS
- if (reuse) {
- /*
- * Exclusive pcbinfo lock is not required in reuse case even if
- * two inpcb locks can be acquired simultaneously:
- * - the inpcb transitioning to TIME_WAIT state in
- * tcp_tw_start(),
- * - the inpcb closed by tcp_twclose().
- *
- * It is because only inpcbs in FIN_WAIT2 or CLOSING states can
- * transition in TIME_WAIT state. Then a pcbcb cannot be in
- * TIME_WAIT list and transitioning to TIME_WAIT state at same
- * time.
- */
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
+ NET_EPOCH_ASSERT();
for (;;) {
TW_RLOCK(V_tw_lock);
@@ -723,12 +706,10 @@
in_pcbref(inp);
TW_RUNLOCK(V_tw_lock);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
tw = intotw(inp);
if (in_pcbrele_wlocked(inp)) {
if (__predict_true(tw == NULL)) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
continue;
} else {
/* This should not happen as in TIMEWAIT
@@ -747,7 +728,6 @@
"|| inp last reference) && tw != "
"NULL", __func__);
#endif
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
break;
}
}
@@ -755,12 +735,10 @@
if (tw == NULL) {
/* tcp_twclose() has already been called */
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
continue;
}
tcp_twclose(tw, reuse);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (reuse)
return tw;
}
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -283,7 +283,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
rlock = 1;
}
INP_WLOCK(inp);
@@ -291,7 +291,7 @@
("tcp_usr_detach: inp_socket == NULL"));
tcp_detach(so, inp);
if (rlock)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
}
#ifdef INET
@@ -706,7 +706,7 @@
int error = 0;
TCPDEBUG0;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
INP_WLOCK(inp);
@@ -723,7 +723,7 @@
TCPDEBUG2(PRU_DISCONNECT);
TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -792,7 +792,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -819,7 +819,7 @@
TCPDEBUG2(PRU_ACCEPT);
TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
if (error == 0) {
if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -842,7 +842,7 @@
struct epoch_tracker et;
TCPDEBUG0;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("inp == NULL"));
INP_WLOCK(inp);
@@ -861,7 +861,7 @@
TCPDEBUG2(PRU_SHUTDOWN);
TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -921,10 +921,10 @@
tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *nam, struct mbuf *control, struct thread *td)
{
+ struct epoch_tracker et;
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
- struct epoch_tracker net_et;
#ifdef INET
#ifdef INET6
struct sockaddr_in sin;
@@ -940,11 +940,11 @@
TCPDEBUG0;
/*
- * We require the pcbinfo lock if we will close the socket as part of
- * this call.
+ * We require the pcbinfo "read lock" if we will close the socket
+ * as part of this call.
*/
if (flags & PRUS_EOF)
- INP_INFO_RLOCK_ET(&V_tcbinfo, net_et);
+ NET_EPOCH_ENTER(et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
@@ -1135,7 +1135,7 @@
* Close the send side of the connection after
* the data is sent.
*/
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
socantsendmore(so);
tcp_usrclosed(tp);
}
@@ -1231,7 +1231,7 @@
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
if (flags & PRUS_EOF)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, net_et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -1275,7 +1275,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_abort: inp_socket == NULL"));
@@ -1301,7 +1301,7 @@
}
INP_WUNLOCK(inp);
dropped:
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
}
/*
@@ -1318,7 +1318,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_close: inp_socket == NULL"));
@@ -1342,7 +1342,7 @@
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
}
/*
@@ -2317,10 +2317,10 @@
}
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
error = in_pcballoc(so, &V_tcbinfo);
if (error) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (error);
}
inp = sotoinpcb(so);
@@ -2338,12 +2338,12 @@
if (tp == NULL) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (ENOBUFS);
}
tp->t_state = TCPS_CLOSED;
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
TCPSTATES_INC(TCPS_CLOSED);
return (0);
}
@@ -2362,7 +2362,7 @@
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
/*
@@ -2401,7 +2401,7 @@
tcp_usrclosed(struct tcpcb *tp)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tp->t_inpcb);
switch (tp->t_state) {
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -360,7 +360,7 @@
struct tcphdr *th, struct socket **lsop)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
return (syncache_expand(inc, to, th, lsop, NULL));
}
@@ -390,8 +390,6 @@
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
-
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
if (!tcp_twcheck(inp, NULL, th, NULL, 0))
return (EADDRINUSE);
} else {
@@ -529,7 +527,7 @@
(void) tp->t_fb->tfb_tcp_output(tp);
} else {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
tp = tcp_drop(tp, err);
if (tp == NULL)
INP_WLOCK(inp); /* re-acquire */
Index: sys/netinet/udp_usrreq.c
===================================================================
--- sys/netinet/udp_usrreq.c
+++ sys/netinet/udp_usrreq.c
@@ -399,7 +399,6 @@
struct sockaddr_in udp_in[2];
struct mbuf *m;
struct m_tag *fwd_tag;
- struct epoch_tracker et;
int cscov_partial, iphlen;
m = *mp;
@@ -529,7 +528,8 @@
struct inpcb *last;
struct inpcbhead *pcblist;
- INP_INFO_RLOCK_ET(pcbinfo, et);
+ NET_EPOCH_ASSERT();
+
pcblist = udp_get_pcblist(proto);
last = NULL;
CK_LIST_FOREACH(inp, pcblist, inp_list) {
@@ -636,7 +636,6 @@
UDPSTAT_INC(udps_noportbcast);
if (inp)
INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(pcbinfo, et);
goto badunlocked;
}
if (proto == IPPROTO_UDPLITE)
@@ -646,7 +645,6 @@
if (udp_append(last, ip, m, iphlen, udp_in) == 0)
INP_RUNLOCK(last);
inp_lost:
- INP_INFO_RUNLOCK_ET(pcbinfo, et);
return (IPPROTO_DONE);
}
@@ -854,87 +852,53 @@
static int
udp_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ int error;
+
+ if (req->newptr != 0)
+ return (EPERM);
- /*
- * The process of preparing the PCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
if (req->oldptr == 0) {
+ int n;
+
n = V_udbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
}
- if (req->newptr != 0)
- return (EPERM);
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_INFO_RLOCK_ET(&V_udbinfo, et);
- gencnt = V_udbinfo.ipi_gencnt;
- n = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
-
- error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
- + n * sizeof(struct xinpcb));
- if (error != 0)
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_udbinfo.ipi_count;
+ xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == NULL)
- return (ENOMEM);
-
- INP_INFO_RLOCK_ET(&V_udbinfo, et);
- for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt &&
- cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ if (inp->inp_gencnt <= xig.xig_gen &&
+ cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
+ if (error)
+ break;
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_udbinfo);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_udbinfo);
+ NET_EPOCH_EXIT(et);
if (!error) {
/*
@@ -943,14 +907,12 @@
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK_ET(&V_udbinfo, et);
xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
+
return (error);
}
@@ -964,6 +926,7 @@
{
struct xucred xuc;
struct sockaddr_in addrs[2];
+ struct epoch_tracker et;
struct inpcb *inp;
int error;
@@ -973,9 +936,11 @@
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
+ NET_EPOCH_ENTER(et);
inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
addrs[0].sin_addr, addrs[0].sin_port,
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
INP_RLOCK_ASSERT(inp);
if (inp->inp_socket == NULL)
@@ -1116,9 +1081,6 @@
}
#ifdef INET
-#define UH_WLOCKED 2
-#define UH_RLOCKED 1
-#define UH_UNLOCKED 0
static int
udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *td)
@@ -1134,19 +1096,12 @@
int error = 0;
int ipflags;
u_short fport, lport;
- int unlock_udbinfo, unlock_inp;
u_char tos;
uint8_t pr;
uint16_t cscov = 0;
uint32_t flowid = 0;
uint8_t flowtype = M_HASHTYPE_NONE;
- /*
- * udp_output() may need to temporarily bind or connect the current
- * inpcb. As such, we don't know up front whether we will need the
- * pcbinfo lock or not. Do any work to decide what is needed up
- * front before acquiring any locks.
- */
if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
if (control)
m_freem(control);
@@ -1156,28 +1111,22 @@
src.sin_family = 0;
sin = (struct sockaddr_in *)addr;
-retry:
+
+ /*
+ * udp_output() may need to temporarily bind or connect the current
+ * inpcb. As such, we don't know up front whether we will need the
+ * pcbinfo lock or not. Do any work to decide what is needed up
+ * front before acquiring any locks.
+ *
+ * We will need network epoch in either case, to safely lookup into
+ * pcb hash.
+ */
if (sin == NULL ||
- (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
+ (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0))
INP_WLOCK(inp);
- /*
- * In case we lost a race and another thread bound addr/port
- * on the inp we cannot keep the wlock (which still would be
- * fine) as further down, based on these values we make
- * decisions for the pcbinfo lock. If the locks are not in
- * synch the assertions on unlock will fire, hence we go for
- * one retry loop.
- */
- if (sin != NULL && (inp->inp_laddr.s_addr != INADDR_ANY ||
- inp->inp_lport != 0)) {
- INP_WUNLOCK(inp);
- goto retry;
- }
- unlock_inp = UH_WLOCKED;
- } else {
+ else
INP_RLOCK(inp);
- unlock_inp = UH_RLOCKED;
- }
+ NET_EPOCH_ENTER(et);
tos = inp->inp_ip_tos;
if (control != NULL) {
/*
@@ -1185,13 +1134,9 @@
* stored in a single mbuf.
*/
if (control->m_next) {
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
m_freem(control);
- m_freem(m);
- return (EINVAL);
+ error = EINVAL;
+ goto release;
}
for (; control->m_len > 0;
control->m_data += CMSG_ALIGN(cm->cmsg_len),
@@ -1262,56 +1207,11 @@
}
m_freem(control);
}
- if (error) {
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
- m_freem(m);
- return (error);
- }
+ if (error)
+ goto release;
- /*
- * In the old days, depending on whether or not the application had
- * bound or connected the socket, we had to do varying levels of work.
- * The optimal case was for a connected UDP socket, as a global lock
- * wasn't required at all.
- * In order to decide which we need, we required stability of the
- * inpcb binding, which we ensured by acquiring a read lock on the
- * inpcb. This didn't strictly follow the lock order, so we played
- * the trylock and retry game.
- * With the re-introduction of the route-cache in some cases, we started
- * to acquire an early inp wlock and a possible race during re-lock
- * went away. With the introduction of epoch(9) some read locking
- * became epoch(9) and the lock-order issues also went away.
- * Due to route-cache we may now hold more conservative locks than
- * otherwise required and have split up the 2nd case in case 2 and 3
- * in order to keep the udpinfo lock level in sync with the inp one
- * for the IP_SENDSRCADDR case below.
- */
pr = inp->inp_socket->so_proto->pr_protocol;
pcbinfo = udp_get_inpcbinfo(pr);
- if (sin != NULL &&
- (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
- INP_HASH_WLOCK(pcbinfo);
- unlock_udbinfo = UH_WLOCKED;
- } else if (sin != NULL &&
- (sin->sin_addr.s_addr == INADDR_ANY ||
- sin->sin_addr.s_addr == INADDR_BROADCAST ||
- inp->inp_laddr.s_addr == INADDR_ANY ||
- inp->inp_lport == 0)) {
- INP_HASH_RLOCK_ET(pcbinfo, et);
- unlock_udbinfo = UH_RLOCKED;
- } else if (src.sin_family == AF_INET) {
- if (unlock_inp == UH_WLOCKED) {
- INP_HASH_WLOCK(pcbinfo);
- unlock_udbinfo = UH_WLOCKED;
- } else {
- INP_HASH_RLOCK_ET(pcbinfo, et);
- unlock_udbinfo = UH_RLOCKED;
- }
- } else
- unlock_udbinfo = UH_UNLOCKED;
/*
* If the IP_SENDSRCADDR control message was specified, override the
@@ -1387,7 +1287,6 @@
if (inp->inp_laddr.s_addr == INADDR_ANY &&
inp->inp_lport == 0) {
INP_WLOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(pcbinfo);
/*
* Remember addr if jailed, to prevent
* rebinding.
@@ -1395,7 +1294,10 @@
if (prison_flag(td->td_ucred, PR_IP4))
inp->inp_laddr = laddr;
inp->inp_lport = lport;
- if (in_pcbinshash(inp) != 0) {
+ INP_HASH_WLOCK(pcbinfo);
+ error = in_pcbinshash(inp);
+ INP_HASH_WUNLOCK(pcbinfo);
+ if (error != 0) {
inp->inp_lport = 0;
error = EAGAIN;
goto release;
@@ -1560,48 +1462,20 @@
ipflags |= IP_NODEFAULTFLOWID;
#endif /* RSS */
- if (unlock_udbinfo == UH_WLOCKED)
- INP_HASH_WUNLOCK(pcbinfo);
- else if (unlock_udbinfo == UH_RLOCKED)
- INP_HASH_RUNLOCK_ET(pcbinfo, et);
if (pr == IPPROTO_UDPLITE)
UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
else
UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
error = ip_output(m, inp->inp_options,
- (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
+ INP_WLOCKED(inp) ? &inp->inp_route : NULL, ipflags,
inp->inp_moptions, inp);
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
+ NET_EPOCH_EXIT(et);
return (error);
release:
- if (unlock_udbinfo == UH_WLOCKED) {
- KASSERT(unlock_inp == UH_WLOCKED,
- ("%s: excl udbinfo lock %#03x, shared inp lock %#03x, "
- "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
- "src fam %#04x",
- __func__, unlock_udbinfo, unlock_inp, sin,
- (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
- inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
- INP_HASH_WUNLOCK(pcbinfo);
- INP_WUNLOCK(inp);
- } else if (unlock_udbinfo == UH_RLOCKED) {
- KASSERT(unlock_inp == UH_RLOCKED,
- ("%s: shared udbinfo lock %#03x, excl inp lock %#03x, "
- "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
- "src fam %#04x",
- __func__, unlock_udbinfo, unlock_inp, sin,
- (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
- inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
- INP_HASH_RUNLOCK_ET(pcbinfo, et);
- INP_RUNLOCK(inp);
- } else if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
+ NET_EPOCH_EXIT(et);
m_freem(m);
return (error);
}
Index: sys/netinet6/icmp6.c
===================================================================
--- sys/netinet6/icmp6.c
+++ sys/netinet6/icmp6.c
@@ -1893,9 +1893,10 @@
struct inpcb *last = NULL;
struct sockaddr_in6 fromsa;
struct icmp6_hdr *icmp6;
- struct epoch_tracker et;
struct mbuf *opts = NULL;
+ NET_EPOCH_ASSERT();
+
#ifndef PULLDOWN_TEST
/* this is assumed to be safe. */
icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
@@ -1920,7 +1921,6 @@
return (IPPROTO_DONE);
}
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -2002,7 +2002,6 @@
}
last = inp;
}
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
if (last->inp_flags & INP_CONTROLOPTS)
ip6_savecontrol(last, m, &opts);
Index: sys/netinet6/in6_pcb.c
===================================================================
--- sys/netinet6/in6_pcb.c
+++ sys/netinet6/in6_pcb.c
@@ -1245,7 +1245,6 @@
{
struct inpcb *inp;
- INP_HASH_RLOCK(pcbinfo);
inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
(lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
if (inp != NULL) {
@@ -1272,7 +1271,6 @@
}
#endif
}
- INP_HASH_RUNLOCK(pcbinfo);
return (inp);
}
Index: sys/netinet6/raw_ip6.c
===================================================================
--- sys/netinet6/raw_ip6.c
+++ sys/netinet6/raw_ip6.c
@@ -165,7 +165,8 @@
struct inpcb *last = NULL;
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
- struct epoch_tracker et;
+
+ NET_EPOCH_ASSERT();
RIP6STAT_INC(rip6s_ipackets);
@@ -173,7 +174,6 @@
ifp = m->m_pkthdr.rcvif;
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -303,7 +303,6 @@
skip_2:
INP_RUNLOCK(inp);
}
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
/*
* Check AH/ESP integrity.
Index: sys/netinet6/udp6_usrreq.c
===================================================================
--- sys/netinet6/udp6_usrreq.c
+++ sys/netinet6/udp6_usrreq.c
@@ -214,12 +214,13 @@
int off = *offp;
int cscov_partial;
int plen, ulen;
- struct epoch_tracker et;
struct sockaddr_in6 fromsa[2];
struct m_tag *fwd_tag;
uint16_t uh_sum;
uint8_t nxt;
+ NET_EPOCH_ASSERT();
+
ifp = m->m_pkthdr.rcvif;
#ifndef PULLDOWN_TEST
@@ -301,7 +302,6 @@
struct inpcbhead *pcblist;
struct ip6_moptions *imo;
- INP_INFO_RLOCK_ET(pcbinfo, et);
/*
* In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
@@ -395,7 +395,7 @@
UDP_PROBE(receive, NULL, last,
ip6, last, uh);
if (udp6_append(last, n, off, fromsa))
- goto inp_lost;
+ return (IPPROTO_DONE);
}
INP_RUNLOCK(last);
}
@@ -422,7 +422,7 @@
*/
UDPSTAT_INC(udps_noport);
UDPSTAT_INC(udps_noportmcast);
- goto badheadlocked;
+ goto badunlocked;
}
INP_RLOCK(last);
if (__predict_true(last->inp_flags2 & INP_FREED) == 0) {
@@ -434,8 +434,6 @@
INP_RUNLOCK(last);
} else
INP_RUNLOCK(last);
- inp_lost:
- INP_INFO_RUNLOCK_ET(pcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -522,8 +520,6 @@
INP_RUNLOCK(inp);
return (IPPROTO_DONE);
-badheadlocked:
- INP_INFO_RUNLOCK_ET(pcbinfo, et);
badunlocked:
if (m)
m_freem(m);
@@ -638,6 +634,7 @@
{
struct xucred xuc;
struct sockaddr_in6 addrs[2];
+ struct epoch_tracker et;
struct inpcb *inp;
int error;
@@ -656,9 +653,11 @@
(error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
return (error);
}
+ NET_EPOCH_ENTER(et);
inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr,
addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port,
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
INP_RLOCK_ASSERT(inp);
if (inp->inp_socket == NULL)
@@ -679,14 +678,10 @@
SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
-#define UH_WLOCKED 2
-#define UH_RLOCKED 1
-#define UH_UNLOCKED 0
static int
udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
struct sockaddr *addr6, struct mbuf *control, struct thread *td)
{
- struct inpcbinfo *pcbinfo;
struct inpcb *inp;
struct ip6_hdr *ip6;
struct udphdr *udp6;
@@ -698,7 +693,7 @@
u_int32_t ulen, plen;
uint16_t cscov;
u_short fport;
- uint8_t nxt, unlock_inp, unlock_udbinfo;
+ uint8_t nxt;
/* addr6 has been validated in udp6_send(). */
sin6 = (struct sockaddr_in6 *)addr6;
@@ -741,30 +736,17 @@
* - on connected sockets (sin6 is NULL) for route cache updates,
* - when we are not bound to an address and source port (it is
* in6_pcbsetport() which will require the write lock).
+ *
+ * We check the inp fields before actually locking the inp, so
+ * there is a race here: we may WLOCK the inp only to find that
+ * another thread has already bound it. This is fine.
*/
-retry:
if (sin6 == NULL || (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
- inp->inp_lport == 0)) {
+ inp->inp_lport == 0))
INP_WLOCK(inp);
- /*
- * In case we lost a race and another thread bound addr/port
- * on the inp we cannot keep the wlock (which still would be
- * fine) as further down, based on these values we make
- * decisions for the pcbinfo lock. If the locks are not in
- * synch the assertions on unlock will fire, hence we go for
- * one retry loop.
- */
- if (sin6 != NULL &&
- (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
- inp->inp_lport != 0)) {
- INP_WUNLOCK(inp);
- goto retry;
- }
- unlock_inp = UH_WLOCKED;
- } else {
+ else
INP_RLOCK(inp);
- unlock_inp = UH_RLOCKED;
- }
+
nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
IPPROTO_UDP : IPPROTO_UDPLITE;
@@ -788,10 +770,7 @@
* potential race in which the factors causing us to
* select the UDPv4 output routine are invalidated?
*/
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
if (sin6)
in6_sin6_2_sin_in_sock((struct sockaddr *)sin6);
pru = inetsw[ip_protox[nxt]].pr_usrreqs;
@@ -806,21 +785,17 @@
* Given this is either an IPv6-only socket or no INET is
* supported we will fail the send if the given destination
* address is a v4mapped address.
+ *
+ * XXXGL: do we leak m and control?
*/
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
return (EINVAL);
}
if (control) {
if ((error = ip6_setpktopts(control, &opt,
inp->in6p_outputopts, td->td_ucred, nxt)) != 0) {
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
ip6_clearpktopts(&opt, -1);
if (control)
m_freem(control);
@@ -831,20 +806,7 @@
} else
optp = inp->in6p_outputopts;
- pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
- if (sin6 != NULL &&
- IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) {
- INP_HASH_WLOCK(pcbinfo);
- unlock_udbinfo = UH_WLOCKED;
- } else if (sin6 != NULL &&
- (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
- IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
- inp->inp_lport == 0)) {
- INP_HASH_RLOCK_ET(pcbinfo, et);
- unlock_udbinfo = UH_RLOCKED;
- } else
- unlock_udbinfo = UH_UNLOCKED;
-
+ NET_EPOCH_ENTER(et);
if (sin6) {
/*
@@ -880,9 +842,14 @@
laddr = &in6a;
if (inp->inp_lport == 0) {
+ struct inpcbinfo *pcbinfo;
INP_WLOCK_ASSERT(inp);
+
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
+ INP_HASH_WLOCK(pcbinfo);
error = in6_pcbsetport(laddr, inp, td->td_ucred);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error != 0) {
/* Undo an address bind that may have occurred. */
inp->in6p_laddr = in6addr_any;
@@ -1006,21 +973,15 @@
#endif
UDPSTAT_INC(udps_opackets);
- if (unlock_udbinfo == UH_WLOCKED)
- INP_HASH_WUNLOCK(pcbinfo);
- else if (unlock_udbinfo == UH_RLOCKED)
- INP_HASH_RUNLOCK_ET(pcbinfo, et);
if (nxt == IPPROTO_UDPLITE)
UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
else
UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
error = ip6_output(m, optp,
- (unlock_inp == UH_WLOCKED) ? &inp->inp_route6 : NULL, flags,
+ INP_WLOCKED(inp) ? &inp->inp_route6 : NULL, flags,
inp->in6p_moptions, NULL, inp);
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
+ NET_EPOCH_EXIT(et);
if (control) {
ip6_clearpktopts(&opt, -1);
@@ -1029,22 +990,8 @@
return (error);
release:
- if (unlock_udbinfo == UH_WLOCKED) {
- KASSERT(unlock_inp == UH_WLOCKED, ("%s: excl udbinfo lock, "
- "non-excl inp lock: pcbinfo %p %#x inp %p %#x",
- __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp));
- INP_HASH_WUNLOCK(pcbinfo);
- INP_WUNLOCK(inp);
- } else if (unlock_udbinfo == UH_RLOCKED) {
- KASSERT(unlock_inp == UH_RLOCKED, ("%s: non-excl udbinfo lock, "
- "excl inp lock: pcbinfo %p %#x inp %p %#x",
- __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp));
- INP_HASH_RUNLOCK_ET(pcbinfo, et);
- INP_RUNLOCK(inp);
- } else if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
+ NET_EPOCH_EXIT(et);
if (control) {
ip6_clearpktopts(&opt, -1);
m_freem(control);

File Metadata

Mime Type
text/plain
Expires
Fri, Jun 19, 8:10 PM (13 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34096077
Default Alt Text
D22197.id63806.diff (73 KB)

Event Timeline