Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F159854753
D22197.id63806.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
73 KB
Referenced Files
None
Subscribers
None
D22197.id63806.diff
View Options
Index: sys/dev/cxgbe/cxgbei/cxgbei.c
===================================================================
--- sys/dev/cxgbe/cxgbei/cxgbei.c
+++ sys/dev/cxgbe/cxgbei/cxgbei.c
@@ -412,12 +412,12 @@
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
icl_cxgbei_conn_pdu_free(NULL, ip);
#ifdef INVARIANTS
Index: sys/dev/cxgbe/tom/t4_connect.c
===================================================================
--- sys/dev/cxgbe/tom/t4_connect.c
+++ sys/dev/cxgbe/tom/t4_connect.c
@@ -124,12 +124,12 @@
CURVNET_SET(toep->vnet);
if (status != EAGAIN)
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
toe_connect_failed(tod, inp, status);
final_cpl_received(toep); /* unlocks inp */
if (status != EAGAIN)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
}
Index: sys/dev/cxgbe/tom/t4_cpl_io.c
===================================================================
--- sys/dev/cxgbe/tom/t4_cpl_io.c
+++ sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -1214,7 +1214,7 @@
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1260,7 +1260,7 @@
case TCPS_FIN_WAIT_2:
tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
INP_WLOCK(inp);
@@ -1273,7 +1273,7 @@
}
done:
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
}
@@ -1303,7 +1303,7 @@
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1321,7 +1321,7 @@
tcp_twstart(tp);
release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
INP_WLOCK(inp);
@@ -1346,7 +1346,7 @@
}
done:
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
}
@@ -1423,7 +1423,7 @@
inp = toep->inp;
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for tcp_close */
+ NET_EPOCH_ENTER(et); /* for tcp_close */
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1457,7 +1457,7 @@
final_cpl_received(toep);
done:
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
return (0);
@@ -1572,12 +1572,12 @@
INP_WUNLOCK(inp);
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
Index: sys/dev/cxgbe/tom/t4_listen.c
===================================================================
--- sys/dev/cxgbe/tom/t4_listen.c
+++ sys/dev/cxgbe/tom/t4_listen.c
@@ -949,7 +949,7 @@
#endif
struct toepcb *toep = synqe->toep;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
+ NET_EPOCH_ASSERT(); /* prevents bad race with accept() */
INP_WLOCK_ASSERT(inp);
KASSERT(synqe->flags & TPF_SYNQE,
("%s: %p not a synq_entry?", __func__, arg));
@@ -1242,12 +1242,12 @@
REJECT_PASS_ACCEPT_REQ(true);
/* Don't offload if the 4-tuple is already in use */
- INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for 4-tuple check */
+ NET_EPOCH_ENTER(et); /* for 4-tuple check */
if (toe_4tuple_check(&inc, &th, ifp) != 0) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
REJECT_PASS_ACCEPT_REQ(false);
}
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
inp = lctx->inp; /* listening socket, not owned by TOE */
INP_WLOCK(inp);
@@ -1396,7 +1396,7 @@
("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
CURVNET_SET(lctx->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et); /* for syncache_expand */
+ NET_EPOCH_ENTER(et); /* for syncache_expand */
INP_WLOCK(inp);
CTR6(KTR_CXGBE,
@@ -1412,7 +1412,7 @@
reset:
send_reset_synqe(TOEDEV(ifp), synqe);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
}
@@ -1471,7 +1471,7 @@
inp = release_synqe(sc, synqe);
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
Index: sys/dev/cxgbe/tom/t4_tls.c
===================================================================
--- sys/dev/cxgbe/tom/t4_tls.c
+++ sys/dev/cxgbe/tom/t4_tls.c
@@ -2125,12 +2125,12 @@
INP_WUNLOCK(inp);
CURVNET_SET(toep->vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return (0);
Index: sys/kern/uipc_ktls.c
===================================================================
--- sys/kern/uipc_ktls.c
+++ sys/kern/uipc_ktls.c
@@ -1137,7 +1137,7 @@
* the send tag is fixed or just rely on timers?
*/
} else {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
if (!in_pcbrele_wlocked(inp)) {
if (!(inp->inp_flags & INP_TIMEWAIT) &&
@@ -1150,7 +1150,7 @@
} else
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
counter_u64_add(ktls_ifnet_reset_failed, 1);
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -586,6 +586,7 @@
#define INP_TRY_WLOCK(inp) rw_try_wlock(&(inp)->inp_lock)
#define INP_RUNLOCK(inp) rw_runlock(&(inp)->inp_lock)
#define INP_WUNLOCK(inp) rw_wunlock(&(inp)->inp_lock)
+#define INP_UNLOCK(inp) rw_unlock(&(inp)->inp_lock)
#define INP_TRY_UPGRADE(inp) rw_try_upgrade(&(inp)->inp_lock)
#define INP_DOWNGRADE(inp) rw_downgrade(&(inp)->inp_lock)
#define INP_WLOCKED(inp) rw_wowned(&(inp)->inp_lock)
@@ -628,19 +629,14 @@
#define INP_INFO_LOCK_INIT(ipi, d) \
mtx_init(&(ipi)->ipi_lock, (d), NULL, MTX_DEF| MTX_RECURSE)
#define INP_INFO_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_lock)
-#define INP_INFO_RLOCK_ET(ipi, et) NET_EPOCH_ENTER((et))
#define INP_INFO_WLOCK(ipi) mtx_lock(&(ipi)->ipi_lock)
#define INP_INFO_TRY_WLOCK(ipi) mtx_trylock(&(ipi)->ipi_lock)
#define INP_INFO_WLOCKED(ipi) mtx_owned(&(ipi)->ipi_lock)
-#define INP_INFO_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT((et))
-#define INP_INFO_RUNLOCK_TP(ipi, tp) NET_EPOCH_EXIT(*(tp)->t_inpcb->inp_et)
#define INP_INFO_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_lock)
#define INP_INFO_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_lock))
-#define INP_INFO_RLOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt))
#define INP_INFO_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_lock, MA_OWNED)
#define INP_INFO_WUNLOCK_ASSERT(ipi) \
mtx_assert(&(ipi)->ipi_lock, MA_NOTOWNED)
-#define INP_INFO_UNLOCK_ASSERT(ipi) MPASS(!in_epoch(net_epoch_preempt) && !mtx_owned(&(ipi)->ipi_lock))
#define INP_LIST_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
@@ -663,11 +659,7 @@
#define INP_HASH_LOCK_INIT(ipi, d) mtx_init(&(ipi)->ipi_hash_lock, (d), NULL, MTX_DEF)
#define INP_HASH_LOCK_DESTROY(ipi) mtx_destroy(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RLOCK(ipi) struct epoch_tracker inp_hash_et; epoch_enter_preempt(net_epoch_preempt, &inp_hash_et)
-#define INP_HASH_RLOCK_ET(ipi, et) epoch_enter_preempt(net_epoch_preempt, &(et))
#define INP_HASH_WLOCK(ipi) mtx_lock(&(ipi)->ipi_hash_lock)
-#define INP_HASH_RUNLOCK(ipi) NET_EPOCH_EXIT(inp_hash_et)
-#define INP_HASH_RUNLOCK_ET(ipi, et) NET_EPOCH_EXIT((et))
#define INP_HASH_WUNLOCK(ipi) mtx_unlock(&(ipi)->ipi_hash_lock)
#define INP_HASH_LOCK_ASSERT(ipi) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ipi)->ipi_hash_lock))
#define INP_HASH_WLOCK_ASSERT(ipi) mtx_assert(&(ipi)->ipi_hash_lock, MA_OWNED);
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -515,7 +515,7 @@
#ifdef INVARIANTS
if (pcbinfo == &V_tcbinfo) {
- INP_INFO_RLOCK_ASSERT(pcbinfo);
+ NET_EPOCH_ASSERT();
} else {
INP_INFO_WLOCK_ASSERT(pcbinfo);
}
@@ -2252,12 +2252,10 @@
struct inpcb *inp, *tmpinp;
u_short fport = fport_arg, lport = lport_arg;
-#ifdef INVARIANTS
KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0,
("%s: invalid lookup flags %d", __func__, lookupflags));
- if (!mtx_owned(&pcbinfo->ipi_hash_lock))
- MPASS(in_epoch_verbose(net_epoch_preempt, 1));
-#endif
+ INP_HASH_LOCK_ASSERT(pcbinfo);
+
/*
* First look for an exact match.
*/
@@ -2384,7 +2382,6 @@
{
struct inpcb *inp;
- INP_HASH_RLOCK(pcbinfo);
inp = in_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
(lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
if (inp != NULL) {
@@ -2411,7 +2408,7 @@
}
#endif
}
- INP_HASH_RUNLOCK(pcbinfo);
+
return (inp);
}
@@ -2657,7 +2654,7 @@
#ifdef INVARIANTS
if (pcbinfo == &V_tcbinfo) {
- INP_INFO_RLOCK_ASSERT(pcbinfo);
+ NET_EPOCH_ASSERT();
} else {
INP_INFO_WLOCK_ASSERT(pcbinfo);
}
Index: sys/netinet/ip_divert.c
===================================================================
--- sys/netinet/ip_divert.c
+++ sys/netinet/ip_divert.c
@@ -192,7 +192,8 @@
u_int16_t nport;
struct sockaddr_in divsrc;
struct m_tag *mtag;
- struct epoch_tracker et;
+
+ NET_EPOCH_ASSERT();
mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL);
if (mtag == NULL) {
@@ -231,7 +232,6 @@
/* Sanity check */
M_ASSERTPKTHDR(m);
- NET_EPOCH_ASSERT();
/* Find IP address for receive interface */
ifp = m->m_pkthdr.rcvif;
@@ -272,7 +272,6 @@
/* Put packet on socket queue, if any */
sa = NULL;
nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info));
- INP_INFO_RLOCK_ET(&V_divcbinfo, et);
CK_LIST_FOREACH(inp, &V_divcb, inp_list) {
/* XXX why does only one socket match? */
if (inp->inp_lport == nport) {
@@ -290,7 +289,6 @@
break;
}
}
- INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
if (sa == NULL) {
m_freem(m);
KMOD_IPSTAT_INC(ips_noproto);
@@ -631,71 +629,41 @@
static int
div_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ int error;
+
+ if (req->newptr != 0)
+ return EPERM;
- /*
- * The process of preparing the TCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
if (req->oldptr == 0) {
+ int n;
+
n = V_divcbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return 0;
}
- if (req->newptr != 0)
- return EPERM;
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_INFO_WLOCK(&V_divcbinfo);
- gencnt = V_divcbinfo.ipi_gencnt;
- n = V_divcbinfo.ipi_count;
- INP_INFO_WUNLOCK(&V_divcbinfo);
-
- error = sysctl_wire_old_buffer(req,
- 2 * sizeof(xig) + n*sizeof(struct xinpcb));
- if (error != 0)
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_divcbinfo.ipi_count;
+ xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return error;
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == NULL)
- return ENOMEM;
-
- INP_INFO_RLOCK_ET(&V_divcbinfo, et);
- for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt &&
- cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ if (inp->inp_gencnt <= xig.xig_gen) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
@@ -704,17 +672,9 @@
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_divcbinfo);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_divcbinfo);
+ NET_EPOCH_EXIT(et);
if (!error) {
- struct epoch_tracker et;
/*
* Give the user an updated idea of our state.
* If the generation differs from what we told
@@ -722,15 +682,13 @@
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_INFO_RLOCK_ET(&V_divcbinfo, et);
xig.xig_gen = V_divcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_divcbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_divcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
- return error;
+
+ return (error);
}
#ifdef SYSCTL_NODE
Index: sys/netinet/raw_ip.c
===================================================================
--- sys/netinet/raw_ip.c
+++ sys/netinet/raw_ip.c
@@ -284,9 +284,10 @@
struct ip *ip = mtod(m, struct ip *);
struct inpcb *inp, *last;
struct sockaddr_in ripsrc;
- struct epoch_tracker et;
int hash;
+ NET_EPOCH_ASSERT();
+
*mp = NULL;
bzero(&ripsrc, sizeof(ripsrc));
@@ -299,7 +300,6 @@
hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr,
ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask);
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) {
if (inp->inp_ip_p != proto)
continue;
@@ -422,7 +422,6 @@
skip_2:
INP_RUNLOCK(inp);
}
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
if (rip_append(last, ip, m, &ripsrc) != 0)
IPSTAT_INC(ips_delivered);
@@ -1068,97 +1067,67 @@
static int
rip_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ int error;
+
+ if (req->newptr != 0)
+ return (EPERM);
- /*
- * The process of preparing the TCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
if (req->oldptr == 0) {
+ int n;
+
n = V_ripcbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
}
- if (req->newptr != 0)
- return (EPERM);
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_INFO_WLOCK(&V_ripcbinfo);
- gencnt = V_ripcbinfo.ipi_gencnt;
- n = V_ripcbinfo.ipi_count;
- INP_INFO_WUNLOCK(&V_ripcbinfo);
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
+ return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_ripcbinfo.ipi_count;
+ xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
-
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
- for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt &&
- cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ if (inp->inp_gencnt <= xig.xig_gen &&
+ cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
+ if (error)
+ break;
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_ripcbinfo);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_ripcbinfo);
+ NET_EPOCH_EXIT(et);
if (!error) {
- struct epoch_tracker et;
/*
* Give the user an updated idea of our state. If the
* generation differs from what we told her before, she knows
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
xig.xig_gen = V_ripcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_ripcbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
+
return (error);
}
Index: sys/netinet/tcp_hpts.c
===================================================================
--- sys/netinet/tcp_hpts.c
+++ sys/netinet/tcp_hpts.c
@@ -1245,12 +1245,10 @@
int16_t set_cpu;
uint32_t did_prefetch = 0;
int dropped;
- struct epoch_tracker et;
HPTS_MTX_ASSERT(hpts);
-#ifndef VIMAGE
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
-#endif
+ NET_EPOCH_ASSERT();
+
while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) {
HPTS_MTX_ASSERT(hpts);
hpts_sane_input_remove(hpts, inp, 0);
@@ -1266,7 +1264,6 @@
INP_WLOCK(inp);
#ifdef VIMAGE
CURVNET_SET(inp->inp_vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
#endif
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
(inp->inp_flags2 & INP_FREED)) {
@@ -1276,7 +1273,6 @@
INP_WUNLOCK(inp);
}
#ifdef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
#endif
mtx_lock(&hpts->p_mtx);
@@ -1296,7 +1292,6 @@
if (in_pcbrele_wlocked(inp) == 0)
INP_WUNLOCK(inp);
#ifdef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
#endif
mtx_lock(&hpts->p_mtx);
@@ -1349,22 +1344,16 @@
INP_WUNLOCK(inp);
INP_UNLOCK_ASSERT(inp);
#ifdef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
#endif
mtx_lock(&hpts->p_mtx);
hpts->p_inp = NULL;
}
-#ifndef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
- INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
-#endif
}
static void
tcp_hptsi(struct tcp_hpts_entry *hpts)
{
- struct epoch_tracker et;
struct tcpcb *tp;
struct inpcb *inp = NULL, *ninp;
struct timeval tv;
@@ -1378,6 +1367,8 @@
int16_t set_cpu;
HPTS_MTX_ASSERT(hpts);
+ NET_EPOCH_ASSERT();
+
/* record previous info for any logging */
hpts->saved_lasttick = hpts->p_lasttick;
hpts->saved_curtick = hpts->p_curtick;
@@ -1469,9 +1460,6 @@
goto no_one;
}
HPTS_MTX_ASSERT(hpts);
-#ifndef VIMAGE
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
-#endif
for (i = 0; i < ticks_to_run; i++) {
/*
* Calculate our delay, if there are no extra ticks there
@@ -1586,7 +1574,6 @@
}
#ifdef VIMAGE
CURVNET_SET(inp->inp_vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
#endif
/* Lets do any logging that we might want to */
if (hpts_does_tp_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) {
@@ -1658,7 +1645,6 @@
INP_WUNLOCK(inp);
skip_pacing:
#ifdef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
CURVNET_RESTORE();
#endif
INP_UNLOCK_ASSERT(inp);
@@ -1678,9 +1664,6 @@
hpts->p_runningtick = 0;
}
}
-#ifndef VIMAGE
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
-#endif
no_one:
HPTS_MTX_ASSERT(hpts);
hpts->p_delayed_by = 0;
@@ -1820,6 +1803,7 @@
tcp_hpts_thread(void *ctx)
{
struct tcp_hpts_entry *hpts;
+ struct epoch_tracker et;
struct timeval tv;
sbintime_t sb;
@@ -1839,7 +1823,9 @@
}
hpts->p_hpts_wake_scheduled = 0;
hpts->p_hpts_active = 1;
+ NET_EPOCH_ENTER(et);
tcp_hptsi(hpts);
+ NET_EPOCH_EXIT(et);
HPTS_MTX_ASSERT(hpts);
tv.tv_sec = 0;
tv.tv_usec = hpts->p_hpts_sleep_time * HPTS_TICKS_PER_USEC;
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -562,7 +562,6 @@
int rstreason = 0; /* For badport_bandlim accounting purposes */
uint8_t iptos;
struct m_tag *fwd_tag = NULL;
- struct epoch_tracker et;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
int isipv6;
@@ -571,7 +570,6 @@
#endif /* INET6 */
struct tcpopt to; /* options in this segment */
char *s = NULL; /* address and port logging */
- int ti_locked;
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -582,6 +580,8 @@
short ostate = 0;
#endif
+ NET_EPOCH_ASSERT();
+
#ifdef INET6
isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
#endif
@@ -747,19 +747,6 @@
*/
drop_hdrlen = off0 + off;
- /*
- * Locate pcb for segment; if we're likely to add or remove a
- * connection then first acquire pcbinfo lock. There are three cases
- * where we might discover later we need a write lock despite the
- * flags: ACKs moving a connection out of the syncache, ACKs for a
- * connection in TIMEWAIT and SYNs not targeting a listening socket.
- */
- if ((thflags & (TH_FIN | TH_RST)) != 0) {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_RLOCKED;
- } else
- ti_locked = TI_UNLOCKED;
-
/*
* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain.
*/
@@ -777,13 +764,6 @@
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
findpcb:
-#ifdef INVARIANTS
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- } else {
- INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
#ifdef INET6
if (isipv6 && fwd_tag != NULL) {
struct sockaddr_in6 *next_hop6;
@@ -943,12 +923,6 @@
* XXXRW: It may be time to rethink timewait locking.
*/
if (inp->inp_flags & INP_TIMEWAIT) {
- if (ti_locked == TI_UNLOCKED) {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_RLOCKED;
- }
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-
if (thflags & TH_SYN)
tcp_dooptions(&to, optp, optlen, TO_SYN);
/*
@@ -956,7 +930,6 @@
*/
if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb;
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -978,27 +951,6 @@
}
#endif
- /*
- * We've identified a valid inpcb, but it could be that we need an
- * inpcbinfo write lock but don't hold it. In this case, attempt to
- * acquire using the same strategy as the TIMEWAIT case above. If we
- * relock, we have to jump back to 'relocked' as the connection might
- * now be in TIMEWAIT.
- */
-#ifdef INVARIANTS
- if ((thflags & (TH_FIN | TH_RST)) != 0)
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-#endif
- if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
- (tp->t_state == TCPS_LISTEN && (thflags & TH_SYN) &&
- !IS_FASTOPEN(tp->t_flags)))) {
- if (ti_locked == TI_UNLOCKED) {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_RLOCKED;
- }
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- }
-
#ifdef MAC
INP_WLOCK_ASSERT(inp);
if (mac_inpcb_check_deliver(inp, m))
@@ -1053,7 +1005,6 @@
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@@ -1131,8 +1082,6 @@
TCP_PROBE5(receive, NULL, tp, m, tp, th);
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen,
iptos);
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -1335,10 +1284,6 @@
* Entry added to syncache and mbuf consumed.
* Only the listen socket is unlocked by syncache_add().
*/
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_UNLOCKED;
- }
INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
return (IPPROTO_DONE);
} else if (tp->t_state == TCPS_LISTEN) {
@@ -1371,25 +1316,11 @@
* the inpcb, and unlocks pcbinfo.
*/
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos);
- if (ti_locked == TI_RLOCKED)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return (IPPROTO_DONE);
dropwithreset:
TCP_PROBE5(receive, NULL, tp, m, tp, th);
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_UNLOCKED;
- }
-#ifdef INVARIANTS
- else {
- KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropwithreset "
- "ti_locked: %d", __func__, ti_locked));
- INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
-
if (inp != NULL) {
tcp_dropwithreset(m, th, tp, tlen, rstreason);
INP_WUNLOCK(inp);
@@ -1402,18 +1333,6 @@
if (m != NULL)
TCP_PROBE5(receive, NULL, tp, m, tp, th);
- if (ti_locked == TI_RLOCKED) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
- ti_locked = TI_UNLOCKED;
- }
-#ifdef INVARIANTS
- else {
- KASSERT(ti_locked == TI_UNLOCKED, ("%s: dropunlock "
- "ti_locked: %d", __func__, ti_locked));
- INP_INFO_WUNLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
-
if (inp != NULL)
INP_WUNLOCK(inp);
@@ -1502,7 +1421,7 @@
struct mbuf *mfree;
struct tcpopt to;
int tfo_syn;
-
+
#ifdef TCPDEBUG
/*
* The size of tcp_saveipgen must be the size of the max ip header,
@@ -1517,16 +1436,8 @@
tp->sackhint.last_sack_ack = 0;
sack_changed = 0;
nsegs = max(1, m->m_pkthdr.lro_nsegs);
- /*
- * If this is either a state-changing packet or current state isn't
- * established, we require a write lock on tcbinfo. Otherwise, we
- * allow the tcbinfo to be in either alocked or unlocked, as the
- * caller may have unnecessarily acquired a write lock due to a race.
- */
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tp->t_state != TCPS_ESTABLISHED) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- }
+
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
__func__));
@@ -2048,7 +1959,6 @@
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2121,7 +2031,6 @@
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp));
@@ -2164,8 +2073,6 @@
*/
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
tp->t_state != TCPS_SYN_RECEIVED) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-
TCPSTAT_INC(tcps_badsyn);
if (V_tcp_insecure_syn &&
SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
@@ -2289,8 +2196,6 @@
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-
if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
"after socket was closed, "
@@ -2876,7 +2781,6 @@
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
m_freem(m);
return;
@@ -2891,7 +2795,6 @@
*/
case TCPS_LAST_ACK:
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
goto drop;
}
@@ -3140,8 +3043,6 @@
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
-
tcp_twstart(tp);
return;
}
Index: sys/netinet/tcp_lro.c
===================================================================
--- sys/netinet/tcp_lro.c
+++ sys/netinet/tcp_lro.c
@@ -884,7 +884,7 @@
*/
if ((tcplro_stacks_wanting_mbufq == 0) || (le->m_head->m_flags & M_VLANTAG))
goto skip_lookup;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
switch (le->eh_type) {
#ifdef INET6
case ETHERTYPE_IPV6:
@@ -903,7 +903,7 @@
break;
#endif
}
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
if (inp && ((inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT)) ||
(inp->inp_flags2 & INP_FREED))) {
/* We don't want this guy */
Index: sys/netinet/tcp_stacks/bbr.c
===================================================================
--- sys/netinet/tcp_stacks/bbr.c
+++ sys/netinet/tcp_stacks/bbr.c
@@ -8618,7 +8618,6 @@
bbr->rc_timer_first = 1;
bbr_timer_cancel(bbr,
__LINE__, bbr->r_ctl.rc_rcvtime);
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
tcp_twstart(tp);
return (1);
@@ -9619,7 +9618,6 @@
struct tcpcb *tp, int32_t * tlen, struct tcphdr *th, struct socket *so)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if (bbr->rc_allow_data_af_clo == 0) {
close_now:
tp = tcp_close(tp);
@@ -9861,7 +9859,6 @@
return (ret_val);
}
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
m_freem(m);
return (1);
@@ -9974,7 +9971,6 @@
return (ret_val);
}
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
ctf_do_drop(m, tp);
return (1);
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -5875,7 +5875,6 @@
case TCPS_FIN_WAIT_2:
rack_timer_cancel(tp, rack,
rack->r_ctl.rc_rcvtime, __LINE__);
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
return (1);
}
@@ -6353,7 +6352,6 @@
tp->t_flags |= (TF_ACKNOW | TF_NEEDSYN);
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
* Advance th->th_seq to correspond to first data byte. If data,
@@ -6847,7 +6845,6 @@
{
struct tcp_rack *rack;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
rack = (struct tcp_rack *)tp->t_fb_ptr;
if (rack->rc_allow_data_af_clo == 0) {
close_now:
@@ -7079,7 +7076,6 @@
return (ret_val);
}
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
m_freem(m);
return (1);
@@ -7187,7 +7183,6 @@
return (ret_val);
}
if (ourfinisacked) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
ctf_do_drop(m, tp);
return (1);
@@ -7650,16 +7645,8 @@
kern_prefetch(rack, &prev_state);
prev_state = 0;
thflags = th->th_flags;
- /*
- * If this is either a state-changing packet or current state isn't
- * established, we require a read lock on tcbinfo. Otherwise, we
- * allow the tcbinfo to be in either locked or unlocked, as the
- * caller may have unnecessarily acquired a lock due to a race.
- */
- if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
- tp->t_state != TCPS_ESTABLISHED) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- }
+
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tp->t_inpcb);
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN",
__func__));
Index: sys/netinet/tcp_stacks/rack_bbr_common.c
===================================================================
--- sys/netinet/tcp_stacks/rack_bbr_common.c
+++ sys/netinet/tcp_stacks/rack_bbr_common.c
@@ -253,7 +253,6 @@
*/
struct mbuf *m_save;
struct ether_header *eh;
- struct epoch_tracker et;
struct tcphdr *th;
#ifdef INET6
struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */
@@ -268,14 +267,8 @@
uint16_t drop_hdrlen;
uint8_t iptos, no_vn=0, bpf_req=0;
- /*
- * This is a bit deceptive, we get the
- * "info epoch" which is really the network
- * epoch. This covers us on both any INP
- * type change but also if the ifp goes
- * away it covers us as well.
- */
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ASSERT();
+
if (m && m->m_pkthdr.rcvif)
ifp = m->m_pkthdr.rcvif;
else
@@ -445,7 +438,6 @@
}
if (no_vn == 0)
CURVNET_RESTORE();
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return(retval);
}
skipped_pkt:
@@ -453,7 +445,6 @@
}
if (no_vn == 0)
CURVNET_RESTORE();
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
return(retval);
}
@@ -680,7 +671,6 @@
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT(tp->t_state != TCPS_SYN_SENT,
("%s: TH_RST for TCPS_SYN_SENT th %p tp %p",
__func__, th, tp));
@@ -732,7 +722,8 @@
void
ctf_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+
+ NET_EPOCH_ASSERT();
TCPSTAT_INC(tcps_badsyn);
if (V_tcp_insecure_syn &&
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -1941,7 +1941,7 @@
tp = (struct tcpcb *)ptp;
CURVNET_SET(tp->t_vnet);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
__func__, tp));
@@ -1961,13 +1961,13 @@
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
return;
}
}
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
}
@@ -2127,17 +2127,17 @@
static int
tcp_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, m, n, pcb_count;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
- struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ struct xinpgen xig;
+ int error;
+
+ if (req->newptr != NULL)
+ return (EPERM);
- /*
- * The process of preparing the TCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
if (req->oldptr == NULL) {
+ int n;
+
n = V_tcbinfo.ipi_count +
counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
n += imax(n / 8, 10);
@@ -2145,44 +2145,29 @@
return (0);
}
- if (req->newptr != NULL)
- return (EPERM);
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_LIST_RLOCK(&V_tcbinfo);
- gencnt = V_tcbinfo.ipi_gencnt;
- n = V_tcbinfo.ipi_count;
- INP_LIST_RUNLOCK(&V_tcbinfo);
-
- m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
-
- error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
- + (n + m) * sizeof(struct xtcpcb));
- if (error != 0)
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n + m;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_tcbinfo.ipi_count +
+ counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
+ xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- error = syncache_pcblist(req, m, &pcb_count);
+ error = syncache_pcblist(req);
if (error)
return (error);
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
-
- INP_INFO_WLOCK(&V_tcbinfo);
- for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
- inp != NULL && i < n; inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_tcbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
+ INP_RLOCK(inp);
+ if (inp->inp_gencnt <= xig.xig_gen) {
/*
* XXX: This use of cr_cansee(), introduced with
* TCP state changes, is not quite right, but for
@@ -2197,36 +2182,21 @@
} else
error = cr_canseeinpcb(req->td->td_ucred, inp);
if (error == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_tcbinfo);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
- struct xtcpcb xt;
+ struct xtcpcb xt;
- tcp_inptoxtp(inp, &xt);
- INP_RUNLOCK(inp);
- error = SYSCTL_OUT(req, &xt, sizeof xt);
+ tcp_inptoxtp(inp, &xt);
+ INP_RUNLOCK(inp);
+ error = SYSCTL_OUT(req, &xt, sizeof xt);
+ if (error)
+ break;
+ continue; /* inp was already unlocked above */
+ }
+ /* The visibility check rejected this inp: skip it, do not fail the request. */
+ error = 0;
- } else
+ }
INP_RUNLOCK(inp);
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
if (!error) {
/*
@@ -2236,14 +2203,13 @@
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_LIST_RLOCK(&V_tcbinfo);
xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
- xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
- INP_LIST_RUNLOCK(&V_tcbinfo);
+ xig.xig_count = V_tcbinfo.ipi_count +
+ counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
+
return (error);
}
@@ -2257,6 +2223,7 @@
{
struct xucred xuc;
struct sockaddr_in addrs[2];
+ struct epoch_tracker et;
struct inpcb *inp;
int error;
@@ -2266,8 +2233,10 @@
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
+ NET_EPOCH_ENTER(et);
inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
if (inp->inp_socket == NULL)
error = ENOENT;
@@ -2292,6 +2261,7 @@
static int
tcp6_getcred(SYSCTL_HANDLER_ARGS)
{
+ struct epoch_tracker et;
struct xucred xuc;
struct sockaddr_in6 addrs[2];
struct inpcb *inp;
@@ -2319,6 +2289,7 @@
return (EINVAL);
}
+ NET_EPOCH_ENTER(et);
#ifdef INET
if (mapped == 1)
inp = in_pcblookup(&V_tcbinfo,
@@ -2332,6 +2303,7 @@
&addrs[1].sin6_addr, addrs[1].sin6_port,
&addrs[0].sin6_addr, addrs[0].sin6_port,
INPLOOKUP_RLOCKPCB, NULL);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
if (inp->inp_socket == NULL)
error = ENOENT;
@@ -2365,7 +2337,6 @@
struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
struct icmp *icp;
struct in_conninfo inc;
- struct epoch_tracker et;
tcp_seq icmp_tcp_seq;
int mtu;
@@ -2397,7 +2368,6 @@
icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@@ -2462,7 +2432,6 @@
out:
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#endif /* INET */
@@ -2480,7 +2449,6 @@
struct ip6ctlparam *ip6cp = NULL;
const struct sockaddr_in6 *sa6_src = NULL;
struct in_conninfo inc;
- struct epoch_tracker et;
struct tcp_ports {
uint16_t th_sport;
uint16_t th_dport;
@@ -2542,7 +2510,6 @@
}
bzero(&t_ports, sizeof(struct tcp_ports));
m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport,
&ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
@@ -2614,7 +2581,6 @@
out:
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
}
#endif /* INET6 */
@@ -2770,7 +2736,7 @@
{
struct tcpcb *tp;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -3042,7 +3008,7 @@
default:
return (EINVAL);
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -3081,7 +3047,7 @@
INP_WUNLOCK(inp);
} else
error = ESRCH;
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -3157,7 +3123,7 @@
default:
return (EINVAL);
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -3173,7 +3139,7 @@
break;
#endif
}
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0 ||
inp->inp_socket == NULL) {
Index: sys/netinet/tcp_syncache.h
===================================================================
--- sys/netinet/tcp_syncache.h
+++ sys/netinet/tcp_syncache.h
@@ -48,7 +48,7 @@
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *);
void syncache_badack(struct in_conninfo *);
-int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
+int syncache_pcblist(struct sysctl_req *);
struct syncache {
TAILQ_ENTRY(syncache) sc_hash;
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -771,7 +771,7 @@
int error;
char *s;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
/*
* Ok, create the full blown connection, and set things up
@@ -1091,11 +1091,7 @@
char *s;
bool locked;
- /*
- * Global TCP locks are held because we manipulate the PCB lists
- * and create a new socket.
- */
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
("%s: can handle only ACK", __func__));
@@ -1331,11 +1327,7 @@
struct tcpcb *tp;
unsigned int *pending_counter;
- /*
- * Global TCP locks are held because we manipulate the PCB lists
- * and create a new socket.
- */
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
*lsop = syncache_socket(sc, *lsop, m);
@@ -2460,46 +2452,43 @@
* amount of space the caller allocated for this function to use.
*/
int
-syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
+syncache_pcblist(struct sysctl_req *req)
{
struct xtcpcb xt;
struct syncache *sc;
struct syncache_head *sch;
- int count, error, i;
+ int error, i;
+
+ /* Fields that are identical for every exported entry are set once. */
+ bzero(&xt, sizeof(xt));
+ xt.xt_len = sizeof(xt);
+ xt.t_state = TCPS_SYN_RECEIVED;
+ xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
+ xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket);
+ xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
+ xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
- for (count = 0, error = 0, i = 0; i < V_tcp_syncache.hashsize; i++) {
+ for (i = 0; i < V_tcp_syncache.hashsize; i++) {
sch = &V_tcp_syncache.hashbase[i];
SCH_LOCK(sch);
TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash) {
- if (count >= max_pcbs) {
- SCH_UNLOCK(sch);
- goto exit;
- }
if (cr_cansee(req->td->td_ucred, sc->sc_cred) != 0)
continue;
- bzero(&xt, sizeof(xt));
- xt.xt_len = sizeof(xt);
if (sc->sc_inc.inc_flags & INC_ISIPV6)
xt.xt_inp.inp_vflag = INP_IPV6;
else
xt.xt_inp.inp_vflag = INP_IPV4;
bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc,
sizeof (struct in_conninfo));
- xt.t_state = TCPS_SYN_RECEIVED;
- xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
- xt.xt_inp.xi_socket.xso_len = sizeof (struct xsocket);
- xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
- xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
error = SYSCTL_OUT(req, &xt, sizeof xt);
if (error) {
SCH_UNLOCK(sch);
- goto exit;
+ /* Report the copyout failure so the caller stops emitting records. */
+ return (error);
}
- count++;
}
SCH_UNLOCK(sch);
}
-exit:
- *pcbs_exported = count;
- return error;
+
+ return (0);
}
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -339,9 +339,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_close(tp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
} else {
@@ -353,9 +353,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_close(tp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -478,7 +478,7 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);
#ifdef TCPDEBUG
@@ -487,7 +487,7 @@
PRU_SLOWTIMO);
#endif
TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
out:
CURVNET_RESTORE();
@@ -542,9 +542,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -559,9 +559,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
@@ -628,9 +628,9 @@
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
tp = tcp_drop(tp, ETIMEDOUT);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
tcp_inpinfo_lock_del(inp, tp);
goto out;
}
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -209,10 +209,10 @@
struct tcptw *tw;
struct epoch_tracker et;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
tcp_twclose(tw, 0);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
TW_LOCK_DESTROY(V_tw_lock);
uma_zdestroy(V_tcptw_zone);
@@ -236,7 +236,7 @@
bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
#endif
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
/* A dropped inp should never transition to TIME_WAIT state. */
@@ -382,7 +382,7 @@
int thflags;
tcp_seq seq;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
/*
@@ -488,7 +488,7 @@
inp = tw->tw_inpcb;
KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* in_pcbfree() */
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
tcp_tw_2msl_stop(tw, reuse);
@@ -644,7 +644,7 @@
tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tw->tw_inpcb);
TW_WLOCK(V_tw_lock);
@@ -662,7 +662,7 @@
struct inpcb *inp;
int released __unused;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
TW_WLOCK(V_tw_lock);
inp = tw->tw_inpcb;
@@ -689,25 +689,8 @@
{
struct tcptw *tw;
struct inpcb *inp;
- struct epoch_tracker et;
-#ifdef INVARIANTS
- if (reuse) {
- /*
- * Exclusive pcbinfo lock is not required in reuse case even if
- * two inpcb locks can be acquired simultaneously:
- * - the inpcb transitioning to TIME_WAIT state in
- * tcp_tw_start(),
- * - the inpcb closed by tcp_twclose().
- *
- * It is because only inpcbs in FIN_WAIT2 or CLOSING states can
- * transition in TIME_WAIT state. Then a pcbcb cannot be in
- * TIME_WAIT list and transitioning to TIME_WAIT state at same
- * time.
- */
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- }
-#endif
+ NET_EPOCH_ASSERT();
for (;;) {
TW_RLOCK(V_tw_lock);
@@ -723,12 +706,10 @@
in_pcbref(inp);
TW_RUNLOCK(V_tw_lock);
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
INP_WLOCK(inp);
tw = intotw(inp);
if (in_pcbrele_wlocked(inp)) {
if (__predict_true(tw == NULL)) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
continue;
} else {
/* This should not happen as in TIMEWAIT
@@ -747,7 +728,6 @@
"|| inp last reference) && tw != "
"NULL", __func__);
#endif
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
break;
}
}
@@ -755,12 +735,10 @@
if (tw == NULL) {
/* tcp_twclose() has already been called */
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
continue;
}
tcp_twclose(tw, reuse);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
if (reuse)
return tw;
}
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -283,7 +283,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
rlock = 1;
}
INP_WLOCK(inp);
@@ -291,7 +291,7 @@
("tcp_usr_detach: inp_socket == NULL"));
tcp_detach(so, inp);
if (rlock)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
}
#ifdef INET
@@ -706,7 +706,7 @@
int error = 0;
TCPDEBUG0;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
INP_WLOCK(inp);
@@ -723,7 +723,7 @@
TCPDEBUG2(PRU_DISCONNECT);
TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -792,7 +792,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
error = ECONNABORTED;
@@ -819,7 +819,7 @@
TCPDEBUG2(PRU_ACCEPT);
TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
if (error == 0) {
if (v4)
*nam = in6_v4mapsin6_sockaddr(port, &addr);
@@ -842,7 +842,7 @@
struct epoch_tracker et;
TCPDEBUG0;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("inp == NULL"));
INP_WLOCK(inp);
@@ -861,7 +861,7 @@
TCPDEBUG2(PRU_SHUTDOWN);
TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -921,10 +921,10 @@
tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *nam, struct mbuf *control, struct thread *td)
{
+ struct epoch_tracker et;
int error = 0;
struct inpcb *inp;
struct tcpcb *tp = NULL;
- struct epoch_tracker net_et;
#ifdef INET
#ifdef INET6
struct sockaddr_in sin;
@@ -940,11 +940,11 @@
TCPDEBUG0;
/*
- * We require the pcbinfo lock if we will close the socket as part of
- * this call.
+ * We require the pcbinfo "read lock" if we will close the socket
+ * as part of this call.
*/
if (flags & PRUS_EOF)
- INP_INFO_RLOCK_ET(&V_tcbinfo, net_et);
+ NET_EPOCH_ENTER(et);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
@@ -1135,7 +1135,7 @@
* Close the send side of the connection after
* the data is sent.
*/
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
socantsendmore(so);
tcp_usrclosed(tp);
}
@@ -1231,7 +1231,7 @@
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
if (flags & PRUS_EOF)
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, net_et);
+ NET_EPOCH_EXIT(et);
return (error);
}
@@ -1275,7 +1275,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_abort: inp_socket == NULL"));
@@ -1301,7 +1301,7 @@
}
INP_WUNLOCK(inp);
dropped:
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
}
/*
@@ -1318,7 +1318,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_close: inp_socket == NULL"));
@@ -1342,7 +1342,7 @@
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
}
/*
@@ -2317,10 +2317,10 @@
}
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
- INP_INFO_RLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_ENTER(et);
error = in_pcballoc(so, &V_tcbinfo);
if (error) {
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (error);
}
inp = sotoinpcb(so);
@@ -2338,12 +2338,12 @@
if (tp == NULL) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
return (ENOBUFS);
}
tp->t_state = TCPS_CLOSED;
INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(&V_tcbinfo, et);
+ NET_EPOCH_EXIT(et);
TCPSTATES_INC(TCPS_CLOSED);
return (0);
}
@@ -2362,7 +2362,7 @@
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
/*
@@ -2401,7 +2401,7 @@
tcp_usrclosed(struct tcpcb *tp)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(tp->t_inpcb);
switch (tp->t_state) {
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -360,7 +360,7 @@
struct tcphdr *th, struct socket **lsop)
{
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
return (syncache_expand(inc, to, th, lsop, NULL));
}
@@ -390,8 +390,6 @@
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
-
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
if (!tcp_twcheck(inp, NULL, th, NULL, 0))
return (EADDRINUSE);
} else {
@@ -529,7 +527,7 @@
(void) tp->t_fb->tfb_tcp_output(tp);
} else {
- INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ NET_EPOCH_ASSERT();
tp = tcp_drop(tp, err);
if (tp == NULL)
INP_WLOCK(inp); /* re-acquire */
Index: sys/netinet/udp_usrreq.c
===================================================================
--- sys/netinet/udp_usrreq.c
+++ sys/netinet/udp_usrreq.c
@@ -399,7 +399,6 @@
struct sockaddr_in udp_in[2];
struct mbuf *m;
struct m_tag *fwd_tag;
- struct epoch_tracker et;
int cscov_partial, iphlen;
m = *mp;
@@ -529,7 +528,8 @@
struct inpcb *last;
struct inpcbhead *pcblist;
- INP_INFO_RLOCK_ET(pcbinfo, et);
+ NET_EPOCH_ASSERT();
+
pcblist = udp_get_pcblist(proto);
last = NULL;
CK_LIST_FOREACH(inp, pcblist, inp_list) {
@@ -636,7 +636,6 @@
UDPSTAT_INC(udps_noportbcast);
if (inp)
INP_RUNLOCK(inp);
- INP_INFO_RUNLOCK_ET(pcbinfo, et);
goto badunlocked;
}
if (proto == IPPROTO_UDPLITE)
@@ -646,7 +645,6 @@
if (udp_append(last, ip, m, iphlen, udp_in) == 0)
INP_RUNLOCK(last);
inp_lost:
- INP_INFO_RUNLOCK_ET(pcbinfo, et);
return (IPPROTO_DONE);
}
@@ -854,87 +852,53 @@
static int
udp_pcblist(SYSCTL_HANDLER_ARGS)
{
- int error, i, n;
- struct inpcb *inp, **inp_list;
- inp_gen_t gencnt;
struct xinpgen xig;
struct epoch_tracker et;
+ struct inpcb *inp;
+ int error;
+
+ if (req->newptr != 0)
+ return (EPERM);
- /*
- * The process of preparing the PCB list is too time-consuming and
- * resource-intensive to repeat twice on every request.
- */
if (req->oldptr == 0) {
+ int n;
+
n = V_udbinfo.ipi_count;
n += imax(n / 8, 10);
req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb);
return (0);
}
- if (req->newptr != 0)
- return (EPERM);
-
- /*
- * OK, now we're committed to doing something.
- */
- INP_INFO_RLOCK_ET(&V_udbinfo, et);
- gencnt = V_udbinfo.ipi_gencnt;
- n = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
-
- error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
- + n * sizeof(struct xinpcb));
- if (error != 0)
+ if ((error = sysctl_wire_old_buffer(req, 0)) != 0)
return (error);
bzero(&xig, sizeof(xig));
xig.xig_len = sizeof xig;
- xig.xig_count = n;
- xig.xig_gen = gencnt;
+ xig.xig_count = V_udbinfo.ipi_count;
+ xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
error = SYSCTL_OUT(req, &xig, sizeof xig);
if (error)
return (error);
- inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
- if (inp_list == NULL)
- return (ENOMEM);
-
- INP_INFO_RLOCK_ET(&V_udbinfo, et);
- for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead), i = 0; inp && i < n;
- inp = CK_LIST_NEXT(inp, inp_list)) {
- INP_WLOCK(inp);
- if (inp->inp_gencnt <= gencnt &&
- cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
- in_pcbref(inp);
- inp_list[i++] = inp;
- }
- INP_WUNLOCK(inp);
- }
- INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
- n = i;
-
- error = 0;
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
+ NET_EPOCH_ENTER(et);
+ for (inp = CK_LIST_FIRST(V_udbinfo.ipi_listhead);
+ inp != NULL;
+ inp = CK_LIST_NEXT(inp, inp_list)) {
INP_RLOCK(inp);
- if (inp->inp_gencnt <= gencnt) {
+ if (inp->inp_gencnt <= xig.xig_gen &&
+ cr_canseeinpcb(req->td->td_ucred, inp) == 0) {
struct xinpcb xi;
in_pcbtoxinpcb(inp, &xi);
INP_RUNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
+ if (error)
+ break;
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_udbinfo);
- for (i = 0; i < n; i++) {
- inp = inp_list[i];
- INP_RLOCK(inp);
- if (!in_pcbrele_rlocked(inp))
- INP_RUNLOCK(inp);
- }
- INP_INFO_WUNLOCK(&V_udbinfo);
+ NET_EPOCH_EXIT(et);
if (!error) {
/*
@@ -943,14 +907,12 @@
* that something happened while we were processing this
* request, and it might be necessary to retry.
*/
- INP_INFO_RLOCK_ET(&V_udbinfo, et);
xig.xig_gen = V_udbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_udbinfo.ipi_count;
- INP_INFO_RUNLOCK_ET(&V_udbinfo, et);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
- free(inp_list, M_TEMP);
+
return (error);
}
@@ -964,6 +926,7 @@
{
struct xucred xuc;
struct sockaddr_in addrs[2];
+ struct epoch_tracker et;
struct inpcb *inp;
int error;
@@ -973,9 +936,11 @@
error = SYSCTL_IN(req, addrs, sizeof(addrs));
if (error)
return (error);
+ NET_EPOCH_ENTER(et);
inp = in_pcblookup(&V_udbinfo, addrs[1].sin_addr, addrs[1].sin_port,
addrs[0].sin_addr, addrs[0].sin_port,
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
INP_RLOCK_ASSERT(inp);
if (inp->inp_socket == NULL)
@@ -1116,9 +1081,6 @@
}
#ifdef INET
-#define UH_WLOCKED 2
-#define UH_RLOCKED 1
-#define UH_UNLOCKED 0
static int
udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
struct mbuf *control, struct thread *td)
@@ -1134,19 +1096,12 @@
int error = 0;
int ipflags;
u_short fport, lport;
- int unlock_udbinfo, unlock_inp;
u_char tos;
uint8_t pr;
uint16_t cscov = 0;
uint32_t flowid = 0;
uint8_t flowtype = M_HASHTYPE_NONE;
- /*
- * udp_output() may need to temporarily bind or connect the current
- * inpcb. As such, we don't know up front whether we will need the
- * pcbinfo lock or not. Do any work to decide what is needed up
- * front before acquiring any locks.
- */
if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
if (control)
m_freem(control);
@@ -1156,28 +1111,22 @@
src.sin_family = 0;
sin = (struct sockaddr_in *)addr;
-retry:
+
+ /*
+ * udp_output() may need to temporarily bind or connect the current
+ * inpcb. As such, we don't know up front whether we will need the
+ * pcbinfo lock or not. Do any work to decide what is needed up
+ * front before acquiring any locks.
+ *
+ * We will need network epoch in either case, to safely lookup into
+ * pcb hash.
+ */
if (sin == NULL ||
- (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
+ (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0))
INP_WLOCK(inp);
- /*
- * In case we lost a race and another thread bound addr/port
- * on the inp we cannot keep the wlock (which still would be
- * fine) as further down, based on these values we make
- * decisions for the pcbinfo lock. If the locks are not in
- * synch the assertions on unlock will fire, hence we go for
- * one retry loop.
- */
- if (sin != NULL && (inp->inp_laddr.s_addr != INADDR_ANY ||
- inp->inp_lport != 0)) {
- INP_WUNLOCK(inp);
- goto retry;
- }
- unlock_inp = UH_WLOCKED;
- } else {
+ else
INP_RLOCK(inp);
- unlock_inp = UH_RLOCKED;
- }
+ NET_EPOCH_ENTER(et);
tos = inp->inp_ip_tos;
if (control != NULL) {
/*
@@ -1185,13 +1134,9 @@
* stored in a single mbuf.
*/
if (control->m_next) {
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
m_freem(control);
- m_freem(m);
- return (EINVAL);
+ error = EINVAL;
+ goto release;
}
for (; control->m_len > 0;
control->m_data += CMSG_ALIGN(cm->cmsg_len),
@@ -1262,56 +1207,11 @@
}
m_freem(control);
}
- if (error) {
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
- m_freem(m);
- return (error);
- }
+ if (error)
+ goto release;
- /*
- * In the old days, depending on whether or not the application had
- * bound or connected the socket, we had to do varying levels of work.
- * The optimal case was for a connected UDP socket, as a global lock
- * wasn't required at all.
- * In order to decide which we need, we required stability of the
- * inpcb binding, which we ensured by acquiring a read lock on the
- * inpcb. This didn't strictly follow the lock order, so we played
- * the trylock and retry game.
- * With the re-introduction of the route-cache in some cases, we started
- * to acquire an early inp wlock and a possible race during re-lock
- * went away. With the introduction of epoch(9) some read locking
- * became epoch(9) and the lock-order issues also went away.
- * Due to route-cache we may now hold more conservative locks than
- * otherwise required and have split up the 2nd case in case 2 and 3
- * in order to keep the udpinfo lock level in sync with the inp one
- * for the IP_SENDSRCADDR case below.
- */
pr = inp->inp_socket->so_proto->pr_protocol;
pcbinfo = udp_get_inpcbinfo(pr);
- if (sin != NULL &&
- (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
- INP_HASH_WLOCK(pcbinfo);
- unlock_udbinfo = UH_WLOCKED;
- } else if (sin != NULL &&
- (sin->sin_addr.s_addr == INADDR_ANY ||
- sin->sin_addr.s_addr == INADDR_BROADCAST ||
- inp->inp_laddr.s_addr == INADDR_ANY ||
- inp->inp_lport == 0)) {
- INP_HASH_RLOCK_ET(pcbinfo, et);
- unlock_udbinfo = UH_RLOCKED;
- } else if (src.sin_family == AF_INET) {
- if (unlock_inp == UH_WLOCKED) {
- INP_HASH_WLOCK(pcbinfo);
- unlock_udbinfo = UH_WLOCKED;
- } else {
- INP_HASH_RLOCK_ET(pcbinfo, et);
- unlock_udbinfo = UH_RLOCKED;
- }
- } else
- unlock_udbinfo = UH_UNLOCKED;
/*
* If the IP_SENDSRCADDR control message was specified, override the
@@ -1387,7 +1287,6 @@
if (inp->inp_laddr.s_addr == INADDR_ANY &&
inp->inp_lport == 0) {
INP_WLOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(pcbinfo);
/*
* Remember addr if jailed, to prevent
* rebinding.
@@ -1395,7 +1294,10 @@
if (prison_flag(td->td_ucred, PR_IP4))
inp->inp_laddr = laddr;
inp->inp_lport = lport;
- if (in_pcbinshash(inp) != 0) {
+ INP_HASH_WLOCK(pcbinfo);
+ error = in_pcbinshash(inp);
+ INP_HASH_WUNLOCK(pcbinfo);
+ if (error != 0) {
inp->inp_lport = 0;
error = EAGAIN;
goto release;
@@ -1560,48 +1462,20 @@
ipflags |= IP_NODEFAULTFLOWID;
#endif /* RSS */
- if (unlock_udbinfo == UH_WLOCKED)
- INP_HASH_WUNLOCK(pcbinfo);
- else if (unlock_udbinfo == UH_RLOCKED)
- INP_HASH_RUNLOCK_ET(pcbinfo, et);
if (pr == IPPROTO_UDPLITE)
UDPLITE_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
else
UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
error = ip_output(m, inp->inp_options,
- (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
+ INP_WLOCKED(inp) ? &inp->inp_route : NULL, ipflags,
inp->inp_moptions, inp);
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
+ NET_EPOCH_EXIT(et);
return (error);
release:
- if (unlock_udbinfo == UH_WLOCKED) {
- KASSERT(unlock_inp == UH_WLOCKED,
- ("%s: excl udbinfo lock %#03x, shared inp lock %#03x, "
- "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
- "src fam %#04x",
- __func__, unlock_udbinfo, unlock_inp, sin,
- (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
- inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
- INP_HASH_WUNLOCK(pcbinfo);
- INP_WUNLOCK(inp);
- } else if (unlock_udbinfo == UH_RLOCKED) {
- KASSERT(unlock_inp == UH_RLOCKED,
- ("%s: shared udbinfo lock %#03x, excl inp lock %#03x, "
- "sin %p daddr %#010x inp %p laddr %#010x lport %#06x "
- "src fam %#04x",
- __func__, unlock_udbinfo, unlock_inp, sin,
- (sin != NULL) ? sin->sin_addr.s_addr : 0xfefefefe, inp,
- inp->inp_laddr.s_addr, inp->inp_lport, src.sin_family));
- INP_HASH_RUNLOCK_ET(pcbinfo, et);
- INP_RUNLOCK(inp);
- } else if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
+ NET_EPOCH_EXIT(et);
m_freem(m);
return (error);
}
Index: sys/netinet6/icmp6.c
===================================================================
--- sys/netinet6/icmp6.c
+++ sys/netinet6/icmp6.c
@@ -1893,9 +1893,10 @@
struct inpcb *last = NULL;
struct sockaddr_in6 fromsa;
struct icmp6_hdr *icmp6;
- struct epoch_tracker et;
struct mbuf *opts = NULL;
+ NET_EPOCH_ASSERT();
+
#ifndef PULLDOWN_TEST
/* this is assumed to be safe. */
icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
@@ -1920,7 +1921,6 @@
return (IPPROTO_DONE);
}
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
if ((inp->inp_vflag & INP_IPV6) == 0)
continue;
@@ -2002,7 +2002,6 @@
}
last = inp;
}
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
if (last != NULL) {
if (last->inp_flags & INP_CONTROLOPTS)
ip6_savecontrol(last, m, &opts);
Index: sys/netinet6/in6_pcb.c
===================================================================
--- sys/netinet6/in6_pcb.c
+++ sys/netinet6/in6_pcb.c
@@ -1245,7 +1245,6 @@
{
struct inpcb *inp;
- INP_HASH_RLOCK(pcbinfo);
inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport,
(lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp);
if (inp != NULL) {
@@ -1272,7 +1271,6 @@
}
#endif
}
- INP_HASH_RUNLOCK(pcbinfo);
return (inp);
}
Index: sys/netinet6/raw_ip6.c
===================================================================
--- sys/netinet6/raw_ip6.c
+++ sys/netinet6/raw_ip6.c
@@ -165,7 +165,8 @@
struct inpcb *last = NULL;
struct mbuf *opts = NULL;
struct sockaddr_in6 fromsa;
- struct epoch_tracker et;
+
+ NET_EPOCH_ASSERT();
RIP6STAT_INC(rip6s_ipackets);
@@ -173,7 +174,6 @@
ifp = m->m_pkthdr.rcvif;
- INP_INFO_RLOCK_ET(&V_ripcbinfo, et);
CK_LIST_FOREACH(inp, &V_ripcb, inp_list) {
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@@ -303,7 +303,6 @@
skip_2:
INP_RUNLOCK(inp);
}
- INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et);
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
/*
* Check AH/ESP integrity.
Index: sys/netinet6/udp6_usrreq.c
===================================================================
--- sys/netinet6/udp6_usrreq.c
+++ sys/netinet6/udp6_usrreq.c
@@ -214,12 +214,13 @@
int off = *offp;
int cscov_partial;
int plen, ulen;
- struct epoch_tracker et;
struct sockaddr_in6 fromsa[2];
struct m_tag *fwd_tag;
uint16_t uh_sum;
uint8_t nxt;
+ NET_EPOCH_ASSERT();
+
ifp = m->m_pkthdr.rcvif;
#ifndef PULLDOWN_TEST
@@ -301,7 +302,6 @@
struct inpcbhead *pcblist;
struct ip6_moptions *imo;
- INP_INFO_RLOCK_ET(pcbinfo, et);
/*
* In the event that laddr should be set to the link-local
* address (this happens in RIPng), the multicast address
@@ -395,7 +395,7 @@
UDP_PROBE(receive, NULL, last,
ip6, last, uh);
if (udp6_append(last, n, off, fromsa))
- goto inp_lost;
+ return (IPPROTO_DONE);
}
INP_RUNLOCK(last);
}
@@ -422,7 +422,7 @@
*/
UDPSTAT_INC(udps_noport);
UDPSTAT_INC(udps_noportmcast);
- goto badheadlocked;
+ goto badunlocked;
}
INP_RLOCK(last);
if (__predict_true(last->inp_flags2 & INP_FREED) == 0) {
@@ -434,8 +434,6 @@
INP_RUNLOCK(last);
} else
INP_RUNLOCK(last);
- inp_lost:
- INP_INFO_RUNLOCK_ET(pcbinfo, et);
return (IPPROTO_DONE);
}
/*
@@ -522,8 +520,6 @@
INP_RUNLOCK(inp);
return (IPPROTO_DONE);
-badheadlocked:
- INP_INFO_RUNLOCK_ET(pcbinfo, et);
badunlocked:
if (m)
m_freem(m);
@@ -638,6 +634,7 @@
{
struct xucred xuc;
struct sockaddr_in6 addrs[2];
+ struct epoch_tracker et;
struct inpcb *inp;
int error;
@@ -656,9 +653,11 @@
(error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
return (error);
}
+ NET_EPOCH_ENTER(et);
inp = in6_pcblookup(&V_udbinfo, &addrs[1].sin6_addr,
addrs[1].sin6_port, &addrs[0].sin6_addr, addrs[0].sin6_port,
INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL);
+ NET_EPOCH_EXIT(et);
if (inp != NULL) {
INP_RLOCK_ASSERT(inp);
if (inp->inp_socket == NULL)
@@ -679,14 +678,10 @@
SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
-#define UH_WLOCKED 2
-#define UH_RLOCKED 1
-#define UH_UNLOCKED 0
static int
udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
struct sockaddr *addr6, struct mbuf *control, struct thread *td)
{
- struct inpcbinfo *pcbinfo;
struct inpcb *inp;
struct ip6_hdr *ip6;
struct udphdr *udp6;
@@ -698,7 +693,7 @@
u_int32_t ulen, plen;
uint16_t cscov;
u_short fport;
- uint8_t nxt, unlock_inp, unlock_udbinfo;
+ uint8_t nxt;
/* addr6 has been validated in udp6_send(). */
sin6 = (struct sockaddr_in6 *)addr6;
@@ -741,30 +736,17 @@
* - on connected sockets (sin6 is NULL) for route cache updates,
* - when we are not bound to an address and source port (it is
* in6_pcbsetport() which will require the write lock).
+ *
+ * We check the inp fields before actually locking the inp, so
+ * there is a race: we may WLOCK the inp and find that another thread
+ * has already bound it. This is fine.
*/
-retry:
if (sin6 == NULL || (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
- inp->inp_lport == 0)) {
+ inp->inp_lport == 0))
INP_WLOCK(inp);
- /*
- * In case we lost a race and another thread bound addr/port
- * on the inp we cannot keep the wlock (which still would be
- * fine) as further down, based on these values we make
- * decisions for the pcbinfo lock. If the locks are not in
- * synch the assertions on unlock will fire, hence we go for
- * one retry loop.
- */
- if (sin6 != NULL &&
- (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
- inp->inp_lport != 0)) {
- INP_WUNLOCK(inp);
- goto retry;
- }
- unlock_inp = UH_WLOCKED;
- } else {
+ else
INP_RLOCK(inp);
- unlock_inp = UH_RLOCKED;
- }
+
nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
IPPROTO_UDP : IPPROTO_UDPLITE;
@@ -788,10 +770,7 @@
* potential race in which the factors causing us to
* select the UDPv4 output routine are invalidated?
*/
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
if (sin6)
in6_sin6_2_sin_in_sock((struct sockaddr *)sin6);
pru = inetsw[ip_protox[nxt]].pr_usrreqs;
@@ -806,21 +785,17 @@
* Given this is either an IPv6-only socket or no INET is
* supported we will fail the send if the given destination
* address is a v4mapped address.
+ *
+ * XXXGL: do we leak m and control?
*/
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
return (EINVAL);
}
if (control) {
if ((error = ip6_setpktopts(control, &opt,
inp->in6p_outputopts, td->td_ucred, nxt)) != 0) {
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
ip6_clearpktopts(&opt, -1);
if (control)
m_freem(control);
@@ -831,20 +806,7 @@
} else
optp = inp->in6p_outputopts;
- pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
- if (sin6 != NULL &&
- IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) {
- INP_HASH_WLOCK(pcbinfo);
- unlock_udbinfo = UH_WLOCKED;
- } else if (sin6 != NULL &&
- (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
- IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
- inp->inp_lport == 0)) {
- INP_HASH_RLOCK_ET(pcbinfo, et);
- unlock_udbinfo = UH_RLOCKED;
- } else
- unlock_udbinfo = UH_UNLOCKED;
-
+ NET_EPOCH_ENTER(et);
if (sin6) {
/*
@@ -880,9 +842,14 @@
laddr = &in6a;
if (inp->inp_lport == 0) {
+ struct inpcbinfo *pcbinfo;
INP_WLOCK_ASSERT(inp);
+
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
+ INP_HASH_WLOCK(pcbinfo);
error = in6_pcbsetport(laddr, inp, td->td_ucred);
+ INP_HASH_WUNLOCK(pcbinfo);
if (error != 0) {
/* Undo an address bind that may have occurred. */
inp->in6p_laddr = in6addr_any;
@@ -1006,21 +973,15 @@
#endif
UDPSTAT_INC(udps_opackets);
- if (unlock_udbinfo == UH_WLOCKED)
- INP_HASH_WUNLOCK(pcbinfo);
- else if (unlock_udbinfo == UH_RLOCKED)
- INP_HASH_RUNLOCK_ET(pcbinfo, et);
if (nxt == IPPROTO_UDPLITE)
UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
else
UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
error = ip6_output(m, optp,
- (unlock_inp == UH_WLOCKED) ? &inp->inp_route6 : NULL, flags,
+ INP_WLOCKED(inp) ? &inp->inp_route6 : NULL, flags,
inp->in6p_moptions, NULL, inp);
- if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
+ NET_EPOCH_EXIT(et);
if (control) {
ip6_clearpktopts(&opt, -1);
@@ -1029,22 +990,8 @@
return (error);
release:
- if (unlock_udbinfo == UH_WLOCKED) {
- KASSERT(unlock_inp == UH_WLOCKED, ("%s: excl udbinfo lock, "
- "non-excl inp lock: pcbinfo %p %#x inp %p %#x",
- __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp));
- INP_HASH_WUNLOCK(pcbinfo);
- INP_WUNLOCK(inp);
- } else if (unlock_udbinfo == UH_RLOCKED) {
- KASSERT(unlock_inp == UH_RLOCKED, ("%s: non-excl udbinfo lock, "
- "excl inp lock: pcbinfo %p %#x inp %p %#x",
- __func__, pcbinfo, unlock_udbinfo, inp, unlock_inp));
- INP_HASH_RUNLOCK_ET(pcbinfo, et);
- INP_RUNLOCK(inp);
- } else if (unlock_inp == UH_WLOCKED)
- INP_WUNLOCK(inp);
- else
- INP_RUNLOCK(inp);
+ INP_UNLOCK(inp);
+ NET_EPOCH_EXIT(et);
if (control) {
ip6_clearpktopts(&opt, -1);
m_freem(control);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jun 19, 8:10 PM (13 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34096077
Default Alt Text
D22197.id63806.diff (73 KB)
Attached To
Mode
D22197: Refactor PCB hash read lock (the epoch)
Attached
Detach File
Event Timeline
Log In to Comment