Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F107584000
D2599.id6077.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
48 KB
Referenced Files
None
Subscribers
None
D2599.id6077.diff
View Options
Index: sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
===================================================================
--- sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
+++ sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
@@ -639,7 +639,7 @@
unsigned int tid = toep->tp_tid;
#endif
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
CTR4(KTR_CXGB, "%s: tid %d, toep %p, flags %x", __func__, tid, toep,
@@ -925,12 +925,12 @@
rc = act_open_rpl_status_to_errno(s);
if (rc != EAGAIN)
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
toe_connect_failed(tod, inp, rc);
toepcb_release(toep); /* unlocks inp */
if (rc != EAGAIN)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
@@ -1061,7 +1061,7 @@
struct adapter *sc = tod->tod_softc;
struct mbuf *m;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
CTR4(KTR_CXGB, "%s: tid %d, toep %p (%x)", __func__, tid, toep,
@@ -1172,12 +1172,12 @@
SOCKBUF_UNLOCK(so_rcv);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
@@ -1222,7 +1222,7 @@
struct tcpcb *tp;
struct socket *so;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1250,7 +1250,7 @@
case TCPS_FIN_WAIT_2:
tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
INP_WLOCK(inp);
toepcb_release(toep); /* no more CPLs expected */
@@ -1264,7 +1264,7 @@
done:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
@@ -1285,7 +1285,7 @@
struct tcpcb *tp;
struct socket *so;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1303,7 +1303,7 @@
tcp_twstart(tp);
release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
INP_WLOCK(inp);
toepcb_release(toep); /* no more CPLs expected */
@@ -1328,7 +1328,7 @@
done:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
@@ -1489,7 +1489,7 @@
return (do_abort_req_synqe(qs, r, m));
inp = toep->tp_inp;
- INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */
+ INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1503,7 +1503,7 @@
toep->tp_flags |= TP_ABORT_REQ_RCVD;
toep->tp_flags |= TP_ABORT_SHUTDOWN;
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
}
@@ -1523,7 +1523,7 @@
INP_WLOCK(inp); /* re-acquire */
toepcb_release(toep); /* no more CPLs expected */
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
send_abort_rpl(tod, tid, qset);
m_freem(m);
Index: sys/dev/cxgb/ulp/tom/cxgb_listen.c
===================================================================
--- sys/dev/cxgb/ulp/tom/cxgb_listen.c
+++ sys/dev/cxgb/ulp/tom/cxgb_listen.c
@@ -541,11 +541,11 @@
REJECT_PASS_ACCEPT(); /* no l2te, or ifp mismatch */
}
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
/* Don't offload if the 4-tuple is already in use */
if (toe_4tuple_check(&inc, &th, ifp) != 0) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
REJECT_PASS_ACCEPT();
}
@@ -558,7 +558,7 @@
* resources tied to this listen context.
*/
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
REJECT_PASS_ACCEPT();
}
so = inp->inp_socket;
@@ -686,7 +686,7 @@
struct toepcb *toep;
struct socket *so;
struct listen_ctx *lctx = synqe->lctx;
- struct inpcb *inp = lctx->inp;
+ struct inpcb *inp = lctx->inp, *new_inp;
struct tcpopt to;
struct tcphdr th;
struct in_conninfo inc;
@@ -700,7 +700,7 @@
KASSERT(qs->idx == synqe->qset,
("%s qset mismatch %d %d", __func__, qs->idx, synqe->qset));
- INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */
+ INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */
INP_WLOCK(inp);
if (__predict_false(inp->inp_flags & INP_DROPPED)) {
@@ -714,7 +714,7 @@
("%s: listen socket dropped but tid %u not aborted.",
__func__, tid));
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
}
@@ -730,7 +730,7 @@
reset:
t3_send_reset_synqe(tod, synqe);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return (0);
}
@@ -748,21 +748,23 @@
goto reset;
}
- if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
- struct inpcb *new_inp = sotoinpcb(so);
+ /* New connection inpcb is already locked by syncache_expand(). */
+ new_inp = sotoinpcb(so);
+ INP_WLOCK_ASSERT(new_inp);
- INP_WLOCK(new_inp);
+ if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
t3_offload_socket(tod, synqe, so);
- INP_WUNLOCK(new_inp);
}
+ INP_WUNLOCK(new_inp);
+
/* Remove the synq entry and release its reference on the lctx */
TAILQ_REMOVE(&lctx->synq, synqe, link);
inp = release_lctx(td, lctx);
if (inp)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
release_synqe(synqe);
m_freem(m);
Index: sys/dev/cxgbe/tom/t4_connect.c
===================================================================
--- sys/dev/cxgbe/tom/t4_connect.c
+++ sys/dev/cxgbe/tom/t4_connect.c
@@ -189,12 +189,12 @@
toep->tid = -1;
if (status != EAGAIN)
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
toe_connect_failed(tod, inp, status);
final_cpl_received(toep); /* unlocks inp */
if (status != EAGAIN)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
static int
Index: sys/dev/cxgbe/tom/t4_cpl_io.c
===================================================================
--- sys/dev/cxgbe/tom/t4_cpl_io.c
+++ sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -1087,7 +1087,7 @@
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1129,7 +1129,7 @@
case TCPS_FIN_WAIT_2:
tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
INP_WLOCK(inp);
final_cpl_received(toep);
@@ -1141,7 +1141,7 @@
}
done:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (0);
}
@@ -1168,7 +1168,7 @@
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1186,7 +1186,7 @@
tcp_twstart(tp);
release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
INP_WLOCK(inp);
final_cpl_received(toep); /* no more CPLs expected */
@@ -1210,7 +1210,7 @@
}
done:
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (0);
}
@@ -1369,7 +1369,7 @@
}
inp = toep->inp;
- INP_INFO_WLOCK(&V_tcbinfo); /* for tcp_close */
+ INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1403,7 +1403,7 @@
final_cpl_received(toep);
done:
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
return (0);
}
@@ -1517,12 +1517,12 @@
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (0);
}
Index: sys/dev/cxgbe/tom/t4_listen.c
===================================================================
--- sys/dev/cxgbe/tom/t4_listen.c
+++ sys/dev/cxgbe/tom/t4_listen.c
@@ -1259,15 +1259,15 @@
REJECT_PASS_ACCEPT();
rpl = wrtod(wr);
- INP_INFO_WLOCK(&V_tcbinfo); /* for 4-tuple check */
+ INP_INFO_RLOCK(&V_tcbinfo); /* for 4-tuple check */
/* Don't offload if the 4-tuple is already in use */
if (toe_4tuple_check(&inc, &th, ifp) != 0) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
free(wr, M_CXGBE);
REJECT_PASS_ACCEPT();
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
inp = lctx->inp; /* listening socket, not owned by TOE */
INP_WLOCK(inp);
@@ -1441,7 +1441,7 @@
unsigned int tid = GET_TID(cpl);
struct synq_entry *synqe = lookup_tid(sc, tid);
struct listen_ctx *lctx = synqe->lctx;
- struct inpcb *inp = lctx->inp;
+ struct inpcb *inp = lctx->inp, *new_inp;
struct socket *so;
struct tcphdr th;
struct tcpopt to;
@@ -1459,7 +1459,7 @@
KASSERT(synqe->flags & TPF_SYNQE,
("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
- INP_INFO_WLOCK(&V_tcbinfo); /* for syncache_expand */
+ INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */
INP_WLOCK(inp);
CTR6(KTR_CXGBE,
@@ -1475,7 +1475,7 @@
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (0);
}
@@ -1500,7 +1500,7 @@
*/
send_reset_synqe(TOEDEV(ifp), synqe);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (0);
}
toep->tid = tid;
@@ -1534,6 +1534,10 @@
goto reset;
}
+ /* New connection inpcb is already locked by syncache_expand(). */
+ new_inp = sotoinpcb(so);
+ INP_WLOCK_ASSERT(new_inp);
+
/*
* This is for the unlikely case where the syncache entry that we added
* has been evicted from the syncache, but the syncache_expand above
@@ -1544,20 +1548,18 @@
* this somewhat defeats the purpose of having a tod_offload_socket :-(
*/
if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
- struct inpcb *new_inp = sotoinpcb(so);
-
- INP_WLOCK(new_inp);
tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
t4_offload_socket(TOEDEV(ifp), synqe, so);
- INP_WUNLOCK(new_inp);
}
+ INP_WUNLOCK(new_inp);
+
/* Done with the synqe */
TAILQ_REMOVE(&lctx->synq, synqe, link);
inp = release_lctx(sc, lctx);
if (inp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
release_synqe(synqe);
return (0);
Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -130,23 +130,35 @@
struct icmp6_filter;
/*-
- * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4
- * and IPv6 sockets. In the case of TCP, further per-connection state is
+ * struct inpcb captures the network layer state for TCP, UDP, and raw IPv4 and
+ * IPv6 sockets. In the case of TCP and UDP, further per-connection state is
* hung off of inp_ppcb most of the time. Almost all fields of struct inpcb
* are static after creation or protected by a per-inpcb rwlock, inp_lock. A
- * few fields also require the global pcbinfo lock for the inpcb to be held,
- * when modified, such as the global connection lists and hashes, as well as
- * binding information (which affects which hash a connection is on). This
- * model means that connections can be looked up without holding the
- * per-connection lock, which is important for performance when attempting to
- * find the connection for a packet given its IP and port tuple. Writing to
- * these fields that write locks be held on both the inpcb and global locks.
+ * few fields are protected by multiple locks as indicated in the locking notes
+ * below. For these fields, all of the listed locks must be write-locked for
+ * any modifications. However, these fields can be safely read while any one of
+ * the listed locks are read-locked. This model can permit greater concurrency
+ * for read operations. For example, connections can be looked up while only
+ * holding a read lock on the global pcblist lock. This is important for
+ * performance when attempting to find the connection for a packet given its IP
+ * and port tuple.
+ *
+ * One noteworthy exception is that the global pcbinfo lock follows a different
+ * set of rules in relation to the inp_list field. Rather than being
+ * write-locked for modifications and read-locked for list iterations, it must
+ * be read-locked during modifications and write-locked during list iterations.
+ * This ensures that the relatively rare global list iterations safely walk a
+ * stable snapshot of connections while allowing more common list modifications
+ * to safely grab the pcblist lock just while adding or removing a connection
+ * from the global list.
*
* Key:
* (c) - Constant after initialization
* (g) - Protected by the pcbgroup lock
* (i) - Protected by the inpcb lock
* (p) - Protected by the pcbinfo lock for the inpcb
+ * (l) - Protected by the pcblist lock for the inpcb
+ * (h) - Protected by the pcbhash lock for the inpcb
* (s) - Protected by another subsystem's locks
* (x) - Undefined locking
*
@@ -161,15 +173,21 @@
* socket has been freed), or there may be close(2)-related races.
*
* The inp_vflag field is overloaded, and would otherwise ideally be (c).
+ *
+ * TODO: Currently only the TCP stack is leveraging the global pcbinfo lock
+ * read-lock usage during modification; this model can be applied to other
+ * protocols (especially SCTP).
*/
struct inpcb {
- LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */
+ LIST_ENTRY(inpcb) inp_hash; /* (h/i) hash list */
LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */
- LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */
+ LIST_ENTRY(inpcb) inp_list; /* (p/l) list for all PCBs for proto */
+ /* (p[w]) for list iteration */
+ /* (p[r]/l) for addition/removal */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
- LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
+ LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/h) group wildcard entry */
struct socket *inp_socket; /* (i) back pointer to socket */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
@@ -188,7 +206,7 @@
* general use */
/* Local and foreign ports, local and foreign addr. */
- struct in_conninfo inp_inc; /* (i/p) list for PCB's local port */
+ struct in_conninfo inp_inc; /* (i) list for PCB's local port */
/* MAC and IPSEC policy information. */
struct label *inp_label; /* (i) MAC label */
@@ -213,8 +231,8 @@
int inp6_cksum;
short inp6_hops;
} inp_depend6;
- LIST_ENTRY(inpcb) inp_portlist; /* (i/p) */
- struct inpcbport *inp_phd; /* (i/p) head of this list */
+ LIST_ENTRY(inpcb) inp_portlist; /* (i/h) */
+ struct inpcbport *inp_phd; /* (i/h) head of this list */
#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
inp_gen_t inp_gencnt; /* (c) generation count */
struct llentry *inp_lle; /* cached L2 information */
@@ -279,37 +297,46 @@
* Global data structure for each high-level protocol (UDP, TCP, ...) in both
* IPv4 and IPv6. Holds inpcb lists and information for managing them.
*
- * Each pcbinfo is protected by two locks: ipi_lock and ipi_hash_lock,
- * the former covering mutable global fields (such as the global pcb list),
- * and the latter covering the hashed lookup tables. The lock order is:
+ * Each pcbinfo is protected by three locks: ipi_lock, ipi_hash_lock and
+ * ipi_list_lock:
+ * - ipi_lock covering the global pcb list stability during loop iteration,
+ * - ipi_hash_lock covering the hashed lookup tables,
+ * - ipi_list_lock covering mutable global fields (such as the global
+ * pcb list)
+ *
+ * The lock order is:
*
- * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
+ * ipi_lock (before)
+ * inpcb locks (before)
+ * ipi_list_lock (before)
+ * {ipi_hash_lock, pcbgroup locks}
*
* Locking key:
*
* (c) Constant or nearly constant after initialisation
* (g) Locked by ipi_lock
+ * (l) Locked by ipi_list_lock
* (h) Read using either ipi_hash_lock or inpcb lock; write requires both
* (p) Protected by one or more pcbgroup locks
* (x) Synchronisation properties poorly defined
*/
struct inpcbinfo {
/*
- * Global lock protecting global inpcb list, inpcb count, etc.
+ * Global lock protecting full inpcb list traversal
*/
struct rwlock ipi_lock;
/*
* Global list of inpcbs on the protocol.
*/
- struct inpcbhead *ipi_listhead; /* (g) */
- u_int ipi_count; /* (g) */
+ struct inpcbhead *ipi_listhead; /* (g/l) */
+ u_int ipi_count; /* (l) */
/*
* Generation count -- incremented each time a connection is allocated
* or freed.
*/
- u_quad_t ipi_gencnt; /* (g) */
+ u_quad_t ipi_gencnt; /* (l) */
/*
* Fields associated with port lookup and allocation.
@@ -367,6 +394,11 @@
* general use 2
*/
void *ipi_pspare[2];
+
+ /*
+ * Global lock protecting global inpcb list, inpcb count, etc.
+ */
+ struct rwlock ipi_list_lock;
};
#ifdef _KERNEL
@@ -466,6 +498,25 @@
#define INP_INFO_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_WLOCKED)
#define INP_INFO_UNLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_lock, RA_UNLOCKED)
+#define INP_LIST_LOCK_INIT(ipi, d) \
+ rw_init_flags(&(ipi)->ipi_list_lock, (d), 0)
+#define INP_LIST_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_list_lock)
+#define INP_LIST_RLOCK(ipi) rw_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WLOCK(ipi) rw_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_RLOCK(ipi) rw_try_rlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_WLOCK(ipi) rw_try_wlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_TRY_UPGRADE(ipi) rw_try_upgrade(&(ipi)->ipi_list_lock)
+#define INP_LIST_RUNLOCK(ipi) rw_runlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_WUNLOCK(ipi) rw_wunlock(&(ipi)->ipi_list_lock)
+#define INP_LIST_LOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_LOCKED)
+#define INP_LIST_RLOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_RLOCKED)
+#define INP_LIST_WLOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_WLOCKED)
+#define INP_LIST_UNLOCK_ASSERT(ipi) \
+ rw_assert(&(ipi)->ipi_list_lock, RA_UNLOCKED)
+
#define INP_HASH_LOCK_INIT(ipi, d) \
rw_init_flags(&(ipi)->ipi_hash_lock, (d), 0)
#define INP_HASH_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_hash_lock)
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -222,6 +222,7 @@
INP_INFO_LOCK_INIT(pcbinfo, name);
INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash"); /* XXXRW: argument? */
+ INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist");
#ifdef VIMAGE
pcbinfo->ipi_vnet = curvnet;
#endif
@@ -260,6 +261,7 @@
in_pcbgroup_destroy(pcbinfo);
#endif
uma_zdestroy(pcbinfo->ipi_zone);
+ INP_LIST_LOCK_DESTROY(pcbinfo);
INP_HASH_LOCK_DESTROY(pcbinfo);
INP_INFO_LOCK_DESTROY(pcbinfo);
}
@@ -274,7 +276,14 @@
struct inpcb *inp;
int error;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+ if (pcbinfo == &V_tcbinfo) {
+ INP_INFO_RLOCK_ASSERT(pcbinfo);
+ } else {
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ }
+#endif
+
error = 0;
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
if (inp == NULL)
@@ -306,6 +315,8 @@
inp->inp_flags |= IN6P_IPV6_V6ONLY;
}
#endif
+ INP_WLOCK(inp);
+ INP_LIST_WLOCK(pcbinfo);
LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
pcbinfo->ipi_count++;
so->so_pcb = (caddr_t)inp;
@@ -313,9 +324,9 @@
if (V_ip6_auto_flowlabel)
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif
- INP_WLOCK(inp);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
refcount_init(&inp->inp_refcount, 1); /* Reference from inpcbinfo */
+ INP_LIST_WUNLOCK(pcbinfo);
#if defined(IPSEC) || defined(MAC)
out:
if (error != 0) {
@@ -1243,7 +1254,13 @@
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+ if (pcbinfo == &V_tcbinfo) {
+ INP_INFO_RLOCK_ASSERT(pcbinfo);
+ } else {
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ }
+#endif
INP_WLOCK_ASSERT(inp);
/* XXXRW: Do as much as possible here. */
@@ -1251,8 +1268,10 @@
if (inp->inp_sp != NULL)
ipsec_delete_pcbpolicy(inp);
#endif
+ INP_LIST_WLOCK(pcbinfo);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
in_pcbremlists(inp);
+ INP_LIST_WUNLOCK(pcbinfo);
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO) {
ip6_freepcbopts(inp->in6p_outputopts);
@@ -1409,7 +1428,7 @@
struct ip_moptions *imo;
int i, gap;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_WLOCK(pcbinfo);
LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(inp);
imo = inp->inp_moptions;
@@ -1439,7 +1458,7 @@
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
}
/*
@@ -2160,8 +2179,16 @@
{
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
- INP_INFO_WLOCK_ASSERT(pcbinfo);
+#ifdef INVARIANTS
+ if (pcbinfo == &V_tcbinfo) {
+ INP_INFO_RLOCK_ASSERT(pcbinfo);
+ } else {
+ INP_INFO_WLOCK_ASSERT(pcbinfo);
+ }
+#endif
+
INP_WLOCK_ASSERT(inp);
+ INP_LIST_WLOCK_ASSERT(pcbinfo);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
if (inp->inp_flags & INP_INHASHLIST) {
@@ -2306,13 +2333,13 @@
{
struct inpcb *inp;
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) {
INP_WLOCK(inp);
func(inp, arg);
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
}
struct socket *
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -608,7 +608,7 @@
char *s = NULL; /* address and port logging */
int ti_locked;
#define TI_UNLOCKED 1
-#define TI_WLOCKED 2
+#define TI_RLOCKED 2
#ifdef TCPDEBUG
/*
@@ -797,8 +797,8 @@
* connection in TIMEWAIT and SYNs not targeting a listening socket.
*/
if ((thflags & (TH_FIN | TH_RST)) != 0) {
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
} else
ti_locked = TI_UNLOCKED;
@@ -820,8 +820,8 @@
findpcb:
#ifdef INVARIANTS
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
}
@@ -969,20 +969,20 @@
relocked:
if (inp->inp_flags & INP_TIMEWAIT) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
INP_WLOCK(inp);
if (in_pcbrele_wlocked(inp)) {
inp = NULL;
goto findpcb;
}
} else
- ti_locked = TI_WLOCKED;
+ ti_locked = TI_RLOCKED;
}
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if (thflags & TH_SYN)
tcp_dooptions(&to, optp, optlen, TO_SYN);
@@ -991,7 +991,7 @@
*/
if (tcp_twcheck(inp, &to, th, m, tlen))
goto findpcb;
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (IPPROTO_DONE);
}
/*
@@ -1022,16 +1022,16 @@
*/
#ifdef INVARIANTS
if ((thflags & (TH_FIN | TH_RST)) != 0)
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
#endif
if (!((tp->t_state == TCPS_ESTABLISHED && (thflags & TH_SYN) == 0) ||
(tp->t_state == TCPS_LISTEN && (thflags & TH_SYN)))) {
if (ti_locked == TI_UNLOCKED) {
- if (INP_INFO_TRY_WLOCK(&V_tcbinfo) == 0) {
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo) == 0) {
in_pcbref(inp);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
- ti_locked = TI_WLOCKED;
+ INP_INFO_RLOCK(&V_tcbinfo);
+ ti_locked = TI_RLOCKED;
INP_WLOCK(inp);
if (in_pcbrele_wlocked(inp)) {
inp = NULL;
@@ -1039,9 +1039,9 @@
}
goto relocked;
} else
- ti_locked = TI_WLOCKED;
+ ti_locked = TI_RLOCKED;
}
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}
#ifdef MAC
@@ -1096,7 +1096,7 @@
*/
if ((thflags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Parse the TCP options here because
* syncookies need access to the reflected
@@ -1148,7 +1148,11 @@
*/
INP_WUNLOCK(inp); /* listen socket */
inp = sotoinpcb(so);
- INP_WLOCK(inp); /* new connection */
+ /*
+ * New connection inpcb is already locked by
+ * syncache_expand().
+ */
+ INP_WLOCK_ASSERT(inp);
tp = intotcpcb(inp);
KASSERT(tp->t_state == TCPS_SYN_RECEIVED,
("%s: ", __func__));
@@ -1379,8 +1383,8 @@
* Entry added to syncache and mbuf consumed.
* Only the listen socket is unlocked by syncache_add().
*/
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo);
@@ -1429,8 +1433,8 @@
dropwithreset:
TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1453,8 +1457,8 @@
if (m != NULL)
TCP_PROBE5(receive, NULL, tp, mtod(m, const char *), tp, th);
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
@@ -1511,13 +1515,13 @@
*/
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 ||
tp->t_state != TCPS_ESTABLISHED) {
- KASSERT(ti_locked == TI_WLOCKED, ("%s ti_locked %d for "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for "
"SYN/FIN/RST/!EST", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
} else {
#ifdef INVARIANTS
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
else {
KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST "
"ti_locked: %d", __func__, ti_locked));
@@ -1689,8 +1693,8 @@
/*
* This is a pure ack for outstanding data.
*/
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
TCPSTAT_INC(tcps_predack);
@@ -1793,8 +1797,8 @@
* nothing on the reassembly queue and we have enough
* buffer space to take it.
*/
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
/* Clean receiver SACK report if present */
@@ -2029,9 +2033,9 @@
tcp_state_change(tp, TCPS_SYN_RECEIVED);
}
- KASSERT(ti_locked == TI_WLOCKED, ("%s: trimthenstep6: "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: trimthenstep6: "
"ti_locked %d", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
/*
@@ -2104,8 +2108,8 @@
SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
(tp->rcv_wnd == 0 && tp->last_ack_sent == th->th_seq)) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_WLOCKED,
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED,
("%s: TH_RST ti_locked %d, th %p tp %p",
__func__, ti_locked, th, tp));
KASSERT(tp->t_state != TCPS_SYN_SENT,
@@ -2148,9 +2152,9 @@
* Send challenge ACK for any SYN in synchronized state.
*/
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT) {
- KASSERT(ti_locked == TI_WLOCKED,
+ KASSERT(ti_locked == TI_RLOCKED,
("tcp_do_segment: TH_SYN ti_locked %d", ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
TCPSTAT_INC(tcps_badsyn);
if (V_tcp_insecure_syn &&
@@ -2263,9 +2267,9 @@
*/
if ((so->so_state & SS_NOFDREF) &&
tp->t_state > TCPS_CLOSE_WAIT && tlen) {
- KASSERT(ti_locked == TI_WLOCKED, ("%s: SS_NOFDEREF && "
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: SS_NOFDEREF && "
"CLOSE_WAIT && tlen ti_locked %d", __func__, ti_locked));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: %s: Received %d bytes of data "
@@ -2766,9 +2770,9 @@
*/
case TCPS_CLOSING:
if (ourfinisacked) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
m_freem(m);
return;
}
@@ -2782,7 +2786,7 @@
*/
case TCPS_LAST_ACK:
if (ourfinisacked) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_close(tp);
goto drop;
}
@@ -2996,18 +3000,18 @@
* standard timers.
*/
case TCPS_FIN_WAIT_2:
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
- KASSERT(ti_locked == TI_WLOCKED, ("%s: dodata "
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
+ KASSERT(ti_locked == TI_RLOCKED, ("%s: dodata "
"TCP_FIN_WAIT_2 ti_locked: %d", __func__,
ti_locked));
tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return;
}
}
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
#ifdef TCPDEBUG
@@ -3062,8 +3066,8 @@
tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
&tcp_savetcp, 0);
#endif
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
tp->t_flags |= TF_ACKNOW;
@@ -3073,8 +3077,8 @@
return;
dropwithreset:
- if (ti_locked == TI_WLOCKED)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED)
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
if (tp != NULL) {
@@ -3085,8 +3089,8 @@
return;
drop:
- if (ti_locked == TI_WLOCKED) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ if (ti_locked == TI_RLOCKED) {
+ INP_INFO_RUNLOCK(&V_tcbinfo);
ti_locked = TI_UNLOCKED;
}
#ifdef INVARIANTS
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -856,7 +856,7 @@
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
/*
* New connections already part way through being initialised
* with the CC algo we're removing will not race with this code
@@ -886,7 +886,7 @@
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK();
@@ -904,7 +904,7 @@
{
struct socket *so = tp->t_inpcb->inp_socket;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
if (TCPS_HAVERCVDSYN(tp->t_state)) {
@@ -1106,7 +1106,7 @@
struct inpcb *inp = tp->t_inpcb;
struct socket *so;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
#ifdef TCP_OFFLOAD
@@ -1154,7 +1154,7 @@
* where we're really low on mbufs, this is potentially
* useful.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
if (inpb->inp_flags & INP_TIMEWAIT)
continue;
@@ -1165,7 +1165,7 @@
}
INP_WUNLOCK(inpb);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
VNET_LIST_RUNLOCK_NOSLEEP();
@@ -1184,7 +1184,7 @@
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -1248,10 +1248,10 @@
/*
* OK, now we're committed to doing something.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_LIST_RLOCK(&V_tcbinfo);
gencnt = V_tcbinfo.ipi_gencnt;
n = V_tcbinfo.ipi_count;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_LIST_RUNLOCK(&V_tcbinfo);
m = syncache_pcbcount();
@@ -1276,7 +1276,7 @@
if (inp_list == NULL)
return (ENOMEM);
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_INFO_WLOCK(&V_tcbinfo);
for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
INP_WLOCK(inp);
@@ -1301,7 +1301,7 @@
}
INP_WUNLOCK(inp);
}
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_INFO_WUNLOCK(&V_tcbinfo);
n = i;
error = 0;
@@ -1339,14 +1339,14 @@
} else
INP_RUNLOCK(inp);
}
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_RLOCK(inp);
if (!in_pcbrele_rlocked(inp))
INP_RUNLOCK(inp);
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
if (!error) {
/*
@@ -1356,11 +1356,11 @@
* while we were processing this request, and it
* might be necessary to retry.
*/
- INP_INFO_RLOCK(&V_tcbinfo);
+ INP_LIST_RLOCK(&V_tcbinfo);
xig.xig_gen = V_tcbinfo.ipi_gencnt;
xig.xig_sogen = so_gencnt;
xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
- INP_INFO_RUNLOCK(&V_tcbinfo);
+ INP_LIST_RUNLOCK(&V_tcbinfo);
error = SYSCTL_OUT(req, &xig, sizeof xig);
}
free(inp_list, M_TEMP);
@@ -1516,7 +1516,7 @@
- offsetof(struct icmp, icmp_ip));
th = (struct tcphdr *)((caddr_t)ip
+ (ip->ip_hl << 2));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport,
ip->ip_src, th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
if (inp != NULL) {
@@ -1576,7 +1576,7 @@
inc.inc_laddr = ip->ip_src;
syncache_unreach(&inc, th);
}
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
} else
in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
}
@@ -1646,9 +1646,9 @@
inc.inc6_faddr = ((struct sockaddr_in6 *)sa)->sin6_addr;
inc.inc6_laddr = ip6cp->ip6c_src->sin6_addr;
inc.inc_flags |= INC_ISIPV6;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
syncache_unreach(&inc, &th);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
} else
in6_pcbnotify(&V_tcbinfo, sa, 0, (const struct sockaddr *)sa6_src,
0, cmd, NULL, notify);
@@ -1781,7 +1781,7 @@
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if ((inp->inp_flags & INP_TIMEWAIT) ||
@@ -2339,7 +2339,7 @@
default:
return (EINVAL);
}
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
switch (addrs[0].ss_family) {
#ifdef INET6
case AF_INET6:
@@ -2378,7 +2378,7 @@
INP_WUNLOCK(inp);
} else
error = ESRCH;
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -652,6 +652,8 @@
/*
* Build a new TCP socket structure from a syncache entry.
+ *
+ * On success, return the newly created socket with its underlying inp locked.
*/
static struct socket *
syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
@@ -662,7 +664,7 @@
int error;
char *s;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
/*
* Ok, create the full blown connection, and set things up
@@ -907,8 +909,6 @@
tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
- INP_WUNLOCK(inp);
-
soisconnected(so);
TCPSTAT_INC(tcps_accepts);
@@ -928,6 +928,9 @@
* in the syncache, and if its there, we pull it out of
* the cache and turn it into a full-blown connection in
* the SYN-RECEIVED state.
+ *
+ * If syncache_socket() succeeds, the newly created socket
+ * has its underlying inp locked.
*/
int
syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
@@ -942,7 +945,7 @@
* Global TCP locks are held because we manipulate the PCB lists
* and create a new socket.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
("%s: can handle only ACK", __func__));
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -307,7 +307,7 @@
ostate = tp->t_state;
#endif
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
@@ -315,14 +315,14 @@
if (callout_pending(&tp->t_timers->tt_2msl) ||
!callout_active(&tp->t_timers->tt_2msl)) {
INP_WUNLOCK(tp->t_inpcb);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
callout_deactivate(&tp->t_timers->tt_2msl);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -363,7 +363,7 @@
#endif
if (tp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
@@ -379,21 +379,21 @@
ostate = tp->t_state;
#endif
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_keep) ||
!callout_active(&tp->t_timers->tt_keep)) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
callout_deactivate(&tp->t_timers->tt_keep);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -447,7 +447,7 @@
PRU_SLOWTIMO);
#endif
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
@@ -462,7 +462,7 @@
#endif
if (tp != NULL)
INP_WUNLOCK(tp->t_inpcb);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
@@ -477,21 +477,21 @@
ostate = tp->t_state;
#endif
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
if (callout_pending(&tp->t_timers->tt_persist) ||
!callout_active(&tp->t_timers->tt_persist)) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
callout_deactivate(&tp->t_timers->tt_persist);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -540,7 +540,7 @@
#endif
if (tp != NULL)
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
@@ -592,16 +592,16 @@
in_pcbref(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
INP_WUNLOCK(inp);
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
if (in_pcbrele_wlocked(inp)) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
if (inp->inp_flags & INP_DROPPED) {
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -803,7 +803,7 @@
if (tp != NULL)
INP_WUNLOCK(inp);
if (headlocked)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
}
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -202,10 +202,10 @@
{
struct tcptw *tw;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
while ((tw = TAILQ_FIRST(&V_twq_2msl)) != NULL)
tcp_twclose(tw, 0);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
TW_LOCK_DESTROY(V_tw_lock);
uma_zdestroy(V_tcptw_zone);
@@ -228,7 +228,7 @@
int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
#endif
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
if (V_nolocaltimewait) {
@@ -367,7 +367,7 @@
int thflags;
tcp_seq seq;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
/*
@@ -468,7 +468,7 @@
inp = tw->tw_inpcb;
KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* in_pcbfree() */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* in_pcbfree() */
INP_WLOCK_ASSERT(inp);
tcp_tw_2msl_stop(tw, reuse);
@@ -623,7 +623,7 @@
tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
{
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tw->tw_inpcb);
TW_WLOCK(V_tw_lock);
@@ -641,7 +641,7 @@
struct inpcb *inp;
int released;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
TW_WLOCK(V_tw_lock);
inp = tw->tw_inpcb;
@@ -677,7 +677,7 @@
* tcp_tw_start(),
* - the inpcb closed by tcp_twclose().
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
}
#endif
@@ -695,26 +695,26 @@
in_pcbref(inp);
TW_RUNLOCK(V_tw_lock);
- if (INP_INFO_TRY_WLOCK(&V_tcbinfo)) {
+ if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
INP_WLOCK(inp);
tw = intotw(inp);
if (in_pcbrele_wlocked(inp)) {
KASSERT(tw == NULL, ("%s: held last inp "
"reference but tw not NULL", __func__));
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
continue;
}
if (tw == NULL) {
/* tcp_twclose() has already been called */
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
continue;
}
tcp_twclose(tw, reuse);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
if (reuse)
return tw;
} else {
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -163,7 +163,7 @@
{
struct tcpcb *tp;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
@@ -192,10 +192,10 @@
* and leave inpcb present until timewait ends
* #2 tcp_detach is called at timewait end (or reuse) by
* tcp_twclose, then the tcptw has already been discarded
- * and inpcb is freed here
+ * (or reused) and inpcb is freed here
* #3 tcp_detach is called() after timewait ends (or reuse)
* (e.g. by soclose), then tcptw has already been discarded
- * and inpcb is freed here
+ * (or reused) and inpcb is freed here
*
* In all three cases the tcptw should not be freed here.
*/
@@ -244,12 +244,12 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_detach: inp_socket == NULL"));
tcp_detach(so, inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
#ifdef INET
@@ -611,7 +611,7 @@
int error = 0;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
INP_WLOCK(inp);
@@ -627,7 +627,7 @@
out:
TCPDEBUG2(PRU_DISCONNECT);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
@@ -742,7 +742,7 @@
struct tcpcb *tp = NULL;
TCPDEBUG0;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("inp == NULL"));
INP_WLOCK(inp);
@@ -760,7 +760,7 @@
out:
TCPDEBUG2(PRU_SHUTDOWN);
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
@@ -822,7 +822,7 @@
* this call.
*/
if (flags & PRUS_EOF)
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
INP_WLOCK(inp);
@@ -883,7 +883,7 @@
* Close the send side of the connection after
* the data is sent.
*/
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
socantsendmore(so);
tcp_usrclosed(tp);
}
@@ -950,7 +950,7 @@
((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
INP_WUNLOCK(inp);
if (flags & PRUS_EOF)
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
@@ -994,7 +994,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_abort: inp_socket == NULL"));
@@ -1016,7 +1016,7 @@
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
/*
@@ -1032,7 +1032,7 @@
inp = sotoinpcb(so);
KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
KASSERT(inp->inp_socket != NULL,
("tcp_usr_close: inp_socket == NULL"));
@@ -1055,7 +1055,7 @@
inp->inp_flags |= INP_SOCKREF;
}
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
}
/*
@@ -1655,10 +1655,10 @@
}
so->so_rcv.sb_flags |= SB_AUTOSIZE;
so->so_snd.sb_flags |= SB_AUTOSIZE;
- INP_INFO_WLOCK(&V_tcbinfo);
+ INP_INFO_RLOCK(&V_tcbinfo);
error = in_pcballoc(so, &V_tcbinfo);
if (error) {
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (error);
}
inp = sotoinpcb(so);
@@ -1674,12 +1674,12 @@
if (tp == NULL) {
in_pcbdetach(inp);
in_pcbfree(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (ENOBUFS);
}
tp->t_state = TCPS_CLOSED;
INP_WUNLOCK(inp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_INFO_RUNLOCK(&V_tcbinfo);
return (0);
}
@@ -1697,7 +1697,7 @@
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
/*
@@ -1735,7 +1735,7 @@
tcp_usrclosed(struct tcpcb *tp)
{
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(tp->t_inpcb);
switch (tp->t_state) {
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -339,7 +339,7 @@
struct tcphdr *th, struct socket **lsop)
{
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
return (syncache_expand(inc, to, th, lsop, NULL));
}
@@ -370,7 +370,7 @@
if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
if (!tcp_twcheck(inp, NULL, th, NULL, 0))
return (EADDRINUSE);
} else {
@@ -571,7 +571,7 @@
(void) tcp_output(tp);
} else {
- INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tp = tcp_drop(tp, err);
if (tp == NULL)
INP_WLOCK(inp); /* re-acquire */
Index: sys/netinet6/in6_pcb.c
===================================================================
--- sys/netinet6/in6_pcb.c
+++ sys/netinet6/in6_pcb.c
@@ -789,7 +789,7 @@
struct ip6_moptions *im6o;
int i, gap;
- INP_INFO_RLOCK(pcbinfo);
+ INP_INFO_WLOCK(pcbinfo);
LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) {
INP_WLOCK(in6p);
im6o = in6p->in6p_moptions;
@@ -820,7 +820,7 @@
}
INP_WUNLOCK(in6p);
}
- INP_INFO_RUNLOCK(pcbinfo);
+ INP_INFO_WUNLOCK(pcbinfo);
}
/*
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jan 17, 5:58 AM (20 h, 49 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15834845
Default Alt Text
D2599.id6077.diff (48 KB)
Attached To
Mode
D2599: Decompose TCP INP_INFO lock to increase short-lived connections scalability
Attached
Detach File
Event Timeline
Log In to Comment