Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F149560719
D55967.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
13 KB
Referenced Files
None
Subscribers
None
D55967.diff
View Options
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -321,10 +321,14 @@
* Almost all fields of struct inpcb are static after creation or protected by
* a per-inpcb rwlock, inp_lock.
*
- * A inpcb database is indexed by addresses/ports hash as well as list of
- * all pcbs that belong to a certain proto. Database lookups or list traversals
- * are be performed inside SMR section. Once desired PCB is found its own
- * lock is to be obtained and SMR section exited.
+ * An inpcb database consists of two hash tables: one for connected pcbs and
+ * the other for wildcard-bound pcbs. The newborn pcbs reside on the
+ * unconnected list. Although a pcb can be on any of these three lists, we
+ * can't share the linkage pointer due to its CK-nature. The only thing that
+ * can be unionized is the load-balance table and exact hash, as a pcb can
+ * never participate in both tables through its entire lifetime. Database
+ * lookups or list traversals are to be performed inside SMR section. Once
+ * desired PCB is found its own lock is to be obtained and SMR section exited.
*
* Key:
* (c) - Constant after initialization
@@ -352,10 +356,11 @@
struct inpcb {
/* Cache line #1 (amd64) */
union {
- CK_LIST_ENTRY(inpcb) inp_hash_exact; /* hash table linkage */
- LIST_ENTRY(inpcb) inp_lbgroup_list; /* lb group list */
+ CK_LIST_ENTRY(inpcb) inp_hash_exact;
+ LIST_ENTRY(inpcb) inp_lbgroup_list;
};
- CK_LIST_ENTRY(inpcb) inp_hash_wild; /* hash table linkage */
+ CK_LIST_ENTRY(inpcb) inp_hash_wild;
+ CK_LIST_ENTRY(inpcb) inp_unconn_list;
struct rwlock inp_lock;
/* Cache line #2 (amd64) */
#define inp_start_zero inp_refcount
@@ -412,7 +417,6 @@
struct route inp_route;
struct route_in6 inp_route6;
};
- CK_LIST_ENTRY(inpcb) inp_list; /* (r:e/w:p) all PCBs for proto */
};
#define inp_vnet inp_pcbinfo->ipi_vnet
@@ -431,7 +435,6 @@
* (h) Locked by ipi_hash_lock
*/
struct inpcbinfo {
- struct inpcbhead ipi_listhead; /* (r:e/w:h) */
u_int ipi_count; /* (h) */
/*
@@ -460,6 +463,7 @@
* address, and "wild" holds the rest.
*/
struct mtx ipi_hash_lock;
+ struct inpcbhead ipi_list_unconn; /* (r:e/w:h) */
struct inpcbhead *ipi_hash_exact; /* (r:e/w:h) */
struct inpcbhead *ipi_hash_wild; /* (r:e/w:h) */
u_long ipi_hashmask; /* (c) */
@@ -671,13 +675,20 @@
int (*ctloutput_set)(struct inpcb *, struct sockopt *));
#endif
+/*
+ * struct inpcb_iterator is located on the stack of a function that uses
+ * inp_next(). The const members are set by the caller with INP_ITERATOR().
+ * (c) - caller
+ * (n) - inp_next()
+ */
typedef bool inp_match_t(const struct inpcb *, void *);
struct inpcb_iterator {
const struct inpcbinfo *ipi;
- struct inpcb *inp;
+ struct inpcb *inp; /* c:r, n:w */
inp_match_t *match;
void *ctx;
- int hash;
+ int hash; /* c:w, n:rw */
+ int mode; /* n:rw */
#define INP_ALL_LIST -1
const inp_lookup_t lock;
};
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -241,8 +241,6 @@
"Allow connecting to INADDR_ANY or INADDR_BROADCAST for connect(2)");
#endif
-static void in_pcbremhash(struct inpcb *);
-
/*
* in_pcb.c: manage the Protocol Control Blocks.
*
@@ -562,7 +560,7 @@
#ifdef VIMAGE
pcbinfo->ipi_vnet = curvnet;
#endif
- CK_LIST_INIT(&pcbinfo->ipi_listhead);
+ CK_LIST_INIT(&pcbinfo->ipi_list_unconn);
pcbinfo->ipi_count = 0;
pcbinfo->ipi_hash_exact = hashinit(hash_nelements, M_PCB,
&pcbinfo->ipi_hashmask);
@@ -691,7 +689,7 @@
INP_HASH_WLOCK(pcbinfo);
pcbinfo->ipi_count++;
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
- CK_LIST_INSERT_HEAD(&pcbinfo->ipi_listhead, inp, inp_list);
+ CK_LIST_INSERT_HEAD(&pcbinfo->ipi_list_unconn, inp, inp_unconn_list);
INP_HASH_WUNLOCK(pcbinfo);
so->so_pcb = inp;
@@ -1423,7 +1421,9 @@
KASSERT(inp->inp_smr == SMR_SEQ_INVALID,
("%s: inp %p was already disconnected", __func__, inp));
- in_pcbremhash_locked(inp);
+ in_pcbremhash(inp);
+ CK_LIST_INSERT_HEAD(&inp->inp_pcbinfo->ipi_list_unconn, inp,
+ inp_unconn_list);
if ((inp->inp_socket->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
/* See the comment in in_pcbinshash(). */
@@ -1546,8 +1546,13 @@
*
* - Iterator can have either write-lock or read-lock semantics, that can not
* be changed later.
- * - Iterator can iterate either over all pcbs list (INP_ALL_LIST), or through
- * a single hash slot. Note: only rip_input() does the latter.
+ * - Iterator can iterate over all possible lists where an inpcb resides:
+ * if initialized with INP_ALL_LIST hash the iterator will go through the
+ * unconnected list, then all wildcard hash slots and then all exact hash
+ * slots. Alternatively iterator can go through a single exact hash slot.
+ * Note: only rip_input() and sysctl_setsockopt() do the latter.
+ * The interface may be extended for iteration over a single wildcard hash
+ * slot, but there is no use case for that today.
* - Iterator may have optional bool matching function. The matching function
* will be executed for each inpcb in the SMR context, so it can not acquire
* locks and can safely access only immutable fields of inpcb.
@@ -1565,49 +1570,72 @@
* - Removed entries won't stop traversal as long as they are not added to
* a different list. This is violated by in_pcbrehash().
*/
-#define II_LIST_FIRST(ipi, hash) \
- (((hash) == INP_ALL_LIST) ? \
- CK_LIST_FIRST(&(ipi)->ipi_listhead) : \
- CK_LIST_FIRST(&(ipi)->ipi_hash_exact[(hash)]))
-#define II_LIST_NEXT(inp, hash) \
- (((hash) == INP_ALL_LIST) ? \
- CK_LIST_NEXT((inp), inp_list) : \
- CK_LIST_NEXT((inp), inp_hash_exact))
-#define II_LOCK_ASSERT(inp, lock) \
- rw_assert(&(inp)->inp_lock, \
- (lock) == INPLOOKUP_RLOCKPCB ? RA_RLOCKED : RA_WLOCKED )
+static inline struct inpcb *
+ii_list_first(const struct inpcb_iterator *ii)
+{
+ const struct inpcbinfo *ipi = ii->ipi;
+ const int hash = ii->hash;
+
+ if (hash == INP_ALL_LIST)
+ return (CK_LIST_FIRST(&ipi->ipi_list_unconn));
+ else if (hash <= ipi->ipi_hashmask)
+ return (CK_LIST_FIRST(&ipi->ipi_hash_wild[hash]));
+ else
+ return (CK_LIST_FIRST(
+ &ipi->ipi_hash_exact[hash - ipi->ipi_hashmask - 1]));
+}
+
+static inline struct inpcb *
+ii_list_next(const struct inpcb_iterator *ii, struct inpcb *inp)
+{
+ if (ii->hash == INP_ALL_LIST)
+ return (CK_LIST_NEXT(inp, inp_unconn_list));
+ else if (ii->hash <= ii->ipi->ipi_hashmask)
+ return (CK_LIST_NEXT(inp, inp_hash_wild));
+ else
+ return (CK_LIST_NEXT(inp, inp_hash_exact));
+}
+
struct inpcb *
inp_next(struct inpcb_iterator *ii)
{
const struct inpcbinfo *ipi = ii->ipi;
+ const int hashmax = (ipi->ipi_hashmask + 1) * 2;
inp_match_t *match = ii->match;
void *ctx = ii->ctx;
inp_lookup_t lock = ii->lock;
- int hash = ii->hash;
struct inpcb *inp;
if (ii->inp == NULL) { /* First call. */
+ if ((ii->mode = ii->hash) != INP_ALL_LIST) {
+ /* Targeted iterators support only the exact hash. */
+ MPASS(ii->hash <= ipi->ipi_hashmask);
+ ii->hash += ipi->ipi_hashmask + 1;
+ }
smr_enter(ipi->ipi_smr);
- /* This is unrolled CK_LIST_FOREACH(). */
- for (inp = II_LIST_FIRST(ipi, hash);
+next_first:
+ /* This is unrolled CK_LIST_FOREACH() over different headers. */
+ for (inp = ii_list_first(ii);
inp != NULL;
- inp = II_LIST_NEXT(inp, hash)) {
+ inp = ii_list_next(ii, inp)) {
if (match != NULL && (match)(inp, ctx) == false)
continue;
if (__predict_true(_inp_smr_lock(inp, lock, INP_FREED)))
break;
else {
smr_enter(ipi->ipi_smr);
- MPASS(inp != II_LIST_FIRST(ipi, hash));
- inp = II_LIST_FIRST(ipi, hash);
+ MPASS(inp != ii_list_first(ii));
+ inp = ii_list_first(ii);
if (inp == NULL)
break;
}
}
- if (inp == NULL)
+ if (inp == NULL) {
+ if (ii->mode == INP_ALL_LIST && ++ii->hash < hashmax)
+ goto next_first;
smr_exit(ipi->ipi_smr);
- else
+ } else
ii->inp = inp;
return (inp);
@@ -1617,11 +1645,18 @@
smr_enter(ipi->ipi_smr);
restart:
inp = ii->inp;
- II_LOCK_ASSERT(inp, lock);
+ rw_assert(&inp->inp_lock,
+ lock == INPLOOKUP_RLOCKPCB ? RA_RLOCKED : RA_WLOCKED);
next:
- inp = II_LIST_NEXT(inp, hash);
+ inp = ii_list_next(ii, inp);
if (inp == NULL) {
+ if (ii->mode == INP_ALL_LIST && ++ii->hash < hashmax) {
+ inp_unlock(ii->inp, lock);
+ ii->inp = NULL;
+ goto next_first;
+ }
smr_exit(ipi->ipi_smr);
+ ii->hash = ii->mode; /* Make ii reusable. */
goto found;
}
@@ -1793,10 +1828,11 @@
*/
INP_HASH_WLOCK(pcbinfo);
if (inp->inp_flags & INP_INHASHLIST)
- in_pcbremhash_locked(inp);
+ in_pcbremhash(inp);
+ else
+ CK_LIST_REMOVE(inp, inp_unconn_list);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
pcbinfo->ipi_count--;
- CK_LIST_REMOVE(inp, inp_list);
INP_HASH_WUNLOCK(pcbinfo);
#ifdef RATELIMIT
@@ -1876,8 +1912,13 @@
INP_WLOCK_ASSERT(inp);
inp->inp_flags |= INP_DROPPED;
- if (inp->inp_flags & INP_INHASHLIST)
+ if (inp->inp_flags & INP_INHASHLIST) {
+ INP_HASH_WLOCK(inp->inp_pcbinfo);
in_pcbremhash(inp);
+ CK_LIST_INSERT_HEAD(&inp->inp_pcbinfo->ipi_list_unconn, inp,
+ inp_unconn_list);
+ INP_HASH_WUNLOCK(inp->inp_pcbinfo);
+ }
}
#ifdef INET
@@ -2687,6 +2728,8 @@
inp->inp_smr = SMR_SEQ_INVALID;
}
+ CK_LIST_REMOVE(inp, inp_unconn_list);
+
if (connected)
CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash_exact);
else {
@@ -2704,7 +2747,7 @@
}
void
-in_pcbremhash_locked(struct inpcb *inp)
+in_pcbremhash(struct inpcb *inp)
{
INP_WLOCK_ASSERT(inp);
@@ -2731,14 +2774,6 @@
inp->inp_flags &= ~INP_INHASHLIST;
}
-static void
-in_pcbremhash(struct inpcb *inp)
-{
- INP_HASH_WLOCK(inp->inp_pcbinfo);
- in_pcbremhash_locked(inp);
- INP_HASH_WUNLOCK(inp->inp_pcbinfo);
-}
-
/*
* Move PCB to the proper hash bucket when { faddr, fport } have been
* changed. NOTE: This does not handle the case of the lport changing (the
@@ -2781,15 +2816,12 @@
* When rehashing, the caller must ensure that either the new or the old
* foreign address was unspecified.
*/
- if (connected)
- CK_LIST_REMOVE(inp, inp_hash_wild);
- else
- CK_LIST_REMOVE(inp, inp_hash_exact);
-
if (connected) {
+ CK_LIST_REMOVE(inp, inp_hash_wild);
head = &pcbinfo->ipi_hash_exact[hash];
CK_LIST_INSERT_HEAD(head, inp, inp_hash_exact);
} else {
+ CK_LIST_REMOVE(inp, inp_hash_exact);
head = &pcbinfo->ipi_hash_wild[hash];
CK_LIST_INSERT_HEAD(head, inp, inp_hash_wild);
}
diff --git a/sys/netinet/in_pcb_var.h b/sys/netinet/in_pcb_var.h
--- a/sys/netinet/in_pcb_var.h
+++ b/sys/netinet/in_pcb_var.h
@@ -57,7 +57,7 @@
int, int, struct ucred *);
int in_pcbinshash(struct inpcb *);
void in_pcbrehash(struct inpcb *);
-void in_pcbremhash_locked(struct inpcb *);
+void in_pcbremhash(struct inpcb *);
/*
* Load balance groups used for the SO_REUSEPORT_LB socket option. Each group
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -2841,9 +2841,12 @@
static void
db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog,
- bool show_inpcb)
+ bool show_inpcb, bool only_locked)
{
+ if (only_locked && tp->t_inpcb.inp_lock.rw_lock == RW_UNLOCKED)
+ return;
+
db_print_indent(indent);
db_printf("%s at %p\n", name, tp);
@@ -2987,14 +2990,13 @@
show_bblog = strchr(modif, 'b') != NULL;
show_inpcb = strchr(modif, 'i') != NULL;
tp = (struct tcpcb *)addr;
- db_print_tcpcb(tp, "tcpcb", 0, show_bblog, show_inpcb);
+ db_print_tcpcb(tp, "tcpcb", 0, show_bblog, show_inpcb, false);
}
DB_SHOW_ALL_COMMAND(tcpcbs, db_show_all_tcpcbs)
{
VNET_ITERATOR_DECL(vnet_iter);
struct inpcb *inp;
- struct tcpcb *tp;
bool only_locked, show_bblog, show_inpcb;
only_locked = strchr(modif, 'l') != NULL;
@@ -3002,18 +3004,23 @@
show_inpcb = strchr(modif, 'i') != NULL;
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- CK_LIST_FOREACH(inp, &V_tcbinfo.ipi_listhead, inp_list) {
- if (only_locked &&
- inp->inp_lock.rw_lock == RW_UNLOCKED)
- continue;
- tp = intotcpcb(inp);
- db_print_tcpcb(tp, "tcpcb", 0, show_bblog, show_inpcb);
+ for (u_int i = 0; i <= V_tcbinfo.ipi_porthashmask; i++)
+ CK_LIST_FOREACH(inp, &V_tcbinfo.ipi_porthashbase[i],
+ inp_portlist) {
+ db_print_tcpcb(intotcpcb(inp), "tcpcb", 0,
+ show_bblog, show_inpcb, only_locked);
+ if (db_pager_quit)
+ goto break_hash;
+ }
+break_hash:
+ CK_LIST_FOREACH(inp, &V_tcbinfo.ipi_list_unconn,
+ inp_unconn_list) {
+ db_print_tcpcb(intotcpcb(inp), "tcpcb", 0,
+ show_bblog, show_inpcb, only_locked);
if (db_pager_quit)
break;
}
CURVNET_RESTORE();
- if (db_pager_quit)
- break;
}
}
#endif
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -511,7 +511,9 @@
KASSERT(inp->inp_smr == SMR_SEQ_INVALID,
("%s: inp %p was already disconnected", __func__, inp));
- in_pcbremhash_locked(inp);
+ in_pcbremhash(inp);
+ CK_LIST_INSERT_HEAD(&inp->inp_pcbinfo->ipi_list_unconn, inp,
+ inp_unconn_list);
if ((inp->inp_socket->so_proto->pr_flags & PR_CONNREQUIRED) == 0) {
/* See the comment in in_pcbinshash(). */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Mar 26, 6:37 AM (44 m, 24 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
30338501
Default Alt Text
D55967.diff (13 KB)
Attached To
Mode
D55967: inpcb: retire the inpcb global list
Attached
Detach File
Event Timeline
Log In to Comment