Page MenuHomeFreeBSD

D33542.id100894.diff
No OneTemporary

D33542.id100894.diff

diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -375,8 +375,8 @@
#ifdef _KERNEL
/*
- * Global data structure for each high-level protocol (UDP, TCP, ...) in both
- * IPv4 and IPv6. Holds inpcb lists and information for managing them.
+ * Per-VNET pcb database for each high-level protocol (UDP, TCP, ...) in both
+ * IPv4 and IPv6.
*
* The pcbs are protected with SMR section and thus all lists in inpcbinfo
* are CK-lists. Locking is required to insert a pcb into database. Two
@@ -445,6 +445,41 @@
struct vnet *ipi_vnet; /* (c) */
};
+/*
+ * Global allocation storage for each high-level protocol (UDP, TCP, ...).
+ * Each corresponding per-VNET inpcbinfo points into this one.
+ *
+ * in_pcbstorage_init() creates the two UMA zones from the names and the
+ * init hook recorded here. in_pcbinfo_init() then copies the zone pointers
+ * into the per-VNET inpcbinfo and uses the two lock names for its mutexes.
+ *
+ * INPCBSTORAGE_DEFINE(prot, lname, zname, iname, hname) below instantiates
+ * one storage. It expands to:
+ * - prot##_inpcb_init(), the UMA item init hook, which initializes the
+ *   inpcb's inp_lock under the name 'lname' with RW_RECURSE | RW_DUPOK;
+ * - a static struct inpcbstorage named 'prot', using 'zname' as the inpcb
+ *   zone name, zname " ports" as the port zone name, and 'iname'/'hname'
+ *   as the info/hash lock names;
+ * - SYSINIT()/SYSUNINIT() registrations of in_pcbstorage_init() and
+ *   in_pcbstorage_destroy() at SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, with
+ *   &prot as the argument.
+ *
+ * 'zname' must be a string literal, because it is concatenated with
+ * " ports" at compile time. The final SYSUNINIT() in the expansion is left
+ * unterminated; the invocation supplies the trailing semicolon.
+ *
+ * NOTE(review): the per-VNET init routines touched by this change (e.g.
+ * div_init, rip_init, udp_init) are registered at SI_ORDER_THIRD,
+ * presumably so that the zones already exist when in_pcbinfo_init() copies
+ * them - confirm the ordering for any new user of this macro.
+ */
+struct inpcbstorage {
+ uma_zone_t ips_zone; /* zone of struct inpcb items */
+ uma_zone_t ips_portzone; /* zone of struct inpcbport items */
+ uma_init ips_pcbinit; /* UMA item init hook for ips_zone */
+ const char * ips_zone_name; /* uma_zcreate() name of ips_zone */
+ const char * ips_portzone_name; /* uma_zcreate() name of ips_portzone */
+ const char * ips_infolock_name; /* name of per-VNET ipi_lock mutex */
+ const char * ips_hashlock_name; /* name of per-VNET ipi_hash_lock mutex */
+};
+
+#define INPCBSTORAGE_DEFINE(prot, lname, zname, iname, hname) \
+static int \
+prot##_inpcb_init(void *mem, int size __unused, int flags __unused) \
+{ \
+ struct inpcb *inp = mem; \
+ \
+ rw_init_flags(&inp->inp_lock, lname, RW_RECURSE | RW_DUPOK); \
+ return (0); \
+} \
+static struct inpcbstorage prot = { \
+ .ips_pcbinit = prot##_inpcb_init, \
+ .ips_zone_name = zname, \
+ .ips_portzone_name = zname " ports", \
+ .ips_infolock_name = iname, \
+ .ips_hashlock_name = hname, \
+}; \
+SYSINIT(prot##_inpcbstorage_init, SI_SUB_PROTO_DOMAIN, \
+ SI_ORDER_SECOND, in_pcbstorage_init, &prot); \
+SYSUNINIT(prot##_inpcbstorage_uninit, SI_SUB_PROTO_DOMAIN, \
+ SI_ORDER_SECOND, in_pcbstorage_destroy, &prot)
+
/*
* Load balance groups used for the SO_REUSEPORT_LB socket option. Each group
* (or unique address:port combination) can be re-used at most
@@ -688,9 +723,11 @@
#define V_ipport_stoprandom VNET(ipport_stoprandom)
#define V_ipport_tcpallocs VNET(ipport_tcpallocs)
+void in_pcbinfo_init(struct inpcbinfo *, struct inpcbstorage *,
+ u_int, u_int);
void in_pcbinfo_destroy(struct inpcbinfo *);
-void in_pcbinfo_init(struct inpcbinfo *, const char *, u_int, int, char *,
- uma_init);
+void in_pcbstorage_init(void *);
+void in_pcbstorage_destroy(void *);
int in_pcbbind_check_bindmulti(const struct inpcb *ni,
const struct inpcb *oi);
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -518,19 +518,16 @@
CTASSERT(sizeof(struct inpcbhead) == sizeof(LIST_HEAD(, inpcb)));
/*
- * Initialize an inpcbinfo -- we should be able to reduce the number of
- * arguments in time.
+ * Initialize an inpcbinfo - a per-VNET instance of the connection database.
*/
-static void inpcb_dtor(void *, int, void *);
-static void inpcb_fini(void *, int);
void
-in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
- u_int hash_nelements, int porthash_nelements, char *inpcbzone_name,
- uma_init inpcbzone_init)
+in_pcbinfo_init(struct inpcbinfo *pcbinfo, struct inpcbstorage *pcbstor,
+ u_int hash_nelements, u_int porthash_nelements)
{
- mtx_init(&pcbinfo->ipi_lock, name, NULL, MTX_DEF);
- mtx_init(&pcbinfo->ipi_hash_lock, "pcbinfohash", NULL, MTX_DEF);
+ mtx_init(&pcbinfo->ipi_lock, pcbstor->ips_infolock_name, NULL, MTX_DEF);
+ mtx_init(&pcbinfo->ipi_hash_lock, pcbstor->ips_hashlock_name,
+ NULL, MTX_DEF);
#ifdef VIMAGE
pcbinfo->ipi_vnet = curvnet;
#endif
@@ -543,16 +540,9 @@
&pcbinfo->ipi_porthashmask);
pcbinfo->ipi_lbgrouphashbase = hashinit(porthash_nelements, M_PCB,
&pcbinfo->ipi_lbgrouphashmask);
- pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
- NULL, inpcb_dtor, inpcbzone_init, inpcb_fini, UMA_ALIGN_PTR,
- UMA_ZONE_SMR);
- uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
- uma_zone_set_warning(pcbinfo->ipi_zone,
- "kern.ipc.maxsockets limit reached");
+ pcbinfo->ipi_zone = pcbstor->ips_zone;
+ pcbinfo->ipi_portzone = pcbstor->ips_portzone;
pcbinfo->ipi_smr = uma_zone_get_smr(pcbinfo->ipi_zone);
- pcbinfo->ipi_portzone = uma_zcreate(inpcbzone_name,
- sizeof(struct inpcbport), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- uma_zone_set_smr(pcbinfo->ipi_portzone, pcbinfo->ipi_smr);
}
/*
@@ -570,12 +560,41 @@
pcbinfo->ipi_porthashmask);
hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
pcbinfo->ipi_lbgrouphashmask);
- uma_zdestroy(pcbinfo->ipi_zone);
- uma_zdestroy(pcbinfo->ipi_portzone);
mtx_destroy(&pcbinfo->ipi_hash_lock);
mtx_destroy(&pcbinfo->ipi_lock);
}
+/*
+ * Initialize a pcbstorage - per protocol zones to allocate inpcbs.
+ *
+ * 'arg' is the struct inpcbstorage to fill in. INPCBSTORAGE_DEFINE()
+ * registers this function with SYSINIT() and passes the storage as the
+ * void * argument.
+ *
+ * Creates ips_zone (struct inpcb items, UMA_ZONE_SMR, with ips_pcbinit as
+ * the item init hook) and ips_portzone (struct inpcbport items). The port
+ * zone is created without UMA_ZONE_SMR and is then handed the inpcb zone's
+ * SMR state via uma_zone_set_smr(), so both zones use the same one.
+ * No uma_zone_set_max() limit is applied to either zone here.
+ */
+static void inpcb_dtor(void *, int, void *);
+static void inpcb_fini(void *, int);
+void
+in_pcbstorage_init(void *arg)
+{
+ struct inpcbstorage *pcbstor = arg;
+
+ pcbstor->ips_zone = uma_zcreate(pcbstor->ips_zone_name,
+ sizeof(struct inpcb), NULL, inpcb_dtor, pcbstor->ips_pcbinit,
+ inpcb_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR);
+ pcbstor->ips_portzone = uma_zcreate(pcbstor->ips_portzone_name,
+ sizeof(struct inpcbport), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ uma_zone_set_smr(pcbstor->ips_portzone,
+ uma_zone_get_smr(pcbstor->ips_zone));
+}
+
+/*
+ * Destroy a pcbstorage - used by unloadable protocols.
+ *
+ * 'arg' is the struct inpcbstorage whose zones are released.
+ * INPCBSTORAGE_DEFINE() registers this function with SYSUNINIT() and
+ * passes the storage as the void * argument. Both zones created by
+ * in_pcbstorage_init() are destroyed, the inpcb zone first. The ips_zone
+ * and ips_portzone pointers are not reset here.
+ */
+void
+in_pcbstorage_destroy(void *arg)
+{
+ struct inpcbstorage *pcbstor = arg;
+
+ uma_zdestroy(pcbstor->ips_zone);
+ uma_zdestroy(pcbstor->ips_portzone);
+}
+
/*
* Allocate a PCB and associate it with the socket.
* On success return with the PCB locked.
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -117,8 +117,6 @@
static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */
static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */
-static eventhandler_tag ip_divert_event_tag;
-
static int div_output_inbound(int fmaily, struct socket *so, struct mbuf *m,
struct sockaddr_in *sin);
static int div_output_outbound(int family, struct socket *so, struct mbuf *m);
@@ -126,21 +124,7 @@
/*
* Initialize divert connection block queue.
*/
-static void
-div_zone_change(void *tag)
-{
-
- uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
-}
-
-static int
-div_inpcb_init(void *mem, int size, int flags)
-{
- struct inpcb *inp = mem;
-
- INP_LOCK_INIT(inp, "inp", "divinp");
- return (0);
-}
+INPCBSTORAGE_DEFINE(divcbstor, "divinp", "divcb", "div", "divhash");
static void
div_init(void *arg __unused)
@@ -151,7 +135,7 @@
* allocate one-entry hash lists than it is to check all over the
* place for hashbase == NULL.
*/
- in_pcbinfo_init(&V_divcbinfo, "div", 1, 1, "divcb", div_inpcb_init);
+ in_pcbinfo_init(&V_divcbinfo, &divcbstor, 1, 1);
}
VNET_SYSINIT(div_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, div_init, NULL);
@@ -794,8 +778,6 @@
if (err != 0)
return (err);
ip_divert_ptr = divert_packet;
- ip_divert_event_tag = EVENTHANDLER_REGISTER(maxsockets_change,
- div_zone_change, NULL, EVENTHANDLER_PRI_ANY);
break;
case MOD_QUIESCE:
/*
@@ -829,7 +811,6 @@
#ifndef VIMAGE
div_destroy(NULL);
#endif
- EVENTHANDLER_DEREGISTER(maxsockets_change, ip_divert_event_tag);
break;
default:
err = EOPNOTSUPP;
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -182,37 +182,13 @@
}
#endif /* INET */
-/*
- * Raw interface to IP protocol.
- */
-
-/*
- * Initialize raw connection block q.
- */
-static void
-rip_zone_change(void *tag)
-{
-
- uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
-}
-
-static int
-rip_inpcb_init(void *mem, int size, int flags)
-{
- struct inpcb *inp = mem;
-
- INP_LOCK_INIT(inp, "inp", "rawinp");
- return (0);
-}
+INPCBSTORAGE_DEFINE(ripcbstor, "rawinp", "ripcb", "rip", "riphash");
static void
rip_init(void *arg __unused)
{
- in_pcbinfo_init(&V_ripcbinfo, "rip", INP_PCBHASH_RAW_SIZE, 1, "ripcb",
- rip_inpcb_init);
- EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
- EVENTHANDLER_PRI_ANY);
+ in_pcbinfo_init(&V_ripcbinfo, &ripcbstor, INP_PCBHASH_RAW_SIZE, 1);
}
VNET_SYSINIT(rip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rip_init, NULL);
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1146,26 +1146,7 @@
#define ISN_LOCK() mtx_lock(&isn_mtx)
#define ISN_UNLOCK() mtx_unlock(&isn_mtx)
-/*
- * TCP initialization.
- */
-static void
-tcp_zone_change(void *tag)
-{
-
- uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets);
- uma_zone_set_max(V_tcpcb_zone, maxsockets);
- tcp_tw_zone_change();
-}
-
-static int
-tcp_inpcb_init(void *mem, int size, int flags)
-{
- struct inpcb *inp = mem;
-
- INP_LOCK_INIT(inp, "inp", "tcpinp");
- return (0);
-}
+INPCBSTORAGE_DEFINE(tcpcbstor, "tcpinp", "tcp_inpcb", "tcp", "tcphash");
/*
* Take a value and get the next power of 2 that doesn't overflow.
@@ -1439,8 +1420,8 @@
printf("%s: WARNING: unable to initialise TCP stats\n",
__func__);
#endif
- in_pcbinfo_init(&V_tcbinfo, "tcp", tcp_tcbhashsize, tcp_tcbhashsize,
- "tcp_inpcb", tcp_inpcb_init);
+ in_pcbinfo_init(&V_tcbinfo, &tcpcbstor, tcp_tcbhashsize,
+ tcp_tcbhashsize);
/*
* These have to be type stable for the benefit of the timers.
@@ -1526,8 +1507,6 @@
ISN_LOCK_INIT();
EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
SHUTDOWN_PRI_DEFAULT);
- EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
- EVENTHANDLER_PRI_ANY);
tcp_inp_lro_direct_queue = counter_u64_alloc(M_WAITOK);
tcp_inp_lro_wokeup_queue = counter_u64_alloc(M_WAITOK);
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -170,33 +170,9 @@
struct mbuf *, struct thread *, int);
#endif
-static void
-udp_zone_change(void *tag)
-{
-
- uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
- uma_zone_set_max(V_udpcb_zone, maxsockets);
-}
-
-static int
-udp_inpcb_init(void *mem, int size, int flags)
-{
- struct inpcb *inp;
-
- inp = mem;
- INP_LOCK_INIT(inp, "inp", "udpinp");
- return (0);
-}
-
-static int
-udplite_inpcb_init(void *mem, int size, int flags)
-{
- struct inpcb *inp;
-
- inp = mem;
- INP_LOCK_INIT(inp, "inp", "udpliteinp");
- return (0);
-}
+INPCBSTORAGE_DEFINE(udpcbstor, "udpinp", "udp_inpcb", "udp", "udphash");
+INPCBSTORAGE_DEFINE(udplitecbstor, "udpliteinp", "udplite_inpcb", "udplite",
+ "udplitehash");
static void
udp_init(void *arg __unused)
@@ -209,18 +185,15 @@
* Once we can calculate the flowid that way and re-establish
* a 4-tuple, flip this to 4-tuple.
*/
- in_pcbinfo_init(&V_udbinfo, "udp", UDBHASHSIZE, UDBHASHSIZE,
- "udp_inpcb", udp_inpcb_init);
+ in_pcbinfo_init(&V_udbinfo, &udpcbstor, UDBHASHSIZE, UDBHASHSIZE);
V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
uma_zone_set_max(V_udpcb_zone, maxsockets);
uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
- EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
- EVENTHANDLER_PRI_ANY);
/* Additional pcbinfo for UDP-Lite */
- in_pcbinfo_init(&V_ulitecbinfo, "udplite", UDBHASHSIZE,
- UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init);
+ in_pcbinfo_init(&V_ulitecbinfo, &udplitecbstor, UDBHASHSIZE,
+ UDBHASHSIZE);
}
VNET_SYSINIT(udp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, udp_init, NULL);

File Metadata

Mime Type
text/plain
Expires
Wed, Jan 8, 11:53 PM (3 h, 22 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15727425
Default Alt Text
D33542.id100894.diff (11 KB)

Event Timeline