diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -375,8 +375,8 @@
 
 #ifdef _KERNEL
 /*
- * Global data structure for each high-level protocol (UDP, TCP, ...) in both
- * IPv4 and IPv6. Holds inpcb lists and information for managing them.
+ * Per-VNET pcb database for each high-level protocol (UDP, TCP, ...) in both
+ * IPv4 and IPv6.
  *
  * The pcbs are protected with SMR section and thus all lists in inpcbinfo
  * are CK-lists. Locking is required to insert a pcb into database. Two
@@ -445,6 +445,41 @@
 	struct vnet		*ipi_vnet;		/* (c) */
 };
 
+/*
+ * Global allocation storage for each high-level protocol (UDP, TCP, ...).
+ * Each corresponding per-VNET inpcbinfo points into this one.
+ */
+struct inpcbstorage {
+	uma_zone_t	ips_zone;		/* inpcb zone (UMA_ZONE_SMR) */
+	uma_zone_t	ips_portzone;		/* inpcbport zone, same SMR */
+	uma_init	ips_pcbinit;		/* uma_init for ips_zone */
+	const char *	ips_zone_name;		/* name of ips_zone */
+	const char *	ips_portzone_name;	/* name of ips_portzone */
+	const char *	ips_infolock_name;	/* name of ipi_lock */
+	const char *	ips_hashlock_name;	/* name of ipi_hash_lock */
+};
+
+#define INPCBSTORAGE_DEFINE(prot, lname, zname, iname, hname)		\
+static int								\
+prot##_inpcb_init(void *mem, int size __unused, int flags __unused)	\
+{									\
+	struct inpcb *inp = mem;					\
+									\
+	rw_init_flags(&inp->inp_lock, lname, RW_RECURSE | RW_DUPOK);	\
+	return (0);							\
+}									\
+static struct inpcbstorage prot = {					\
+	.ips_pcbinit = prot##_inpcb_init,				\
+	.ips_zone_name = zname,						\
+	.ips_portzone_name = zname " ports",				\
+	.ips_infolock_name = iname,					\
+	.ips_hashlock_name = hname,					\
+};									\
+SYSINIT(prot##_inpcbstorage_init, SI_SUB_PROTO_DOMAIN,			\
+    SI_ORDER_SECOND, in_pcbstorage_init, &prot);			\
+SYSUNINIT(prot##_inpcbstorage_uninit, SI_SUB_PROTO_DOMAIN,		\
+    SI_ORDER_SECOND, in_pcbstorage_destroy, &prot)
+
 /*
  * Load balance groups used for the SO_REUSEPORT_LB socket option. Each group
  * (or unique address:port combination) can be re-used at most
@@ -688,9 +723,11 @@
 #define	V_ipport_stoprandom	VNET(ipport_stoprandom)
 #define	V_ipport_tcpallocs	VNET(ipport_tcpallocs)
 
+void	in_pcbinfo_init(struct inpcbinfo *, struct inpcbstorage *,
+	    u_int, u_int);
 void	in_pcbinfo_destroy(struct inpcbinfo *);
-void	in_pcbinfo_init(struct inpcbinfo *, const char *, u_int, int, char *,
-	    uma_init);
+void	in_pcbstorage_init(void *);
+void	in_pcbstorage_destroy(void *);
 
 int	in_pcbbind_check_bindmulti(const struct inpcb *ni,
 	    const struct inpcb *oi);
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -518,19 +518,16 @@
 CTASSERT(sizeof(struct inpcbhead) == sizeof(LIST_HEAD(, inpcb)));
 
 /*
- * Initialize an inpcbinfo -- we should be able to reduce the number of
- * arguments in time.
+ * Initialize an inpcbinfo - a per-VNET instance of the connections database.
  */
-static void inpcb_dtor(void *, int, void *);
-static void inpcb_fini(void *, int);
 void
-in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
-    u_int hash_nelements, int porthash_nelements, char *inpcbzone_name,
-    uma_init inpcbzone_init)
+in_pcbinfo_init(struct inpcbinfo *pcbinfo, struct inpcbstorage *pcbstor,
+    u_int hash_nelements, u_int porthash_nelements)
 {
 
-	mtx_init(&pcbinfo->ipi_lock, name, NULL, MTX_DEF);
-	mtx_init(&pcbinfo->ipi_hash_lock, "pcbinfohash", NULL, MTX_DEF);
+	mtx_init(&pcbinfo->ipi_lock, pcbstor->ips_infolock_name, NULL, MTX_DEF);
+	mtx_init(&pcbinfo->ipi_hash_lock, pcbstor->ips_hashlock_name,
+	    NULL, MTX_DEF);
 #ifdef VIMAGE
 	pcbinfo->ipi_vnet = curvnet;
 #endif
@@ -543,16 +540,9 @@
 	    &pcbinfo->ipi_porthashmask);
 	pcbinfo->ipi_lbgrouphashbase = hashinit(porthash_nelements, M_PCB,
 	    &pcbinfo->ipi_lbgrouphashmask);
-	pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
-	    NULL, inpcb_dtor, inpcbzone_init, inpcb_fini, UMA_ALIGN_PTR,
-	    UMA_ZONE_SMR);
-	uma_zone_set_max(pcbinfo->ipi_zone, maxsockets);
-	uma_zone_set_warning(pcbinfo->ipi_zone,
-	    "kern.ipc.maxsockets limit reached");
+	pcbinfo->ipi_zone = pcbstor->ips_zone;	/* zone shared by all VNETs */
+	pcbinfo->ipi_portzone = pcbstor->ips_portzone;	/* likewise */
 	pcbinfo->ipi_smr = uma_zone_get_smr(pcbinfo->ipi_zone);
-	pcbinfo->ipi_portzone = uma_zcreate(inpcbzone_name,
-	    sizeof(struct inpcbport), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-	uma_zone_set_smr(pcbinfo->ipi_portzone, pcbinfo->ipi_smr);
 }
 
 /*
@@ -570,12 +560,41 @@
 	    pcbinfo->ipi_porthashmask);
 	hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB,
 	    pcbinfo->ipi_lbgrouphashmask);
-	uma_zdestroy(pcbinfo->ipi_zone);
-	uma_zdestroy(pcbinfo->ipi_portzone);
 	mtx_destroy(&pcbinfo->ipi_hash_lock);
 	mtx_destroy(&pcbinfo->ipi_lock);
 }
 
+/*
+ * Initialize a pcbstorage - per protocol zones to allocate inpcbs.
+ */
+static void inpcb_dtor(void *, int, void *);
+static void inpcb_fini(void *, int);
+void
+in_pcbstorage_init(void *arg)
+{
+	struct inpcbstorage *pcbstor = arg;
+
+	pcbstor->ips_zone = uma_zcreate(pcbstor->ips_zone_name,
+	    sizeof(struct inpcb), NULL, inpcb_dtor, pcbstor->ips_pcbinit,
+	    inpcb_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR);
+	pcbstor->ips_portzone = uma_zcreate(pcbstor->ips_portzone_name,
+	    sizeof(struct inpcbport), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+	uma_zone_set_smr(pcbstor->ips_portzone,	/* same SMR as ips_zone */
+	    uma_zone_get_smr(pcbstor->ips_zone));
+}
+
+/*
+ * Destroy a pcbstorage - used by unloadable protocols.
+ */
+void
+in_pcbstorage_destroy(void *arg)
+{
+	struct inpcbstorage *pcbstor = arg;
+
+	uma_zdestroy(pcbstor->ips_zone);
+	uma_zdestroy(pcbstor->ips_portzone);
+}
+
 /*
  * Allocate a PCB and associate it with the socket.
  * On success return with the PCB locked.
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -117,8 +117,6 @@
 static u_long	div_sendspace = DIVSNDQ;	/* XXX sysctl ? */
 static u_long	div_recvspace = DIVRCVQ;	/* XXX sysctl ? */
 
-static eventhandler_tag ip_divert_event_tag;
-
 static int div_output_inbound(int fmaily, struct socket *so, struct mbuf *m,
     struct sockaddr_in *sin);
 static int div_output_outbound(int family, struct socket *so, struct mbuf *m);
@@ -126,21 +124,7 @@
 /*
  * Initialize divert connection block queue.
  */
-static void
-div_zone_change(void *tag)
-{
-
-	uma_zone_set_max(V_divcbinfo.ipi_zone, maxsockets);
-}
-
-static int
-div_inpcb_init(void *mem, int size, int flags)
-{
-	struct inpcb *inp = mem;
-
-	INP_LOCK_INIT(inp, "inp", "divinp");
-	return (0);
-}
+INPCBSTORAGE_DEFINE(divcbstor, "divinp", "divcb", "div", "divhash");
 
 static void
 div_init(void *arg __unused)
@@ -151,7 +135,7 @@
 	 * allocate one-entry hash lists than it is to check all over the
 	 * place for hashbase == NULL.
 	 */
-	in_pcbinfo_init(&V_divcbinfo, "div", 1, 1, "divcb", div_inpcb_init);
+	in_pcbinfo_init(&V_divcbinfo, &divcbstor, 1, 1);
 }
 VNET_SYSINIT(div_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, div_init, NULL);
 
@@ -794,8 +778,6 @@
 		if (err != 0)
 			return (err);
 		ip_divert_ptr = divert_packet;
-		ip_divert_event_tag = EVENTHANDLER_REGISTER(maxsockets_change,
-		    div_zone_change, NULL, EVENTHANDLER_PRI_ANY);
 		break;
 	case MOD_QUIESCE:
 		/*
@@ -829,7 +811,6 @@
 #ifndef VIMAGE
 		div_destroy(NULL);
 #endif
-		EVENTHANDLER_DEREGISTER(maxsockets_change, ip_divert_event_tag);
 		break;
 	default:
 		err = EOPNOTSUPP;
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -182,37 +182,13 @@
 }
 #endif /* INET */
 
-/*
- * Raw interface to IP protocol.
- */
-
-/*
- * Initialize raw connection block q.
- */
-static void
-rip_zone_change(void *tag)
-{
-
-	uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets);
-}
-
-static int
-rip_inpcb_init(void *mem, int size, int flags)
-{
-	struct inpcb *inp = mem;
-
-	INP_LOCK_INIT(inp, "inp", "rawinp");
-	return (0);
-}
+INPCBSTORAGE_DEFINE(ripcbstor, "rawinp", "ripcb", "rip", "riphash");
 
 static void
 rip_init(void *arg __unused)
 {
 
-	in_pcbinfo_init(&V_ripcbinfo, "rip", INP_PCBHASH_RAW_SIZE, 1, "ripcb",
-	    rip_inpcb_init);
-	EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
-	    EVENTHANDLER_PRI_ANY);
+	in_pcbinfo_init(&V_ripcbinfo, &ripcbstor, INP_PCBHASH_RAW_SIZE, 1);
 }
 VNET_SYSINIT(rip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rip_init, NULL);
 
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1146,26 +1146,7 @@
 #define	ISN_LOCK()	mtx_lock(&isn_mtx)
 #define	ISN_UNLOCK()	mtx_unlock(&isn_mtx)
 
-/*
- * TCP initialization.
- */
-static void
-tcp_zone_change(void *tag)
-{
-
-	uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets);
-	uma_zone_set_max(V_tcpcb_zone, maxsockets);
-	tcp_tw_zone_change();
-}
-
-static int
-tcp_inpcb_init(void *mem, int size, int flags)
-{
-	struct inpcb *inp = mem;
-
-	INP_LOCK_INIT(inp, "inp", "tcpinp");
-	return (0);
-}
+INPCBSTORAGE_DEFINE(tcpcbstor, "tcpinp", "tcp_inpcb", "tcp", "tcphash");
 
 /*
  * Take a value and get the next power of 2 that doesn't overflow.
@@ -1439,8 +1420,8 @@
 		printf("%s: WARNING: unable to initialise TCP stats\n",
 		    __func__);
 #endif
-	in_pcbinfo_init(&V_tcbinfo, "tcp", tcp_tcbhashsize, tcp_tcbhashsize,
-	    "tcp_inpcb", tcp_inpcb_init);
+	in_pcbinfo_init(&V_tcbinfo, &tcpcbstor, tcp_tcbhashsize,
+	    tcp_tcbhashsize);
 
 	/*
 	 * These have to be type stable for the benefit of the timers.
@@ -1526,8 +1507,6 @@
 	ISN_LOCK_INIT();
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
-	EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
-		EVENTHANDLER_PRI_ANY);
 
 	tcp_inp_lro_direct_queue = counter_u64_alloc(M_WAITOK);
 	tcp_inp_lro_wokeup_queue = counter_u64_alloc(M_WAITOK);
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -170,33 +170,9 @@
 		    struct mbuf *, struct thread *, int);
 #endif
 
-static void
-udp_zone_change(void *tag)
-{
-
-	uma_zone_set_max(V_udbinfo.ipi_zone, maxsockets);
-	uma_zone_set_max(V_udpcb_zone, maxsockets);
-}
-
-static int
-udp_inpcb_init(void *mem, int size, int flags)
-{
-	struct inpcb *inp;
-
-	inp = mem;
-	INP_LOCK_INIT(inp, "inp", "udpinp");
-	return (0);
-}
-
-static int
-udplite_inpcb_init(void *mem, int size, int flags)
-{
-	struct inpcb *inp;
-
-	inp = mem;
-	INP_LOCK_INIT(inp, "inp", "udpliteinp");
-	return (0);
-}
+INPCBSTORAGE_DEFINE(udpcbstor, "udpinp", "udp_inpcb", "udp", "udphash");
+INPCBSTORAGE_DEFINE(udplitecbstor, "udpliteinp", "udplite_inpcb", "udplite",
+    "udplitehash");
 
 static void
 udp_init(void *arg __unused)
@@ -209,18 +185,15 @@
 	 * Once we can calculate the flowid that way and re-establish
 	 * a 4-tuple, flip this to 4-tuple.
 	 */
-	in_pcbinfo_init(&V_udbinfo, "udp", UDBHASHSIZE, UDBHASHSIZE,
-	    "udp_inpcb", udp_inpcb_init);
+	in_pcbinfo_init(&V_udbinfo, &udpcbstor, UDBHASHSIZE, UDBHASHSIZE);
 	V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(V_udpcb_zone, maxsockets);
 	uma_zone_set_warning(V_udpcb_zone, "kern.ipc.maxsockets limit reached");
-	EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
-	    EVENTHANDLER_PRI_ANY);
 
 	/* Additional pcbinfo for UDP-Lite */
-	in_pcbinfo_init(&V_ulitecbinfo, "udplite", UDBHASHSIZE,
-	    UDBHASHSIZE, "udplite_inpcb", udplite_inpcb_init);
+	in_pcbinfo_init(&V_ulitecbinfo, &udplitecbstor, UDBHASHSIZE,
+	    UDBHASHSIZE);
 }
 VNET_SYSINIT(udp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, udp_init, NULL);
 