diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c --- a/sys/kern/kern_mbuf.c +++ b/sys/kern/kern_mbuf.c @@ -39,14 +39,12 @@ #include #include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -396,14 +394,6 @@ uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); - /* - * Hook event handler for low-memory situation, used to - * drain protocols and push data back to the caches (UMA - * later pushes it back to VM). - */ - EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, - EVENTHANDLER_PRI_FIRST); - snd_tag_count = counter_u64_alloc(M_WAITOK); } SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); @@ -828,26 +818,12 @@ /* * This is the protocol drain routine. Called by UMA whenever any of the * mbuf zones is closed to its limit. - * - * No locks should be held when this is called. The drain routines have to - * presently acquire some locks which raises the possibility of lock order - * reversal. */ static void mb_reclaim(uma_zone_t zone __unused, int pending __unused) { - struct epoch_tracker et; - struct domain *dp; - struct protosw *pr; - - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__); - - NET_EPOCH_ENTER(et); - for (dp = domains; dp != NULL; dp = dp->dom_next) - for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) - if (pr->pr_drain != NULL) - (*pr->pr_drain)(); - NET_EPOCH_EXIT(et); + + EVENTHANDLER_INVOKE(mbuf_lowmem, VM_LOW_MBUFS); } /* diff --git a/sys/kern/uipc_debug.c b/sys/kern/uipc_debug.c --- a/sys/kern/uipc_debug.c +++ b/sys/kern/uipc_debug.c @@ -315,9 +315,6 @@ db_print_indent(indent); db_printf("pr_ctloutput: %p ", pr->pr_ctloutput); - - db_print_indent(indent); - db_printf("pr_drain: %p\n", pr->pr_drain); } static void diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c --- a/sys/kern/uipc_domain.c +++ b/sys/kern/uipc_domain.c @@ -435,7 +435,6 @@ dpr->pr_protocol = PROTO_SPACER; dpr->pr_flags = 0; dpr->pr_ctloutput = NULL; - dpr->pr_drain = NULL; dpr->pr_usrreqs = &nousrreqs; /* Job is done, not more protection required. */ diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c --- a/sys/netinet/in_proto.c +++ b/sys/netinet/in_proto.c @@ -108,14 +108,6 @@ } struct protosw inetsw[] = { -{ - .pr_type = 0, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IP, - .pr_flags = PR_CAPATTACH, - .pr_drain = ip_drain, - .pr_usrreqs = &nousrreqs -}, { .pr_type = SOCK_DGRAM, .pr_domain = &inetdomain, @@ -131,7 +123,6 @@ .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD| PR_CAPATTACH, .pr_ctloutput = tcp_ctloutput, - .pr_drain = tcp_drain, .pr_usrreqs = &tcp_usrreqs }, #ifdef SCTP @@ -141,7 +132,6 @@ .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = sctp_drain, .pr_usrreqs = &sctp_usrreqs }, { @@ -150,7 +140,6 @@ .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = NULL, /* Covered by the SOCK_SEQPACKET entry. */ .pr_usrreqs = &sctp_usrreqs }, #endif /* SCTP */ diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -101,7 +101,6 @@ /* IP reassembly functions are defined in ip_reass.c. */ extern void ipreass_init(void); -extern void ipreass_drain(void); #ifdef VIMAGE extern void ipreass_destroy(void); #endif @@ -845,20 +844,6 @@ m_freem(m); } -void -ip_drain(void) -{ - VNET_ITERATOR_DECL(vnet_iter); - - VNET_LIST_RLOCK_NOSLEEP(); - VNET_FOREACH(vnet_iter) { - CURVNET_SET(vnet_iter); - ipreass_drain(); - CURVNET_RESTORE(); - } - VNET_LIST_RUNLOCK_NOSLEEP(); -} - int ipproto_register(uint8_t proto, ipproto_input_t input, ipproto_ctlinput_t ctl) { diff --git a/sys/netinet/ip_reass.c b/sys/netinet/ip_reass.c --- a/sys/netinet/ip_reass.c +++ b/sys/netinet/ip_reass.c @@ -92,7 +92,6 @@ #define V_ipreass_maxbucketsize VNET(ipreass_maxbucketsize) void ipreass_init(void); -void ipreass_drain(void); #ifdef VIMAGE void ipreass_destroy(void); #endif @@ -597,6 +596,31 @@ } SYSINIT(ipreass, SI_SUB_VNET_DONE, SI_ORDER_ANY, ipreass_timer_init, NULL); +/* + * Drain off all datagram fragments. + */ +static void +ipreass_drain(void) +{ + VNET_ITERATOR_DECL(vnet_iter); + + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + for (int i = 0; i < IPREASS_NHASH; i++) { + IPQ_LOCK(i); + while(!TAILQ_EMPTY(&V_ipq[i].head)) + ipq_drop(&V_ipq[i], + TAILQ_FIRST(&V_ipq[i].head)); + KASSERT(V_ipq[i].count == 0, + ("%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i, + V_ipq[i].count, V_ipq)); + IPQ_UNLOCK(i); + } + CURVNET_RESTORE(); + } +} + + /* * Initialize IP reassembly structures. */ @@ -623,24 +647,10 @@ maxfrags = IP_MAXFRAGS; EVENTHANDLER_REGISTER(nmbclusters_change, ipreass_zone_change, NULL, EVENTHANDLER_PRI_ANY); - } -} - -/* - * Drain off all datagram fragments. - */ -void -ipreass_drain(void) -{ - - for (int i = 0; i < IPREASS_NHASH; i++) { - IPQ_LOCK(i); - while(!TAILQ_EMPTY(&V_ipq[i].head)) - ipq_drop(&V_ipq[i], TAILQ_FIRST(&V_ipq[i].head)); - KASSERT(V_ipq[i].count == 0, - ("%s: V_ipq[%d] count %d (V_ipq=%p)", __func__, i, - V_ipq[i].count, V_ipq)); - IPQ_UNLOCK(i); + EVENTHANDLER_REGISTER(vm_lowmem, ipreass_drain, NULL, + LOWMEM_PRI_DEFAULT); + EVENTHANDLER_REGISTER(mbuf_lowmem, ipreass_drain, NULL, + LOWMEM_PRI_DEFAULT); } } diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -214,7 +214,6 @@ int inp_setmoptions(struct inpcb *, struct sockopt *); int ip_ctloutput(struct socket *, struct sockopt *sopt); -void ip_drain(void); int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags); void ip_forward(struct mbuf *m, int srcrt); diff --git a/sys/netinet/sctp_module.c b/sys/netinet/sctp_module.c --- a/sys/netinet/sctp_module.c +++ b/sys/netinet/sctp_module.c @@ -61,7 +61,6 @@ .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = sctp_drain, .pr_usrreqs = &sctp_usrreqs, }; @@ -71,7 +70,6 @@ .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = sctp_drain, .pr_usrreqs = &sctp_usrreqs, }; #endif @@ -85,7 +83,6 @@ .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = sctp_drain, .pr_usrreqs = &sctp6_usrreqs, }; @@ -95,9 +92,6 @@ .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, -#ifndef INET /* Do not call initialization and drain routines twice. */ - .pr_drain = sctp_drain, -#endif .pr_usrreqs = &sctp6_usrreqs, }; #endif diff --git a/sys/netinet/sctp_pcb.h b/sys/netinet/sctp_pcb.h --- a/sys/netinet/sctp_pcb.h +++ b/sys/netinet/sctp_pcb.h @@ -611,8 +611,6 @@ bool sctp_is_vtag_good(uint32_t, uint16_t lport, uint16_t rport, struct timeval *); -/* void sctp_drain(void); */ - int sctp_destination_is_reachable(struct sctp_tcb *, struct sockaddr *); int sctp_swap_inpcb_for_listen(struct sctp_inpcb *inp); diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -6942,15 +6942,18 @@ */ } -void +static void sctp_drain(void) { + struct epoch_tracker et; + VNET_ITERATOR_DECL(vnet_iter); + + NET_EPOCH_ENTER(et); /* * We must walk the PCB lists for ALL associations here. The system * is LOW on MBUF's and needs help. This is where reneging will * occur. We really hope this does NOT happen! */ - VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); @@ -6962,6 +6965,7 @@ #ifdef VIMAGE continue; #else + NET_EPOCH_EXIT(et); return; #endif } @@ -6981,7 +6985,10 @@ CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); } +EVENTHANDLER_DEFINE(vm_lowmem, sctp_drain, NULL, LOWMEM_PRI_DEFAULT); +EVENTHANDLER_DEFINE(mbuf_lowmem, sctp_drain, NULL, LOWMEM_PRI_DEFAULT); /* * start a new iterator diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h --- a/sys/netinet/sctp_var.h +++ b/sys/netinet/sctp_var.h @@ -327,7 +327,6 @@ void sctp_input_with_port(struct mbuf *, int, uint16_t); int sctp_input(struct mbuf **, int *, int); void sctp_pathmtu_adjustment(struct sctp_tcb *, uint32_t, bool); -void sctp_drain(void); void sctp_notify(struct sctp_inpcb *, struct sctp_tcb *, struct sctp_nets *, uint8_t, uint8_t, uint16_t, uint32_t); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1448,6 +1448,8 @@ VNET_SYSINIT(tcp_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_vnet_init, NULL); +static void tcp_drain(void); + static void tcp_init(void *arg __unused) { @@ -1506,6 +1508,8 @@ ISN_LOCK_INIT(); EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL, SHUTDOWN_PRI_DEFAULT); + EVENTHANDLER_REGISTER(vm_lowmem, tcp_drain, NULL, LOWMEM_PRI_DEFAULT); + EVENTHANDLER_REGISTER(mbuf_lowmem, tcp_drain, NULL, LOWMEM_PRI_DEFAULT); tcp_inp_lro_direct_queue = counter_u64_alloc(M_WAITOK); tcp_inp_lro_wokeup_queue = counter_u64_alloc(M_WAITOK); @@ -2513,14 +2517,16 @@ return (tp); } -void +static void tcp_drain(void) { + struct epoch_tracker et; VNET_ITERATOR_DECL(vnet_iter); if (!do_tcpdrain) return; + NET_EPOCH_ENTER(et); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); @@ -2558,6 +2564,7 @@ CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); + NET_EPOCH_EXIT(et); } /* diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1085,7 +1085,6 @@ void tcp_ctlinput(int, struct sockaddr *, void *); int tcp_ctloutput(struct socket *, struct sockopt *); void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *); -void tcp_drain(void); void tcp_fini(void *); char *tcp_log_addrs(struct in_conninfo *, struct tcphdr *, const void *, const void *); diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -141,14 +141,6 @@ } struct protosw inet6sw[] = { -{ - .pr_type = 0, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_IPV6, - .pr_flags = PR_CAPATTACH, - .pr_drain = frag6_drain, - .pr_usrreqs = &nousrreqs, -}, { .pr_type = SOCK_DGRAM, .pr_domain = &inet6domain, @@ -164,9 +156,6 @@ .pr_flags = PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD| PR_LISTEN|PR_CAPATTACH, .pr_ctloutput = tcp_ctloutput, -#ifndef INET /* don't call initialization, timeout, and drain routines twice */ - .pr_drain = tcp_drain, -#endif .pr_usrreqs = &tcp6_usrreqs, }, #ifdef SCTP @@ -176,9 +165,6 @@ .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, -#ifndef INET /* Do not call initialization and drain routines twice. */ - .pr_drain = sctp_drain, -#endif .pr_usrreqs = &sctp6_usrreqs }, { @@ -187,7 +173,6 @@ .pr_protocol = IPPROTO_SCTP, .pr_flags = PR_CONNREQUIRED|PR_WANTRCVD, .pr_ctloutput = sctp_ctloutput, - .pr_drain = NULL, /* Covered by the SOCK_SEQPACKET entry. */ .pr_usrreqs = &sctp6_usrreqs }, #endif /* SCTP */ diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -295,6 +295,10 @@ IP6PROTO_REGISTER(IPPROTO_SCTP, sctp6_input, sctp6_ctlinput); #endif + EVENTHANDLER_REGISTER(vm_lowmem, frag6_drain, NULL, LOWMEM_PRI_DEFAULT); + EVENTHANDLER_REGISTER(mbuf_lowmem, frag6_drain, NULL, + LOWMEM_PRI_DEFAULT); + netisr_register(&ip6_nh); #ifdef RSS netisr_register(&ip6_direct_nh); diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h --- a/sys/sys/eventhandler.h +++ b/sys/sys/eventhandler.h @@ -205,6 +205,8 @@ typedef void (*vm_lowmem_handler_t)(void *, int); #define LOWMEM_PRI_DEFAULT EVENTHANDLER_PRI_FIRST EVENTHANDLER_DECLARE(vm_lowmem, vm_lowmem_handler_t); +/* Some of mbuf(9) zones reached maximum */ +EVENTHANDLER_DECLARE(mbuf_lowmem, vm_lowmem_handler_t); /* Root mounted event */ typedef void (*mountroot_handler_t)(void *); diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -52,9 +52,6 @@ * Each protocol has a handle initializing one of these structures, * which is used for protocol-protocol and system-protocol communication. * - * The system will call the pr_drain entry if it is low on space and - * this should throw away any non-critical data. - * * In retrospect, it would be a lot nicer to use an interface * similar to the vnode VOP interface. */ @@ -65,7 +62,6 @@ /* USE THESE FOR YOUR PROTOTYPES ! */ typedef int pr_ctloutput_t(struct socket *, struct sockopt *); -typedef void pr_drain_t(void); typedef void pr_abort_t(struct socket *); typedef int pr_accept_t(struct socket *, struct sockaddr **); typedef int pr_attach_t(struct socket *, int, struct thread *); @@ -117,7 +113,6 @@ /* protocol-protocol hooks */ pr_ctloutput_t *pr_ctloutput; /* control output (from above) */ /* utility hooks */ - pr_drain_t *pr_drain; /* flush any excess space possible */ struct pr_usrreqs *pr_usrreqs; /* user-protocol hook */ }; diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -87,6 +87,7 @@ */ #define VM_LOW_KMEM 0x01 #define VM_LOW_PAGES 0x02 +#define VM_LOW_MBUFS 0x04 /* * Exported routines.