Index: head/share/man/man9/Makefile =================================================================== --- head/share/man/man9/Makefile +++ head/share/man/man9/Makefile @@ -1911,6 +1911,7 @@ zone.9 uma_zone_get_cur.9 \ zone.9 uma_zone_get_max.9 \ zone.9 uma_zone_set_max.9 \ - zone.9 uma_zone_set_warning.9 + zone.9 uma_zone_set_warning.9 \ + zone.9 uma_zone_set_maxaction.9 .include Index: head/share/man/man9/zone.9 =================================================================== --- head/share/man/man9/zone.9 +++ head/share/man/man9/zone.9 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 7, 2014 +.Dd December 20, 2015 .Dt ZONE 9 .Os .Sh NAME @@ -39,7 +39,8 @@ .Nm uma_zone_set_max, .Nm uma_zone_get_max, .Nm uma_zone_get_cur, -.Nm uma_zone_set_warning +.Nm uma_zone_set_warning, +.Nm uma_zone_set_maxaction .Nd zone allocator .Sh SYNOPSIS .In sys/param.h @@ -71,6 +72,8 @@ .Fn uma_zone_get_cur "uma_zone_t zone" .Ft void .Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning" +.Ft void +.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)" .In sys/sysctl.h .Fn SYSCTL_UMA_MAX parent nbr name access zone descr .Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr @@ -307,13 +310,21 @@ .Fn uma_zone_set_warning function sets a warning that will be printed on the system console when the given zone becomes full and fails to allocate an item. -The warning will be printed not often than every five minutes. +The warning will be printed no more often than every five minutes. Warnings can be turned off globally by setting the .Va vm.zone_warnings sysctl tunable to .Va 0 . .Pp The +.Fn uma_zone_set_maxaction +function sets a function that will be called when the given zone becomes full +and fails to allocate an item. +The function will be called with the zone locked. Also, the function +that called the allocation function may have held additional locks. Therefore, +this function should do very little work (similar to a signal handler). 
+.Pp +The .Fn SYSCTL_UMA_MAX parent nbr name access zone descr macro declares a static .Xr sysctl Index: head/sys/kern/kern_mbuf.c =================================================================== --- head/sys/kern/kern_mbuf.c +++ head/sys/kern/kern_mbuf.c @@ -32,11 +32,14 @@ #include #include +#include #include #include #include #include #include +#include +#include #include #include #include @@ -272,6 +275,12 @@ uma_zone_t zone_ext_refcnt; /* + * Callout to assist us in freeing mbufs. + */ +static struct callout mb_reclaim_callout; +static struct mtx mb_reclaim_callout_mtx; + +/* * Local prototypes. */ static int mb_ctor_mbuf(void *, int, void *, int); @@ -285,6 +294,7 @@ static void mb_reclaim(void *); static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int); +static void mb_maxaction(uma_zone_t); /* Ensure that MSIZE is a power of 2. */ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); @@ -310,6 +320,7 @@ if (nmbufs > 0) nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached"); + uma_zone_set_maxaction(zone_mbuf, mb_maxaction); zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, mb_ctor_clust, mb_dtor_clust, @@ -322,6 +333,7 @@ if (nmbclusters > 0) nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached"); + uma_zone_set_maxaction(zone_clust, mb_maxaction); zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); @@ -338,6 +350,7 @@ if (nmbjumbop > 0) nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached"); + uma_zone_set_maxaction(zone_jumbop, mb_maxaction); zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, mb_ctor_clust, mb_dtor_clust, @@ -351,6 +364,7 @@ if (nmbjumbo9 > 0) nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); uma_zone_set_warning(zone_jumbo9, 
"kern.ipc.nmbjumbo9 limit reached"); + uma_zone_set_maxaction(zone_jumbo9, mb_maxaction); zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, mb_ctor_clust, mb_dtor_clust, @@ -364,6 +378,7 @@ if (nmbjumbo16 > 0) nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); + uma_zone_set_maxaction(zone_jumbo16, mb_maxaction); zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), NULL, NULL, @@ -372,6 +387,11 @@ /* uma_prealloc() goes here... */ + /* Initialize the mb_reclaim() callout. */ + mtx_init(&mb_reclaim_callout_mtx, "mb_reclaim_callout_mtx", NULL, + MTX_DEF); + callout_init(&mb_reclaim_callout, 1); + /* * Hook event handler for low-memory situation, used to * drain protocols and push data back to the caches (UMA @@ -678,3 +698,61 @@ if (pr->pr_drain != NULL) (*pr->pr_drain)(); } + +/* + * This is the function called by the mb_reclaim_callout, which is + * used when we hit the maximum for a zone. + * + * (See mb_maxaction() below.) + */ +static void +mb_reclaim_timer(void *junk __unused) +{ + + mtx_lock(&mb_reclaim_callout_mtx); + + /* + * Avoid running this function extra times by skipping this invocation + * if the callout has already been rescheduled. + */ + if (callout_pending(&mb_reclaim_callout) || + !callout_active(&mb_reclaim_callout)) { + mtx_unlock(&mb_reclaim_callout_mtx); + return; + } + mtx_unlock(&mb_reclaim_callout_mtx); + + mb_reclaim(NULL); + + mtx_lock(&mb_reclaim_callout_mtx); + callout_deactivate(&mb_reclaim_callout); + mtx_unlock(&mb_reclaim_callout_mtx); +} + +/* + * This function is called when we hit the maximum for a zone. + * + * At that point, we want to call the protocol drain routine to free up some + * mbufs. However, we will use the callout routines to schedule this to + * occur in another thread. (The thread calling this function holds the + * zone lock.) 
+ */ +static void +mb_maxaction(uma_zone_t zone __unused) +{ + + /* + * If we can't immediately obtain the lock, either the callout + * is currently running, or another thread is scheduling the + * callout. + */ + if (!mtx_trylock(&mb_reclaim_callout_mtx)) + return; + + /* If not already scheduled/running, schedule the callout. */ + if (!callout_active(&mb_reclaim_callout)) { + callout_reset(&mb_reclaim_callout, 1, mb_reclaim_timer, NULL); + } + + mtx_unlock(&mb_reclaim_callout_mtx); +} Index: head/sys/vm/uma.h =================================================================== --- head/sys/vm/uma.h +++ head/sys/vm/uma.h @@ -521,6 +521,19 @@ void uma_zone_set_warning(uma_zone_t zone, const char *warning); /* + * Sets a function to run when limit is reached + * + * Arguments: + * zone The zone to which this applies + * fx The function to run + * + * Returns: + * Nothing + */ +typedef void (*uma_maxaction_t)(uma_zone_t); +void uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t); + +/* * Obtains the approximate current number of items allocated from a zone * * Arguments: Index: head/sys/vm/uma_core.c =================================================================== --- head/sys/vm/uma_core.c +++ head/sys/vm/uma_core.c @@ -431,6 +431,13 @@ printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning); } +static inline void +zone_maxaction(uma_zone_t zone) +{ + if (zone->uz_maxaction) + (*zone->uz_maxaction)(zone); +} + static void zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t)) { @@ -1578,6 +1585,7 @@ zone->uz_flags = 0; zone->uz_warning = NULL; timevalclear(&zone->uz_ratecheck); + zone->uz_maxaction = NULL; keg = arg->keg; ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS)); @@ -2382,6 +2390,7 @@ if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) { zone->uz_flags |= UMA_ZFLAG_FULL; zone_log_warning(zone); + zone_maxaction(zone); } if (flags & M_NOWAIT) break; @@ -2501,6 +2510,7 @@ zone->uz_flags |= UMA_ZFLAG_FULL; zone->uz_sleeps++; 
zone_log_warning(zone); + zone_maxaction(zone); msleep(zone, zone->uz_lockptr, PVM, "zonelimit", hz/100); zone->uz_flags &= ~UMA_ZFLAG_FULL; @@ -3007,6 +3017,16 @@ } /* See uma.h */ +void +uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction) +{ + + ZONE_LOCK(zone); + zone->uz_maxaction = maxaction; + ZONE_UNLOCK(zone); +} + +/* See uma.h */ int uma_zone_get_cur(uma_zone_t zone) { Index: head/sys/vm/uma_int.h =================================================================== --- head/sys/vm/uma_int.h +++ head/sys/vm/uma_int.h @@ -303,10 +303,12 @@ uint16_t uz_count; /* Amount of items in full bucket */ uint16_t uz_count_min; /* Minimal amount of items there */ - /* The next three fields are used to print a rate-limited warnings. */ + /* The next two fields are used to print rate-limited warnings. */ const char *uz_warning; /* Warning to print on failure */ struct timeval uz_ratecheck; /* Warnings rate-limiting */ + uma_maxaction_t uz_maxaction; /* Function to run when at limit */ + /* * This HAS to be the last item because we adjust the zone size * based on NCPU and then allocate the space for the zones.