Index: share/man/man9/zone.9 =================================================================== --- share/man/man9/zone.9 +++ share/man/man9/zone.9 @@ -292,18 +292,12 @@ .Fn uma_zalloc_pcpu function and its variants instead, and pass .Dv M_ZERO . -.It Dv UMA_ZONE_OFFPAGE -By default book-keeping of items within a slab is done in the slab page itself. -This flag explicitly tells subsystem that book-keeping structure should be -allocated separately from special internal zone. -This flag requires either -.Dv UMA_ZONE_VTOSLAB -or -.Dv UMA_ZONE_HASH , -since subsystem requires a mechanism to find a book-keeping structure -to an item being freed. -The subsystem may choose to prefer offpage book-keeping for certain zones -implicitly. +.It Dv UMA_ZONE_NOTOUCH +The subsystem may not directly touch (i.e. read or write) the slab memory. +Otherwise, by default, book-keeping of items within a slab may be done in the +slab page itself, and +.Dv INVARIANTS +kernels may also do use-after-free checking by accessing the slab memory. .It Dv UMA_ZONE_ZINIT The zone will have its .Ft uma_init @@ -317,13 +311,11 @@ .Dv UMA_ZONE_ZINIT flag would not return zeroed memory on every .Fn uma_zalloc . -.It Dv UMA_ZONE_HASH -The zone should use an internal hash table to find slab book-keeping -structure where an allocation being freed belongs to. -.It Dv UMA_ZONE_VTOSLAB -The zone should use special field of -.Vt vm_page_t -to find slab book-keeping structure where an allocation being freed belongs to. +.It Dv UMA_ZONE_NOTPAGE +An allocator function will be supplied with +.Fn uma_zone_set_allocf +and the memory that it returns may not be kernel virtual memory backed by VM +pages in the page array. .It Dv UMA_ZONE_MALLOC The zone is for the .Xr malloc 9 Index: sys/arm/arm/busdma_machdep-v6.c =================================================================== --- sys/arm/arm/busdma_machdep-v6.c +++ sys/arm/arm/busdma_machdep-v6.c @@ -244,7 +244,7 @@ * atomic ops on uma_slab_t fields and safety of this * operation is not guaranteed for write-back caches */ - uma_flags = UMA_ZONE_OFFPAGE; + uma_flags = UMA_ZONE_NOTOUCH; #endif /* * Create a cache of buffers in uncacheable memory, to implement the Index: sys/vm/uma.h =================================================================== --- sys/vm/uma.h +++ sys/vm/uma.h @@ -232,14 +232,10 @@ * Definitions for uma_zcreate flags * * These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to - * overlap when adding new features. 0xff000000 is in use by uma_int.h. + * overlap when adding new features. */ -#define UMA_ZONE_PAGEABLE 0x0001 /* Return items not fully backed by - physical memory XXX Not yet */ #define UMA_ZONE_ZINIT 0x0002 /* Initialize with zeros */ -#define UMA_ZONE_STATIC 0x0004 /* Statically sized zone */ -#define UMA_ZONE_OFFPAGE 0x0008 /* Force the slab structure allocation - off of the real memory */ +#define UMA_ZONE_NOTOUCH 0x0008 /* UMA may not access the memory */ #define UMA_ZONE_MALLOC 0x0010 /* For use by malloc(9) only! */ #define UMA_ZONE_NOFREE 0x0020 /* Do not free slabs of this type! */ #define UMA_ZONE_MTXCLASS 0x0040 /* Create a new lock class */ @@ -247,20 +243,17 @@ * Used for internal vm datastructures * only. */ -#define UMA_ZONE_HASH 0x0100 /* - * Use a hash table instead of caching - * information in the vm_page. - */ +#define UMA_ZONE_NOTPAGE 0x0100 /* allocf memory not vm pages */ #define UMA_ZONE_SECONDARY 0x0200 /* Zone is a Secondary Zone */ #define UMA_ZONE_NOBUCKET 0x0400 /* Do not use buckets. */ #define UMA_ZONE_MAXBUCKET 0x0800 /* Use largest buckets. */ -#define UMA_ZONE_CACHESPREAD 0x1000 /* +#define UMA_ZONE_MINBUCKET 0x1000 /* Use smallest buckets. */ +#define UMA_ZONE_CACHESPREAD 0x2000 /* * Spread memory start locations across * all possible cache lines. May * require many virtually contiguous * backend pages and can fail early. */ -#define UMA_ZONE_VTOSLAB 0x2000 /* Zone uses vtoslab for lookup. */ #define UMA_ZONE_NODUMP 0x4000 /* * Zone's pages will not be included in * mini-dumps. @@ -268,9 +261,9 @@ #define UMA_ZONE_PCPU 0x8000 /* * Allocates mp_maxid + 1 slabs of PAGE_SIZE */ -#define UMA_ZONE_MINBUCKET 0x10000 /* Use smallest buckets. */ -#define UMA_ZONE_FIRSTTOUCH 0x20000 /* First touch NUMA policy */ -#define UMA_ZONE_ROUNDROBIN 0x40000 /* Round-robin NUMA policy. */ +#define UMA_ZONE_FIRSTTOUCH 0x10000 /* First touch NUMA policy */ +#define UMA_ZONE_ROUNDROBIN 0x20000 /* Round-robin NUMA policy. */ +/* In use by UMA_ZFLAGs: 0xffe00000 */ /* * These flags are shared between the keg and zone. In zones wishing to add @@ -278,9 +271,9 @@ * physical parameters of the request and may not be provided by the consumer. */ #define UMA_ZONE_INHERIT \ - (UMA_ZONE_OFFPAGE | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \ - UMA_ZONE_HASH | UMA_ZONE_VTOSLAB | UMA_ZONE_PCPU | \ - UMA_ZONE_FIRSTTOUCH | UMA_ZONE_ROUNDROBIN) + (UMA_ZONE_NOTOUCH | UMA_ZONE_MALLOC | UMA_ZONE_NOFREE | \ + UMA_ZONE_NOTPAGE | UMA_ZONE_PCPU | UMA_ZONE_FIRSTTOUCH | \ + UMA_ZONE_ROUNDROBIN) /* Definitions for align */ #define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ Index: sys/vm/uma_core.c =================================================================== --- sys/vm/uma_core.c +++ sys/vm/uma_core.c @@ -741,7 +741,7 @@ uma_keg_t keg; u_int slabs, pages; - if ((zone->uz_flags & UMA_ZONE_HASH) == 0) + if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0) goto update_wss; keg = zone->uz_keg; @@ -1159,7 +1159,7 @@ #endif keg->uk_fini(slab_item(slab, keg, i), keg->uk_size); } - if (keg->uk_flags & UMA_ZONE_OFFPAGE) + if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE); keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags); uma_total_dec(PAGE_SIZE * keg->uk_ppera); @@ -1196,7 +1196,7 @@ /* We have nowhere to free these to. */ if (slab->us_flags & UMA_SLAB_BOOT) continue; - if (keg->uk_flags & UMA_ZONE_HASH) + if (keg->uk_flags & UMA_ZFLAG_HASH) UMA_HASH_REMOVE(&keg->uk_hash, slab); n++; LIST_REMOVE(slab, us_link); @@ -1292,7 +1292,7 @@ allocf = keg->uk_allocf; slab = NULL; mem = NULL; - if (keg->uk_flags & UMA_ZONE_OFFPAGE) { + if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) { slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, aflags); if (slab == NULL) goto fail; @@ -1317,23 +1317,23 @@ size = keg->uk_ppera * PAGE_SIZE; mem = allocf(zone, size, domain, &sflags, aflags); if (mem == NULL) { - if (keg->uk_flags & UMA_ZONE_OFFPAGE) + if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE); goto fail; } uma_total_inc(size); /* For HASH zones all pages go to the same uma_domain. */ - if ((keg->uk_flags & UMA_ZONE_HASH) != 0) + if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0) domain = 0; /* Point the slab into the allocated memory */ - if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) + if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE)) slab = (uma_slab_t )(mem + keg->uk_pgoff); else ((uma_hash_slab_t)slab)->uhs_data = mem; - if (keg->uk_flags & UMA_ZONE_VTOSLAB) + if (keg->uk_flags & UMA_ZFLAG_VTOSLAB) for (i = 0; i < keg->uk_ppera; i++) vsetzoneslab((vm_offset_t)mem + (i * PAGE_SIZE), zone, slab); @@ -1362,7 +1362,7 @@ CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)", slab, keg->uk_name, keg); - if (keg->uk_flags & UMA_ZONE_HASH) + if (keg->uk_flags & UMA_ZFLAG_HASH) UMA_HASH_INSERT(&keg->uk_hash, slab, mem); /* @@ -1733,9 +1733,10 @@ * squeeze one more item in for very particular sizes if we were * to loop and reduce the bitsize if there is waste. */ - if (keg->uk_flags & UMA_ZONE_OFFPAGE) + if (keg->uk_flags & (UMA_ZONE_NOTOUCH | UMA_ZONE_PCPU)) { + keg->uk_flags |= UMA_ZFLAG_OFFPAGE; shsize = 0; - else + } else shsize = slab_sizeof(slabsize / rsize); if (rsize <= slabsize - shsize) @@ -1761,8 +1762,12 @@ * of UMA_ZONE_VM, which clearly forbids it. */ if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) || - (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) + (keg->uk_flags & UMA_ZFLAG_CACHEONLY)) { + KASSERT((keg->uk_flags & UMA_ZFLAG_OFFPAGE) == 0, + ("%s: incompatible flags 0x%b", __func__, keg->uk_flags, + PRINT_UMA_ZFLAGS)); return; + } /* * See if using an OFFPAGE slab will limit our waste. Only do @@ -1790,13 +1795,15 @@ * hash to find slabs. If the zone was explicitly created * OFFPAGE we can't necessarily touch the memory. */ - if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) - keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB; + keg->uk_flags |= UMA_ZFLAG_OFFPAGE; } - if ((keg->uk_flags & UMA_ZONE_OFFPAGE) && - (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0) - keg->uk_flags |= UMA_ZONE_HASH; + if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0) { + if ((keg->uk_flags & UMA_ZONE_NOTPAGE) != 0) + keg->uk_flags |= UMA_ZFLAG_HASH; + else + keg->uk_flags |= UMA_ZFLAG_VTOSLAB; + } } /* @@ -1823,7 +1830,7 @@ keg->uk_rsize = keg->uk_size; /* Check whether we have enough space to not do OFFPAGE. */ - if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0 && + if ((keg->uk_flags & UMA_ZONE_NOTOUCH) == 0 && PAGE_SIZE * keg->uk_ppera - keg->uk_rsize < slab_sizeof(SLAB_MIN_SETSIZE)) { /* @@ -1832,14 +1839,17 @@ * slab header. */ if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0) - keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB; + keg->uk_flags |= UMA_ZFLAG_OFFPAGE; else keg->uk_ppera++; } - if ((keg->uk_flags & UMA_ZONE_OFFPAGE) && - (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0) - keg->uk_flags |= UMA_ZONE_HASH; + if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0) { + if ((keg->uk_flags & UMA_ZONE_NOTPAGE) != 0) + keg->uk_flags |= UMA_ZFLAG_HASH; + else + keg->uk_flags |= UMA_ZFLAG_VTOSLAB; + } } static void @@ -1871,7 +1881,7 @@ keg->uk_rsize = rsize; keg->uk_ppera = pages; keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize; - keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB; + keg->uk_flags |= UMA_ZFLAG_OFFPAGE | UMA_ZFLAG_VTOSLAB; KASSERT(keg->uk_ipers <= SLAB_MAX_SETSIZE, ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__, keg->uk_ipers)); @@ -1922,13 +1932,10 @@ keg->uk_init = zero_init; if (arg->flags & UMA_ZONE_MALLOC) - keg->uk_flags |= UMA_ZONE_VTOSLAB; + keg->uk_flags |= UMA_ZFLAG_VTOSLAB; - if (arg->flags & UMA_ZONE_PCPU) -#ifdef SMP - keg->uk_flags |= UMA_ZONE_OFFPAGE; -#else - keg->uk_flags &= ~UMA_ZONE_PCPU; +#ifndef SMP + keg->uk_flags &= ~UMA_ZONE_PCPU; #endif if (keg->uk_flags & UMA_ZONE_CACHESPREAD) { @@ -1949,13 +1956,13 @@ */ #ifdef NUMA if ((keg->uk_flags & - (UMA_ZONE_HASH | UMA_ZONE_VM | UMA_ZONE_ROUNDROBIN)) == 0) + (UMA_ZFLAG_HASH | UMA_ZONE_VM | UMA_ZONE_ROUNDROBIN)) == 0) keg->uk_flags |= UMA_ZONE_FIRSTTOUCH; else if ((keg->uk_flags & UMA_ZONE_FIRSTTOUCH) == 0) keg->uk_flags |= UMA_ZONE_ROUNDROBIN; #endif - if (keg->uk_flags & UMA_ZONE_OFFPAGE) + if (keg->uk_flags & UMA_ZFLAG_OFFPAGE) keg->uk_slabzone = slabzone; /* @@ -1993,7 +2000,7 @@ * figure out where in each page it goes. See slab_sizeof * definition. */ - if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) { + if (!(keg->uk_flags & UMA_ZFLAG_OFFPAGE)) { size_t shsize; shsize = slab_sizeof(keg->uk_ipers); @@ -2010,7 +2017,7 @@ zone->uz_name, keg->uk_ipers, keg->uk_rsize, keg->uk_size)); } - if (keg->uk_flags & UMA_ZONE_HASH) + if (keg->uk_flags & UMA_ZFLAG_HASH) hash_alloc(&keg->uk_hash, 0); CTR3(KTR_UMA, "keg_ctor %p zone %s(%p)\n", keg, zone->uz_name, zone); @@ -2084,7 +2091,7 @@ /* * keg if present. */ - if ((zone->uz_flags & UMA_ZONE_HASH) == 0) + if ((zone->uz_flags & UMA_ZFLAG_HASH) == 0) domains = vm_ndomains; else domains = 1; @@ -2718,11 +2725,9 @@ * or fini procedures, no dependency on the initial value of the * memory, and no (legitimate) use of the memory after free. Note, * the ctor and dtor do not need to be empty. - * - * XXX UMA_ZONE_OFFPAGE. */ - if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) && - uminit == NULL && fini == NULL) { + if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOTOUCH | + UMA_ZONE_NOFREE))) && uminit == NULL && fini == NULL) { args.uminit = trash_init; args.fini = trash_fini; } @@ -3205,7 +3210,7 @@ uint32_t reserve; /* HASH has a single free list. */ - if ((keg->uk_flags & UMA_ZONE_HASH) != 0) + if ((keg->uk_flags & UMA_ZFLAG_HASH) != 0) domain = 0; KEG_LOCK(keg, domain); @@ -4007,15 +4012,15 @@ zone = arg; keg = zone->uz_keg; lock = NULL; - if (__predict_false((zone->uz_flags & UMA_ZONE_HASH) != 0)) + if (__predict_false((zone->uz_flags & UMA_ZFLAG_HASH) != 0)) lock = KEG_LOCK(keg, 0); for (i = 0; i < cnt; i++) { item = bucket[i]; - if (__predict_true((zone->uz_flags & UMA_ZONE_VTOSLAB) != 0)) { + if (__predict_true((zone->uz_flags & UMA_ZFLAG_VTOSLAB) != 0)) { slab = vtoslab((vm_offset_t)item); } else { mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); - if ((zone->uz_flags & UMA_ZONE_HASH) != 0) + if ((zone->uz_flags & UMA_ZFLAG_HASH) != 0) slab = hash_sfind(&keg->uk_hash, mem); else slab = (uma_slab_t)(mem + keg->uk_pgoff); @@ -4739,7 +4744,7 @@ int avail, effpct, total; total = keg->uk_ppera * PAGE_SIZE; - if ((keg->uk_flags & UMA_ZONE_OFFPAGE) != 0) + if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) != 0) total += slab_sizeof(SLAB_MAX_SETSIZE); /* * We consider the client's requested size and alignment here, not the @@ -4779,10 +4784,10 @@ mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK)); if ((zone->uz_flags & UMA_ZFLAG_CACHE) != 0) return (NULL); - if (zone->uz_flags & UMA_ZONE_VTOSLAB) + if (zone->uz_flags & UMA_ZFLAG_VTOSLAB) return (vtoslab((vm_offset_t)mem)); keg = zone->uz_keg; - if ((keg->uk_flags & UMA_ZONE_HASH) == 0) + if ((keg->uk_flags & UMA_ZFLAG_HASH) == 0) return ((uma_slab_t)(mem + keg->uk_pgoff)); KEG_LOCK(keg, 0); slab = hash_sfind(&keg->uk_hash, mem); Index: sys/vm/uma_int.h =================================================================== --- sys/vm/uma_int.h +++ sys/vm/uma_int.h @@ -139,6 +139,64 @@ /* Max waste percentage before going to off page slab management */ #define UMA_MAX_WASTE 10 +/* + * These flags must not overlap with the UMA_ZONE flags specified in uma.h. + */ +#define UMA_ZFLAG_OFFPAGE 0x00200000 /* + * Force the slab structure + * allocation off of the real + * memory. + */ +#define UMA_ZFLAG_HASH 0x00400000 /* + * Use a hash table instead of + * caching information in the + * vm_page. + */ +#define UMA_ZFLAG_VTOSLAB 0x00800000 /* + * Zone uses vtoslab for + * lookup. + */ +#define UMA_ZFLAG_CTORDTOR 0x01000000 /* Zone has ctor/dtor set. */ +#define UMA_ZFLAG_LIMIT 0x02000000 /* Zone has limit set. */ +#define UMA_ZFLAG_CACHE 0x04000000 /* uma_zcache_create()d it */ +#define UMA_ZFLAG_RECLAIMING 0x08000000 /* Running zone_reclaim(). */ +#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */ +#define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */ +#define UMA_ZFLAG_TRASH 0x40000000 /* Add trash ctor/dtor. */ +#define UMA_ZFLAG_CACHEONLY 0x80000000 /* Don't ask VM for buckets. */ + +#define UMA_ZFLAG_INHERIT \ + (UMA_ZFLAG_OFFPAGE | UMA_ZFLAG_HASH | UMA_ZFLAG_VTOSLAB | \ + UMA_ZFLAG_BUCKET | UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY) + +#define PRINT_UMA_ZFLAGS "\20" \ + "\40CACHEONLY" \ + "\37TRASH" \ + "\36INTERNAL" \ + "\35BUCKET" \ + "\34RECLAIMING" \ + "\33CACHE" \ + "\32LIMIT" \ + "\31CTORDTOR" \ + "\30VTOSLAB" \ + "\27HASH" \ + "\26OFFPAGE" \ + "\22ROUNDROBIN" \ + "\21FIRSTTOUCH" \ + "\20PCPU" \ + "\17NODUMP" \ + "\16CACHESPREAD" \ + "\15MINBUCKET" \ + "\14MAXBUCKET" \ + "\13NOBUCKET" \ + "\12SECONDARY" \ + "\11NOTPAGE" \ + "\10VM" \ + "\7MTXCLASS" \ + "\6NOFREE" \ + "\5MALLOC" \ + "\4NOTOUCH" \ + "\2ZINIT" /* * Hash table for freed address -> slab translation. @@ -370,7 +428,7 @@ slab_data(uma_slab_t slab, uma_keg_t keg) { - if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) + if ((keg->uk_flags & UMA_ZFLAG_OFFPAGE) == 0) return ((void *)((uintptr_t)slab - keg->uk_pgoff)); else return (((uma_hash_slab_t)slab)->uhs_data); @@ -473,50 +531,6 @@ /* uz_domain follows here. */ }; -/* - * These flags must not overlap with the UMA_ZONE flags specified in uma.h. - */ -#define UMA_ZFLAG_CTORDTOR 0x01000000 /* Zone has ctor/dtor set. */ -#define UMA_ZFLAG_LIMIT 0x02000000 /* Zone has limit set. */ -#define UMA_ZFLAG_CACHE 0x04000000 /* uma_zcache_create()d it */ -#define UMA_ZFLAG_RECLAIMING 0x08000000 /* Running zone_reclaim(). */ -#define UMA_ZFLAG_BUCKET 0x10000000 /* Bucket zone. */ -#define UMA_ZFLAG_INTERNAL 0x20000000 /* No offpage no PCPU. */ -#define UMA_ZFLAG_TRASH 0x40000000 /* Add trash ctor/dtor. */ -#define UMA_ZFLAG_CACHEONLY 0x80000000 /* Don't ask VM for buckets. */ - -#define UMA_ZFLAG_INHERIT \ - (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET) - -#define PRINT_UMA_ZFLAGS "\20" \ - "\40CACHEONLY" \ - "\37TRASH" \ - "\36INTERNAL" \ - "\35BUCKET" \ - "\34RECLAIMING" \ - "\33CACHE" \ - "\32LIMIT" \ - "\31CTORDTOR" \ - "\23ROUNDROBIN" \ - "\22FIRSTTOUCH" \ - "\21MINBUCKET" \ - "\20PCPU" \ - "\17NODUMP" \ - "\16VTOSLAB" \ - "\15CACHESPREAD" \ - "\14MAXBUCKET" \ - "\13NOBUCKET" \ - "\12SECONDARY" \ - "\11HASH" \ - "\10VM" \ - "\7MTXCLASS" \ - "\6NOFREE" \ - "\5MALLOC" \ - "\4OFFPAGE" \ - "\3STATIC" \ - "\2ZINIT" \ - "\1PAGEABLE" - /* * Macros for interpreting the uz_items field. 20 bits of sleeper count * and 44 bit of item count.