Index: sys/vm/uma_core.c
===================================================================
--- sys/vm/uma_core.c
+++ sys/vm/uma_core.c
@@ -1018,6 +1018,12 @@
 	}
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
 		zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
+	else if (keg->uk_flags & UMA_ZFLAG_SLABVMPAGE) {
+		vm_page_t m;
+		m = (vm_page_t)slab_topageslab(slab);
+		bzero(m, offsetof(struct vm_page, p_opaque_end));
+		m->flags &= ~PG_OPAQUE;
+	}
 	keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
 	uma_total_dec(PAGE_SIZE * keg->uk_ppera);
 }
@@ -1142,7 +1148,9 @@
     int aflags)
 {
 	uma_alloc allocf;
+	uma_page_slab_t ups;
 	uma_slab_t slab;
+	vm_page_t m;
 	unsigned long size;
 	uint8_t *mem;
 	uint8_t sflags;
@@ -1158,6 +1166,7 @@
 
 	slab = NULL;
 	mem = NULL;
+	ups = NULL;
 	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, aflags);
 		if (slab == NULL)
@@ -1191,15 +1200,30 @@
 	uma_total_inc(size);
 
 	/* Point the slab into the allocated memory */
-	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
-		slab = (uma_slab_t )(mem + keg->uk_pgoff);
-	else
+	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
 		((uma_hash_slab_t)slab)->uhs_data = mem;
+	} else if (keg->uk_flags & UMA_ZFLAG_SLABVMPAGE) {
+		m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)mem));
+		KASSERT(m->object == NULL,
+		    ("%s: cannot embed slab, page %p has object %p", __func__,
+		    m, m->object));
+		m->flags |= PG_OPAQUE;
+		ups = (uma_page_slab_t)m;
+		ups->ups_zone = zone;
+		ups->ups_data = mem;
+		slab = &ups->ups_slab;
+	} else {
+		slab = (uma_slab_t )(mem + keg->uk_pgoff);
+	}
 
-	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
-		for (i = 0; i < keg->uk_ppera; i++)
+	if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
+		i = 0;
+		if (ups != NULL)
+			i++;
+		for (; i < keg->uk_ppera; i++)
 			vsetzoneslab((vm_offset_t)mem + (i * PAGE_SIZE),
 			    zone, slab);
+	}
 
 	slab->us_freecount = keg->uk_ipers;
 	slab->us_flags = sflags;
@@ -1538,6 +1562,12 @@
 	return (slab_space(nitems) / rsize);
 }
 
+#define	PAGESLAB_MAX_ITEMS						\
+	(MIN(SLAB_MAX_SETSIZE, MAX(0,					\
+	offsetof(struct vm_page, p_opaque_end) -			\
+	offsetof(struct uma_page_slab, ups_slab.us_free)) /		\
+	BITSET_SIZE(1) / SLAB_BITSETS * BITSET_SIZE(1) * NBBY))
+
 /*
  * Finish creating a small uma keg.  This calculates ipers, and the keg size.
  *
@@ -1550,11 +1580,13 @@
 static void
 keg_small_init(uma_keg_t keg)
 {
-	u_int rsize;
+	u_int format;
+	u_int ipers;
 	u_int memused;
-	u_int wastedspace;
+	u_int rsize;
 	u_int shsize;
 	u_int slabsize;
+	u_int wastedspace;
 
 	if (keg->uk_flags & UMA_ZONE_PCPU) {
 		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
@@ -1582,30 +1614,56 @@
 	    keg->uk_rsize < UMA_PCPU_ALLOC_SIZE,
 	    ("%s: size %u too large", __func__, keg->uk_rsize));
 
+	format = 0;
+	ipers = 0;
+	shsize = 0;
+
 	/*
-	 * Use a pessimistic bit count for shsize.  It may be possible to
-	 * squeeze one more item in for very particular sizes if we were
-	 * to loop and reduce the bitsize if there is waste.
+	 * If the zone was explicitly created OFFPAGE we can't
+	 * necessarily touch the memory or the page structure.
	 */
-	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
-		shsize = 0;
-	else
-		shsize = slab_sizeof(slabsize / rsize);
-
-	if (rsize <= slabsize - shsize)
-		keg->uk_ipers = (slabsize - shsize) / rsize;
-	else {
-		/* Handle special case when we have 1 item per slab, so
-		 * alignment requirement can be relaxed. */
-		KASSERT(keg->uk_size <= slabsize - shsize,
-		    ("%s: size %u greater than slab", __func__, keg->uk_size));
-		keg->uk_ipers = 1;
+	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
+		KASSERT((keg->uk_flags &
+		    (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY)) == 0,
+		    ("%s: incompatible flags %#x", __func__, keg->uk_flags));
+		if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
+			format = UMA_ZONE_HASH;
+		ipers = slabsize / rsize;
+		goto out;
 	}
-	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
-	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
-	memused = keg->uk_ipers * rsize + shsize;
-	wastedspace = slabsize - memused;
+	/*
+	 * Evaluate an inline slab layout.  Start with the maximum
+	 * possible item count and remove items until the slab header
+	 * fits alongside the allocatable memory.
+	 */
+	for (ipers = (slabsize - slab_sizeof(1)) / rsize;
+	    ipers > 0 && ipers * rsize + slab_sizeof(ipers) > slabsize;
+	    ipers--)
+		continue;
+
+	/*
+	 * Handle the special case where the alignment requirement can
+	 * be relaxed when there is only 1 item per slab.
+	 */
+	if (ipers == 0 && keg->uk_size + slab_sizeof(1) <= slabsize)
+		ipers = 1;
+	if (ipers > 0)
+		shsize = slab_sizeof(ipers);
+
+#ifdef UMA_MD_SMALL_ALLOC
+	/*
+	 * Would a slab embedded in the vm_page allow more items?  We can
+	 * only embed the slab in the vm_page when we have complete control
+	 * over the page.  In particular, it can't belong to an object.
+	 * This effectively means it needs to come from uma_small_alloc.
+	 */
+	if (MIN(PAGESLAB_MAX_ITEMS, slabsize / rsize) > ipers &&
+	    __predict_true(booted >= BOOT_PAGEALLOC)) {
+		format = UMA_ZFLAG_SLABVMPAGE | UMA_ZONE_VTOSLAB;
+		ipers = MIN(PAGESLAB_MAX_ITEMS, slabsize / rsize);
+		shsize = 0;
+	}
+#endif
 
 	/*
 	 * We can't do OFFPAGE if we're internal or if we've been
@@ -1616,7 +1674,7 @@
 	 */
 	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
 	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
-		return;
+		goto out;
 
 	/*
 	 * See if using an OFFPAGE slab will limit our waste.  Only do
@@ -1626,31 +1684,27 @@
 	 * Historically this was not done because the VM could not
 	 * efficiently handle contiguous allocations.
 	 */
+	memused = ipers * rsize + shsize;
+	wastedspace = slabsize - memused;
 	if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
-	    (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
-		keg->uk_ipers = slabsize / keg->uk_rsize;
-		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
-		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
+	    (ipers < (slabsize / rsize))) {
+		format = UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
+		ipers = slabsize / rsize;
 		CTR6(KTR_UMA, "UMA decided we need offpage slab headers for "
 		    "keg: %s(%p), calculated wastedspace = %d, "
 		    "maximum wasted space allowed = %d, "
 		    "calculated ipers = %d, "
 		    "new wasted space = %d\n", keg->uk_name, keg, wastedspace,
-		    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
-		    slabsize - keg->uk_ipers * keg->uk_rsize);
-		/*
-		 * If we had access to memory to embed a slab header we
-		 * also have a page structure to use vtoslab() instead of
-		 * hash to find slabs.  If the zone was explicitly created
-		 * OFFPAGE we can't necessarily touch the memory.
-		 */
-		if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0)
-			keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
+		    slabsize / UMA_MAX_WASTE, ipers, slabsize - ipers * rsize);
 	}
 
-	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
-	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
-		keg->uk_flags |= UMA_ZONE_HASH;
+out:
+	keg->uk_flags |= format;
+	keg->uk_ipers = ipers;
+	CTR5(KTR_UMA, "%s: %s, rsize=%u, ipers=%u, flags=%#x\n",
+	    __func__, keg->uk_name, rsize, ipers, keg->uk_flags);
+	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_MAX_SETSIZE,
+	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
 }
 
 /*
@@ -1781,7 +1835,7 @@
 
 	if (arg->flags & UMA_ZONE_PCPU)
 #ifdef SMP
-		keg->uk_flags |= UMA_ZONE_OFFPAGE;
+		keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
 #else
 		keg->uk_flags &= ~UMA_ZONE_PCPU;
 #endif
Index: sys/vm/uma_int.h
===================================================================
--- sys/vm/uma_int.h
+++ sys/vm/uma_int.h
@@ -210,6 +210,48 @@
 
 typedef struct uma_domain * uma_domain_t;
 
+/*
+ * These flags must not overlap with the UMA_ZONE flags specified in uma.h.
+ */
+#define	UMA_ZFLAG_SLABVMPAGE	0x02000000	/* Slab embedded in the vm_page. */
+#define	UMA_ZFLAG_CACHE		0x04000000	/* uma_zcache_create()d it */
+#define	UMA_ZFLAG_RECLAIMING	0x08000000	/* Running zone_reclaim(). */
+#define	UMA_ZFLAG_BUCKET	0x10000000	/* Bucket zone. */
+#define	UMA_ZFLAG_INTERNAL	0x20000000	/* No offpage no PCPU. */
+#define	UMA_ZFLAG_TRASH		0x40000000	/* Add trash ctor/dtor. */
+#define	UMA_ZFLAG_CACHEONLY	0x80000000	/* Don't ask VM for buckets. */
+
+#define	UMA_ZFLAG_INHERIT						\
+	(UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET |	\
+	UMA_ZFLAG_SLABVMPAGE)
+
+#define	PRINT_UMA_ZFLAGS	"\20"	\
+	"\40CACHEONLY"	\
+	"\37TRASH"	\
+	"\36INTERNAL"	\
+	"\35BUCKET"	\
+	"\34RECLAIMING"	\
+	"\33CACHE"	\
+	"\32SLABVMPAGE"	\
+	"\22MINBUCKET"	\
+	"\21NUMA"	\
+	"\20PCPU"	\
+	"\17NODUMP"	\
+	"\16VTOSLAB"	\
+	"\15CACHESPREAD"	\
+	"\14MAXBUCKET"	\
+	"\13NOBUCKET"	\
+	"\12SECONDARY"	\
+	"\11HASH"	\
+	"\10VM"	\
+	"\7MTXCLASS"	\
+	"\6NOFREE"	\
+	"\5MALLOC"	\
+	"\4OFFPAGE"	\
+	"\3STATIC"	\
+	"\2ZINIT"	\
+	"\1PAGEABLE"
+
 /*
  * Keg management structure
  *
@@ -254,6 +296,20 @@
 };
 typedef struct uma_keg	* uma_keg_t;
 
+/*
+ * The slab has several possible layouts.  A layout is selected with the
+ * goal of minimizing internal fragmentation.  The slab layout is also
+ * restricted when UMA is not allowed to access the actual backing memory.
+ * The possible layouts are:
+ *  - on-page: The slab structure is embedded in the memory backing the
+ *    client allocation, at the end.  This is the default.
+ *  - off-page: The slab structure is allocated from the slabzone.
+ *    Selected by UMA_ZONE_OFFPAGE; slabs are found via a hash table
+ *    (UMA_ZONE_HASH) or the vm_page pointers (UMA_ZONE_VTOSLAB).
+ *  - vm_page-embedded: The slab structure is embedded in the vm_page
+ *    itself.  Selected by UMA_ZFLAG_SLABVMPAGE.
+ */
+
 /*
  * Free bits per-slab.
  */
@@ -311,11 +367,28 @@
 
 typedef struct uma_hash_slab * uma_hash_slab_t;
 
+struct uma_page_slab {
+	uma_zone_t	ups_zone;
+	uint8_t		*ups_data;	/* First item */
+	struct uma_slab	ups_slab;	/* Must be last. */
+};
+
+typedef struct uma_page_slab * uma_page_slab_t;
+
+static inline uma_page_slab_t
+slab_topageslab(uma_slab_t slab)
+{
+
+	return (__containerof(slab, struct uma_page_slab, ups_slab));
+}
+
 static inline void *
 slab_data(uma_slab_t slab, uma_keg_t keg)
 {
 
-	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0)
+	if (keg->uk_flags & UMA_ZFLAG_SLABVMPAGE)
+		return (slab_topageslab(slab)->ups_data);
+	else if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0)
 		return ((void *)((uintptr_t)slab - keg->uk_pgoff));
 	else
 		return (((uma_hash_slab_t)slab)->uhs_data);
@@ -418,45 +491,6 @@
 	/* uz_domain follows here. */
 };
 
-/*
- * These flags must not overlap with the UMA_ZONE flags specified in uma.h.
- */
-#define	UMA_ZFLAG_CACHE		0x04000000	/* uma_zcache_create()d it */
-#define	UMA_ZFLAG_RECLAIMING	0x08000000	/* Running zone_reclaim(). */
-#define	UMA_ZFLAG_BUCKET	0x10000000	/* Bucket zone. */
-#define	UMA_ZFLAG_INTERNAL	0x20000000	/* No offpage no PCPU. */
-#define	UMA_ZFLAG_TRASH		0x40000000	/* Add trash ctor/dtor. */
-#define	UMA_ZFLAG_CACHEONLY	0x80000000	/* Don't ask VM for buckets. */
-
-#define	UMA_ZFLAG_INHERIT						\
-	(UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET)
-
-#define	PRINT_UMA_ZFLAGS	"\20"	\
-	"\40CACHEONLY"	\
-	"\37TRASH"	\
-	"\36INTERNAL"	\
-	"\35BUCKET"	\
-	"\34RECLAIMING"	\
-	"\33CACHE"	\
-	"\22MINBUCKET"	\
-	"\21NUMA"	\
-	"\20PCPU"	\
-	"\17NODUMP"	\
-	"\16VTOSLAB"	\
-	"\15CACHESPREAD"	\
-	"\14MAXBUCKET"	\
-	"\13NOBUCKET"	\
-	"\12SECONDARY"	\
-	"\11HASH"	\
-	"\10VM"	\
-	"\7MTXCLASS"	\
-	"\6NOFREE"	\
-	"\5MALLOC"	\
-	"\4OFFPAGE"	\
-	"\3STATIC"	\
-	"\2ZINIT"	\
-	"\1PAGEABLE"
-
 #undef UMA_ALIGN
 
 #ifdef _KERNEL
@@ -534,7 +568,10 @@
 	vm_page_t p;
 
 	p = PHYS_TO_VM_PAGE(pmap_kextract(va));
-	return (p->plinks.uma.slab);
+	if ((p->flags & PG_OPAQUE) != 0)
+		return (&((uma_page_slab_t)p)->ups_slab);
+	else
+		return (p->plinks.uma.slab);
 }
 
 static __inline void
@@ -543,8 +580,13 @@
 	vm_page_t p;
 
 	p = PHYS_TO_VM_PAGE(pmap_kextract(va));
-	*slab = p->plinks.uma.slab;
-	*zone = p->plinks.uma.zone;
+	if ((p->flags & PG_OPAQUE) != 0) {
+		*zone = ((uma_page_slab_t)p)->ups_zone;
+		*slab = &((uma_page_slab_t)p)->ups_slab;
+	} else {
+		*zone = p->plinks.uma.zone;
+		*slab = p->plinks.uma.slab;
+	}
 }
 
 static __inline void
@@ -553,6 +595,7 @@
 	vm_page_t p;
 
 	p = PHYS_TO_VM_PAGE(pmap_kextract(va));
+	KASSERT((p->flags & PG_OPAQUE) == 0, ("Clobbering slab page %p", p));
 	p->plinks.uma.slab = slab;
 	p->plinks.uma.zone = zone;
 }
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -226,13 +226,14 @@
 			u_long v;
 		} memguard;
 		struct {
-			void *slab;
 			void *zone;
+			void *slab;
 		} uma;
 	} plinks;
 	TAILQ_ENTRY(vm_page) listq;	/* pages in same object (O) */
 	vm_object_t object;		/* which object am I in (O) */
 	vm_pindex_t pindex;		/* offset into object (O,P) */
+	char p_opaque_end[0];		/* end of private union space */
 	vm_paddr_t phys_addr;		/* physical address of page (C) */
 	struct md_page md;		/* machine dependent stuff */
 	u_int ref_count;		/* page references (A) */
@@ -448,6 +449,7 @@
 #define	PG_ZERO		0x04		/* page is zeroed */
 #define	PG_MARKER	0x08		/* special queue marker page */
 #define	PG_NODUMP	0x10		/* don't include this page in a dump */
+#define	PG_OPAQUE	0x20		/* private union up to p_opaque_end */
 
 /*
  * Misc constants.
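[Reviewer's note, not part of the patch.] The p_opaque_end marker and PG_OPAQUE exist so that an embedded slab may reuse every vm_page field up to p_opaque_end; the free path added in the first uma_core.c hunk bzero()s exactly that prefix when tearing the slab down. The invariant that PAGESLAB_MAX_ITEMS appears intended to guarantee can be expressed as a compile-time check. The sketch below is illustrative only: it assumes it would sit in uma_core.c next to the PAGESLAB_MAX_ITEMS definition (with sys/systm.h, vm/vm_page.h, and the uma_int.h structures in scope) and is not something the change itself adds.

/*
 * Illustrative sketch, not part of the patch: an embedded slab, with its
 * free bitsets sized for PAGESLAB_MAX_ITEMS items, must fit within the
 * vm_page fields that precede p_opaque_end.
 */
CTASSERT(offsetof(struct uma_page_slab, ups_slab.us_free) +
    SLAB_BITSETS * BITSET_SIZE(PAGESLAB_MAX_ITEMS) <=
    offsetof(struct vm_page, p_opaque_end));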
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -2482,7 +2482,7 @@
 		vm_page_change_lock(m, &m_mtx);
 		m_inc = 1;
 retry:
-		if (vm_page_wired(m))
+		if ((m->flags & PG_OPAQUE) != 0 || vm_page_wired(m))
 			run_ext = 0;
 #if VM_NRESERVLEVEL > 0
 		else if ((level = vm_reserv_level(m)) >= 0 &&
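[Reviewer's note, not part of the patch.] To make the new inline-layout search in keg_small_init() easier to follow, here is a small userspace toy model of the loop it introduces: start from the largest item count that still leaves room for a minimal header and shrink until header plus items fit in the slab. It is illustrative only; slab_header_size() is a made-up stand-in for the kernel's slab_sizeof() (which additionally rounds for alignment and, with INVARIANTS, a second debug bitset), and the 4 KB slab size is an assumption.

/* Toy model of the inline-layout search; compile with: cc -o ipers ipers.c */
#include <stdio.h>
#include <stddef.h>

#define	SLABSIZE	4096	/* assumed one-page slab */
#define	NBBY		8	/* bits per byte */

/* Stand-in for slab_sizeof(): a fixed header plus one free bit per item. */
static size_t
slab_header_size(unsigned nitems)
{
	return (32 + (nitems + NBBY - 1) / NBBY);
}

/* Mirror of the new keg_small_init() loop: shrink until the header fits. */
static unsigned
inline_ipers(size_t rsize)
{
	unsigned ipers;

	for (ipers = (SLABSIZE - slab_header_size(1)) / rsize;
	    ipers > 0 && ipers * rsize + slab_header_size(ipers) > SLABSIZE;
	    ipers--)
		continue;
	return (ipers);
}

int
main(void)
{
	size_t sizes[] = { 64, 96, 128, 256, 512 };

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("rsize %4zu -> %u items per slab\n",
		    sizes[i], inline_ipers(sizes[i]));
	return (0);
}

In the real function this count is then compared against PAGESLAB_MAX_ITEMS (for the vm_page-embedded layout) and against the off-page layout's waste threshold before keg->uk_ipers and keg->uk_flags are committed at the "out:" label.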