Index: sys/sys/bitset.h
===================================================================
--- sys/sys/bitset.h
+++ sys/sys/bitset.h
@@ -213,6 +213,44 @@
 	__bit;								\
 })
 
+#define	BIT_FFS_AT(_s, n, p) __extension__ ({				\
+	__size_t __i;							\
+	int __bit;							\
+									\
+	__bit = 0;							\
+	__i = __bitset_word((_s), (n));					\
+	if (__i < __bitset_words((_s)) &&				\
+	    ((p)->__bits[__i] & -__bitset_mask((_s), (n))) != 0) {	\
+		__bit = ffsl((p)->__bits[__i] & -__bitset_mask((_s), (n)));\
+		__bit += __i * _BITSET_BITS;				\
+	} else {							\
+		while (++__i < __bitset_words((_s))) {			\
+			if ((p)->__bits[__i] != 0) {			\
+				__bit = ffsl((p)->__bits[__i]);		\
+				__bit += __i * _BITSET_BITS;		\
+				break;					\
+			}						\
+		}							\
+	}								\
+	__bit;								\
+})
+
+#define	BIT_FFS_AND(_s, p1, p2) __extension__ ({			\
+	__size_t __i;							\
+	int __bit;							\
+									\
+	__bit = 0;							\
+	for (__i = 0; __i < __bitset_words((_s)); __i++) {		\
+		if (((p1)->__bits[__i] & (p2)->__bits[__i]) != 0) {	\
+			__bit = ffsl((p1)->__bits[__i] &		\
+			    (p2)->__bits[__i]);				\
+			__bit += __i * _BITSET_BITS;			\
+			break;						\
+		}							\
+	}								\
+	__bit;								\
+})
+
 #define	BIT_FLS(_s, p) __extension__ ({					\
 	__size_t __i;							\
 	int __bit;							\
Index: sys/vm/vm_phys.h
===================================================================
--- sys/vm/vm_phys.h
+++ sys/vm/vm_phys.h
@@ -39,6 +39,8 @@
 #define	_VM_PHYS_H_
 
 #ifdef _KERNEL
+#include <sys/_bitset.h>
+#include <sys/bitset.h>
 
 /* Domains must be dense (non-sparse) and zero-based. */
 struct mem_affinity {
@@ -52,12 +54,20 @@
 	int lcnt;
 };
 
+#define	VM_NFREESIZES	(1 + (VM_NFREEORDER * (VM_NFREEORDER + 1) / 2))
+BITSET_DEFINE(_freelist_set, VM_NFREESIZES);
+typedef struct _freelist_set freelist_set_t;
+struct vm_sized_freelists {
+	freelist_set_t avail_sizes;
+	struct vm_freelist freelist[VM_NFREESIZES];
+};
+
 struct vm_phys_seg {
 	vm_paddr_t	start;
 	vm_paddr_t	end;
 	vm_page_t	first_page;
 	int		domain;
-	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
+	struct vm_sized_freelists (*free_queues)[VM_NFREEPOOL];
 };
 
 extern struct mem_affinity *mem_affinity;
@@ -86,7 +96,6 @@
 vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa);
 vm_page_t vm_phys_scan_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
     u_long alignment, vm_paddr_t boundary, int options);
-void vm_phys_set_pool(int pool, vm_page_t m, int order);
 boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
 
Index: sys/vm/vm_phys.c
===================================================================
--- sys/vm/vm_phys.c
+++ sys/vm/vm_phys.c
@@ -42,6 +42,7 @@
 #include "opt_ddb.h"
 #include "opt_vm.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/bitset.h>
 #include <sys/lock.h>
@@ -102,8 +103,70 @@
 static struct rwlock vm_phys_fictitious_reg_lock;
 MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");
 
-static struct vm_freelist
-    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
+static struct vm_sized_freelists
+    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL];
+
+static int triroot[VM_NFREESIZES];
+static freelist_set_t best_to_split[VM_NFREESIZES];
+
+/*
+ * For ints lo and hi, lo <= hi, a pair [lo,hi] represents a contiguous
+ * sequence of aligned memory blocks of power-of-two sizes from 2**lo to
+ * 2**hi.  Such a sequence is called a superblock.  pair_to_size computes the
+ * total size of a superblock.
+ */
+static inline int
+pair_to_size(int lo, int hi)
+{
+
+	return ((2 << hi) - (1 << lo));
+}
+
+/*
+ * pair_to_ndx computes a mapping of pairs to array indices such that the
+ * ordering of two indices matches the ordering of the two superblock sizes.
+ */
+static inline int
+pair_to_ndx(int lo, int hi)
+{
+
+	return (hi * (hi + 3) / 2 - lo);
+}
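+
+/*
+ * For illustration (hypothetically taking VM_NFREEORDER == 4), the ten
+ * valid pairs map to indices and superblock sizes, in pages, as follows:
+ *
+ *	ndx:   0     1     2     3     4     5     6     7     8     9
+ *	pair: [0,0] [1,1] [0,1] [2,2] [1,2] [0,2] [3,3] [2,3] [1,3] [0,3]
+ *	size:  1     2     3     4     6     7     8    12    14    15
+ *
+ * so a first-fit search by index is also a best-fit search by size.  The
+ * last index, VM_NFREESIZES - 1, is reserved for the out-of-range pair
+ * [VM_NFREEORDER,VM_NFREEORDER] and never has free superblocks.
+ */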
+
+/*
+ * The inverse of the mapping in pair_to_ndx, using the triroot lookup table,
+ * filled in vm_phys_init, to recover the hi value.
+ */
+static inline void
+ndx_to_pair(int ndx, int *lo, int *hi)
+{
+
+	*hi = triroot[ndx];
+	*lo = *hi * (*hi + 3) / 2 - ndx;
+}
+
+/*
+ * pair_decr updates a [lo,hi] pair to the next smaller pair.
+ */
+static inline void
+pair_decr(int *lo, int *hi)
+{
+
+	if (*hi == *lo) {
+		(*hi)--;
+		*lo = 0;
+	} else
+		(*lo)++;
+}
+
+/*
+ * pair_incr updates a [lo,hi] pair to the next larger pair.
+ */
+static inline void
+pair_incr(int *lo, int *hi)
+{
+
+	if (*lo == 0)
+		*lo = ++(*hi);
+	else
+		(*lo)--;
+}
 
 static int vm_nfreelists;
 
@@ -165,15 +228,16 @@
 #endif
 
 static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
-    int order);
+    int lo, int hi);
 static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary);
 static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
 static int vm_phys_paddr_to_segind(vm_paddr_t pa);
-static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
-    int order);
+static vm_page_t vm_phys_split_pages(vm_page_t m, int m_lo, int m_hi,
+    struct vm_sized_freelists *fl, int lo, int hi);
+static void vm_phys_set_pool(int pool, vm_page_t m);
 
 static int
 sysctl_vm_default_policy(SYSCTL_HANDLER_ARGS)
@@ -359,8 +423,8 @@
 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sbuf;
-	struct vm_freelist *fl;
-	int dom, error, flind, oind, pind;
+	struct vm_sized_freelists *fl;
+	int dom, error, flind, hi, lo, oind, pind;
 
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
@@ -378,15 +442,18 @@
 			for (pind = 0; pind < VM_NFREEPOOL; pind++)
 				sbuf_printf(&sbuf, "--      -- ");
 			sbuf_printf(&sbuf, "--\n");
-			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
+			lo = 0;
+			hi = VM_NFREEORDER - 1;
+			for (oind = VM_NFREESIZES - 2; oind >= 0; oind--) {
 				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
-				    1 << (PAGE_SHIFT - 10 + oind));
+				    pair_to_size(lo, hi) << (PAGE_SHIFT - 10));
 				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
-					fl = vm_phys_free_queues[dom][flind][pind];
+					fl = &vm_phys_free_queues[dom][flind][pind];
 					sbuf_printf(&sbuf, "  |  %6d",
-					    fl[oind].lcnt);
+					    fl->freelist[oind].lcnt);
 				}
 				sbuf_printf(&sbuf, "\n");
+				pair_decr(&lo, &hi);
 			}
@@ -472,24 +538,75 @@
 }
 #endif
 
+/*
+ * Add a superblock to its freelist, and set the order fields of its first and
+ * last blocks to hi and lo, respectively.  Set a bit to mark the list as
+ * nonempty if it was empty before.
+ */
 static void
-vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
+vm_freelist_add(struct vm_sized_freelists *fl, vm_page_t m, int lo, int hi,
+    int tail)
 {
+	int ndx;
 
-	m->order = order;
+	ndx = pair_to_ndx(lo, hi);
 	if (tail)
-		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
+		TAILQ_INSERT_TAIL(&fl->freelist[ndx].pl, m, plinks.q);
 	else
-		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
-	fl[order].lcnt++;
+		TAILQ_INSERT_HEAD(&fl->freelist[ndx].pl, m, plinks.q);
+	if (fl->freelist[ndx].lcnt++ == 0)
+		BIT_SET(VM_NFREESIZES, ndx, &fl->avail_sizes);
+	m->order = hi;
+	m[pair_to_size(lo, hi) - 1].order = lo;
 }
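+
+/*
+ * For illustration: a free [1,3] superblock spans pair_to_size(1, 3) == 14
+ * pages.  vm_freelist_add stores 3 in the first page's order field and 1 in
+ * the last page's, so a block freed on either side can recognize the
+ * superblock and locate its other end without a separate size field.
+ */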
+
+/*
+ * Call vm_freelist_add, and return the address just past the end of the added
+ * superblock.
+ */
+static inline vm_page_t
+vm_freelist_adv(struct vm_sized_freelists *fl, vm_page_t m, int lo, int hi)
+{
+
+	vm_freelist_add(fl, m, lo, hi, 0);
+	return (m + pair_to_size(lo, hi));
+}
+
+/*
+ * If the pair [lo,hi] defines a valid superblock, call vm_freelist_adv.
+ */
+static inline vm_page_t
+vm_freelist_chkadv(struct vm_sized_freelists *fl, vm_page_t m, int lo, int hi)
+{
+
+	return (lo <= hi ? vm_freelist_adv(fl, m, lo, hi) : m);
+}
+
+/*
+ * For a superblock starting at m, and its largest block size hi, return its
+ * smallest block size, which is encoded in the alignment of m's physical
+ * address.
+ */
+static int
+vm_freelist_getlo(vm_page_t m, int hi)
+{
+
+	return (ffsl((VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) | (1 << hi)) - 1);
+}
+
+/*
+ * Remove a superblock from its freelist, clear the order fields of its first
+ * and last blocks, and clear a bit if the list has become empty.
+ */
 static void
-vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
+vm_freelist_rem(struct vm_sized_freelists *fl, vm_page_t m)
 {
+	int m_hi, m_lo, ndx;
 
-	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
-	fl[order].lcnt--;
+	m_hi = m->order;
+	m_lo = vm_freelist_getlo(m, m_hi);
+	ndx = pair_to_ndx(m_lo, m_hi);
+	TAILQ_REMOVE(&fl->freelist[ndx].pl, m, plinks.q);
+	if (--fl->freelist[ndx].lcnt == 0)
+		BIT_CLR(VM_NFREESIZES, ndx, &fl->avail_sizes);
+	m[pair_to_size(m_lo, m_hi) - 1].order = VM_NFREEORDER;
 	m->order = VM_NFREEORDER;
 }
 
@@ -595,10 +712,10 @@
 void
 vm_phys_init(void)
 {
-	struct vm_freelist *fl;
+	struct vm_sized_freelists *fl;
 	struct vm_phys_seg *seg;
 	u_long npages;
-	int dom, flind, freelist, oind, pind, segind;
+	int dom, flind, freelist, hi, lo, oind, pind, segind;
 
 	/*
 	 * Compute the number of free lists, and generate the mapping from the
@@ -695,14 +812,44 @@
 	}
 
 	/*
+	 * If an allocation request cannot be satisfied by a free superblock
+	 * that matches the requested pair, the next best choice is to pick a
+	 * free superblock that can provide the requested pair without turning
+	 * the one free superblock into 2 or 3 free superblocks.
+	 * best_to_split[ndx] is a bitmask that identifies the indices of
+	 * pairs that can be split to produce a free superblock matching the
+	 * requested pair without extra fragmentation.
+	 *
+	 * triroot[ndx] stores the 'hi' value associated with an ndx.
+	 */
+	lo = 0;
+	hi = VM_NFREEORDER - 1;
+	for (oind = VM_NFREESIZES - 2; oind >= 0; oind--) {
+		triroot[oind] = hi;
+		/* Identify matches at the end of a superblock. */
+		for (dom = lo; dom >= 0; dom--)
+			BIT_SET(VM_NFREESIZES, pair_to_ndx(dom, hi),
+			    &best_to_split[oind]);
+		/* Identify matches at the start of a superblock. */
+		for (dom = hi + 1; dom < VM_NFREEORDER; dom++)
+			BIT_SET(VM_NFREESIZES, pair_to_ndx(lo, dom),
+			    &best_to_split[oind]);
+		/* Identify matches that require splitting a lone block. */
+		BIT_SET(VM_NFREESIZES, pair_to_ndx(hi + 1, hi + 1),
+		    &best_to_split[oind]);
+		if (lo == hi) {
+			for (dom = hi + 2; dom <= VM_NFREEORDER; dom++)
+				BIT_SET(VM_NFREESIZES, pair_to_ndx(dom, dom),
+				    &best_to_split[oind]);
+		}
+		pair_decr(&lo, &hi);
+	}
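+
+	/*
+	 * For illustration (hypothetically, with VM_NFREEORDER == 4): for a
+	 * request [1,2] (6 pages), best_to_split holds [1,2] itself, [0,2]
+	 * (split off a leading [0,0]), [1,3] (split off a trailing [3,3]),
+	 * and the lone block [3,3] (split into [1,2] plus [1,1]).  A
+	 * candidate such as [0,3] is excluded, since splitting it would
+	 * leave two free superblocks, [0,0] and [3,3], behind.
+	 */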
+
+	/*
 	 * Initialize the free queues.
 	 */
 	for (dom = 0; dom < vm_ndomains; dom++) {
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
-				fl = vm_phys_free_queues[dom][flind][pind];
-				for (oind = 0; oind < VM_NFREEORDER; oind++)
-					TAILQ_INIT(&fl[oind].pl);
+				fl = &vm_phys_free_queues[dom][flind][pind];
+				for (oind = 0; oind < VM_NFREESIZES; oind++)
+					TAILQ_INIT(&fl->freelist[oind].pl);
 			}
 		}
 	}
@@ -713,19 +860,57 @@
 /*
- * Split a contiguous, power of two-sized set of physical pages.
+ * Split a free superblock [m_lo,m_hi] starting at m to extract the superblock
+ * [lo,hi].  Return the address of the extracted superblock, and give any
+ * leftover pieces, as superblocks, back to the free lists.
  */
-static __inline void
-vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
+static __inline vm_page_t
+vm_phys_split_pages(vm_page_t m, int m_lo, int m_hi,
+    struct vm_sized_freelists *fl, int lo, int hi)
 {
-	vm_page_t m_buddy;
+	vm_page_t m_res;
 
-	while (oind > order) {
-		oind--;
-		m_buddy = &m[1 << oind];
-		KASSERT(m_buddy->order == VM_NFREEORDER,
-		    ("vm_phys_split_pages: page %p has unexpected order %d",
-		    m_buddy, m_buddy->order));
-		vm_freelist_add(fl, m_buddy, oind, 0);
-	}
+	if (m_lo <= lo) {
+		/*
+		 * Free the leading superblock of blocks too small to satisfy
+		 * the request, if any.
+		 */
+		m = vm_freelist_chkadv(fl, m, m_lo, lo - 1);
+
+		/*
+		 * The returned superblock is found within the input
+		 * superblock, without splitting any block.
+		 */
+		m_res = m;
+		m += pair_to_size(lo, hi);
+		m_lo = hi + 1;
+	} else if (lo == hi) {
+		/*
+		 * The returned block can be provided by splitting the first
+		 * block, and adding the remainder to a free list.
+		 */
+		m_res = m;
+		m += pair_to_size(lo, lo);
+		m = vm_freelist_adv(fl, m, lo, m_lo - 1);
+		m_lo++;
+	} else if (m_lo <= hi) {
+		/* The leading blocks of order <= hi are freed whole. */
+		m = vm_freelist_adv(fl, m, m_lo, hi);
+
+		/*
+		 * Split the block of order hi + 1; free a [lo,lo] piece and
+		 * return the rest as the requested [lo,hi] superblock.
+		 */
+		m_res = m = vm_freelist_adv(fl, m, lo, lo);
+		m += pair_to_size(lo, hi);
+		m_lo = hi + 2;
+	} else {
+		/*
+		 * Split the block of order m_lo; free a [lo,lo] piece, return
+		 * the next [lo,hi] piece, and free the rest.
+		 */
+		m_res = m = vm_freelist_adv(fl, m, lo, lo);
+		m += pair_to_size(lo, hi);
+		m = vm_freelist_chkadv(fl, m, hi + 1, m_lo - 1);
+		m_lo++;
+	}
+	/* Free any leftover blocks bigger than the request. */
+	vm_freelist_chkadv(fl, m, m_lo, m_hi);
+
+	return (m_res);
 }
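+
+/*
+ * For illustration: extracting [1,2] (6 pages) from a free [0,3] superblock
+ * (15 pages) takes the first branch above: the leading [0,0] block (1 page)
+ * and the trailing [3,3] block (8 pages) go back to the free lists, and the
+ * 6 pages in between are returned.
+ */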
@@ -776,7 +961,7 @@
 	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
 		for (flind = 0; flind < vm_nfreelists; flind++) {
 			m = vm_phys_alloc_domain_pages(domain, flind, pool,
-			    order);
+			    order, order);
 			if (m != NULL)
 				return (m);
 		}
@@ -812,7 +997,7 @@
 
 	while ((vm_domain_iterator_run(&vi, &domain)) == 0) {
 		m = vm_phys_alloc_domain_pages(domain,
-		    vm_freelist_to_flind[freelist], pool, order);
+		    vm_freelist_to_flind[freelist], pool, order, order);
 		if (m != NULL)
 			return (m);
 	}
@@ -822,41 +1007,61 @@
 }
 
 static vm_page_t
-vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
+vm_phys_alloc_domain_pages(int domain, int flind, int pool, int lo, int hi)
 {
-	struct vm_freelist *fl;
-	struct vm_freelist *alt;
-	int oind, pind;
+	struct vm_sized_freelists *fl;
+	struct vm_sized_freelists *alt;
+	int m_hi, m_lo, ndx, oind, pind;
 	vm_page_t m;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-	fl = &vm_phys_free_queues[domain][flind][pool][0];
-	for (oind = order; oind < VM_NFREEORDER; oind++) {
-		m = TAILQ_FIRST(&fl[oind].pl);
-		if (m != NULL) {
-			vm_freelist_rem(fl, m, oind);
-			vm_phys_split_pages(m, oind, fl, order);
-			return (m);
-		}
-	}
+	fl = &vm_phys_free_queues[domain][flind][pool];
+	ndx = pair_to_ndx(lo, hi);
+	/*
+	 * Look for a free superblock to split that won't leave extra free
+	 * superblocks behind.
+	 */
+	oind = BIT_FFS_AND(VM_NFREESIZES, &fl->avail_sizes,
+	    &best_to_split[ndx]);
+	if (oind == 0)
+		/* Fall back to any free superblock that is big enough. */
+		oind = BIT_FFS_AT(VM_NFREESIZES, ndx, &fl->avail_sizes);
+	if (oind != 0) {
+		oind--;
+		ndx_to_pair(oind, &m_lo, &m_hi);
+		m = TAILQ_FIRST(&fl->freelist[oind].pl);
+		vm_freelist_rem(fl, m);
+		m = vm_phys_split_pages(m, m_lo, m_hi, fl, lo, hi);
+		return (m);
+	}
 
 	/*
-	 * The given pool was empty.  Find the largest
-	 * contiguous, power-of-two-sized set of pages in any
-	 * pool.  Transfer these pages to the given pool, and
-	 * use them to satisfy the allocation.
+	 * The given pool had no large-enough superblock.  Find the largest
+	 * superblock in any pool.  Transfer these pages to the given pool,
+	 * and use them to satisfy the allocation.
 	 */
-	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
+	m_lo = 0;
+	m_hi = VM_NFREEORDER - 1;
+	for (oind = VM_NFREESIZES - 2; oind >= ndx; oind--) {
 		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
-			alt = &vm_phys_free_queues[domain][flind][pind][0];
-			m = TAILQ_FIRST(&alt[oind].pl);
+			alt = &vm_phys_free_queues[domain][flind][pind];
+			m = TAILQ_FIRST(&alt->freelist[oind].pl);
 			if (m != NULL) {
-				vm_freelist_rem(alt, m, oind);
-				vm_phys_set_pool(pool, m, oind);
-				vm_phys_split_pages(m, oind, fl, order);
+				vm_phys_set_pool(pool, m);
+				vm_freelist_rem(alt, m);
+				m = vm_phys_split_pages(m, m_lo, m_hi, fl,
+				    lo, hi);
 				return (m);
 			}
 		}
+		pair_decr(&m_lo, &m_hi);
 	}
 	return (NULL);
 }
@@ -1080,18 +1285,97 @@
 }
 
 /*
- * Free a contiguous, power of two-sized set of physical pages.
+ * Free a superblock, composed of blocks of orders m_lo, m_lo + 1, ..., m_hi.
  *
  * The free page queues must be locked.
  */
-void
-vm_phys_free_pages(vm_page_t m, int order)
+static void
+vm_phys_free_superblock(vm_page_t m, int m_lo, int m_hi)
 {
-	struct vm_freelist *fl;
+	struct vm_sized_freelists *fl;
 	struct vm_phys_seg *seg;
-	vm_paddr_t pa;
+	vm_paddr_t bit, pa;
 	vm_page_t m_buddy;
+	int order;
 
+	order = m_lo;
+	bit = (vm_paddr_t)1 << (PAGE_SHIFT + order);
+	seg = &vm_phys_segs[m->segind];
+	pa = VM_PAGE_TO_PHYS(m);
+	while (order < VM_NFREEORDER - 1) {
+		pa ^= bit;
+		m_buddy = &seg->first_page[atop(pa - seg->start)];
+		if ((pa & bit) == 0) {
+			if (pa < seg->start)
+				break;
+			if (m_lo < order)
+				/* Free space can't grow left anymore. */
+				break;
+			if (m_hi < VM_NFREEORDER - 1 &&
+			    m_buddy->order == m_lo) {
+				/*
+				 * Free space grows left to form one block:
+				 * b[lo,lo] + m[lo,hi] = m[hi + 1,hi + 1]
+				 */
+				m = m_buddy;
+				m_lo = m_hi;
+			} else if ((order = m[-1].order) != VM_NFREEORDER &&
+			    m_buddy[1 << order].order == m_lo - 1) {
+				/*
+				 * Free space grows left as a superblock this
+				 * time only, and can't grow left after that:
+				 * b[order,lo - 1] + m[lo,hi] = m[order,hi]
+				 */
+				m_buddy += 1 << order;
+				m_lo = order;
+				m = m_buddy;
+			} else
+				/*
+				 * Free space can't grow left, but might still
+				 * grow right in the next iteration.
+				 */
+				order = VM_NFREEORDER;
+		} else {
+			if (pa >= seg->end)
+				break;
+			if (m_buddy->order == VM_NFREEORDER ||
+			    order > m_buddy->order)
+				break;
+			if (order == m_lo) {
+				/*
+				 * Free space grows right to form one block:
+				 * m[order,order] + b[order,hi] =
+				 * m[hi + 1,hi + 1]
+				 * This is only valid if the merged block
+				 * would be aligned to its own size.
+				 */
+				if (m_buddy->order >= VM_NFREEORDER - 1 ||
+				    vm_freelist_getlo(m,
+				    VM_NFREEORDER - 1) <= m_buddy->order)
+					break;
+				m_lo = m_hi = m_buddy->order;
+			} else
+				/*
+				 * Free space grows right as a superblock:
+				 * m[lo,order - 1] + b[order,hi] = m[lo,hi]
+				 */
+				m_hi = m_buddy->order;
+		}
+		if (m_buddy->order != VM_NFREEORDER &&
+		    order <= m_buddy->order) {
+			fl = &(*seg->free_queues)[m_buddy->pool];
+			if (m_buddy->pool != m->pool)
+				vm_phys_set_pool(m->pool, m_buddy);
+			vm_freelist_rem(fl, m_buddy);
+		}
+		order = m_hi + 1;
+		if (m_lo == m_hi)
+			m_lo = m_hi = order;
+		bit = (vm_paddr_t)1 << (PAGE_SHIFT + order);
+		pa &= ~(bit - 1);
+	}
+	fl = &(*seg->free_queues)[m->pool];
+	vm_freelist_add(fl, m, m_lo, m_hi, 1);
+}
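+
+/*
+ * For illustration: freeing the lone block [2,2] at page 12 when pages 8-11
+ * hold a free [2,2] block merges them into [3,3] at page 8; freeing the same
+ * block when pages 9-11 hold a free [0,1] superblock instead merges them
+ * into [0,2] at page 9.
+ */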
+ */ +void +vm_phys_free_pages(vm_page_t m, int order) +{ KASSERT(m->order == VM_NFREEORDER, ("vm_phys_free_pages: page %p has unexpected order %d", m, m->order)); @@ -1101,27 +1365,7 @@ KASSERT(order < VM_NFREEORDER, ("vm_phys_free_pages: order %d is out of range", order)); mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - seg = &vm_phys_segs[m->segind]; - if (order < VM_NFREEORDER - 1) { - pa = VM_PAGE_TO_PHYS(m); - do { - pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order)); - if (pa < seg->start || pa >= seg->end) - break; - m_buddy = &seg->first_page[atop(pa - seg->start)]; - if (m_buddy->order != order) - break; - fl = (*seg->free_queues)[m_buddy->pool]; - vm_freelist_rem(fl, m_buddy, order); - if (m_buddy->pool != m->pool) - vm_phys_set_pool(m->pool, m_buddy, order); - order++; - pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1); - m = &seg->first_page[atop(pa - seg->start)]; - } while (order < VM_NFREEORDER - 1); - } - fl = (*seg->free_queues)[m->pool]; - vm_freelist_add(fl, m, order, 1); + vm_phys_free_superblock(m, order, order); } /* @@ -1132,36 +1376,38 @@ void vm_phys_free_contig(vm_page_t m, u_long npages) { - u_int n; - int order; + vm_paddr_t pa; + vm_page_t m_end; + int hi, lo, maxpos; /* * Avoid unnecessary coalescing by freeing the pages in the largest - * possible power-of-two-sized subsets. + * possible sequence-of-powers-of-two-sized subsets. */ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); - for (;; npages -= n) { - /* - * Unsigned "min" is used here so that "order" is assigned - * "VM_NFREEORDER - 1" when "m"'s physical address is zero - * or the low-order bits of its physical address are zero - * because the size of a physical address exceeds the size of - * a long. - */ - order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1, - VM_NFREEORDER - 1); - n = 1 << order; - if (npages < n) - break; - vm_phys_free_pages(m, order); - m += n; + m_end = m + npages; + + /* Find the biggest bit that has to flip walking from m to m_end. */ + pa = VM_PAGE_TO_PHYS(m) ^ VM_PAGE_TO_PHYS(m_end); + maxpos = flsl((pa >> PAGE_SHIFT) | (1 << (VM_NFREEORDER - 1))) - 1; + + /* Free growing maximal power-of-two sequeces. */ + while ((lo = vm_freelist_getlo(m, maxpos)) < maxpos) { + /* Find the last 0-bit before another 1-bit. */ + hi = vm_freelist_getlo(m - (1 << lo), maxpos) - 1; + vm_phys_free_superblock(m, lo, hi); + m += pair_to_size(lo, hi); + } + /* Free max-sized blocks. */ + while (m + pair_to_size(lo, lo) <= m_end) { + vm_phys_free_superblock(m, lo, lo); + m += pair_to_size(lo, lo); } - /* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */ - for (; npages > 0; npages -= n) { - order = flsl(npages) - 1; - n = 1 << order; - vm_phys_free_pages(m, order); - m += n; + /* Free residual powers-of-two blocks of decreasing size. */ + while (m < m_end) { + lo = flsl(m_end - m) - 1; + vm_phys_free_superblock(m, lo, lo); + m += pair_to_size(lo, lo); } } @@ -1218,13 +1464,17 @@ /* * Set the pool for a contiguous, power of two-sized set of physical pages. 
 
 /*
@@ -1218,13 +1484,18 @@
- * Set the pool for a contiguous, power of two-sized set of physical pages.
+ * Set the pool for a free superblock, whose bounds are recovered from the
+ * order and physical address of its first page.
  */
-void
-vm_phys_set_pool(int pool, vm_page_t m, int order)
+static void
+vm_phys_set_pool(int pool, vm_page_t m)
 {
-	vm_page_t m_tmp;
+	int m_hi, m_lo;
+	vm_page_t m_end;
 
-	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
-		m_tmp->pool = pool;
+	m_hi = m->order;
+	m_lo = vm_freelist_getlo(m, m_hi);
+	m_end = &m[pair_to_size(m_lo, m_hi)];
+	while (m < m_end)
+		(m++)->pool = pool;
 }
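+
+/*
+ * For illustration: if a free superblock's first page has order == 3 and the
+ * lowest set bit of its page frame number is bit 1, then the superblock is
+ * [1,3] and spans pair_to_size(1, 3) == 14 pages.
+ */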
@@ -1237,30 +1509,46 @@
 boolean_t
 vm_phys_unfree_page(vm_page_t m)
 {
-	struct vm_freelist *fl;
+	struct vm_sized_freelists *fl;
 	struct vm_phys_seg *seg;
-	vm_paddr_t pa, pa_half;
-	vm_page_t m_set, m_tmp;
-	int order;
+	vm_paddr_t pa;
+	vm_page_t m_set;
+	int b, m_hi, m_lo;
 
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
 
 	/*
-	 * First, find the contiguous, power of two-sized set of free
-	 * physical pages containing the given physical page "m" and
-	 * assign it to "m_set".
+	 * First, find the superblock containing the given physical page "m"
+	 * and assign it to "m_set".
 	 */
 	seg = &vm_phys_segs[m->segind];
-	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
-	    order < VM_NFREEORDER - 1; ) {
-		order++;
-		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
-		if (pa >= seg->start)
-			m_set = &seg->first_page[atop(pa - seg->start)];
-		else
+	m_set = m;
+	m_hi = 0;
+	m_lo = m_set->order;
+	while (m_lo == VM_NFREEORDER && m_hi < VM_NFREEORDER - 1) {
+		m_hi++;
+		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + m_hi));
+		if (pa < seg->start)
+			return (FALSE);
+		if (pa + ((vm_paddr_t)1 << (PAGE_SHIFT + m_hi)) > seg->end)
 			return (FALSE);
+		m_set = &seg->first_page[atop(pa - seg->start)];
+		m_lo = m_set[(1 << m_hi) - 1].order;
 	}
-	if (m_set->order < order)
+	if (m_lo < m_hi)
+		m_set += pair_to_size(m_lo, m_lo);
+	if (m_set->order < m_hi)
 		return (FALSE);
 	if (m_set->order == VM_NFREEORDER)
 		return (FALSE);
 	KASSERT(m_set->order < VM_NFREEORDER,
 	    ("vm_phys_unfree_page: page %p has unexpected order %d",
 	    m_set, m_set->order));
 
 	/*
-	 * Next, remove "m_set" from the free lists.  Finally, extract
-	 * "m" from "m_set" using an iterative algorithm: While "m_set"
-	 * is larger than a page, shrink "m_set" by returning the half
-	 * of "m_set" that does not contain "m" to the free lists.
+	 * Next, remove "m_set" from the free lists.  Finally, extract "m"
+	 * from "m_set" using an iterative algorithm: While "m_set" is larger
+	 * than a page, shrink "m_set" by returning the blocks of "m_set" that
+	 * do not contain "m" to the free lists.
 	 */
-	fl = (*seg->free_queues)[m_set->pool];
-	order = m_set->order;
-	vm_freelist_rem(fl, m_set, order);
-	while (order > 0) {
-		order--;
-		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
-		if (m->phys_addr < pa_half)
-			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
-		else {
-			m_tmp = m_set;
-			m_set = &seg->first_page[atop(pa_half - seg->start)];
-		}
-		vm_freelist_add(fl, m_tmp, order, 0);
+	fl = &(*seg->free_queues)[m_set->pool];
+	m_hi = m_set->order;
+	m_lo = vm_freelist_getlo(m_set, m_hi);
+	vm_freelist_rem(fl, m_set);
+	while (m != m_set) {
+		b = flsl(m - m_set) - 1;
+		if (m_lo == m_hi) {
+			m_set = vm_freelist_adv(fl, m_set, b, b);
+			m_hi--;
+		} else
+			m_set = vm_freelist_chkadv(fl, m_set, m_lo, b - 1);
+		vm_freelist_chkadv(fl, m_set + pair_to_size(b, b), b + 1,
+		    m_hi);
+		m_lo = m_hi = b;
 	}
 	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
+	m_set = vm_freelist_chkadv(fl, m_set + 1, 0, m_lo - 1);
+	vm_freelist_chkadv(fl, m_set, m_lo + 1, m_hi);
 	return (TRUE);
 }
@@ -1360,83 +1648,125 @@
 vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
     vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
-	struct vm_freelist *fl;
-	vm_paddr_t pa, pa_end, size;
+	struct vm_sized_freelists *fl;
+	vm_paddr_t blocksize, pa, pa_end, size;
 	vm_page_t m, m_ret;
-	u_long npages_end;
-	int oind, order, pind;
+	int hi, lo, m_hi, m_lo, nmegapages, offset, oind, order, pind;
 
 	KASSERT(npages > 0, ("npages is 0"));
 	KASSERT(powerof2(alignment), ("alignment is not a power of 2"));
 	KASSERT(powerof2(boundary), ("boundary is not a power of 2"));
 	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
-	/* Compute the queue that is the best fit for npages. */
-	for (order = 0; (1 << order) < npages; order++);
+	/* Find the best-fitting [lo,hi] pair for npages. */
+	nmegapages = npages >> (VM_NFREEORDER - 1);
+	npages -= (u_long)nmegapages << (VM_NFREEORDER - 1);
+	if (npages == 0) {
+		/* npages is a multiple of the maximum block size. */
+		nmegapages--;
+		npages = 1 << (VM_NFREEORDER - 1);
+	}
+	hi = flsl(npages) - 1;
+	lo = ffsl((2 << hi) - npages) - 1;
+	npages += (u_long)nmegapages << (VM_NFREEORDER - 1);
+	if (atop(alignment) > (1 << lo)) {
+		/* Grow lo, hi to match the alignment requirement. */
+		lo = ffsl(atop(alignment)) - 1;
+		hi = imax(lo, hi + 1);
+	}
+	if (hi > VM_NFREEORDER - 1) {
+		/* Don't let alignment requirements exceed max block size. */
+		hi = lo = VM_NFREEORDER - 1;
+	}
+
+	if (nmegapages > 0 && lo < VM_NFREEORDER - 1) {
+		/*
+		 * If the allocation has to be extended with max-order blocks,
+		 * make the initial superblock at least half a max-order block
+		 * big.
+		 */
+		hi = VM_NFREEORDER - 2;
+	}
+	order = pair_to_ndx(lo, hi);
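+	/*
+	 * For illustration: npages == 24 gives the pair [3,4], since a [3,4]
+	 * superblock holds exactly 8 + 16 == 24 pages, while npages == 25
+	 * gives [0,4] (31 pages), whose excess is freed again after the
+	 * allocation.
+	 */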
 
 	/* Search for a run satisfying the specified conditions. */
 	size = npages << PAGE_SHIFT;
-	for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER;
-	    oind++) {
-		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
-			fl = (*seg->free_queues)[pind];
-			TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
-				/*
-				 * Is the size of this allocation request
-				 * larger than the largest block size?
-				 */
-				if (order >= VM_NFREEORDER) {
-					/*
-					 * Determine if a sufficient number of
-					 * subsequent blocks to satisfy the
-					 * allocation request are free.
-					 */
-					pa = VM_PAGE_TO_PHYS(m_ret);
-					pa_end = pa + size;
-					for (;;) {
-						pa += 1 << (PAGE_SHIFT +
-						    VM_NFREEORDER - 1);
-						if (pa >= pa_end ||
-						    pa < seg->start ||
-						    pa >= seg->end)
-							break;
-						m = &seg->first_page[atop(pa -
-						    seg->start)];
-						if (m->order != VM_NFREEORDER -
-						    1)
-							break;
-					}
-					/* If not, go to the next block. */
-					if (pa < pa_end)
-						continue;
-				}
-				/*
-				 * Determine if the blocks are within the
-				 * given range, satisfy the given alignment,
-				 * and do not cross the given boundary.
-				 */
-				pa = VM_PAGE_TO_PHYS(m_ret);
-				pa_end = pa + size;
-				if (pa >= low && pa_end <= high &&
-				    (pa & (alignment - 1)) == 0 &&
-				    rounddown2(pa ^ (pa_end - 1), boundary) ==
-				    0)
-					goto done;
+	blocksize = (vm_paddr_t)1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
+	for (m_lo = lo, m_hi = hi, oind = order, m_ret = NULL;
+	    m_ret == NULL && oind < VM_NFREESIZES;
+	    pair_incr(&m_lo, &m_hi), oind++) {
+		/*
+		 * Compute where vm_phys_split_pages would place the requested
+		 * superblock within a free [m_lo,m_hi] superblock.
+		 */
+		if (m_lo <= lo)
+			offset = pair_to_size(m_lo, lo - 1);
+		else if (lo == hi)
+			offset = 0;
+		else if (m_lo <= hi)
+			offset = pair_to_size(m_lo, hi) + (1 << lo);
+		else
+			offset = 1 << lo;
+		for (pind = 0; m_ret == NULL && pind < VM_NFREEPOOL; pind++) {
+			fl = &(*seg->free_queues)[pind];
+			TAILQ_FOREACH(m_ret, &fl->freelist[oind].pl,
+			    plinks.q) {
+				/*
+				 * Determine if the blocks that would be
+				 * allocated from m_ret are within the given
+				 * range, satisfy the given alignment, and do
+				 * not cross the given boundary.
+				 */
+				pa = VM_PAGE_TO_PHYS(m_ret + offset);
+				pa_end = pa + size;
+				if (pa < low || pa_end > high ||
+				    (pa & (alignment - 1)) != 0 ||
+				    rounddown2(pa ^ (pa_end - 1),
+				    boundary) != 0)
+					continue;
+				/*
+				 * If the request fits entirely within this
+				 * superblock, the search is done.
+				 */
+				if (nmegapages == 0)
+					break;
+				/*
+				 * Determine if a sufficient number of
+				 * subsequent max-order blocks to satisfy the
+				 * allocation request are free.
+				 */
+				m = m_ret + offset + pair_to_size(lo, hi);
+				for (pa = VM_PAGE_TO_PHYS(m);
+				    seg->start <= pa && pa < seg->end &&
+				    pa < pa_end; pa += blocksize) {
+					m = &seg->first_page[atop(pa -
+					    seg->start)];
+					if (m->order != VM_NFREEORDER - 1)
+						break;
+				}
+				/* If so, the search is done. */
+				if (pa >= pa_end)
+					break;
 			}
 		}
 	}
-	return (NULL);
-done:
-	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
-		fl = (*seg->free_queues)[m->pool];
-		vm_freelist_rem(fl, m, m->order);
-	}
+	if (m_ret == NULL)
+		return (NULL);
+
+	/* Recompute the matched pair; the loop increment overshot it. */
+	ndx_to_pair(oind - 1, &m_lo, &m_hi);
+	fl = &(*seg->free_queues)[m_ret->pool];
 	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
-		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
-	fl = (*seg->free_queues)[m_ret->pool];
-	vm_phys_split_pages(m_ret, oind, fl, order);
+		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret);
+	vm_freelist_rem(fl, m_ret);
+	fl = &(*seg->free_queues)[VM_FREEPOOL_DEFAULT];
+	m_ret = vm_phys_split_pages(m_ret, m_lo, m_hi, fl, lo, hi);
+	for (m = m_ret + pair_to_size(lo, hi); m < &m_ret[npages];
+	    m = &m[1 << (VM_NFREEORDER - 1)]) {
+		fl = &(*seg->free_queues)[m->pool];
+		if (m->pool != VM_FREEPOOL_DEFAULT)
+			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m);
+		vm_freelist_rem(fl, m);
+	}
 	/* Return excess pages to the free lists. */
-	npages_end = roundup2(npages, 1 << imin(oind, order));
-	if (npages < npages_end)
-		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
+	if (m > &m_ret[npages])
+		vm_phys_free_contig(&m_ret[npages], m - &m_ret[npages]);
 	return (m_ret);
 }
 
@@ -1446,7 +1776,7 @@
  */
 DB_SHOW_COMMAND(freepages, db_show_freepages)
 {
-	struct vm_freelist *fl;
-	int flind, oind, pind, dom;
+	struct vm_sized_freelists *fl;
+	int dom, flind, hi, lo, oind, pind;
 
 	for (dom = 0; dom < vm_ndomains; dom++) {
@@ -1464,10 +1794,13 @@
-		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
+		lo = 0;
+		hi = VM_NFREEORDER - 1;
+		for (oind = VM_NFREESIZES - 2; oind >= 0; oind--) {
 			db_printf("  %2.2d (%6.6dK)", oind,
-			    1 << (PAGE_SHIFT - 10 + oind));
+			    pair_to_size(lo, hi) << (PAGE_SHIFT - 10));
 			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
-				fl = vm_phys_free_queues[dom][flind][pind];
-				db_printf("  |  %6.6d", fl[oind].lcnt);
+				fl = &vm_phys_free_queues[dom][flind][pind];
+				db_printf("  |  %6.6d",
+				    fl->freelist[oind].lcnt);
 			}
+			pair_decr(&lo, &hi);
 			db_printf("\n");
 		}
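
The pair arithmetic above is self-contained enough to check outside the kernel.  The following userspace sketch is illustrative only and not part of the patch: it assumes VM_NFREEORDER == 13 (the amd64 value) and re-declares the pair helpers verbatim, then verifies that pair_to_ndx orders indices by superblock size and that pair_incr and pair_decr step through indices one at a time:

/* pairchk.c: userspace sanity check; assumes VM_NFREEORDER == 13 (amd64). */
#include <assert.h>
#include <stdio.h>

#define	VM_NFREEORDER	13
#define	VM_NFREESIZES	(1 + (VM_NFREEORDER * (VM_NFREEORDER + 1) / 2))

static int
pair_to_size(int lo, int hi)
{

	return ((2 << hi) - (1 << lo));
}

static int
pair_to_ndx(int lo, int hi)
{

	return (hi * (hi + 3) / 2 - lo);
}

static void
pair_decr(int *lo, int *hi)
{

	if (*hi == *lo) {
		(*hi)--;
		*lo = 0;
	} else
		(*lo)++;
}

static void
pair_incr(int *lo, int *hi)
{

	if (*lo == 0)
		*lo = ++(*hi);
	else
		(*lo)--;
}

int
main(void)
{
	int hi, lo, ndx, prevsize;

	/* Walk all real pairs upward from [0,0] via pair_incr. */
	lo = hi = 0;
	prevsize = 0;
	for (ndx = 0; ndx < VM_NFREESIZES - 1; ndx++) {
		assert(pair_to_ndx(lo, hi) == ndx);
		/* Index order must match superblock size order. */
		assert(pair_to_size(lo, hi) > prevsize);
		prevsize = pair_to_size(lo, hi);
		pair_incr(&lo, &hi);
	}
	/* The last index is the sentinel pair [VM_NFREEORDER,VM_NFREEORDER]. */
	assert(pair_to_ndx(lo, hi) == VM_NFREESIZES - 1);

	/* pair_decr walks back down, one index at a time. */
	for (ndx = VM_NFREESIZES - 1; ndx > 0; ndx--) {
		pair_decr(&lo, &hi);
		assert(pair_to_ndx(lo, hi) == ndx - 1);
	}
	assert(lo == 0 && hi == 0);
	printf("pair arithmetic OK for VM_NFREEORDER == %d\n", VM_NFREEORDER);
	return (0);
}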