diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -751,11 +751,14 @@ return (malloc_large(size, mtp, DOMAINSET_RR(), flags DEBUG_REDZONE_ARG)); - vm_domainset_iter_policy_init(&di, ds, &domain, &flags); - do { - va = malloc_domain(&size, &indx, mtp, domain, flags); - } while (va == NULL && vm_domainset_iter_policy(&di, &domain) == 0); + indx = -1; + va = NULL; + if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) == 0) + do { + va = malloc_domain(&size, &indx, mtp, domain, flags); + } while (va == NULL && vm_domainset_iter_policy(&di, &domain) == 0); malloc_type_zone_allocated(mtp, va == NULL ? 0 : size, indx); + if (__predict_false(va == NULL)) { KASSERT((flags & M_WAITOK) == 0, ("malloc(M_WAITOK) returned NULL")); diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -4017,8 +4017,9 @@ rr = rdomain == UMA_ANYDOMAIN; if (rr) { aflags = (flags & ~M_WAITOK) | M_NOWAIT; - vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, - &aflags); + if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, + &aflags) != 0) + return (NULL); } else { aflags = flags; domain = rdomain; @@ -5245,8 +5246,9 @@ slabs = howmany(items, keg->uk_ipers); while (slabs-- > 0) { aflags = M_NOWAIT; - vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, - &aflags); + if (vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, + &aflags) != 0) + panic("%s: Domainset is empty", __func__); for (;;) { slab = keg_alloc_slab(keg, zone, domain, M_WAITOK, aflags); diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h --- a/sys/vm/vm_domainset.h +++ b/sys/vm/vm_domainset.h @@ -47,13 +47,12 @@ int vm_domainset_iter_page(struct vm_domainset_iter *, struct vm_object *, int *, struct pctrie_iter *); -void vm_domainset_iter_page_init(struct vm_domainset_iter *, - struct vm_object *, vm_pindex_t, int *, int *, - struct pctrie_iter *); +int 
vm_domainset_iter_page_init(struct vm_domainset_iter *, + struct vm_object *, vm_pindex_t, int *, int *); int vm_domainset_iter_policy(struct vm_domainset_iter *, int *); -void vm_domainset_iter_policy_init(struct vm_domainset_iter *, +int vm_domainset_iter_policy_init(struct vm_domainset_iter *, struct domainset *, int *, int *); -void vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *, +int vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *, struct domainset_ref *, int *, int *); void vm_domainset_iter_ignore(struct vm_domainset_iter *, int); diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c --- a/sys/vm/vm_domainset.c +++ b/sys/vm/vm_domainset.c @@ -58,6 +58,9 @@ static int vm_domainset_default_stride = 64; +static bool vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain); + + /* * Determine which policy is to be used for this allocation. */ @@ -93,8 +96,6 @@ pindex += (((uintptr_t)obj) / sizeof(*obj)); di->di_offset = pindex; } - /* Skip domains below min on the first pass. */ - di->di_minskip = true; } static void @@ -103,7 +104,7 @@ /* Grab the next domain in 'ds_order'. */ *domain = di->di_domain->ds_order[ - ++(*di->di_iter) % di->di_domain->ds_cnt]; + (*di->di_iter)++ % di->di_domain->ds_cnt]; } static void @@ -116,31 +117,21 @@ *domain = di->di_domain->ds_order[d]; } -static void -vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) -{ - - KASSERT(!DOMAINSET_EMPTY(&di->di_remain_mask), - ("%s: Already iterated on all domains", __func__)); - vm_domainset_iter_rr(di, domain); - KASSERT(*domain < vm_ndomains, - ("%s: Invalid domain %d", __func__, *domain)); -} - -static void -vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) +/* + * Internal function determining the current phase's first candidate domain. + * + * Returns whether there is an eligible domain, which is returned through + * '*domain'. '*domain' can be modified even if there is no eligible domain. 
+ * + * See herald comment of vm_domainset_iter_first() below about phases. + */ +static bool +vm_domainset_iter_phase_first(struct vm_domainset_iter *di, int *domain) { - switch (di->di_policy) { case DOMAINSET_POLICY_FIRSTTOUCH: *domain = PCPU_GET(domain); - if (DOMAINSET_ISSET(*domain, &di->di_valid_mask)) - break; - /* - * To prevent impossible allocations we convert an invalid - * first-touch to round-robin. - */ - /* FALLTHROUGH */ + break; case DOMAINSET_POLICY_ROUNDROBIN: vm_domainset_iter_rr(di, domain); break; @@ -156,25 +147,114 @@ KASSERT(*domain < vm_ndomains, ("%s: Invalid domain %d", __func__, *domain)); + /* + * Has the policy's start domain already been visited? + */ + if (!DOMAINSET_ISSET(*domain, &di->di_remain_mask)) + return (vm_domainset_iter_next(di, domain)); + + DOMAINSET_CLR(*domain, &di->di_remain_mask); + + /* Does it have enough free pages (phase 1)? */ + if (di->di_minskip && vm_page_count_min_domain(*domain)) { + /* Mark the domain as eligible for phase 2. */ + DOMAINSET_SET(*domain, &di->di_min_mask); + return (vm_domainset_iter_next(di, domain)); + } + + return (true); +} + +/* + * Resets an iterator to point to the first candidate domain. + * + * Returns whether there is an eligible domain to start with. '*domain' may be + * modified even if there is none. + * + * There must have been one call to vm_domainset_iter_init() before. + * + * This function must be called at least once before calling + * vm_domainset_iter_next(). Note that functions wrapping + * vm_domainset_iter_init() usually do that themselves. + * + * This function may be called again to reset the iterator to the policy's first + * candidate domain. After each reset, the iterator will visit the same domains + * as in the previous iteration minus those on which vm_domainset_iter_ignore() + * has been called. Note that the first candidate domain may change at each + * reset (at time of this writing, only on the DOMAINSET_POLICY_ROUNDROBIN + * policy). 
+ * + * Domains which have a number of free pages over 'v_free_min' are always + * visited first (this is called "phase 1" in comments, "phase 2" being the + * examination of the remaining domains; no domains are ever visited twice). + */ +static bool +vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain) +{ /* Initialize the mask of domains to visit. */ + DOMAINSET_COPY(&di->di_valid_mask, &di->di_remain_mask); + /* + * No candidate domains for phase 2 at start. This will be filled by + * phase 1. + */ + DOMAINSET_ZERO(&di->di_min_mask); + /* Skip domains below 'v_free_min' on phase 1. */ + di->di_minskip = true; + + return (vm_domainset_iter_phase_first(di, domain)); +} + +/* + * Advances the iterator to the next candidate domain. + * + * Returns whether there was another domain to visit. '*domain' may be modified + * even if there is none. + * + * vm_domainset_iter_first() must have been called at least once before using + * this function (see its herald comment for more details on iterators). + */ +static bool +vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain) +{ + /* Loop while there remain domains to visit in the current phase. */ + while (!DOMAINSET_EMPTY(&di->di_remain_mask)) { + /* Grab the next domain in 'ds_order'. */ + vm_domainset_iter_rr(di, domain); + KASSERT(*domain < vm_ndomains, + ("%s: Invalid domain %d", __func__, *domain)); + + if (DOMAINSET_ISSET(*domain, &di->di_remain_mask)) { + DOMAINSET_CLR(*domain, &di->di_remain_mask); + if (!di->di_minskip || !vm_page_count_min_domain(*domain)) + return (true); + DOMAINSET_SET(*domain, &di->di_min_mask); + } + } + + /* + * If phase 1 (skip low memory domains) is over, start phase 2 (consider + * low memory domains). + */ if (di->di_minskip) { - /* Phase 1: Skip domains under 'v_free_min'. */ - DOMAINSET_COPY(&di->di_valid_mask, &di->di_remain_mask); - DOMAINSET_ZERO(&di->di_min_mask); - } else - /* Phase 2: Browse domains that were under 'v_free_min'. 
*/ + di->di_minskip = false; + /* Browse domains that were under 'v_free_min'. */ DOMAINSET_COPY(&di->di_min_mask, &di->di_remain_mask); + return (vm_domainset_iter_phase_first(di, domain)); + } - /* Mark first domain as seen. */ - DOMAINSET_CLR(*domain, &di->di_remain_mask); + return (false); } -void +int vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, - vm_pindex_t pindex, int *domain, int *req, struct pctrie_iter *pages) + vm_pindex_t pindex, int *domain, int *req) { struct domainset_ref *dr; + di->di_flags = *req; + *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | + VM_ALLOC_NOWAIT; + /* * Object policy takes precedence over thread policy. The policies * are immutable and unsynchronized. Updates can race but pointer @@ -184,39 +264,21 @@ dr = &obj->domain; else dr = &curthread->td_domain; + vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, obj, pindex); - di->di_flags = *req; - *req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) | - VM_ALLOC_NOWAIT; - vm_domainset_iter_first(di, domain); - if (vm_page_count_min_domain(*domain)) - vm_domainset_iter_page(di, obj, domain, pages); + /* + * XXXOC: Shouldn't we just panic on 'false' if VM_ALLOC_WAITOK was + * passed? + */ + return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM); } int vm_domainset_iter_page(struct vm_domainset_iter *di, struct vm_object *obj, int *domain, struct pctrie_iter *pages) { - if (__predict_false(DOMAINSET_EMPTY(&di->di_valid_mask))) - return (ENOMEM); - - /* If there are more domains to visit in this phase, run the iterator. */ - while (!DOMAINSET_EMPTY(&di->di_remain_mask)) { - vm_domainset_iter_next(di, domain); - if (DOMAINSET_ISSET(*domain, &di->di_remain_mask)) { - DOMAINSET_CLR(*domain, &di->di_remain_mask); - if (!di->di_minskip || !vm_page_count_min_domain(*domain)) - return (0); - DOMAINSET_SET(*domain, &di->di_min_mask); - } - } - - /* If we skipped domains below min restart the search. 
*/ - if (di->di_minskip) { - di->di_minskip = false; - vm_domainset_iter_first(di, domain); + if (vm_domainset_iter_next(di, domain)) return (0); - } /* If we visited all domains and this was a NOWAIT we return error. */ if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0) @@ -235,64 +297,43 @@ return (ENOMEM); /* Restart the search. */ - vm_domainset_iter_first(di, domain); - - return (0); + /* XXXOC: Shouldn't we just panic on 'false'? */ + return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM); } -static void +static int _vm_domainset_iter_policy_init(struct vm_domainset_iter *di, int *domain, int *flags) { - di->di_flags = *flags; *flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT; - vm_domainset_iter_first(di, domain); - if (vm_page_count_min_domain(*domain)) - vm_domainset_iter_policy(di, domain); + /* XXXOC: Shouldn't we just panic on 'false' if M_WAITOK was passed? */ + return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM); } -void +int vm_domainset_iter_policy_init(struct vm_domainset_iter *di, struct domainset *ds, int *domain, int *flags) { vm_domainset_iter_init(di, ds, &curthread->td_domain.dr_iter, NULL, 0); - _vm_domainset_iter_policy_init(di, domain, flags); + return (_vm_domainset_iter_policy_init(di, domain, flags)); } -void +int vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, struct domainset_ref *dr, int *domain, int *flags) { vm_domainset_iter_init(di, dr->dr_policy, &dr->dr_iter, NULL, 0); - _vm_domainset_iter_policy_init(di, domain, flags); + return (_vm_domainset_iter_policy_init(di, domain, flags)); } int vm_domainset_iter_policy(struct vm_domainset_iter *di, int *domain) { - if (DOMAINSET_EMPTY(&di->di_valid_mask)) - return (ENOMEM); - - /* If there are more domains to visit in this phase, run the iterator. 
*/ - while (!DOMAINSET_EMPTY(&di->di_remain_mask)) { - vm_domainset_iter_next(di, domain); - if (DOMAINSET_ISSET(*domain, &di->di_remain_mask)) { - DOMAINSET_CLR(*domain, &di->di_remain_mask); - if (!di->di_minskip || !vm_page_count_min_domain(*domain)) - return (0); - DOMAINSET_SET(*domain, &di->di_min_mask); - } - } - - /* If we skipped domains below min restart the search. */ - if (di->di_minskip) { - di->di_minskip = false; - vm_domainset_iter_first(di, domain); + if (vm_domainset_iter_next(di, domain)) return (0); - } /* If we visited all domains and this was a NOWAIT we return error. */ if ((di->di_flags & M_WAITOK) == 0) @@ -302,9 +343,8 @@ vm_wait_doms(&di->di_valid_mask, 0); /* Restart the search. */ - vm_domainset_iter_first(di, domain); - - return (0); + /* XXXOC: Shouldn't we just panic on 'false'? */ + return (vm_domainset_iter_first(di, domain) ? 0 : ENOMEM); } void @@ -326,12 +366,12 @@ return (EJUSTRETURN); } -void +int vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj, - vm_pindex_t pindex, int *domain, int *flags, struct pctrie_iter *pages) + vm_pindex_t pindex, int *domain, int *flags) { - *domain = 0; + return (0); } int @@ -341,20 +381,20 @@ return (EJUSTRETURN); } -void +int vm_domainset_iter_policy_init(struct vm_domainset_iter *di, struct domainset *ds, int *domain, int *flags) { - *domain = 0; + return (0); } -void +int vm_domainset_iter_policy_ref_init(struct vm_domainset_iter *di, struct domainset_ref *dr, int *domain, int *flags) { - *domain = 0; + return (0); } void diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -453,7 +453,7 @@ obj = vm_thread_kstack_size_to_obj(pages); if (vm_ndomains > 1) obj->domain.dr_policy = ds; - vm_domainset_iter_page_init(&di, obj, 0, &domain, &req, NULL); + vm_domainset_iter_page_init(&di, obj, 0, &domain, &req); do { /* * Get a kernel virtual address for this thread's kstack. 
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -323,7 +323,9 @@ start_segind = -1; - vm_domainset_iter_policy_init(&di, ds, &domain, &flags); + if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) != 0) + return (NULL); + do { addr = kmem_alloc_attr_domain(domain, size, flags, low, high, memattr); @@ -417,7 +419,9 @@ start_segind = -1; - vm_domainset_iter_policy_init(&di, ds, &domain, &flags); + if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags)) + return (NULL); + do { addr = kmem_alloc_contig_domain(domain, size, flags, low, high, alignment, boundary, memattr); @@ -517,7 +521,9 @@ void *addr; int domain; - vm_domainset_iter_policy_init(&di, ds, &domain, &flags); + if (vm_domainset_iter_policy_init(&di, ds, &domain, &flags) != 0) + return (NULL); + do { addr = kmem_malloc_domain(domain, size, flags); if (addr != NULL) diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -2015,8 +2015,9 @@ vm_page_t m; int domain; - vm_domainset_iter_page_init(&di, object, pindex, &domain, &req, - pages); + if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0) + return (NULL); + do { m = vm_page_alloc_domain_iter(object, pindex, domain, req, pages); @@ -2268,7 +2269,9 @@ start_segind = -1; - vm_domainset_iter_page_init(&di, object, pindex, &domain, &req, NULL); + if (vm_domainset_iter_page_init(&di, object, pindex, &domain, &req) != 0) + return (NULL); + do { m = vm_page_alloc_contig_domain(object, pindex, domain, req, npages, low, high, alignment, boundary, memattr); @@ -2596,7 +2599,9 @@ vm_page_t m; int domain; - vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL); + if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0) + return (NULL); + do { m = vm_page_alloc_noobj_domain(domain, req); if (m != NULL) @@ -2615,7 +2620,9 @@ vm_page_t m; int domain; - vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL); + if 
(vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0) + return (NULL); + do { m = vm_page_alloc_noobj_contig_domain(domain, req, npages, low, high, alignment, boundary, memattr); @@ -3334,7 +3341,9 @@ ret = ERANGE; - vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req, NULL); + if (vm_domainset_iter_page_init(&di, NULL, 0, &domain, &req) != 0) + return (ret); + do { status = vm_page_reclaim_contig_domain(domain, req, npages, low, high, alignment, boundary);