diff --git a/sys/vm/vm_domainset.h b/sys/vm/vm_domainset.h
--- a/sys/vm/vm_domainset.h
+++ b/sys/vm/vm_domainset.h
@@ -39,6 +39,12 @@
     bool        di_minskip;
 };
 
+struct vm_domainset_batch_iter {
+    struct vm_domainset_iter di;
+    u_short dbi_obj_color;
+    int     dbi_npages;
+};
+
 int    vm_domainset_iter_page(struct vm_domainset_iter *, struct vm_object *,
            int *);
 void   vm_domainset_iter_page_init(struct vm_domainset_iter *,
@@ -50,6 +56,11 @@
            struct domainset_ref *, int *, int *);
 void   vm_domainset_iter_ignore(struct vm_domainset_iter *, int);
 
+int    vm_domainset_batch_iter_page(struct vm_domainset_batch_iter *,
+           struct vm_object *, vm_pindex_t, int *, int *);
+void   vm_domainset_batch_iter_page_init(struct vm_domainset_batch_iter *,
+           struct vm_object *, vm_pindex_t, int, int *, int *, int *);
+
 int    vm_wait_doms(const domainset_t *, int mflags);
 
 #endif /* __VM_DOMAINSET_H__ */
diff --git a/sys/vm/vm_domainset.c b/sys/vm/vm_domainset.c
--- a/sys/vm/vm_domainset.c
+++ b/sys/vm/vm_domainset.c
@@ -71,23 +71,24 @@
     DOMAINSET_COPY(&ds->ds_mask, &di->di_valid_mask);
     if (di->di_policy == DOMAINSET_POLICY_INTERLEAVE) {
 #if VM_NRESERVLEVEL > 0
-    if (vm_object_reserv(obj)) {
-        /*
-         * Color the pindex so we end up on the correct
-         * reservation boundary.
-         */
-        pindex += obj->pg_color;
-        pindex >>= VM_LEVEL_0_ORDER;
-    } else
+        if (vm_object_reserv(obj)) {
+            /*
+             * Color the pindex so we end up on the correct
+             * reservation boundary.
+             */
+            pindex += obj->pg_color;
+            pindex >>= VM_LEVEL_0_ORDER;
+        } else
 #endif
-    pindex /= vm_domainset_default_stride;
-    /*
-     * Offset pindex so the first page of each object does
-     * not end up in domain 0.
-     */
-    if (obj != NULL)
-        pindex += (((uintptr_t)obj) / sizeof(*obj));
-    di->di_offset = pindex;
+            pindex /= vm_domainset_default_stride;
+        /*
+         * Offset pindex so the first page of each object does
+         * not end up in domain 0.
+         */
+        if (obj != NULL)
+            pindex += (((uintptr_t)obj) / sizeof(*obj));
+        di->di_offset = pindex;
+    }
 
     /* Skip domains below min on the first pass. */
     di->di_minskip = true;
@@ -332,6 +333,136 @@
     DOMAINSET_CLR(domain, &di->di_valid_mask);
 }
 
+/*
+ * Compute how many pages remain before the next interleave boundary
+ * (reservation-sized when reservations are enabled, otherwise the
+ * default stride), so that a batch does not straddle two domains.
+ */
+static void
+vm_domainset_batch_iter_npages_interleave(struct vm_domainset_batch_iter *dbi,
+    struct vm_object *obj, vm_pindex_t pindex, int *npages)
+{
+    vm_pindex_t mask;
+
+#if VM_NRESERVLEVEL > 0
+    mask = (1 << VM_LEVEL_0_ORDER) - 1;
+    if (obj != NULL)
+        pindex += obj->pg_color;
+#else
+    mask = vm_domainset_default_stride - 1;
+#endif
+    *npages = (mask + 1) - (pindex & mask);
+}
+
+/* Compute the batch size for the next domain visited by the iterator. */
+static void
+vm_domainset_batch_iter_npages_next(struct vm_domainset_batch_iter *dbi,
+    int *npages)
+{
+    struct vm_domainset_iter *di = &dbi->di;
+
+    switch (di->di_policy) {
+    case DOMAINSET_POLICY_INTERLEAVE:
+#if VM_NRESERVLEVEL > 0
+        *npages = 1 << VM_LEVEL_0_ORDER;
+#else
+        *npages = vm_domainset_default_stride;
+#endif
+        break;
+    case DOMAINSET_POLICY_FIRSTTOUCH:
+        /* FALLTHROUGH */
+    case DOMAINSET_POLICY_ROUNDROBIN:
+        *npages = dbi->dbi_npages / vm_ndomains;
+        /* Never ask for a zero-sized batch. */
+        if (*npages == 0)
+            *npages = 1;
+        break;
+    case DOMAINSET_POLICY_PREFER:
+        *npages = dbi->dbi_npages;
+        break;
+    default:
+        panic("%s: Unknown policy %d", __func__, di->di_policy);
+    }
+}
+
+/* Compute the batch size for the first domain visited by the iterator. */
+static void
+vm_domainset_batch_iter_npages_first(struct vm_domainset_batch_iter *dbi,
+    struct vm_object *obj, vm_pindex_t pindex, int *npages)
+{
+    struct vm_domainset_iter *di = &dbi->di;
+
+    switch (di->di_policy) {
+    case DOMAINSET_POLICY_FIRSTTOUCH:
+        /* FALLTHROUGH */
+    case DOMAINSET_POLICY_PREFER:
+        *npages = dbi->dbi_npages;
+        break;
+    case DOMAINSET_POLICY_ROUNDROBIN:
+        *npages = dbi->dbi_npages / vm_ndomains;
+        /* Never ask for a zero-sized batch. */
+        if (*npages == 0)
+            *npages = 1;
+        break;
+    case DOMAINSET_POLICY_INTERLEAVE:
+        vm_domainset_batch_iter_npages_interleave(dbi, obj, pindex, npages);
+        break;
+    default:
+        panic("%s: Unknown policy %d", __func__, di->di_policy);
+    }
+}
+
+static void
+vm_domainset_batch_iter_next(struct vm_domainset_batch_iter *dbi,
+    int *domain, int *npages)
+{
+    vm_domainset_iter_next(&dbi->di, domain);
+    vm_domainset_batch_iter_npages_next(dbi, npages);
+}
+
+static void
+vm_domainset_batch_iter_first(struct vm_domainset_batch_iter *dbi,
+    vm_object_t obj, vm_pindex_t pindex, int *domain, int *npages)
+{
+    vm_domainset_iter_first(&dbi->di, domain);
+    vm_domainset_batch_iter_npages_first(dbi, obj, pindex, npages);
+}
+
+void
+vm_domainset_batch_iter_page_init(struct vm_domainset_batch_iter *dbi,
+    struct vm_object *obj, vm_pindex_t pindex, int req_npages, int *domain,
+    int *npages, int *req)
+{
+    vm_domainset_iter_page_init(&dbi->di, obj, pindex, domain, req);
+    dbi->dbi_npages = req_npages;
+    vm_domainset_batch_iter_npages_first(dbi, obj, pindex, npages);
+}
+
+int
+vm_domainset_batch_iter_page(struct vm_domainset_batch_iter *dbi,
+    struct vm_object *obj, vm_pindex_t pindex, int *domain, int *npages)
+{
+    struct vm_domainset_iter *di = &dbi->di;
+
+    /* If there are more domains to visit we run the iterator. */
+    while (--di->di_n != 0) {
+        vm_domainset_batch_iter_next(dbi, domain, npages);
+        if (!di->di_minskip || !vm_page_count_min_domain(*domain))
+            return (0);
+    }
+
+    /* If we skipped domains below min restart the search. */
+    if (di->di_minskip) {
+        di->di_minskip = false;
+        vm_domainset_batch_iter_first(dbi, obj, pindex, domain, npages);
+        return (0);
+    }
+
+    /* If we visited all domains and this was a NOWAIT we return error. */
+    if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
+        return (ENOMEM);
+
+    /* Wait for one of the domains to accumulate some free pages. */
+    if (obj != NULL)
+        VM_OBJECT_WUNLOCK(obj);
+    vm_wait_doms(&di->di_domain->ds_mask, 0);
+    if (obj != NULL)
+        VM_OBJECT_WLOCK(obj);
+    if ((di->di_flags & VM_ALLOC_WAITFAIL) != 0)
+        return (ENOMEM);
+
+    /* Restart the search. */
+    vm_domainset_batch_iter_first(dbi, obj, pindex, domain, npages);
+
+    return (0);
+}
+
 #else /* !NUMA */
 
 int
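
The batch iterator is meant to be driven like the existing vm_domainset_iter_page() loop, except that every step also yields how many pages should be taken from the selected domain before switching. A minimal usage sketch follows; the variable names and the loop body are illustrative only, and vm_page_alloc_pages() below is the real consumer in this patch:

    struct vm_domainset_batch_iter dbi;
    int batch, domain, req;

    req = VM_ALLOC_NORMAL;    /* may be adjusted by the init call */
    /* "obj" is a write-locked object, "pindex" the start offset, "n" the request size. */
    vm_domainset_batch_iter_page_init(&dbi, obj, pindex, n, &domain, &batch, &req);
    do {
        /*
         * Try to take up to "batch" pages from "domain" here; advance
         * pindex by the pages obtained (as vm_page_alloc_pages() does)
         * and stop once the request is satisfied.
         */
    } while (vm_domainset_batch_iter_page(&dbi, obj, pindex, &domain, &batch) == 0);
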
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -614,6 +614,7 @@
     vm_pindex_t pindex, int domain, int req, u_long npages,
     vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr);
+int vm_page_alloc_pages(vm_object_t object, vm_pindex_t pindex, vm_page_t *ma, int npages, int req);
 vm_page_t vm_page_alloc_freelist(int, int);
 vm_page_t vm_page_alloc_freelist_domain(int, int, int);
 vm_page_t vm_page_alloc_noobj(int);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1950,6 +1950,122 @@
     return (m);
 }
 
+/*
+ * Initialize a freshly dequeued page and insert it into "object" at "pindex",
+ * immediately after "mpred".  The page is busied and wired according to "req".
+ */
+static void
+vm_page_alloc_init_page(vm_object_t object, vm_pindex_t pindex, vm_page_t m,
+    vm_page_t mpred, int req)
+{
+    int flags = 0;
+
+    vm_page_dequeue(m);
+    vm_page_alloc_check(m);
+
+    /*
+     * Initialize the page.  Only the PG_ZERO flag is inherited.
+     */
+    flags |= m->flags & PG_ZERO;
+    if ((req & VM_ALLOC_NODUMP) != 0)
+        flags |= PG_NODUMP;
+    m->flags = flags;
+    m->a.flags = 0;
+    m->oflags = (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0;
+    if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
+        m->busy_lock = VPB_CURTHREAD_EXCLUSIVE;
+    else if ((req & VM_ALLOC_SBUSY) != 0)
+        m->busy_lock = VPB_SHARERS_WORD(1);
+    else
+        m->busy_lock = VPB_UNBUSIED;
+    if (req & VM_ALLOC_WIRED) {
+        vm_wire_add(1);
+        m->ref_count = 1;
+    }
+    m->a.act_count = 0;
+
+    if (vm_page_insert_after(m, object, pindex, mpred)) {
+        if (req & VM_ALLOC_WIRED) {
+            vm_wire_sub(1);
+            m->ref_count = 0;
+        }
+        KASSERT(m->object == NULL, ("page %p has object", m));
+        m->oflags = VPO_UNMANAGED;
+        m->busy_lock = VPB_UNBUSIED;
+        /* Don't change PG_ZERO. */
+        vm_page_free_toq(m);
+        if (req & VM_ALLOC_WAITFAIL) {
+            VM_OBJECT_WUNLOCK(object);
+            printf("%s: waiting\n", __func__);
+            vm_radix_wait();
+            VM_OBJECT_WLOCK(object);
+        }
+        /* TODO: figure out how to handle insert failure */
+    }
+
+    /* Ignore device objects; the pager sets "memattr" for them. */
+    if (object->memattr != VM_MEMATTR_DEFAULT &&
+        (object->flags & OBJ_FICTITIOUS) == 0)
+        pmap_page_set_memattr(m, object->memattr);
+}
+
+static int
+vm_page_alloc_pages_domain(vm_object_t object, vm_pindex_t pindex,
+    vm_page_t *ma, int npages, int req, int domain, vm_page_t mpred)
+{
+    int got = 0, rv;
+    vm_page_t m;
+    struct vm_domain *vmd = VM_DOMAIN(domain);
+
+    KASSERT(npages > 0, ("%s: npages is not positive: %d", __func__, npages));
+    /* Allocate as many pages as possible from the reservation. */
+    if (vm_object_reserv(object)) {
+        rv = vm_reserv_alloc_npages(object, pindex, domain, mpred, req,
+            ma, npages);
+        got += rv;
+    }
+
+    /* TODO: select the appropriate free pool. */
+    if (got < npages) {
+        /*
+         * vm_phys_alloc_npages() can allocate at most
+         * 1 << (VM_NFREEORDER - 1) pages at a time.
+         */
+        while (got < npages) {
+            vm_domain_free_lock(vmd);
+            rv = vm_phys_alloc_npages(domain, 0, min(npages - got,
+                1 << (VM_NFREEORDER - 1)), &ma[got]);
+            vm_domain_free_unlock(vmd);
+            if (rv == 0)
+                break;
+            got += rv;
+        }
+    }
+
+    for (int i = 0; i < got; i++) {
+        m = ma[i];
+        vm_page_alloc_init_page(object, pindex + i, m, mpred, req);
+        mpred = m;
+    }
+
+    return (got);
+}
+
+/*
+ * Allocate "npages" pages and insert them into the given object starting at
+ * "pindex".  The allocated pages are returned in "ma".  Returns the number of
+ * pages actually allocated, which may be less than "npages".
+ *
+ * The object must be locked.
+ */
+int
+vm_page_alloc_pages(vm_object_t object, vm_pindex_t pindex, vm_page_t *ma,
+    int npages, int req)
+{
+    struct vm_domainset_batch_iter dbi;
+    vm_page_t mpred;
+    int batch_npages, domain, got;
+
+    VM_OBJECT_ASSERT_WLOCKED(object);
+    got = 0;
+    if (pindex + npages > object->size)
+        npages = object->size - pindex;
+
+    vm_domainset_batch_iter_page_init(&dbi, object, pindex, npages, &domain,
+        &batch_npages, &req);
+    do {
+        mpred = vm_radix_lookup_le(&object->rtree, pindex + got);
+        if (got + batch_npages > npages)
+            batch_npages = npages - got;
+        got += vm_page_alloc_pages_domain(object, pindex + got, &ma[got],
+            batch_npages, req, domain, mpred);
+        if (got == npages)
+            break;
+    } while (vm_domainset_batch_iter_page(&dbi, object, pindex + got,
+        &domain, &batch_npages) == 0);
+
+    return (got);
+}
+
 /*
  * Returns true if the number of free pages exceeds the minimum
  * for the request class and false otherwise.
@@ -2198,7 +2314,7 @@
 vm_page_find_contig_domain(int domain, int req, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
-    struct vm_domain *vmd;
+    struct vm_domain *vmd;
     vm_page_t m_ret;
 
     /*
@@ -4921,7 +5037,8 @@
 {
     vm_page_t m, mpred;
     int pflags;
-    int i;
+    int ncontig = 0;
+    int i, got;
 
     VM_OBJECT_ASSERT_WLOCKED(object);
     KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0,
@@ -4947,27 +5064,49 @@
                     goto retrylookup;
                 break;
             }
+            if (vm_page_none_valid(m) &&
+                (allocflags & VM_ALLOC_ZERO) != 0) {
+                if ((m->flags & PG_ZERO) == 0)
+                    pmap_zero_page(m);
+                vm_page_valid(m);
+            }
+            vm_page_grab_release(m, allocflags);
+            ma[i] = mpred = m;
+            m = vm_page_next(m);
         } else {
             if ((allocflags & VM_ALLOC_NOCREAT) != 0)
                 break;
-            m = vm_page_alloc_after(object, pindex + i,
-                pflags | VM_ALLOC_COUNT(count - i), mpred);
-            if (m == NULL) {
-                if ((allocflags & (VM_ALLOC_NOWAIT |
-                    VM_ALLOC_WAITFAIL)) != 0)
-                    break;
-                goto retrylookup;
-            }
+            if (mpred != NULL) {
+                m = vm_page_next(mpred);
+                if (m != NULL) {
+                    ncontig = m->pindex - mpred->pindex;
+                    if (ncontig > count - i)
+                        ncontig = count - i;
+                } else {
+                    ncontig = count - i;
+                }
+            } else {
+                ncontig = count - i;
+            }
+
+            got = vm_page_alloc_pages(object, pindex + i, &ma[i],
+                ncontig, pflags);
+            if (got == 0) {
+                if ((allocflags & (VM_ALLOC_NOWAIT |
+                    VM_ALLOC_WAITFAIL)) != 0)
+                    break;
+                goto retrylookup;
+            }
+            i += got;
+            /* "m" and "mpred" are recomputed by the lookup at retrylookup. */
+            if (i < count)
+                goto retrylookup;
+            /* m = vm_page_alloc_after(object, pindex + i, */
+            /*     pflags | VM_ALLOC_COUNT(count - i), mpred); */
+            /* if (m == NULL) { */
+            /*     if ((allocflags & (VM_ALLOC_NOWAIT | */
+            /*         VM_ALLOC_WAITFAIL)) != 0) */
+            /*         break; */
+            /*     goto retrylookup; */
+            /* } */
         }
-        if (vm_page_none_valid(m) &&
-            (allocflags & VM_ALLOC_ZERO) != 0) {
-            if ((m->flags & PG_ZERO) == 0)
-                pmap_zero_page(m);
-            vm_page_valid(m);
-        }
-        vm_page_grab_release(m, allocflags);
-        ma[i] = mpred = m;
-        m = vm_page_next(m);
     }
     return (i);
 }
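
To make the calling convention of vm_page_alloc_pages() explicit: the object must be write-locked, the range starting at "pindex" should not already be resident, and the return value is the number of pages actually allocated and inserted, which may fall short of the request. A hedged sketch of a caller (the buffer size and flags are illustrative only, not part of the patch):

    vm_page_t ma[16];
    int got;

    VM_OBJECT_WLOCK(obj);
    got = vm_page_alloc_pages(obj, pindex, ma, nitems(ma),
        VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
    VM_OBJECT_WUNLOCK(obj);
    /*
     * "got" may be less than nitems(ma).  Unless VM_ALLOC_NOBUSY or
     * VM_ALLOC_SBUSY was passed, the returned pages are exclusively busied.
     */
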
diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
--- a/sys/vm/vm_phys.h
+++ b/sys/vm/vm_phys.h
@@ -61,6 +61,7 @@
 void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end);
 vm_page_t vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary);
+int vm_phys_alloc_from(vm_page_t m_start, vm_page_t m_end, int npages, vm_page_t *ma, int domain);
 vm_page_t vm_phys_alloc_freelist_pages(int domain, int freelist, int pool,
     int order);
 int vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[]);
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -844,6 +844,63 @@
     return (i);
 }
 
+/*
+ * Tries to allocate the specified number of pages from the physical address
+ * range [m_start, m_end) within a domain, skipping pages that are already
+ * allocated.  Returns the actual number of allocated pages.
+ *
+ * The free page queues for the specified domain must be locked.
+ */
+int
+vm_phys_alloc_from(vm_page_t m_start, vm_page_t m_end, int npages,
+    vm_page_t *ma, int domain)
+{
+    struct vm_freelist (*queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX];
+    struct vm_freelist *fl;
+    vm_page_t m;
+    u_long rem;
+    int i, j, oind, rem_oind;
+
+    KASSERT(npages > 0, ("%s: npages is not positive", __func__));
+    KASSERT(domain >= 0 && domain < vm_ndomains,
+        ("%s: domain out of range", __func__));
+    KASSERT(m_start != NULL, ("%s: m_start is NULL", __func__));
+    vm_domain_free_assert_locked(VM_DOMAIN(domain));
+    /* TODO: sanity checks for npages and range bounds. */
+    i = 0;
+    m = m_start;
+    while (m < m_end) {
+        KASSERT(vm_phys_segs[m->segind].domain == domain,
+            ("%s: allocation spans to another domain", __func__));
+
+        oind = m->order;
+        /* Skip allocated pages. */
+        if (oind == VM_NFREEORDER) {
+            m++;
+            continue;
+        }
+
+        queues = vm_phys_segs[m->segind].free_queues;
+        fl = (*queues)[m->pool];
+        vm_freelist_rem(fl, m, oind);
+        /*
+         * If taking the whole block would overshoot "m_end" or the
+         * "npages" limit, split it and keep only the largest
+         * power-of-two piece that still fits.
+         */
+        rem = MIN((u_long)(m_end - m), (u_long)(npages - i));
+        if ((1 << oind) > rem) {
+            rem_oind = flsl(rem) - 1;
+            vm_phys_split_pages(m, oind, fl, rem_oind, 0);
+            oind = rem_oind;
+        }
+        for (j = 0; j < (1 << oind); j++)
+            ma[i + j] = &m[j];
+        /* Advance past the pages just allocated. */
+        m += 1 << oind;
+        i += 1 << oind;
+
+        if (i >= npages)
+            break;
+    }
+
+    return (i);
+}
+
 /*
  * Allocate a contiguous, power of two-sized set of physical pages
  * from the free lists.
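
The split logic in vm_phys_alloc_from() comes down to picking the largest buddy order that still fits in the remaining run of pages. A small standalone illustration of that arithmetic, not part of the patch (the helper name is hypothetical):

    /* Largest order o with (1 << o) <= rem; e.g. rem = 13 -> 3, rem = 8 -> 3, rem = 1 -> 0. */
    static int
    order_for_run(u_long rem)
    {
        return (flsl(rem) - 1);
    }
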
diff --git a/sys/vm/vm_reserv.h b/sys/vm/vm_reserv.h
--- a/sys/vm/vm_reserv.h
+++ b/sys/vm/vm_reserv.h
@@ -51,6 +51,8 @@
             vm_paddr_t boundary);
 vm_page_t   vm_reserv_alloc_page(vm_object_t object, vm_pindex_t pindex,
                 int domain, int req, vm_page_t mpred);
+int         vm_reserv_alloc_npages(vm_object_t object, vm_pindex_t pindex,
+                int domain, vm_page_t mpred, int req, vm_page_t *ma, u_long npages);
 void        vm_reserv_break_all(vm_object_t object);
 boolean_t   vm_reserv_free_page(vm_page_t m);
 void        vm_reserv_init(void);
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -753,6 +753,136 @@
     return (m_ret);
 }
 
+/*
+ * Allocates up to "npages" physical pages starting at the offset "pindex"
+ * from an existing or newly created reservation.  The allocated pages are
+ * returned in "ma", and the number of pages actually allocated is returned.
+ * All of the pages come from the same reservation, so the count may be
+ * smaller than "npages".
+ *
+ * The page "mpred" must immediately precede the offset "pindex" within the
+ * specified object.
+ *
+ * The object must be locked.
+ */
+int
+vm_reserv_alloc_npages(vm_object_t object, vm_pindex_t pindex, int domain,
+    vm_page_t mpred, int req, vm_page_t *ma, u_long npages)
+{
+    struct vm_domain *vmd;
+    vm_page_t m, msucc;
+    vm_pindex_t first, leftcap, rightcap;
+    vm_reserv_t rv;
+    int index, got;
+
+    VM_OBJECT_ASSERT_WLOCKED(object);
+    KASSERT(npages != 0, ("%s: npages is 0", __func__));
+    vmd = VM_DOMAIN(domain);
+
+    /*
+     * Is a reservation fundamentally impossible?
+     */
+    if (pindex < VM_RESERV_INDEX(object, pindex) ||
+        pindex >= object->size)
+        return (0);
+
+    /*
+     * Look for an existing reservation.
+     */
+    rv = vm_reserv_from_object(object, pindex, mpred, &msucc);
+    if (rv == NULL) {
+        /*
+         * Could a reservation fit between the first index to the left that
+         * can be used and the first index to the right that cannot be used?
+         *
+         * We must synchronize with the reserv object lock to protect the
+         * pindex/object of the resulting reservations against rename while
+         * we are inspecting.
+         */
+        first = pindex - VM_RESERV_INDEX(object, pindex);
+        vm_reserv_object_lock(object);
+        if (mpred != NULL) {
+            if ((rv = vm_reserv_from_page(mpred))->object != object)
+                leftcap = mpred->pindex + 1;
+            else
+                leftcap = rv->pindex + VM_LEVEL_0_NPAGES;
+            if (leftcap > first) {
+                vm_reserv_object_unlock(object);
+                return (0);
+            }
+        }
+        if (msucc != NULL) {
+            if ((rv = vm_reserv_from_page(msucc))->object != object)
+                rightcap = msucc->pindex;
+            else
+                rightcap = rv->pindex;
+            if (first + VM_LEVEL_0_NPAGES > rightcap) {
+                vm_reserv_object_unlock(object);
+                return (0);
+            }
+        }
+        vm_reserv_object_unlock(object);
+
+        /*
+         * Would the last new reservation extend past the end of the object?
+         *
+         * If the object is unlikely to grow don't allocate a reservation for
+         * the tail.
+         */
+        if ((object->flags & OBJ_ANON) == 0 &&
+            first + VM_LEVEL_0_NPAGES > object->size)
+            return (0);
+
+        /*
+         * Allocate and populate the new reservation.
+         */
+        m = NULL;
+        if (vm_domain_allocate(vmd, req, 1)) {
+            vm_domain_free_lock(vmd);
+            m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
+                VM_LEVEL_0_ORDER);
+            vm_domain_free_unlock(vmd);
+            if (m == NULL) {
+                vm_domain_freecnt_inc(vmd, 1);
+                return (0);
+            }
+        } else
+            return (0);
+
+        rv = vm_reserv_from_page(m);
+        vm_reserv_lock(rv);
+        KASSERT(rv->pages == m,
+            ("%s: reserv %p's pages is corrupted", __func__, rv));
+        vm_reserv_insert(rv, object, first);
+    } else {
+        vm_reserv_lock(rv);
+    }
+
+    KASSERT(object != kernel_object || rv->domain == domain,
+        ("%s: domain mismatch", __func__));
+
+    index = VM_RESERV_INDEX(object, pindex);
+    got = 0;
+    /* Scan the popmap and claim free pages. */
+    for (; index < VM_LEVEL_0_NPAGES && got < npages; index++) {
+        /* Skip pages that are already populated. */
+        if (bit_test(rv->popmap, index))
+            continue;
+        vm_reserv_populate(rv, index);
+        ma[got] = &rv->pages[index];
+        got++;
+    }
+    if (vm_domain_allocate(vmd, req, got) == 0) {
+        /*
+         * TODO: the pages are already populated; the free count
+         * accounting (including the single page charged above when a
+         * new reservation is created) still needs to be resolved.
+         */
+    }
+
+    vm_reserv_unlock(rv);
+    return (got);
+}
+
 /*
  * Allocate a physical page from an existing or newly created reservation.
  *
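
One property of vm_reserv_alloc_npages() worth keeping in mind: all pages come from a single level-0 reservation, so one call can never return more than the distance from "pindex" to the end of that reservation, even when the reservation is entirely free. A hedged sketch of that bound (the helper is illustrative only, not part of the patch):

    /* Upper bound on one call's return value, assuming a fully free reservation. */
    static u_long
    reserv_batch_bound(vm_object_t object, vm_pindex_t pindex)
    {
        return (VM_LEVEL_0_NPAGES - VM_RESERV_INDEX(object, pindex));
    }

vm_page_alloc_pages_domain() compensates for this by falling back to vm_phys_alloc_npages() for whatever the reservation layer does not supply.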