Changeset View
Standalone View
vm/vm_phys.c
Context not available.
	}
}
static int foo[VM_NFREEORDER]; | |||||
static int | |||||
sysctl_debug_foo(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
struct sbuf sbuf; | |||||
int error, oind, savings; | |||||
error = sysctl_wire_old_buffer(req, 0); | |||||
if (error != 0) | |||||
return (error); | |||||
sbuf_new_for_sysctl(&sbuf, NULL, 1024, req); | |||||
sbuf_printf(&sbuf,"\nfoo:"); | |||||
savings = 0; | |||||
for (oind = 0; oind < VM_NFREEORDER; oind++) { | |||||
sbuf_printf(&sbuf, " %d", foo[oind]); | |||||
savings += ((1 << oind) - 1) * foo[oind]; | |||||
} | |||||
//sbuf_printf(&sbuf, "\ncalls avoided: %d\n", savings); | |||||
error = sbuf_finish(&sbuf); | |||||
sbuf_delete(&sbuf); | |||||
return (error); | |||||
} | |||||
SYSCTL_OID(_debug, OID_AUTO, foo, CTLTYPE_STRING | CTLFLAG_RD, | |||||
NULL, 0, sysctl_debug_foo, "A", ""); | |||||
/*
 * XXX
 *
 * The free page queues for the specified domain must be locked.
 */
int
vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t *ma)
{
	/*
	 * Allocate up to "npages" pages from the specified domain's free
	 * page queues, storing them into "ma".  Pages are taken first from
	 * the requested pool's own queues, lowest order first; any remaining
	 * deficit is then filled by sweeping every pool's queues, highest
	 * order first, converting those blocks to the requested pool.
	 * Returns the number of pages actually stored in "ma" (npages on
	 * full success, less if the queues ran dry).
	 *
	 * NOTE(review, alc): I don't believe that an explicit prefetch on
	 * TAILQ_NEXT("m") would do any good here because the first thing
	 * that TAILQ_REMOVE("m") does is touch m's next.
	 */
	struct vm_freelist *alt, *fl;
	vm_page_t m;
	/* "i" is the count of pages stored into ma[] so far. */
	int avail, end, flind, freelist, i, need, oind, pind;

	KASSERT(domain >= 0 && domain < vm_ndomains,
	    ("vm_phys_alloc_npages: domain %d is out of range", domain));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_npages: pool %d is out of range", pool));
	/*
	 * NOTE(review, jeff): This is good, although I wonder if there is
	 * any benefit to passing the page array down a layer.  You could
	 * then not only avoid splitting but also avoid multiple calls to
	 * walk the freelists and order lists.
	 *
	 * NOTE(review, alc): Yes, it's probably worthwhile, but not by as
	 * much as you might hope.  When the state of the free lists is such
	 * that you're doing more calls, because you're returning pages from
	 * order 0, then you're spending less time climbing the order lists
	 * looking for a non-empty list.  I'll give it a try.  In the
	 * "believe it or not" category, clang is completely unrolling the
	 * oind loop that searches for a non-empty free list, which would
	 * make sense if you expected the queues to be empty, but poorly
	 * utilizes the I-cache if you expect the first free list to be
	 * non-empty.
	 *
	 * NOTE(review, jeff): I could benchmark with and without if you
	 * prefer but in my experience even eliminating small overheads from
	 * this path helps a lot because the free page lock is still
	 * semi-hot.  I don't feel too strongly about it though so really
	 * use your discretion and either is good.
	 */
	/* npages must fit within the largest buddy block, order NFREEORDER-1. */
	KASSERT(npages <= 1 << (VM_NFREEORDER - 1),
	    ("vm_phys_alloc_npages: npages %d is out of range", npages));
	vm_domain_free_assert_locked(VM_DOMAIN(domain));
	i = 0;
	for (freelist = 0; freelist < VM_NFREELIST; freelist++) {
		flind = vm_freelist_to_flind[freelist];
		if (flind < 0)
			continue;
		/* Pass 1: the requested pool's queues, lowest order first. */
		fl = vm_phys_free_queues[domain][flind][pool];
		for (oind = 0; oind < VM_NFREEORDER; oind++) {
			while ((m = TAILQ_FIRST(&fl[oind].pl)) != NULL) {
				vm_freelist_rem(fl, m, oind);
				/*
				 * NOTE(review, alc): I am somewhat surprised
				 * by the frequency at which this code is
				 * taking pages from the high-order queues
				 * under a buildworld workload.  With that in
				 * mind, I've asked Doug Moore to look at
				 * further optimizing vm_phys_free_contig() to
				 * avoid unnecessary checks for coalescing
				 * that can't possibly happen.  Alternatively,
				 * to avoid pointless coalescing checks, I
				 * would need to compute fls(npages - i - 1)
				 * on every iteration and perform a
				 * vm_phys_split_pages() here.
				 */
				foo[oind]++;	/* debug counter, see sysctl */
				/* An order-oind block holds 1 << oind pages. */
				avail = 1 << oind;
				need = imin(npages - i, avail);
				end = i + need;
				/*
				 * Hand out the block page by page; "m++"
				 * assumes consecutive vm_page structures
				 * describe physically consecutive pages.
				 */
				while (i < end)
					ma[i++] = m++;
				if (need < avail) {
					/*
					 * The block exceeded the remaining
					 * deficit; return the excess pages
					 * to the free queues.
					 */
					vm_phys_free_contig(m, avail - need);
					return (npages);
				} else if (i == npages)
					return (npages);
			}
		}
		/*
		 * Pass 2: sweep every pool's queues, highest order first,
		 * converting each block taken to the requested pool.
		 */
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				alt = vm_phys_free_queues[domain][flind][pind];
				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
				    NULL) {
					vm_freelist_rem(alt, m, oind);
					foo[oind]++;	/* debug counter */
					vm_phys_set_pool(pool, m, oind);
					avail = 1 << oind;
					need = imin(npages - i, avail);
					end = i + need;
					while (i < end)
						ma[i++] = m++;
					if (need < avail) {
						vm_phys_free_contig(m, avail -
						    need);
						return (npages);
					} else if (i == npages)
						return (npages);
				}
			}
		}
	}
	/* The queues ran dry; report a partial (possibly zero) allocation. */
	return (i);
}
/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
Context not available.
	return (NULL);
}
int | |||||
vm_phys_alloc_npages(int domain, int pool, vm_page_t *mp, int cnt) | |||||
{ | |||||
vm_page_t m; | |||||
int order, freelist; | |||||
for (freelist = 0; freelist < VM_NFREELIST; freelist++) { | |||||
for (order = fls(cnt) -1; order >= 0; order--) { | |||||
m = vm_phys_alloc_freelist_pages(domain, freelist, | |||||
pool, order); | |||||
if (m != NULL) { | |||||
*mp = m; | |||||
return (1 << order); | |||||
} | |||||
} | |||||
} | |||||
*mp = NULL; | |||||
return (0); | |||||
} | |||||
/*
 * Allocate a contiguous, power of two-sized set of physical pages from the
 * specified free list.  The free list must be specified using one of the
Context not available.
I don't believe that an explicit prefetch on TAILQ_NEXT("m") would do any good here because the first thing that TAILQ_REMOVE("m") does is touch m's next. Maybe a prefetch on TAILQ_NEXT(TAILQ_NEXT("m")) would do some good.