Index: sys/vm/vm_reserv.c
===================================================================
--- sys/vm/vm_reserv.c
+++ sys/vm/vm_reserv.c
@@ -181,10 +181,11 @@
  *
  * A partially populated reservation can be broken and reclaimed at any time.
  *
- * r - vm_reserv_lock
+ * c - constant after boot
  * d - vm_reserv_domain_lock
  * o - vm_reserv_object_lock
- * c - constant after boot
+ * r - vm_reserv_lock
+ * s - vm_reserv_domain_scan_lock
  */
 struct vm_reserv {
 	struct mtx	lock;			/* reservation lock. */
@@ -200,6 +201,8 @@
 	popmap_t	popmap[NPOPMAP_MAX];	/* (r) bit vector, used pages */
 };
 
+TAILQ_HEAD(vm_reserv_queue, vm_reserv);
+
 #define	vm_reserv_lockptr(rv)		(&(rv)->lock)
 #define	vm_reserv_assert_locked(rv)					\
 	mtx_assert(vm_reserv_lockptr(rv), MA_OWNED)
@@ -238,10 +241,13 @@
  * is the least recently changed, partially populated reservation.
  *
  * Access to this queue is synchronized by the per-domain reservation lock.
+ * Threads reclaiming free pages from the queue must hold the per-domain scan
+ * lock.
  */
 struct vm_reserv_domain {
-	struct mtx lock;
-	TAILQ_HEAD(, vm_reserv) partpop;
+	struct mtx		lock;
+	struct vm_reserv_queue	partpop;	/* (d) */
+	struct vm_reserv	marker;		/* (d, s) scan marker/lock */
 } __aligned(CACHE_LINE_SIZE);
 
 static struct vm_reserv_domain vm_rvd[MAXMEMDOM];
@@ -250,6 +256,9 @@
 #define	vm_reserv_domain_lock(d)	mtx_lock(vm_reserv_domain_lockptr(d))
 #define	vm_reserv_domain_unlock(d)	mtx_unlock(vm_reserv_domain_lockptr(d))
 
+#define	vm_reserv_domain_scan_lock(d)	mtx_lock(&vm_rvd[(d)].marker.lock)
+#define	vm_reserv_domain_scan_unlock(d)	mtx_unlock(&vm_rvd[(d)].marker.lock)
+
 static SYSCTL_NODE(_vm, OID_AUTO, reserv, CTLFLAG_RD, 0, "Reservation Info");
 
 static counter_u64_t vm_reserv_broken = EARLY_COUNTER;
@@ -350,6 +359,8 @@
 	unused_pages = 0;
 	vm_reserv_domain_lock(domain);
 	TAILQ_FOREACH(rv, &vm_rvd[domain].partpop, partpopq) {
+		if (rv == &vm_rvd[domain].marker)
+			continue;
 		counter++;
 		unused_pages += VM_LEVEL_0_NPAGES - rv->popcnt;
 	}
@@ -1047,7 +1058,8 @@
 	vm_paddr_t paddr;
 	struct vm_phys_seg *seg;
 	struct vm_reserv *rv;
-	int i, segind;
+	struct vm_reserv_domain *rvd;
+	int i, j, segind;
 
 	/*
 	 * Initialize the reservation array.  Specifically, initialize the
@@ -1066,8 +1078,18 @@
 		}
 	}
 	for (i = 0; i < MAXMEMDOM; i++) {
-		mtx_init(&vm_rvd[i].lock, "VM reserv domain", NULL, MTX_DEF);
-		TAILQ_INIT(&vm_rvd[i].partpop);
+		rvd = &vm_rvd[i];
+		mtx_init(&rvd->lock, "vm reserv domain", NULL, MTX_DEF);
+		TAILQ_INIT(&rvd->partpop);
+		mtx_init(&rvd->marker.lock, "vm reserv marker", NULL, MTX_DEF);
+
+		/*
+		 * Fully populated reservations should never be present in the
+		 * partially populated reservation queues.
+		 */
+		rvd->marker.popcnt = VM_LEVEL_0_NPAGES;
+		for (j = 0; j < NBPOPMAP; j++)
+			popmap_set(rvd->marker.popmap, j);
 	}
 
 	for (i = 0; i < VM_RESERV_OBJ_LOCK_COUNT; i++)
@@ -1149,18 +1171,23 @@
 vm_reserv_reclaim_inactive(int domain)
 {
 	vm_reserv_t rv;
+	boolean_t ret;
+
+	ret = FALSE;
 
-	while ((rv = TAILQ_FIRST(&vm_rvd[domain].partpop)) != NULL) {
+	vm_reserv_domain_scan_lock(domain);
+	while (!ret && (rv = TAILQ_FIRST(&vm_rvd[domain].partpop)) != NULL) {
+		/* The scan lock must be held in order to enqueue the marker. */
+		KASSERT(rv != &vm_rvd[domain].marker,
+		    ("vm_reserv_reclaim_inactive: queue contains marker"));
 		vm_reserv_lock(rv);
-		if (rv != TAILQ_FIRST(&vm_rvd[domain].partpop)) {
-			vm_reserv_unlock(rv);
-			continue;
-		}
-		vm_reserv_reclaim(rv);
+		ret = rv == TAILQ_FIRST(&vm_rvd[domain].partpop);
+		if (ret)
+			vm_reserv_reclaim(rv);
 		vm_reserv_unlock(rv);
-		return (TRUE);
 	}
-	return (FALSE);
+	vm_reserv_domain_scan_unlock(domain);
+	return (ret);
 }
 
 /*
@@ -1250,16 +1277,19 @@
 vm_reserv_reclaim_contig(int domain, u_long npages, vm_paddr_t low,
     vm_paddr_t high, u_long alignment, vm_paddr_t boundary)
 {
+	struct vm_reserv_queue *queue;
 	vm_paddr_t pa, size;
-	vm_reserv_t rv, rvn;
+	vm_reserv_t marker, rv, rvn;
 
 	if (npages > VM_LEVEL_0_NPAGES - 1)
 		return (false);
+	marker = &vm_rvd[domain].marker;
+	queue = &vm_rvd[domain].partpop;
 	size = npages << PAGE_SHIFT;
+
+	vm_reserv_domain_scan_lock(domain);
 	vm_reserv_domain_lock(domain);
-again:
-	for (rv = TAILQ_FIRST(&vm_rvd[domain].partpop); rv != NULL; rv = rvn) {
-		rvn = TAILQ_NEXT(rv, partpopq);
+	TAILQ_FOREACH_SAFE(rv, queue, partpopq, rvn) {
 		pa = VM_PAGE_TO_PHYS(&rv->pages[0]);
 		if (pa + VM_LEVEL_0_SIZE - size < low) {
 			/* This entire reservation is too low; go to next. */
@@ -1269,29 +1299,35 @@
 			/* This entire reservation is too high; go to next. */
 			continue;
 		}
+
 		if (vm_reserv_trylock(rv) == 0) {
+			TAILQ_INSERT_AFTER(queue, rv, marker, partpopq);
 			vm_reserv_domain_unlock(domain);
 			vm_reserv_lock(rv);
-			if (!rv->inpartpopq) {
+			if (!rv->inpartpopq ||
+			    TAILQ_NEXT(rv, partpopq) != marker) {
+				vm_reserv_unlock(rv);
 				vm_reserv_domain_lock(domain);
-				if (!rvn->inpartpopq)
-					goto again;
+				rvn = TAILQ_NEXT(marker, partpopq);
+				TAILQ_REMOVE(queue, marker, partpopq);
 				continue;
 			}
-		} else
-			vm_reserv_domain_unlock(domain);
+			vm_reserv_domain_lock(domain);
+			TAILQ_REMOVE(queue, marker, partpopq);
+		}
+		vm_reserv_domain_unlock(domain);
 		if (vm_reserv_test_contig(rv, npages, low, high,
 		    alignment, boundary)) {
 			vm_reserv_reclaim(rv);
 			vm_reserv_unlock(rv);
+			vm_reserv_domain_scan_unlock(domain);
 			return (true);
 		}
		vm_reserv_unlock(rv);
 		vm_reserv_domain_lock(domain);
-		if (rvn != NULL && !rvn->inpartpopq)
-			goto again;
 	}
 	vm_reserv_domain_unlock(domain);
+	vm_reserv_domain_scan_unlock(domain);
 	return (false);
 }
 
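Commentary on the marker technique, for readers who have not seen it before: the sketch below reproduces the same mechanics in a minimal, single-threaded user-space program. It assumes a BSD-style <sys/queue.h> that provides TAILQ_FOREACH_SAFE (glibc's does not), and the names item, itemq, scan, queue_lock, and on_queue are invented for illustration; none of them appear in vm_reserv.c. Note also that pthread_mutex_trylock() returns 0 on success, the opposite sense of the kernel's mtx_trylock(), hence the inverted test.

/*
 * Minimal, single-threaded sketch of the scan-marker pattern used in the
 * patch above.  Assumptions: a BSD-style <sys/queue.h> providing
 * TAILQ_FOREACH_SAFE; all names below are invented for illustration.
 */
#include <sys/queue.h>

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
	pthread_mutex_t	lock;		/* cf. the per-reservation lock */
	bool		on_queue;	/* cf. rv->inpartpopq */
	int		id;
	TAILQ_ENTRY(item) link;
};

TAILQ_HEAD(itemq, item);

static struct itemq	queue = TAILQ_HEAD_INITIALIZER(queue);
static pthread_mutex_t	queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item	marker;		/* cf. vm_rvd[domain].marker */

/*
 * Visit every item, sleeping on contended item locks.  Before the queue
 * lock is dropped, the marker is inserted after the current item to pin
 * the scan position: if the item is removed or requeued while the queue
 * lock is dropped, it no longer precedes the marker, and the scan resumes
 * from the marker instead of trusting stale linkage.
 */
static void
scan(void)
{
	struct item *it, *next;

	pthread_mutex_lock(&queue_lock);
	TAILQ_FOREACH_SAFE(it, &queue, link, next) {
		/* pthread_mutex_trylock() returns 0 on success. */
		if (pthread_mutex_trylock(&it->lock) != 0) {
			TAILQ_INSERT_AFTER(&queue, it, &marker, link);
			pthread_mutex_unlock(&queue_lock);
			pthread_mutex_lock(&it->lock);	/* may sleep */
			if (!it->on_queue ||
			    TAILQ_NEXT(it, link) != &marker) {
				/* The item moved; resume from the marker. */
				pthread_mutex_unlock(&it->lock);
				pthread_mutex_lock(&queue_lock);
				next = TAILQ_NEXT(&marker, link);
				TAILQ_REMOVE(&queue, &marker, link);
				continue;
			}
			pthread_mutex_lock(&queue_lock);
			TAILQ_REMOVE(&queue, &marker, link);
		}
		printf("visited item %d\n", it->id);
		pthread_mutex_unlock(&it->lock);
	}
	pthread_mutex_unlock(&queue_lock);
}

int
main(void)
{
	struct item items[4];

	for (int i = 0; i < 4; i++) {
		pthread_mutex_init(&items[i].lock, NULL);
		items[i].on_queue = true;
		items[i].id = i;
		TAILQ_INSERT_TAIL(&queue, &items[i], link);
	}
	scan();
	return (0);
}

Two design points carry over directly from the patch. First, taking the item lock before reacquiring the queue lock is deadlock-free only because queue-lock holders never block on item locks; they only trylock. Second, there is a single marker per queue, so concurrent scanners would race to insert it; that is exactly what the per-domain scan lock serializes in the kernel, and it is omitted here because the demo has one scanner. In this demo the trylock never fails, so the marker path is compiled but not exercised; a second thread holding an item lock across the scan would drive it.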