Changeset View
Standalone View
sys/vm/vm_reserv.c
Show All 32 Lines | |||||
/* | /* | ||||
* Superpage reservation management module | * Superpage reservation management module | ||||
* | * | ||||
* Any external functions defined by this module are only to be used by the | * Any external functions defined by this module are only to be used by the | ||||
* virtual memory system. | * virtual memory system. | ||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
markj: Stray newline. | |||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_vm.h" | #include "opt_vm.h" | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
▲ Show 20 Lines • Show All 179 Lines • ▼ Show 20 Lines | |||||
* An "active" reservation is a valid reservation structure that has a non-NULL | * An "active" reservation is a valid reservation structure that has a non-NULL | ||||
* "object" field and a non-zero "popcnt" field. In other words, every active | * "object" field and a non-zero "popcnt" field. In other words, every active | ||||
* reservation belongs to a particular object. Moreover, every active | * reservation belongs to a particular object. Moreover, every active | ||||
* reservation has an entry in the containing object's list of reservations. | * reservation has an entry in the containing object's list of reservations. | ||||
*/ | */ | ||||
static vm_reserv_t vm_reserv_array; | static vm_reserv_t vm_reserv_array; | ||||
/* | /* | ||||
* The per-domain partially populated reservation queues | * The per-domain partially populated reservation queues | ||||
Done Inline ActionsStyle: the opening brace should be on its own line. markj: Style: the opening brace should be on its own line. | |||||
Done Inline ActionsThis function is defined as static, but I don't see a caller in this file. alc: This function is defined as static, but I don't see a caller in this file. | |||||
Done Inline ActionsIt was vestigial, deleted scottph: It was vestigial, deleted | |||||
* | * | ||||
* These queues enable the fast recovery of an unused free small page from a | * These queues enable the fast recovery of an unused free small page from a | ||||
* partially populated reservation. The reservation at the head of a queue | * partially populated reservation. The reservation at the head of a queue | ||||
* is the least recently changed, partially populated reservation. | * is the least recently changed, partially populated reservation. | ||||
* | * | ||||
* Access to this queue is synchronized by the per-domain reservation lock. | * Access to this queue is synchronized by the per-domain reservation lock. | ||||
* Threads reclaiming free pages from the queue must hold the per-domain scan | * Threads reclaiming free pages from the queue must hold the per-domain scan | ||||
* lock. | * lock. | ||||
*/ | */ | ||||
struct vm_reserv_domain { | struct vm_reserv_domain { | ||||
struct mtx lock; | struct mtx lock; | ||||
Done Inline ActionsUse #%jx instead of 0x%jx kib: Use #%jx instead of 0x%jx | |||||
struct vm_reserv_queue partpop; /* (d) */ | struct vm_reserv_queue partpop; /* (d) */ | ||||
struct vm_reserv marker; /* (d, s) scan marker/lock */ | struct vm_reserv marker; /* (d, s) scan marker/lock */ | ||||
} __aligned(CACHE_LINE_SIZE); | } __aligned(CACHE_LINE_SIZE); | ||||
static struct vm_reserv_domain vm_rvd[MAXMEMDOM]; | static struct vm_reserv_domain vm_rvd[MAXMEMDOM]; | ||||
#define vm_reserv_domain_lockptr(d) (&vm_rvd[(d)].lock) | #define vm_reserv_domain_lockptr(d) (&vm_rvd[(d)].lock) | ||||
#define vm_reserv_domain_assert_locked(d) \ | #define vm_reserv_domain_assert_locked(d) \ | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS) | ||||
struct vm_phys_seg *seg; | struct vm_phys_seg *seg; | ||||
vm_reserv_t rv; | vm_reserv_t rv; | ||||
int fullpop, segind; | int fullpop, segind; | ||||
fullpop = 0; | fullpop = 0; | ||||
for (segind = 0; segind < vm_phys_nsegs; segind++) { | for (segind = 0; segind < vm_phys_nsegs; segind++) { | ||||
seg = &vm_phys_segs[segind]; | seg = &vm_phys_segs[segind]; | ||||
paddr = roundup2(seg->start, VM_LEVEL_0_SIZE); | paddr = roundup2(seg->start, VM_LEVEL_0_SIZE); | ||||
#ifdef VM_PHYSSEG_SPARSE | |||||
rv = seg->first_reserv + (paddr >> VM_LEVEL_0_SHIFT) - | |||||
(seg->start >> VM_LEVEL_0_SHIFT); | |||||
#else | |||||
rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT]; | |||||
#endif | |||||
while (paddr + VM_LEVEL_0_SIZE > paddr && paddr + | while (paddr + VM_LEVEL_0_SIZE > paddr && paddr + | ||||
VM_LEVEL_0_SIZE <= seg->end) { | VM_LEVEL_0_SIZE <= seg->end) { | ||||
rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT]; | |||||
fullpop += rv->popcnt == VM_LEVEL_0_NPAGES; | fullpop += rv->popcnt == VM_LEVEL_0_NPAGES; | ||||
paddr += VM_LEVEL_0_SIZE; | paddr += VM_LEVEL_0_SIZE; | ||||
rv++; | |||||
} | } | ||||
} | } | ||||
return (sysctl_handle_int(oidp, &fullpop, 0, req)); | return (sysctl_handle_int(oidp, &fullpop, 0, req)); | ||||
} | } | ||||
/* | /* | ||||
* Describes the current state of the partially populated reservation queue. | * Describes the current state of the partially populated reservation queue. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 142 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
* Returns the reservation to which the given page might belong. | * Returns the reservation to which the given page might belong. | ||||
*/ | */ | ||||
static __inline vm_reserv_t | static __inline vm_reserv_t | ||||
vm_reserv_from_page(vm_page_t m) | vm_reserv_from_page(vm_page_t m) | ||||
{ | { | ||||
#ifdef VM_PHYSSEG_SPARSE | |||||
struct vm_phys_seg *seg; | |||||
seg = &vm_phys_segs[m->segind]; | |||||
return (seg->first_reserv + (m->phys_addr >> VM_LEVEL_0_SHIFT) - | |||||
alcUnsubmitted Done Inline ActionsVM_PAGE_TO_PHYS(m) instead of m->phys_addr alc: `VM_PAGE_TO_PHYS(m)` instead of `m->phys_addr` | |||||
(seg->start >> VM_LEVEL_0_SHIFT)); | |||||
#else | |||||
return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]); | return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]); | ||||
#endif | |||||
Done Inline ActionsSo here we are going to loop over the phys_seg array, but the vm_page already contains the segment index for the segment to which it belongs. markj: So here we are going to loop over the phys_seg array, but the vm_page already contains the… | |||||
Done Inline ActionsEven though the iteration has been eliminated, I'd rather not see VM_PHYSSEG_DENSE architectures pessimized by the extra complexity. Just like the vm_page_array, which method we use should be selected at compile time by VM_PHYSSEG_DENSE versus VM_PHYSSEG_SPARSE. alc: Even though the iteration has been eliminated, I'd rather not see VM_PHYSSEG_DENSE… | |||||
Done Inline ActionsI've predicated the sparse paths on VM_PHYSSEG_SPARSE scottph: I've predicated the sparse paths on VM_PHYSSEG_SPARSE | |||||
} | } | ||||
/* | /* | ||||
* Returns an existing reservation or NULL and initialized successor pointer. | * Returns an existing reservation or NULL and initialized successor pointer. | ||||
*/ | */ | ||||
static vm_reserv_t | static vm_reserv_t | ||||
vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex, | vm_reserv_from_object(vm_object_t object, vm_pindex_t pindex, | ||||
vm_page_t mpred, vm_page_t *msuccp) | vm_page_t mpred, vm_page_t *msuccp) | ||||
▲ Show 20 Lines • Show All 539 Lines • ▼ Show 20 Lines | |||||
* Requires that vm_page_array and first_page are initialized! | * Requires that vm_page_array and first_page are initialized! | ||||
*/ | */ | ||||
void | void | ||||
vm_reserv_init(void) | vm_reserv_init(void) | ||||
{ | { | ||||
vm_paddr_t paddr; | vm_paddr_t paddr; | ||||
struct vm_phys_seg *seg; | struct vm_phys_seg *seg; | ||||
struct vm_reserv *rv; | struct vm_reserv *rv; | ||||
#ifdef VM_PHYSSEG_SPARSE | |||||
struct vm_reserv *first; | |||||
#endif | |||||
struct vm_reserv_domain *rvd; | struct vm_reserv_domain *rvd; | ||||
int i, j, segind; | int i, j, segind; | ||||
/* | /* | ||||
* Initialize the reservation array. Specifically, initialize the | * Initialize the reservation array. Specifically, initialize the | ||||
* "pages" field for every element that has an underlying superpage. | * "pages" field for every element that has an underlying superpage. | ||||
*/ | */ | ||||
#ifdef VM_PHYSSEG_SPARSE | |||||
first = vm_reserv_array; | |||||
#endif | |||||
for (segind = 0; segind < vm_phys_nsegs; segind++) { | for (segind = 0; segind < vm_phys_nsegs; segind++) { | ||||
seg = &vm_phys_segs[segind]; | seg = &vm_phys_segs[segind]; | ||||
paddr = roundup2(seg->start, VM_LEVEL_0_SIZE); | paddr = roundup2(seg->start, VM_LEVEL_0_SIZE); | ||||
#ifdef VM_PHYSSEG_SPARSE | |||||
seg->first_reserv = first; | |||||
rv = first + (paddr >> VM_LEVEL_0_SHIFT) - | |||||
(seg->start >> VM_LEVEL_0_SHIFT); | |||||
#else | |||||
alcUnsubmitted Done Inline ActionsI would suggest initializing seg->first_reserv under VM_PHYSSEG_DENSE even though we don't currently use it. alc: I would suggest initializing seg->first_reserv under VM_PHYSSEG_DENSE even though we don't… | |||||
rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT]; | |||||
#endif | |||||
while (paddr + VM_LEVEL_0_SIZE > paddr && paddr + | while (paddr + VM_LEVEL_0_SIZE > paddr && paddr + | ||||
VM_LEVEL_0_SIZE <= seg->end) { | VM_LEVEL_0_SIZE <= seg->end) { | ||||
rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT]; | |||||
rv->pages = PHYS_TO_VM_PAGE(paddr); | rv->pages = PHYS_TO_VM_PAGE(paddr); | ||||
rv->domain = seg->domain; | rv->domain = seg->domain; | ||||
mtx_init(&rv->lock, "vm reserv", NULL, MTX_DEF); | mtx_init(&rv->lock, "vm reserv", NULL, MTX_DEF); | ||||
paddr += VM_LEVEL_0_SIZE; | paddr += VM_LEVEL_0_SIZE; | ||||
rv++; | |||||
} | } | ||||
#ifdef VM_PHYSSEG_SPARSE | |||||
first += howmany(seg->end, VM_LEVEL_0_SIZE) - | |||||
seg->start / VM_LEVEL_0_SIZE; | |||||
#endif | |||||
} | } | ||||
for (i = 0; i < MAXMEMDOM; i++) { | for (i = 0; i < MAXMEMDOM; i++) { | ||||
rvd = &vm_rvd[i]; | rvd = &vm_rvd[i]; | ||||
mtx_init(&rvd->lock, "vm reserv domain", NULL, MTX_DEF); | mtx_init(&rvd->lock, "vm reserv domain", NULL, MTX_DEF); | ||||
TAILQ_INIT(&rvd->partpop); | TAILQ_INIT(&rvd->partpop); | ||||
mtx_init(&rvd->marker.lock, "vm reserv marker", NULL, MTX_DEF); | mtx_init(&rvd->marker.lock, "vm reserv marker", NULL, MTX_DEF); | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 313 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* Allocates the virtual and physical memory required by the reservation | * Allocates the virtual and physical memory required by the reservation | ||||
* management system's data structures, in particular, the reservation array. | * management system's data structures, in particular, the reservation array. | ||||
*/ | */ | ||||
vm_paddr_t | vm_paddr_t | ||||
vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end) | vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end) | ||||
{ | { | ||||
vm_paddr_t new_end, high_water; | vm_paddr_t new_end; | ||||
vm_pindex_t count; | |||||
Not Done Inline ActionsSimilarly to pagecount in vm_page_startup(), this should just be u_long. (A VM object can be larger than the physical memory size of some machines. On those machines vm_pindex_t is necessary to represent either a page's position within the VM object or the size of the VM object. Here, however, we are dealing with physical memory, and u_long is sufficient to represent the number of physical pages on any given architecture. (That said, in a lot of places we are still using u_int.)) alc: Similarly to pagecount in vm_page_startup(), this should just be `u_long`.
(A VM object can be… | |||||
Not Done Inline Actions
In fact not, recent SDM stopped limiting bits for phys addresses in PAE page tables in 32bit mode. Currently Intel limits it to 40 bit of page index, which is larger than u_long can hold. kib: > Here, however, we are dealing with physical memory, and u_long is sufficient to represent… | |||||
Not Done Inline ActionsInteresting, I hadn't seen that yet. alc: Interesting, I hadn't seen that yet. | |||||
size_t size; | size_t size; | ||||
int i; | int i; | ||||
Done Inline Actionsint again kib: int again | |||||
high_water = phys_avail[1]; | count = 0; | ||||
for (i = 0; i < vm_phys_nsegs; i++) { | for (i = 0; i < vm_phys_nsegs; i++) { | ||||
if (vm_phys_segs[i].end > high_water) | #ifdef VM_PHYSSEG_SPARSE | ||||
high_water = vm_phys_segs[i].end; | count += howmany(vm_phys_segs[i].end, VM_LEVEL_0_SIZE) - | ||||
vm_phys_segs[i].start / VM_LEVEL_0_SIZE; | |||||
#else | |||||
count = MAX(count, | |||||
howmany(vm_phys_segs[i].end, VM_LEVEL_0_SIZE)); | |||||
#endif | |||||
} | } | ||||
/* Skip the first chunk. It is already accounted for. */ | for (i = 0; phys_avail[i + 1] != 0; i += 2) { | ||||
for (i = 2; phys_avail[i + 1] != 0; i += 2) { | #ifdef VM_PHYSSEG_SPARSE | ||||
if (phys_avail[i + 1] > high_water) | count += howmany(phys_avail[i + 1], VM_LEVEL_0_SIZE) - | ||||
high_water = phys_avail[i + 1]; | phys_avail[i] / VM_LEVEL_0_SIZE; | ||||
#else | |||||
count = MAX(count, | |||||
howmany(phys_avail[i + 1], VM_LEVEL_0_SIZE)); | |||||
#endif | |||||
} | } | ||||
Not Done Inline ActionsThis looks incorrect since vm_phys_segs[] and phys_avail[] are basically the same set. phys_avail[] is populated earlier during boot and used to initialize vm_phys_segs[]. Looking at vm_page_startup(), I can kind of see why it was written this way: the vm_phys_segs[] array is not fully populated at this point. I think we should just be looping over phys_avail[] here. markj: This looks incorrect since vm_phys_segs[] and phys_avail[] are basically the same set. | |||||
Done Inline ActionsIt looks like sys/powerpc/aim/mmu_radix.c and sys/arm/arm/pmap-v6.c can vm_phys_add_seg() an entry into vm_phys_segs before it gets properly populated, if I understand correctly. scottph: It looks like sys/powerpc/aim/mmu_radix.c and sys/arm/arm/pmap-v6.c can vm_phys_add_seg() an… | |||||
Not Done Inline ActionsThe worst case scenario here is that we allocate twice as many vm_reserv structures as we need under VM_PHYSSEG_SPARSE. alc: The worst case scenario here is that we allocate twice as many vm_reserv structures as we need… | |||||
Done Inline ActionsLooking more into vm_page.c, specifically lines 650-670 where size is computed with phys_avail and vm_phys_segs, and then the loop on lines 758-793 where (after vm_phys_segs has been fully populated) only memory that came from phys_avail is added to free lists, it appears that the purpose of machdep code putting memory directly into vm_phys_segs is that it wants it to be represented by a vm_page, but not added to a free list. I believe the intention is that phys_avail and vm_phys_segs are disjoint at this point in initialization, but there don't seem to be any asserts to ensure that this is the case. scottph: Looking more into vm_page.c, specifically lines 650-670 where size is computed with phys_avail… | |||||
Not Done Inline ActionsAfter re-reading more carefully I think you're right. markj: After re-reading more carefully I think you're right. | |||||
Not Done Inline ActionsThere are a few situations where we need to have a struct vm_page corresponding to a physical page allocated early in the boot process even though the page will never be added to the free lists, for example, kernel page table pages underlying kernel mappings that might be promoted to a superpage. This is why pmap-v6.c is calling vm_phys_add_seg(). alc: There are a few situations where we need to have a struct vm_page corresponding to a physical… | |||||
/* | /* | ||||
* Calculate the size (in bytes) of the reservation array. Round up | * Calculate the size (in bytes) of the reservation array. Round up | ||||
* from "high_water" because every small page is mapped to an element | * for partial superpages at boundaries, as every small page is mapped | ||||
* in the reservation array based on its physical address. Thus, the | * to an element in the reservation array based on its physical address. | ||||
* number of elements in the reservation array can be greater than the | * Thus, the number of elements in the reservation array can be greater | ||||
* number of superpages. | * than the number of superpages. | ||||
*/ | */ | ||||
size = howmany(high_water, VM_LEVEL_0_SIZE) * sizeof(struct vm_reserv); | size = count * sizeof(struct vm_reserv); | ||||
/* | /* | ||||
* Allocate and map the physical memory for the reservation array. The | * Allocate and map the physical memory for the reservation array. The | ||||
* next available virtual address is returned by reference. | * next available virtual address is returned by reference. | ||||
*/ | */ | ||||
new_end = end - round_page(size); | new_end = end - round_page(size); | ||||
vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end, | vm_reserv_array = (void *)(uintptr_t)pmap_map(vaddr, new_end, end, | ||||
VM_PROT_READ | VM_PROT_WRITE); | VM_PROT_READ | VM_PROT_WRITE); | ||||
Show All 27 Lines |
Stray newline.