sys/amd64/amd64/pmap.c
[... 108 lines skipped ...]
#include "opt_ddb.h" | #include "opt_ddb.h" | ||||
#include "opt_pmap.h" | #include "opt_pmap.h" | ||||
#include "opt_vm.h" | #include "opt_vm.h" | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/bitstring.h> | #include <sys/bitstring.h> | ||||
#include <sys/bus.h> | #include <sys/bus.h> | ||||
#include <sys/types.h> | |||||
kib (Unsubmitted, Not Done):
This is not needed. sys/param.h already includes sys/types.h. Was there a reason like a compiler error that caused you to add the include?

jah (Author, Unsubmitted, Done):
Not a compile error, just a result of reading counter(9). I realized we were coincidentally getting counter.h from *somewhere* else, and I prefer to explicitly include dependency headers to avoid problems later. I'm fine removing sys/types.h if we can guarantee it'll always be included, but I'd prefer to keep the explicit include of sys/counter.h.

kib (Unsubmitted, Not Done):
sys/param.h is guaranteed to include sys/types.h, so we should not include sys/types.h. I am fine with explicitly adding sys/counter.h.
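For reference, the layout the thread converges on might look like the following; a minimal sketch, assuming only that sys/param.h always comes first and itself provides sys/types.h:

/*
 * Sketch of the agreed include layout: no explicit
 * #include <sys/types.h>, since sys/param.h is guaranteed to provide
 * it, but an explicit #include <sys/counter.h> because counter(9)
 * macros are used directly in this file.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/sysctl.h>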
#include <sys/systm.h>
+#include <sys/counter.h>
kib (Unsubmitted, Not Done):
Why did you decide to add the include now?
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rangeset.h>
[... 406 lines skipped ...]
static void *pkru_dup_range(void *ctx, void *data);
static void pkru_free_range(void *ctx, void *node);
static int pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap);
static int pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
static void pmap_pkru_deassign_all(pmap_t pmap);

static COUNTER_U64_DEFINE_EARLY(pcid_save_cnt);
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLFLAG_RD,
    &pcid_save_cnt, "Count of saved TLB context on switch");
kib (Not Done):
This removal should be split out and committed separately.

jah (Done):
Oops, I'd meant not to include this change in the review, but I accidentally added the commit to the new branch.
static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
    LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
static struct mtx invl_gen_mtx;
/* Fake lock object to satisfy turnstiles interface. */
static struct lock_object invl_gen_ts = {
	.lo_name = "invlts",
};
[... 207 lines skipped, in: __asm volatile("lock;cmpxchg16b\t%1" ...]
	    : "memory", "cc");
	return (res);
}
static COUNTER_U64_DEFINE_EARLY(pv_page_count);
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pv_page_count, CTLFLAG_RD,
    &pv_page_count, "Current number of allocated pv pages");

-static COUNTER_U64_DEFINE_EARLY(pt_page_count);
-SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pt_page_count, CTLFLAG_RD,
-    &pt_page_count, "Current number of allocated page table pages");
+static COUNTER_U64_DEFINE_EARLY(user_pt_page_count);
+SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, user_pt_page_count, CTLFLAG_RD,
+    &user_pt_page_count,
+    "Current number of allocated page table pages for userspace");
+
+static COUNTER_U64_DEFINE_EARLY(kernel_pt_page_count);
+SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, kernel_pt_page_count, CTLFLAG_RD,
+    &kernel_pt_page_count,
+    "Current number of allocated page table pages for the kernel");
#ifdef PV_STATS

static COUNTER_U64_DEFINE_EARLY(invl_start_restart);
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, invl_start_restart,
    CTLFLAG_RD, &invl_start_restart,
    "Number of delayed TLB invalidation request restarts");

static COUNTER_U64_DEFINE_EARLY(invl_finish_restart);
[... 509 lines skipped ...]
static vm_page_t pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex,
    struct rwlock **lockp, vm_offset_t va);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
    struct rwlock **lockp);
static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
+static vm_page_t pmap_alloc_pt_page(pmap_t, vm_pindex_t, int);
+static void pmap_free_pt_page(pmap_t, vm_page_t, bool);
/********************/
/* Inline functions */
/********************/

/*
 * Return non-clipped indexes for a given VA, which are the page table
 * page indexes at the corresponding level.
 */
[... 327 lines skipped ...]
	if (ndmpdpphys > NDMPML4E) {
		/*
		 * Each NDMPML4E allows 512 GB, so limit to that,
		 * and then readjust ndmpdp and ndmpdpphys.
		 */
		printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512);
		Maxmem = atop(NDMPML4E * NBPML4);
		ndmpdpphys = NDMPML4E;
		ndmpdp = NDMPML4E * NPDEPG;
	}
	DMPDPphys = allocpages(firstaddr, ndmpdpphys);
kib (Unsubmitted, Not Done):
In principle, all early-allocated kernel page table pages should be accounted in the kernel pmap pages counter.

jah (Author, Unsubmitted, Done):
How important do you think it is to account for these? I've avoided tracking these early-boot allocations since they're fixed and unlikely to be useful in tracking runtime changes in kernel PT page footprint. Also, direct updates to kernel_pt_page_count at this point will be lost since pmap_bootstrap runs before SI_SUB_COUNTER. If needed, we can save off the count and add it to kernel_pt_page_count later in boot.

kib (Unsubmitted, Not Done):
Not too important. I thought that switching from the bootstrap to the proper counter preserves the accumulated value, but apparently I am wrong. Not sure if it is worth fixing.

jah (Author, Unsubmitted, Done):
I wondered the same thing about fixing counter(9). It would be easy enough to make COUNTER_U64_DEFINE_EARLY() declare a regular uint64_t for early use and then copy that value to the per-CPU location during the sysinit. But callers would still either need to know to use the regular variable by name during early boot, or would need a COUNTER_U64_ADD_EARLY() wrapper to check the sysinit phase and use either the early variable or the per-CPU variable as appropriate (making the regular counter_u64_add() handle this automatically would just penalize the 99+% case where the counters are used after SI_SUB_COUNTER). This didn't seem worth doing for the likely very few use cases.

kib (Unsubmitted, Not Done):
Right now a counter is a single uintptr_t value. Assume we change it to two uintptr_ts: the first is the pointer, the second is the early-value accumulator. For early initialization, the first word should be set to &(second word) - &__pcpu[0]. After actual counter initialization, we move the second word into counter[0]. Then one word per counter would be left unused. Perhaps we could find some other use for it.

mjg (Unsubmitted, Not Done):
All counters used here are known to be there at compilation time and present for the duration of the kernel being up. A counter-like interface utilizing this fact would avoid the entire problem with supporting 'early' states, at least in pmap and probably for all other users. An additional win would be cheaper increments for not having to load a pointer with the offset.

jah (Author, Unsubmitted, Done):
That seems like a better approach, since all the other options involve some form of waste in the common case. What would this look like? Would it be something like a 'PCPU' ELF segment that gets replicated at boot for each CPU (so, assuming the BSP is always cpu 0, the runtime pointer would be "<static_addr> + <cpu_num> * <segment_size>")? That would also get these static fields out of the UMA per-CPU zone, which if I'm reading correctly is limited to a page for each CPU...

kib (Unsubmitted, Not Done):
Counter's elements are per-cpu, and having a simple scheme where the VA for curcpu is trivially calculated from the VA for cpu 0 requires a lot of setup at runtime. This is why this EARLY_COUNTER business appeared at all. Static configuration of known counters cannot be done before we at least enumerate the CPUs we would have, before the kernel pmap is fully set up, and perhaps before all the initial allocations (that abuse the BIOS/EFI memory maps, instead of being able to use normal allocators) are done. So I am really not sure what is being proposed there.

mjg (Unsubmitted, Not Done):
My proposal would put them in struct pcpu, just like pc_pm_save_cnt. Making this into something resembling a facility instead of open-coded support may require some effort, unless a zero-tech solution is implemented. The most trivial take I have on this is to create a dedicated header defining struct static_counter (or similar) where all the counters get explicitly specified; then said struct would be a part of struct pcpu.

jah (Author, Unsubmitted, Done):
I'm still not convinced we need to do anything to make early counters work. SI_SUB_COUNTER is pretty early in boot. Are there really that many things that are likely to have counter values they really care about tracking that early? Code that runs that early in boot tends to be specialized anyway, so surely it could just stash the count into a regular variable and update the counter later. I realize early counters would be a secondary benefit to the main idea of reworking static counters to make their access more efficient, but I'm not sure we need to worry about the early counter case at all.
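To illustrate the shape of the idea jah describes (and rejects), a hypothetical sketch follows. None of these names exist in counter(9): COUNTER_U64_DEFINE_EARLY_ACC(), COUNTER_U64_ADD_EARLY(), and the _early accumulator are invented for illustration only.

/*
 * Hypothetical: accumulate into a plain uint64_t before SI_SUB_COUNTER,
 * then fold the accumulated value into the real per-CPU counter once it
 * has been allocated.  Early boot is single-threaded, so the plain
 * increment is safe there.
 */
#define	COUNTER_U64_DEFINE_EARLY_ACC(name)				\
	static uint64_t name##_early;					\
	static counter_u64_t name;					\
	static void							\
	name##_startup(void *arg __unused)				\
	{								\
		name = counter_u64_alloc(M_WAITOK);			\
		counter_u64_add(name, name##_early);			\
	}								\
	SYSINIT(name##_init, SI_SUB_COUNTER, SI_ORDER_ANY,		\
	    name##_startup, NULL)

#define	COUNTER_U64_ADD_EARLY(name, v) do {				\
	if (name == NULL)		/* before SI_SUB_COUNTER */	\
		name##_early += (v);					\
	else								\
		counter_u64_add(name, (v));				\
} while (0)

mjg's alternative would instead reserve a field per statically known counter in struct pcpu, trading generality for a cheaper increment and no early/late distinction.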
	ndm1g = 0;
	if ((amd_feature & AMDID_PAGE1GB) != 0) {
		/*
		 * Calculate the number of 1G pages that will fully fit in
		 * Maxmem.
		 */
		ndm1g = ptoa(Maxmem) >> PDPSHIFT;
[... 381 lines skipped, in pmap_bootstrap_la57(void *arg __unused) ...]
	r_gdt.rd_base = (long)__pcpu[0].pc_gdt;
	m_code = vm_page_alloc_contig(NULL, 0,
	    VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ,
	    1, 0, (1ULL << 32), PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
	if ((m_code->flags & PG_ZERO) == 0)
		pmap_zero_page(m_code);
	v_code = (char *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_code));
	m_pml5 = vm_page_alloc_contig(NULL, 0,
kib (Unsubmitted, Not Done):
LA57 root pages should also be accounted for kernel_pmap.
	    VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ,
	    1, 0, (1ULL << 32), PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
	if ((m_pml5->flags & PG_ZERO) == 0)
		pmap_zero_page(m_pml5);
	KPML5phys = VM_PAGE_TO_PHYS(m_pml5);
	v_pml5 = (pml5_entry_t *)PHYS_TO_DMAP(KPML5phys);
	m_pml4 = vm_page_alloc_contig(NULL, 0,
	    VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ,
[... 13 lines skipped ...]
	if ((m_pd->flags & PG_ZERO) == 0)
		pmap_zero_page(m_pd);
	v_pd = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd));
	m_pt = vm_page_alloc_contig(NULL, 0,
	    VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_NOOBJ,
	    1, 0, (1ULL << 32), PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
	if ((m_pt->flags & PG_ZERO) == 0)
		pmap_zero_page(m_pt);
	v_pt = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pt));
kib (Not Done):
It should be 1, not 5. Everything except the top-level page is deallocated later in this function.

jah (Done):
Thanks for catching this, will fix.
	/*
	 * Map m_code 1:1, it appears below 4G in KVA due to physical
	 * address being below 4G.  Since kernel KVA is in upper half,
	 * the pml4e should be zero and free for temporary use.
	 */
	kernel_pmap->pm_pmltop[pmap_pml4e_index(VM_PAGE_TO_PHYS(m_code))] =
	    VM_PAGE_TO_PHYS(m_pdp) | X86_PG_V | X86_PG_RW | X86_PG_A |
[... 1,940 lines skipped, in: if (m->pindex < NUPDE) { ...]
		pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME);
		pmap_unwire_ptp(pmap, va, pdppg, free);
	} else if (m->pindex < NUPDE + NUPDPE + NUPML4E && pmap_is_la57(pmap)) {
		/* We just released a PDP, unhold the matching PML4 */
		pml4pg = PHYS_TO_VM_PAGE(*pmap_pml5e(pmap, va) & PG_FRAME);
		pmap_unwire_ptp(pmap, va, pml4pg, free);
	}

-	counter_u64_add(pt_page_count, -1);
+	if (pmap == kernel_pmap)
+		counter_u64_add(kernel_pt_page_count, -1);
+	else
+		counter_u64_add(user_pt_page_count, -1);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}
[... 148 lines skipped, in pmap_pinit_pml5_pti(vm_page_t pml5pgu) ...]
	 * into level 5 table.
	 */
	pm_pml5u[pmap_pml5e_index(UPT_MAX_ADDRESS)] =
	    pmap_kextract((vm_offset_t)pti_pml4) |
	    X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | pg_g |
	    pmap_cache_bits(kernel_pmap, VM_MEMATTR_DEFAULT, FALSE);
}
+/* Allocate a page table page and do related bookkeeping */
+static vm_page_t
+pmap_alloc_pt_page(pmap_t pmap, vm_pindex_t pindex, int flags)
+{
+	vm_page_t m;
+
+	m = vm_page_alloc(NULL, pindex, flags | VM_ALLOC_NOOBJ);
+	if (__predict_false(m == NULL))
+		return (NULL);
+	if (pmap == kernel_pmap)
jah (Done):
I haven't rolled resident count management into these functions, for a couple of reasons:
1) We…

kib (Not Done):
For #1, nothing prevents us from starting to account the root page. You would need to change… For #2, you can try to switch to atomics and stop requiring the pmap lock.

jah (Not Done):
We can definitely do those things, but are they worth doing for the small amount of cleanup? The atomics especially would just be unnecessary overhead right now, since we already hold the lock (but they would be necessary if we started accounting the root table, unless pmap_pinit()/pmap_release() started holding the lock).

jah (Done):
Really, neither atomics nor the lock would be necessary for pmap_pinit()/pmap_release() since nothing else should be using the pmap during those calls, but it would mean that pmap_alloc_pt_page()/pmap_free_pt_page() would need to do something different for resident count during pmap_pinit()/pmap_release().

kib (Not Done):
So pmap_pinit() is the only place which changes resident_count unlocked? Why not initialize…

jah (Done):
We could do that, or we could keep doing what we're already doing and just not charge the top-level page(s) to resident_count. That would be simpler, since the top-level page count is either 1 or 2 depending on PTI, which would complicate the check in pmap_remove() and pmap_release(). In either case, we'd need a flag to pmap_alloc_pt_page()/pmap_free_pt_page() to tell them not to update resident_count when called from pmap_pinit()/pmap_release().

kib (Not Done):
You can pass a NULL pmap for the case where the counting is not needed. Or reuse some VM_ALLOC flag that is never passed to pmap_alloc_pt_page() otherwise.

jah (Done):
I'd be ok with using a NULL pmap for this case; I like that somewhat better than adding yet another boolean. This ability to opt out of counting would also alleviate my concern over coupling resident_count's locking requirements to page allocation.
+		counter_u64_add(kernel_pt_page_count, 1);
+	else {
+		if (pmap != NULL)
+			pmap_resident_count_inc(pmap, 1);
+		counter_u64_add(user_pt_page_count, 1);
+	}
+	if ((flags & VM_ALLOC_ZERO) != 0 && (m->flags & PG_ZERO) == 0)
+		pmap_zero_page(m);
+	return (m);
+}
+
+static void
+pmap_free_pt_page(pmap_t pmap, vm_page_t m, bool zerofilled)
+{
+	/*
+	 * This function assumes the page will need to be unwired,
+	 * even though the counterpart allocation in pmap_alloc_pt_page()
+	 * doesn't enforce VM_ALLOC_WIRED.  However, all current uses
+	 * of pmap_free_pt_page() require unwiring.  The case in which
+	 * a PT page doesn't require unwiring because its ref_count has
+	 * naturally reached 0 is handled through _pmap_unwire_ptp().
+	 */
+	vm_page_unwire_noq(m);
+	if (zerofilled)
+		vm_page_free_zero(m);
+	else
+		vm_page_free(m);
+	if (pmap == kernel_pmap)
+		counter_u64_add(kernel_pt_page_count, -1);
+	else {
+		if (pmap != NULL)
+			pmap_resident_count_dec(pmap, 1);
+		counter_u64_add(user_pt_page_count, -1);
+	}
+}
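Picking up kib's atomics suggestion from the thread above, lock-free variants of the resident-count helpers might look roughly like the following. This is a hypothetical sketch, not part of this change: the _atomic names are invented, and the cast assumes pm_stats.resident_count stays a long.

/*
 * Hypothetical: atomic resident-count updates would let
 * pmap_alloc_pt_page()/pmap_free_pt_page() account pages without
 * holding the pmap lock, e.g. for the top-level pages allocated from
 * pmap_pinit() and freed from pmap_release().
 */
static __inline void
pmap_resident_count_inc_atomic(pmap_t pmap, int count)
{

	atomic_add_long((volatile u_long *)&pmap->pm_stats.resident_count,
	    count);
}

static __inline void
pmap_resident_count_dec_atomic(pmap_t pmap, int count)
{

	atomic_subtract_long(
	    (volatile u_long *)&pmap->pm_stats.resident_count, count);
}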
/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
{
	vm_page_t pmltop_pg, pmltop_pgu;
	vm_paddr_t pmltop_phys;
	int i;

	/*
	 * allocate the page directory page
	 */
-	pmltop_pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
-	    VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK);
-	counter_u64_add(pt_page_count, 1);
+	pmltop_pg = pmap_alloc_pt_page(NULL, 0, VM_ALLOC_NORMAL |
+	    VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK);
	pmltop_phys = VM_PAGE_TO_PHYS(pmltop_pg);
	pmap->pm_pmltop = (pml5_entry_t *)PHYS_TO_DMAP(pmltop_phys);

	CPU_FOREACH(i) {
		pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE;
		pmap->pm_pcids[i].pm_gen = 0;
	}
	pmap->pm_cr3 = PMAP_NO_CR3;	/* initialize to an invalid value */
	pmap->pm_ucr3 = PMAP_NO_CR3;
	pmap->pm_pmltopu = NULL;

	pmap->pm_type = pm_type;

-	if ((pmltop_pg->flags & PG_ZERO) == 0)
-		pagezero(pmap->pm_pmltop);
	/*
	 * Do not install the host kernel mappings in the nested page
	 * tables.  These mappings are meaningless in the guest physical
	 * address space.
	 * Install minimal kernel mappings in PTI case.
	 */
	switch (pm_type) {
	case PT_X86:
		pmap->pm_cr3 = pmltop_phys;
		if (pmap_is_la57(pmap))
			pmap_pinit_pml5(pmltop_pg);
		else
			pmap_pinit_pml4(pmltop_pg);
		if ((curproc->p_md.md_flags & P_MD_KPTI) != 0) {
-			pmltop_pgu = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
-			    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_WAITOK);
-			counter_u64_add(pt_page_count, 1);
+			pmltop_pgu = pmap_alloc_pt_page(NULL, 0,
+			    VM_ALLOC_WIRED | VM_ALLOC_NORMAL |
+			    VM_ALLOC_WAITOK);
			pmap->pm_pmltopu = (pml4_entry_t *)PHYS_TO_DMAP(
			    VM_PAGE_TO_PHYS(pmltop_pgu));
			if (pmap_is_la57(pmap))
				pmap_pinit_pml5_pti(pmltop_pgu);
			else
				pmap_pinit_pml4_pti(pmltop_pgu);
			pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pmltop_pgu);
		}
[... 168 lines skipped, in pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, ...) ...]
	PG_A = pmap_accessed_bit(pmap);
	PG_M = pmap_modified_bit(pmap);
	PG_V = pmap_valid_bit(pmap);
	PG_RW = pmap_rw_bit(pmap);

	/*
	 * Allocate a page table page.
	 */
-	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
-	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
+	m = pmap_alloc_pt_page(pmap, ptepindex,
+	    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+	if (m == NULL)
		return (NULL);
-	if ((m->flags & PG_ZERO) == 0)
-		pmap_zero_page(m);
	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */
	if (ptepindex >= NUPDE + NUPDPE + NUPML4E) {
		MPASS(pmap_is_la57(pmap));
		pml5index = pmap_pml5e_index(va);
[... 10 lines skipped, in: if (pmap->pm_pmltopu != NULL && pml5index < NUPML5E) { ...]
			*pml5u = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V |
			    PG_A | PG_M;
		}
	} else if (ptepindex >= NUPDE + NUPDPE) {
		pml4index = pmap_pml4e_index(va);
		/* Wire up a new PDPE page */
		pml4 = pmap_allocpte_getpml4(pmap, lockp, va, true);
		if (pml4 == NULL) {
-			vm_page_unwire_noq(m);
-			vm_page_free_zero(m);
+			pmap_free_pt_page(pmap, m, true);
			return (NULL);
		}
		KASSERT((*pml4 & PG_V) == 0,
		    ("pmap %p va %#lx pml4 %#lx", pmap, va, *pml4));
		*pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
		if (!pmap_is_la57(pmap) && pmap->pm_pmltopu != NULL &&
		    pml4index < NUPML4E) {
[... 10 lines skipped ...]
			pml4u = &pmap->pm_pmltopu[pml4index];
			*pml4u = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V |
			    PG_A | PG_M;
		}
	} else if (ptepindex >= NUPDE) {
		/* Wire up a new PDE page */
		pdp = pmap_allocpte_getpdp(pmap, lockp, va, true);
		if (pdp == NULL) {
-			vm_page_unwire_noq(m);
-			vm_page_free_zero(m);
+			pmap_free_pt_page(pmap, m, true);
			return (NULL);
		}
		KASSERT((*pdp & PG_V) == 0,
		    ("pmap %p va %#lx pdp %#lx", pmap, va, *pdp));
		*pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
	} else {
		/* Wire up a new PTE page */
		pdp = pmap_allocpte_getpdp(pmap, lockp, va, false);
		if (pdp == NULL) {
-			vm_page_unwire_noq(m);
-			vm_page_free_zero(m);
+			pmap_free_pt_page(pmap, m, true);
			return (NULL);
		}
		if ((*pdp & PG_V) == 0) {
			/* Have to allocate a new pd, recurse */
			if (pmap_allocpte_nosleep(pmap, pmap_pdpe_pindex(va),
			    lockp, va) == NULL) {
				pmap_allocpte_free_unref(pmap, va,
				    pmap_pml4e(pmap, va));
-				vm_page_unwire_noq(m);
-				vm_page_free_zero(m);
+				pmap_free_pt_page(pmap, m, true);
				return (NULL);
			}
		} else {
			/* Add reference to the pd page */
			pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME);
			pdpg->ref_count++;
		}
		pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME);

		/* Now we know where the page directory page is */
		pd = &pd[pmap_pde_index(va)];
		KASSERT((*pd & PG_V) == 0,
		    ("pmap %p va %#lx pd %#lx", pmap, va, *pd));
		*pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
	}

-	pmap_resident_count_inc(pmap, 1);
-	counter_u64_add(pt_page_count, 1);
	return (m);
}
/*
 * This routine is called if the desired page table page does not exist.
 *
 * If page table page allocation fails, this routine may sleep before
 * returning NULL.  It sleeps only if a lock pointer was given.  Sleep
[... 144 lines skipped ...]
		for (i = 0; i < NKPML4E; i++)	/* KVA */
			pmap->pm_pmltop[KPML4BASE + i] = 0;
		for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
			pmap->pm_pmltop[DMPML4I + i] = 0;
		pmap->pm_pmltop[PML4PML4I] = 0;	/* Recursive Mapping */
		for (i = 0; i < lm_ents; i++)	/* Large Map */
			pmap->pm_pmltop[LMSPML4I + i] = 0;
	}

-	vm_page_unwire_noq(m);
-	vm_page_free_zero(m);
-	counter_u64_add(pt_page_count, -1);
+	pmap_free_pt_page(NULL, m, true);

	if (pmap->pm_pmltopu != NULL) {
		m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->
		    pm_pmltopu));
-		vm_page_unwire_noq(m);
-		vm_page_free(m);
-		counter_u64_add(pt_page_count, -1);
+		pmap_free_pt_page(NULL, m, false);
	}
	if (pmap->pm_type == PT_X86 &&
	    (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0)
		rangeset_fini(&pmap->pm_pkru);
}
static int
kvm_size(SYSCTL_HANDLER_ARGS)
[... 92 lines skipped, in pmap_growkernel(vm_offset_t addr) ...]
	addr = roundup2(addr, NBPDR);
	if (addr - 1 >= vm_map_max(kernel_map))
		addr = vm_map_max(kernel_map);
	while (kernel_vm_end < addr) {
		pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
		if ((*pdpe & X86_PG_V) == 0) {
			/* We need a new PDP entry */
-			nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT,
-			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
-			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
+			nkpg = pmap_alloc_pt_page(kernel_pmap,
+			    kernel_vm_end >> PDPSHIFT, VM_ALLOC_WIRED |
+			    VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO);
			if (nkpg == NULL)
				panic("pmap_growkernel: no memory to grow kernel");
-			if ((nkpg->flags & PG_ZERO) == 0)
-				pmap_zero_page(nkpg);
-			counter_u64_add(pt_page_count, 1);
			paddr = VM_PAGE_TO_PHYS(nkpg);
			*pdpe = (pdp_entry_t)(paddr | X86_PG_V | X86_PG_RW |
			    X86_PG_A | X86_PG_M);
			continue; /* try again */
		}
		pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
		if ((*pde & X86_PG_V) != 0) {
			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
			if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
				kernel_vm_end = vm_map_max(kernel_map);
				break;
			}
			continue;
		}

-		nkpg = vm_page_alloc(NULL, pmap_pde_pindex(kernel_vm_end),
-		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
-		    VM_ALLOC_ZERO);
+		nkpg = pmap_alloc_pt_page(kernel_pmap,
+		    pmap_pde_pindex(kernel_vm_end), VM_ALLOC_WIRED |
+		    VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");
-		if ((nkpg->flags & PG_ZERO) == 0)
-			pmap_zero_page(nkpg);
-		counter_u64_add(pt_page_count, 1);
		paddr = VM_PAGE_TO_PHYS(nkpg);
		newpdir = paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
		pde_store(pde, newpdir);

		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
		if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
			kernel_vm_end = vm_map_max(kernel_map);
			break;
[... 900 lines skipped, in: if (mpte == NULL) { ...]
		/*
		 * If the 2MB page mapping belongs to the direct map
		 * region of the kernel's address space, then the page
		 * allocation request specifies the highest possible
		 * priority (VM_ALLOC_INTERRUPT).  Otherwise, the
		 * priority is normal.
		 */
-		mpte = vm_page_alloc(NULL, pmap_pde_pindex(va),
-		    (in_kernel ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
-		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
+		mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va),
+		    (in_kernel ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
+		    VM_ALLOC_WIRED);

		/*
		 * If the allocation of the new page table page fails,
		 * invalidate the 2MB page mapping and return "failure".
		 */
		if (mpte == NULL) {
			pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp);
			return (FALSE);
		}

-		counter_u64_add(pt_page_count, 1);
-		if (!in_kernel) {
+		if (!in_kernel)
			mpte->ref_count = NPTEPG;
-			pmap_resident_count_inc(pmap, 1);
-		}
	}

	mptepa = VM_PAGE_TO_PHYS(mpte);
	firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa);
	newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
	KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
	    ("pmap_demote_pde: oldpde is missing PG_M"));
	newpte = oldpde & ~PG_PS;
	newpte = pmap_swap_pat(pmap, newpte);
[... 3,315 lines skipped, in pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) ...]
	PG_M = pmap_modified_bit(pmap);
	PG_V = pmap_valid_bit(pmap);
	PG_RW = pmap_rw_bit(pmap);

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	oldpdpe = *pdpe;
	KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V),
	    ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V"));
-	if ((pdpg = vm_page_alloc(NULL, va >> PDPSHIFT, VM_ALLOC_INTERRUPT |
-	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
+	pdpg = pmap_alloc_pt_page(pmap, va >> PDPSHIFT,
+	    VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT);
+	if (pdpg == NULL) {
		CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx"
		    " in pmap %p", va, pmap);
		return (FALSE);
	}
-	counter_u64_add(pt_page_count, 1);
	pdpgpa = VM_PAGE_TO_PHYS(pdpg);
	firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa);
	newpdpe = pdpgpa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V;
	KASSERT((oldpdpe & PG_A) != 0,
	    ("pmap_demote_pdpe: oldpdpe is missing PG_A"));
	KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW,
	    ("pmap_demote_pdpe: oldpdpe is missing PG_M"));
	newpde = oldpdpe;
[... 995 lines skipped ...]
 *
 * In contrast, pd and pt pages are managed like user page table
 * pages.  They are dynamically allocated, and their reference count
 * represents the number of valid entries within the page.
 */
static vm_page_t
pmap_large_map_getptp_unlocked(void)
{
-	vm_page_t m;
-
-	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
-	    VM_ALLOC_ZERO);
-	if (m != NULL) {
-		if ((m->flags & PG_ZERO) == 0)
-			pmap_zero_page(m);
-		counter_u64_add(pt_page_count, 1);
-	}
-	return (m);
+	return (pmap_alloc_pt_page(kernel_pmap, 0,
+	    VM_ALLOC_NORMAL | VM_ALLOC_ZERO));
}
static vm_page_t
pmap_large_map_getptp(void)
{
	vm_page_t m;

	PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
[... 1,481 lines skipped ...]