sys/amd64/amd64/pmap.c
[... 529 lines elided ...]
static pt_entry_t pmap_pkru_get(pmap_t pmap, vm_offset_t va); | static pt_entry_t pmap_pkru_get(pmap_t pmap, vm_offset_t va); | ||||
static void pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); | static void pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); | ||||
static void *pkru_dup_range(void *ctx, void *data); | static void *pkru_dup_range(void *ctx, void *data); | ||||
static void pkru_free_range(void *ctx, void *node); | static void pkru_free_range(void *ctx, void *node); | ||||
static int pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap); | static int pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap); | ||||
static int pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); | static int pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva); | ||||
static void pmap_pkru_deassign_all(pmap_t pmap); | static void pmap_pkru_deassign_all(pmap_t pmap); | ||||
static int | static COUNTER_U64_DEFINE_EARLY(pcid_save_cnt); | ||||
pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS) | SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLFLAG_RD, | ||||
jah: Removing the handrolled pc_pm_save_cnt seemed like a useful bit of cleanup. But I realize this…
[Not Done] mjg: It's trivial to patch the macros to use incq instead of addq automatically; I did not do it because it did not seem worth it from looking at Agner Fog's instruction tables. The real problem with counter(9) is that even counters which are known to be there at compilation time require fetching something to find the address. There should be a facility to sort them out by storing everything in struct pcpu.
[Not Done] kib: I suspect that I was the only user for this counter, and I would be fine with removing it altogether. The last time it was useful was when I split pmap_activate_sw() into ifuncs, to more easily see that the right ifunc was selected. But I also do not object to keeping it around.
[Done] jah: I'm fine with removing it if you are. On a related note, it looks like this is the only remaining in-tree use of PCPU_INC; would it be worth removing PCPU_INC entirely in a separate change? Non-x86 architectures just implement it as PCPU_ADD(..., 1) anyway.
{ | &pcid_save_cnt, "Count of saved TLB context on switch"); | ||||
[Not Done] kib: TLB flushes
int i; | |||||
uint64_t res; | |||||
res = 0; | |||||
CPU_FOREACH(i) { | |||||
res += cpuid_to_pcpu[i]->pc_pm_save_cnt; | |||||
} | |||||
return (sysctl_handle_64(oidp, &res, 0, req)); | |||||
} | |||||
SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RD | | |||||
CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU", | |||||
"Count of saved TLB context on switch"); | |||||
static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker = | static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker = | ||||
LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker); | LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker); | ||||
static struct mtx invl_gen_mtx; | static struct mtx invl_gen_mtx; | ||||
/* Fake lock object to satisfy turnstiles interface. */ | /* Fake lock object to satisfy turnstiles interface. */ | ||||
static struct lock_object invl_gen_ts = { | static struct lock_object invl_gen_ts = { | ||||
.lo_name = "invlts", | .lo_name = "invlts", | ||||
}; | }; | ||||
static struct pmap_invl_gen pmap_invl_gen_head = { | static struct pmap_invl_gen pmap_invl_gen_head = { | ||||
[... 202 lines elided ...] | pmap_di_store_invl(struct pmap_invl_gen *ptr, struct pmap_invl_gen *old_val,
__asm volatile("lock;cmpxchg16b\t%1" | __asm volatile("lock;cmpxchg16b\t%1" | ||||
: "=@cce" (res), "+m" (*ptr), "+a" (old_low), "+d" (old_high) | : "=@cce" (res), "+m" (*ptr), "+a" (old_low), "+d" (old_high) | ||||
: "b"(new_low), "c" (new_high) | : "b"(new_low), "c" (new_high) | ||||
: "memory", "cc"); | : "memory", "cc"); | ||||
return (res); | return (res); | ||||
} | } | ||||
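For readers unfamiliar with the idiom: the inline assembly above performs a 16-byte atomic compare-and-swap (lock cmpxchg16b) over the {gen, next} pair, with the "=@cce" constraint capturing the ZF result. As a rough illustration only (not the kernel's code), the same operation can be expressed with the compiler's 128-bit atomics, assuming an x86-64 target built with -mcx16:

```
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical 16-byte pair, laid out like {u64 gen; ptr next;}. */
struct pair128 {
	uint64_t lo;
	uint64_t hi;
} __attribute__((aligned(16)));

static bool
cas128(struct pair128 *ptr, struct pair128 *expected, struct pair128 desired)
{
	/*
	 * With -mcx16, clang/gcc lower this to lock cmpxchg16b; on
	 * failure *expected is refreshed from memory, mirroring the
	 * RDX:RAX outputs of the instruction.
	 */
	return (__atomic_compare_exchange((unsigned __int128 *)ptr,
	    (unsigned __int128 *)expected, (unsigned __int128 *)&desired,
	    false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
}
```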
static COUNTER_U64_DEFINE_EARLY(pv_page_count); | |||||
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pv_page_count, CTLFLAG_RD, | |||||
&pv_page_count, "Current number of allocated pv pages"); | |||||
static COUNTER_U64_DEFINE_EARLY(pt_page_count); | |||||
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pt_page_count, CTLFLAG_RD, | |||||
&pt_page_count, "Current number of allocated page table pages"); | |||||
#ifdef PV_STATS | #ifdef PV_STATS | ||||
static long invl_start_restart; | |||||
SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_start_restart, CTLFLAG_RD, | static COUNTER_U64_DEFINE_EARLY(invl_start_restart); | ||||
[Not Done] kib: Does it make sense to convert these to counters, even under PV_STATS? The event is relatively rare, and when it happens, we have already lost a race. I am not opposing, but curious whether there is some visible reason.
[Done] jah: Not really; I wasn't sure how frequently these might be updated under load, and I didn't see a downside to making them counter(9) since they would still be guarded by PV_STATS anyway.
&invl_start_restart, 0, | SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, invl_start_restart, | ||||
""); | CTLFLAG_RD, &invl_start_restart, | ||||
static long invl_finish_restart; | "Number of delayed TLB invalidation request restarts"); | ||||
SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_finish_restart, CTLFLAG_RD, | |||||
&invl_finish_restart, 0, | static COUNTER_U64_DEFINE_EARLY(invl_finish_restart); | ||||
""); | SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, invl_finish_restart, CTLFLAG_RD, | ||||
&invl_finish_restart, | |||||
"Number of delayed TLB invalidation completion restarts"); | |||||
static int invl_max_qlen; | static int invl_max_qlen; | ||||
SYSCTL_INT(_vm_pmap, OID_AUTO, invl_max_qlen, CTLFLAG_RD, | SYSCTL_INT(_vm_pmap, OID_AUTO, invl_max_qlen, CTLFLAG_RD, | ||||
&invl_max_qlen, 0, | &invl_max_qlen, 0, | ||||
""); | "Maximum delayed TLB invalidation request queue length"); | ||||
#endif | #endif | ||||
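As noted in the thread above, these counters exist only under PV_STATS; the call sites below wrap every update in PV_STAT(), which expands to nothing on non-PV_STATS kernels. The gating macro, defined earlier in this file, looks approximately like this (shown for context, not part of the diff):

```
#ifdef PV_STATS
#define	PV_STAT(x)	x			/* statistics kernel: do it */
#else
#define	PV_STAT(x)	do { } while (0)	/* compiled out entirely */
#endif
```

So PV_STAT(counter_u64_add(invl_start_restart, 1)) costs nothing unless the kernel is built with options PV_STATS.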
#define di_delay locks_delay | #define di_delay locks_delay | ||||
static void | static void | ||||
pmap_delayed_invl_start_u(void) | pmap_delayed_invl_start_u(void) | ||||
{ | { | ||||
struct pmap_invl_gen *invl_gen, *p, prev, new_prev; | struct pmap_invl_gen *invl_gen, *p, prev, new_prev; | ||||
[... 21 lines elided ...] | if (pri > PVM) {
thread_unlock(td); | thread_unlock(td); | ||||
} | } | ||||
again: | again: | ||||
PV_STAT(i = 0); | PV_STAT(i = 0); | ||||
for (p = &pmap_invl_gen_head;; p = prev.next) { | for (p = &pmap_invl_gen_head;; p = prev.next) { | ||||
PV_STAT(i++); | PV_STAT(i++); | ||||
prevl = (uintptr_t)atomic_load_ptr(&p->next); | prevl = (uintptr_t)atomic_load_ptr(&p->next); | ||||
if ((prevl & PMAP_INVL_GEN_NEXT_INVALID) != 0) { | if ((prevl & PMAP_INVL_GEN_NEXT_INVALID) != 0) { | ||||
PV_STAT(atomic_add_long(&invl_start_restart, 1)); | PV_STAT(counter_u64_add(invl_start_restart, 1)); | ||||
lock_delay(&lda); | lock_delay(&lda); | ||||
goto again; | goto again; | ||||
} | } | ||||
if (prevl == 0) | if (prevl == 0) | ||||
break; | break; | ||||
prev.next = (void *)prevl; | prev.next = (void *)prevl; | ||||
} | } | ||||
#ifdef PV_STATS | #ifdef PV_STATS | ||||
if ((ii = invl_max_qlen) < i) | if ((ii = invl_max_qlen) < i) | ||||
atomic_cmpset_int(&invl_max_qlen, ii, i); | atomic_cmpset_int(&invl_max_qlen, ii, i); | ||||
#endif | #endif | ||||
if (!pmap_di_load_invl(p, &prev) || prev.next != NULL) { | if (!pmap_di_load_invl(p, &prev) || prev.next != NULL) { | ||||
PV_STAT(atomic_add_long(&invl_start_restart, 1)); | PV_STAT(counter_u64_add(invl_start_restart, 1)); | ||||
lock_delay(&lda); | lock_delay(&lda); | ||||
goto again; | goto again; | ||||
} | } | ||||
new_prev.gen = prev.gen; | new_prev.gen = prev.gen; | ||||
new_prev.next = invl_gen; | new_prev.next = invl_gen; | ||||
invl_gen->gen = prev.gen + 1; | invl_gen->gen = prev.gen + 1; | ||||
[... 12 lines elided ...] | #endif
* ABA for *p is not possible there, since p->gen can only | * ABA for *p is not possible there, since p->gen can only | ||||
* increase. So if the *p thread finished its di, then | * increase. So if the *p thread finished its di, then | ||||
* started a new one and got inserted into the list at the | * started a new one and got inserted into the list at the | ||||
* same place, its gen will appear greater than the previously | * same place, its gen will appear greater than the previously | ||||
* read gen. | * read gen. | ||||
*/ | */ | ||||
if (!pmap_di_store_invl(p, &prev, &new_prev)) { | if (!pmap_di_store_invl(p, &prev, &new_prev)) { | ||||
critical_exit(); | critical_exit(); | ||||
PV_STAT(atomic_add_long(&invl_start_restart, 1)); | PV_STAT(counter_u64_add(invl_start_restart, 1)); | ||||
lock_delay(&lda); | lock_delay(&lda); | ||||
goto again; | goto again; | ||||
} | } | ||||
/* | /* | ||||
* There we clear PMAP_INVL_GEN_NEXT_INVALID in | * There we clear PMAP_INVL_GEN_NEXT_INVALID in | ||||
* invl_gen->next, allowing other threads to iterate past us. | * invl_gen->next, allowing other threads to iterate past us. | ||||
* pmap_di_store_invl() provides fence between the generation | * pmap_di_store_invl() provides fence between the generation | ||||
[... 47 lines elided ...] | pmap_delayed_invl_finish_u(void)
KASSERT(((uintptr_t)invl_gen->next & PMAP_INVL_GEN_NEXT_INVALID) == 0, | KASSERT(((uintptr_t)invl_gen->next & PMAP_INVL_GEN_NEXT_INVALID) == 0, | ||||
("missed invl_start: INVALID")); | ("missed invl_start: INVALID")); | ||||
lock_delay_arg_init(&lda, &di_delay); | lock_delay_arg_init(&lda, &di_delay); | ||||
again: | again: | ||||
for (p = &pmap_invl_gen_head; p != NULL; p = (void *)prevl) { | for (p = &pmap_invl_gen_head; p != NULL; p = (void *)prevl) { | ||||
prevl = (uintptr_t)atomic_load_ptr(&p->next); | prevl = (uintptr_t)atomic_load_ptr(&p->next); | ||||
if ((prevl & PMAP_INVL_GEN_NEXT_INVALID) != 0) { | if ((prevl & PMAP_INVL_GEN_NEXT_INVALID) != 0) { | ||||
PV_STAT(atomic_add_long(&invl_finish_restart, 1)); | PV_STAT(counter_u64_add(invl_finish_restart, 1)); | ||||
lock_delay(&lda); | lock_delay(&lda); | ||||
goto again; | goto again; | ||||
} | } | ||||
if ((void *)prevl == invl_gen) | if ((void *)prevl == invl_gen) | ||||
break; | break; | ||||
} | } | ||||
/* | /* | ||||
* It is legitimate to not find ourself on the list if a | * It is legitimate to not find ourself on the list if a | ||||
* thread before us finished its DI and started it again. | * thread before us finished its DI and started it again. | ||||
*/ | */ | ||||
if (__predict_false(p == NULL)) { | if (__predict_false(p == NULL)) { | ||||
PV_STAT(atomic_add_long(&invl_finish_restart, 1)); | PV_STAT(counter_u64_add(invl_finish_restart, 1)); | ||||
lock_delay(&lda); | lock_delay(&lda); | ||||
goto again; | goto again; | ||||
} | } | ||||
critical_enter(); | critical_enter(); | ||||
atomic_set_ptr((uintptr_t *)&invl_gen->next, | atomic_set_ptr((uintptr_t *)&invl_gen->next, | ||||
PMAP_INVL_GEN_NEXT_INVALID); | PMAP_INVL_GEN_NEXT_INVALID); | ||||
if (!pmap_delayed_invl_finish_u_crit(invl_gen, p)) { | if (!pmap_delayed_invl_finish_u_crit(invl_gen, p)) { | ||||
atomic_clear_ptr((uintptr_t *)&invl_gen->next, | atomic_clear_ptr((uintptr_t *)&invl_gen->next, | ||||
PMAP_INVL_GEN_NEXT_INVALID); | PMAP_INVL_GEN_NEXT_INVALID); | ||||
critical_exit(); | critical_exit(); | ||||
PV_STAT(atomic_add_long(&invl_finish_restart, 1)); | PV_STAT(counter_u64_add(invl_finish_restart, 1)); | ||||
lock_delay(&lda); | lock_delay(&lda); | ||||
goto again; | goto again; | ||||
} | } | ||||
critical_exit(); | critical_exit(); | ||||
if (atomic_load_int(&pmap_invl_waiters) > 0) | if (atomic_load_int(&pmap_invl_waiters) > 0) | ||||
pmap_delayed_invl_finish_unblock(0); | pmap_delayed_invl_finish_unblock(0); | ||||
if (invl_gen->saved_pri != 0) { | if (invl_gen->saved_pri != 0) { | ||||
thread_lock(td); | thread_lock(td); | ||||
[... 19 lines elided ...] | for (p = &pmap_invl_gen_head, first = true; p != NULL; p = pn,
db_printf("gen %lu inv %d td %p tid %d\n", p->gen, | db_printf("gen %lu inv %d td %p tid %d\n", p->gen, | ||||
(nextl & PMAP_INVL_GEN_NEXT_INVALID) != 0, td, | (nextl & PMAP_INVL_GEN_NEXT_INVALID) != 0, td, | ||||
td != NULL ? td->td_tid : -1); | td != NULL ? td->td_tid : -1); | ||||
} | } | ||||
} | } | ||||
#endif | #endif | ||||
#ifdef PV_STATS | #ifdef PV_STATS | ||||
static long invl_wait; | static COUNTER_U64_DEFINE_EARLY(invl_wait); | ||||
SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0, | SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, invl_wait, | ||||
CTLFLAG_RD, &invl_wait, | |||||
"Number of times DI invalidation blocked pmap_remove_all/write"); | "Number of times DI invalidation blocked pmap_remove_all/write"); | ||||
static long invl_wait_slow; | |||||
SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait_slow, CTLFLAG_RD, &invl_wait_slow, 0, | static COUNTER_U64_DEFINE_EARLY(invl_wait_slow); | ||||
"Number of slow invalidation waits for lockless DI"); | SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, invl_wait_slow, CTLFLAG_RD, | ||||
&invl_wait_slow, "Number of slow invalidation waits for lockless DI"); | |||||
#endif | #endif | ||||
#ifdef NUMA | #ifdef NUMA | ||||
static u_long * | static u_long * | ||||
pmap_delayed_invl_genp(vm_page_t m) | pmap_delayed_invl_genp(vm_page_t m) | ||||
{ | { | ||||
vm_paddr_t pa; | vm_paddr_t pa; | ||||
u_long *gen; | u_long *gen; | ||||
[... 57 lines elided ...]
#ifdef PV_STATS | #ifdef PV_STATS | ||||
bool accounted = false; | bool accounted = false; | ||||
#endif | #endif | ||||
m_gen = pmap_delayed_invl_genp(m); | m_gen = pmap_delayed_invl_genp(m); | ||||
while (*m_gen > pmap_invl_gen) { | while (*m_gen > pmap_invl_gen) { | ||||
#ifdef PV_STATS | #ifdef PV_STATS | ||||
if (!accounted) { | if (!accounted) { | ||||
atomic_add_long(&invl_wait, 1); | counter_u64_add(invl_wait, 1); | ||||
accounted = true; | accounted = true; | ||||
} | } | ||||
#endif | #endif | ||||
pmap_delayed_invl_wait_block(m_gen, &pmap_invl_gen); | pmap_delayed_invl_wait_block(m_gen, &pmap_invl_gen); | ||||
} | } | ||||
} | } | ||||
static void | static void | ||||
pmap_delayed_invl_wait_u(vm_page_t m) | pmap_delayed_invl_wait_u(vm_page_t m) | ||||
{ | { | ||||
u_long *m_gen; | u_long *m_gen; | ||||
struct lock_delay_arg lda; | struct lock_delay_arg lda; | ||||
bool fast; | bool fast; | ||||
fast = true; | fast = true; | ||||
m_gen = pmap_delayed_invl_genp(m); | m_gen = pmap_delayed_invl_genp(m); | ||||
lock_delay_arg_init(&lda, &di_delay); | lock_delay_arg_init(&lda, &di_delay); | ||||
while (*m_gen > atomic_load_long(&pmap_invl_gen_head.gen)) { | while (*m_gen > atomic_load_long(&pmap_invl_gen_head.gen)) { | ||||
if (fast || !pmap_invl_callout_inited) { | if (fast || !pmap_invl_callout_inited) { | ||||
PV_STAT(atomic_add_long(&invl_wait, 1)); | PV_STAT(counter_u64_add(invl_wait, 1)); | ||||
lock_delay(&lda); | lock_delay(&lda); | ||||
fast = false; | fast = false; | ||||
} else { | } else { | ||||
/* | /* | ||||
* The page's invalidation generation number | * The page's invalidation generation number | ||||
* is still below the current thread's number. | * is still below the current thread's number. | ||||
* Prepare to block so that we do not waste | * Prepare to block so that we do not waste | ||||
* CPU cycles or worse, suffer livelock. | * CPU cycles or worse, suffer livelock. | ||||
Show All 16 Lines | if (fast || !pmap_invl_callout_inited) { | ||||
* pmap_invl_waiters. The only race allowed | * pmap_invl_waiters. The only race allowed | ||||
* is for a missed unblock, which is handled | * is for a missed unblock, which is handled | ||||
* by the callout. | * by the callout. | ||||
*/ | */ | ||||
if (*m_gen > | if (*m_gen > | ||||
atomic_load_long(&pmap_invl_gen_head.gen)) { | atomic_load_long(&pmap_invl_gen_head.gen)) { | ||||
callout_reset(&pmap_invl_callout, 1, | callout_reset(&pmap_invl_callout, 1, | ||||
pmap_delayed_invl_callout_func, NULL); | pmap_delayed_invl_callout_func, NULL); | ||||
PV_STAT(atomic_add_long(&invl_wait_slow, 1)); | PV_STAT(counter_u64_add(invl_wait_slow, 1)); | ||||
pmap_delayed_invl_wait_block(m_gen, | pmap_delayed_invl_wait_block(m_gen, | ||||
&pmap_invl_gen_head.gen); | &pmap_invl_gen_head.gen); | ||||
} | } | ||||
atomic_add_int(&pmap_invl_waiters, -1); | atomic_add_int(&pmap_invl_waiters, -1); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
[... 1,323 lines elided ...]
SYSCTL_UINT(_vm_pmap, OID_AUTO, large_map_pml4_entries, | SYSCTL_UINT(_vm_pmap, OID_AUTO, large_map_pml4_entries, | ||||
CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &lm_ents, 0, | CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &lm_ents, 0, | ||||
"Maximum number of PML4 entries for use by large map (tunable). " | "Maximum number of PML4 entries for use by large map (tunable). " | ||||
"Each entry corresponds to 512GB of address space."); | "Each entry corresponds to 512GB of address space."); | ||||
static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, | static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, | ||||
"2MB page mapping counters"); | "2MB page mapping counters"); | ||||
static u_long pmap_pde_demotions; | static COUNTER_U64_DEFINE_EARLY(pmap_pde_demotions); | ||||
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, | SYSCTL_COUNTER_U64(_vm_pmap_pde, OID_AUTO, demotions, | ||||
&pmap_pde_demotions, 0, "2MB page demotions"); | CTLFLAG_RD, &pmap_pde_demotions, "2MB page demotions"); | ||||
[Not Done] kib: Promotions/demotions are quite rare.
[Done] jah: That's a little bit surprising to me. I'm not surprised that promotions are rare, but I would've expected that 2M->4K demotions might happen frequently under certain workloads, such as heavy forking of processes with large CoW data segments.
[Not Done] kib: This is on my relatively small 32G w/s, stable/12, uptime 70 days, load is poudriere + buildworld/buildkernel + emacs/browsers:
vm.pmap.pde.promotions: 52283
vm.pmap.pde.p_failures: 16395
vm.pmap.pde.mappings: 78953
vm.pmap.pde.demotions: 20526
[Not Done] markj: On my workstation, also with 32GB of RAM and with 1 day of uptime:
```
vm.pmap.pde.promotions: 87908
vm.pmap.pde.p_failures: 75207
vm.pmap.pde.mappings: 40585
vm.pmap.pde.demotions: 29985
```
Once physical memory is fragmented enough these counters are effectively frozen. For now I suspect that converting these to counter(9) will not benefit anything but I see no problem with it. If we gain some mechanisms for online defragmentation and speculative promotions, then these counters might be updated more frequently.
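All of the numbers quoted in this thread come from the vm.pmap.pde sysctl subtree. As a hedged aside, a userland reader for one of these OIDs via sysctlbyname(3) could look like this; counter(9)-backed OIDs export a uint64_t:

```
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t promotions;
	size_t len = sizeof(promotions);

	/* Same OID name as defined in the diff below. */
	if (sysctlbyname("vm.pmap.pde.promotions", &promotions, &len,
	    NULL, 0) == -1) {
		perror("sysctlbyname");
		return (1);
	}
	printf("2MB page promotions: %ju\n", (uintmax_t)promotions);
	return (0);
}
```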
static u_long pmap_pde_mappings; | static COUNTER_U64_DEFINE_EARLY(pmap_pde_mappings); | ||||
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, | SYSCTL_COUNTER_U64(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, | ||||
&pmap_pde_mappings, 0, "2MB page mappings"); | &pmap_pde_mappings, "2MB page mappings"); | ||||
static u_long pmap_pde_p_failures; | static COUNTER_U64_DEFINE_EARLY(pmap_pde_p_failures); | ||||
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, | SYSCTL_COUNTER_U64(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, | ||||
&pmap_pde_p_failures, 0, "2MB page promotion failures"); | &pmap_pde_p_failures, "2MB page promotion failures"); | ||||
static u_long pmap_pde_promotions; | static COUNTER_U64_DEFINE_EARLY(pmap_pde_promotions); | ||||
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, | SYSCTL_COUNTER_U64(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, | ||||
&pmap_pde_promotions, 0, "2MB page promotions"); | &pmap_pde_promotions, "2MB page promotions"); | ||||
static SYSCTL_NODE(_vm_pmap, OID_AUTO, pdpe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, | static SYSCTL_NODE(_vm_pmap, OID_AUTO, pdpe, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, | ||||
"1GB page mapping counters"); | "1GB page mapping counters"); | ||||
static u_long pmap_pdpe_demotions; | static COUNTER_U64_DEFINE_EARLY(pmap_pdpe_demotions); | ||||
SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD, | SYSCTL_COUNTER_U64(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD, | ||||
&pmap_pdpe_demotions, 0, "1GB page demotions"); | &pmap_pdpe_demotions, "1GB page demotions"); | ||||
/*************************************************** | /*************************************************** | ||||
* Low level helper routines..... | * Low level helper routines..... | ||||
***************************************************/ | ***************************************************/ | ||||
static pt_entry_t | static pt_entry_t | ||||
pmap_swap_pat(pmap_t pmap, pt_entry_t entry) | pmap_swap_pat(pmap_t pmap, pt_entry_t entry) | ||||
{ | { | ||||
[... 1,519 lines elided ...] | if (m->pindex < NUPDE) {
pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME); | pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME); | ||||
pmap_unwire_ptp(pmap, va, pdppg, free); | pmap_unwire_ptp(pmap, va, pdppg, free); | ||||
} else if (m->pindex < NUPDE + NUPDPE + NUPML4E && pmap_is_la57(pmap)) { | } else if (m->pindex < NUPDE + NUPDPE + NUPML4E && pmap_is_la57(pmap)) { | ||||
/* We just released a PDP, unhold the matching PML4 */ | /* We just released a PDP, unhold the matching PML4 */ | ||||
pml4pg = PHYS_TO_VM_PAGE(*pmap_pml5e(pmap, va) & PG_FRAME); | pml4pg = PHYS_TO_VM_PAGE(*pmap_pml5e(pmap, va) & PG_FRAME); | ||||
pmap_unwire_ptp(pmap, va, pml4pg, free); | pmap_unwire_ptp(pmap, va, pml4pg, free); | ||||
} | } | ||||
counter_u64_add(pt_page_count, -1); | |||||
/* | /* | ||||
* Put page on a list so that it is released after | * Put page on a list so that it is released after | ||||
* *ALL* TLB shootdown is done | * *ALL* TLB shootdown is done | ||||
*/ | */ | ||||
pmap_add_delayed_free_list(m, free, TRUE); | pmap_add_delayed_free_list(m, free, TRUE); | ||||
} | } | ||||
/* | /* | ||||
[... 164 lines elided ...] | pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
int i; | int i; | ||||
/* | /* | ||||
* allocate the page directory page | * allocate the page directory page | ||||
*/ | */ | ||||
pmltop_pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | | pmltop_pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | | ||||
VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK); | VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK); | ||||
counter_u64_add(pt_page_count, 1); | |||||
pmltop_phys = VM_PAGE_TO_PHYS(pmltop_pg); | pmltop_phys = VM_PAGE_TO_PHYS(pmltop_pg); | ||||
pmap->pm_pmltop = (pml5_entry_t *)PHYS_TO_DMAP(pmltop_phys); | pmap->pm_pmltop = (pml5_entry_t *)PHYS_TO_DMAP(pmltop_phys); | ||||
CPU_FOREACH(i) { | CPU_FOREACH(i) { | ||||
pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; | pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; | ||||
pmap->pm_pcids[i].pm_gen = 0; | pmap->pm_pcids[i].pm_gen = 0; | ||||
} | } | ||||
pmap->pm_cr3 = PMAP_NO_CR3; /* initialize to an invalid value */ | pmap->pm_cr3 = PMAP_NO_CR3; /* initialize to an invalid value */ | ||||
[... 15 lines elided ...] | case PT_X86:
pmap->pm_cr3 = pmltop_phys; | pmap->pm_cr3 = pmltop_phys; | ||||
if (pmap_is_la57(pmap)) | if (pmap_is_la57(pmap)) | ||||
pmap_pinit_pml5(pmltop_pg); | pmap_pinit_pml5(pmltop_pg); | ||||
else | else | ||||
pmap_pinit_pml4(pmltop_pg); | pmap_pinit_pml4(pmltop_pg); | ||||
if ((curproc->p_md.md_flags & P_MD_KPTI) != 0) { | if ((curproc->p_md.md_flags & P_MD_KPTI) != 0) { | ||||
pmltop_pgu = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | | pmltop_pgu = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | | ||||
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_WAITOK); | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_WAITOK); | ||||
counter_u64_add(pt_page_count, 1); | |||||
pmap->pm_pmltopu = (pml4_entry_t *)PHYS_TO_DMAP( | pmap->pm_pmltopu = (pml4_entry_t *)PHYS_TO_DMAP( | ||||
VM_PAGE_TO_PHYS(pmltop_pgu)); | VM_PAGE_TO_PHYS(pmltop_pgu)); | ||||
if (pmap_is_la57(pmap)) | if (pmap_is_la57(pmap)) | ||||
pmap_pinit_pml5_pti(pmltop_pgu); | pmap_pinit_pml5_pti(pmltop_pgu); | ||||
else | else | ||||
pmap_pinit_pml4_pti(pmltop_pgu); | pmap_pinit_pml4_pti(pmltop_pgu); | ||||
pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pmltop_pgu); | pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pmltop_pgu); | ||||
} | } | ||||
[... 171 lines elided ...] | pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp,
PG_RW = pmap_rw_bit(pmap); | PG_RW = pmap_rw_bit(pmap); | ||||
/* | /* | ||||
* Allocate a page table page. | * Allocate a page table page. | ||||
*/ | */ | ||||
if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | | if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | | ||||
VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) | ||||
return (NULL); | return (NULL); | ||||
if ((m->flags & PG_ZERO) == 0) | if ((m->flags & PG_ZERO) == 0) | ||||
pmap_zero_page(m); | pmap_zero_page(m); | ||||
/* | /* | ||||
* Map the pagetable page into the process address space, if | * Map the pagetable page into the process address space, if | ||||
* it isn't already there. | * it isn't already there. | ||||
*/ | */ | ||||
if (ptepindex >= NUPDE + NUPDPE + NUPML4E) { | if (ptepindex >= NUPDE + NUPDPE + NUPML4E) { | ||||
[... 81 lines elided ...] | if (ptepindex >= NUPDE + NUPDPE + NUPML4E) {
/* Now we know where the page directory page is */ | /* Now we know where the page directory page is */ | ||||
pd = &pd[pmap_pde_index(va)]; | pd = &pd[pmap_pde_index(va)]; | ||||
KASSERT((*pd & PG_V) == 0, | KASSERT((*pd & PG_V) == 0, | ||||
("pmap %p va %#lx pd %#lx", pmap, va, *pd)); | ("pmap %p va %#lx pd %#lx", pmap, va, *pd)); | ||||
*pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; | *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; | ||||
} | } | ||||
pmap_resident_count_inc(pmap, 1); | pmap_resident_count_inc(pmap, 1); | ||||
counter_u64_add(pt_page_count, 1); | |||||
[Not Done] kib: Stray new line.
return (m); | return (m); | ||||
} | } | ||||
/* | /* | ||||
* This routine is called if the desired page table page does not exist. | * This routine is called if the desired page table page does not exist. | ||||
* | * | ||||
* If page table page allocation fails, this routine may sleep before | * If page table page allocation fails, this routine may sleep before | ||||
* returning NULL. It sleeps only if a lock pointer was given. Sleep | * returning NULL. It sleeps only if a lock pointer was given. Sleep | ||||
[... 146 lines elided ...] | for (i = 0; i < ndmpdpphys; i++)	/* Direct Map */
pmap->pm_pmltop[DMPML4I + i] = 0; | pmap->pm_pmltop[DMPML4I + i] = 0; | ||||
pmap->pm_pmltop[PML4PML4I] = 0; /* Recursive Mapping */ | pmap->pm_pmltop[PML4PML4I] = 0; /* Recursive Mapping */ | ||||
for (i = 0; i < lm_ents; i++) /* Large Map */ | for (i = 0; i < lm_ents; i++) /* Large Map */ | ||||
pmap->pm_pmltop[LMSPML4I + i] = 0; | pmap->pm_pmltop[LMSPML4I + i] = 0; | ||||
} | } | ||||
vm_page_unwire_noq(m); | vm_page_unwire_noq(m); | ||||
vm_page_free_zero(m); | vm_page_free_zero(m); | ||||
counter_u64_add(pt_page_count, -1); | |||||
if (pmap->pm_pmltopu != NULL) { | if (pmap->pm_pmltopu != NULL) { | ||||
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap-> | m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap-> | ||||
pm_pmltopu)); | pm_pmltopu)); | ||||
vm_page_unwire_noq(m); | vm_page_unwire_noq(m); | ||||
vm_page_free(m); | vm_page_free(m); | ||||
counter_u64_add(pt_page_count, -1); | |||||
} | } | ||||
if (pmap->pm_type == PT_X86 && | if (pmap->pm_type == PT_X86 && | ||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) | (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) | ||||
rangeset_fini(&pmap->pm_pkru); | rangeset_fini(&pmap->pm_pkru); | ||||
} | } | ||||
static int | static int | ||||
kvm_size(SYSCTL_HANDLER_ARGS) | kvm_size(SYSCTL_HANDLER_ARGS) | ||||
[... 99 lines elided ...] | if ((*pdpe & X86_PG_V) == 0) {
/* We need a new PDP entry */ | /* We need a new PDP entry */ | ||||
nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT, | nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT, | ||||
VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | | VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | | ||||
VM_ALLOC_WIRED | VM_ALLOC_ZERO); | VM_ALLOC_WIRED | VM_ALLOC_ZERO); | ||||
if (nkpg == NULL) | if (nkpg == NULL) | ||||
panic("pmap_growkernel: no memory to grow kernel"); | panic("pmap_growkernel: no memory to grow kernel"); | ||||
if ((nkpg->flags & PG_ZERO) == 0) | if ((nkpg->flags & PG_ZERO) == 0) | ||||
pmap_zero_page(nkpg); | pmap_zero_page(nkpg); | ||||
counter_u64_add(pt_page_count, 1); | |||||
paddr = VM_PAGE_TO_PHYS(nkpg); | paddr = VM_PAGE_TO_PHYS(nkpg); | ||||
*pdpe = (pdp_entry_t)(paddr | X86_PG_V | X86_PG_RW | | *pdpe = (pdp_entry_t)(paddr | X86_PG_V | X86_PG_RW | | ||||
X86_PG_A | X86_PG_M); | X86_PG_A | X86_PG_M); | ||||
continue; /* try again */ | continue; /* try again */ | ||||
} | } | ||||
pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); | pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); | ||||
if ((*pde & X86_PG_V) != 0) { | if ((*pde & X86_PG_V) != 0) { | ||||
kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; | kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; | ||||
if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { | if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { | ||||
kernel_vm_end = vm_map_max(kernel_map); | kernel_vm_end = vm_map_max(kernel_map); | ||||
break; | break; | ||||
} | } | ||||
continue; | continue; | ||||
} | } | ||||
nkpg = vm_page_alloc(NULL, pmap_pde_pindex(kernel_vm_end), | nkpg = vm_page_alloc(NULL, pmap_pde_pindex(kernel_vm_end), | ||||
VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | | VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | | ||||
VM_ALLOC_ZERO); | VM_ALLOC_ZERO); | ||||
if (nkpg == NULL) | if (nkpg == NULL) | ||||
panic("pmap_growkernel: no memory to grow kernel"); | panic("pmap_growkernel: no memory to grow kernel"); | ||||
if ((nkpg->flags & PG_ZERO) == 0) | if ((nkpg->flags & PG_ZERO) == 0) | ||||
pmap_zero_page(nkpg); | pmap_zero_page(nkpg); | ||||
counter_u64_add(pt_page_count, 1); | |||||
paddr = VM_PAGE_TO_PHYS(nkpg); | paddr = VM_PAGE_TO_PHYS(nkpg); | ||||
newpdir = paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; | newpdir = paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; | ||||
pde_store(pde, newpdir); | pde_store(pde, newpdir); | ||||
kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; | kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; | ||||
if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { | if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { | ||||
kernel_vm_end = vm_map_max(kernel_map); | kernel_vm_end = vm_map_max(kernel_map); | ||||
break; | break; | ||||
[... 20 lines elided ...]
#define PC_FREE0 0xfffffffffffffffful | #define PC_FREE0 0xfffffffffffffffful | ||||
#define PC_FREE1 0xfffffffffffffffful | #define PC_FREE1 0xfffffffffffffffful | ||||
#define PC_FREE2 0x000000fffffffffful | #define PC_FREE2 0x000000fffffffffful | ||||
static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; | static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; | ||||
#ifdef PV_STATS | #ifdef PV_STATS | ||||
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; | |||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, | static COUNTER_U64_DEFINE_EARLY(pc_chunk_count); | ||||
"Current number of pv entry chunks"); | SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, | ||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, | &pc_chunk_count, "Current number of pv entry chunks"); |
"Current number of pv entry chunks allocated"); | |||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, | |||||
"Current number of pv entry chunks frees"); | |||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, | |||||
"Number of times tried to get a chunk page but failed."); | |||||
static long pv_entry_frees, pv_entry_allocs, pv_entry_count; | static COUNTER_U64_DEFINE_EARLY(pc_chunk_allocs); | ||||
static int pv_entry_spare; | SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, | ||||
&pc_chunk_allocs, "Total number of pv entry chunks allocated"); | |||||
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, | static COUNTER_U64_DEFINE_EARLY(pc_chunk_frees); | ||||
"Current number of pv entry frees"); | SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, | ||||
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, | &pc_chunk_frees, "Total number of pv entry chunks freed"); | ||||
"Current number of pv entry allocs"); | |||||
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, | static COUNTER_U64_DEFINE_EARLY(pc_chunk_tryfail); | ||||
"Current number of pv entries"); | SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, | ||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, | &pc_chunk_tryfail, | ||||
"Current number of spare pv entries"); | "Number of failed attempts to get a pv entry chunk page"); | ||||
static COUNTER_U64_DEFINE_EARLY(pv_entry_frees); | |||||
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, | |||||
&pv_entry_frees, "Total number of pv entries freed"); | |||||
static COUNTER_U64_DEFINE_EARLY(pv_entry_allocs); | |||||
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, | |||||
&pv_entry_allocs, "Total number of pv entries allocated"); | |||||
static COUNTER_U64_DEFINE_EARLY(pv_entry_count); | |||||
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, | |||||
&pv_entry_count, "Current number of pv entries"); | |||||
static COUNTER_U64_DEFINE_EARLY(pv_entry_spare); | |||||
SYSCTL_COUNTER_U64(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, | |||||
&pv_entry_spare, "Current number of spare pv entries"); | |||||
#endif | #endif | ||||
static void | static void | ||||
reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked_pmap, bool start_di) | reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked_pmap, bool start_di) | ||||
{ | { | ||||
if (pmap == NULL) | if (pmap == NULL) | ||||
return; | return; | ||||
[... 156 lines elided ...] | for (field = 0; field < _NPCM; field++) {
} | } | ||||
} | } | ||||
if (freed == 0) { | if (freed == 0) { | ||||
mtx_lock(&pvc->pvc_lock); | mtx_lock(&pvc->pvc_lock); | ||||
goto next_chunk; | goto next_chunk; | ||||
} | } | ||||
/* Every freed mapping is for a 4 KB page. */ | /* Every freed mapping is for a 4 KB page. */ | ||||
pmap_resident_count_dec(pmap, freed); | pmap_resident_count_dec(pmap, freed); | ||||
PV_STAT(atomic_add_long(&pv_entry_frees, freed)); | PV_STAT(counter_u64_add(pv_entry_frees, freed)); | ||||
PV_STAT(atomic_add_int(&pv_entry_spare, freed)); | PV_STAT(counter_u64_add(pv_entry_spare, freed)); | ||||
PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); | PV_STAT(counter_u64_add(pv_entry_count, -freed)); | ||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | ||||
if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 && | if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 && | ||||
pc->pc_map[2] == PC_FREE2) { | pc->pc_map[2] == PC_FREE2) { | ||||
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); | PV_STAT(counter_u64_add(pv_entry_spare, -_NPCPV)); | ||||
PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); | PV_STAT(counter_u64_add(pc_chunk_count, -1)); | ||||
PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); | PV_STAT(counter_u64_add(pc_chunk_frees, 1)); | ||||
/* Entire chunk is free; return it. */ | /* Entire chunk is free; return it. */ | ||||
m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); | m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); | ||||
dump_drop_page(m_pc->phys_addr); | dump_drop_page(m_pc->phys_addr); | ||||
mtx_lock(&pvc->pvc_lock); | mtx_lock(&pvc->pvc_lock); | ||||
TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru); | TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru); | ||||
break; | break; | ||||
} | } | ||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); | ||||
[... 56 lines elided ...]
*/ | */ | ||||
static void | static void | ||||
free_pv_entry(pmap_t pmap, pv_entry_t pv) | free_pv_entry(pmap_t pmap, pv_entry_t pv) | ||||
{ | { | ||||
struct pv_chunk *pc; | struct pv_chunk *pc; | ||||
int idx, field, bit; | int idx, field, bit; | ||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | PMAP_LOCK_ASSERT(pmap, MA_OWNED); | ||||
PV_STAT(atomic_add_long(&pv_entry_frees, 1)); | PV_STAT(counter_u64_add(pv_entry_frees, 1)); | ||||
PV_STAT(atomic_add_int(&pv_entry_spare, 1)); | PV_STAT(counter_u64_add(pv_entry_spare, 1)); | ||||
PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); | PV_STAT(counter_u64_add(pv_entry_count, -1)); | ||||
pc = pv_to_chunk(pv); | pc = pv_to_chunk(pv); | ||||
idx = pv - &pc->pc_pventry[0]; | idx = pv - &pc->pc_pventry[0]; | ||||
field = idx / 64; | field = idx / 64; | ||||
bit = idx % 64; | bit = idx % 64; | ||||
pc->pc_map[field] |= 1ul << bit; | pc->pc_map[field] |= 1ul << bit; | ||||
if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || | if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || | ||||
pc->pc_map[2] != PC_FREE2) { | pc->pc_map[2] != PC_FREE2) { | ||||
/* 98% of the time, pc is already at the head of the list. */ | /* 98% of the time, pc is already at the head of the list. */ | ||||
if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { | if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { | ||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | ||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); | ||||
} | } | ||||
return; | return; | ||||
} | } | ||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | ||||
free_pv_chunk(pc); | free_pv_chunk(pc); | ||||
} | } | ||||
static void | static void | ||||
free_pv_chunk_dequeued(struct pv_chunk *pc) | free_pv_chunk_dequeued(struct pv_chunk *pc) | ||||
{ | { | ||||
vm_page_t m; | vm_page_t m; | ||||
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); | PV_STAT(counter_u64_add(pv_entry_spare, -_NPCPV)); | ||||
PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); | PV_STAT(counter_u64_add(pc_chunk_count, -1)); | ||||
PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); | PV_STAT(counter_u64_add(pc_chunk_frees, 1)); | ||||
counter_u64_add(pv_page_count, -1); | |||||
/* entire chunk is free, return it */ | /* entire chunk is free, return it */ | ||||
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); | m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); | ||||
dump_drop_page(m->phys_addr); | dump_drop_page(m->phys_addr); | ||||
vm_page_unwire_noq(m); | vm_page_unwire_noq(m); | ||||
vm_page_free(m); | vm_page_free(m); | ||||
} | } | ||||
static void | static void | ||||
[... 46 lines elided ...]
{ | { | ||||
struct pv_chunks_list *pvc; | struct pv_chunks_list *pvc; | ||||
int bit, field; | int bit, field; | ||||
pv_entry_t pv; | pv_entry_t pv; | ||||
struct pv_chunk *pc; | struct pv_chunk *pc; | ||||
vm_page_t m; | vm_page_t m; | ||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | PMAP_LOCK_ASSERT(pmap, MA_OWNED); | ||||
PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); | PV_STAT(counter_u64_add(pv_entry_allocs, 1)); | ||||
retry: | retry: | ||||
pc = TAILQ_FIRST(&pmap->pm_pvchunk); | pc = TAILQ_FIRST(&pmap->pm_pvchunk); | ||||
if (pc != NULL) { | if (pc != NULL) { | ||||
for (field = 0; field < _NPCM; field++) { | for (field = 0; field < _NPCM; field++) { | ||||
if (pc->pc_map[field]) { | if (pc->pc_map[field]) { | ||||
bit = bsfq(pc->pc_map[field]); | bit = bsfq(pc->pc_map[field]); | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
if (field < _NPCM) { | if (field < _NPCM) { | ||||
pv = &pc->pc_pventry[field * 64 + bit]; | pv = &pc->pc_pventry[field * 64 + bit]; | ||||
pc->pc_map[field] &= ~(1ul << bit); | pc->pc_map[field] &= ~(1ul << bit); | ||||
/* If this was the last item, move it to tail */ | /* If this was the last item, move it to tail */ | ||||
if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && | if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && | ||||
pc->pc_map[2] == 0) { | pc->pc_map[2] == 0) { | ||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | ||||
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, | TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, | ||||
pc_list); | pc_list); | ||||
} | } | ||||
PV_STAT(atomic_add_long(&pv_entry_count, 1)); | PV_STAT(counter_u64_add(pv_entry_count, 1)); | ||||
PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); | PV_STAT(counter_u64_add(pv_entry_spare, -1)); | ||||
return (pv); | return (pv); | ||||
} | } | ||||
} | } | ||||
/* No free items, allocate another chunk */ | /* No free items, allocate another chunk */ | ||||
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | | m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | | ||||
VM_ALLOC_WIRED); | VM_ALLOC_WIRED); | ||||
if (m == NULL) { | if (m == NULL) { | ||||
if (lockp == NULL) { | if (lockp == NULL) { | ||||
PV_STAT(pc_chunk_tryfail++); | PV_STAT(counter_u64_add(pc_chunk_tryfail, 1)); | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
m = reclaim_pv_chunk(pmap, lockp); | m = reclaim_pv_chunk(pmap, lockp); | ||||
if (m == NULL) | if (m == NULL) | ||||
goto retry; | goto retry; | ||||
} | } else | ||||
PV_STAT(atomic_add_int(&pc_chunk_count, 1)); | counter_u64_add(pv_page_count, 1); | ||||
PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); | PV_STAT(counter_u64_add(pc_chunk_count, 1)); | ||||
PV_STAT(counter_u64_add(pc_chunk_allocs, 1)); | |||||
dump_add_page(m->phys_addr); | dump_add_page(m->phys_addr); | ||||
pc = (void *)PHYS_TO_DMAP(m->phys_addr); | pc = (void *)PHYS_TO_DMAP(m->phys_addr); | ||||
pc->pc_pmap = pmap; | pc->pc_pmap = pmap; | ||||
pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ | pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ | ||||
pc->pc_map[1] = PC_FREE1; | pc->pc_map[1] = PC_FREE1; | ||||
pc->pc_map[2] = PC_FREE2; | pc->pc_map[2] = PC_FREE2; | ||||
pvc = &pv_chunks[vm_page_domain(m)]; | pvc = &pv_chunks[vm_page_domain(m)]; | ||||
mtx_lock(&pvc->pvc_lock); | mtx_lock(&pvc->pvc_lock); | ||||
TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru); | TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru); | ||||
mtx_unlock(&pvc->pvc_lock); | mtx_unlock(&pvc->pvc_lock); | ||||
pv = &pc->pc_pventry[0]; | pv = &pc->pc_pventry[0]; | ||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); | ||||
PV_STAT(atomic_add_long(&pv_entry_count, 1)); | PV_STAT(counter_u64_add(pv_entry_count, 1)); | ||||
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); | PV_STAT(counter_u64_add(pv_entry_spare, _NPCPV - 1)); | ||||
return (pv); | return (pv); | ||||
} | } | ||||
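get_pv_entry() above treats pc_map[] as a three-word free bitmap: a set bit marks a free pv entry, bsfq() locates the lowest one, and clearing it claims the slot. A standalone sketch of that bitmap trick, with hypothetical names and the portable compiler builtin standing in for the kernel's bsfq():

```
#include <stdint.h>

#define	NWORDS	3	/* like _NPCM */

/* Claim the lowest free slot; returns its index, or -1 if the map is full. */
static int
bitmap_claim(uint64_t map[NWORDS])
{
	int bit, field;

	for (field = 0; field < NWORDS; field++) {
		if (map[field] != 0) {
			/* Lowest set bit is the lowest-numbered free entry. */
			bit = __builtin_ctzll(map[field]);
			map[field] &= ~(1ULL << bit);
			return (field * 64 + bit);
		}
	}
	return (-1);
}
```

Freeing is the inverse, as in free_pv_entry() above: pc->pc_map[field] |= 1ul << bit.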
/* | /* | ||||
* Returns the number of one bits within the given PV chunk map. | * Returns the number of one bits within the given PV chunk map. | ||||
* | * | ||||
* The erratas for Intel processors state that "POPCNT Instruction May | * The erratas for Intel processors state that "POPCNT Instruction May | ||||
* Take Longer to Execute Than Expected". It is believed that the | * Take Longer to Execute Than Expected". It is believed that the | ||||
[... 67 lines elided ...] | #endif
for (reclaimed = false; avail < needed; avail += _NPCPV) { | for (reclaimed = false; avail < needed; avail += _NPCPV) { | ||||
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | | m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | | ||||
VM_ALLOC_WIRED); | VM_ALLOC_WIRED); | ||||
if (m == NULL) { | if (m == NULL) { | ||||
m = reclaim_pv_chunk(pmap, lockp); | m = reclaim_pv_chunk(pmap, lockp); | ||||
if (m == NULL) | if (m == NULL) | ||||
goto retry; | goto retry; | ||||
reclaimed = true; | reclaimed = true; | ||||
} | } else | ||||
PV_STAT(atomic_add_int(&pc_chunk_count, 1)); | counter_u64_add(pv_page_count, 1); | ||||
PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); | PV_STAT(counter_u64_add(pc_chunk_count, 1)); | ||||
PV_STAT(counter_u64_add(pc_chunk_allocs, 1)); | |||||
dump_add_page(m->phys_addr); | dump_add_page(m->phys_addr); | ||||
pc = (void *)PHYS_TO_DMAP(m->phys_addr); | pc = (void *)PHYS_TO_DMAP(m->phys_addr); | ||||
pc->pc_pmap = pmap; | pc->pc_pmap = pmap; | ||||
pc->pc_map[0] = PC_FREE0; | pc->pc_map[0] = PC_FREE0; | ||||
pc->pc_map[1] = PC_FREE1; | pc->pc_map[1] = PC_FREE1; | ||||
pc->pc_map[2] = PC_FREE2; | pc->pc_map[2] = PC_FREE2; | ||||
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); | ||||
TAILQ_INSERT_TAIL(&new_tail[vm_page_domain(m)], pc, pc_lru); | TAILQ_INSERT_TAIL(&new_tail[vm_page_domain(m)], pc, pc_lru); | ||||
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); | PV_STAT(counter_u64_add(pv_entry_spare, _NPCPV)); | ||||
/* | /* | ||||
* The reclaim might have freed a chunk from the current pmap. | * The reclaim might have freed a chunk from the current pmap. | ||||
* If that chunk contained available entries, we need to | * If that chunk contained available entries, we need to | ||||
* re-count the number of available entries. | * re-count the number of available entries. | ||||
*/ | */ | ||||
if (reclaimed) | if (reclaimed) | ||||
goto retry; | goto retry; | ||||
[... 58 lines elided ...] | pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
pvh = pa_to_pvh(pa); | pvh = pa_to_pvh(pa); | ||||
va = trunc_2mpage(va); | va = trunc_2mpage(va); | ||||
pv = pmap_pvh_remove(pvh, pmap, va); | pv = pmap_pvh_remove(pvh, pmap, va); | ||||
KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); | KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); | ||||
m = PHYS_TO_VM_PAGE(pa); | m = PHYS_TO_VM_PAGE(pa); | ||||
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); | TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); | ||||
m->md.pv_gen++; | m->md.pv_gen++; | ||||
/* Instantiate the remaining NPTEPG - 1 pv entries. */ | /* Instantiate the remaining NPTEPG - 1 pv entries. */ | ||||
PV_STAT(atomic_add_long(&pv_entry_allocs, NPTEPG - 1)); | PV_STAT(counter_u64_add(pv_entry_allocs, NPTEPG - 1)); | ||||
va_last = va + NBPDR - PAGE_SIZE; | va_last = va + NBPDR - PAGE_SIZE; | ||||
for (;;) { | for (;;) { | ||||
pc = TAILQ_FIRST(&pmap->pm_pvchunk); | pc = TAILQ_FIRST(&pmap->pm_pvchunk); | ||||
KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || | KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || | ||||
pc->pc_map[2] != 0, ("pmap_pv_demote_pde: missing spare")); | pc->pc_map[2] != 0, ("pmap_pv_demote_pde: missing spare")); | ||||
for (field = 0; field < _NPCM; field++) { | for (field = 0; field < _NPCM; field++) { | ||||
while (pc->pc_map[field]) { | while (pc->pc_map[field]) { | ||||
bit = bsfq(pc->pc_map[field]); | bit = bsfq(pc->pc_map[field]); | ||||
[... 13 lines elided ...] | for (;;) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | ||||
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); | ||||
} | } | ||||
out: | out: | ||||
if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { | if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { | ||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | ||||
TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); | ||||
} | } | ||||
PV_STAT(atomic_add_long(&pv_entry_count, NPTEPG - 1)); | PV_STAT(counter_u64_add(pv_entry_count, NPTEPG - 1)); | ||||
PV_STAT(atomic_subtract_int(&pv_entry_spare, NPTEPG - 1)); | PV_STAT(counter_u64_add(pv_entry_spare, -(NPTEPG - 1))); | ||||
} | } | ||||
#if VM_NRESERVLEVEL > 0 | #if VM_NRESERVLEVEL > 0 | ||||
/* | /* | ||||
* After promotion from 512 4KB page mappings to a single 2MB page mapping, | * After promotion from 512 4KB page mappings to a single 2MB page mapping, | ||||
* replace the many pv entries for the 4KB page mappings by a single pv entry | * replace the many pv entries for the 4KB page mappings by a single pv entry | ||||
* for the 2MB page mapping. | * for the 2MB page mapping. | ||||
*/ | */ | ||||
[... 243 lines elided ...] | if (mpte == NULL) {
* If the allocation of the new page table page fails, | * If the allocation of the new page table page fails, | ||||
* invalidate the 2MB page mapping and return "failure". | * invalidate the 2MB page mapping and return "failure". | ||||
*/ | */ | ||||
if (mpte == NULL) { | if (mpte == NULL) { | ||||
pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp); | pmap_demote_pde_abort(pmap, va, pde, oldpde, lockp); | ||||
return (FALSE); | return (FALSE); | ||||
} | } | ||||
counter_u64_add(pt_page_count, 1); | |||||
if (!in_kernel) { | if (!in_kernel) { | ||||
mpte->ref_count = NPTEPG; | mpte->ref_count = NPTEPG; | ||||
pmap_resident_count_inc(pmap, 1); | pmap_resident_count_inc(pmap, 1); | ||||
} | } | ||||
} | } | ||||
mptepa = VM_PAGE_TO_PHYS(mpte); | mptepa = VM_PAGE_TO_PHYS(mpte); | ||||
firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); | firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); | ||||
newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; | newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | if (in_kernel) | ||||
pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); | pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); | ||||
/* | /* | ||||
* Demote the PV entry. | * Demote the PV entry. | ||||
*/ | */ | ||||
if ((oldpde & PG_MANAGED) != 0) | if ((oldpde & PG_MANAGED) != 0) | ||||
pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); | pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); | ||||
atomic_add_long(&pmap_pde_demotions, 1); | counter_u64_add(pmap_pde_demotions, 1); | ||||
CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx in pmap %p", | CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx in pmap %p", | ||||
va, pmap); | va, pmap); | ||||
return (TRUE); | return (TRUE); | ||||
} | } | ||||
/* | /* | ||||
* pmap_remove_kernel_pde: Remove a kernel superpage mapping. | * pmap_remove_kernel_pde: Remove a kernel superpage mapping. | ||||
*/ | */ | ||||
[... 705 lines elided ...] | pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
* within a 2MB page. | * within a 2MB page. | ||||
*/ | */ | ||||
firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); | firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); | ||||
setpde: | setpde: | ||||
newpde = *firstpte; | newpde = *firstpte; | ||||
if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) || | if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) || | ||||
!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, | !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap, | ||||
newpde))) { | newpde))) { | ||||
atomic_add_long(&pmap_pde_p_failures, 1); | counter_u64_add(pmap_pde_p_failures, 1); | ||||
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" | CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" | ||||
" in pmap %p", va, pmap); | " in pmap %p", va, pmap); | ||||
return; | return; | ||||
} | } | ||||
if ((newpde & (PG_M | PG_RW)) == PG_RW) { | if ((newpde & (PG_M | PG_RW)) == PG_RW) { | ||||
/* | /* | ||||
* When PG_M is already clear, PG_RW can be cleared without | * When PG_M is already clear, PG_RW can be cleared without | ||||
* a TLB invalidation. | * a TLB invalidation. | ||||
*/ | */ | ||||
if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW)) | if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW)) | ||||
goto setpde; | goto setpde; | ||||
newpde &= ~PG_RW; | newpde &= ~PG_RW; | ||||
} | } | ||||
/* | /* | ||||
* Examine each of the other PTEs in the specified PTP. Abort if this | * Examine each of the other PTEs in the specified PTP. Abort if this | ||||
* PTE maps an unexpected 4KB physical page or does not have identical | * PTE maps an unexpected 4KB physical page or does not have identical | ||||
* characteristics to the first PTE. | * characteristics to the first PTE. | ||||
*/ | */ | ||||
pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; | pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; | ||||
for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { | for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { | ||||
setpte: | setpte: | ||||
oldpte = *pte; | oldpte = *pte; | ||||
if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { | if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { | ||||
atomic_add_long(&pmap_pde_p_failures, 1); | counter_u64_add(pmap_pde_p_failures, 1); | ||||
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" | CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" | ||||
" in pmap %p", va, pmap); | " in pmap %p", va, pmap); | ||||
return; | return; | ||||
} | } | ||||
if ((oldpte & (PG_M | PG_RW)) == PG_RW) { | if ((oldpte & (PG_M | PG_RW)) == PG_RW) { | ||||
/* | /* | ||||
* When PG_M is already clear, PG_RW can be cleared | * When PG_M is already clear, PG_RW can be cleared | ||||
* without a TLB invalidation. | * without a TLB invalidation. | ||||
*/ | */ | ||||
if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW)) | if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW)) | ||||
goto setpte; | goto setpte; | ||||
oldpte &= ~PG_RW; | oldpte &= ~PG_RW; | ||||
CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx" | CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx" | ||||
" in pmap %p", (oldpte & PG_FRAME & PDRMASK) | | " in pmap %p", (oldpte & PG_FRAME & PDRMASK) | | ||||
(va & ~PDRMASK), pmap); | (va & ~PDRMASK), pmap); | ||||
} | } | ||||
if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { | if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { | ||||
atomic_add_long(&pmap_pde_p_failures, 1); | counter_u64_add(pmap_pde_p_failures, 1); | ||||
CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" | CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" | ||||
" in pmap %p", va, pmap); | " in pmap %p", va, pmap); | ||||
return; | return; | ||||
} | } | ||||
pa -= PAGE_SIZE; | pa -= PAGE_SIZE; | ||||
} | } | ||||
/* | /* | ||||
* Save the page table page in its current state until the PDE | * Save the page table page in its current state until the PDE | ||||
* mapping the superpage is demoted by pmap_demote_pde() or | * mapping the superpage is demoted by pmap_demote_pde() or | ||||
* destroyed by pmap_remove_pde(). | * destroyed by pmap_remove_pde(). | ||||
*/ | */ | ||||
mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); | mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); | ||||
KASSERT(mpte >= vm_page_array && | KASSERT(mpte >= vm_page_array && | ||||
mpte < &vm_page_array[vm_page_array_size], | mpte < &vm_page_array[vm_page_array_size], | ||||
("pmap_promote_pde: page table page is out of range")); | ("pmap_promote_pde: page table page is out of range")); | ||||
KASSERT(mpte->pindex == pmap_pde_pindex(va), | KASSERT(mpte->pindex == pmap_pde_pindex(va), | ||||
("pmap_promote_pde: page table page's pindex is wrong")); | ("pmap_promote_pde: page table page's pindex is wrong")); | ||||
if (pmap_insert_pt_page(pmap, mpte, true)) { | if (pmap_insert_pt_page(pmap, mpte, true)) { | ||||
atomic_add_long(&pmap_pde_p_failures, 1); | counter_u64_add(pmap_pde_p_failures, 1); | ||||
CTR2(KTR_PMAP, | CTR2(KTR_PMAP, | ||||
"pmap_promote_pde: failure for va %#lx in pmap %p", va, | "pmap_promote_pde: failure for va %#lx in pmap %p", va, | ||||
pmap); | pmap); | ||||
return; | return; | ||||
} | } | ||||
/* | /* | ||||
* Promote the pv entries. | * Promote the pv entries. | ||||
[... 9 lines elided ...] | setpte: | ||||
/* | /* | ||||
* Map the superpage. | * Map the superpage. | ||||
*/ | */ | ||||
if (workaround_erratum383) | if (workaround_erratum383) | ||||
pmap_update_pde(pmap, va, pde, PG_PS | newpde); | pmap_update_pde(pmap, va, pde, PG_PS | newpde); | ||||
else | else | ||||
pde_store(pde, PG_PROMOTED | PG_PS | newpde); | pde_store(pde, PG_PROMOTED | PG_PS | newpde); | ||||
atomic_add_long(&pmap_pde_promotions, 1); | counter_u64_add(pmap_pde_promotions, 1); | ||||
CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" | CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" | ||||
" in pmap %p", va, pmap); | " in pmap %p", va, pmap); | ||||
} | } | ||||
#endif /* VM_NRESERVLEVEL > 0 */ | #endif /* VM_NRESERVLEVEL > 0 */ | ||||
static int | static int | ||||
pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, | pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, | ||||
int psind) | int psind) | ||||
[... 559 lines elided ...] | pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, | ||||
pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); | pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); | ||||
/* | /* | ||||
* Map the superpage. (This is not a promoted mapping; there will not | * Map the superpage. (This is not a promoted mapping; there will not | ||||
* be any lingering 4KB page mappings in the TLB.) | * be any lingering 4KB page mappings in the TLB.) | ||||
*/ | */ | ||||
pde_store(pde, newpde); | pde_store(pde, newpde); | ||||
atomic_add_long(&pmap_pde_mappings, 1); | counter_u64_add(pmap_pde_mappings, 1); | ||||
CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx in pmap %p", | CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx in pmap %p", | ||||
va, pmap); | va, pmap); | ||||
return (KERN_SUCCESS); | return (KERN_SUCCESS); | ||||
} | } | ||||
/* | /* | ||||
* Maps a sequence of resident pages belonging to the same object. | * Maps a sequence of resident pages belonging to the same object. | ||||
* The sequence begins with the given page m_start. This page is | * The sequence begins with the given page m_start. This page is | ||||
[... 245 lines elided ...] | for (pa = ptepa | pmap_cache_bits(pmap, pat_mode, 1); | ||||
*/ | */ | ||||
addr += NBPDR; | addr += NBPDR; | ||||
continue; | continue; | ||||
} | } | ||||
if ((*pde & PG_V) == 0) { | if ((*pde & PG_V) == 0) { | ||||
pde_store(pde, pa | PG_PS | PG_M | PG_A | | pde_store(pde, pa | PG_PS | PG_M | PG_A | | ||||
PG_U | PG_RW | PG_V); | PG_U | PG_RW | PG_V); | ||||
pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); | pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); | ||||
atomic_add_long(&pmap_pde_mappings, 1); | counter_u64_add(pmap_pde_mappings, 1); | ||||
} else { | } else { | ||||
/* Continue on if the PDE is already valid. */ | /* Continue on if the PDE is already valid. */ | ||||
pdpg->ref_count--; | pdpg->ref_count--; | ||||
KASSERT(pdpg->ref_count > 0, | KASSERT(pdpg->ref_count > 0, | ||||
("pmap_object_init_pt: missing reference " | ("pmap_object_init_pt: missing reference " | ||||
"to page directory page, va: 0x%lx", addr)); | "to page directory page, va: 0x%lx", addr)); | ||||
} | } | ||||
addr += NBPDR; | addr += NBPDR; | ||||
[... 211 lines elided ...] | if (srcptepaddr & PG_PS) { | ||||
if (pde == NULL) | if (pde == NULL) | ||||
break; | break; | ||||
if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 || | if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 || | ||||
pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr, | pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr, | ||||
PMAP_ENTER_NORECLAIM, &lock))) { | PMAP_ENTER_NORECLAIM, &lock))) { | ||||
*pde = srcptepaddr & ~PG_W; | *pde = srcptepaddr & ~PG_W; | ||||
pmap_resident_count_inc(dst_pmap, NBPDR / | pmap_resident_count_inc(dst_pmap, NBPDR / | ||||
PAGE_SIZE); | PAGE_SIZE); | ||||
atomic_add_long(&pmap_pde_mappings, 1); | counter_u64_add(pmap_pde_mappings, 1); | ||||
} else | } else | ||||
pmap_abort_ptp(dst_pmap, addr, dst_pdpg); | pmap_abort_ptp(dst_pmap, addr, dst_pdpg); | ||||
continue; | continue; | ||||
} | } | ||||
srcptepaddr &= PG_FRAME; | srcptepaddr &= PG_FRAME; | ||||
srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); | srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); | ||||
KASSERT(srcmpte->ref_count > 0, | KASSERT(srcmpte->ref_count > 0, | ||||
[... 468 lines elided ...] | */ | ||||
if (TAILQ_EMPTY(&pvh->pv_list)) | if (TAILQ_EMPTY(&pvh->pv_list)) | ||||
vm_page_aflag_clear(m, PGA_WRITEABLE); | vm_page_aflag_clear(m, PGA_WRITEABLE); | ||||
} | } | ||||
} | } | ||||
pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); | pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); | ||||
freed++; | freed++; | ||||
} | } | ||||
} | } | ||||
PV_STAT(atomic_add_long(&pv_entry_frees, freed)); | PV_STAT(counter_u64_add(pv_entry_frees, freed)); | ||||
PV_STAT(atomic_add_int(&pv_entry_spare, freed)); | PV_STAT(counter_u64_add(pv_entry_spare, freed)); | ||||
PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); | PV_STAT(counter_u64_add(pv_entry_count, -freed)); | ||||
if (allfree) { | if (allfree) { | ||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | ||||
TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc, pc_list); | TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc, pc_list); | ||||
} | } | ||||
} | } | ||||
if (lock != NULL) | if (lock != NULL) | ||||
rw_wunlock(lock); | rw_wunlock(lock); | ||||
pmap_invalidate_all(pmap); | pmap_invalidate_all(pmap); | ||||
[... 897 lines elided ...] | pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) | ||||
KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), | KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), | ||||
("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); | ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); | ||||
if ((pdpg = vm_page_alloc(NULL, va >> PDPSHIFT, VM_ALLOC_INTERRUPT | | if ((pdpg = vm_page_alloc(NULL, va >> PDPSHIFT, VM_ALLOC_INTERRUPT | | ||||
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { | ||||
CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" | CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" | ||||
" in pmap %p", va, pmap); | " in pmap %p", va, pmap); | ||||
return (FALSE); | return (FALSE); | ||||
} | } | ||||
counter_u64_add(pt_page_count, 1); | |||||
[Not Done] markj: Rather than annotating each vm_page_alloc() call, I would suggest adding pmap_alloc_ptp() and pmap_free_ptp(), which wrap vm_page_alloc() and vm_page_unwire_noq()/vm_page_free(), respectively. | |||||
[Done] jah: Yeah, that's a good idea. | |||||
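For illustration, a minimal sketch of what such wrappers could look like, reusing the pmap_alloc_ptp()/pmap_free_ptp() names from the suggestion above; the signatures, flag handling, and wiring assumptions here are guesses for discussion, not the committed interface:

static vm_page_t
pmap_alloc_ptp(vm_pindex_t pindex, int req)
{
	vm_page_t m;

	/* Page table pages are unmanaged; count each successful allocation. */
	m = vm_page_alloc(NULL, pindex, VM_ALLOC_NOOBJ | req);
	if (m != NULL)
		counter_u64_add(pt_page_count, 1);
	return (m);
}

static void
pmap_free_ptp(vm_page_t m)
{
	/*
	 * Assumes the page was allocated with VM_ALLOC_WIRED: drop the
	 * wiring, free the page, and keep pt_page_count in sync.
	 */
	vm_page_unwire_noq(m);
	vm_page_free(m);
	counter_u64_add(pt_page_count, -1);
}

This would concentrate the counter bookkeeping in two places instead of annotating every vm_page_alloc()/vm_page_free() call site.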
pdpgpa = VM_PAGE_TO_PHYS(pdpg); | pdpgpa = VM_PAGE_TO_PHYS(pdpg); | ||||
firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa); | firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa); | ||||
newpdpe = pdpgpa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V; | newpdpe = pdpgpa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V; | ||||
KASSERT((oldpdpe & PG_A) != 0, | KASSERT((oldpdpe & PG_A) != 0, | ||||
("pmap_demote_pdpe: oldpdpe is missing PG_A")); | ("pmap_demote_pdpe: oldpdpe is missing PG_A")); | ||||
KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW, | KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW, | ||||
("pmap_demote_pdpe: oldpdpe is missing PG_M")); | ("pmap_demote_pdpe: oldpdpe is missing PG_M")); | ||||
newpde = oldpdpe; | newpde = oldpdpe; | ||||
[... 11 lines elided ...] | pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) | ||||
*/ | */ | ||||
*pdpe = newpdpe; | *pdpe = newpdpe; | ||||
/* | /* | ||||
* Invalidate a stale recursive mapping of the page directory page. | * Invalidate a stale recursive mapping of the page directory page. | ||||
*/ | */ | ||||
pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va)); | pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va)); | ||||
pmap_pdpe_demotions++; | counter_u64_add(pmap_pdpe_demotions, 1); | ||||
CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx" | CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx" | ||||
" in pmap %p", va, pmap); | " in pmap %p", va, pmap); | ||||
return (TRUE); | return (TRUE); | ||||
} | } | ||||
/* | /* | ||||
* Sets the memory attribute for the specified page. | * Sets the memory attribute for the specified page. | ||||
*/ | */ | ||||
[... 505 lines elided ...] | ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid | | ||||
PMAP_PCID_USER_PT; | PMAP_PCID_USER_PT; | ||||
if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) | if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) | ||||
PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE); | PCPU_SET(ucr3_load_mask, ~CR3_PCID_SAVE); | ||||
PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE); | PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE); | ||||
PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE); | PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE); | ||||
if (cached) | if (cached) | ||||
PCPU_INC(pm_save_cnt); | counter_u64_add(pcid_save_cnt, 1); | ||||
pmap_activate_sw_pti_post(td, pmap); | pmap_activate_sw_pti_post(td, pmap); | ||||
} | } | ||||
static void | static void | ||||
pmap_activate_sw_pcid_nopti(struct thread *td __unused, pmap_t pmap, | pmap_activate_sw_pcid_nopti(struct thread *td __unused, pmap_t pmap, | ||||
u_int cpuid) | u_int cpuid) | ||||
{ | { | ||||
uint64_t cached, cr3; | uint64_t cached, cr3; | ||||
KASSERT((read_rflags() & PSL_I) == 0, | KASSERT((read_rflags() & PSL_I) == 0, | ||||
("PCID needs interrupts disabled in pmap_activate_sw()")); | ("PCID needs interrupts disabled in pmap_activate_sw()")); | ||||
cached = pmap_pcid_alloc_checked(pmap, cpuid); | cached = pmap_pcid_alloc_checked(pmap, cpuid); | ||||
cr3 = rcr3(); | cr3 = rcr3(); | ||||
if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) | if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) | ||||
load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid | | load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid | | ||||
cached); | cached); | ||||
PCPU_SET(curpmap, pmap); | PCPU_SET(curpmap, pmap); | ||||
if (cached) | if (cached) | ||||
PCPU_INC(pm_save_cnt); | counter_u64_add(pcid_save_cnt, 1); | ||||
} | } | ||||
static void | static void | ||||
pmap_activate_sw_nopcid_nopti(struct thread *td __unused, pmap_t pmap, | pmap_activate_sw_nopcid_nopti(struct thread *td __unused, pmap_t pmap, | ||||
u_int cpuid __unused) | u_int cpuid __unused) | ||||
{ | { | ||||
load_cr3(pmap->pm_cr3); | load_cr3(pmap->pm_cr3); | ||||
[... 428 lines elided ...] | |||||
*/ | */ | ||||
static vm_page_t | static vm_page_t | ||||
pmap_large_map_getptp_unlocked(void) | pmap_large_map_getptp_unlocked(void) | ||||
{ | { | ||||
vm_page_t m; | vm_page_t m; | ||||
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | | m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | | ||||
VM_ALLOC_ZERO); | VM_ALLOC_ZERO); | ||||
if (m != NULL && (m->flags & PG_ZERO) == 0) | if (m != NULL) { | ||||
if ((m->flags & PG_ZERO) == 0) | |||||
[Done] jah: We need to do this only if m != NULL. I noticed that we don't pass VM_ALLOC_WIRED here; is that intentional? | |||||
[Not Done] kib: There are different meanings of 'wired' here. First, page table pages are unmanaged, so the pagedaemon never processes them and m->ref_count is free for other uses. The unmanaged property is probably what you mean by saying that a page table page must always be wired. Second, we systematically use the ref_count of a page table page to track the number of non-zero PTEs in it, and most of the pmap code frees the page table page when its ref_count goes to zero. In principle, this is true for the large map as well, with the caveat that we never free PDP pages, because the corresponding PML4 entries need to be stable: they are copied into each pmap's PML4 page. [This can be improved for LA57, but the kernel pretends to be in LA48 regardless of the userspace page tables.] So VM_ALLOC_WIRED is not used by the large-map page table handling code only because it is more convenient to manually increment ref_count where appropriate. | |||||
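As a concrete, simplified sketch of the convention kib describes — the helper names ptp_install_pte()/ptp_remove_pte() are hypothetical and do not exist in the tree; real pmap code manipulates ref_count inline:

/*
 * Illustrative only: the PTP's ref_count counts valid PTEs, so it is
 * adjusted by hand rather than through VM_ALLOC_WIRED.
 */
static void
ptp_install_pte(vm_page_t mpte, pt_entry_t *pte, pt_entry_t newpte)
{
	mpte->ref_count++;		/* one more valid PTE in this PTP */
	pte_store(pte, newpte);
}

static void
ptp_remove_pte(vm_page_t mpte, pt_entry_t *pte)
{
	pte_clear(pte);
	if (--mpte->ref_count == 0) {
		/* Last PTE went away; the PTP itself can be freed. */
		vm_page_free(mpte);
	}
}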
pmap_zero_page(m); | pmap_zero_page(m); | ||||
counter_u64_add(pt_page_count, 1); | |||||
} | |||||
return (m); | return (m); | ||||
} | } | ||||
static vm_page_t | static vm_page_t | ||||
pmap_large_map_getptp(void) | pmap_large_map_getptp(void) | ||||
{ | { | ||||
vm_page_t m; | vm_page_t m; | ||||
[... 1,470 lines elided ...]
Removing the handrolled pc_pm_save_cnt seemed like a useful bit of cleanup. But I realize this also replaces an incq instruction on the pcpu region with a (larger) addq instruction on the zpcpu region; the PCID save count is also updated fairly frequently, since it is maintained on the context-switch path. I'm fine with reverting this part of the change if anyone objects to it.
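For reference, the two idioms being traded off look roughly like this; the instruction mnemonics are as characterized in the comment above, not verified disassembly:

/* Old style: bump a dedicated per-CPU field; on amd64 this can compile
 * to a single incq against %gs-relative pcpu storage. */
PCPU_INC(pm_save_cnt);

/* counter(9) style: an addq of the constant 1 against the counter's
 * zpcpu slot -- a slightly larger encoding, but the per-CPU aggregation
 * for the sysctl comes for free via SYSCTL_COUNTER_U64. */
counter_u64_add(pcid_save_cnt, 1);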