Changeset View
sys/amd64/amd64/pmap.c
[… 143 lines elided …]
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
 #include <vm/vm_radix.h>
 #include <vm/vm_reserv.h>
+#include <vm/vm_domainset.h>
 #include <vm/uma.h>
 
 #include <machine/intr_machdep.h>
 #include <x86/apicvar.h>
 #include <x86/ifunc.h>
 #include <machine/cpu.h>
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #ifdef SMP
 #include <machine/smp.h>
 #endif
 #include <machine/sysarch.h>
 #include <machine/tss.h>
+#ifdef NUMA
+#define PMAP_MEMDOM	MAXMEMDOM
+#else
+#define PMAP_MEMDOM	1
+#endif
 static __inline boolean_t
 pmap_type_guest(pmap_t pmap)
 {
 	return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI));
 }
 
 static __inline boolean_t
[… 233 lines elided …] (in: static struct pmap_preinit_mapping {)
 	int mode;
 } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
 static int pmap_initialized;
 
 /*
  * Data for the pv entry allocation mechanism.
  * Updates to pv_invl_gen are protected by the pv list lock but reads are not.
  */
-static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
-static struct mtx __exclusive_cache_line pv_chunks_mutex;
+#ifdef NUMA
+static __inline int
+pc_to_domain(struct pv_chunk *pc)
+{
+	vm_page_t m;
+
+	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+	return (_vm_phys_domain(m->phys_addr));
alc: You can simplify this to:
```
return (_vm_phys_domain(DMAP_TO_PHYS((vm_offset_t)pc)));
```
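A minimal sketch of pc_to_domain() with that simplification applied (illustrative only, not part of the patch): pv chunks are allocated out of the direct map, so DMAP_TO_PHYS() already yields the physical address that _vm_phys_domain() classifies, making the PHYS_TO_VM_PAGE() lookup and the m->phys_addr read redundant.
```
static __inline int
pc_to_domain(struct pv_chunk *pc)
{
	/* The chunk lives in the direct map; translate, then classify. */
	return (_vm_phys_domain(DMAP_TO_PHYS((vm_offset_t)pc)));
}
```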
+}
+#else
+static __inline int
+pc_to_domain(struct pv_chunk *pc __unused)
+{
+	return (0);
+}
+#endif
+
+struct pv_chunks_list {
+	struct mtx pvc_lock;
+	TAILQ_HEAD(pch, pv_chunk) pvc_list;
+	int active_reclaims;
+} __aligned(CACHE_LINE_SIZE);
+
+struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
 
 #if VM_NRESERVLEVEL > 0
alc (unsubmitted, not done): Hijacking this review for a second: the condition here (and in related code below) should not be VM_NRESERVLEVEL > 0, because we use the pv locks regardless of whether superpage reservations are enabled. In other words, someone who disables reservations but has a large number of processors still deserves the additional locks. Disabling reservations only affects the use of struct md_page; specifically, we'll never insert anything into the pv_list. So, conditioning this code on NUMA would be better.

mjg (author, done): I mostly agree and can change that, no problem (it's basically some churn). The question is whether it can wait until after this one settles. If not, I will create a separate review and then rebase this patch. In the meantime, do you have further comments about the change as proposed?

alc (unsubmitted, not done): I've only glanced at this change. I should be able to review it carefully within the next 36 hours.
 struct pmap_large_md_page {
 	struct rwlock	pv_lock;
 	struct md_page	pv_page;
 	u_long pv_invl_gen;
 };
 
 static struct pmap_large_md_page *pv_table;
 #else
 static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
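A minimal sketch of the regating alc suggests in the thread above (illustrative only, not part of this revision; the declarations are copied unchanged from the patch):
```
#ifdef NUMA	/* was: #if VM_NRESERVLEVEL > 0 */
struct pmap_large_md_page {
	struct rwlock	pv_lock;
	struct md_page	pv_page;
	u_long pv_invl_gen;
};
static struct pmap_large_md_page *pv_table;
#else
static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
#endif
```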
[… 1,589 lines elided …] (in: if (pg_ps_enabled) {)
 		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
 		    ("pmap_init: can't assign to pagesizes[1]"));
 		pagesizes[1] = NBPDR;
 	}
 
 	/*
 	 * Initialize the pv chunk list mutex.
 	 */
-	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
+	for (i = 0; i < PMAP_MEMDOM; i++) {
+		mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL, MTX_DEF);
+		TAILQ_INIT(&pv_chunks[i].pvc_list);
+	}
 	pmap_init_pv_table();
 
 	pmap_initialized = 1;
 	for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
 		ppim = pmap_preinit_mapping + i;
 		if (ppim->va == 0)
 			continue;
 		/* Make the direct map consistent */

[… 2,086 lines elided …]
  *
  * Returns NULL if PV entries were reclaimed from the specified pmap.
  *
  * We do not, however, unmap 2mpages because subsequent accesses will
  * allocate per-page pv entries until repromotion occurs, thereby
  * exacerbating the shortage of free pv entries.
  */
 static vm_page_t
-reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain)
 {
+	struct pv_chunks_list *pvc;
 	struct pv_chunk *pc, *pc_marker, *pc_marker_end;
 	struct pv_chunk_header pc_marker_b, pc_marker_end_b;
 	struct md_page *pvh;
 	pd_entry_t *pde;
 	pmap_t next_pmap, pmap;
 	pt_entry_t *pte, tpte;
 	pt_entry_t PG_G, PG_A, PG_M, PG_RW;
 	pv_entry_t pv;
 	vm_offset_t va;
 	vm_page_t m, m_pc;
 	struct spglist free;
 	uint64_t inuse;
 	int bit, field, freed;
 	bool start_di;
-	static int active_reclaims = 0;
 
 	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
 	KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
 	pmap = NULL;
 	m_pc = NULL;
 	PG_G = PG_A = PG_M = PG_RW = 0;
 	SLIST_INIT(&free);
 	bzero(&pc_marker_b, sizeof(pc_marker_b));
 	bzero(&pc_marker_end_b, sizeof(pc_marker_end_b));
 	pc_marker = (struct pv_chunk *)&pc_marker_b;
 	pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;
 
 	/*
 	 * A delayed invalidation block should already be active if
 	 * pmap_advise() or pmap_remove() called this function by way
 	 * of pmap_demote_pde_locked().
 	 */
 	start_di = pmap_not_in_di();
 
-	mtx_lock(&pv_chunks_mutex);
-	active_reclaims++;
-	TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
-	TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru);
+	pvc = &pv_chunks[domain];
+	mtx_lock(&pvc->pvc_lock);
+	pvc->active_reclaims++;
+	TAILQ_INSERT_HEAD(&pvc->pvc_list, pc_marker, pc_lru);
+	TAILQ_INSERT_TAIL(&pvc->pvc_list, pc_marker_end, pc_lru);
 	while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
 	    SLIST_EMPTY(&free)) {
 		next_pmap = pc->pc_pmap;
 		if (next_pmap == NULL) {
 			/*
 			 * The next chunk is a marker.  However, it is
 			 * not our marker, so active_reclaims must be
 			 * > 1.  Consequently, the next_chunk code
 			 * will not rotate the pv_chunks list.
 			 */
 			goto next_chunk;
 		}
-		mtx_unlock(&pv_chunks_mutex);
+		mtx_unlock(&pvc->pvc_lock);
 
 		/*
 		 * A pv_chunk can only be removed from the pc_lru list
 		 * when both pc_chunks_mutex is owned and the
 		 * corresponding pmap is locked.
 		 */
 		if (pmap != next_pmap) {
 			reclaim_pv_chunk_leave_pmap(pmap, locked_pmap,
 			    start_di);
 			pmap = next_pmap;
 			/* Avoid deadlock and lock recursion. */
 			if (pmap > locked_pmap) {
 				RELEASE_PV_LIST_LOCK(lockp);
 				PMAP_LOCK(pmap);
 				if (start_di)
 					pmap_delayed_invl_start();
-				mtx_lock(&pv_chunks_mutex);
+				mtx_lock(&pvc->pvc_lock);
 				continue;
 			} else if (pmap != locked_pmap) {
 				if (PMAP_TRYLOCK(pmap)) {
 					if (start_di)
 						pmap_delayed_invl_start();
-					mtx_lock(&pv_chunks_mutex);
+					mtx_lock(&pvc->pvc_lock);
 					continue;
 				} else {
 					pmap = NULL; /* pmap is not locked */
-					mtx_lock(&pv_chunks_mutex);
+					mtx_lock(&pvc->pvc_lock);
 					pc = TAILQ_NEXT(pc_marker, pc_lru);
 					if (pc == NULL ||
 					    pc->pc_pmap != next_pmap)
 						continue;
 					goto next_chunk;
 				}
 			} else if (start_di)
 				pmap_delayed_invl_start();
[… 40 lines elided …] (in: for (field = 0; field < _NPCM; field++) {)
 				}
 				pmap_delayed_invl_page(m);
 				pc->pc_map[field] |= 1UL << bit;
 				pmap_unuse_pt(pmap, va, *pde, &free);
 				freed++;
 			}
 		}
 		if (freed == 0) {
-			mtx_lock(&pv_chunks_mutex);
+			mtx_lock(&pvc->pvc_lock);
 			goto next_chunk;
 		}
 		/* Every freed mapping is for a 4 KB page. */
 		pmap_resident_count_dec(pmap, freed);
 		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
 		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
 		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
 		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 		if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
 		    pc->pc_map[2] == PC_FREE2) {
 			PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
 			PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
 			PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
 			/* Entire chunk is free; return it. */
 			m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
 			dump_drop_page(m_pc->phys_addr);
-			mtx_lock(&pv_chunks_mutex);
-			TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+			mtx_lock(&pvc->pvc_lock);
+			TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
 			break;
 		}
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
-		mtx_lock(&pv_chunks_mutex);
+		mtx_lock(&pvc->pvc_lock);
 		/* One freed pv entry in locked_pmap is sufficient. */
 		if (pmap == locked_pmap)
 			break;
 next_chunk:
-		TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
-		TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
-		if (active_reclaims == 1 && pmap != NULL) {
+		TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
+		TAILQ_INSERT_AFTER(&pvc->pvc_list, pc, pc_marker, pc_lru);
+		if (pvc->active_reclaims == 1 && pmap != NULL) {
 			/*
 			 * Rotate the pv chunks list so that we do not
 			 * scan the same pv chunks that could not be
 			 * freed (because they contained a wired
 			 * and/or superpage mapping) on every
 			 * invocation of reclaim_pv_chunk().
 			 */
-			while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) {
+			while ((pc = TAILQ_FIRST(&pvc->pvc_list)) != pc_marker) {
 				MPASS(pc->pc_pmap != NULL);
-				TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
-				TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+				TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
+				TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
 			}
 		}
 	}
-	TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
-	TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
-	active_reclaims--;
-	mtx_unlock(&pv_chunks_mutex);
+	TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
+	TAILQ_REMOVE(&pvc->pvc_list, pc_marker_end, pc_lru);
+	pvc->active_reclaims--;
+	mtx_unlock(&pvc->pvc_lock);
 	reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di);
 	if (m_pc == NULL && !SLIST_EMPTY(&free)) {
 		m_pc = SLIST_FIRST(&free);
 		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
 		/* Recycle a freed page table page. */
 		m_pc->ref_count = 1;
 	}
 	vm_page_free_pages_toq(&free, true);
 	return (m_pc);
 }
+
+static vm_page_t
+reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+{
+	struct vm_domainset_iter di;
+	struct domainset *ds = curthread->td_domain.dr_policy;
+	vm_page_t m;
+	int domain;
+	int flags = M_WAITOK;	/* XXX */
+
+	vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
+	do {
+		m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain);
+		if (m != NULL)
+			break;
+	} while (vm_domainset_iter_policy(&di, &domain) == 0);
kib (not done): So you are only visiting for reclamation the domains which are allowed by the curthread policy? IMO that is wrong: if we are at the stage where reclaim_pv_chunk() is called, we must get to a fresh pv chunk by all means.

mjg (author, done): I'm fine either way. But in this case, do we want to walk "our" domains first and only then iterate the rest? Or just walk everything as it is, starting from 0?

mjg (author, done): How about this: a simple rotor is added, threads fetchadd into it, and they walk the list indicated by the new count % vm_ndomains. That way they spread themselves out in case of multiple CPUs getting here. But this also increases the likelihood of getting a page from the "wrong" domain, which may not be a problem given the circumstances.

kib (not done): You can start from the domain of the page, instead of from the counter % ndomains.
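A minimal sketch (illustrative only, not part of this revision) of the rotor variant mjg describes above; the reclaim_rotor counter is a hypothetical name, and per kib's follow-up the start value could instead be seeded from the page's domain:
```
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{
	static u_int reclaim_rotor;	/* hypothetical */
	vm_page_t m;
	int domain, i, start;

	/* The fetchadd spreads concurrent reclaimers across domains. */
	start = atomic_fetchadd_int(&reclaim_rotor, 1) % vm_ndomains;
	m = NULL;
	for (i = 0; i < vm_ndomains; i++) {
		domain = (start + i) % vm_ndomains;
		m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain);
		if (m != NULL)
			break;
	}
	return (m);
}
```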
+	return (m);
+}
 
 /*
  * free the pv_entry back to the free list
  */
 static void
 free_pv_entry(pmap_t pmap, pv_entry_t pv)
 {
 	struct pv_chunk *pc;
 	int idx, field, bit;

[… 33 lines elided …] (in: free_pv_chunk_dequeued(struct pv_chunk *pc))
 	dump_drop_page(m->phys_addr);
 	vm_page_unwire_noq(m);
 	vm_page_free(m);
 }
 
 static void
 free_pv_chunk(struct pv_chunk *pc)
 {
+	struct pv_chunks_list *pvc;
 
-	mtx_lock(&pv_chunks_mutex);
-	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
-	mtx_unlock(&pv_chunks_mutex);
+	pvc = &pv_chunks[pc_to_domain(pc)];
+	mtx_lock(&pvc->pvc_lock);
+	TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
+	mtx_unlock(&pvc->pvc_lock);
 	free_pv_chunk_dequeued(pc);
 }
 
 static void
 free_pv_chunk_batch(struct pv_chunklist *batch)
 {
+	struct pv_chunks_list *pvc;
 	struct pv_chunk *pc, *npc;
+	int i;
 
-	if (TAILQ_EMPTY(batch))
-		return;
-	mtx_lock(&pv_chunks_mutex);
-	TAILQ_FOREACH(pc, batch, pc_list) {
-		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
-	}
-	mtx_unlock(&pv_chunks_mutex);
-	TAILQ_FOREACH_SAFE(pc, batch, pc_list, npc) {
-		free_pv_chunk_dequeued(pc);
-	}
-}
+	for (i = 0; i < vm_ndomains; i++) {
+		if (TAILQ_EMPTY(&batch[i]))
+			continue;
+		pvc = &pv_chunks[i];
+		mtx_lock(&pvc->pvc_lock);
+		TAILQ_FOREACH(pc, &batch[i], pc_list) {
+			TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
+		}
+		mtx_unlock(&pvc->pvc_lock);
+	}
+	for (i = 0; i < vm_ndomains; i++) {
+		TAILQ_FOREACH_SAFE(pc, &batch[i], pc_list, npc) {
+			free_pv_chunk_dequeued(pc);
+		}
+	}
+}
 
 /*
  * Returns a new PV entry, allocating a new PV chunk from the system when
  * needed.  If this PV chunk allocation fails and a PV list lock pointer was
  * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
  * returned.
  *
  * The given PV list lock may be released.
  */
 static pv_entry_t
 get_pv_entry(pmap_t pmap, struct rwlock **lockp)
 {
+	struct pv_chunks_list *pvc;
 	int bit, field;
 	pv_entry_t pv;
 	struct pv_chunk *pc;
 	vm_page_t m;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
 retry:

[… 35 lines elided …] (in: retry:)
 		PV_STAT(atomic_add_int(&pc_chunk_count, 1));
 		PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
 		dump_add_page(m->phys_addr);
 		pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 		pc->pc_pmap = pmap;
 		pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
 		pc->pc_map[1] = PC_FREE1;
 		pc->pc_map[2] = PC_FREE2;
-		mtx_lock(&pv_chunks_mutex);
-		TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
-		mtx_unlock(&pv_chunks_mutex);
+		pvc = &pv_chunks[_vm_phys_domain(m->phys_addr)];
+		mtx_lock(&pvc->pvc_lock);
+		TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
+		mtx_unlock(&pvc->pvc_lock);
 		pv = &pc->pc_pventry[0];
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 		PV_STAT(atomic_add_long(&pv_entry_count, 1));
 		PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
 		return (pv);
 	}
 
 /*
[… 29 lines elided …]
  * Ensure that the number of spare PV entries in the specified pmap meets or
  * exceeds the given count, "needed".
  *
  * The given PV list lock may be released.
  */
 static void
 reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
 {
-	struct pch new_tail;
+	struct pv_chunks_list *pvc;
+	struct pch new_tail[PMAP_MEMDOM];
 	struct pv_chunk *pc;
 	vm_page_t m;
-	int avail, free;
+	int avail, free, i;
 	bool reclaimed;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
 
 	/*
 	 * Newly allocated PV chunks must be stored in a private list until
 	 * the required number of PV chunks have been allocated.  Otherwise,
 	 * reclaim_pv_chunk() could recycle one of these chunks.  In
 	 * contrast, these chunks must be added to the pmap upon allocation.
 	 */
-	TAILQ_INIT(&new_tail);
+	for (i = 0; i < vm_ndomains; i++)
+		TAILQ_INIT(&new_tail[i]);
 retry:
 	avail = 0;
 	TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
 #ifndef __POPCNT__
 		if ((cpu_feature2 & CPUID2_POPCNT) == 0)
 			bit_count((bitstr_t *)pc->pc_map, 0,
 			    sizeof(pc->pc_map) * NBBY, &free);
 		else

[… 18 lines elided …] (in: for (reclaimed = false; avail < needed; avail += _NPCPV) {)
 		PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
 		dump_add_page(m->phys_addr);
 		pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 		pc->pc_pmap = pmap;
 		pc->pc_map[0] = PC_FREE0;
 		pc->pc_map[1] = PC_FREE1;
 		pc->pc_map[2] = PC_FREE2;
 		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
-		TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+		TAILQ_INSERT_TAIL(&new_tail[pc_to_domain(pc)], pc, pc_lru);
 		PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
 
 		/*
 		 * The reclaim might have freed a chunk from the current pmap.
 		 * If that chunk contained available entries, we need to
 		 * re-count the number of available entries.
 		 */
 		if (reclaimed)
 			goto retry;
 	}
-	if (!TAILQ_EMPTY(&new_tail)) {
-		mtx_lock(&pv_chunks_mutex);
-		TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
-		mtx_unlock(&pv_chunks_mutex);
-	}
+	for (i = 0; i < vm_ndomains; i++) {
+		if (TAILQ_EMPTY(&new_tail[i]))
+			continue;
+		pvc = &pv_chunks[i];
+		mtx_lock(&pvc->pvc_lock);
+		TAILQ_CONCAT(&pvc->pvc_list, &new_tail[i], pc_lru);
+		mtx_unlock(&pvc->pvc_lock);
+	}
 }
 
 /*
  * First find and then remove the pv entry for the specified pmap and virtual
  * address from the specified pv list.  Returns the pv entry if found and NULL
  * otherwise.  This operation can be performed on pv lists for either 4KB or
  * 2MB page mappings.

[… 2,409 lines elided …]
  */
 void
 pmap_remove_pages(pmap_t pmap)
 {
 	pd_entry_t ptepde;
 	pt_entry_t *pte, tpte;
 	pt_entry_t PG_M, PG_RW, PG_V;
 	struct spglist free;
-	struct pv_chunklist free_chunks;
+	struct pv_chunklist free_chunks[PMAP_MEMDOM];
 	vm_page_t m, mpte, mt;
 	pv_entry_t pv;
 	struct md_page *pvh;
 	struct pv_chunk *pc, *npc;
 	struct rwlock *lock;
 	int64_t bit;
 	uint64_t inuse, bitmask;
-	int allfree, field, freed, idx;
+	int allfree, field, freed, i, idx;
 	boolean_t superpage;
 	vm_paddr_t pa;
 
 	/*
 	 * Assert that the given pmap is only active on the current
 	 * CPU.  Unfortunately, we cannot block another CPU from
 	 * activating the pmap while this function is executing.
 	 */

[… 11 lines elided …] (in: #ifdef INVARIANTS)
 	}
 #endif
 
 	lock = NULL;
 	PG_M = pmap_modified_bit(pmap);
 	PG_V = pmap_valid_bit(pmap);
 	PG_RW = pmap_rw_bit(pmap);
-	TAILQ_INIT(&free_chunks);
+	for (i = 0; i < vm_ndomains; i++)
+		TAILQ_INIT(&free_chunks[i]);
 	SLIST_INIT(&free);
 	PMAP_LOCK(pmap);
 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
 		allfree = 1;
 		freed = 0;
 		for (field = 0; field < _NPCM; field++) {
 			inuse = ~pc->pc_map[field] & pc_freemask[field];
 			while (inuse != 0) {

[… 111 lines elided …]
 				freed++;
 			}
 		}
 		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
 		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
 		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
 		if (allfree) {
 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
-			TAILQ_INSERT_TAIL(&free_chunks, pc, pc_list);
+			TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc, pc_list);
 		}
 	}
 	if (lock != NULL)
 		rw_wunlock(lock);
 	pmap_invalidate_all(pmap);
 	pmap_pkru_deassign_all(pmap);
-	free_pv_chunk_batch(&free_chunks);
+	free_pv_chunk_batch((struct pv_chunklist *)&free_chunks);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, true);
 }
 
 static boolean_t
 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
 {
 	struct rwlock *lock;

[… 3,270 lines elided …]