Page MenuHomeFreeBSD

D21976.id63139.diff
No One · Temporary

D21976.id63139.diff

Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -149,6 +149,7 @@
#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
+#include <vm/vm_domainset.h>
#include <vm/uma.h>
#include <machine/intr_machdep.h>
@@ -165,6 +166,12 @@
#include <machine/sysarch.h>
#include <machine/tss.h>
+#ifdef NUMA
+#define PMAP_MEMDOM MAXMEMDOM
+#else
+#define PMAP_MEMDOM 1
+#endif
+
static __inline boolean_t
pmap_type_guest(pmap_t pmap)
{
@@ -414,8 +421,32 @@
* Data for the pv entry allocation mechanism.
* Updates to pv_invl_gen are protected by the pv list lock but reads are not.
*/
-static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
-static struct mtx __exclusive_cache_line pv_chunks_mutex;
+#ifdef NUMA
+static __inline int
+pc_to_domain(struct pv_chunk *pc)
+{
+ vm_page_t m;
+
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+ return (_vm_phys_domain(m->phys_addr));
+}
+#else
+static __inline int
+pc_to_domain(struct pv_chunk *pc __unused)
+{
+
+ return (0);
+}
+#endif
+
+struct pv_chunks_list {
+ struct mtx pvc_lock;
+ TAILQ_HEAD(pch, pv_chunk) pvc_list;
+ int active_reclaims;
+} __aligned(CACHE_LINE_SIZE);
+
+struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
+
#if VM_NRESERVLEVEL > 0
struct pmap_large_md_page {
struct rwlock pv_lock;
@@ -2022,8 +2053,10 @@
/*
* Initialize the pv chunk list mutex.
*/
- mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
-
+ for (i = 0; i < PMAP_MEMDOM; i++) {
+ mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL, MTX_DEF);
+ TAILQ_INIT(&pv_chunks[i].pvc_list);
+ }
pmap_init_pv_table();
pmap_initialized = 1;
@@ -4126,8 +4159,9 @@
* exacerbating the shortage of free pv entries.
*/
static vm_page_t
-reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain)
{
+ struct pv_chunks_list *pvc;
struct pv_chunk *pc, *pc_marker, *pc_marker_end;
struct pv_chunk_header pc_marker_b, pc_marker_end_b;
struct md_page *pvh;
@@ -4142,7 +4176,6 @@
uint64_t inuse;
int bit, field, freed;
bool start_di;
- static int active_reclaims = 0;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
@@ -4162,10 +4195,11 @@
*/
start_di = pmap_not_in_di();
- mtx_lock(&pv_chunks_mutex);
- active_reclaims++;
- TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
- TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru);
+ pvc = &pv_chunks[domain];
+ mtx_lock(&pvc->pvc_lock);
+ pvc->active_reclaims++;
+ TAILQ_INSERT_HEAD(&pvc->pvc_list, pc_marker, pc_lru);
+ TAILQ_INSERT_TAIL(&pvc->pvc_list, pc_marker_end, pc_lru);
while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
SLIST_EMPTY(&free)) {
next_pmap = pc->pc_pmap;
@@ -4178,7 +4212,7 @@
*/
goto next_chunk;
}
- mtx_unlock(&pv_chunks_mutex);
+ mtx_unlock(&pvc->pvc_lock);
/*
* A pv_chunk can only be removed from the pc_lru list
@@ -4195,17 +4229,17 @@
PMAP_LOCK(pmap);
if (start_di)
pmap_delayed_invl_start();
- mtx_lock(&pv_chunks_mutex);
+ mtx_lock(&pvc->pvc_lock);
continue;
} else if (pmap != locked_pmap) {
if (PMAP_TRYLOCK(pmap)) {
if (start_di)
pmap_delayed_invl_start();
- mtx_lock(&pv_chunks_mutex);
+ mtx_lock(&pvc->pvc_lock);
continue;
} else {
pmap = NULL; /* pmap is not locked */
- mtx_lock(&pv_chunks_mutex);
+ mtx_lock(&pvc->pvc_lock);
pc = TAILQ_NEXT(pc_marker, pc_lru);
if (pc == NULL ||
pc->pc_pmap != next_pmap)
@@ -4262,7 +4296,7 @@
}
}
if (freed == 0) {
- mtx_lock(&pv_chunks_mutex);
+ mtx_lock(&pvc->pvc_lock);
goto next_chunk;
}
/* Every freed mapping is for a 4 KB page. */
@@ -4279,19 +4313,19 @@
/* Entire chunk is free; return it. */
m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
dump_drop_page(m_pc->phys_addr);
- mtx_lock(&pv_chunks_mutex);
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ mtx_lock(&pvc->pvc_lock);
+ TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
break;
}
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- mtx_lock(&pv_chunks_mutex);
+ mtx_lock(&pvc->pvc_lock);
/* One freed pv entry in locked_pmap is sufficient. */
if (pmap == locked_pmap)
break;
next_chunk:
- TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
- TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
- if (active_reclaims == 1 && pmap != NULL) {
+ TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
+ TAILQ_INSERT_AFTER(&pvc->pvc_list, pc, pc_marker, pc_lru);
+ if (pvc->active_reclaims == 1 && pmap != NULL) {
/*
* Rotate the pv chunks list so that we do not
* scan the same pv chunks that could not be
@@ -4299,17 +4333,17 @@
* and/or superpage mapping) on every
* invocation of reclaim_pv_chunk().
*/
- while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) {
+ while ((pc = TAILQ_FIRST(&pvc->pvc_list)) != pc_marker) {
MPASS(pc->pc_pmap != NULL);
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
- TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+ TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
+ TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
}
}
}
- TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
- TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
- active_reclaims--;
- mtx_unlock(&pv_chunks_mutex);
+ TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
+ TAILQ_REMOVE(&pvc->pvc_list, pc_marker_end, pc_lru);
+ pvc->active_reclaims--;
+ mtx_unlock(&pvc->pvc_lock);
reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di);
if (m_pc == NULL && !SLIST_EMPTY(&free)) {
m_pc = SLIST_FIRST(&free);
@@ -4321,6 +4355,25 @@
return (m_pc);
}
+static vm_page_t
+reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+{
+ struct vm_domainset_iter di;
+ struct domainset *ds = curthread->td_domain.dr_policy;
+ vm_page_t m;
+ int domain;
+ int flags = M_WAITOK; /* XXX(review): flags only steer the domainset iterator's visit order/termination; nothing here sleeps — confirm M_WAITOK is the intended policy */
+
+ vm_domainset_iter_policy_init(&di, ds, &domain, &flags);
+ do {
+ m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain);
+ if (m != NULL)
+ break;
+ } while (vm_domainset_iter_policy(&di, &domain) == 0);
+
+ return (m);
+}
+
/*
* free the pv_entry back to the free list
*/
@@ -4370,29 +4423,37 @@
static void
free_pv_chunk(struct pv_chunk *pc)
{
+ struct pv_chunks_list *pvc;
- mtx_lock(&pv_chunks_mutex);
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
- mtx_unlock(&pv_chunks_mutex);
+ pvc = &pv_chunks[pc_to_domain(pc)];
+ mtx_lock(&pvc->pvc_lock);
+ TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
+ mtx_unlock(&pvc->pvc_lock);
free_pv_chunk_dequeued(pc);
}
static void
free_pv_chunk_batch(struct pv_chunklist *batch)
{
+ struct pv_chunks_list *pvc;
struct pv_chunk *pc, *npc;
+ int i;
- if (TAILQ_EMPTY(batch))
- return;
-
- mtx_lock(&pv_chunks_mutex);
- TAILQ_FOREACH(pc, batch, pc_list) {
- TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+ for (i = 0; i < vm_ndomains; i++) {
+ if (TAILQ_EMPTY(&batch[i]))
+ continue;
+ pvc = &pv_chunks[i];
+ mtx_lock(&pvc->pvc_lock);
+ TAILQ_FOREACH(pc, &batch[i], pc_list) {
+ TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
+ }
+ mtx_unlock(&pvc->pvc_lock);
}
- mtx_unlock(&pv_chunks_mutex);
- TAILQ_FOREACH_SAFE(pc, batch, pc_list, npc) {
- free_pv_chunk_dequeued(pc);
+ for (i = 0; i < vm_ndomains; i++) {
+ TAILQ_FOREACH_SAFE(pc, &batch[i], pc_list, npc) {
+ free_pv_chunk_dequeued(pc);
+ }
}
}
@@ -4407,6 +4468,7 @@
static pv_entry_t
get_pv_entry(pmap_t pmap, struct rwlock **lockp)
{
+ struct pv_chunks_list *pvc;
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
@@ -4458,9 +4520,10 @@
pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
- mtx_lock(&pv_chunks_mutex);
- TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
- mtx_unlock(&pv_chunks_mutex);
+ pvc = &pv_chunks[_vm_phys_domain(m->phys_addr)];
+ mtx_lock(&pvc->pvc_lock);
+ TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
+ mtx_unlock(&pvc->pvc_lock);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
PV_STAT(atomic_add_long(&pv_entry_count, 1));
@@ -4506,10 +4569,11 @@
static void
reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
{
- struct pch new_tail;
+ struct pv_chunks_list *pvc;
+ struct pch new_tail[PMAP_MEMDOM];
struct pv_chunk *pc;
vm_page_t m;
- int avail, free;
+ int avail, free, i;
bool reclaimed;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -4521,7 +4585,8 @@
* reclaim_pv_chunk() could recycle one of these chunks. In
* contrast, these chunks must be added to the pmap upon allocation.
*/
- TAILQ_INIT(&new_tail);
+ for (i = 0; i < vm_ndomains; i++)
+ TAILQ_INIT(&new_tail[i]);
retry:
avail = 0;
TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
@@ -4556,7 +4621,7 @@
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+ TAILQ_INSERT_TAIL(&new_tail[pc_to_domain(pc)], pc, pc_lru);
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
/*
@@ -4567,10 +4632,13 @@
if (reclaimed)
goto retry;
}
- if (!TAILQ_EMPTY(&new_tail)) {
- mtx_lock(&pv_chunks_mutex);
- TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
- mtx_unlock(&pv_chunks_mutex);
+ for (i = 0; i < vm_ndomains; i++) {
+ if (TAILQ_EMPTY(&new_tail[i]))
+ continue;
+ pvc = &pv_chunks[i];
+ mtx_lock(&pvc->pvc_lock);
+ TAILQ_CONCAT(&pvc->pvc_list, &new_tail[i], pc_lru);
+ mtx_unlock(&pvc->pvc_lock);
}
}
@@ -6996,7 +7064,7 @@
pt_entry_t *pte, tpte;
pt_entry_t PG_M, PG_RW, PG_V;
struct spglist free;
- struct pv_chunklist free_chunks;
+ struct pv_chunklist free_chunks[PMAP_MEMDOM];
vm_page_t m, mpte, mt;
pv_entry_t pv;
struct md_page *pvh;
@@ -7004,7 +7072,7 @@
struct rwlock *lock;
int64_t bit;
uint64_t inuse, bitmask;
- int allfree, field, freed, idx;
+ int allfree, field, freed, i, idx;
boolean_t superpage;
vm_paddr_t pa;
@@ -7032,7 +7100,8 @@
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
- TAILQ_INIT(&free_chunks);
+ for (i = 0; i < vm_ndomains; i++)
+ TAILQ_INIT(&free_chunks[i]);
SLIST_INIT(&free);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
@@ -7160,14 +7229,14 @@
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
if (allfree) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_TAIL(&free_chunks, pc, pc_list);
+ TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc, pc_list);
}
}
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
pmap_pkru_deassign_all(pmap);
- free_pv_chunk_batch(&free_chunks);
+ free_pv_chunk_batch((struct pv_chunklist *)&free_chunks);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, true);
}

File Metadata

Mime Type
text/plain
Expires
Tue, Jan 21, 8:56 AM (5 h, 32 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16005537
Default Alt Text
D21976.id63139.diff (10 KB)

Event Timeline