D24217: amd64 pmap: fine-grained pv list locking
D24217.id74632.diff (62 KB)
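In outline, this diff replaces the rwlock-based pv list locks and the per-domain pv chunk LRU lists in the amd64 pmap with a one-byte, turnstile-backed mutex ("OBM") embedded in each page's struct md_page. The pv list lock cookie (PVLL) becomes the vm_page itself, pv chunks are tracked only on their owning pmap, reclaim_pv_chunk() walks a new global all_pmaps list instead of the chunk LRU, and superpage promotion and demotion lock all 512 constituent 4 KB pv lists via pmap_pv_list_lock_pde()/pmap_pv_list_unlock_pde(). The OBM primitive itself is added in sys/sys/_obm.h, sys/sys/obm.h and sys/kern/kern_obm.c.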
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -121,6 +121,7 @@
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
+#include <sys/obm.h>
#include <sys/proc.h>
#include <sys/rangeset.h>
#include <sys/rwlock.h>
@@ -171,6 +172,13 @@
#define PMAP_MEMDOM 1
#endif
+#define PC_FREE0 0xfffffffffffffffful
+#define PC_FREE1 0xfffffffffffffffful
+#define PC_FREE2 0x000000fffffffffful
+
+_Static_assert(sizeof(struct pv_chunk) <= PAGE_SIZE, "");
+_Static_assert(NBBY * sizeof(uint64_t) * _NPCM >= _NPCPV, "");
+
static __inline boolean_t
pmap_type_guest(pmap_t pmap)
{
@@ -316,6 +324,9 @@
#define PMAP_INLINE
#endif
+static TAILQ_HEAD(, pmap) all_pmaps;
+static struct mtx all_pmaps_lock;
+
#ifdef PV_STATS
#define PV_STAT(x) do { x ; } while (0)
#else
@@ -331,51 +342,52 @@
})
#define pa_to_pmdp(pa) (&pv_table[pa_index(pa)])
#define pa_to_pvh(pa) (&(pa_to_pmdp(pa)->pv_page))
-#define PHYS_TO_PV_LIST_LOCK(pa) ({ \
- struct rwlock *_lock; \
- if (__predict_false((pa) > pmap_last_pa)) \
- _lock = &pv_dummy_large.pv_lock; \
- else \
- _lock = &(pa_to_pmdp(pa)->pv_lock); \
- _lock; \
-})
#else
#define pa_index(pa) ((pa) >> PDRSHIFT)
#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
#define NPV_LIST_LOCKS MAXCPU
-#define PHYS_TO_PV_LIST_LOCK(pa) \
- (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
#endif
+#define PHYS_TO_PV_LIST_LOCK(pa) PHYS_TO_VM_PAGE(pa)
+
#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
- struct rwlock **_lockp = (lockp); \
- struct rwlock *_new_lock; \
+ PVLL **_lockp = (lockp); \
+ PVLL *_new_lock; \
\
_new_lock = PHYS_TO_PV_LIST_LOCK(pa); \
if (_new_lock != *_lockp) { \
if (*_lockp != NULL) \
- rw_wunlock(*_lockp); \
+ pmap_pv_list_unlock(*_lockp); \
+ if (_new_lock == NULL) \
+ _new_lock = &pv_fake_page; \
*_lockp = _new_lock; \
- rw_wlock(*_lockp); \
+ pmap_pv_list_lock(*_lockp); \
} \
} while (0)
-#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
- CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) do { \
+ PVLL **_lockp = (lockp); \
+ \
+ if (m != *_lockp) { \
+ if (*_lockp != NULL) \
+ pmap_pv_list_unlock(*_lockp); \
+ *_lockp = m; \
+ pmap_pv_list_lock(m); \
+ } \
+} while (0)
#define RELEASE_PV_LIST_LOCK(lockp) do { \
- struct rwlock **_lockp = (lockp); \
+ PVLL **_lockp = (lockp); \
\
if (*_lockp != NULL) { \
- rw_wunlock(*_lockp); \
+ pmap_pv_list_unlock(*_lockp); \
*_lockp = NULL; \
} \
} while (0)
-#define VM_PAGE_TO_PV_LIST_LOCK(m) \
- PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+#define VM_PAGE_TO_PV_LIST_LOCK(m) (m)
struct pmap kernel_pmap_store;
@@ -428,46 +440,158 @@
* Data for the pv entry allocation mechanism.
* Updates to pv_invl_gen are protected by the pv list lock but reads are not.
*/
-#ifdef NUMA
-static __inline int
-pc_to_domain(struct pv_chunk *pc)
-{
- return (_vm_phys_domain(DMAP_TO_PHYS((vm_offset_t)pc)));
-}
-#else
-static __inline int
-pc_to_domain(struct pv_chunk *pc __unused)
-{
-
- return (0);
-}
-#endif
-
-struct pv_chunks_list {
- struct mtx pvc_lock;
- TAILQ_HEAD(pch, pv_chunk) pvc_list;
- int active_reclaims;
-} __aligned(CACHE_LINE_SIZE);
-
-struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
+typedef struct vm_page PVLL;
#ifdef NUMA
struct pmap_large_md_page {
- struct rwlock pv_lock;
- struct md_page pv_page;
+ struct lock_object lo;
+ uintptr_t pad;
+ struct md_page pv_page;
u_long pv_invl_gen;
};
+/*
+ * We strongly depend on the size being a power of two, so the assert
+ * is overzealous. However, should the struct be resized to a
+ * different power of two, the code below needs to be revisited.
+ */
+_Static_assert(sizeof(struct pmap_large_md_page) == 64, "pmap_large_md_page");
+
__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
#define pv_dummy pv_dummy_large.pv_page
__read_mostly static struct pmap_large_md_page *pv_table;
__read_mostly vm_paddr_t pmap_last_pa;
+static struct lock_object *
+pv_list_lock_object(vm_paddr_t pa)
+{
+ if (__predict_false((pa) > pmap_last_pa))
+ return (&pv_dummy_large.lo);
+ return (&pa_to_pmdp(pa)->lo);
+}
#else
-static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
+static struct lock_object __exclusive_cache_line pv_lo[NPV_LIST_LOCKS];
static u_long pv_invl_gen[NPV_LIST_LOCKS];
static struct md_page *pv_table;
static struct md_page pv_dummy;
+static struct lock_object *
+pv_list_lock_object(vm_paddr_t pa)
+{
+ return (&pv_lo[pa_index(pa) % NPV_LIST_LOCKS]);
+}
#endif
+__read_mostly static struct vm_page pv_fake_page;
+
+static void
+pmap_pv_list_lock(vm_page_t m)
+{
+ obm_lock(&m->md.pv_lock, pv_list_lock_object(VM_PAGE_TO_PHYS(m)));
+}
+
+static void
+pmap_pv_list_unlock(vm_page_t m)
+{
+ obm_unlock(&m->md.pv_lock, pv_list_lock_object(VM_PAGE_TO_PHYS(m)));
+}
+
+/*
+ * Locks all pv lists for 4k pages constituting the superpage that
+ * contains the passed page. The page's pv list is locked according
+ * to pte_locked.
+ *
+ * If pte_locked is true and the fast trylock of the superpage's first
+ * page fails, the passed page's pv list lock is dropped and all of the
+ * superpage's pv list locks are then taken in address order.
+ */
+static void
+pmap_pv_list_lock_pde1(vm_page_t m, bool pte_locked)
+{
+ vm_page_t mt, sm;
+ struct lock_object *lo;
+ int i;
+
+ if (pte_locked)
+ obm_assert_locked(&m->md.pv_lock);
+
+ sm = m - atop(VM_PAGE_TO_PHYS(m) - (VM_PAGE_TO_PHYS(m) & PG_PS_FRAME));
+ lo = pv_list_lock_object(VM_PAGE_TO_PHYS(m));
+
+ if (pte_locked) {
+ /*
+ * Fast attempt. If we either own or can get the pv
+ * list lock of the first page in the superpage, all
+ * other owners must release their locks without
+ * waiting for us.
+ */
+ if (m == sm || obm_trylock(&sm->md.pv_lock)) {
+ for (i = 1, mt = sm + 1; i < NPDEPG; i++, mt++) {
+ if (m != mt)
+ obm_lock(&mt->md.pv_lock, lo);
+ }
+ return;
+ }
+
+ obm_unlock(&m->md.pv_lock, lo);
+ }
+
+ for (i = 0, mt = sm; i < NPDEPG; i++, mt++) {
+ obm_lock(&mt->md.pv_lock, lo);
+ }
+}
+
+/*
+ * If *lockp points to one of the ordinary pages from the superpage we
+ * are demoting or promoting, then we keep that page's pv list locked
+ * after pmap_pv_list_unlock_pde(). Otherwise, we just unlock whatever
+ * was locked, and pmap_pv_list_unlock_pde() releases the whole run.
+ */
+static void
+pmap_pv_list_lock_pde(vm_paddr_t pa, PVLL **lockp)
+{
+ vm_page_t m;
+
+ m = PHYS_TO_VM_PAGE(pa);
+ KASSERT(m != NULL,
+ ("pmap_pv_list_lock_pde: unmanaged phys addr %#lx", pa));
+
+ if (*lockp == NULL) {
+ pmap_pv_list_lock_pde1(m, false);
+ return;
+ }
+ if ((VM_PAGE_TO_PHYS(*lockp) & PG_PS_FRAME) != (pa & PG_PS_FRAME)) {
+ pmap_pv_list_unlock(*lockp);
+ *lockp = NULL;
+ pmap_pv_list_lock_pde1(m, false);
+ return;
+ }
+ pmap_pv_list_lock_pde1(*lockp, true);
+}
+
+static void
+pmap_pv_list_unlock_pde(vm_paddr_t pa, PVLL **lockp)
+{
+ vm_page_t m, mt, sm;
+ struct lock_object *lo;
+ int i;
+ bool pte_locked;
+
+ m = *lockp;
+ pte_locked = m != NULL;
+ if (!pte_locked) {
+ m = PHYS_TO_VM_PAGE(pa);
+ if (m == NULL)
+ m = &pv_fake_page;
+ }
+
+ sm = m - atop(VM_PAGE_TO_PHYS(m) - (VM_PAGE_TO_PHYS(m) & PG_PS_FRAME));
+ lo = pv_list_lock_object(VM_PAGE_TO_PHYS(m));
+ obm_assert_locked(&m->md.pv_lock);
+ obm_assert_locked(&sm->md.pv_lock);
+
+ for (i = 0, mt = sm; i < NPDEPG; i++, mt++) {
+ if (!pte_locked || mt != m)
+ obm_unlock(&mt->md.pv_lock, lo);
+ }
+}
/*
* All those kernel PT submaps that BSD is so fond of
@@ -1153,7 +1277,7 @@
{
u_long gen, *m_gen;
- rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED);
+ obm_assert_locked(&m->md.pv_lock);
gen = curthread->td_md.md_invl_gen.gen;
if (gen == 0)
return;
@@ -1186,37 +1310,37 @@
static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_chunk_batch(struct pv_chunklist *batch);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
+static pv_entry_t get_pv_entry(pmap_t pmap, PVLL **lockp);
static int popcnt_pc_map_pq(uint64_t *map);
-static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
-static void reserve_pv_entries(pmap_t pmap, int needed,
- struct rwlock **lockp);
+static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, PVLL **lockp,
+ bool avoid_locked_pmap);
+static void reserve_pv_entries(pmap_t pmap, int needed, PVLL **lockp);
static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp);
+ PVLL **lockp);
static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde,
- u_int flags, struct rwlock **lockp);
+ u_int flags, PVLL **lockp);
#if VM_NRESERVLEVEL > 0
static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp);
+ PVLL **lockp);
#endif
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
- vm_offset_t va);
+ vm_offset_t va);
static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
vm_prot_t prot, int mode, int flags);
static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
- vm_offset_t va, struct rwlock **lockp);
+ vm_offset_t va, PVLL **lockp);
static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
vm_offset_t va);
static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, struct rwlock **lockp);
+ vm_prot_t prot, PVLL **lockp);
static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
- u_int flags, vm_page_t m, struct rwlock **lockp);
+ u_int flags, vm_page_t m, PVLL **lockp);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
- vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
+ vm_page_t m, vm_prot_t prot, vm_page_t mpte, PVLL **lockp);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
@@ -1224,13 +1348,13 @@
static void pmap_invalidate_cache_range_all(vm_offset_t sva,
vm_offset_t eva);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
- pd_entry_t pde);
+ pd_entry_t pde);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static vm_page_t pmap_large_map_getptp_unlocked(void);
static vm_paddr_t pmap_large_map_kextract(vm_offset_t va);
#if VM_NRESERVLEVEL > 0
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
- struct rwlock **lockp);
+ PVLL **lockp);
#endif
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
vm_prot_t prot);
@@ -1241,27 +1365,26 @@
static pd_entry_t *pmap_pti_pde(vm_offset_t va);
static void pmap_pti_wire_pte(void *pte);
static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- struct spglist *free, struct rwlock **lockp);
+ struct spglist *free, PVLL **lockp);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
- pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
+ pd_entry_t ptepde, struct spglist *free, PVLL **lockp);
static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
struct spglist *free);
static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
- pd_entry_t *pde, struct spglist *free,
- struct rwlock **lockp);
+ pd_entry_t *pde, struct spglist *free, PVLL **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
- vm_page_t m, struct rwlock **lockp);
+ vm_page_t m, PVLL **lockp);
static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
pd_entry_t newpde);
static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde);
static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
- struct rwlock **lockp);
+ PVLL **lockp);
static pd_entry_t *pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
- struct rwlock **lockp);
+ PVLL **lockp);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
- struct rwlock **lockp);
+ PVLL **lockp);
static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct spglist *free);
@@ -1724,6 +1847,9 @@
cr4 |= CR4_SMAP;
load_cr4(cr4);
+ TAILQ_INIT(&all_pmaps);
+ mtx_init(&all_pmaps_lock, "allpms", NULL, MTX_DEF);
+
/*
* Initialize the kernel pmap (which is statically allocated).
* Count bootstrap data as being resident in case any of this data is
@@ -1900,6 +2026,7 @@
TAILQ_INIT(&m->md.pv_list);
m->md.pat_mode = PAT_WRITE_BACK;
+ obm_init(&m->md.pv_lock);
}
static int pmap_allow_2m_x_ept;
@@ -1956,13 +2083,6 @@
long start, end, highest, pv_npg;
int domain, i, j, pages;
- /*
- * We strongly depend on the size being a power of two, so the assert
- * is overzealous. However, should the struct be resized to a
- * different power of two, the code below needs to be revisited.
- */
- CTASSERT((sizeof(*pvd) == 64));
-
/*
* Calculate the size of the array.
*/
@@ -1997,12 +2117,13 @@
vm_page_t m = vm_page_alloc_domain(NULL, 0,
domain, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
if (m == NULL)
- panic("vm_page_alloc_domain failed for %lx\n", (vm_offset_t)pvd + j);
+ panic("vm_page_alloc_domain failed for %lx\n",
+ (vm_offset_t)pvd + j);
pmap_qenter((vm_offset_t)pvd + j, &m, 1);
}
for (j = 0; j < s / sizeof(*pvd); j++) {
- rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
+ obm_init_lo(&pvd->lo, "pmap pv list");
TAILQ_INIT(&pvd->pv_page.pv_list);
pvd->pv_page.pv_gen = 0;
pvd->pv_page.pat_mode = 0;
@@ -2011,8 +2132,10 @@
}
}
pvd = &pv_dummy_large;
- rw_init_flags(&pvd->pv_lock, "pmap pv list dummy", RW_NEW);
+ obm_init_lo(&pvd->lo, "pmap pv list dummy");
TAILQ_INIT(&pvd->pv_page.pv_list);
+ pmap_page_init(&pv_fake_page);
+ pv_fake_page.phys_addr = pmap_last_pa + PAGE_SIZE;
pvd->pv_page.pv_gen = 0;
pvd->pv_page.pat_mode = 0;
pvd->pv_invl_gen = 0;
@@ -2028,7 +2151,7 @@
* Initialize the pool of pv list locks.
*/
for (i = 0; i < NPV_LIST_LOCKS; i++)
- rw_init(&pv_list_locks[i], "pmap pv list");
+ obm_init_lo(&pv_lo[i], "pmap pv list");
/*
* Calculate the size of the pv head table for superpages.
@@ -2044,6 +2167,8 @@
for (i = 0; i < pv_npg; i++)
TAILQ_INIT(&pv_table[i].pv_list);
TAILQ_INIT(&pv_dummy.pv_list);
+ pmap_page_init(&pv_fake_page);
+ pv_fake_page.phys_addr = vm_phys_segs[vm_phys_nsegs - 1].end + PAGE_SIZE;
}
#endif
@@ -2059,6 +2184,10 @@
vm_page_t m, mpte;
int error, i, ret, skz63;
+ /* Compiler cannot evaluate this at compile time. */
+ MPASS(__bitcount64(PC_FREE0) + __bitcount64(PC_FREE1) +
+ __bitcount64(PC_FREE2) == _NPCPV);
+
/* L1TF, reserve page @0 unconditionally */
vm_page_blacklist_add(0, bootverbose);
@@ -2143,13 +2272,6 @@
pagesizes[1] = NBPDR;
}
- /*
- * Initialize pv chunk lists.
- */
- for (i = 0; i < PMAP_MEMDOM; i++) {
- mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL, MTX_DEF);
- TAILQ_INIT(&pv_chunks[i].pvc_list);
- }
pmap_init_pv_table();
pmap_initialized = 1;
@@ -3651,6 +3773,21 @@
}
}
+void
+pmap_lock_init(pmap_t pmap)
+{
+ mtx_init(&pmap->pm_mtx, "pmap", NULL, MTX_DEF | MTX_DUPOK);
+
+ /*
+ * Add pmap to the global list, to be used during the pv
+ * chunks reclamation. Pmap is never removed from the list,
+ * relying on type-stability of the vmspace zone.
+ */
+ mtx_lock(&all_pmaps_lock);
+ TAILQ_INSERT_TAIL(&all_pmaps, pmap, pm_allpmaps);
+ mtx_unlock(&all_pmaps_lock);
+}
+
void
pmap_pinit0(pmap_t pmap)
{
@@ -3834,7 +3971,7 @@
* it is statically allocated by pmap_pinit() and not by _pmap_allocpte().
*/
static vm_page_t
-_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
+_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, PVLL **lockp)
{
vm_page_t m, pdppg, pdpg;
pt_entry_t PG_A, PG_M, PG_RW, PG_V;
@@ -3981,7 +4118,7 @@
static pd_entry_t *
pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pdp_entry_t *pdpe, PG_V;
pd_entry_t *pde;
@@ -4020,7 +4157,7 @@
}
static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
+pmap_allocpte(pmap_t pmap, vm_offset_t va, PVLL **lockp)
{
vm_pindex_t ptepindex;
pd_entry_t *pd, PG_V;
@@ -4266,10 +4403,6 @@
* page management routines.
***************************************************/
-CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
-CTASSERT(_NPCM == 3);
-CTASSERT(_NPCPV == 168);
-
static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{
@@ -4279,10 +4412,6 @@
#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
-#define PC_FREE0 0xfffffffffffffffful
-#define PC_FREE1 0xfffffffffffffffful
-#define PC_FREE2 0x000000fffffffffful
-
static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
#ifdef PV_STATS
@@ -4310,129 +4439,32 @@
"Current number of spare pv entries");
#endif
-static void
-reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked_pmap, bool start_di)
-{
-
- if (pmap == NULL)
- return;
- pmap_invalidate_all(pmap);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
- if (start_di)
- pmap_delayed_invl_finish();
-}
-
-/*
- * We are in a serious low memory condition. Resort to
- * drastic measures to free some pages so we can allocate
- * another pv entry chunk.
- *
- * Returns NULL if PV entries were reclaimed from the specified pmap.
- *
- * We do not, however, unmap 2mpages because subsequent accesses will
- * allocate per-page pv entries until repromotion occurs, thereby
- * exacerbating the shortage of free pv entries.
- */
-static vm_page_t
-reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain)
+static bool
+reclaim_pv_chunk_handle_pmap(pmap_t pmap, pmap_t locked_pmap,
+ bool avoid_locked_pmap, PVLL **lockp, struct spglist *free)
{
- struct pv_chunks_list *pvc;
- struct pv_chunk *pc, *pc_marker, *pc_marker_end;
- struct pv_chunk_header pc_marker_b, pc_marker_end_b;
+ struct pv_chunk *pc, *pcn;
+ pv_entry_t pv;
+ vm_offset_t va;
+ vm_page_t m, m_pc;
struct md_page *pvh;
pd_entry_t *pde;
- pmap_t next_pmap, pmap;
pt_entry_t *pte, tpte;
pt_entry_t PG_G, PG_A, PG_M, PG_RW;
- pv_entry_t pv;
- vm_offset_t va;
- vm_page_t m, m_pc;
- struct spglist free;
uint64_t inuse;
int bit, field, freed;
- bool start_di, restart;
-
- PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
- KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
- pmap = NULL;
- m_pc = NULL;
- PG_G = PG_A = PG_M = PG_RW = 0;
- SLIST_INIT(&free);
- bzero(&pc_marker_b, sizeof(pc_marker_b));
- bzero(&pc_marker_end_b, sizeof(pc_marker_end_b));
- pc_marker = (struct pv_chunk *)&pc_marker_b;
- pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;
-
- /*
- * A delayed invalidation block should already be active if
- * pmap_advise() or pmap_remove() called this function by way
- * of pmap_demote_pde_locked().
- */
- start_di = pmap_not_in_di();
+ bool ret;
- pvc = &pv_chunks[domain];
- mtx_lock(&pvc->pvc_lock);
- pvc->active_reclaims++;
- TAILQ_INSERT_HEAD(&pvc->pvc_list, pc_marker, pc_lru);
- TAILQ_INSERT_TAIL(&pvc->pvc_list, pc_marker_end, pc_lru);
- while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
- SLIST_EMPTY(&free)) {
- next_pmap = pc->pc_pmap;
- if (next_pmap == NULL) {
- /*
- * The next chunk is a marker. However, it is
- * not our marker, so active_reclaims must be
- * > 1. Consequently, the next_chunk code
- * will not rotate the pv_chunks list.
- */
- goto next_chunk;
- }
- mtx_unlock(&pvc->pvc_lock);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- /*
- * A pv_chunk can only be removed from the pc_lru list
- * when both pc_chunks_mutex is owned and the
- * corresponding pmap is locked.
- */
- if (pmap != next_pmap) {
- restart = false;
- reclaim_pv_chunk_leave_pmap(pmap, locked_pmap,
- start_di);
- pmap = next_pmap;
- /* Avoid deadlock and lock recursion. */
- if (pmap > locked_pmap) {
- RELEASE_PV_LIST_LOCK(lockp);
- PMAP_LOCK(pmap);
- if (start_di)
- pmap_delayed_invl_start();
- mtx_lock(&pvc->pvc_lock);
- restart = true;
- } else if (pmap != locked_pmap) {
- if (PMAP_TRYLOCK(pmap)) {
- if (start_di)
- pmap_delayed_invl_start();
- mtx_lock(&pvc->pvc_lock);
- restart = true;
- } else {
- pmap = NULL; /* pmap is not locked */
- mtx_lock(&pvc->pvc_lock);
- pc = TAILQ_NEXT(pc_marker, pc_lru);
- if (pc == NULL ||
- pc->pc_pmap != next_pmap)
- continue;
- goto next_chunk;
- }
- } else if (start_di)
- pmap_delayed_invl_start();
- PG_G = pmap_global_bit(pmap);
- PG_A = pmap_accessed_bit(pmap);
- PG_M = pmap_modified_bit(pmap);
- PG_RW = pmap_rw_bit(pmap);
- if (restart)
- continue;
- }
+ ret = false;
+ PG_G = pmap_global_bit(pmap);
+ PG_A = pmap_accessed_bit(pmap);
+ PG_M = pmap_modified_bit(pmap);
+ PG_RW = pmap_rw_bit(pmap);
+ TAILQ_FOREACH_REVERSE_SAFE(pc, &pmap->pm_pvchunk, pvchunks,
+ pc_list, pcn) {
/*
* Destroy every non-wired, 4 KB page mapping in the chunk.
*/
@@ -4470,84 +4502,156 @@
}
pmap_delayed_invl_page(m);
pc->pc_map[field] |= 1UL << bit;
- pmap_unuse_pt(pmap, va, *pde, &free);
+ pmap_unuse_pt(pmap, va, *pde, free);
freed++;
}
}
- if (freed == 0) {
- mtx_lock(&pvc->pvc_lock);
- goto next_chunk;
- }
+ if (freed == 0)
+ continue;
+
/* Every freed mapping is for a 4 KB page. */
pmap_resident_count_dec(pmap, freed);
PV_STAT(atomic_add_long(&pv_entry_frees, freed));
PV_STAT(atomic_add_int(&pv_entry_spare, freed));
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
pc->pc_map[2] == PC_FREE2) {
- PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
- PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
- PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
- /* Entire chunk is free; return it. */
- m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
- dump_drop_page(m_pc->phys_addr);
- mtx_lock(&pvc->pvc_lock);
- TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
- break;
+ if (!avoid_locked_pmap || locked_pmap != pmap) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ PV_STAT(atomic_subtract_int(&pv_entry_spare,
+ _NPCPV));
+ PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
+ /* Entire chunk is free; return it. */
+ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS(
+ (vm_offset_t)pc));
+ dump_drop_page(m_pc->phys_addr);
+ m_pc->ref_count = 0;
+ SLIST_INSERT_HEAD(free, m_pc, plinks.s.ss);
+ break;
+ }
+ } else {
+ /*
+ * Re-insert at head because allocator bails
+ * out if it finds a fully populated chunk.
+ */
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
}
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- mtx_lock(&pvc->pvc_lock);
/* One freed pv entry in locked_pmap is sufficient. */
- if (pmap == locked_pmap)
+ if (pmap == locked_pmap) {
+ ret = true;
break;
-next_chunk:
- TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
- TAILQ_INSERT_AFTER(&pvc->pvc_list, pc, pc_marker, pc_lru);
- if (pvc->active_reclaims == 1 && pmap != NULL) {
- /*
- * Rotate the pv chunks list so that we do not
- * scan the same pv chunks that could not be
- * freed (because they contained a wired
- * and/or superpage mapping) on every
- * invocation of reclaim_pv_chunk().
- */
- while ((pc = TAILQ_FIRST(&pvc->pvc_list)) != pc_marker) {
- MPASS(pc->pc_pmap != NULL);
- TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
- TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
- }
}
}
- TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
- TAILQ_REMOVE(&pvc->pvc_list, pc_marker_end, pc_lru);
- pvc->active_reclaims--;
- mtx_unlock(&pvc->pvc_lock);
- reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di);
- if (m_pc == NULL && !SLIST_EMPTY(&free)) {
- m_pc = SLIST_FIRST(&free);
- SLIST_REMOVE_HEAD(&free, plinks.s.ss);
- /* Recycle a freed page table page. */
- m_pc->ref_count = 1;
- }
- vm_page_free_pages_toq(&free, true);
- return (m_pc);
+ return (ret);
}
+/*
+ * We are in a serious low memory condition. Resort to
+ * drastic measures to free some pages so we can allocate
+ * another pv entry chunk.
+ *
+ * Returns NULL if PV entries were reclaimed from the specified pmap,
+ * otherwise, returns a free page to be used for a PV chunk.
+ *
+ * If avoid_locked_pmap is true, chunks are not freed from the
+ * locked_pmap (but pv entries are).
+ *
+ * We do not, however, unmap 2mpages because subsequent accesses will
+ * allocate per-page pv entries until repromotion occurs, thereby
+ * exacerbating the shortage of free pv entries.
+ */
static vm_page_t
-reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+reclaim_pv_chunk(pmap_t locked_pmap, PVLL **lockp, bool avoid_locked_pmap)
{
vm_page_t m;
- int i, domain;
+ pmap_t next_pmap, pmap;
+ struct spglist free;
+ bool res, start_di;
+
+ PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+ KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
+ pmap = NULL;
+ m = NULL;
+ res = false;
+ SLIST_INIT(&free);
+
+ /*
+ * A delayed invalidation block should already be active if
+ * pmap_advise() or pmap_remove() called this function by way
+ * of pmap_demote_pde_locked().
+ */
+ start_di = pmap_not_in_di();
- domain = PCPU_GET(domain);
- for (i = 0; i < vm_ndomains; i++) {
- m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain);
- if (m != NULL)
+ for (;;) {
+ /*
+ * A parallel reclaim_pv_chunk() could move our cursor
+ * to the end of the list, which causes earlier
+ * termination of the loop. Since all callers are
+ * prepared for reclaim_pv_chunk() failure, it only
+ * means that callers retry with the page allocator
+ * before trying to reclaim one more time.
+ */
+ mtx_lock(&all_pmaps_lock);
+ next_pmap = pmap == NULL ? TAILQ_FIRST(&all_pmaps) :
+ TAILQ_NEXT(pmap, pm_allpmaps);
+ mtx_unlock(&all_pmaps_lock);
+ if (next_pmap == NULL)
break;
- domain = (domain + 1) % vm_ndomains;
- }
+ pmap = next_pmap;
+ /*
+ * This lockless check is fine, we would either
+ * process a pmap without any pv chunks or skip some
+ * potentially consumable pmap. But it is still
+ * useful to cheaply skip freed pmaps which are kept
+ * on the list due to type stability.
+ */
+ if (pmap->pm_stats.resident_count == 0)
+ continue;
+
+ /* Avoid deadlock and lock recursion. */
+ if (pmap > locked_pmap) {
+ RELEASE_PV_LIST_LOCK(lockp);
+ PMAP_LOCK(pmap);
+ if (start_di)
+ pmap_delayed_invl_start();
+ } else if (pmap != locked_pmap) {
+ if (PMAP_TRYLOCK(pmap)) {
+ if (start_di)
+ pmap_delayed_invl_start();
+ } else {
+ /* The pmap is not locked, skip it. */
+ continue;
+ }
+ } else if (start_di)
+ pmap_delayed_invl_start();
+
+ if (pmap->pm_stats.resident_count != 0) {
+ res = reclaim_pv_chunk_handle_pmap(pmap, locked_pmap,
+ avoid_locked_pmap, lockp, &free);
+ }
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ if (start_di)
+ pmap_delayed_invl_finish();
+ if (res || !SLIST_EMPTY(&free)) {
+ mtx_lock(&all_pmaps_lock);
+ TAILQ_REMOVE(&all_pmaps, pmap, pm_allpmaps);
+ TAILQ_INSERT_TAIL(&all_pmaps, pmap, pm_allpmaps);
+ mtx_unlock(&all_pmaps_lock);
+ break;
+ }
+ }
+ if (!res && !SLIST_EMPTY(&free)) {
+ m = SLIST_FIRST(&free);
+ SLIST_REMOVE_HEAD(&free, plinks.s.ss);
+ /* Recycle a freed page table page. */
+ m->ref_count = 1;
+ }
+ vm_page_free_pages_toq(&free, true);
return (m);
}
@@ -4583,7 +4687,7 @@
}
static void
-free_pv_chunk_dequeued(struct pv_chunk *pc)
+free_pv_chunk(struct pv_chunk *pc)
{
vm_page_t m;
@@ -4597,40 +4701,13 @@
vm_page_free(m);
}
-static void
-free_pv_chunk(struct pv_chunk *pc)
-{
- struct pv_chunks_list *pvc;
-
- pvc = &pv_chunks[pc_to_domain(pc)];
- mtx_lock(&pvc->pvc_lock);
- TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
- mtx_unlock(&pvc->pvc_lock);
- free_pv_chunk_dequeued(pc);
-}
-
static void
free_pv_chunk_batch(struct pv_chunklist *batch)
{
- struct pv_chunks_list *pvc;
struct pv_chunk *pc, *npc;
- int i;
- for (i = 0; i < vm_ndomains; i++) {
- if (TAILQ_EMPTY(&batch[i]))
- continue;
- pvc = &pv_chunks[i];
- mtx_lock(&pvc->pvc_lock);
- TAILQ_FOREACH(pc, &batch[i], pc_list) {
- TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
- }
- mtx_unlock(&pvc->pvc_lock);
- }
-
- for (i = 0; i < vm_ndomains; i++) {
- TAILQ_FOREACH_SAFE(pc, &batch[i], pc_list, npc) {
- free_pv_chunk_dequeued(pc);
- }
+ TAILQ_FOREACH_SAFE(pc, batch, pc_list, npc) {
+ free_pv_chunk(pc);
}
}
@@ -4643,9 +4720,8 @@
* The given PV list lock may be released.
*/
static pv_entry_t
-get_pv_entry(pmap_t pmap, struct rwlock **lockp)
+get_pv_entry(pmap_t pmap, PVLL **lockp)
{
- struct pv_chunks_list *pvc;
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
@@ -4685,7 +4761,7 @@
PV_STAT(pc_chunk_tryfail++);
return (NULL);
}
- m = reclaim_pv_chunk(pmap, lockp);
+ m = reclaim_pv_chunk(pmap, lockp, false);
if (m == NULL)
goto retry;
}
@@ -4697,10 +4773,6 @@
pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
- pvc = &pv_chunks[_vm_phys_domain(m->phys_addr)];
- mtx_lock(&pvc->pvc_lock);
- TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
- mtx_unlock(&pvc->pvc_lock);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
PV_STAT(atomic_add_long(&pv_entry_count, 1));
@@ -4744,26 +4816,16 @@
* The given PV list lock may be released.
*/
static void
-reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
+reserve_pv_entries(pmap_t pmap, int needed, PVLL **lockp)
{
- struct pv_chunks_list *pvc;
- struct pch new_tail[PMAP_MEMDOM];
struct pv_chunk *pc;
vm_page_t m;
- int avail, free, i;
+ int avail, free;
bool reclaimed;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
- /*
- * Newly allocated PV chunks must be stored in a private list until
- * the required number of PV chunks have been allocated. Otherwise,
- * reclaim_pv_chunk() could recycle one of these chunks. In
- * contrast, these chunks must be added to the pmap upon allocation.
- */
- for (i = 0; i < PMAP_MEMDOM; i++)
- TAILQ_INIT(&new_tail[i]);
retry:
avail = 0;
TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
@@ -4784,7 +4846,7 @@
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED);
if (m == NULL) {
- m = reclaim_pv_chunk(pmap, lockp);
+ m = reclaim_pv_chunk(pmap, lockp, true);
if (m == NULL)
goto retry;
reclaimed = true;
@@ -4798,7 +4860,6 @@
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_TAIL(&new_tail[pc_to_domain(pc)], pc, pc_lru);
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
/*
@@ -4809,14 +4870,6 @@
if (reclaimed)
goto retry;
}
- for (i = 0; i < vm_ndomains; i++) {
- if (TAILQ_EMPTY(&new_tail[i]))
- continue;
- pvc = &pv_chunks[i];
- mtx_lock(&pvc->pvc_lock);
- TAILQ_CONCAT(&pvc->pvc_list, &new_tail[i], pc_lru);
- mtx_unlock(&pvc->pvc_lock);
- }
}
/*
@@ -4847,7 +4900,7 @@
*/
static void
pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp)
+ PVLL **lockp)
{
struct md_page *pvh;
struct pv_chunk *pc;
@@ -4859,7 +4912,6 @@
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 2mpage aligned"));
- CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
* Transfer the 2mpage's pv entry for this mapping to the first
@@ -4916,7 +4968,7 @@
*/
static void
pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp)
+ PVLL **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
@@ -4925,7 +4977,6 @@
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 2mpage aligned"));
- CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
* Transfer the first page's pv entry for this mapping to the 2mpage's
@@ -4972,7 +5023,7 @@
*/
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pv_entry_t pv;
@@ -4995,7 +5046,7 @@
*/
static bool
pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags,
- struct rwlock **lockp)
+ PVLL **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
@@ -5036,13 +5087,13 @@
static boolean_t
pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
- struct rwlock *lock;
+ PVLL *lock;
boolean_t rv;
lock = NULL;
rv = pmap_demote_pde_locked(pmap, pde, va, &lock);
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -5075,7 +5126,7 @@
static void
pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
- pd_entry_t oldpde, struct rwlock **lockp)
+ pd_entry_t oldpde, PVLL **lockp)
{
struct spglist free;
vm_offset_t sva;
@@ -5092,7 +5143,7 @@
static boolean_t
pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
@@ -5226,8 +5277,11 @@
/*
* Demote the PV entry.
*/
- if ((oldpde & PG_MANAGED) != 0)
+ if ((oldpde & PG_MANAGED) != 0) {
+ pmap_pv_list_lock_pde(oldpde & PG_PS_FRAME, lockp);
pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp);
+ pmap_pv_list_unlock_pde(oldpde & PG_PS_FRAME, lockp);
+ }
atomic_add_long(&pmap_pde_demotions, 1);
CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx in pmap %p",
@@ -5280,7 +5334,7 @@
*/
static int
pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- struct spglist *free, struct rwlock **lockp)
+ struct spglist *free, PVLL **lockp)
{
struct md_page *pvh;
pd_entry_t oldpde;
@@ -5309,6 +5363,7 @@
eva = sva + NBPDR;
for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
va < eva; va += PAGE_SIZE, m++) {
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
if (oldpde & PG_A)
@@ -5341,7 +5396,7 @@
*/
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va,
- pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp)
+ pd_entry_t ptepde, struct spglist *free, PVLL **lockp)
{
struct md_page *pvh;
pt_entry_t oldpte, PG_A, PG_M, PG_RW;
@@ -5382,7 +5437,7 @@
pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
struct spglist *free)
{
- struct rwlock *lock;
+ PVLL *lock;
pt_entry_t *pte, PG_V;
PG_V = pmap_valid_bit(pmap);
@@ -5395,7 +5450,7 @@
lock = NULL;
pmap_remove_pte(pmap, pte, va, *pde, free, &lock);
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
pmap_invalidate_page(pmap, va);
}
@@ -5404,7 +5459,7 @@
*/
static bool
pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
- pd_entry_t *pde, struct spglist *free, struct rwlock **lockp)
+ pd_entry_t *pde, struct spglist *free, PVLL **lockp)
{
pt_entry_t PG_G, *pte;
vm_offset_t va;
@@ -5446,7 +5501,7 @@
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- struct rwlock *lock;
+ PVLL *lock;
vm_offset_t va_next;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
@@ -5559,7 +5614,7 @@
anyvalid = 1;
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
out:
if (anyvalid)
pmap_invalidate_all(pmap);
@@ -5587,7 +5642,7 @@
struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
- struct rwlock *lock;
+ PVLL *lock;
pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW;
pd_entry_t *pde;
vm_offset_t va;
@@ -5601,16 +5656,16 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry:
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -5619,17 +5674,19 @@
pde = pmap_pde(pmap, va);
(void)pmap_demote_pde_locked(pmap, pde, va, &lock);
PMAP_UNLOCK(pmap);
+ if (lock != m)
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
}
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -5661,7 +5718,7 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
pmap_delayed_invl_wait(m);
vm_page_free_pages_toq(&free, true);
}
@@ -5877,8 +5934,7 @@
* identical characteristics.
*/
static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
- struct rwlock **lockp)
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, PVLL **lockp)
{
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
@@ -5980,8 +6036,11 @@
/*
* Promote the pv entries.
*/
- if ((newpde & PG_MANAGED) != 0)
+ if ((newpde & PG_MANAGED) != 0) {
+ pmap_pv_list_lock_pde(newpde & PG_PS_FRAME, lockp);
pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp);
+ pmap_pv_list_unlock_pde(newpde & PG_PS_FRAME, lockp);
+ }
/*
* Propagate the PAT index to its proper position.
@@ -6022,7 +6081,7 @@
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
u_int flags, int8_t psind)
{
- struct rwlock *lock;
+ PVLL *lock;
pd_entry_t *pde;
pt_entry_t *pte, PG_G, PG_A, PG_M, PG_RW, PG_V;
pt_entry_t newpte, origpte;
@@ -6228,7 +6287,7 @@
pv = get_pv_entry(pmap, &lock);
pv->pv_va = va;
}
- CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
if ((newpte & PG_RW) != 0)
@@ -6281,7 +6340,7 @@
rv = KERN_SUCCESS;
out:
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
return (rv);
}
@@ -6295,7 +6354,7 @@
*/
static bool
pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pd_entry_t newpde;
pt_entry_t PG_V;
@@ -6346,7 +6405,7 @@
*/
static int
pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
- vm_page_t m, struct rwlock **lockp)
+ vm_page_t m, PVLL **lockp)
{
struct spglist free;
pd_entry_t oldpde, *pde;
@@ -6494,7 +6553,7 @@
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
{
- struct rwlock *lock;
+ PVLL *lock;
vm_offset_t va;
vm_page_t m, mpte;
vm_pindex_t diff, psize;
@@ -6519,7 +6578,7 @@
m = TAILQ_NEXT(m, listq);
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
}
@@ -6535,19 +6594,19 @@
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
- struct rwlock *lock;
+ PVLL *lock;
lock = NULL;
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
}
static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
+ vm_prot_t prot, vm_page_t mpte, PVLL **lockp)
{
pt_entry_t newpte, *pte, PG_V;
@@ -6841,7 +6900,7 @@
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
- struct rwlock *lock;
+ PVLL *lock;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t *pde, srcptepaddr;
@@ -6978,7 +7037,7 @@
}
out:
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -7093,7 +7152,7 @@
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
struct md_page *pvh;
- struct rwlock *lock;
+ PVLL *lock;
pv_entry_t pv;
int loops = 0;
boolean_t rv;
@@ -7102,7 +7161,7 @@
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
if (PV_PMAP(pv) == pmap) {
rv = TRUE;
@@ -7124,7 +7183,7 @@
break;
}
}
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -7137,7 +7196,7 @@
int
pmap_page_wired_mappings(vm_page_t m)
{
- struct rwlock *lock;
+ PVLL *lock;
struct md_page *pvh;
pmap_t pmap;
pt_entry_t *pte;
@@ -7147,16 +7206,16 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (0);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
restart:
count = 0;
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -7174,9 +7233,9 @@
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen ||
pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
@@ -7189,7 +7248,7 @@
PMAP_UNLOCK(pmap);
}
}
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (count);
}
@@ -7200,17 +7259,17 @@
boolean_t
pmap_page_is_mapped(vm_page_t m)
{
- struct rwlock *lock;
+ PVLL *lock;
boolean_t rv;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -7246,15 +7305,15 @@
pt_entry_t *pte, tpte;
pt_entry_t PG_M, PG_RW, PG_V;
struct spglist free;
- struct pv_chunklist free_chunks[PMAP_MEMDOM];
+ struct pv_chunklist free_chunks;
vm_page_t m, mpte, mt;
pv_entry_t pv;
struct md_page *pvh;
struct pv_chunk *pc, *npc;
- struct rwlock *lock;
+ PVLL *lock;
int64_t bit;
uint64_t inuse, bitmask;
- int allfree, field, freed, i, idx;
+ int allfree, field, freed, idx;
boolean_t superpage;
vm_paddr_t pa;
@@ -7282,8 +7341,7 @@
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
- for (i = 0; i < PMAP_MEMDOM; i++)
- TAILQ_INIT(&free_chunks[i]);
+ TAILQ_INIT(&free_chunks);
SLIST_INIT(&free);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
@@ -7411,11 +7469,11 @@
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
if (allfree) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc, pc_list);
+ TAILQ_INSERT_TAIL(&free_chunks, pc, pc_list);
}
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
pmap_invalidate_all(pmap);
pmap_pkru_deassign_all(pmap);
free_pv_chunk_batch((struct pv_chunklist *)&free_chunks);
@@ -7426,7 +7484,7 @@
static boolean_t
pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
{
- struct rwlock *lock;
+ PVLL *lock;
pv_entry_t pv;
struct md_page *pvh;
pt_entry_t *pte, mask;
@@ -7437,15 +7495,15 @@
rv = FALSE;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
restart:
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -7475,9 +7533,9 @@
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen ||
pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
@@ -7503,7 +7561,7 @@
}
}
out:
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -7576,7 +7634,7 @@
{
struct md_page *pvh;
pmap_t pmap;
- struct rwlock *lock;
+ PVLL *lock;
pv_entry_t next_pv, pv;
pd_entry_t *pde;
pt_entry_t oldpte, *pte, PG_M, PG_RW;
@@ -7594,17 +7652,18 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry_pv_loop:
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
+retry_pv_loop_locked:
+ pvh_gen = pvh->pv_gen;
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
- pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
goto retry_pv_loop;
}
}
@@ -7613,23 +7672,24 @@
pde = pmap_pde(pmap, va);
if ((*pde & PG_RW) != 0)
(void)pmap_demote_pde_locked(pmap, pde, va, &lock);
- KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
- ("inconsistent pv lock %p %p for page %p",
- lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
PMAP_UNLOCK(pmap);
+ if (lock != m || pvh_gen != pvh->pv_gen) {
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
+ goto retry_pv_loop_locked;
+ }
}
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen ||
md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
goto retry_pv_loop;
}
}
@@ -7652,7 +7712,7 @@
}
PMAP_UNLOCK(pmap);
}
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
vm_page_aflag_clear(m, PGA_WRITEABLE);
pmap_delayed_invl_wait(m);
}
@@ -7710,7 +7770,7 @@
struct md_page *pvh;
pv_entry_t pv, pvf;
pmap_t pmap;
- struct rwlock *lock;
+ PVLL *lock;
pd_entry_t oldpde, *pde;
pt_entry_t *pte, PG_A, PG_M, PG_RW;
vm_offset_t va;
@@ -7726,21 +7786,21 @@
pa = VM_PAGE_TO_PHYS(m);
lock = PHYS_TO_PV_LIST_LOCK(pa);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
retry:
not_cleared = 0;
if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
goto small_mappings;
pv = pvf;
do {
+ pvh_gen = pvh->pv_gen;
if (pvf == NULL)
pvf = pv;
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
- pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
@@ -7805,7 +7865,6 @@
pmap_invalidate_page(pmap, va);
} else
demoted = TRUE;
-
if (demoted) {
/*
* The superpage mapping was removed
@@ -7817,9 +7876,10 @@
pv = NULL;
}
cleared++;
- KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
- ("inconsistent pv lock %p %p for page %p",
- lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
+ if (lock != m || pvh_gen != pvh->pv_gen) {
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
+ goto retry;
+ }
} else
not_cleared++;
}
@@ -7844,9 +7904,9 @@
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
@@ -7897,7 +7957,7 @@
} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
not_cleared < PMAP_TS_REFERENCED_MAX);
out:
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
vm_page_free_pages_toq(&free, true);
return (cleared + not_cleared);
}
@@ -7910,7 +7970,7 @@
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
- struct rwlock *lock;
+ PVLL *lock;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t oldpde, *pde;
@@ -7967,7 +8027,7 @@
lock = NULL;
if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
/*
* The large page mapping was destroyed.
@@ -7999,7 +8059,7 @@
anychanged = true;
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
}
if (va_next > eva)
va_next = eva;
@@ -8056,7 +8116,7 @@
pv_entry_t next_pv, pv;
pd_entry_t oldpde, *pde;
pt_entry_t *pte, PG_M, PG_RW;
- struct rwlock *lock;
+ PVLL *lock;
vm_offset_t va;
int md_gen, pvh_gen;
@@ -8069,15 +8129,15 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
restart:
+ pvh_gen = pvh->pv_gen;
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
- pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -8103,15 +8163,19 @@
pmap_invalidate_page(pmap, va);
}
PMAP_UNLOCK(pmap);
+ if (lock != m || pvh_gen != pvh->pv_gen) {
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
+ goto restart;
+ }
}
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -8129,7 +8193,7 @@
}
PMAP_UNLOCK(pmap);
}
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
}
/*
@@ -9041,7 +9105,7 @@
pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype)
{
int rv;
- struct rwlock *lock;
+ PVLL *lock;
#if VM_NRESERVLEVEL > 0
vm_page_t m, mpte;
#endif
@@ -9128,7 +9192,7 @@
rv = 0; /* success */
done:
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
return (rv);
}
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -249,6 +249,7 @@
#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/_obm.h>
#include <sys/_pctrie.h>
#include <sys/_rangeset.h>
@@ -313,7 +314,9 @@
struct md_page {
TAILQ_HEAD(, pv_entry) pv_list; /* (p) */
int pv_gen; /* (p) */
- int pat_mode;
+ obm_lock_t pv_lock;
+ uint8_t pat_mode;
+ uint8_t pad0[2];
};
enum pmap_type {
@@ -327,6 +330,8 @@
uint32_t pm_gen;
};
+TAILQ_HEAD(pvchunks, pv_chunk);
+
/*
* The kernel virtual address (KVA) of the level 4 page table page is always
* within the direct map (DMAP) region.
@@ -337,7 +342,7 @@
pml4_entry_t *pm_pml4u; /* KVA of user l4 page table */
uint64_t pm_cr3;
uint64_t pm_ucr3;
- TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
+ struct pvchunks pm_pvchunk; /* list of mappings in pmap */
cpuset_t pm_active; /* active on cpus */
enum pmap_type pm_type; /* regular or nested tables */
struct pmap_statistics pm_stats; /* pmap statistics */
@@ -346,6 +351,7 @@
int pm_flags;
struct pmap_pcids pm_pcids[MAXCPU];
struct rangeset pm_pkru;
+ TAILQ_ENTRY(pmap) pm_allpmaps;
};
/* flags */
@@ -364,8 +370,7 @@
#define PMAP_LOCK_ASSERT(pmap, type) \
mtx_assert(&(pmap)->pm_mtx, (type))
#define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx)
-#define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \
- NULL, MTX_DEF | MTX_DUPOK)
+#define PMAP_LOCK_INIT(pmap) pmap_lock_init(pmap)
#define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx)
#define PMAP_MTX(pmap) (&(pmap)->pm_mtx)
#define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx)
@@ -393,8 +398,7 @@
#define PV_CHUNK_HEADER \
pmap_t pc_pmap; \
TAILQ_ENTRY(pv_chunk) pc_list; \
- uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */ \
- TAILQ_ENTRY(pv_chunk) pc_lru;
+ uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */
struct pv_chunk_header {
PV_CHUNK_HEADER
@@ -439,6 +443,7 @@
int pmap_large_map(vm_paddr_t, vm_size_t, void **, vm_memattr_t);
void pmap_large_map_wb(void *sva, vm_size_t len);
void pmap_large_unmap(void *sva, vm_size_t len);
+void pmap_lock_init(pmap_t pmap);
void *pmap_mapbios(vm_paddr_t, vm_size_t);
void *pmap_mapdev(vm_paddr_t, vm_size_t);
void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
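A note on the struct md_page hunk above: pat_mode shrinks from int to uint8_t and two bytes of explicit padding are added, so the new one-byte pv_lock fits without growing the structure. Annotated with field sizes (an editorial accounting for amd64, not part of the diff), the patched definition presumably stays at 24 bytes:

struct md_page {
	TAILQ_HEAD(, pv_entry) pv_list;	/* (p) 16 bytes: two pointers */
	int		pv_gen;		/* (p)  4 bytes */
	obm_lock_t	pv_lock;	/*      1 byte of lock state */
	uint8_t		pat_mode;	/*      1 byte, was an int */
	uint8_t		pad0[2];	/*      2 bytes, 24 bytes total as before */
};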
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3745,6 +3745,7 @@
kern/kern_mtxpool.c standard
kern/kern_mutex.c standard
kern/kern_ntptime.c standard
+kern/kern_obm.c standard
kern/kern_osd.c standard
kern/kern_physio.c standard
kern/kern_pmc.c standard
Index: sys/kern/kern_obm.c
===================================================================
--- /dev/null
+++ sys/kern/kern_obm.c
@@ -0,0 +1,129 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/obm.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/turnstile.h>
+#include <machine/atomic.h>
+
+#ifdef OBM_DEBUG
+static SYSCTL_NODE(_debug, OID_AUTO, obm, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "");
+static u_long obm_slow_lock;
+SYSCTL_LONG(_debug_obm, OID_AUTO, slow_lock, CTLFLAG_RD,
+ &obm_slow_lock, 0,
+ "");
+static u_long obm_slow_unlock;
+SYSCTL_LONG(_debug_obm, OID_AUTO, slow_unlock, CTLFLAG_RD,
+ &obm_slow_unlock, 0,
+ "");
+#endif
+
+void
+obm_init_lo(struct lock_object *lo, const char *name)
+{
+ bzero(lo, sizeof(*lo));
+ lo->lo_name = name;
+}
+
+void
+obm_init(obm_lock_t *obm)
+{
+ obm->lk = OBM_UNLOCKED;
+}
+
+void
+obm_lock_slow(obm_lock_t *obm, struct lock_object *lo)
+{
+ struct turnstile *ts;
+ struct lock_delay_arg lda;
+ uint8_t v;
+
+#ifdef OBM_DEBUG
+ atomic_add_long(&obm_slow_lock, 1);
+#endif
+ lock_delay_arg_init(&lda, &locks_delay);
+ lock_delay(&lda);
+ for (;;) {
+ v = atomic_load_char(&obm->lk);
+ if (v == OBM_UNLOCKED) {
+ if (atomic_fcmpset_acq_char(&obm->lk, &v, OBM_LOCKED) != 0)
+ break;
+ lock_delay(&lda);
+ continue;
+ }
+
+ ts = turnstile_trywait(lo);
+ v = atomic_load_char(&obm->lk);
+ if (v == OBM_UNLOCKED) {
+ turnstile_cancel(ts);
+ if (atomic_fcmpset_acq_char(&obm->lk, &v, OBM_LOCKED) != 0)
+ break;
+ lock_delay(&lda);
+ continue;
+ }
+ if ((v & OBM_CONTESTED) == 0 &&
+ atomic_fcmpset_char(&obm->lk, &v, v | OBM_CONTESTED) == 0) {
+ turnstile_cancel(ts);
+ continue;
+ }
+ turnstile_wait(ts, NULL, TS_SHARED_QUEUE);
+ }
+ TD_LOCKS_INC(curthread);
+}
+
+void
+obm_unlock_slow(obm_lock_t *obm, struct lock_object *lo)
+{
+ struct turnstile *ts;
+
+#ifdef OBM_DEBUG
+ atomic_add_long(&obm_slow_unlock, 1);
+#endif
+ turnstile_chain_lock(lo);
+ atomic_store_rel_char(&obm->lk, OBM_UNLOCKED);
+ ts = turnstile_lookup(lo);
+ if (ts != NULL) {
+ turnstile_broadcast(ts, TS_SHARED_QUEUE);
+ turnstile_unpend(ts);
+ }
+ turnstile_chain_unlock(lo);
+ TD_LOCKS_DEC(curthread);
+}
+
Index: sys/sys/_obm.h
===================================================================
--- /dev/null
+++ sys/sys/_obm.h
@@ -0,0 +1,47 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__OBM_H
+#define _SYS__OBM_H
+
+/* One-Byte Lock */
+
+#define OBM_UNLOCKED 0x00
+#define OBM_LOCKED 0x02
+#define OBM_CONTESTED 0x01
+
+typedef struct obm_lock_tag {
+ uint8_t lk;
+} obm_lock_t;
+
+#endif
Index: sys/sys/obm.h
===================================================================
--- /dev/null
+++ sys/sys/obm.h
@@ -0,0 +1,103 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_OBM_H
+#define _SYS_OBM_H
+
+/* One-Byte Lock */
+
+#ifdef _KERNEL
+
+#include <sys/systm.h>
+#include <sys/_lock.h>
+#include <sys/_obm.h>
+#include <sys/proc.h>
+#include <machine/atomic.h>
+
+void obm_init_lo(struct lock_object *lo, const char *name);
+void obm_init(obm_lock_t *obm);
+void obm_lock_slow(obm_lock_t *obm, struct lock_object *lo);
+void obm_unlock_slow(obm_lock_t *obm, struct lock_object *lo);
+
+__used static void
+obm_assert_locked(obm_lock_t *obm)
+{
+#ifdef INVARIANTS
+ uint8_t v;
+
+ v = atomic_load_char(&obm->lk);
+ MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED));
+#endif
+}
+
+static inline bool
+obm_trylock(obm_lock_t *obm)
+{
+ if (atomic_cmpset_acq_char(&obm->lk, OBM_UNLOCKED, OBM_LOCKED) != 0) {
+ TD_LOCKS_INC(curthread);
+ return (true);
+ }
+ return (false);
+}
+
+static inline void
+obm_lock(obm_lock_t *obm, struct lock_object *lo)
+{
+ uint8_t v;
+
+ v = OBM_UNLOCKED;
+ if (__predict_true(atomic_fcmpset_acq_char(&obm->lk, &v, OBM_LOCKED))) {
+ TD_LOCKS_INC(curthread);
+ } else {
+ MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED) ||
+ v == OBM_UNLOCKED);
+ obm_lock_slow(obm, lo);
+ }
+}
+
+static inline void
+obm_unlock(obm_lock_t *obm, struct lock_object *lo)
+{
+ uint8_t v;
+
+ v = OBM_LOCKED;
+ if (atomic_fcmpset_rel_char(&obm->lk, &v, OBM_UNLOCKED)) {
+ TD_LOCKS_DEC(curthread);
+ } else {
+ MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED));
+ obm_unlock_slow(obm, lo);
+ }
+}
+
+#endif
+#endif
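For orientation, here is a minimal, hypothetical sketch of how the OBM interface introduced above is meant to be consumed; struct foo and the foo_* names exist only for illustration and are not part of the diff. The shared lock_object holds no lock state of its own; it merely provides a turnstile for the contested slow path, mirroring how the pmap keeps one lock_object per pmap_large_md_page (NUMA) or per pv_lo[] slot while the actual one-byte lock lives in each vm_page's md.pv_lock.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/obm.h>

struct foo {				/* hypothetical object */
	obm_lock_t	f_lock;		/* one byte of lock state */
	u_int		f_value;
};

static struct lock_object foo_obm_lo;	/* shared turnstile anchor, no lock state */

static void
foo_global_init(void)
{
	/* Once, early during boot, before any foo is locked. */
	obm_init_lo(&foo_obm_lo, "foo obm");
}

static void
foo_init(struct foo *fp)
{
	obm_init(&fp->f_lock);		/* starts out OBM_UNLOCKED */
	fp->f_value = 0;
}

static void
foo_bump(struct foo *fp)
{
	/* Uncontended case is a single inlined cmpset; contention blocks on the shared turnstile. */
	obm_lock(&fp->f_lock, &foo_obm_lo);
	fp->f_value++;
	obm_unlock(&fp->f_lock, &foo_obm_lo);
}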