D24217.id74632.diff

Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -121,6 +121,7 @@
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
+#include <sys/obm.h>
#include <sys/proc.h>
#include <sys/rangeset.h>
#include <sys/rwlock.h>
@@ -171,6 +172,13 @@
#define PMAP_MEMDOM 1
#endif
+#define PC_FREE0 0xfffffffffffffffful
+#define PC_FREE1 0xfffffffffffffffful
+#define PC_FREE2 0x000000fffffffffful
+
+_Static_assert(sizeof(struct pv_chunk) <= PAGE_SIZE, "");
+_Static_assert(NBBY * sizeof(uint64_t) * _NPCM >= _NPCPV, "");
+
static __inline boolean_t
pmap_type_guest(pmap_t pmap)
{
@@ -316,6 +324,9 @@
#define PMAP_INLINE
#endif
+static TAILQ_HEAD(, pmap) all_pmaps;
+static struct mtx all_pmaps_lock;
+
#ifdef PV_STATS
#define PV_STAT(x) do { x ; } while (0)
#else
@@ -331,51 +342,52 @@
})
#define pa_to_pmdp(pa) (&pv_table[pa_index(pa)])
#define pa_to_pvh(pa) (&(pa_to_pmdp(pa)->pv_page))
-#define PHYS_TO_PV_LIST_LOCK(pa) ({ \
- struct rwlock *_lock; \
- if (__predict_false((pa) > pmap_last_pa)) \
- _lock = &pv_dummy_large.pv_lock; \
- else \
- _lock = &(pa_to_pmdp(pa)->pv_lock); \
- _lock; \
-})
#else
#define pa_index(pa) ((pa) >> PDRSHIFT)
#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
#define NPV_LIST_LOCKS MAXCPU
-#define PHYS_TO_PV_LIST_LOCK(pa) \
- (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
#endif
+#define PHYS_TO_PV_LIST_LOCK(pa) PHYS_TO_VM_PAGE(pa)
+
#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
- struct rwlock **_lockp = (lockp); \
- struct rwlock *_new_lock; \
+ PVLL **_lockp = (lockp); \
+ PVLL *_new_lock; \
\
_new_lock = PHYS_TO_PV_LIST_LOCK(pa); \
if (_new_lock != *_lockp) { \
if (*_lockp != NULL) \
- rw_wunlock(*_lockp); \
+ pmap_pv_list_unlock(*_lockp); \
+ if (_new_lock == NULL) \
+ _new_lock = &pv_fake_page; \
*_lockp = _new_lock; \
- rw_wlock(*_lockp); \
+ pmap_pv_list_lock(*_lockp); \
} \
} while (0)
-#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
- CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) do { \
+ PVLL **_lockp = (lockp); \
+ \
+ if (m != *_lockp) { \
+ if (*_lockp != NULL) \
+ pmap_pv_list_unlock(*_lockp); \
+ *_lockp = m; \
+ pmap_pv_list_lock(m); \
+ } \
+} while (0)
#define RELEASE_PV_LIST_LOCK(lockp) do { \
- struct rwlock **_lockp = (lockp); \
+ PVLL **_lockp = (lockp); \
\
if (*_lockp != NULL) { \
- rw_wunlock(*_lockp); \
+ pmap_pv_list_unlock(*_lockp); \
*_lockp = NULL; \
} \
} while (0)
-#define VM_PAGE_TO_PV_LIST_LOCK(m) \
- PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+#define VM_PAGE_TO_PV_LIST_LOCK(m) (m)
struct pmap kernel_pmap_store;
@@ -428,46 +440,158 @@
* Data for the pv entry allocation mechanism.
* Updates to pv_invl_gen are protected by the pv list lock but reads are not.
*/
-#ifdef NUMA
-static __inline int
-pc_to_domain(struct pv_chunk *pc)
-{
- return (_vm_phys_domain(DMAP_TO_PHYS((vm_offset_t)pc)));
-}
-#else
-static __inline int
-pc_to_domain(struct pv_chunk *pc __unused)
-{
-
- return (0);
-}
-#endif
-
-struct pv_chunks_list {
- struct mtx pvc_lock;
- TAILQ_HEAD(pch, pv_chunk) pvc_list;
- int active_reclaims;
-} __aligned(CACHE_LINE_SIZE);
-
-struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
+typedef struct vm_page PVLL;
#ifdef NUMA
struct pmap_large_md_page {
- struct rwlock pv_lock;
- struct md_page pv_page;
+ struct lock_object lo;
+ uintptr_t pad;
+ struct md_page pv_page;
u_long pv_invl_gen;
};
+/*
+ * We strongly depend on the size being a power of two, so the assert
+ * is overzealous. However, should the struct be resized to a
+ * different power of two, the code below needs to be revisited.
+ */
+_Static_assert(sizeof(struct pmap_large_md_page) == 64, "pmap_large_md_page");
+
__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
#define pv_dummy pv_dummy_large.pv_page
__read_mostly static struct pmap_large_md_page *pv_table;
__read_mostly vm_paddr_t pmap_last_pa;
+static struct lock_object *
+pv_list_lock_object(vm_paddr_t pa)
+{
+ if (__predict_false(pa > pmap_last_pa))
+ return (&pv_dummy_large.lo);
+ return (&pa_to_pmdp(pa)->lo);
+}
#else
-static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
+static struct lock_object __exclusive_cache_line pv_lo[NPV_LIST_LOCKS];
static u_long pv_invl_gen[NPV_LIST_LOCKS];
static struct md_page *pv_table;
static struct md_page pv_dummy;
+static struct lock_object *
+pv_list_lock_object(vm_paddr_t pa)
+{
+ return (&pv_lo[pa_index(pa) % NPV_LIST_LOCKS]);
+}
#endif
+__read_mostly static struct vm_page pv_fake_page;
+
+static void
+pmap_pv_list_lock(vm_page_t m)
+{
+ obm_lock(&m->md.pv_lock, pv_list_lock_object(VM_PAGE_TO_PHYS(m)));
+}
+
+static void
+pmap_pv_list_unlock(vm_page_t m)
+{
+ obm_unlock(&m->md.pv_lock, pv_list_lock_object(VM_PAGE_TO_PHYS(m)));
+}
+
+/*
+ * Locks all pv lists for 4k pages constituting the superpage that
+ * contains the passed page. The page's pv list is locked according
+ * to pte_locked.
+ *
+ * If pte_locked is true and the trylock of the superpage's first
+ * page fails, the passed page's pv list lock is dropped and then
+ * re-acquired together with the others in address order, so the pv
+ * lists may have been modified by the time this function returns.
+ */
+static void
+pmap_pv_list_lock_pde1(vm_page_t m, bool pte_locked)
+{
+ vm_page_t mt, sm;
+ struct lock_object *lo;
+ int i;
+
+ if (pte_locked)
+ obm_assert_locked(&m->md.pv_lock);
+
+ sm = m - atop(VM_PAGE_TO_PHYS(m) - (VM_PAGE_TO_PHYS(m) & PG_PS_FRAME));
+ lo = pv_list_lock_object(VM_PAGE_TO_PHYS(m));
+
+ if (pte_locked) {
+ /*
+ * Fast attempt. If we either own or can get the pv
+ * list lock of the first page in the superpage, all
+ * other owners must release their locks without
+ * waiting for us.
+ */
+ if (m == sm || obm_trylock(&sm->md.pv_lock)) {
+ for (i = 1, mt = sm + 1; i < NPDEPG; i++, mt++) {
+ if (m != mt)
+ obm_lock(&mt->md.pv_lock, lo);
+ }
+ return;
+ }
+
+ obm_unlock(&m->md.pv_lock, lo);
+ }
+
+ for (i = 0, mt = sm; i < NPDEPG; i++, mt++) {
+ obm_lock(&mt->md.pv_lock, lo);
+ }
+}
+
+/*
+ * If *lockp points to one of the ordinary pages from the superpage we
+ * are demoting or promoting, then that page's pv list stays locked
+ * after pmap_pv_list_unlock_pde().  Otherwise, whatever was locked is
+ * unlocked here, and pmap_pv_list_unlock_pde() unlocks the whole run.
+ */
+static void
+pmap_pv_list_lock_pde(vm_paddr_t pa, PVLL **lockp)
+{
+ vm_page_t m;
+
+ m = PHYS_TO_VM_PAGE(pa);
+ KASSERT(m != NULL,
+ ("pmap_pv_list_lock_pde: unmanaged phys addr %#lx", pa));
+
+ if (*lockp == NULL) {
+ pmap_pv_list_lock_pde1(m, false);
+ return;
+ }
+ if ((VM_PAGE_TO_PHYS(*lockp) & PG_PS_FRAME) != (pa & PG_PS_FRAME)) {
+ pmap_pv_list_unlock(*lockp);
+ *lockp = NULL;
+ pmap_pv_list_lock_pde1(m, false);
+ return;
+ }
+ pmap_pv_list_lock_pde1(*lockp, true);
+}
+
+static void
+pmap_pv_list_unlock_pde(vm_paddr_t pa, PVLL **lockp)
+{
+ vm_page_t m, mt, sm;
+ struct lock_object *lo;
+ int i;
+ bool pte_locked;
+
+ m = *lockp;
+ pte_locked = m != NULL;
+ if (!pte_locked) {
+ m = PHYS_TO_VM_PAGE(pa);
+ if (m == NULL)
+ m = &pv_fake_page;
+ }
+
+ sm = m - atop(VM_PAGE_TO_PHYS(m) - (VM_PAGE_TO_PHYS(m) & PG_PS_FRAME));
+ lo = pv_list_lock_object(VM_PAGE_TO_PHYS(m));
+ obm_assert_locked(&m->md.pv_lock);
+ obm_assert_locked(&sm->md.pv_lock);
+
+ for (i = 0, mt = sm; i < NPDEPG; i++, mt++) {
+ if (!pte_locked || mt != m)
+ obm_unlock(&mt->md.pv_lock, lo);
+ }
+}
/*
* All those kernel PT submaps that BSD is so fond of
@@ -1153,7 +1277,7 @@
{
u_long gen, *m_gen;
- rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED);
+ obm_assert_locked(&m->md.pv_lock);
gen = curthread->td_md.md_invl_gen.gen;
if (gen == 0)
return;
@@ -1186,37 +1310,37 @@
static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_chunk_batch(struct pv_chunklist *batch);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
+static pv_entry_t get_pv_entry(pmap_t pmap, PVLL **lockp);
static int popcnt_pc_map_pq(uint64_t *map);
-static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
-static void reserve_pv_entries(pmap_t pmap, int needed,
- struct rwlock **lockp);
+static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, PVLL **lockp,
+ bool avoid_locked_pmap);
+static void reserve_pv_entries(pmap_t pmap, int needed, PVLL **lockp);
static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp);
+ PVLL **lockp);
static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde,
- u_int flags, struct rwlock **lockp);
+ u_int flags, PVLL **lockp);
#if VM_NRESERVLEVEL > 0
static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp);
+ PVLL **lockp);
#endif
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
- vm_offset_t va);
+ vm_offset_t va);
static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
vm_prot_t prot, int mode, int flags);
static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
- vm_offset_t va, struct rwlock **lockp);
+ vm_offset_t va, PVLL **lockp);
static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
vm_offset_t va);
static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, struct rwlock **lockp);
+ vm_prot_t prot, PVLL **lockp);
static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
- u_int flags, vm_page_t m, struct rwlock **lockp);
+ u_int flags, vm_page_t m, PVLL **lockp);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
- vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
+ vm_page_t m, vm_prot_t prot, vm_page_t mpte, PVLL **lockp);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
@@ -1224,13 +1348,13 @@
static void pmap_invalidate_cache_range_all(vm_offset_t sva,
vm_offset_t eva);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
- pd_entry_t pde);
+ pd_entry_t pde);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static vm_page_t pmap_large_map_getptp_unlocked(void);
static vm_paddr_t pmap_large_map_kextract(vm_offset_t va);
#if VM_NRESERVLEVEL > 0
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
- struct rwlock **lockp);
+ PVLL **lockp);
#endif
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
vm_prot_t prot);
@@ -1241,27 +1365,26 @@
static pd_entry_t *pmap_pti_pde(vm_offset_t va);
static void pmap_pti_wire_pte(void *pte);
static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- struct spglist *free, struct rwlock **lockp);
+ struct spglist *free, PVLL **lockp);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
- pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
+ pd_entry_t ptepde, struct spglist *free, PVLL **lockp);
static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
struct spglist *free);
static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
- pd_entry_t *pde, struct spglist *free,
- struct rwlock **lockp);
+ pd_entry_t *pde, struct spglist *free, PVLL **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
- vm_page_t m, struct rwlock **lockp);
+ vm_page_t m, PVLL **lockp);
static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
pd_entry_t newpde);
static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde);
static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
- struct rwlock **lockp);
+ PVLL **lockp);
static pd_entry_t *pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
- struct rwlock **lockp);
+ PVLL **lockp);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
- struct rwlock **lockp);
+ PVLL **lockp);
static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct spglist *free);
@@ -1724,6 +1847,9 @@
cr4 |= CR4_SMAP;
load_cr4(cr4);
+ TAILQ_INIT(&all_pmaps);
+ mtx_init(&all_pmaps_lock, "allpms", NULL, MTX_DEF);
+
/*
* Initialize the kernel pmap (which is statically allocated).
* Count bootstrap data as being resident in case any of this data is
@@ -1900,6 +2026,7 @@
TAILQ_INIT(&m->md.pv_list);
m->md.pat_mode = PAT_WRITE_BACK;
+ obm_init(&m->md.pv_lock);
}
static int pmap_allow_2m_x_ept;
@@ -1956,13 +2083,6 @@
long start, end, highest, pv_npg;
int domain, i, j, pages;
- /*
- * We strongly depend on the size being a power of two, so the assert
- * is overzealous. However, should the struct be resized to a
- * different power of two, the code below needs to be revisited.
- */
- CTASSERT((sizeof(*pvd) == 64));
-
/*
* Calculate the size of the array.
*/
@@ -1997,12 +2117,13 @@
vm_page_t m = vm_page_alloc_domain(NULL, 0,
domain, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
if (m == NULL)
- panic("vm_page_alloc_domain failed for %lx\n", (vm_offset_t)pvd + j);
+ panic("vm_page_alloc_domain failed for %lx\n",
+ (vm_offset_t)pvd + j);
pmap_qenter((vm_offset_t)pvd + j, &m, 1);
}
for (j = 0; j < s / sizeof(*pvd); j++) {
- rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
+ obm_init_lo(&pvd->lo, "pmap pv list");
TAILQ_INIT(&pvd->pv_page.pv_list);
pvd->pv_page.pv_gen = 0;
pvd->pv_page.pat_mode = 0;
@@ -2011,8 +2132,10 @@
}
}
pvd = &pv_dummy_large;
- rw_init_flags(&pvd->pv_lock, "pmap pv list dummy", RW_NEW);
+ obm_init_lo(&pvd->lo, "pmap pv list dummy");
TAILQ_INIT(&pvd->pv_page.pv_list);
+ pmap_page_init(&pv_fake_page);
+ pv_fake_page.phys_addr = pmap_last_pa + PAGE_SIZE;
pvd->pv_page.pv_gen = 0;
pvd->pv_page.pat_mode = 0;
pvd->pv_invl_gen = 0;
@@ -2028,7 +2151,7 @@
* Initialize the pool of pv list locks.
*/
for (i = 0; i < NPV_LIST_LOCKS; i++)
- rw_init(&pv_list_locks[i], "pmap pv list");
+ obm_init_lo(&pv_lo[i], "pmap pv list");
/*
* Calculate the size of the pv head table for superpages.
@@ -2044,6 +2167,8 @@
for (i = 0; i < pv_npg; i++)
TAILQ_INIT(&pv_table[i].pv_list);
TAILQ_INIT(&pv_dummy.pv_list);
+ pmap_page_init(&pv_fake_page);
+ pv_fake_page.phys_addr = vm_phys_segs[vm_phys_nsegs - 1].end + PAGE_SIZE;
}
#endif
@@ -2059,6 +2184,10 @@
vm_page_t m, mpte;
int error, i, ret, skz63;
+ /* Compiler cannot evaluate this at compile time. */
+ MPASS(__bitcount64(PC_FREE0) + __bitcount64(PC_FREE1) +
+ __bitcount64(PC_FREE2) == _NPCPV);
+
/* L1TF, reserve page @0 unconditionally */
vm_page_blacklist_add(0, bootverbose);
@@ -2143,13 +2272,6 @@
pagesizes[1] = NBPDR;
}
- /*
- * Initialize pv chunk lists.
- */
- for (i = 0; i < PMAP_MEMDOM; i++) {
- mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL, MTX_DEF);
- TAILQ_INIT(&pv_chunks[i].pvc_list);
- }
pmap_init_pv_table();
pmap_initialized = 1;
@@ -3651,6 +3773,21 @@
}
}
+void
+pmap_lock_init(pmap_t pmap)
+{
+ mtx_init(&pmap->pm_mtx, "pmap", NULL, MTX_DEF | MTX_DUPOK);
+
+ /*
+ * Add pmap to the global list, to be used during the pv
+ * chunks reclamation. Pmap is never removed from the list,
+ * relying on type-stability of the vmspace zone.
+ */
+ mtx_lock(&all_pmaps_lock);
+ TAILQ_INSERT_TAIL(&all_pmaps, pmap, pm_allpmaps);
+ mtx_unlock(&all_pmaps_lock);
+}
+
void
pmap_pinit0(pmap_t pmap)
{
@@ -3834,7 +3971,7 @@
* it is statically allocated by pmap_pinit() and not by _pmap_allocpte().
*/
static vm_page_t
-_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
+_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, PVLL **lockp)
{
vm_page_t m, pdppg, pdpg;
pt_entry_t PG_A, PG_M, PG_RW, PG_V;
@@ -3981,7 +4118,7 @@
static pd_entry_t *
pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pdp_entry_t *pdpe, PG_V;
pd_entry_t *pde;
@@ -4020,7 +4157,7 @@
}
static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
+pmap_allocpte(pmap_t pmap, vm_offset_t va, PVLL **lockp)
{
vm_pindex_t ptepindex;
pd_entry_t *pd, PG_V;
@@ -4266,10 +4403,6 @@
* page management routines.
***************************************************/
-CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
-CTASSERT(_NPCM == 3);
-CTASSERT(_NPCPV == 168);
-
static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{
@@ -4279,10 +4412,6 @@
#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
-#define PC_FREE0 0xfffffffffffffffful
-#define PC_FREE1 0xfffffffffffffffful
-#define PC_FREE2 0x000000fffffffffful
-
static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
#ifdef PV_STATS
@@ -4310,129 +4439,32 @@
"Current number of spare pv entries");
#endif
-static void
-reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked_pmap, bool start_di)
-{
-
- if (pmap == NULL)
- return;
- pmap_invalidate_all(pmap);
- if (pmap != locked_pmap)
- PMAP_UNLOCK(pmap);
- if (start_di)
- pmap_delayed_invl_finish();
-}
-
-/*
- * We are in a serious low memory condition. Resort to
- * drastic measures to free some pages so we can allocate
- * another pv entry chunk.
- *
- * Returns NULL if PV entries were reclaimed from the specified pmap.
- *
- * We do not, however, unmap 2mpages because subsequent accesses will
- * allocate per-page pv entries until repromotion occurs, thereby
- * exacerbating the shortage of free pv entries.
- */
-static vm_page_t
-reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain)
+static bool
+reclaim_pv_chunk_handle_pmap(pmap_t pmap, pmap_t locked_pmap,
+ bool avoid_locked_pmap, PVLL **lockp, struct spglist *free)
{
- struct pv_chunks_list *pvc;
- struct pv_chunk *pc, *pc_marker, *pc_marker_end;
- struct pv_chunk_header pc_marker_b, pc_marker_end_b;
+ struct pv_chunk *pc, *pcn;
+ pv_entry_t pv;
+ vm_offset_t va;
+ vm_page_t m, m_pc;
struct md_page *pvh;
pd_entry_t *pde;
- pmap_t next_pmap, pmap;
pt_entry_t *pte, tpte;
pt_entry_t PG_G, PG_A, PG_M, PG_RW;
- pv_entry_t pv;
- vm_offset_t va;
- vm_page_t m, m_pc;
- struct spglist free;
uint64_t inuse;
int bit, field, freed;
- bool start_di, restart;
-
- PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
- KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
- pmap = NULL;
- m_pc = NULL;
- PG_G = PG_A = PG_M = PG_RW = 0;
- SLIST_INIT(&free);
- bzero(&pc_marker_b, sizeof(pc_marker_b));
- bzero(&pc_marker_end_b, sizeof(pc_marker_end_b));
- pc_marker = (struct pv_chunk *)&pc_marker_b;
- pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;
-
- /*
- * A delayed invalidation block should already be active if
- * pmap_advise() or pmap_remove() called this function by way
- * of pmap_demote_pde_locked().
- */
- start_di = pmap_not_in_di();
+ bool ret;
- pvc = &pv_chunks[domain];
- mtx_lock(&pvc->pvc_lock);
- pvc->active_reclaims++;
- TAILQ_INSERT_HEAD(&pvc->pvc_list, pc_marker, pc_lru);
- TAILQ_INSERT_TAIL(&pvc->pvc_list, pc_marker_end, pc_lru);
- while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
- SLIST_EMPTY(&free)) {
- next_pmap = pc->pc_pmap;
- if (next_pmap == NULL) {
- /*
- * The next chunk is a marker. However, it is
- * not our marker, so active_reclaims must be
- * > 1. Consequently, the next_chunk code
- * will not rotate the pv_chunks list.
- */
- goto next_chunk;
- }
- mtx_unlock(&pvc->pvc_lock);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- /*
- * A pv_chunk can only be removed from the pc_lru list
- * when both pc_chunks_mutex is owned and the
- * corresponding pmap is locked.
- */
- if (pmap != next_pmap) {
- restart = false;
- reclaim_pv_chunk_leave_pmap(pmap, locked_pmap,
- start_di);
- pmap = next_pmap;
- /* Avoid deadlock and lock recursion. */
- if (pmap > locked_pmap) {
- RELEASE_PV_LIST_LOCK(lockp);
- PMAP_LOCK(pmap);
- if (start_di)
- pmap_delayed_invl_start();
- mtx_lock(&pvc->pvc_lock);
- restart = true;
- } else if (pmap != locked_pmap) {
- if (PMAP_TRYLOCK(pmap)) {
- if (start_di)
- pmap_delayed_invl_start();
- mtx_lock(&pvc->pvc_lock);
- restart = true;
- } else {
- pmap = NULL; /* pmap is not locked */
- mtx_lock(&pvc->pvc_lock);
- pc = TAILQ_NEXT(pc_marker, pc_lru);
- if (pc == NULL ||
- pc->pc_pmap != next_pmap)
- continue;
- goto next_chunk;
- }
- } else if (start_di)
- pmap_delayed_invl_start();
- PG_G = pmap_global_bit(pmap);
- PG_A = pmap_accessed_bit(pmap);
- PG_M = pmap_modified_bit(pmap);
- PG_RW = pmap_rw_bit(pmap);
- if (restart)
- continue;
- }
+ ret = false;
+ PG_G = pmap_global_bit(pmap);
+ PG_A = pmap_accessed_bit(pmap);
+ PG_M = pmap_modified_bit(pmap);
+ PG_RW = pmap_rw_bit(pmap);
+ TAILQ_FOREACH_REVERSE_SAFE(pc, &pmap->pm_pvchunk, pvchunks,
+ pc_list, pcn) {
/*
* Destroy every non-wired, 4 KB page mapping in the chunk.
*/
@@ -4470,84 +4502,156 @@
}
pmap_delayed_invl_page(m);
pc->pc_map[field] |= 1UL << bit;
- pmap_unuse_pt(pmap, va, *pde, &free);
+ pmap_unuse_pt(pmap, va, *pde, free);
freed++;
}
}
- if (freed == 0) {
- mtx_lock(&pvc->pvc_lock);
- goto next_chunk;
- }
+ if (freed == 0)
+ continue;
+
/* Every freed mapping is for a 4 KB page. */
pmap_resident_count_dec(pmap, freed);
PV_STAT(atomic_add_long(&pv_entry_frees, freed));
PV_STAT(atomic_add_int(&pv_entry_spare, freed));
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
pc->pc_map[2] == PC_FREE2) {
- PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
- PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
- PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
- /* Entire chunk is free; return it. */
- m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
- dump_drop_page(m_pc->phys_addr);
- mtx_lock(&pvc->pvc_lock);
- TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
- break;
+ if (!avoid_locked_pmap || locked_pmap != pmap) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ PV_STAT(atomic_subtract_int(&pv_entry_spare,
+ _NPCPV));
+ PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
+ PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
+ /* Entire chunk is free; return it. */
+ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS(
+ (vm_offset_t)pc));
+ dump_drop_page(m_pc->phys_addr);
+ m_pc->ref_count = 0;
+ SLIST_INSERT_HEAD(free, m_pc, plinks.s.ss);
+ break;
+ }
+ } else {
+ /*
+ * Re-insert at the head because the allocator
+ * bails out if it finds a fully populated chunk.
+ */
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
}
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- mtx_lock(&pvc->pvc_lock);
/* One freed pv entry in locked_pmap is sufficient. */
- if (pmap == locked_pmap)
+ if (pmap == locked_pmap) {
+ ret = true;
break;
-next_chunk:
- TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
- TAILQ_INSERT_AFTER(&pvc->pvc_list, pc, pc_marker, pc_lru);
- if (pvc->active_reclaims == 1 && pmap != NULL) {
- /*
- * Rotate the pv chunks list so that we do not
- * scan the same pv chunks that could not be
- * freed (because they contained a wired
- * and/or superpage mapping) on every
- * invocation of reclaim_pv_chunk().
- */
- while ((pc = TAILQ_FIRST(&pvc->pvc_list)) != pc_marker) {
- MPASS(pc->pc_pmap != NULL);
- TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
- TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
- }
}
}
- TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru);
- TAILQ_REMOVE(&pvc->pvc_list, pc_marker_end, pc_lru);
- pvc->active_reclaims--;
- mtx_unlock(&pvc->pvc_lock);
- reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di);
- if (m_pc == NULL && !SLIST_EMPTY(&free)) {
- m_pc = SLIST_FIRST(&free);
- SLIST_REMOVE_HEAD(&free, plinks.s.ss);
- /* Recycle a freed page table page. */
- m_pc->ref_count = 1;
- }
- vm_page_free_pages_toq(&free, true);
- return (m_pc);
+ return (ret);
}
+/*
+ * We are in a serious low memory condition. Resort to
+ * drastic measures to free some pages so we can allocate
+ * another pv entry chunk.
+ *
+ * Returns NULL if PV entries were reclaimed from the specified pmap;
+ * otherwise, returns a free page to be used for a PV chunk.
+ *
+ * If avoid_locked_pmap is true, chunks are not freed from the
+ * locked_pmap (but pv entries are).
+ *
+ * We do not, however, unmap 2mpages because subsequent accesses will
+ * allocate per-page pv entries until repromotion occurs, thereby
+ * exacerbating the shortage of free pv entries.
+ */
static vm_page_t
-reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+reclaim_pv_chunk(pmap_t locked_pmap, PVLL **lockp, bool avoid_locked_pmap)
{
vm_page_t m;
- int i, domain;
+ pmap_t next_pmap, pmap;
+ struct spglist free;
+ bool res, start_di;
+
+ PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+ KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
+ pmap = NULL;
+ m = NULL;
+ res = false;
+ SLIST_INIT(&free);
+
+ /*
+ * A delayed invalidation block should already be active if
+ * pmap_advise() or pmap_remove() called this function by way
+ * of pmap_demote_pde_locked().
+ */
+ start_di = pmap_not_in_di();
- domain = PCPU_GET(domain);
- for (i = 0; i < vm_ndomains; i++) {
- m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain);
- if (m != NULL)
+ for (;;) {
+ /*
+ * A parallel reclaim_pv_chunk() could move our cursor
+ * to the end of the list, which causes earlier
+ * termination of the loop. Since all callers are
+ * prepared for reclaim_pv_chunk() failure, it only
+ * means that callers retry with the page allocator
+ * before trying to reclaim one more time.
+ */
+ mtx_lock(&all_pmaps_lock);
+ next_pmap = pmap == NULL ? TAILQ_FIRST(&all_pmaps) :
+ TAILQ_NEXT(pmap, pm_allpmaps);
+ mtx_unlock(&all_pmaps_lock);
+ if (next_pmap == NULL)
break;
- domain = (domain + 1) % vm_ndomains;
- }
+ pmap = next_pmap;
+ /*
+ * This lockless check is fine: at worst we either
+ * process a pmap without any pv chunks or skip a
+ * potentially consumable pmap. But it is still
+ * useful to cheaply skip freed pmaps which are kept
+ * on the list due to type stability.
+ */
+ if (pmap->pm_stats.resident_count == 0)
+ continue;
+
+ /* Avoid deadlock and lock recursion. */
+ if (pmap > locked_pmap) {
+ RELEASE_PV_LIST_LOCK(lockp);
+ PMAP_LOCK(pmap);
+ if (start_di)
+ pmap_delayed_invl_start();
+ } else if (pmap != locked_pmap) {
+ if (PMAP_TRYLOCK(pmap)) {
+ if (start_di)
+ pmap_delayed_invl_start();
+ } else {
+ /* The pmap is not locked, skip it. */
+ continue;
+ }
+ } else if (start_di)
+ pmap_delayed_invl_start();
+
+ if (pmap->pm_stats.resident_count != 0) {
+ res = reclaim_pv_chunk_handle_pmap(pmap, locked_pmap,
+ avoid_locked_pmap, lockp, &free);
+ }
+ pmap_invalidate_all(pmap);
+ if (pmap != locked_pmap)
+ PMAP_UNLOCK(pmap);
+ if (start_di)
+ pmap_delayed_invl_finish();
+ if (res || !SLIST_EMPTY(&free)) {
+ mtx_lock(&all_pmaps_lock);
+ TAILQ_REMOVE(&all_pmaps, pmap, pm_allpmaps);
+ TAILQ_INSERT_TAIL(&all_pmaps, pmap, pm_allpmaps);
+ mtx_unlock(&all_pmaps_lock);
+ break;
+ }
+ }
+ if (!res && !SLIST_EMPTY(&free)) {
+ m = SLIST_FIRST(&free);
+ SLIST_REMOVE_HEAD(&free, plinks.s.ss);
+ /* Recycle a freed page table page. */
+ m->ref_count = 1;
+ }
+ vm_page_free_pages_toq(&free, true);
return (m);
}
@@ -4583,7 +4687,7 @@
}
static void
-free_pv_chunk_dequeued(struct pv_chunk *pc)
+free_pv_chunk(struct pv_chunk *pc)
{
vm_page_t m;
@@ -4597,40 +4701,13 @@
vm_page_free(m);
}
-static void
-free_pv_chunk(struct pv_chunk *pc)
-{
- struct pv_chunks_list *pvc;
-
- pvc = &pv_chunks[pc_to_domain(pc)];
- mtx_lock(&pvc->pvc_lock);
- TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
- mtx_unlock(&pvc->pvc_lock);
- free_pv_chunk_dequeued(pc);
-}
-
static void
free_pv_chunk_batch(struct pv_chunklist *batch)
{
- struct pv_chunks_list *pvc;
struct pv_chunk *pc, *npc;
- int i;
- for (i = 0; i < vm_ndomains; i++) {
- if (TAILQ_EMPTY(&batch[i]))
- continue;
- pvc = &pv_chunks[i];
- mtx_lock(&pvc->pvc_lock);
- TAILQ_FOREACH(pc, &batch[i], pc_list) {
- TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru);
- }
- mtx_unlock(&pvc->pvc_lock);
- }
-
- for (i = 0; i < vm_ndomains; i++) {
- TAILQ_FOREACH_SAFE(pc, &batch[i], pc_list, npc) {
- free_pv_chunk_dequeued(pc);
- }
+ TAILQ_FOREACH_SAFE(pc, batch, pc_list, npc) {
+ free_pv_chunk(pc);
}
}
@@ -4643,9 +4720,8 @@
* The given PV list lock may be released.
*/
static pv_entry_t
-get_pv_entry(pmap_t pmap, struct rwlock **lockp)
+get_pv_entry(pmap_t pmap, PVLL **lockp)
{
- struct pv_chunks_list *pvc;
int bit, field;
pv_entry_t pv;
struct pv_chunk *pc;
@@ -4685,7 +4761,7 @@
PV_STAT(pc_chunk_tryfail++);
return (NULL);
}
- m = reclaim_pv_chunk(pmap, lockp);
+ m = reclaim_pv_chunk(pmap, lockp, false);
if (m == NULL)
goto retry;
}
@@ -4697,10 +4773,6 @@
pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
- pvc = &pv_chunks[_vm_phys_domain(m->phys_addr)];
- mtx_lock(&pvc->pvc_lock);
- TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru);
- mtx_unlock(&pvc->pvc_lock);
pv = &pc->pc_pventry[0];
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
PV_STAT(atomic_add_long(&pv_entry_count, 1));
@@ -4744,26 +4816,16 @@
* The given PV list lock may be released.
*/
static void
-reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
+reserve_pv_entries(pmap_t pmap, int needed, PVLL **lockp)
{
- struct pv_chunks_list *pvc;
- struct pch new_tail[PMAP_MEMDOM];
struct pv_chunk *pc;
vm_page_t m;
- int avail, free, i;
+ int avail, free;
bool reclaimed;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
- /*
- * Newly allocated PV chunks must be stored in a private list until
- * the required number of PV chunks have been allocated. Otherwise,
- * reclaim_pv_chunk() could recycle one of these chunks. In
- * contrast, these chunks must be added to the pmap upon allocation.
- */
- for (i = 0; i < PMAP_MEMDOM; i++)
- TAILQ_INIT(&new_tail[i]);
retry:
avail = 0;
TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
@@ -4784,7 +4846,7 @@
m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED);
if (m == NULL) {
- m = reclaim_pv_chunk(pmap, lockp);
+ m = reclaim_pv_chunk(pmap, lockp, true);
if (m == NULL)
goto retry;
reclaimed = true;
@@ -4798,7 +4860,6 @@
pc->pc_map[1] = PC_FREE1;
pc->pc_map[2] = PC_FREE2;
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_TAIL(&new_tail[pc_to_domain(pc)], pc, pc_lru);
PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));
/*
@@ -4809,14 +4870,6 @@
if (reclaimed)
goto retry;
}
- for (i = 0; i < vm_ndomains; i++) {
- if (TAILQ_EMPTY(&new_tail[i]))
- continue;
- pvc = &pv_chunks[i];
- mtx_lock(&pvc->pvc_lock);
- TAILQ_CONCAT(&pvc->pvc_list, &new_tail[i], pc_lru);
- mtx_unlock(&pvc->pvc_lock);
- }
}
/*
@@ -4847,7 +4900,7 @@
*/
static void
pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp)
+ PVLL **lockp)
{
struct md_page *pvh;
struct pv_chunk *pc;
@@ -4859,7 +4912,6 @@
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 2mpage aligned"));
- CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
* Transfer the 2mpage's pv entry for this mapping to the first
@@ -4916,7 +4968,7 @@
*/
static void
pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp)
+ PVLL **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
@@ -4925,7 +4977,6 @@
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 2mpage aligned"));
- CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
* Transfer the first page's pv entry for this mapping to the 2mpage's
@@ -4972,7 +5023,7 @@
*/
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pv_entry_t pv;
@@ -4995,7 +5046,7 @@
*/
static bool
pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags,
- struct rwlock **lockp)
+ PVLL **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
@@ -5036,13 +5087,13 @@
static boolean_t
pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
- struct rwlock *lock;
+ PVLL *lock;
boolean_t rv;
lock = NULL;
rv = pmap_demote_pde_locked(pmap, pde, va, &lock);
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -5075,7 +5126,7 @@
static void
pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
- pd_entry_t oldpde, struct rwlock **lockp)
+ pd_entry_t oldpde, PVLL **lockp)
{
struct spglist free;
vm_offset_t sva;
@@ -5092,7 +5143,7 @@
static boolean_t
pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
@@ -5226,8 +5277,11 @@
/*
* Demote the PV entry.
*/
- if ((oldpde & PG_MANAGED) != 0)
+ if ((oldpde & PG_MANAGED) != 0) {
+ pmap_pv_list_lock_pde(oldpde & PG_PS_FRAME, lockp);
pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp);
+ pmap_pv_list_unlock_pde(oldpde & PG_PS_FRAME, lockp);
+ }
atomic_add_long(&pmap_pde_demotions, 1);
CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx in pmap %p",
@@ -5280,7 +5334,7 @@
*/
static int
pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- struct spglist *free, struct rwlock **lockp)
+ struct spglist *free, PVLL **lockp)
{
struct md_page *pvh;
pd_entry_t oldpde;
@@ -5309,6 +5363,7 @@
eva = sva + NBPDR;
for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME);
va < eva; va += PAGE_SIZE, m++) {
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW))
vm_page_dirty(m);
if (oldpde & PG_A)
@@ -5341,7 +5396,7 @@
*/
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va,
- pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp)
+ pd_entry_t ptepde, struct spglist *free, PVLL **lockp)
{
struct md_page *pvh;
pt_entry_t oldpte, PG_A, PG_M, PG_RW;
@@ -5382,7 +5437,7 @@
pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
struct spglist *free)
{
- struct rwlock *lock;
+ PVLL *lock;
pt_entry_t *pte, PG_V;
PG_V = pmap_valid_bit(pmap);
@@ -5395,7 +5450,7 @@
lock = NULL;
pmap_remove_pte(pmap, pte, va, *pde, free, &lock);
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
pmap_invalidate_page(pmap, va);
}
@@ -5404,7 +5459,7 @@
*/
static bool
pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
- pd_entry_t *pde, struct spglist *free, struct rwlock **lockp)
+ pd_entry_t *pde, struct spglist *free, PVLL **lockp)
{
pt_entry_t PG_G, *pte;
vm_offset_t va;
@@ -5446,7 +5501,7 @@
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- struct rwlock *lock;
+ PVLL *lock;
vm_offset_t va_next;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
@@ -5559,7 +5614,7 @@
anyvalid = 1;
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
out:
if (anyvalid)
pmap_invalidate_all(pmap);
@@ -5587,7 +5642,7 @@
struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
- struct rwlock *lock;
+ PVLL *lock;
pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW;
pd_entry_t *pde;
vm_offset_t va;
@@ -5601,16 +5656,16 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry:
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -5619,17 +5674,19 @@
pde = pmap_pde(pmap, va);
(void)pmap_demote_pde_locked(pmap, pde, va, &lock);
PMAP_UNLOCK(pmap);
+ if (lock != m)
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
}
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -5661,7 +5718,7 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
pmap_delayed_invl_wait(m);
vm_page_free_pages_toq(&free, true);
}
@@ -5877,8 +5934,7 @@
* identical characteristics.
*/
static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
- struct rwlock **lockp)
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, PVLL **lockp)
{
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
@@ -5980,8 +6036,11 @@
/*
* Promote the pv entries.
*/
- if ((newpde & PG_MANAGED) != 0)
+ if ((newpde & PG_MANAGED) != 0) {
+ pmap_pv_list_lock_pde(newpde & PG_PS_FRAME, lockp);
pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp);
+ pmap_pv_list_unlock_pde(newpde & PG_PS_FRAME, lockp);
+ }
/*
* Propagate the PAT index to its proper position.
@@ -6022,7 +6081,7 @@
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
u_int flags, int8_t psind)
{
- struct rwlock *lock;
+ PVLL *lock;
pd_entry_t *pde;
pt_entry_t *pte, PG_G, PG_A, PG_M, PG_RW, PG_V;
pt_entry_t newpte, origpte;
@@ -6228,7 +6287,7 @@
pv = get_pv_entry(pmap, &lock);
pv->pv_va = va;
}
- CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
if ((newpte & PG_RW) != 0)
@@ -6281,7 +6340,7 @@
rv = KERN_SUCCESS;
out:
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
return (rv);
}
@@ -6295,7 +6354,7 @@
*/
static bool
pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pd_entry_t newpde;
pt_entry_t PG_V;
@@ -6346,7 +6405,7 @@
*/
static int
pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
- vm_page_t m, struct rwlock **lockp)
+ vm_page_t m, PVLL **lockp)
{
struct spglist free;
pd_entry_t oldpde, *pde;
@@ -6494,7 +6553,7 @@
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
{
- struct rwlock *lock;
+ PVLL *lock;
vm_offset_t va;
vm_page_t m, mpte;
vm_pindex_t diff, psize;
@@ -6519,7 +6578,7 @@
m = TAILQ_NEXT(m, listq);
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
}
@@ -6535,19 +6594,19 @@
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
- struct rwlock *lock;
+ PVLL *lock;
lock = NULL;
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
}
static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
+ vm_prot_t prot, vm_page_t mpte, PVLL **lockp)
{
pt_entry_t newpte, *pte, PG_V;
@@ -6841,7 +6900,7 @@
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
- struct rwlock *lock;
+ PVLL *lock;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t *pde, srcptepaddr;
@@ -6978,7 +7037,7 @@
}
out:
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -7093,7 +7152,7 @@
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
struct md_page *pvh;
- struct rwlock *lock;
+ PVLL *lock;
pv_entry_t pv;
int loops = 0;
boolean_t rv;
@@ -7102,7 +7161,7 @@
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
if (PV_PMAP(pv) == pmap) {
rv = TRUE;
@@ -7124,7 +7183,7 @@
break;
}
}
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -7137,7 +7196,7 @@
int
pmap_page_wired_mappings(vm_page_t m)
{
- struct rwlock *lock;
+ PVLL *lock;
struct md_page *pvh;
pmap_t pmap;
pt_entry_t *pte;
@@ -7147,16 +7206,16 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (0);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
restart:
count = 0;
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -7174,9 +7233,9 @@
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen ||
pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
@@ -7189,7 +7248,7 @@
PMAP_UNLOCK(pmap);
}
}
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (count);
}
@@ -7200,17 +7259,17 @@
boolean_t
pmap_page_is_mapped(vm_page_t m)
{
- struct rwlock *lock;
+ PVLL *lock;
boolean_t rv;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -7246,15 +7305,15 @@
pt_entry_t *pte, tpte;
pt_entry_t PG_M, PG_RW, PG_V;
struct spglist free;
- struct pv_chunklist free_chunks[PMAP_MEMDOM];
+ struct pv_chunklist free_chunks;
vm_page_t m, mpte, mt;
pv_entry_t pv;
struct md_page *pvh;
struct pv_chunk *pc, *npc;
- struct rwlock *lock;
+ PVLL *lock;
int64_t bit;
uint64_t inuse, bitmask;
- int allfree, field, freed, i, idx;
+ int allfree, field, freed, idx;
boolean_t superpage;
vm_paddr_t pa;
@@ -7282,8 +7341,7 @@
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
- for (i = 0; i < PMAP_MEMDOM; i++)
- TAILQ_INIT(&free_chunks[i]);
+ TAILQ_INIT(&free_chunks);
SLIST_INIT(&free);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
@@ -7411,11 +7469,11 @@
PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
if (allfree) {
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc, pc_list);
+ TAILQ_INSERT_TAIL(&free_chunks, pc, pc_list);
}
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
pmap_invalidate_all(pmap);
pmap_pkru_deassign_all(pmap);
free_pv_chunk_batch((struct pv_chunklist *)&free_chunks);
@@ -7426,7 +7484,7 @@
static boolean_t
pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
{
- struct rwlock *lock;
+ PVLL *lock;
pv_entry_t pv;
struct md_page *pvh;
pt_entry_t *pte, mask;
@@ -7437,15 +7495,15 @@
rv = FALSE;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
restart:
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -7475,9 +7533,9 @@
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen ||
pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
@@ -7503,7 +7561,7 @@
}
}
out:
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -7576,7 +7634,7 @@
{
struct md_page *pvh;
pmap_t pmap;
- struct rwlock *lock;
+ PVLL *lock;
pv_entry_t next_pv, pv;
pd_entry_t *pde;
pt_entry_t oldpte, *pte, PG_M, PG_RW;
@@ -7594,17 +7652,18 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry_pv_loop:
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
+retry_pv_loop_locked:
+ pvh_gen = pvh->pv_gen;
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
- pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
goto retry_pv_loop;
}
}
@@ -7613,23 +7672,24 @@
pde = pmap_pde(pmap, va);
if ((*pde & PG_RW) != 0)
(void)pmap_demote_pde_locked(pmap, pde, va, &lock);
- KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
- ("inconsistent pv lock %p %p for page %p",
- lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
PMAP_UNLOCK(pmap);
+ if (lock != m || pvh_gen != pvh->pv_gen) {
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
+ goto retry_pv_loop_locked;
+ }
}
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen ||
md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
goto retry_pv_loop;
}
}
@@ -7652,7 +7712,7 @@
}
PMAP_UNLOCK(pmap);
}
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
vm_page_aflag_clear(m, PGA_WRITEABLE);
pmap_delayed_invl_wait(m);
}
@@ -7710,7 +7770,7 @@
struct md_page *pvh;
pv_entry_t pv, pvf;
pmap_t pmap;
- struct rwlock *lock;
+ PVLL *lock;
pd_entry_t oldpde, *pde;
pt_entry_t *pte, PG_A, PG_M, PG_RW;
vm_offset_t va;
@@ -7726,21 +7786,21 @@
pa = VM_PAGE_TO_PHYS(m);
lock = PHYS_TO_PV_LIST_LOCK(pa);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
retry:
not_cleared = 0;
if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
goto small_mappings;
pv = pvf;
do {
+ pvh_gen = pvh->pv_gen;
if (pvf == NULL)
pvf = pv;
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
- pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
@@ -7805,7 +7865,6 @@
pmap_invalidate_page(pmap, va);
} else
demoted = TRUE;
-
if (demoted) {
/*
* The superpage mapping was removed
@@ -7817,9 +7876,10 @@
pv = NULL;
}
cleared++;
- KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
- ("inconsistent pv lock %p %p for page %p",
- lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
+ if (lock != m || pvh_gen != pvh->pv_gen) {
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
+ goto retry;
+ }
} else
not_cleared++;
}
@@ -7844,9 +7904,9 @@
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
@@ -7897,7 +7957,7 @@
} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
not_cleared < PMAP_TS_REFERENCED_MAX);
out:
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
vm_page_free_pages_toq(&free, true);
return (cleared + not_cleared);
}
@@ -7910,7 +7970,7 @@
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
- struct rwlock *lock;
+ PVLL *lock;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t oldpde, *pde;
@@ -7967,7 +8027,7 @@
lock = NULL;
if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
/*
* The large page mapping was destroyed.
@@ -7999,7 +8059,7 @@
anychanged = true;
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
}
if (va_next > eva)
va_next = eva;
@@ -8056,7 +8116,7 @@
pv_entry_t next_pv, pv;
pd_entry_t oldpde, *pde;
pt_entry_t *pte, PG_M, PG_RW;
- struct rwlock *lock;
+ PVLL *lock;
vm_offset_t va;
int md_gen, pvh_gen;
@@ -8069,15 +8129,15 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
restart:
+ pvh_gen = pvh->pv_gen;
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
- pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -8103,15 +8163,19 @@
pmap_invalidate_page(pmap, va);
}
PMAP_UNLOCK(pmap);
+ if (lock != m || pvh_gen != pvh->pv_gen) {
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
+ goto restart;
+ }
}
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -8129,7 +8193,7 @@
}
PMAP_UNLOCK(pmap);
}
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
}
/*
@@ -9041,7 +9105,7 @@
pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype)
{
int rv;
- struct rwlock *lock;
+ PVLL *lock;
#if VM_NRESERVLEVEL > 0
vm_page_t m, mpte;
#endif
@@ -9128,7 +9192,7 @@
rv = 0; /* success */
done:
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
return (rv);
}
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -249,6 +249,7 @@
#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/_obm.h>
#include <sys/_pctrie.h>
#include <sys/_rangeset.h>
@@ -313,7 +314,9 @@
struct md_page {
TAILQ_HEAD(, pv_entry) pv_list; /* (p) */
int pv_gen; /* (p) */
- int pat_mode;
+ obm_lock_t pv_lock;
+ uint8_t pat_mode;
+ uint8_t pad0[2];
};
enum pmap_type {
@@ -327,6 +330,8 @@
uint32_t pm_gen;
};
+TAILQ_HEAD(pvchunks, pv_chunk);
+
/*
* The kernel virtual address (KVA) of the level 4 page table page is always
* within the direct map (DMAP) region.
@@ -337,7 +342,7 @@
pml4_entry_t *pm_pml4u; /* KVA of user l4 page table */
uint64_t pm_cr3;
uint64_t pm_ucr3;
- TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
+ struct pvchunks pm_pvchunk; /* list of mappings in pmap */
cpuset_t pm_active; /* active on cpus */
enum pmap_type pm_type; /* regular or nested tables */
struct pmap_statistics pm_stats; /* pmap statistics */
@@ -346,6 +351,7 @@
int pm_flags;
struct pmap_pcids pm_pcids[MAXCPU];
struct rangeset pm_pkru;
+ TAILQ_ENTRY(pmap) pm_allpmaps;
};
/* flags */
@@ -364,8 +370,7 @@
#define PMAP_LOCK_ASSERT(pmap, type) \
mtx_assert(&(pmap)->pm_mtx, (type))
#define PMAP_LOCK_DESTROY(pmap) mtx_destroy(&(pmap)->pm_mtx)
-#define PMAP_LOCK_INIT(pmap) mtx_init(&(pmap)->pm_mtx, "pmap", \
- NULL, MTX_DEF | MTX_DUPOK)
+#define PMAP_LOCK_INIT(pmap) pmap_lock_init(pmap)
#define PMAP_LOCKED(pmap) mtx_owned(&(pmap)->pm_mtx)
#define PMAP_MTX(pmap) (&(pmap)->pm_mtx)
#define PMAP_TRYLOCK(pmap) mtx_trylock(&(pmap)->pm_mtx)
@@ -393,8 +398,7 @@
#define PV_CHUNK_HEADER \
pmap_t pc_pmap; \
TAILQ_ENTRY(pv_chunk) pc_list; \
- uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */ \
- TAILQ_ENTRY(pv_chunk) pc_lru;
+ uint64_t pc_map[_NPCM]; /* bitmap; 1 = free */
struct pv_chunk_header {
PV_CHUNK_HEADER
@@ -439,6 +443,7 @@
int pmap_large_map(vm_paddr_t, vm_size_t, void **, vm_memattr_t);
void pmap_large_map_wb(void *sva, vm_size_t len);
void pmap_large_unmap(void *sva, vm_size_t len);
+void pmap_lock_init(pmap_t pmap);
void *pmap_mapbios(vm_paddr_t, vm_size_t);
void *pmap_mapdev(vm_paddr_t, vm_size_t);
void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3745,6 +3745,7 @@
kern/kern_mtxpool.c standard
kern/kern_mutex.c standard
kern/kern_ntptime.c standard
+kern/kern_obm.c standard
kern/kern_osd.c standard
kern/kern_physio.c standard
kern/kern_pmc.c standard
Index: sys/kern/kern_obm.c
===================================================================
--- /dev/null
+++ sys/kern/kern_obm.c
@@ -0,0 +1,129 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/obm.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/turnstile.h>
+#include <machine/atomic.h>
+
+#ifdef OBM_DEBUG
+static SYSCTL_NODE(_debug, OID_AUTO, obm, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "");
+static u_long obm_slow_lock;
+SYSCTL_LONG(_debug_obm, OID_AUTO, slow_lock, CTLFLAG_RD,
+ &obm_slow_lock, 0,
+ "");
+static u_long obm_slow_unlock;
+SYSCTL_LONG(_debug_obm, OID_AUTO, slow_unlock, CTLFLAG_RD,
+ &obm_slow_unlock, 0,
+ "");
+#endif
+
+void
+obm_init_lo(struct lock_object *lo, const char *name)
+{
+ bzero(lo, sizeof(*lo));
+ lo->lo_name = name;
+}
+
+void
+obm_init(obm_lock_t *obm)
+{
+ obm->lk = OBM_UNLOCKED;
+}
+
+void
+obm_lock_slow(obm_lock_t *obm, struct lock_object *lo)
+{
+ struct turnstile *ts;
+ struct lock_delay_arg lda;
+ uint8_t v;
+
+#ifdef OBM_DEBUG
+ atomic_add_long(&obm_slow_lock, 1);
+#endif
+ lock_delay_arg_init(&lda, &locks_delay);
+ lock_delay(&lda);
+ for (;;) {
+ v = atomic_load_char(&obm->lk);
+ if (v == OBM_UNLOCKED) {
+ if (atomic_fcmpset_acq_char(&obm->lk, &v, OBM_LOCKED) != 0)
+ break;
+ lock_delay(&lda);
+ continue;
+ }
+
+ ts = turnstile_trywait(lo);
+ v = atomic_load_char(&obm->lk);
+ if (v == OBM_UNLOCKED) {
+ turnstile_cancel(ts);
+ if (atomic_fcmpset_acq_char(&obm->lk, &v, OBM_LOCKED) != 0)
+ break;
+ lock_delay(&lda);
+ continue;
+ }
+ if ((v & OBM_CONTESTED) == 0 &&
+ atomic_fcmpset_char(&obm->lk, &v, v | OBM_CONTESTED) == 0) {
+ turnstile_cancel(ts);
+ continue;
+ }
+ turnstile_wait(ts, NULL, TS_SHARED_QUEUE);
+ }
+ TD_LOCKS_INC(curthread);
+}
+
+void
+obm_unlock_slow(obm_lock_t *obm, struct lock_object *lo)
+{
+ struct turnstile *ts;
+
+#ifdef OBM_DEBUG
+ atomic_add_long(&obm_slow_unlock, 1);
+#endif
+ turnstile_chain_lock(lo);
+ atomic_store_rel_char(&obm->lk, OBM_UNLOCKED);
+ ts = turnstile_lookup(lo);
+ if (ts != NULL) {
+ turnstile_broadcast(ts, TS_SHARED_QUEUE);
+ turnstile_unpend(ts);
+ }
+ turnstile_chain_unlock(lo);
+ TD_LOCKS_DEC(curthread);
+}
+
Index: sys/sys/_obm.h
===================================================================
--- /dev/null
+++ sys/sys/_obm.h
@@ -0,0 +1,47 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__OBM_H
+#define _SYS__OBM_H
+
+/* One-Byte Lock */
+
+#define OBM_UNLOCKED 0x00
+#define OBM_LOCKED 0x02
+#define OBM_CONTESTED 0x01
+
+typedef struct obm_lock_tag {
+ uint8_t lk;
+} obm_lock_t;
+
+#endif
Index: sys/sys/obm.h
===================================================================
--- /dev/null
+++ sys/sys/obm.h
@@ -0,0 +1,103 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_OBM_H
+#define _SYS_OBM_H
+
+/* One-Byte Lock */
+
+#ifdef _KERNEL
+
+#include <sys/systm.h>
+#include <sys/_lock.h>
+#include <sys/_obm.h>
+#include <sys/proc.h>
+#include <machine/atomic.h>
+
+void obm_init_lo(struct lock_object *lo, const char *name);
+void obm_init(obm_lock_t *obm);
+void obm_lock_slow(obm_lock_t *obm, struct lock_object *lo);
+void obm_unlock_slow(obm_lock_t *obm, struct lock_object *lo);
+
+__used static void
+obm_assert_locked(obm_lock_t *obm)
+{
+#ifdef INVARIANTS
+ uint8_t v;
+
+ v = atomic_load_char(&obm->lk);
+ MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED));
+#endif
+}
+
+static inline bool
+obm_trylock(obm_lock_t *obm)
+{
+ if (atomic_cmpset_acq_char(&obm->lk, OBM_UNLOCKED, OBM_LOCKED) != 0) {
+ TD_LOCKS_INC(curthread);
+ return (true);
+ }
+ return (false);
+}
+
+static inline void
+obm_lock(obm_lock_t *obm, struct lock_object *lo)
+{
+ uint8_t v;
+
+ v = OBM_UNLOCKED;
+ if (__predict_true(atomic_fcmpset_acq_char(&obm->lk, &v, OBM_LOCKED))) {
+ TD_LOCKS_INC(curthread);
+ } else {
+ MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED) ||
+ v == OBM_UNLOCKED);
+ obm_lock_slow(obm, lo);
+ }
+}
+
+static inline void
+obm_unlock(obm_lock_t *obm, struct lock_object *lo)
+{
+ uint8_t v;
+
+ v = OBM_LOCKED;
+ if (atomic_fcmpset_rel_char(&obm->lk, &v, OBM_UNLOCKED)) {
+ TD_LOCKS_DEC(curthread);
+ } else {
+ MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED));
+ obm_unlock_slow(obm, lo);
+ }
+}
+
+#endif
+#endif
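
Note: the following is not part of the diff above. It is a minimal, hypothetical sketch of how the obm ("one-byte lock") API introduced in sys/sys/obm.h might be used by a consumer, illustrating the intended pattern: a one-byte lock embedded per object, paired with a shared struct lock_object that supplies only the turnstile identity and name. The struct and function names below (struct frob, frob_*) are invented for illustration.

/*
 * Hypothetical consumer of the obm API; names are illustrative only.
 * One shared lock_object serves many one-byte locks, which is the
 * point of the design: the per-object cost is a single byte.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/obm.h>

static struct lock_object frob_lo;	/* shared turnstile identity */

struct frob {
	obm_lock_t	f_lock;		/* protects f_count */
	int		f_count;
};

static void
frob_global_init(void)
{
	/* Initialize the shared lock_object once, before any frob is used. */
	obm_init_lo(&frob_lo, "frob obm");
}

static void
frob_attach(struct frob *f)
{
	obm_init(&f->f_lock);		/* per-object: just sets the byte */
}

static void
frob_bump(struct frob *f)
{
	obm_lock(&f->f_lock, &frob_lo);	/* fast path is a single cmpset */
	f->f_count++;
	obm_unlock(&f->f_lock, &frob_lo);
}

This mirrors the pmap change above: struct md_page gains a one-byte pv_lock, while the lock_object lives either in pmap_large_md_page (NUMA) or in the small pv_lo[] array, so the turnstile bookkeeping is not paid per page.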
