Page MenuHomeFreeBSD

D24217.id70002.diff
No OneTemporary

D24217.id70002.diff

Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -121,6 +121,7 @@
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
+#include <sys/obm.h>
#include <sys/proc.h>
#include <sys/rangeset.h>
#include <sys/rwlock.h>
@@ -331,50 +332,51 @@
#ifdef NUMA
#define pa_to_pmdp(pa) (&pv_table[pa_index(pa)])
#define pa_to_pvh(pa) (&(pa_to_pmdp(pa)->pv_page))
-#define PHYS_TO_PV_LIST_LOCK(pa) ({ \
- struct rwlock *_lock; \
- if (__predict_false((pa) > pmap_last_pa)) \
- _lock = &pv_dummy_large.pv_lock; \
- else \
- _lock = &(pa_to_pmdp(pa)->pv_lock); \
- _lock; \
-})
#else
#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
#define NPV_LIST_LOCKS MAXCPU
-#define PHYS_TO_PV_LIST_LOCK(pa) \
- (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
#endif
+#define PHYS_TO_PV_LIST_LOCK(pa) PHYS_TO_VM_PAGE(pa)
+
#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
- struct rwlock **_lockp = (lockp); \
- struct rwlock *_new_lock; \
+ PVLL **_lockp = (lockp); \
+ PVLL *_new_lock; \
\
_new_lock = PHYS_TO_PV_LIST_LOCK(pa); \
+ if (_new_lock == NULL) /* pa has no vm_page: use the fake page */ \
+ _new_lock = &pv_fake_page; \
if (_new_lock != *_lockp) { \
if (*_lockp != NULL) \
- rw_wunlock(*_lockp); \
+ pmap_pv_list_unlock(*_lockp); \
*_lockp = _new_lock; \
- rw_wlock(*_lockp); \
+ pmap_pv_list_lock(*_lockp); \
} \
} while (0)
-#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
- CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) do { \
+ PVLL **_lockp = (lockp); \
+ PVLL *_m = (m); /* evaluate the macro argument exactly once */ \
+ if (_m != *_lockp) { \
+ if (*_lockp != NULL) \
+ pmap_pv_list_unlock(*_lockp); \
+ *_lockp = _m; \
+ pmap_pv_list_lock(_m); \
+ } \
+} while (0)
#define RELEASE_PV_LIST_LOCK(lockp) do { \
- struct rwlock **_lockp = (lockp); \
+ PVLL **_lockp = (lockp); \
\
if (*_lockp != NULL) { \
- rw_wunlock(*_lockp); \
+ pmap_pv_list_unlock(*_lockp); \
*_lockp = NULL; \
} \
} while (0)
-#define VM_PAGE_TO_PV_LIST_LOCK(m) \
- PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+#define VM_PAGE_TO_PV_LIST_LOCK(m) (m)
struct pmap kernel_pmap_store;
@@ -451,22 +453,167 @@
struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
+typedef struct vm_page PVLL;
+
#ifdef NUMA
struct pmap_large_md_page {
- struct rwlock pv_lock;
- struct md_page pv_page;
+ struct lock_object lo;
+ uintptr_t pad;
+ struct md_page pv_page;
u_long pv_invl_gen;
};
+/*
+ * We strongly depend on the size being a power of two, so the assert
+ * is overzealous. However, should the struct be resized to a
+ * different power of two, the code below needs to be revisited.
+ */
+_Static_assert(sizeof(struct pmap_large_md_page) == 64, "pmap_large_md_page");
+
__exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
#define pv_dummy pv_dummy_large.pv_page
__read_mostly static struct pmap_large_md_page *pv_table;
__read_mostly vm_paddr_t pmap_last_pa;
+static struct lock_object *
+pv_list_lock_object(vm_paddr_t pa)
+{
+ if (__predict_false(pa > pmap_last_pa)) /* past pmap_last_pa: use dummy */
+ return (&pv_dummy_large.lo);
+ return (&(pa_to_pmdp(pa)->lo));
+}
#else
-static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
+static struct lock_object __exclusive_cache_line pv_lo[NPV_LIST_LOCKS];
static u_long pv_invl_gen[NPV_LIST_LOCKS];
static struct md_page *pv_table;
static struct md_page pv_dummy;
+static struct lock_object *
+pv_list_lock_object(vm_paddr_t pa)
+{
+ return (&pv_lo[pa_index(pa) % NPV_LIST_LOCKS]);
+}
#endif
+__read_mostly static struct vm_page pv_fake_page;
+
+static void
+pmap_pv_list_lock(vm_page_t m)
+{
+ obm_lock(&m->md.pv_lock, pv_list_lock_object(VM_PAGE_TO_PHYS(m)));
+}
+
+static void
+pmap_pv_list_unlock(vm_page_t m)
+{
+ obm_unlock(&m->md.pv_lock, pv_list_lock_object(VM_PAGE_TO_PHYS(m)));
+}
+
+/*
+ * Locks all pv lists for the 4k pages constituting the superpage that
+ * contains the passed page. With pte_locked, the page's pv list must
+ * already be locked; false is returned if trylock failed and that list
+ * was dropped before relocking, so the caller typically must restart.
+ */
+static bool
+pmap_pv_list_lock_pde1(vm_page_t m, bool pte_locked)
+{
+ vm_page_t mt, sm; /* sm: first 4k page of m's PG_PS_FRAME-aligned run */
+ struct lock_object *lo;
+ int i;
+ bool ret;
+
+ if (pte_locked) /* otherwise m is locked below with the rest */
+ obm_assert_locked(&m->md.pv_lock);
+ sm = m - ((VM_PAGE_TO_PHYS(m) - (VM_PAGE_TO_PHYS(m) &
+ PG_PS_FRAME)) >> PAGE_SHIFT);
+ lo = pv_list_lock_object(VM_PAGE_TO_PHYS(m));
+
+ if (!pte_locked) {
+ ret = true;
+ goto alllocks;
+ }
+
+ /*
+ * Fast attempt. If we either own or can get the pv list lock
+ * of the first page in the superpage, all other owners must
+ * release their locks without waiting for us.
+ */
+ if (m == sm || obm_trylock(&sm->md.pv_lock)) {
+ for (i = 1, mt = sm + 1; i < NPDEPG; i++, mt++) {
+ if (m != mt)
+ obm_lock(&mt->md.pv_lock, lo);
+ }
+ return (true);
+ }
+
+ obm_unlock(&m->md.pv_lock, lo); /* drop m, then take all from sm upward */
+ ret = false;
+alllocks:
+ for (i = 0, mt = sm; i < NPDEPG; i++, mt++) {
+ obm_lock(&mt->md.pv_lock, lo);
+ }
+ return (ret);
+}
+
+/*
+ * If *lockp points to one of the ordinary pages from the superpage we
+ * are demoting or promoting, then we keep this page's pv list locked
+ * after pmap_pv_list_unlock_pde(). Otherwise, we just unlock whatever
+ * was locked, and unlock the whole run in pmap_pv_list_unlock_pde().
+ */
+static void
+pmap_pv_list_lock_pde(vm_paddr_t pa, PVLL **lockp)
+{
+ vm_page_t m;
+
+ m = PHYS_TO_VM_PAGE(pa);
+ if (m == NULL)
+ m = &pv_fake_page;
+
+ if (*lockp == NULL) { /* nothing held: lock the whole run */
+ pmap_pv_list_lock_pde1(m, false);
+ return;
+ }
+ if ((VM_PAGE_TO_PHYS(*lockp) & PG_PS_FRAME) != (pa & PG_PS_FRAME)) {
+ pmap_pv_list_unlock(*lockp); /* held page is outside this superpage */
+ *lockp = NULL;
+ pmap_pv_list_lock_pde1(m, false);
+ return;
+ }
+ pmap_pv_list_lock_pde1(*lockp, true); /* NOTE(review): false result ignored */
+}
+
+static void
+pmap_pv_list_unlock_pde1(vm_page_t m, bool pte_locked)
+{
+ vm_page_t mt, sm; /* sm: first 4k page of m's PG_PS_FRAME-aligned run */
+ struct lock_object *lo;
+ int i;
+
+ sm = m - ((VM_PAGE_TO_PHYS(m) - (VM_PAGE_TO_PHYS(m) &
+ PG_PS_FRAME)) >> PAGE_SHIFT);
+ lo = pv_list_lock_object(VM_PAGE_TO_PHYS(m));
+ obm_assert_locked(&m->md.pv_lock);
+ obm_assert_locked(&sm->md.pv_lock);
+
+ for (i = 0, mt = sm; i < NPDEPG; i++, mt++) {
+ if (!pte_locked || mt != m) /* with pte_locked, m stays locked */
+ obm_unlock(&mt->md.pv_lock, lo);
+ }
+}
+
+static void
+pmap_pv_list_unlock_pde(vm_paddr_t pa, PVLL **lockp)
+{
+ vm_page_t m;
+ bool pte_locked;
+
+ m = *lockp; /* page whose pv list stays locked, if any */
+ pte_locked = m != NULL;
+ if (!pte_locked) { /* no page to keep: find the run from pa */
+ m = PHYS_TO_VM_PAGE(pa);
+ if (m == NULL)
+ m = &pv_fake_page;
+ }
+ pmap_pv_list_unlock_pde1(m, pte_locked);
+}
/*
* All those kernel PT submaps that BSD is so fond of
@@ -1152,7 +1299,7 @@
{
u_long gen, *m_gen;
- rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED);
+//XXXKIB rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED);
gen = curthread->td_md.md_invl_gen.gen;
if (gen == 0)
return;
@@ -1185,37 +1332,36 @@
static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_chunk_batch(struct pv_chunklist *batch);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
-static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
+static pv_entry_t get_pv_entry(pmap_t pmap, PVLL **lockp);
static int popcnt_pc_map_pq(uint64_t *map);
-static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
-static void reserve_pv_entries(pmap_t pmap, int needed,
- struct rwlock **lockp);
+static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, PVLL **lockp);
+static void reserve_pv_entries(pmap_t pmap, int needed, PVLL **lockp);
static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp);
+ PVLL **lockp);
static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde,
- u_int flags, struct rwlock **lockp);
+ u_int flags, PVLL **lockp);
#if VM_NRESERVLEVEL > 0
static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp);
+ PVLL **lockp);
#endif
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
- vm_offset_t va);
+ vm_offset_t va);
static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte);
static int pmap_change_props_locked(vm_offset_t va, vm_size_t size,
vm_prot_t prot, int mode, int flags);
static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
- vm_offset_t va, struct rwlock **lockp);
+ vm_offset_t va, PVLL **lockp);
static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
vm_offset_t va);
static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, struct rwlock **lockp);
+ vm_prot_t prot, PVLL **lockp);
static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde,
- u_int flags, vm_page_t m, struct rwlock **lockp);
+ u_int flags, vm_page_t m, PVLL **lockp);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
- vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
+ vm_page_t m, vm_prot_t prot, vm_page_t mpte, PVLL **lockp);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted);
static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva,
@@ -1223,13 +1369,13 @@
static void pmap_invalidate_cache_range_all(vm_offset_t sva,
vm_offset_t eva);
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
- pd_entry_t pde);
+ pd_entry_t pde);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static vm_page_t pmap_large_map_getptp_unlocked(void);
static vm_paddr_t pmap_large_map_kextract(vm_offset_t va);
#if VM_NRESERVLEVEL > 0
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
- struct rwlock **lockp);
+ PVLL **lockp);
#endif
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
vm_prot_t prot);
@@ -1240,27 +1386,26 @@
static pd_entry_t *pmap_pti_pde(vm_offset_t va);
static void pmap_pti_wire_pte(void *pte);
static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- struct spglist *free, struct rwlock **lockp);
+ struct spglist *free, PVLL **lockp);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
- pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
+ pd_entry_t ptepde, struct spglist *free, PVLL **lockp);
static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
struct spglist *free);
static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
- pd_entry_t *pde, struct spglist *free,
- struct rwlock **lockp);
+ pd_entry_t *pde, struct spglist *free, PVLL **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
- vm_page_t m, struct rwlock **lockp);
+ vm_page_t m, PVLL **lockp);
static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
pd_entry_t newpde);
static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde);
static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
- struct rwlock **lockp);
+ PVLL **lockp);
static pd_entry_t *pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
- struct rwlock **lockp);
+ PVLL **lockp);
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
- struct rwlock **lockp);
+ PVLL **lockp);
static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct spglist *free);
@@ -1895,6 +2040,7 @@
TAILQ_INIT(&m->md.pv_list);
m->md.pat_mode = PAT_WRITE_BACK;
+ obm_init(&m->md.pv_lock);
}
static int pmap_allow_2m_x_ept;
@@ -1951,13 +2097,6 @@
long start, end, highest, pv_npg;
int domain, i, j, pages;
- /*
- * We strongly depend on the size being a power of two, so the assert
- * is overzealous. However, should the struct be resized to a
- * different power of two, the code below needs to be revisited.
- */
- CTASSERT((sizeof(*pvd) == 64));
-
/*
* Calculate the size of the array.
*/
@@ -1992,12 +2131,13 @@
vm_page_t m = vm_page_alloc_domain(NULL, 0,
domain, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
if (m == NULL)
- panic("vm_page_alloc_domain failed for %lx\n", (vm_offset_t)pvd + j);
+ panic("vm_page_alloc_domain failed for %lx\n",
+ (vm_offset_t)pvd + j);
pmap_qenter((vm_offset_t)pvd + j, &m, 1);
}
for (j = 0; j < s / sizeof(*pvd); j++) {
- rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW);
+ obm_init_lo(&pvd->lo, "pmap pv list");
TAILQ_INIT(&pvd->pv_page.pv_list);
pvd->pv_page.pv_gen = 0;
pvd->pv_page.pat_mode = 0;
@@ -2006,8 +2146,10 @@
}
}
pvd = &pv_dummy_large;
- rw_init_flags(&pvd->pv_lock, "pmap pv list dummy", RW_NEW);
+ obm_init_lo(&pvd->lo, "pmap pv list dummy");
TAILQ_INIT(&pvd->pv_page.pv_list);
+ pmap_page_init(&pv_fake_page);
+ pv_fake_page.phys_addr = pmap_last_pa + PAGE_SIZE;
pvd->pv_page.pv_gen = 0;
pvd->pv_page.pat_mode = 0;
pvd->pv_invl_gen = 0;
@@ -2023,7 +2165,7 @@
* Initialize the pool of pv list locks.
*/
for (i = 0; i < NPV_LIST_LOCKS; i++)
- rw_init(&pv_list_locks[i], "pmap pv list");
+ obm_init_lo(&pv_lo[i], "pmap pv list");
/*
* Calculate the size of the pv head table for superpages.
@@ -2039,6 +2181,8 @@
for (i = 0; i < pv_npg; i++)
TAILQ_INIT(&pv_table[i].pv_list);
TAILQ_INIT(&pv_dummy.pv_list);
+ pmap_page_init(&pv_fake_page);
+ pv_fake_page.phys_addr = vm_phys_segs[vm_phys_nsegs - 1].end + PAGE_SIZE;
}
#endif
@@ -3782,7 +3926,7 @@
* race conditions.
*/
static vm_page_t
-_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
+_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, PVLL **lockp)
{
vm_page_t m, pdppg, pdpg;
pt_entry_t PG_A, PG_M, PG_RW, PG_V;
@@ -3929,7 +4073,7 @@
static pd_entry_t *
pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pdp_entry_t *pdpe, PG_V;
pd_entry_t *pde;
@@ -3968,7 +4112,7 @@
}
static vm_page_t
-pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
+pmap_allocpte(pmap_t pmap, vm_offset_t va, PVLL **lockp)
{
vm_pindex_t ptepindex;
pd_entry_t *pd, PG_V;
@@ -4283,7 +4427,7 @@
* exacerbating the shortage of free pv entries.
*/
static vm_page_t
-reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain)
+reclaim_pv_chunk_domain(pmap_t locked_pmap, PVLL **lockp, int domain)
{
struct pv_chunks_list *pvc;
struct pv_chunk *pc, *pc_marker, *pc_marker_end;
@@ -4483,7 +4627,7 @@
}
static vm_page_t
-reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
+reclaim_pv_chunk(pmap_t locked_pmap, PVLL **lockp)
{
vm_page_t m;
int i, domain;
@@ -4591,7 +4735,7 @@
* The given PV list lock may be released.
*/
static pv_entry_t
-get_pv_entry(pmap_t pmap, struct rwlock **lockp)
+get_pv_entry(pmap_t pmap, PVLL **lockp)
{
struct pv_chunks_list *pvc;
int bit, field;
@@ -4692,7 +4836,7 @@
* The given PV list lock may be released.
*/
static void
-reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
+reserve_pv_entries(pmap_t pmap, int needed, PVLL **lockp)
{
struct pv_chunks_list *pvc;
struct pch new_tail[PMAP_MEMDOM];
@@ -4795,7 +4939,7 @@
*/
static void
pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp)
+ PVLL **lockp)
{
struct md_page *pvh;
struct pv_chunk *pc;
@@ -4807,7 +4951,6 @@
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 2mpage aligned"));
- CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
* Transfer the 2mpage's pv entry for this mapping to the first
@@ -4864,7 +5007,7 @@
*/
static void
pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
- struct rwlock **lockp)
+ PVLL **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
@@ -4873,7 +5016,6 @@
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 2mpage aligned"));
- CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
/*
* Transfer the first page's pv entry for this mapping to the 2mpage's
@@ -4920,7 +5062,7 @@
*/
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pv_entry_t pv;
@@ -4943,7 +5085,7 @@
*/
static bool
pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags,
- struct rwlock **lockp)
+ PVLL **lockp)
{
struct md_page *pvh;
pv_entry_t pv;
@@ -4984,13 +5126,13 @@
static boolean_t
pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
- struct rwlock *lock;
+ PVLL *lock;
boolean_t rv;
lock = NULL;
rv = pmap_demote_pde_locked(pmap, pde, va, &lock);
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -5023,7 +5165,7 @@
static void
pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
- pd_entry_t oldpde, struct rwlock **lockp)
+ pd_entry_t oldpde, PVLL **lockp)
{
struct spglist free;
vm_offset_t sva;
@@ -5040,7 +5182,7 @@
static boolean_t
pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
@@ -5174,8 +5316,11 @@
/*
* Demote the PV entry.
*/
- if ((oldpde & PG_MANAGED) != 0)
+ if ((oldpde & PG_MANAGED) != 0) {
+ pmap_pv_list_lock_pde(oldpde & PG_PS_FRAME, lockp);
pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp);
+ pmap_pv_list_unlock_pde(oldpde & PG_PS_FRAME, lockp);
+ }
atomic_add_long(&pmap_pde_demotions, 1);
CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx in pmap %p",
@@ -5228,7 +5373,7 @@
*/
static int
pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
- struct spglist *free, struct rwlock **lockp)
+ struct spglist *free, PVLL **lockp)
{
struct md_page *pvh;
pd_entry_t oldpde;
@@ -5289,7 +5434,7 @@
*/
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va,
- pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp)
+ pd_entry_t ptepde, struct spglist *free, PVLL **lockp)
{
struct md_page *pvh;
pt_entry_t oldpte, PG_A, PG_M, PG_RW;
@@ -5330,7 +5475,7 @@
pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
struct spglist *free)
{
- struct rwlock *lock;
+ PVLL *lock;
pt_entry_t *pte, PG_V;
PG_V = pmap_valid_bit(pmap);
@@ -5343,7 +5488,7 @@
lock = NULL;
pmap_remove_pte(pmap, pte, va, *pde, free, &lock);
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
pmap_invalidate_page(pmap, va);
}
@@ -5352,7 +5497,7 @@
*/
static bool
pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
- pd_entry_t *pde, struct spglist *free, struct rwlock **lockp)
+ pd_entry_t *pde, struct spglist *free, PVLL **lockp)
{
pt_entry_t PG_G, *pte;
vm_offset_t va;
@@ -5394,7 +5539,7 @@
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- struct rwlock *lock;
+ PVLL *lock;
vm_offset_t va_next;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
@@ -5507,7 +5652,7 @@
anyvalid = 1;
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
out:
if (anyvalid)
pmap_invalidate_all(pmap);
@@ -5535,7 +5680,7 @@
struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
- struct rwlock *lock;
+ PVLL *lock;
pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW;
pd_entry_t *pde;
vm_offset_t va;
@@ -5549,16 +5694,16 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry:
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -5573,11 +5718,11 @@
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
goto retry;
}
@@ -5609,7 +5754,7 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
pmap_delayed_invl_wait(m);
vm_page_free_pages_toq(&free, true);
}
@@ -5825,8 +5970,7 @@
* identical characteristics.
*/
static void
-pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
- struct rwlock **lockp)
+pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, PVLL **lockp)
{
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
@@ -5928,8 +6072,11 @@
/*
* Promote the pv entries.
*/
- if ((newpde & PG_MANAGED) != 0)
+ if ((newpde & PG_MANAGED) != 0) {
+ pmap_pv_list_lock_pde(newpde & PG_PS_FRAME, lockp);
pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp);
+ pmap_pv_list_unlock_pde(newpde & PG_PS_FRAME, lockp);
+ }
/*
* Propagate the PAT index to its proper position.
@@ -5970,7 +6117,7 @@
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
u_int flags, int8_t psind)
{
- struct rwlock *lock;
+ PVLL *lock;
pd_entry_t *pde;
pt_entry_t *pte, PG_G, PG_A, PG_M, PG_RW, PG_V;
pt_entry_t newpte, origpte;
@@ -6176,7 +6323,7 @@
pv = get_pv_entry(pmap, &lock);
pv->pv_va = va;
}
- CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
+ CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
if ((newpte & PG_RW) != 0)
@@ -6229,7 +6376,7 @@
rv = KERN_SUCCESS;
out:
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
return (rv);
}
@@ -6243,7 +6390,7 @@
*/
static bool
pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
- struct rwlock **lockp)
+ PVLL **lockp)
{
pd_entry_t newpde;
pt_entry_t PG_V;
@@ -6294,7 +6441,7 @@
*/
static int
pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
- vm_page_t m, struct rwlock **lockp)
+ vm_page_t m, PVLL **lockp)
{
struct spglist free;
pd_entry_t oldpde, *pde;
@@ -6442,7 +6589,7 @@
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
{
- struct rwlock *lock;
+ PVLL *lock;
vm_offset_t va;
vm_page_t m, mpte;
vm_pindex_t diff, psize;
@@ -6467,7 +6614,7 @@
m = TAILQ_NEXT(m, listq);
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
}
@@ -6483,19 +6630,19 @@
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
- struct rwlock *lock;
+ PVLL *lock;
lock = NULL;
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
}
static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
- vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
+ vm_prot_t prot, vm_page_t mpte, PVLL **lockp)
{
pt_entry_t newpte, *pte, PG_V;
@@ -6789,7 +6936,7 @@
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t src_addr)
{
- struct rwlock *lock;
+ PVLL *lock;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t *pde, srcptepaddr;
@@ -6926,7 +7073,7 @@
}
out:
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -7041,7 +7188,7 @@
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
struct md_page *pvh;
- struct rwlock *lock;
+ PVLL *lock;
pv_entry_t pv;
int loops = 0;
boolean_t rv;
@@ -7050,7 +7197,7 @@
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
if (PV_PMAP(pv) == pmap) {
rv = TRUE;
@@ -7072,7 +7219,7 @@
break;
}
}
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -7085,7 +7232,7 @@
int
pmap_page_wired_mappings(vm_page_t m)
{
- struct rwlock *lock;
+ PVLL *lock;
struct md_page *pvh;
pmap_t pmap;
pt_entry_t *pte;
@@ -7095,16 +7242,16 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (0);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
restart:
count = 0;
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -7122,9 +7269,9 @@
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen ||
pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
@@ -7137,7 +7284,7 @@
PMAP_UNLOCK(pmap);
}
}
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (count);
}
@@ -7148,17 +7295,17 @@
boolean_t
pmap_page_is_mapped(vm_page_t m)
{
- struct rwlock *lock;
+ PVLL *lock;
boolean_t rv;
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -7199,7 +7346,7 @@
pv_entry_t pv;
struct md_page *pvh;
struct pv_chunk *pc, *npc;
- struct rwlock *lock;
+ PVLL *lock;
int64_t bit;
uint64_t inuse, bitmask;
int allfree, field, freed, i, idx;
@@ -7363,7 +7510,7 @@
}
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
pmap_invalidate_all(pmap);
pmap_pkru_deassign_all(pmap);
free_pv_chunk_batch((struct pv_chunklist *)&free_chunks);
@@ -7374,7 +7521,7 @@
static boolean_t
pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
{
- struct rwlock *lock;
+ PVLL *lock;
pv_entry_t pv;
struct md_page *pvh;
pt_entry_t *pte, mask;
@@ -7385,15 +7532,15 @@
rv = FALSE;
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
restart:
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -7423,9 +7570,9 @@
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_rlock(lock);
+ pmap_pv_list_lock(lock);
if (md_gen != m->md.pv_gen ||
pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
@@ -7451,7 +7598,7 @@
}
}
out:
- rw_runlock(lock);
+ pmap_pv_list_unlock(lock);
return (rv);
}
@@ -7524,7 +7671,7 @@
{
struct md_page *pvh;
pmap_t pmap;
- struct rwlock *lock;
+ PVLL *lock;
pv_entry_t next_pv, pv;
pd_entry_t *pde;
pt_entry_t oldpte, *pte, PG_M, PG_RW;
@@ -7542,17 +7689,17 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry_pv_loop:
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
goto retry_pv_loop;
}
}
@@ -7571,13 +7718,13 @@
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen ||
md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
goto retry_pv_loop;
}
}
@@ -7600,7 +7747,7 @@
}
PMAP_UNLOCK(pmap);
}
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
vm_page_aflag_clear(m, PGA_WRITEABLE);
pmap_delayed_invl_wait(m);
}
@@ -7658,7 +7805,7 @@
struct md_page *pvh;
pv_entry_t pv, pvf;
pmap_t pmap;
- struct rwlock *lock;
+ PVLL *lock;
pd_entry_t oldpde, *pde;
pt_entry_t *pte, PG_A, PG_M, PG_RW;
vm_offset_t va;
@@ -7674,7 +7821,7 @@
pa = VM_PAGE_TO_PHYS(m);
lock = PHYS_TO_PV_LIST_LOCK(pa);
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
retry:
not_cleared = 0;
if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
@@ -7686,9 +7833,9 @@
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
@@ -7792,9 +7939,9 @@
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
md_gen = m->md.pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto retry;
@@ -7845,7 +7992,7 @@
} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
not_cleared < PMAP_TS_REFERENCED_MAX);
out:
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
vm_page_free_pages_toq(&free, true);
return (cleared + not_cleared);
}
@@ -7858,7 +8005,7 @@
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
- struct rwlock *lock;
+ PVLL *lock;
pml4_entry_t *pml4e;
pdp_entry_t *pdpe;
pd_entry_t oldpde, *pde;
@@ -7915,7 +8062,7 @@
lock = NULL;
if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
/*
* The large page mapping was destroyed.
@@ -7947,7 +8094,7 @@
anychanged = true;
}
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
}
if (va_next > eva)
va_next = eva;
@@ -8004,7 +8151,7 @@
pv_entry_t next_pv, pv;
pd_entry_t oldpde, *pde;
pt_entry_t *pte, PG_M, PG_RW;
- struct rwlock *lock;
+ PVLL *lock;
vm_offset_t va;
int md_gen, pvh_gen;
@@ -8017,15 +8164,15 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
pa_to_pvh(VM_PAGE_TO_PHYS(m));
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
restart:
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
pmap = PV_PMAP(pv);
if (!PMAP_TRYLOCK(pmap)) {
pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -8057,9 +8204,9 @@
if (!PMAP_TRYLOCK(pmap)) {
md_gen = m->md.pv_gen;
pvh_gen = pvh->pv_gen;
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_LOCK(pmap);
- rw_wlock(lock);
+ pmap_pv_list_lock(lock);
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
PMAP_UNLOCK(pmap);
goto restart;
@@ -8077,7 +8224,7 @@
}
PMAP_UNLOCK(pmap);
}
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
}
/*
@@ -9026,7 +9173,7 @@
pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype)
{
int rv;
- struct rwlock *lock;
+ PVLL *lock;
#if VM_NRESERVLEVEL > 0
vm_page_t m, mpte;
#endif
@@ -9113,7 +9260,7 @@
rv = 0; /* success */
done:
if (lock != NULL)
- rw_wunlock(lock);
+ pmap_pv_list_unlock(lock);
PMAP_UNLOCK(pmap);
return (rv);
}
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -312,7 +312,9 @@
struct md_page {
TAILQ_HEAD(, pv_entry) pv_list; /* (p) */
int pv_gen; /* (p) */
- int pat_mode;
+ uint8_t pv_lock;
+ uint8_t pat_mode;
+ uint8_t pad0[2];
};
enum pmap_type {
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3742,6 +3742,7 @@
kern/kern_mtxpool.c standard
kern/kern_mutex.c standard
kern/kern_ntptime.c standard
+kern/kern_obm.c standard
kern/kern_osd.c standard
kern/kern_physio.c standard
kern/kern_pmc.c standard
Index: sys/kern/kern_obm.c
===================================================================
--- /dev/null
+++ sys/kern/kern_obm.c
@@ -0,0 +1,132 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/obm.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/turnstile.h>
+#include <machine/atomic.h>
+
+static SYSCTL_NODE(_debug, OID_AUTO, obm, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "One-byte mutex statistics");
+static u_long obm_slow_lock; /* bumped on each obm_lock() slow path */
+SYSCTL_ULONG(_debug_obm, OID_AUTO, slow_lock, CTLFLAG_RD,
+ &obm_slow_lock, 0,
+ "Number of obm_lock() calls that took the slow path");
+static u_long obm_slow_unlock; /* bumped on each obm_unlock() slow path */
+SYSCTL_ULONG(_debug_obm, OID_AUTO, slow_unlock, CTLFLAG_RD,
+ &obm_slow_unlock, 0,
+ "Number of obm_unlock() calls that took the slow path");
+
+/*
+ * Prepare a lock_object for use with the obm functions: the whole
+ * structure is zeroed and then only lo_name is set.
+ *
+ * lo   - lock_object to initialize.
+ * name - stored by pointer in lo_name, not copied.
+ */
+void
+obm_init_lo(struct lock_object *lo, const char *name)
+{
+ /* Zero first so every field other than lo_name starts out as 0. */
+ bzero(lo, sizeof(*lo));
+ lo->lo_name = name;
+}
+
+/*
+ * Initialize a one-byte mutex by storing OBM_UNLOCKED into *byte.
+ * The store is a plain assignment, not an atomic operation.
+ */
+void
+obm_init(uint8_t *byte)
+{
+ *byte = OBM_UNLOCKED;
+}
+
+/*
+ * Make a single attempt to take the one-byte mutex in *byte by swapping
+ * OBM_UNLOCKED for OBM_LOCKED with an acquire compare-and-set.
+ * Returns true if the swap happened, false if it did not.
+ */
+bool
+obm_trylock(uint8_t *byte)
+{
+ int swapped;
+
+ swapped = atomic_cmpset_acq_char(byte, OBM_UNLOCKED, OBM_LOCKED);
+ return (swapped != 0);
+}
+
+/*
+ * Acquire the one-byte mutex stored in *byte, waiting on a turnstile
+ * while another holder has it.
+ *
+ * byte - the lock byte (OBM_UNLOCKED / OBM_LOCKED / OBM_CONTESTED bits).
+ * lo   - lock_object handed to the turnstile functions.
+ */
+void
+obm_lock(uint8_t *byte, struct lock_object *lo)
+{
+ struct turnstile *ts;
+ struct lock_delay_arg lda;
+ uint8_t v;
+
+ /* Fast path: one acquire compare-and-set from UNLOCKED to LOCKED. */
+ v = OBM_UNLOCKED;
+ if (atomic_fcmpset_acq_char(byte, &v, OBM_LOCKED))
+ return;
+
+ /* Slow path; counted in the obm_slow_lock statistic. */
+ atomic_add_long(&obm_slow_lock, 1);
+ lock_delay_arg_init(&lda, &locks_delay);
+ for (;;) {
+ /* Runs on every pass that re-enters from the top of the loop. */
+ lock_delay(&lda);
+reload:
+ /* Also entered directly, skipping lock_delay(), after a wait. */
+ v = atomic_load_char(byte);
+ if (v == OBM_UNLOCKED) {
+ /* Looks free: try to take it, otherwise go around again. */
+ if (atomic_fcmpset_acq_char(byte, &v, OBM_LOCKED))
+ break;
+ continue;
+ }
+
+ /* Lock is held: set up the turnstile, then re-read the byte. */
+ ts = turnstile_trywait(lo);
+ v = atomic_load_char(byte);
+cancel_turnstile:
+ if (v == OBM_UNLOCKED) {
+ /* Released in the meantime: back out and retry. */
+ turnstile_cancel(ts);
+ continue;
+ }
+ /*
+ * Make sure OBM_CONTESTED is set before waiting.  On failure
+ * fcmpset leaves the value it found in v, so the recheck at
+ * cancel_turnstile uses it without another load.
+ */
+ if ((v & OBM_CONTESTED) == 0 &&
+ atomic_fcmpset_char(byte, &v, v | OBM_CONTESTED) == 0)
+ goto cancel_turnstile;
+ /*
+ * Wait on the shared queue with no owner supplied (NULL);
+ * obm_unlock() broadcasts on the same queue.
+ */
+ turnstile_wait(ts, NULL, TS_SHARED_QUEUE);
+ goto reload;
+ }
+}
+
+/*
+ * Release the one-byte mutex stored in *byte and, if the fast path
+ * fails, wake any threads queued on the turnstile for lo.
+ *
+ * byte - the lock byte; expected to be OBM_LOCKED, possibly with
+ *        OBM_CONTESTED set.
+ * lo   - the lock_object used for the turnstile chain and lookup.
+ */
+void
+obm_unlock(uint8_t *byte, struct lock_object *lo)
+{
+ struct turnstile *ts;
+ uint8_t v;
+
+ /*
+ * Fast path: release compare-and-set from exactly OBM_LOCKED, which
+ * cannot match while OBM_CONTESTED is set.
+ */
+ v = OBM_LOCKED;
+ if (atomic_fcmpset_rel_char(byte, &v, OBM_UNLOCKED))
+ return;
+ /* v now holds the value fcmpset observed; it must be a locked state. */
+ MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED));
+ /* Slow path; counted in the obm_slow_unlock statistic. */
+ atomic_add_long(&obm_slow_unlock, 1);
+ /* The release store and the wakeup both happen under the chain lock. */
+ turnstile_chain_lock(lo);
+ atomic_store_rel_char(byte, OBM_UNLOCKED);
+ ts = turnstile_lookup(lo);
+ if (ts != NULL) {
+ /* Wake every waiter on the shared queue, not just one. */
+ turnstile_broadcast(ts, TS_SHARED_QUEUE);
+ turnstile_unpend(ts);
+ }
+ turnstile_chain_unlock(lo);
+}
+
Index: sys/sys/obm.h
===================================================================
--- /dev/null
+++ sys/sys/obm.h
@@ -0,0 +1,66 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_OBM_H
+#define _SYS_OBM_H
+
+/* One-Byte Mutex (OBM) */
+
+#ifdef _KERNEL
+
+#include <sys/systm.h>
+#include <sys/_lock.h>
+#include <machine/atomic.h>
+
+/* Values stored in the lock byte. */
+#define OBM_UNLOCKED 0x00 /* lock is free */
+#define OBM_LOCKED 0x02 /* lock is held */
+#define OBM_CONTESTED 0x01 /* or'ed in by obm_lock() before it waits */
+
+/* Implemented in kern/kern_obm.c. */
+void obm_init_lo(struct lock_object *lo, const char *name);
+void obm_init(uint8_t *byte);
+void obm_lock(uint8_t *byte, struct lock_object *lo);
+void obm_unlock(uint8_t *byte, struct lock_object *lo);
+bool obm_trylock(uint8_t *byte);
+
+/*
+ * Debugging aid: with INVARIANTS, assert that *byte is in a locked state
+ * (OBM_LOCKED, with or without OBM_CONTESTED).  Only the byte value is
+ * examined, so this checks that the lock is held, not who holds it.
+ * Without INVARIANTS the body is empty.
+ */
+__used static void
+obm_assert_locked(uint8_t *byte)
+{
+#ifdef INVARIANTS
+ uint8_t v;
+
+ v = atomic_load_char(byte);
+ MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED));
+#endif
+}
+#endif
+#endif

File Metadata

Mime Type
text/plain
Expires
Sat, May 23, 12:28 PM (19 h, 55 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33446415
Default Alt Text
D24217.id70002.diff (39 KB)

Event Timeline