Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -121,6 +121,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -331,50 +332,51 @@
 #ifdef NUMA
 #define	pa_to_pmdp(pa)	(&pv_table[pa_index(pa)])
 #define	pa_to_pvh(pa)	(&(pa_to_pmdp(pa)->pv_page))
-#define	PHYS_TO_PV_LIST_LOCK(pa)	({			\
-	struct rwlock *_lock;					\
-	if (__predict_false((pa) > pmap_last_pa))		\
-		_lock = &pv_dummy_large.pv_lock;		\
-	else							\
-		_lock = &(pa_to_pmdp(pa)->pv_lock);		\
-	_lock;							\
-})
 #else
 #define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])
 #define	NPV_LIST_LOCKS	MAXCPU
-#define	PHYS_TO_PV_LIST_LOCK(pa)	\
-	(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
 #endif
+#define	PHYS_TO_PV_LIST_LOCK(pa)	PHYS_TO_VM_PAGE(pa)
+
 #define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
-	struct rwlock **_lockp = (lockp);		\
-	struct rwlock *_new_lock;			\
+	PVLL **_lockp = (lockp);			\
+	PVLL *_new_lock;				\
 							\
 	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
 	if (_new_lock != *_lockp) {			\
 		if (*_lockp != NULL)			\
-			rw_wunlock(*_lockp);		\
+			pmap_pv_list_unlock(*_lockp);	\
+		if (_new_lock == NULL)			\
+			_new_lock = &pv_fake_page;	\
 		*_lockp = _new_lock;			\
-		rw_wlock(*_lockp);			\
+		pmap_pv_list_lock(*_lockp);		\
 	}						\
 } while (0)
 
-#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
-	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	do {	\
+	PVLL **_lockp = (lockp);			\
+							\
+	if (m != *_lockp) {				\
+		if (*_lockp != NULL)			\
+			pmap_pv_list_unlock(*_lockp);	\
+		*_lockp = m;				\
+		pmap_pv_list_lock(m);			\
+	}						\
+} while (0)
 
 #define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
-	struct rwlock **_lockp = (lockp);		\
+	PVLL **_lockp = (lockp);			\
 							\
 	if (*_lockp != NULL) {				\
-		rw_wunlock(*_lockp);			\
+		pmap_pv_list_unlock(*_lockp);		\
 		*_lockp = NULL;				\
 	}						\
 } while (0)
 
-#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
-	PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+#define	VM_PAGE_TO_PV_LIST_LOCK(m)	(m)
 
 struct pmap kernel_pmap_store;
 
@@ -451,22 +453,168 @@
 struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM];
 
+typedef struct vm_page PVLL;
+
 #ifdef NUMA
 struct pmap_large_md_page {
-	struct rwlock	pv_lock;
-	struct md_page	pv_page;
+	struct lock_object lo;
+	uintptr_t	pad;
+	struct md_page	pv_page;
 	u_long pv_invl_gen;
 };
+/*
+ * We strongly depend on the size being a power of two, so the assert
+ * is overzealous.  However, should the struct be resized to a
+ * different power of two, the code below needs to be revisited.
+ */
+_Static_assert(sizeof(struct pmap_large_md_page) == 64, "pmap_large_md_page");
+
 __exclusive_cache_line static struct pmap_large_md_page pv_dummy_large;
 #define pv_dummy pv_dummy_large.pv_page
 __read_mostly static struct pmap_large_md_page *pv_table;
 __read_mostly vm_paddr_t pmap_last_pa;
+static struct lock_object *
+pv_list_lock_object(vm_paddr_t pa)
+{
+	if (__predict_false(pa > pmap_last_pa))
+		return (&pv_dummy_large.lo);
+	return (&(pa_to_pmdp(pa)->lo));
+}
 #else
-static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
+static struct lock_object __exclusive_cache_line pv_lo[NPV_LIST_LOCKS];
 static u_long pv_invl_gen[NPV_LIST_LOCKS];
 static struct md_page *pv_table;
 static struct md_page pv_dummy;
+static struct lock_object *
+pv_list_lock_object(vm_paddr_t pa)
+{
+	return (&pv_lo[pa_index(pa) % NPV_LIST_LOCKS]);
+}
 #endif
+__read_mostly static struct vm_page pv_fake_page;
+
+static void
+pmap_pv_list_lock(vm_page_t m)
+{
+	obm_lock(&m->md.pv_lock, pv_list_lock_object(VM_PAGE_TO_PHYS(m)));
+}
+
+static void
+pmap_pv_list_unlock(vm_page_t m)
+{
+	obm_unlock(&m->md.pv_lock, pv_list_lock_object(VM_PAGE_TO_PHYS(m)));
+}
+
+/*
+ * Locks all pv lists for 4k pages constituting the superpage that
+ * contains the passed page.  The page's pv list must be locked.
+ * Returns false if trylock failed and the page's pv list was unlocked
+ * in the process, which typically means that the caller must restart.
+ */
+static bool
+pmap_pv_list_lock_pde1(vm_page_t m, bool pte_locked)
+{
+	vm_page_t mt, sm;
+	struct lock_object *lo;
+	int i;
+	bool ret;
+
+	obm_assert_locked(&m->md.pv_lock);
+
+	sm = m - ((VM_PAGE_TO_PHYS(m) - (VM_PAGE_TO_PHYS(m) &
+	    PG_PS_FRAME)) >> PAGE_SHIFT);
+	lo = pv_list_lock_object(VM_PAGE_TO_PHYS(m));
+
+	if (!pte_locked) {
+		ret = true;
+		goto alllocks;
+	}
+
+	/*
+	 * Fast attempt.  If we either own or can get the pv list lock
+	 * of the first page in the superpage, all other owners must
+	 * release their locks without waiting for us.
+	 */
+	if (m == sm || obm_trylock(&sm->md.pv_lock)) {
+		for (i = 1, mt = sm + 1; i < NPDEPG; i++, mt++) {
+			if (m != mt)
+				obm_lock(&mt->md.pv_lock, lo);
+		}
+		return (true);
+	}
+
+	obm_unlock(&m->md.pv_lock, lo);
+	ret = false;
+alllocks:
+	for (i = 0, mt = sm; i < NPDEPG; i++, mt++) {
+		obm_lock(&mt->md.pv_lock, lo);
+	}
+	return (ret);
+}
+
+/*
+ * If *lockp points to one of the ordinary pages from the superpage we
+ * are demoting or promoting, then we keep this page's pv list locked
+ * after pmap_pv_list_unlock_pde().  Otherwise, we just unlock whatever
+ * was locked, and the whole run is unlocked by pmap_pv_list_unlock_pde().
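+ *
+ * To illustrate the contract: when *lockp names a 4k page inside the
+ * 2M run at pa, the pv list locks of all NPDEPG constituent pages are
+ * held between pmap_pv_list_lock_pde() and pmap_pv_list_unlock_pde(),
+ * and *lockp's own lock is still held after the unlock call returns.
+ * When *lockp is NULL or names a page outside the run, the old lock
+ * (if any) is dropped, *lockp is reset to NULL, and all NPDEPG locks
+ * are taken by the lock call and released by the unlock call.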
+ */ +static void +pmap_pv_list_lock_pde(vm_paddr_t pa, PVLL **lockp) +{ + vm_page_t m; + + m = PHYS_TO_VM_PAGE(pa); + if (m == NULL) + m = &pv_fake_page; + + if (*lockp == NULL) { + pmap_pv_list_lock_pde1(m, false); + return; + } + if ((VM_PAGE_TO_PHYS(*lockp) & PG_PS_FRAME) != (pa & PG_PS_FRAME)) { + pmap_pv_list_unlock(*lockp); + *lockp = NULL; + pmap_pv_list_lock_pde1(m, false); + return; + } + pmap_pv_list_lock_pde1(*lockp, true); +} + +static void +pmap_pv_list_unlock_pde1(vm_page_t m, bool pte_locked) +{ + vm_page_t mt, sm; + struct lock_object *lo; + int i; + + sm = m - ((VM_PAGE_TO_PHYS(m) - (VM_PAGE_TO_PHYS(m) & + PG_PS_FRAME)) >> PAGE_SHIFT); + lo = pv_list_lock_object(VM_PAGE_TO_PHYS(m)); + obm_assert_locked(&m->md.pv_lock); + obm_assert_locked(&sm->md.pv_lock); + + for (i = 0, mt = sm; i < NPDEPG; i++, mt++) { + if (!pte_locked || mt != m) + obm_unlock(&mt->md.pv_lock, lo); + } +} + +static void +pmap_pv_list_unlock_pde(vm_paddr_t pa, PVLL **lockp) +{ + vm_page_t m; + bool pte_locked; + + m = *lockp; + pte_locked = m != NULL; + if (!pte_locked) { + m = PHYS_TO_VM_PAGE(pa); + if (m == NULL) + m = &pv_fake_page; + } + pmap_pv_list_unlock_pde1(m, pte_locked); +} /* * All those kernel PT submaps that BSD is so fond of @@ -1152,7 +1300,7 @@ { u_long gen, *m_gen; - rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED); +//XXXKIB rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED); gen = curthread->td_md.md_invl_gen.gen; if (gen == 0) return; @@ -1185,37 +1333,36 @@ static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_chunk_batch(struct pv_chunklist *batch); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); -static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); +static pv_entry_t get_pv_entry(pmap_t pmap, PVLL **lockp); static int popcnt_pc_map_pq(uint64_t *map); -static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); -static void reserve_pv_entries(pmap_t pmap, int needed, - struct rwlock **lockp); +static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, PVLL **lockp); +static void reserve_pv_entries(pmap_t pmap, int needed, PVLL **lockp); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, - struct rwlock **lockp); + PVLL **lockp); static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, - u_int flags, struct rwlock **lockp); + u_int flags, PVLL **lockp); #if VM_NRESERVLEVEL > 0 static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, - struct rwlock **lockp); + PVLL **lockp); #endif static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, - vm_offset_t va); + vm_offset_t va); static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte); static int pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, int mode, int flags); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, - vm_offset_t va, struct rwlock **lockp); + vm_offset_t va, PVLL **lockp); static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va); static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, - vm_prot_t prot, struct rwlock **lockp); + vm_prot_t prot, PVLL **lockp); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, - u_int flags, vm_page_t m, struct rwlock **lockp); + u_int flags, vm_page_t m, PVLL **lockp); static vm_page_t 
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, - vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); + vm_page_t m, vm_prot_t prot, vm_page_t mpte, PVLL **lockp); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted); static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, @@ -1223,13 +1370,13 @@ static void pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva); static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, - pd_entry_t pde); + pd_entry_t pde); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static vm_page_t pmap_large_map_getptp_unlocked(void); static vm_paddr_t pmap_large_map_kextract(vm_offset_t va); #if VM_NRESERVLEVEL > 0 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, - struct rwlock **lockp); + PVLL **lockp); #endif static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); @@ -1240,27 +1387,26 @@ static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - struct spglist *free, struct rwlock **lockp); + struct spglist *free, PVLL **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, - pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); + pd_entry_t ptepde, struct spglist *free, PVLL **lockp); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, struct spglist *free); static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, - pd_entry_t *pde, struct spglist *free, - struct rwlock **lockp); + pd_entry_t *pde, struct spglist *free, PVLL **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, - vm_page_t m, struct rwlock **lockp); + vm_page_t m, PVLL **lockp); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde); static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, - struct rwlock **lockp); + PVLL **lockp); static pd_entry_t *pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp, - struct rwlock **lockp); + PVLL **lockp); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, - struct rwlock **lockp); + PVLL **lockp); static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); @@ -1895,6 +2041,7 @@ TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; + obm_init(&m->md.pv_lock); } static int pmap_allow_2m_x_ept; @@ -1951,13 +2098,6 @@ long start, end, highest, pv_npg; int domain, i, j, pages; - /* - * We strongly depend on the size being a power of two, so the assert - * is overzealous. However, should the struct be resized to a - * different power of two, the code below needs to be revisited. - */ - CTASSERT((sizeof(*pvd) == 64)); - /* * Calculate the size of the array. 
*/ @@ -1992,12 +2132,13 @@ vm_page_t m = vm_page_alloc_domain(NULL, 0, domain, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ); if (m == NULL) - panic("vm_page_alloc_domain failed for %lx\n", (vm_offset_t)pvd + j); + panic("vm_page_alloc_domain failed for %lx\n", + (vm_offset_t)pvd + j); pmap_qenter((vm_offset_t)pvd + j, &m, 1); } for (j = 0; j < s / sizeof(*pvd); j++) { - rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW); + obm_init_lo(&pvd->lo, "pmap pv list"); TAILQ_INIT(&pvd->pv_page.pv_list); pvd->pv_page.pv_gen = 0; pvd->pv_page.pat_mode = 0; @@ -2006,8 +2147,10 @@ } } pvd = &pv_dummy_large; - rw_init_flags(&pvd->pv_lock, "pmap pv list dummy", RW_NEW); + obm_init_lo(&pvd->lo, "pmap pv list dummy"); TAILQ_INIT(&pvd->pv_page.pv_list); + pmap_page_init(&pv_fake_page); + pv_fake_page.phys_addr = pmap_last_pa + PAGE_SIZE; pvd->pv_page.pv_gen = 0; pvd->pv_page.pat_mode = 0; pvd->pv_invl_gen = 0; @@ -2023,7 +2166,7 @@ * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) - rw_init(&pv_list_locks[i], "pmap pv list"); + obm_init_lo(&pv_lo[i], "pmap pv list"); /* * Calculate the size of the pv head table for superpages. @@ -2039,6 +2182,8 @@ for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); TAILQ_INIT(&pv_dummy.pv_list); + pmap_page_init(&pv_fake_page); + pv_fake_page.phys_addr = vm_phys_segs[vm_phys_nsegs - 1].end + PAGE_SIZE; } #endif @@ -3782,7 +3927,7 @@ * race conditions. */ static vm_page_t -_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) +_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, PVLL **lockp) { vm_page_t m, pdppg, pdpg; pt_entry_t PG_A, PG_M, PG_RW, PG_V; @@ -3929,7 +4074,7 @@ static pd_entry_t * pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp, - struct rwlock **lockp) + PVLL **lockp) { pdp_entry_t *pdpe, PG_V; pd_entry_t *pde; @@ -3968,7 +4113,7 @@ } static vm_page_t -pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) +pmap_allocpte(pmap_t pmap, vm_offset_t va, PVLL **lockp) { vm_pindex_t ptepindex; pd_entry_t *pd, PG_V; @@ -4283,7 +4428,7 @@ * exacerbating the shortage of free pv entries. */ static vm_page_t -reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain) +reclaim_pv_chunk_domain(pmap_t locked_pmap, PVLL **lockp, int domain) { struct pv_chunks_list *pvc; struct pv_chunk *pc, *pc_marker, *pc_marker_end; @@ -4483,7 +4628,7 @@ } static vm_page_t -reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) +reclaim_pv_chunk(pmap_t locked_pmap, PVLL **lockp) { vm_page_t m; int i, domain; @@ -4591,7 +4736,7 @@ * The given PV list lock may be released. */ static pv_entry_t -get_pv_entry(pmap_t pmap, struct rwlock **lockp) +get_pv_entry(pmap_t pmap, PVLL **lockp) { struct pv_chunks_list *pvc; int bit, field; @@ -4692,7 +4837,7 @@ * The given PV list lock may be released. 
*/ static void -reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) +reserve_pv_entries(pmap_t pmap, int needed, PVLL **lockp) { struct pv_chunks_list *pvc; struct pch new_tail[PMAP_MEMDOM]; @@ -4795,7 +4940,7 @@ */ static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, - struct rwlock **lockp) + PVLL **lockp) { struct md_page *pvh; struct pv_chunk *pc; @@ -4807,7 +4952,6 @@ PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 2mpage aligned")); - CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the 2mpage's pv entry for this mapping to the first @@ -4864,7 +5008,7 @@ */ static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, - struct rwlock **lockp) + PVLL **lockp) { struct md_page *pvh; pv_entry_t pv; @@ -4873,7 +5017,6 @@ KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 2mpage aligned")); - CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the first page's pv entry for this mapping to the 2mpage's @@ -4920,7 +5063,7 @@ */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, - struct rwlock **lockp) + PVLL **lockp) { pv_entry_t pv; @@ -4943,7 +5086,7 @@ */ static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags, - struct rwlock **lockp) + PVLL **lockp) { struct md_page *pvh; pv_entry_t pv; @@ -4984,13 +5127,13 @@ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { - struct rwlock *lock; + PVLL *lock; boolean_t rv; lock = NULL; rv = pmap_demote_pde_locked(pmap, pde, va, &lock); if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); return (rv); } @@ -5023,7 +5166,7 @@ static void pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, - pd_entry_t oldpde, struct rwlock **lockp) + pd_entry_t oldpde, PVLL **lockp) { struct spglist free; vm_offset_t sva; @@ -5040,7 +5183,7 @@ static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, - struct rwlock **lockp) + PVLL **lockp) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; @@ -5174,8 +5317,11 @@ /* * Demote the PV entry. 
*/ - if ((oldpde & PG_MANAGED) != 0) + if ((oldpde & PG_MANAGED) != 0) { + pmap_pv_list_lock_pde(oldpde & PG_PS_FRAME, lockp); pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); + pmap_pv_list_unlock_pde(oldpde & PG_PS_FRAME, lockp); + } atomic_add_long(&pmap_pde_demotions, 1); CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx in pmap %p", @@ -5228,7 +5374,7 @@ */ static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - struct spglist *free, struct rwlock **lockp) + struct spglist *free, PVLL **lockp) { struct md_page *pvh; pd_entry_t oldpde; @@ -5289,7 +5435,7 @@ */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, - pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp) + pd_entry_t ptepde, struct spglist *free, PVLL **lockp) { struct md_page *pvh; pt_entry_t oldpte, PG_A, PG_M, PG_RW; @@ -5330,7 +5476,7 @@ pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, struct spglist *free) { - struct rwlock *lock; + PVLL *lock; pt_entry_t *pte, PG_V; PG_V = pmap_valid_bit(pmap); @@ -5343,7 +5489,7 @@ lock = NULL; pmap_remove_pte(pmap, pte, va, *pde, free, &lock); if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); pmap_invalidate_page(pmap, va); } @@ -5352,7 +5498,7 @@ */ static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, - pd_entry_t *pde, struct spglist *free, struct rwlock **lockp) + pd_entry_t *pde, struct spglist *free, PVLL **lockp) { pt_entry_t PG_G, *pte; vm_offset_t va; @@ -5394,7 +5540,7 @@ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - struct rwlock *lock; + PVLL *lock; vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; @@ -5507,7 +5653,7 @@ anyvalid = 1; } if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); out: if (anyvalid) pmap_invalidate_all(pmap); @@ -5535,7 +5681,7 @@ struct md_page *pvh; pv_entry_t pv; pmap_t pmap; - struct rwlock *lock; + PVLL *lock; pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW; pd_entry_t *pde; vm_offset_t va; @@ -5549,16 +5695,16 @@ pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry: - rw_wlock(lock); + pmap_pv_list_lock(lock); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_wlock(lock); + pmap_pv_list_lock(lock); if (pvh_gen != pvh->pv_gen) { - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_UNLOCK(pmap); goto retry; } @@ -5573,11 +5719,11 @@ if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_wlock(lock); + pmap_pv_list_lock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_UNLOCK(pmap); goto retry; } @@ -5609,7 +5755,7 @@ PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); - rw_wunlock(lock); + pmap_pv_list_unlock(lock); pmap_delayed_invl_wait(m); vm_page_free_pages_toq(&free, true); } @@ -5825,8 +5971,7 @@ * identical characteristics. */ static void -pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, - struct rwlock **lockp) +pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, PVLL **lockp) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; @@ -5928,8 +6073,11 @@ /* * Promote the pv entries. 
*/ - if ((newpde & PG_MANAGED) != 0) + if ((newpde & PG_MANAGED) != 0) { + pmap_pv_list_lock_pde(newpde & PG_PS_FRAME, lockp); pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp); + pmap_pv_list_unlock_pde(newpde & PG_PS_FRAME, lockp); + } /* * Propagate the PAT index to its proper position. @@ -5970,7 +6118,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { - struct rwlock *lock; + PVLL *lock; pd_entry_t *pde; pt_entry_t *pte, PG_G, PG_A, PG_M, PG_RW, PG_V; pt_entry_t newpte, origpte; @@ -6176,7 +6324,7 @@ pv = get_pv_entry(pmap, &lock); pv->pv_va = va; } - CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); + CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((newpte & PG_RW) != 0) @@ -6229,7 +6377,7 @@ rv = KERN_SUCCESS; out: if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_UNLOCK(pmap); return (rv); } @@ -6243,7 +6391,7 @@ */ static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - struct rwlock **lockp) + PVLL **lockp) { pd_entry_t newpde; pt_entry_t PG_V; @@ -6294,7 +6442,7 @@ */ static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, - vm_page_t m, struct rwlock **lockp) + vm_page_t m, PVLL **lockp) { struct spglist free; pd_entry_t oldpde, *pde; @@ -6442,7 +6590,7 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { - struct rwlock *lock; + PVLL *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; @@ -6467,7 +6615,7 @@ m = TAILQ_NEXT(m, listq); } if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_UNLOCK(pmap); } @@ -6483,19 +6631,19 @@ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { - struct rwlock *lock; + PVLL *lock; lock = NULL; PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, - vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) + vm_prot_t prot, vm_page_t mpte, PVLL **lockp) { pt_entry_t newpte, *pte, PG_V; @@ -6789,7 +6937,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { - struct rwlock *lock; + PVLL *lock; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t *pde, srcptepaddr; @@ -6926,7 +7074,7 @@ } out: if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } @@ -7041,7 +7189,7 @@ pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; - struct rwlock *lock; + PVLL *lock; pv_entry_t pv; int loops = 0; boolean_t rv; @@ -7050,7 +7198,7 @@ ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; lock = VM_PAGE_TO_PV_LIST_LOCK(m); - rw_rlock(lock); + pmap_pv_list_lock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; @@ -7072,7 +7220,7 @@ break; } } - rw_runlock(lock); + pmap_pv_list_unlock(lock); return (rv); } @@ -7085,7 +7233,7 @@ int pmap_page_wired_mappings(vm_page_t m) { - struct rwlock *lock; + PVLL *lock; struct md_page *pvh; pmap_t pmap; pt_entry_t *pte; @@ -7095,16 +7243,16 @@ if ((m->oflags & VPO_UNMANAGED) != 0) return (0); lock = VM_PAGE_TO_PV_LIST_LOCK(m); - rw_rlock(lock); + pmap_pv_list_lock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = 
PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; - rw_runlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_rlock(lock); + pmap_pv_list_lock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; @@ -7122,9 +7270,9 @@ if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; - rw_runlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_rlock(lock); + pmap_pv_list_lock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); @@ -7137,7 +7285,7 @@ PMAP_UNLOCK(pmap); } } - rw_runlock(lock); + pmap_pv_list_unlock(lock); return (count); } @@ -7148,17 +7296,17 @@ boolean_t pmap_page_is_mapped(vm_page_t m) { - struct rwlock *lock; + PVLL *lock; boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); lock = VM_PAGE_TO_PV_LIST_LOCK(m); - rw_rlock(lock); + pmap_pv_list_lock(lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); - rw_runlock(lock); + pmap_pv_list_unlock(lock); return (rv); } @@ -7199,7 +7347,7 @@ pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; - struct rwlock *lock; + PVLL *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, i, idx; @@ -7363,7 +7511,7 @@ } } if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); pmap_invalidate_all(pmap); pmap_pkru_deassign_all(pmap); free_pv_chunk_batch((struct pv_chunklist *)&free_chunks); @@ -7374,7 +7522,7 @@ static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { - struct rwlock *lock; + PVLL *lock; pv_entry_t pv; struct md_page *pvh; pt_entry_t *pte, mask; @@ -7385,15 +7533,15 @@ rv = FALSE; lock = VM_PAGE_TO_PV_LIST_LOCK(m); - rw_rlock(lock); + pmap_pv_list_lock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; - rw_runlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_rlock(lock); + pmap_pv_list_lock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; @@ -7423,9 +7571,9 @@ if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; - rw_runlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_rlock(lock); + pmap_pv_list_lock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); @@ -7451,7 +7599,7 @@ } } out: - rw_runlock(lock); + pmap_pv_list_unlock(lock); return (rv); } @@ -7524,7 +7672,7 @@ { struct md_page *pvh; pmap_t pmap; - struct rwlock *lock; + PVLL *lock; pv_entry_t next_pv, pv; pd_entry_t *pde; pt_entry_t oldpte, *pte, PG_M, PG_RW; @@ -7542,17 +7690,17 @@ pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry_pv_loop: - rw_wlock(lock); + pmap_pv_list_lock(lock); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_wlock(lock); + pmap_pv_list_lock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); - rw_wunlock(lock); + pmap_pv_list_unlock(lock); goto retry_pv_loop; } } @@ -7571,13 +7719,13 @@ if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_wlock(lock); + pmap_pv_list_lock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); - rw_wunlock(lock); + pmap_pv_list_unlock(lock); goto retry_pv_loop; } } @@ -7600,7 +7748,7 @@ } PMAP_UNLOCK(pmap); } - rw_wunlock(lock); + pmap_pv_list_unlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); pmap_delayed_invl_wait(m); } @@ -7658,7 +7806,7 @@ struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; - struct rwlock *lock; + PVLL *lock; pd_entry_t oldpde, *pde; pt_entry_t *pte, PG_A, PG_M, PG_RW; vm_offset_t va; @@ -7674,7 +7822,7 @@ pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); - rw_wlock(lock); + pmap_pv_list_lock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) @@ -7686,9 +7834,9 @@ pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_wlock(lock); + pmap_pv_list_lock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto retry; @@ -7792,9 +7940,9 @@ if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_wlock(lock); + pmap_pv_list_lock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; @@ -7845,7 +7993,7 @@ } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: - rw_wunlock(lock); + pmap_pv_list_unlock(lock); vm_page_free_pages_toq(&free, true); return (cleared + not_cleared); } @@ -7858,7 +8006,7 @@ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { - struct rwlock *lock; + PVLL *lock; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t oldpde, *pde; @@ -7915,7 +8063,7 @@ lock = NULL; if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); /* * The large page mapping was destroyed. @@ -7947,7 +8095,7 @@ anychanged = true; } if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); } if (va_next > eva) va_next = eva; @@ -8004,7 +8152,7 @@ pv_entry_t next_pv, pv; pd_entry_t oldpde, *pde; pt_entry_t *pte, PG_M, PG_RW; - struct rwlock *lock; + PVLL *lock; vm_offset_t va; int md_gen, pvh_gen; @@ -8017,15 +8165,15 @@ pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); lock = VM_PAGE_TO_PV_LIST_LOCK(m); - rw_wlock(lock); + pmap_pv_list_lock(lock); restart: TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_wlock(lock); + pmap_pv_list_lock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; @@ -8057,9 +8205,9 @@ if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_LOCK(pmap); - rw_wlock(lock); + pmap_pv_list_lock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; @@ -8077,7 +8225,7 @@ } PMAP_UNLOCK(pmap); } - rw_wunlock(lock); + pmap_pv_list_unlock(lock); } /* @@ -9026,7 +9174,7 @@ pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype) { int rv; - struct rwlock *lock; + PVLL *lock; #if VM_NRESERVLEVEL > 0 vm_page_t m, mpte; #endif @@ -9113,7 +9261,7 @@ rv = 0; /* success */ done: if (lock != NULL) - rw_wunlock(lock); + pmap_pv_list_unlock(lock); PMAP_UNLOCK(pmap); return (rv); } Index: sys/amd64/include/pmap.h =================================================================== --- sys/amd64/include/pmap.h +++ sys/amd64/include/pmap.h @@ -312,7 +312,9 @@ struct md_page { TAILQ_HEAD(, pv_entry) pv_list; /* (p) */ int pv_gen; /* (p) */ - int pat_mode; + uint8_t pv_lock; + uint8_t pat_mode; + uint8_t pad0[2]; }; enum pmap_type { Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -3742,6 +3742,7 @@ kern/kern_mtxpool.c standard kern/kern_mutex.c standard kern/kern_ntptime.c standard +kern/kern_obm.c standard kern/kern_osd.c standard kern/kern_physio.c standard kern/kern_pmc.c standard Index: sys/kern/kern_obm.c =================================================================== --- /dev/null +++ sys/kern/kern_obm.c @@ -0,0 +1,132 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include + +static SYSCTL_NODE(_debug, OID_AUTO, obm, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + ""); +static u_long obm_slow_lock; +SYSCTL_LONG(_debug_obm, OID_AUTO, slow_lock, CTLFLAG_RD, + &obm_slow_lock, 0, + ""); +static u_long obm_slow_unlock; +SYSCTL_LONG(_debug_obm, OID_AUTO, slow_unlock, CTLFLAG_RD, + &obm_slow_unlock, 0, + ""); + +void +obm_init_lo(struct lock_object *lo, const char *name) +{ + bzero(lo, sizeof(*lo)); + lo->lo_name = name; +} + +void +obm_init(uint8_t *byte) +{ + *byte = OBM_UNLOCKED; +} + +bool +obm_trylock(uint8_t *byte) +{ + return (atomic_cmpset_acq_char(byte, OBM_UNLOCKED, OBM_LOCKED) != 0); +} + +void +obm_lock(uint8_t *byte, struct lock_object *lo) +{ + struct turnstile *ts; + struct lock_delay_arg lda; + uint8_t v; + + v = OBM_UNLOCKED; + if (atomic_fcmpset_acq_char(byte, &v, OBM_LOCKED)) + return; + + atomic_add_long(&obm_slow_lock, 1); + lock_delay_arg_init(&lda, &locks_delay); + for (;;) { + lock_delay(&lda); +reload: + v = atomic_load_char(byte); + if (v == OBM_UNLOCKED) { + if (atomic_fcmpset_acq_char(byte, &v, OBM_LOCKED)) + break; + continue; + } + + ts = turnstile_trywait(lo); + v = atomic_load_char(byte); +cancel_turnstile: + if (v == OBM_UNLOCKED) { + turnstile_cancel(ts); + continue; + } + if ((v & OBM_CONTESTED) == 0 && + atomic_fcmpset_char(byte, &v, v | OBM_CONTESTED) == 0) + goto cancel_turnstile; + turnstile_wait(ts, NULL, TS_SHARED_QUEUE); + goto reload; + } +} + +void +obm_unlock(uint8_t *byte, struct lock_object *lo) +{ + struct turnstile *ts; + uint8_t v; + + v = OBM_LOCKED; + if (atomic_fcmpset_rel_char(byte, &v, OBM_UNLOCKED)) + return; + MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED)); + atomic_add_long(&obm_slow_unlock, 1); + turnstile_chain_lock(lo); + atomic_store_rel_char(byte, OBM_UNLOCKED); + ts = turnstile_lookup(lo); + if (ts != NULL) { + turnstile_broadcast(ts, TS_SHARED_QUEUE); + turnstile_unpend(ts); + } + turnstile_chain_unlock(lo); +} + Index: sys/sys/obm.h =================================================================== --- /dev/null +++ sys/sys/obm.h @@ -0,0 +1,66 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_OBM_H +#define _SYS_OBM_H + +/* One-Byte Lock */ + +#ifdef _KERNEL + +#include +#include +#include + +#define OBM_UNLOCKED 0x00 +#define OBM_LOCKED 0x02 +#define OBM_CONTESTED 0x01 + +void obm_init_lo(struct lock_object *lo, const char *name); +void obm_init(uint8_t *byte); +void obm_lock(uint8_t *byte, struct lock_object *lo); +void obm_unlock(uint8_t *byte, struct lock_object *lo); +bool obm_trylock(uint8_t *byte); + +__used static void +obm_assert_locked(uint8_t *byte) +{ +#ifdef INVARIANTS + uint8_t v; + + v = atomic_load_char(byte); + MPASS(v == OBM_LOCKED || v == (OBM_LOCKED | OBM_CONTESTED)); +#endif +} +#endif +#endif
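
A note on intended usage: pmap.c consumes the new OBM (one-byte lock) primitive by embedding the lock byte in each page's md_page and sharing a single struct lock_object per pv_table slot purely for turnstile queueing. The sketch below shows that pattern in isolation, a minimal example assuming only the interfaces declared in sys/sys/obm.h; the struct frob container, frob_lo, and the frob_* helpers are illustrative names, not part of this patch.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/obm.h>

/*
 * One lock_object can back many byte locks; it only provides the
 * turnstile queue and the name reported by debugging tools.
 */
static struct lock_object frob_lo;

struct frob {
	uint8_t	f_lock;		/* OBM byte protecting f_count */
	int	f_count;
};

static void
frob_setup(void)
{
	/* Called once, e.g. from a SYSINIT, before any frob is used. */
	obm_init_lo(&frob_lo, "frob obm");
}

static void
frob_init(struct frob *f)
{
	obm_init(&f->f_lock);
	f->f_count = 0;
}

static void
frob_bump(struct frob *f)
{
	obm_lock(&f->f_lock, &frob_lo);		/* blocks on the turnstile if contested */
	f->f_count++;
	obm_unlock(&f->f_lock, &frob_lo);	/* wakes waiters queued on frob_lo */
}

static bool
frob_try_bump(struct frob *f)
{
	if (!obm_trylock(&f->f_lock))
		return (false);
	f->f_count++;
	obm_unlock(&f->f_lock, &frob_lo);
	return (true);
}

The same lock_object must be passed to obm_lock() and obm_unlock() for a given byte, since the unlock side looks up the turnstile through it; this is why pmap_pv_list_lock() and pmap_pv_list_unlock() both recompute pv_list_lock_object() from the page's physical address.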