Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -375,8 +375,6 @@
 } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
 static int pmap_initialized;
 
-static struct rwlock_padalign pvh_global_lock;
-
 /*
  * Data for the pv entry allocation mechanism
  */
@@ -418,6 +416,127 @@
     CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
     "Count of saved TLB context on switch");
 
+static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
+    LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
+static struct mtx invl_gen_mtx;
+static u_long pmap_invl_gen = 0;
+
+/*
+ * Start a new Delayed Invalidation (DI) block of code, executed by
+ * the current thread.  A DI block is a code sequence that may remove
+ * a PTE mapping a page and modify the page's PV list, and then drop
+ * the PV list lock, before the stale PTE has been flushed from the
+ * TLBs of all processors.
+ */
+static void
+pmap_delayed_invl_started(void)
+{
+	struct pmap_invl_gen *invl_gen;
+	u_long currgen;
+
+	invl_gen = &curthread->td_md.md_invl_gen;
+	KASSERT(invl_gen->gen == 0, ("recursed invl_gen"));
+	mtx_lock(&invl_gen_mtx);
+	if (LIST_EMPTY(&pmap_invl_gen_tracker))
+		currgen = pmap_invl_gen;
+	else
+		currgen = LIST_FIRST(&pmap_invl_gen_tracker)->gen;
+	invl_gen->gen = currgen + 1;
+	LIST_INSERT_HEAD(&pmap_invl_gen_tracker, invl_gen, link);
+	mtx_unlock(&invl_gen_mtx);
+}
+
+/*
+ * Finish the DI block previously started by the current thread.  All
+ * TLB flushes required for the pages marked by pmap_delayed_invl_page()
+ * must have completed by the time the block finishes: no other CPU may
+ * still access the removed mappings of the participating pages.
+ *
+ * The function works by bumping the global DI generation count to the
+ * current thread's generation count, unless another thread entered its
+ * DI block earlier and has not yet finished it; in that case the bump
+ * is delegated to that earlier thread, to be done when its block finishes.
+ */
+static void
+pmap_delayed_invl_finished(void)
+{
+	struct pmap_invl_gen *invl_gen, *next;
+
+	invl_gen = &curthread->td_md.md_invl_gen;
+	KASSERT(invl_gen->gen != 0, ("missed invl_started"));
+	mtx_lock(&invl_gen_mtx);
+	next = LIST_NEXT(invl_gen, link);
+	if (next == NULL)
+		atomic_store_rel_long(&pmap_invl_gen, invl_gen->gen);
+	else
+		next->gen = invl_gen->gen;
+	LIST_REMOVE(invl_gen, link);
+	mtx_unlock(&invl_gen_mtx);
+	invl_gen->gen = 0;
+}
+
+#ifdef INVARIANTS
+static long invl_wait;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0,
+    "");
+#endif
+
+/*
+ * The pmap_delayed_invl_wait() function ensures that every DI block
+ * that was executing and needed to flush the TLB for the given page m
+ * has actually done so by the time the function returns.  Hence, if
+ * the page m has an empty PV list once pmap_delayed_invl_wait() has
+ * returned, no CPU holds a valid mapping for the page, neither in an
+ * active page table nor cached in a TLB.
+ *
+ * The function works by spinning until the global invalidation
+ * generation is bumped to at least the page's generation by the
+ * threads leaving DI blocks.
+ */
+static void
+pmap_delayed_invl_wait(vm_page_t m)
+{
+#ifdef INVARIANTS
+	boolean_t accounted = FALSE;
+#endif
+
+	while ((u_long)atomic_load_acq_long(&m->md.invl_gen) >
+	    (u_long)atomic_load_acq_long(&pmap_invl_gen)) {
+#ifdef INVARIANTS
+		if (!accounted) {
+			atomic_add_long(&invl_wait, 1);
+			accounted = TRUE;
+		}
+#endif
+		kern_yield(PRI_USER);	/* XXX */
+	}
+}
+
+/*
+ * Mark the given page m as participating in the current thread's DI
+ * block.  A waiter on the page's delayed invalidation that observed
+ * this block may not proceed until the block finishes.
+ *
+ * The function works by raising the page's invalidation generation
+ * count to at least the generation count of the current thread's DI
+ * block.  This forces a caller of pmap_delayed_invl_wait() to spin
+ * until the current thread has called pmap_delayed_invl_finished().
+ */
+static void
+pmap_delayed_invl_page(vm_page_t m)
+{
+	u_long old_gen, gen;
+
+	gen = curthread->td_md.md_invl_gen.gen;
+	if (gen == 0)
+		return;
+	do {
+		old_gen = m->md.invl_gen;
+		if (old_gen >= gen)
+			break;
+	} while (!atomic_cmpset_rel_long(&m->md.invl_gen, old_gen, gen));
+}
+
 /*
  * Crashdump maps.
  */
@@ -886,9 +1005,9 @@
 	kernel_pmap->pm_flags = pmap_flags;
 
 	/*
-	 * Initialize the global pv list lock.
+	 * Initialize the TLB invalidation generation number lock.
 	 */
-	rw_init(&pvh_global_lock, "pmap pv global");
+	mtx_init(&invl_gen_mtx, "invlgn", NULL, MTX_DEF);
 
 	/*
 	 * Reserve some special page table entries/VA space for temporary
@@ -2313,9 +2432,7 @@
 		if (lockp != NULL) {
 			RELEASE_PV_LIST_LOCK(lockp);
 			PMAP_UNLOCK(pmap);
-			rw_runlock(&pvh_global_lock);
 			VM_WAIT;
-			rw_rlock(&pvh_global_lock);
 			PMAP_LOCK(pmap);
 		}
 
@@ -2719,7 +2836,6 @@
 	uint64_t inuse;
 	int bit, field, freed;
 
-	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
 	KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
 	pmap = NULL;
@@ -2727,6 +2843,7 @@
 	PG_G = PG_A = PG_M = PG_RW = 0;
 	SLIST_INIT(&free);
 	TAILQ_INIT(&new_tail);
+	pmap_delayed_invl_started();
 	mtx_lock(&pv_chunks_mutex);
 	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) {
 		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
@@ -2737,6 +2854,8 @@
 			if (pmap != locked_pmap)
 				PMAP_UNLOCK(pmap);
 		}
+		pmap_delayed_invl_finished();
+		pmap_delayed_invl_started();
 		pmap = pc->pc_pmap;
 		/* Avoid deadlock and lock recursion.
*/ if (pmap > locked_pmap) { @@ -2790,6 +2909,7 @@ PGA_WRITEABLE); } } + pmap_delayed_invl_page(m); pc->pc_map[field] |= 1UL << bit; pmap_unuse_pt(pmap, va, *pde, &free); freed++; @@ -2831,6 +2951,7 @@ if (pmap != locked_pmap) PMAP_UNLOCK(pmap); } + pmap_delayed_invl_finished(); if (m_pc == NULL && !SLIST_EMPTY(&free)) { m_pc = SLIST_FIRST(&free); SLIST_REMOVE_HEAD(&free, plinks.s.ss); @@ -2851,7 +2972,6 @@ struct pv_chunk *pc; int idx, field, bit; - rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); @@ -2908,7 +3028,6 @@ struct pv_chunk *pc; vm_page_t m; - rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: @@ -3004,7 +3123,6 @@ int avail, free; vm_page_t m; - rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); @@ -3074,7 +3192,6 @@ { pv_entry_t pv; - rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); @@ -3101,7 +3218,6 @@ vm_page_t m; int bit, field; - rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 2mpage aligned")); @@ -3168,7 +3284,6 @@ vm_offset_t va_last; vm_page_t m; - rw_assert(&pvh_global_lock, RA_LOCKED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 2mpage aligned")); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); @@ -3221,7 +3336,6 @@ { pv_entry_t pv; - rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { @@ -3245,7 +3359,6 @@ struct md_page *pvh; pv_entry_t pv; - rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. 
*/ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { @@ -3503,6 +3616,7 @@ if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); + pmap_delayed_invl_page(m); } } if (pmap == kernel_pmap) { @@ -3556,6 +3670,7 @@ if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } + pmap_delayed_invl_page(m); } return (pmap_unuse_pt(pmap, va, ptepde, free)); } @@ -3614,7 +3729,7 @@ anyvalid = 0; SLIST_INIT(&free); - rw_rlock(&pvh_global_lock); + pmap_delayed_invl_started(); PMAP_LOCK(pmap); /* @@ -3729,8 +3844,8 @@ out: if (anyvalid) pmap_invalidate_all(pmap); - rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); + pmap_delayed_invl_finished(); pmap_free_zero_pages(&free); } @@ -3753,30 +3868,55 @@ struct md_page *pvh; pv_entry_t pv; pmap_t pmap; + struct rwlock *lock; pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW; pd_entry_t *pde; vm_offset_t va; struct spglist free; + int pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); - rw_wlock(&pvh_global_lock); + lock = VM_PAGE_TO_PV_LIST_LOCK(m); + pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); +retry: + rw_wlock(lock); if ((m->flags & PG_FICTITIOUS) != 0) goto small_mappings; - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen) { + rw_wunlock(lock); + PMAP_UNLOCK(pmap); + goto retry; + } + } va = pv->pv_va; pde = pmap_pde(pmap, va); - (void)pmap_demote_pde(pmap, pde, va); + (void)pmap_demote_pde_locked(pmap, pde, va, &lock); PMAP_UNLOCK(pmap); } small_mappings: while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); - PMAP_LOCK(pmap); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + md_gen = m->md.pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { + rw_wunlock(lock); + PMAP_UNLOCK(pmap); + goto retry; + } + } PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); @@ -3804,7 +3944,8 @@ PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); - rw_wunlock(&pvh_global_lock); + rw_wunlock(lock); + pmap_delayed_invl_wait(m); pmap_free_zero_pages(&free); } @@ -3864,7 +4005,7 @@ pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V; - boolean_t anychanged, pv_lists_locked; + boolean_t anychanged; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { @@ -3880,8 +4021,6 @@ PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); - pv_lists_locked = FALSE; -resume: anychanged = FALSE; PMAP_LOCK(pmap); @@ -3932,25 +4071,11 @@ if (pmap_protect_pde(pmap, pde, sva, prot)) anychanged = TRUE; continue; - } else { - if (!pv_lists_locked) { - pv_lists_locked = TRUE; - if (!rw_try_rlock(&pvh_global_lock)) { - if (anychanged) - pmap_invalidate_all( - pmap); - PMAP_UNLOCK(pmap); - rw_rlock(&pvh_global_lock); - goto resume; - } - } - if (!pmap_demote_pde(pmap, pde, sva)) { - /* - * The large page mapping was - * destroyed. - */ - continue; - } + } else if (!pmap_demote_pde(pmap, pde, sva)) { + /* + * The large page mapping was destroyed. 
+			 */
+			continue;
 		}
 	}
@@ -3990,8 +4115,6 @@
 	}
 	if (anychanged)
 		pmap_invalidate_all(pmap);
-	if (pv_lists_locked)
-		rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
@@ -4135,6 +4258,11 @@
  *	NB:  This is the only routine which MAY NOT lazy-evaluate
  *	or lose information.  That is, this routine must actually
  *	insert this page into the given map NOW.
+ *
+ *	Whenever this function destroys a mapping and its PV entry, and
+ *	that removal requires a TLB invalidation, the invalidation is
+ *	performed before the PV list lock is released, so no
+ *	pmap_delayed_invl_page() calls are needed here.
  */
 int
 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
@@ -4196,7 +4324,6 @@
 
 	mpte = NULL;
 	lock = NULL;
-	rw_rlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 
 	/*
@@ -4223,7 +4350,6 @@
 		if (mpte == NULL && nosleep) {
 			if (lock != NULL)
 				rw_wunlock(lock);
-			rw_runlock(&pvh_global_lock);
 			PMAP_UNLOCK(pmap);
 			return (KERN_RESOURCE_SHORTAGE);
 		}
@@ -4356,7 +4482,6 @@
 
 	if (lock != NULL)
 		rw_wunlock(lock);
-	rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	return (KERN_SUCCESS);
 }
@@ -4377,7 +4502,6 @@
 	struct spglist free;
 
 	PG_V = pmap_valid_bit(pmap);
-	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) {
@@ -4469,7 +4593,6 @@
 	mpte = NULL;
 	m = m_start;
 	lock = NULL;
-	rw_rlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
 		va = start + ptoa(diff);
@@ -4484,7 +4607,6 @@
 	}
 	if (lock != NULL)
 		rw_wunlock(lock);
-	rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
@@ -4503,12 +4625,10 @@
 	struct rwlock *lock;
 
 	lock = NULL;
-	rw_rlock(&pvh_global_lock);
 	PMAP_LOCK(pmap);
 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
 	if (lock != NULL)
 		rw_wunlock(lock);
-	rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 }
 
@@ -4524,7 +4644,6 @@
 	    (m->oflags & VPO_UNMANAGED) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
 	PG_V = pmap_valid_bit(pmap);
-	rw_assert(&pvh_global_lock, RA_LOCKED);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
 	/*
@@ -4741,8 +4860,11 @@
  *	must have the wired attribute set.  In contrast, invalid mappings
  *	cannot have the wired attribute set, so they are ignored.
  *
- *	The wired attribute of the page table entry is not a hardware feature,
- *	so there is no need to invalidate any TLB entries.
+ *	The wired attribute of the page table entry is not a hardware
+ *	feature, so there is no need to invalidate any TLB entries.
+ *	Since pmap_demote_pde() for a wired entry must never fail,
+ *	pmap_delayed_invl_started()/finished() calls around this
+ *	function are not needed.
  */
 void
 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
@@ -4752,11 +4874,8 @@
 	pdp_entry_t *pdpe;
 	pd_entry_t *pde;
 	pt_entry_t *pte, PG_V;
-	boolean_t pv_lists_locked;
 
 	PG_V = pmap_valid_bit(pmap);
-	pv_lists_locked = FALSE;
-resume:
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = va_next) {
 		pml4e = pmap_pml4e(pmap, sva);
@@ -4794,15 +4913,6 @@
 				    PAGE_SIZE;
 				continue;
 			} else {
-				if (!pv_lists_locked) {
-					pv_lists_locked = TRUE;
-					if (!rw_try_rlock(&pvh_global_lock)) {
-						PMAP_UNLOCK(pmap);
-						rw_rlock(&pvh_global_lock);
-						/* Repeat sva.
*/ - goto resume; - } - } if (!pmap_demote_pde(pmap, pde, sva)) panic("pmap_unwire: demotion failed"); } @@ -4826,8 +4936,6 @@ pmap->pm_stats.wired_count--; } } - if (pv_lists_locked) - rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } @@ -4868,7 +4976,6 @@ return; lock = NULL; - rw_rlock(&pvh_global_lock); if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); @@ -5003,7 +5110,6 @@ out: if (lock != NULL) rw_wunlock(lock); - rw_runlock(&pvh_global_lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } @@ -5116,7 +5222,6 @@ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; - rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { @@ -5141,7 +5246,6 @@ } } rw_runlock(lock); - rw_runlock(&pvh_global_lock); return (rv); } @@ -5163,7 +5267,6 @@ if ((m->oflags & VPO_UNMANAGED) != 0) return (0); - rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: @@ -5208,7 +5311,6 @@ } } rw_runlock(lock); - rw_runlock(&pvh_global_lock); return (count); } @@ -5224,14 +5326,12 @@ if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); - rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_runlock(lock); - rw_runlock(&pvh_global_lock); return (rv); } @@ -5294,7 +5394,6 @@ PG_RW = pmap_rw_bit(pmap); SLIST_INIT(&free); - rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; @@ -5427,7 +5526,6 @@ if (lock != NULL) rw_wunlock(lock); pmap_invalidate_all(pmap); - rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } @@ -5445,7 +5543,6 @@ boolean_t rv; rv = FALSE; - rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: @@ -5514,7 +5611,6 @@ } out: rw_runlock(lock); - rw_runlock(&pvh_global_lock); return (rv); } @@ -5608,7 +5704,6 @@ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; - rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry_pv_loop: @@ -5675,7 +5770,7 @@ } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); - rw_runlock(&pvh_global_lock); + pmap_delayed_invl_wait(m); } static __inline boolean_t @@ -5736,12 +5831,12 @@ KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); + pmap_delayed_invl_started(); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); pvh = pa_to_pvh(pa); - rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: not_cleared = 0; @@ -5901,7 +5996,7 @@ not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); - rw_runlock(&pvh_global_lock); + pmap_delayed_invl_finished(); pmap_free_zero_pages(&free); return (cleared + not_cleared); } @@ -5921,10 +6016,11 @@ pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V; vm_offset_t va_next; vm_page_t m; - boolean_t anychanged, pv_lists_locked; + boolean_t anychanged; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; + pmap_delayed_invl_started(); /* * A/D bit emulation requires an alternate code path when clearing @@ -5940,9 +6036,6 @@ PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); - - pv_lists_locked = FALSE; -resume: anychanged = FALSE; PMAP_LOCK(pmap); for (; sva < eva; 
sva = va_next) {
@@ -5970,16 +6063,6 @@
 		else if ((oldpde & PG_PS) != 0) {
 			if ((oldpde & PG_MANAGED) == 0)
 				continue;
-			if (!pv_lists_locked) {
-				pv_lists_locked = TRUE;
-				if (!rw_try_rlock(&pvh_global_lock)) {
-					if (anychanged)
-						pmap_invalidate_all(pmap);
-					PMAP_UNLOCK(pmap);
-					rw_rlock(&pvh_global_lock);
-					goto resume;
-				}
-			}
 			lock = NULL;
 			if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
 				if (lock != NULL)
@@ -6039,9 +6122,8 @@
 	}
 	if (anychanged)
 		pmap_invalidate_all(pmap);
-	if (pv_lists_locked)
-		rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
+	pmap_delayed_invl_finished();
 }
 
 /*
@@ -6073,7 +6155,6 @@
 	if ((m->aflags & PGA_WRITEABLE) == 0)
 		return;
 	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
-	rw_rlock(&pvh_global_lock);
 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
 	rw_wlock(lock);
 restart:
@@ -6149,7 +6230,6 @@
 		PMAP_UNLOCK(pmap);
 	}
 	rw_wunlock(lock);
-	rw_runlock(&pvh_global_lock);
 }
 
 /*
@@ -6851,7 +6931,6 @@
 	vm_page_t m, mpte;
 	pd_entry_t *pde;
 	pt_entry_t *pte, PG_A, PG_M, PG_RW, PG_V;
-	boolean_t pv_lists_locked;
 
 	KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE,
 	    ("pmap_emulate_accessed_dirty: invalid fault type %d", ftype));
@@ -6866,8 +6945,6 @@
 
 	rv = -1;
 	lock = NULL;
-	pv_lists_locked = FALSE;
-retry:
 	PMAP_LOCK(pmap);
 
 	pde = pmap_pde(pmap, va);
@@ -6918,14 +6995,6 @@
 	    pmap_ps_enabled(pmap) &&
 	    (m->flags & PG_FICTITIOUS) == 0 &&
 	    vm_reserv_level_iffullpop(m) == 0) {
-		if (!pv_lists_locked) {
-			pv_lists_locked = TRUE;
-			if (!rw_try_rlock(&pvh_global_lock)) {
-				PMAP_UNLOCK(pmap);
-				rw_rlock(&pvh_global_lock);
-				goto retry;
-			}
-		}
 		pmap_promote_pde(pmap, pde, va, &lock);
 #ifdef INVARIANTS
 		atomic_add_long(&ad_emulation_superpage_promotions, 1);
@@ -6941,8 +7010,6 @@
 done:
 	if (lock != NULL)
 		rw_wunlock(lock);
-	if (pv_lists_locked)
-		rw_runlock(&pvh_global_lock);
 	PMAP_UNLOCK(pmap);
 	return (rv);
 }
Index: sys/amd64/amd64/vm_machdep.c
===================================================================
--- sys/amd64/amd64/vm_machdep.c
+++ sys/amd64/amd64/vm_machdep.c
@@ -236,6 +236,7 @@
 	/* Setup to release spin count in fork_exit(). */
 	td2->td_md.md_spinlock_count = 1;
 	td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+	td2->td_md.md_invl_gen.gen = 0;
 
 	/* As an i386, do not copy io permission bitmap. */
 	pcb2->pcb_tssp = NULL;
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -288,6 +288,7 @@
 	TAILQ_HEAD(,pv_entry)	pv_list;
 	int			pv_gen;
 	int			pat_mode;
+	u_long			invl_gen;	/* long to avoid wraparounds */
 };
 
 enum pmap_type {
Index: sys/amd64/include/proc.h
===================================================================
--- sys/amd64/include/proc.h
+++ sys/amd64/include/proc.h
@@ -33,6 +33,7 @@
 #ifndef _MACHINE_PROC_H_
 #define	_MACHINE_PROC_H_
 
+#include <sys/queue.h>
 #include <machine/segments.h>
 
 struct proc_ldt {
@@ -40,6 +41,11 @@
 	int		ldt_refcnt;
 };
 
+struct pmap_invl_gen {
+	u_long gen;
+	LIST_ENTRY(pmap_invl_gen) link;
+};
+
 /*
  * Machine-dependent part of the proc structure for AMD64.
  */
@@ -47,6 +53,7 @@
 	int	md_spinlock_count;	/* (k) */
 	register_t md_saved_flags;	/* (k) */
 	register_t md_spurflt_addr;	/* (k) Spurious page fault address. */
+	struct pmap_invl_gen md_invl_gen;	/* (k) */
 };
 
 struct mdproc {
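
A note for reviewers, not part of the patch: below is a minimal userspace
sketch of the generation-count protocol that the new pmap_delayed_invl_*()
functions implement, included only to make the algorithm easier to follow
outside the pmap code.  The di_*() names, the pthread mutex, the C11 atomics,
and the toy struct page are stand-ins for pmap_delayed_invl_*(), invl_gen_mtx,
the atomic(9) operations, and vm_page_t; they are illustrative assumptions,
not the kernel interfaces.

#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <sys/queue.h>

struct invl_gen {
	unsigned long gen;
	LIST_ENTRY(invl_gen) link;
};

static LIST_HEAD(, invl_gen) tracker = LIST_HEAD_INITIALIZER(&tracker);
static pthread_mutex_t tracker_mtx = PTHREAD_MUTEX_INITIALIZER;
static _Atomic unsigned long global_gen;	/* pmap_invl_gen analogue */
static _Thread_local struct invl_gen thread_gen; /* md_invl_gen analogue */

struct page {
	_Atomic unsigned long invl_gen;		/* m->md.invl_gen analogue */
};

/* Enter a DI block: take a generation one past the newest active block. */
static void
di_started(void)
{
	assert(thread_gen.gen == 0);
	pthread_mutex_lock(&tracker_mtx);
	thread_gen.gen = (LIST_EMPTY(&tracker) ? atomic_load(&global_gen) :
	    LIST_FIRST(&tracker)->gen) + 1;
	LIST_INSERT_HEAD(&tracker, &thread_gen, link);
	pthread_mutex_unlock(&tracker_mtx);
}

/*
 * Leave a DI block: publish our generation globally, unless an older block
 * is still active, in which case hand our generation to that block.
 */
static void
di_finished(void)
{
	struct invl_gen *older;

	assert(thread_gen.gen != 0);
	pthread_mutex_lock(&tracker_mtx);
	older = LIST_NEXT(&thread_gen, link);
	if (older == NULL)
		atomic_store(&global_gen, thread_gen.gen);
	else
		older->gen = thread_gen.gen;
	LIST_REMOVE(&thread_gen, link);
	pthread_mutex_unlock(&tracker_mtx);
	thread_gen.gen = 0;
}

/* Tag a page with the current DI block's generation, if any. */
static void
di_page(struct page *m)
{
	unsigned long old, gen;

	gen = thread_gen.gen;
	if (gen == 0)
		return;
	old = atomic_load(&m->invl_gen);
	while (old < gen &&
	    !atomic_compare_exchange_weak(&m->invl_gen, &old, gen))
		;
}

/* Wait until every DI block that tagged the page has finished. */
static void
di_wait(struct page *m)
{
	while (atomic_load(&m->invl_gen) > atomic_load(&global_gen))
		sched_yield();
}

The invariant the sketch preserves is the one the patch relies on: the global
generation only advances after every DI block with a smaller generation has
finished, so a waiter that observes a page generation greater than the global
generation cannot return before the corresponding TLB flushes are complete.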