D5747.id15946.diff

Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -375,8 +375,6 @@
} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
static int pmap_initialized;
-static struct rwlock_padalign pvh_global_lock;
-
/*
* Data for the pv entry allocation mechanism
*/
@@ -418,6 +416,135 @@
CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
"Count of saved TLB context on switch");
+static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
+ LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
+static struct mtx invl_gen_mtx;
+static u_long pmap_invl_gen = 0;
+
+/*
+ * Start a new Delayed Invalidation (DI) block of code, executed by
+ * the current thread. Within a DI block, the thread may remove a
+ * PTE that maps a page and drop that page's PV list entry, and may
+ * release the PV list lock before the TLBs on all processors have
+ * been consistently cleared of the removed PTE.
+ */
+static void
+pmap_delayed_invl_started(void)
+{
+ struct pmap_invl_gen *invl_gen;
+ u_long currgen;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ KASSERT(invl_gen->gen == 0, ("recursed invl_gen"));
+ mtx_lock(&invl_gen_mtx);
+ if (LIST_EMPTY(&pmap_invl_gen_tracker))
+ currgen = pmap_invl_gen;
+ else
+ currgen = LIST_FIRST(&pmap_invl_gen_tracker)->gen;
+ invl_gen->gen = currgen + 1;
+ LIST_INSERT_HEAD(&pmap_invl_gen_tracker, invl_gen, link);
+ mtx_unlock(&invl_gen_mtx);
+}
+
+/*
+ * Finish the DI block, previously started by the current thread. All
+ * required TLB flushes for the pages marked by pmap_delayed_invl_page()
+ * must be finished by the time the block is finished: no other CPU may
+ * still be able to access the removed mappings of the participating
+ * pages.
+ *
+ * The function works by bumping the global DI generation count up to
+ * the current thread's generation count, unless a DI block that
+ * started earlier is still in progress. In the latter case the bump
+ * is postponed: the earlier block's generation count is raised to the
+ * current thread's, and that block performs the bump when it finishes.
+ */
+static void
+pmap_delayed_invl_finished(void)
+{
+ struct pmap_invl_gen *invl_gen, *next;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ KASSERT(invl_gen->gen != 0, ("missed invl_started"));
+ mtx_lock(&invl_gen_mtx);
+ next = LIST_NEXT(invl_gen, link);
+ if (next == NULL)
+ atomic_store_rel_long(&pmap_invl_gen, invl_gen->gen);
+ else
+ next->gen = invl_gen->gen;
+ LIST_REMOVE(invl_gen, link);
+ mtx_unlock(&invl_gen_mtx);
+ invl_gen->gen = 0;
+}
+
+static bool
+pmap_delayed_invl_active(void)
+{
+
+ return (curthread->td_md.md_invl_gen.gen != 0);
+}
+
+#ifdef INVARIANTS
+static long invl_wait;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0,
+ "");
+#endif
+
+/*
+ * The pmap_delayed_invl_wait() function ensures that all currently
+ * executing DI blocks that need to flush the TLB for the given page m
+ * have actually done so by the time the function returns. If the
+ * page m has an empty PV list and pmap_delayed_invl_wait() has been
+ * called, then no CPU can have a valid mapping for m, either in an
+ * active page table or cached in a TLB.
+ *
+ * The function works by spinning until the global invalidation
+ * generation is bumped past the page's generation by the threads
+ * leaving their DI blocks.
+ */
+static void
+pmap_delayed_invl_wait(vm_page_t m)
+{
+#ifdef INVARIANTS
+ boolean_t accounted = FALSE;
+#endif
+
+ while ((u_long)atomic_load_acq_long(&m->md.invl_gen) >
+ (u_long)atomic_load_acq_long(&pmap_invl_gen)) {
+#ifdef INVARIANTS
+ if (!accounted) {
+ atomic_add_long(&invl_wait, 1);
+ accounted = TRUE;
+ }
+#endif
+ kern_yield(PRI_USER); /* XXX */
+ }
+}
+
+/*
+ * Mark the given page m as participating in the current thread's DI
+ * block. A waiter on the page's delayed invalidation that observes
+ * this block is not allowed to proceed until the block finishes.
+ *
+ * The function works by raising the page's invalidation generation
+ * count to at least the generation count of the current thread's DI
+ * block. This forces a caller of pmap_delayed_invl_wait() to spin
+ * until the current thread has called pmap_delayed_invl_finished().
+ */
+static void
+pmap_delayed_invl_page(vm_page_t m)
+{
+ u_long old_gen, gen;
+
+ gen = curthread->td_md.md_invl_gen.gen;
+ if (gen == 0)
+ return;
+ do {
+ old_gen = m->md.invl_gen;
+ if (old_gen >= gen)
+ break;
+ } while (!atomic_cmpset_rel_long(&m->md.invl_gen, old_gen, gen));
+}
+
/*
* Crashdump maps.
*/
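The four functions above make up the whole delayed-invalidation protocol added by this patch. Purely as an illustration of the generation-tracking idea, here is a minimal userspace model using a pthread mutex, C11 atomics, and <sys/queue.h>; every identifier in it (di_started(), di_finished(), di_page(), di_wait(), di_self, and so on) is invented for the sketch and does not exist in the patch.

/*
 * Userspace sketch of the DI generation scheme (illustrative names only).
 */
#include <sys/queue.h>
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdbool.h>

struct di_gen {
	unsigned long gen;		/* 0 means "not in a DI block" */
	LIST_ENTRY(di_gen) link;
};

static LIST_HEAD(, di_gen) di_tracker = LIST_HEAD_INITIALIZER(di_tracker);
static pthread_mutex_t di_mtx = PTHREAD_MUTEX_INITIALIZER;
static _Atomic unsigned long di_global_gen;	/* all blocks <= this have finished */

/* Per-thread DI state; the patch keeps this in td_md.md_invl_gen. */
static _Thread_local struct di_gen di_self;

/* Enter a DI block: claim a generation one past the newest open block. */
static void
di_started(void)
{
	pthread_mutex_lock(&di_mtx);
	if (LIST_EMPTY(&di_tracker))
		di_self.gen = atomic_load(&di_global_gen) + 1;
	else
		di_self.gen = LIST_FIRST(&di_tracker)->gen + 1;
	LIST_INSERT_HEAD(&di_tracker, &di_self, link);
	pthread_mutex_unlock(&di_mtx);
}

/*
 * Leave a DI block. Only the oldest open block may publish its
 * generation; a younger block hands its generation to the next older
 * one, so the global count never overtakes a block that is still open.
 */
static void
di_finished(void)
{
	struct di_gen *older;

	pthread_mutex_lock(&di_mtx);
	older = LIST_NEXT(&di_self, link);
	if (older == NULL)
		atomic_store_explicit(&di_global_gen, di_self.gen,
		    memory_order_release);
	else
		older->gen = di_self.gen;
	LIST_REMOVE(&di_self, link);
	pthread_mutex_unlock(&di_mtx);
	di_self.gen = 0;
}

/* Tag an object: raise its generation to at least this block's. */
static void
di_page(_Atomic unsigned long *obj_gen)
{
	unsigned long old;

	if (di_self.gen == 0)
		return;
	old = atomic_load(obj_gen);
	while (old < di_self.gen &&
	    !atomic_compare_exchange_weak(obj_gen, &old, di_self.gen))
		;		/* CAS failure reloads 'old'; recheck the bound */
}

/* Wait until every DI block that tagged the object has finished. */
static void
di_wait(_Atomic unsigned long *obj_gen)
{
	while (atomic_load_explicit(obj_gen, memory_order_acquire) >
	    atomic_load_explicit(&di_global_gen, memory_order_acquire))
		sched_yield();
}
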
@@ -886,9 +1013,9 @@
kernel_pmap->pm_flags = pmap_flags;
/*
- * Initialize the global pv list lock.
+ * Initialize the TLB invalidation generation number lock.
*/
- rw_init(&pvh_global_lock, "pmap pv global");
+ mtx_init(&invl_gen_mtx, "invlgn", NULL, MTX_DEF);
/*
* Reserve some special page table entries/VA space for temporary
@@ -2297,6 +2424,7 @@
{
vm_page_t m, pdppg, pdpg;
pt_entry_t PG_A, PG_M, PG_RW, PG_V;
+ bool delayed_invl;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -2313,9 +2441,12 @@
if (lockp != NULL) {
RELEASE_PV_LIST_LOCK(lockp);
PMAP_UNLOCK(pmap);
- rw_runlock(&pvh_global_lock);
+ delayed_invl = pmap_delayed_invl_active();
+ if (delayed_invl)
+ pmap_delayed_invl_finished();
VM_WAIT;
- rw_rlock(&pvh_global_lock);
+ if (delayed_invl)
+ pmap_delayed_invl_started();
PMAP_LOCK(pmap);
}
@@ -2719,7 +2850,6 @@
uint64_t inuse;
int bit, field, freed;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
pmap = NULL;
@@ -2790,6 +2920,7 @@
PGA_WRITEABLE);
}
}
+ pmap_delayed_invl_page(m);
pc->pc_map[field] |= 1UL << bit;
pmap_unuse_pt(pmap, va, *pde, &free);
freed++;
@@ -2851,7 +2982,6 @@
struct pv_chunk *pc;
int idx, field, bit;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_frees, 1));
PV_STAT(atomic_add_int(&pv_entry_spare, 1));
@@ -2908,7 +3038,6 @@
struct pv_chunk *pc;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
@@ -3004,7 +3133,6 @@
int avail, free;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
@@ -3074,7 +3202,6 @@
{
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
@@ -3101,7 +3228,6 @@
vm_page_t m;
int bit, field;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 2mpage aligned"));
@@ -3168,7 +3294,6 @@
vm_offset_t va_last;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 2mpage aligned"));
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
@@ -3221,7 +3346,6 @@
{
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
@@ -3245,7 +3369,6 @@
struct md_page *pvh;
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
@@ -3503,6 +3626,7 @@
if (TAILQ_EMPTY(&m->md.pv_list) &&
TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
+ pmap_delayed_invl_page(m);
}
}
if (pmap == kernel_pmap) {
@@ -3556,6 +3680,7 @@
if (TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
+ pmap_delayed_invl_page(m);
}
return (pmap_unuse_pt(pmap, va, ptepde, free));
}
@@ -3614,7 +3739,7 @@
anyvalid = 0;
SLIST_INIT(&free);
- rw_rlock(&pvh_global_lock);
+ pmap_delayed_invl_started();
PMAP_LOCK(pmap);
/*
@@ -3729,8 +3854,8 @@
out:
if (anyvalid)
pmap_invalidate_all(pmap);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ pmap_delayed_invl_finished();
pmap_free_zero_pages(&free);
}
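The pmap_remove() hunk above also shows the calling convention the DI primitives expect: open the block before taking the pmap lock, tag every page whose PV entry is destroyed, and close the block only after the TLB invalidation and the pmap unlock. In terms of the earlier userspace sketch (again with invented names), the two sides of the protocol pair up like this:

/* One shared per-object generation, standing in for md_page.invl_gen. */
static _Atomic unsigned long page_gen;

/* Writer: removes a mapping under its own lock, inside a DI block. */
static void
remove_side(void)
{
	di_started();		/* like pmap_delayed_invl_started() */
	/* ... clear the PTE and free the PV entry here ... */
	di_page(&page_gen);	/* record that this block touched the page */
	/* ... issue the TLB shootdown ... */
	di_finished();		/* waiters on the page may now proceed */
}

/* Reader: once the PV list is empty, make sure no stale TLB entry remains. */
static void
wait_side(void)
{
	di_wait(&page_gen);	/* like pmap_delayed_invl_wait(m) */
}
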
@@ -3753,30 +3878,55 @@
struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
+ struct rwlock *lock;
pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW;
pd_entry_t *pde;
vm_offset_t va;
struct spglist free;
+ int pvh_gen, md_gen;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
SLIST_INIT(&free);
- rw_wlock(&pvh_global_lock);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+retry:
+ rw_wlock(lock);
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
va = pv->pv_va;
pde = pmap_pde(pmap, va);
- (void)pmap_demote_pde(pmap, pde, va);
+ (void)pmap_demote_pde_locked(pmap, pde, va, &lock);
PMAP_UNLOCK(pmap);
}
small_mappings:
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ md_gen = m->md.pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
PG_A = pmap_accessed_bit(pmap);
PG_M = pmap_modified_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
@@ -3804,7 +3954,8 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_wunlock(&pvh_global_lock);
+ rw_wunlock(lock);
+ pmap_delayed_invl_wait(m);
pmap_free_zero_pages(&free);
}
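The PMAP_TRYLOCK() loop added above is the usual answer to a lock-order inversion: the PV list lock is already held, but the pmap lock must be taken first in the lock order, so on trylock failure the code records the PV generation, drops and reacquires both locks in the proper order, and retries the whole scan if the generation moved. A rough standalone sketch of the pattern, with pthread locks standing in for the kernel rwlock and mutex and all names invented here:

#include <pthread.h>
#include <stdbool.h>

struct pv_list {
	pthread_rwlock_t lock;		/* write-held by the caller */
	int gen;			/* bumped on every list change */
};

/*
 * Returns true with both locks held; returns false with both locks
 * dropped, in which case the caller must restart its scan.
 */
static bool
lock_pmap_with_pv_held(pthread_mutex_t *pmap_lock, struct pv_list *pvl)
{
	int saved_gen;

	if (pthread_mutex_trylock(pmap_lock) == 0)
		return (true);		/* fast path: order not violated */
	saved_gen = pvl->gen;		/* remember the list generation */
	pthread_rwlock_unlock(&pvl->lock);
	pthread_mutex_lock(pmap_lock);	/* blocking acquire, correct order */
	pthread_rwlock_wrlock(&pvl->lock);
	if (saved_gen != pvl->gen) {
		/* The list changed while it was unlocked; restart. */
		pthread_rwlock_unlock(&pvl->lock);
		pthread_mutex_unlock(pmap_lock);
		return (false);
	}
	return (true);
}
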
@@ -3864,7 +4015,7 @@
pdp_entry_t *pdpe;
pd_entry_t ptpaddr, *pde;
pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V;
- boolean_t anychanged, pv_lists_locked;
+ boolean_t anychanged;
KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
if (prot == VM_PROT_NONE) {
@@ -3880,8 +4031,6 @@
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
- pv_lists_locked = FALSE;
-resume:
anychanged = FALSE;
PMAP_LOCK(pmap);
@@ -3932,25 +4081,11 @@
if (pmap_protect_pde(pmap, pde, sva, prot))
anychanged = TRUE;
continue;
- } else {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- if (anychanged)
- pmap_invalidate_all(
- pmap);
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto resume;
- }
- }
- if (!pmap_demote_pde(pmap, pde, sva)) {
- /*
- * The large page mapping was
- * destroyed.
- */
- continue;
- }
+ } else if (!pmap_demote_pde(pmap, pde, sva)) {
+ /*
+ * The large page mapping was destroyed.
+ */
+ continue;
}
}
@@ -3990,8 +4125,6 @@
}
if (anychanged)
pmap_invalidate_all(pmap);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4135,6 +4268,11 @@
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
+ *
+ * Whenever this function destroys a mapping and its PV entry in a
+ * way that requires a TLB invalidation, it performs the invalidation
+ * before releasing the PV list lock, so pmap_delayed_invl_page()
+ * calls are not needed here.
*/
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
@@ -4196,7 +4334,6 @@
mpte = NULL;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
/*
@@ -4223,7 +4360,6 @@
if (mpte == NULL && nosleep) {
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_RESOURCE_SHORTAGE);
}
@@ -4356,7 +4492,6 @@
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_SUCCESS);
}
@@ -4377,7 +4512,6 @@
struct spglist free;
PG_V = pmap_valid_bit(pmap);
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) {
@@ -4469,7 +4603,6 @@
mpte = NULL;
m = m_start;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
@@ -4484,7 +4617,6 @@
}
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4503,12 +4635,10 @@
struct rwlock *lock;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4524,7 +4654,6 @@
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
PG_V = pmap_valid_bit(pmap);
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
@@ -4741,8 +4870,11 @@
* must have the wired attribute set. In contrast, invalid mappings
* cannot have the wired attribute set, so they are ignored.
*
- * The wired attribute of the page table entry is not a hardware feature,
- * so there is no need to invalidate any TLB entries.
+ * The wired attribute of the page table entry is not a hardware
+ * feature, so there is no need to invalidate any TLB entries.
+ * Since pmap_demote_pde() for a wired entry must never fail,
+ * pmap_delayed_invl_started()/finished() calls around this
+ * function are not needed.
*/
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
@@ -4752,11 +4884,8 @@
pdp_entry_t *pdpe;
pd_entry_t *pde;
pt_entry_t *pte, PG_V;
- boolean_t pv_lists_locked;
PG_V = pmap_valid_bit(pmap);
- pv_lists_locked = FALSE;
-resume:
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
pml4e = pmap_pml4e(pmap, sva);
@@ -4794,15 +4923,6 @@
PAGE_SIZE;
continue;
} else {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- /* Repeat sva. */
- goto resume;
- }
- }
if (!pmap_demote_pde(pmap, pde, sva))
panic("pmap_unwire: demotion failed");
}
@@ -4826,8 +4946,6 @@
pmap->pm_stats.wired_count--;
}
}
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4868,7 +4986,6 @@
return;
lock = NULL;
- rw_rlock(&pvh_global_lock);
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
@@ -5003,7 +5120,6 @@
out:
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -5116,7 +5232,6 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
@@ -5141,7 +5256,6 @@
}
}
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5163,7 +5277,6 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (0);
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
@@ -5208,7 +5321,6 @@
}
}
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (count);
}
@@ -5224,14 +5336,12 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5294,7 +5404,6 @@
PG_RW = pmap_rw_bit(pmap);
SLIST_INIT(&free);
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
allfree = 1;
@@ -5427,7 +5536,6 @@
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
pmap_free_zero_pages(&free);
}
@@ -5445,7 +5553,6 @@
boolean_t rv;
rv = FALSE;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
@@ -5514,7 +5621,6 @@
}
out:
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5608,7 +5714,6 @@
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry_pv_loop:
@@ -5675,7 +5780,7 @@
}
rw_wunlock(lock);
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_runlock(&pvh_global_lock);
+ pmap_delayed_invl_wait(m);
}
static __inline boolean_t
@@ -5736,12 +5841,12 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
+ pmap_delayed_invl_started();
SLIST_INIT(&free);
cleared = 0;
pa = VM_PAGE_TO_PHYS(m);
lock = PHYS_TO_PV_LIST_LOCK(pa);
pvh = pa_to_pvh(pa);
- rw_rlock(&pvh_global_lock);
rw_wlock(lock);
retry:
not_cleared = 0;
@@ -5901,7 +6006,7 @@
not_cleared < PMAP_TS_REFERENCED_MAX);
out:
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
+ pmap_delayed_invl_finished();
pmap_free_zero_pages(&free);
return (cleared + not_cleared);
}
@@ -5921,10 +6026,11 @@
pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V;
vm_offset_t va_next;
vm_page_t m;
- boolean_t anychanged, pv_lists_locked;
+ boolean_t anychanged;
if (advice != MADV_DONTNEED && advice != MADV_FREE)
return;
+ pmap_delayed_invl_started();
/*
* A/D bit emulation requires an alternate code path when clearing
@@ -5940,9 +6046,6 @@
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
-
- pv_lists_locked = FALSE;
-resume:
anychanged = FALSE;
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
@@ -5970,16 +6073,6 @@
else if ((oldpde & PG_PS) != 0) {
if ((oldpde & PG_MANAGED) == 0)
continue;
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- if (anychanged)
- pmap_invalidate_all(pmap);
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto resume;
- }
- }
lock = NULL;
if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
if (lock != NULL)
@@ -6039,9 +6132,8 @@
}
if (anychanged)
pmap_invalidate_all(pmap);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ pmap_delayed_invl_finished();
}
/*
@@ -6073,7 +6165,6 @@
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_wlock(lock);
restart:
@@ -6149,7 +6240,6 @@
PMAP_UNLOCK(pmap);
}
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
}
/*
@@ -6851,7 +6941,6 @@
vm_page_t m, mpte;
pd_entry_t *pde;
pt_entry_t *pte, PG_A, PG_M, PG_RW, PG_V;
- boolean_t pv_lists_locked;
KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE,
("pmap_emulate_accessed_dirty: invalid fault type %d", ftype));
@@ -6866,8 +6955,6 @@
rv = -1;
lock = NULL;
- pv_lists_locked = FALSE;
-retry:
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, va);
@@ -6918,14 +7005,6 @@
pmap_ps_enabled(pmap) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto retry;
- }
- }
pmap_promote_pde(pmap, pde, va, &lock);
#ifdef INVARIANTS
atomic_add_long(&ad_emulation_superpage_promotions, 1);
@@ -6941,8 +7020,6 @@
done:
if (lock != NULL)
rw_wunlock(lock);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (rv);
}
Index: sys/amd64/amd64/vm_machdep.c
===================================================================
--- sys/amd64/amd64/vm_machdep.c
+++ sys/amd64/amd64/vm_machdep.c
@@ -236,6 +236,7 @@
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+ td2->td_md.md_invl_gen.gen = 0;
/* As an i386, do not copy io permission bitmap. */
pcb2->pcb_tssp = NULL;
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -288,6 +288,7 @@
TAILQ_HEAD(,pv_entry) pv_list;
int pv_gen;
int pat_mode;
+ u_long invl_gen; /* long to avoid wraparounds */
};
enum pmap_type {
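The "long to avoid wraparounds" note above is easy to sanity-check: the generation counter advances at most once per DI block, so even at an implausible rate of 10^9 blocks per second a 64-bit counter needs 2^64 / 10^9 seconds, roughly 585 years, to wrap, while a 32-bit counter under the same load would wrap in about 4.3 seconds.
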
Index: sys/amd64/include/proc.h
===================================================================
--- sys/amd64/include/proc.h
+++ sys/amd64/include/proc.h
@@ -33,6 +33,7 @@
#ifndef _MACHINE_PROC_H_
#define _MACHINE_PROC_H_
+#include <sys/queue.h>
#include <machine/segments.h>
struct proc_ldt {
@@ -40,6 +41,11 @@
int ldt_refcnt;
};
+struct pmap_invl_gen {
+ u_long gen;
+ LIST_ENTRY(pmap_invl_gen) link;
+};
+
/*
* Machine-dependent part of the proc structure for AMD64.
*/
@@ -47,6 +53,7 @@
int md_spinlock_count; /* (k) */
register_t md_saved_flags; /* (k) */
register_t md_spurflt_addr; /* (k) Spurious page fault address. */
+ struct pmap_invl_gen md_invl_gen; /* (k) */
};
struct mdproc {
