D5747: Eliminate pvh_global_lock
D5747.id15971.diff (21 KB)
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -375,8 +375,6 @@
} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
static int pmap_initialized;
-static struct rwlock_padalign pvh_global_lock;
-
/*
* Data for the pv entry allocation mechanism
*/
@@ -418,6 +416,127 @@
CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
"Count of saved TLB context on switch");
+static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
+ LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
+static struct mtx invl_gen_mtx;
+static u_long pmap_invl_gen = 0;
+
+/*
+ * Start a new Delayed Invalidation (DI) block of code, executed by
+ * the current thread.  The DI block is a code sequence that may
+ * modify (remove) a PTE mapping a page and modify the page's PV
+ * list, releasing the PV list lock before the TLBs of all processors
+ * are consistently cleared of that PTE.
+ */
+static void
+pmap_delayed_invl_started(void)
+{
+ struct pmap_invl_gen *invl_gen;
+ u_long currgen;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ KASSERT(invl_gen->gen == 0, ("recursed invl_gen"));
+ mtx_lock(&invl_gen_mtx);
+ if (LIST_EMPTY(&pmap_invl_gen_tracker))
+ currgen = pmap_invl_gen;
+ else
+ currgen = LIST_FIRST(&pmap_invl_gen_tracker)->gen;
+ invl_gen->gen = currgen + 1;
+ LIST_INSERT_HEAD(&pmap_invl_gen_tracker, invl_gen, link);
+ mtx_unlock(&invl_gen_mtx);
+}
+
+/*
+ * Finish the DI block previously started by the current thread.  All
+ * TLB flushes required for the pages marked by pmap_delayed_invl_page()
+ * must be completed by the time the block is finished, so that no CPU
+ * can still access the removed mappings of the participating pages.
+ *
+ * The function works by bumping the global DI generation count to the
+ * current thread's generation count, unless an earlier-started DI
+ * block is still in progress.  In that case the current thread's count
+ * is handed off to that earlier block, and the global bump is deferred
+ * until the earlier block finishes.
+ */
+static void
+pmap_delayed_invl_finished(void)
+{
+ struct pmap_invl_gen *invl_gen, *next;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ KASSERT(invl_gen->gen != 0, ("missed invl_started"));
+ mtx_lock(&invl_gen_mtx);
+ next = LIST_NEXT(invl_gen, link);
+ if (next == NULL)
+ atomic_store_rel_long(&pmap_invl_gen, invl_gen->gen);
+ else
+ next->gen = invl_gen->gen;
+ LIST_REMOVE(invl_gen, link);
+ mtx_unlock(&invl_gen_mtx);
+ invl_gen->gen = 0;
+}
+
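To illustrate the hand-off described above, here is a minimal userland sketch of the generation tracker; it is not part of the patch. The mutex and the per-thread md_invl_gen storage are replaced by plain variables and a hand-rolled singly linked list, and two DI blocks are driven sequentially from main() instead of from separate threads.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Simplified stand-ins for the patch's types: no locking, and the
 * tracker is a hand-rolled singly linked list (newest block first).
 */
struct invl_gen {
	unsigned long gen;
	struct invl_gen *next;		/* earlier-started block, if any */
};

static unsigned long global_gen;	/* plays the role of pmap_invl_gen */
static struct invl_gen *tracker;	/* plays the role of pmap_invl_gen_tracker */

static void
di_started(struct invl_gen *ig)
{
	/* One past the newest generation currently in flight. */
	ig->gen = (tracker != NULL ? tracker->gen : global_gen) + 1;
	ig->next = tracker;
	tracker = ig;
}

static void
di_finished(struct invl_gen *ig)
{
	struct invl_gen **pp;

	/* Unlink ig from the tracker list. */
	for (pp = &tracker; *pp != ig; pp = &(*pp)->next)
		;
	*pp = ig->next;
	if (ig->next == NULL)
		global_gen = ig->gen;		/* oldest block: publish */
	else
		ig->next->gen = ig->gen;	/* hand off to the older block */
	ig->gen = 0;
}

int
main(void)
{
	struct invl_gen a, b;

	di_started(&a);			/* a.gen == 1 */
	di_started(&b);			/* b.gen == 2 */
	di_finished(&b);		/* b is not the oldest: bump deferred */
	assert(global_gen == 0 && a.gen == 2);
	di_finished(&a);		/* a publishes the handed-off generation */
	assert(global_gen == 2);
	printf("global generation is now %lu\n", global_gen);
	return (0);
}

The asserts show the invariant the hand-off preserves: the global count only advances once every DI block that started at or before that generation has finished.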
+#ifdef INVARIANTS
+static long invl_wait;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0,
+ "");
+#endif
+
+/*
+ * The pmap_delayed_invl_wait() function ensures that all currently
+ * executing DI blocks that need to flush the TLB for the given page m
+ * have done so by the time the function returns.  If the page m has
+ * an empty PV list and pmap_delayed_invl_wait() was called, then no
+ * CPU can have a valid mapping for the page, either in an active
+ * page table or cached in a TLB.
+ *
+ * The function works by spinning until the global invalidation
+ * generation count catches up with the page's generation count,
+ * which is advanced by threads finishing their DI blocks.
+ */
+static void
+pmap_delayed_invl_wait(vm_page_t m)
+{
+#ifdef INVARIANTS
+ boolean_t accounted = FALSE;
+#endif
+
+ while ((u_long)atomic_load_acq_long(&m->md.invl_gen) >
+ (u_long)atomic_load_acq_long(&pmap_invl_gen)) {
+#ifdef INVARIANTS
+ if (!accounted) {
+ atomic_add_long(&invl_wait, 1);
+ accounted = TRUE;
+ }
+#endif
+ kern_yield(PRI_USER); /* XXX */
+ }
+}
+
+/*
+ * Mark the given page m as participating in the current thread's DI
+ * block.  A thread that waits in pmap_delayed_invl_wait() on this
+ * page's invalidation may not proceed until the block finishes.
+ *
+ * The function works by raising the page's invalidation generation
+ * count to at least the generation count of the current thread's DI
+ * block.  This forces callers of pmap_delayed_invl_wait() to spin
+ * until the current thread calls pmap_delayed_invl_finished().
+ */
+static void
+pmap_delayed_invl_page(vm_page_t m)
+{
+ u_long old_gen, gen;
+
+ gen = curthread->td_md.md_invl_gen.gen;
+ if (gen == 0)
+ return;
+ do {
+ old_gen = m->md.invl_gen;
+ if (old_gen >= gen)
+ break;
+ } while (!atomic_cmpset_rel_long(&m->md.invl_gen, old_gen, gen));
+}
+
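The following is a similarly simplified, single-block sketch of how pmap_delayed_invl_page() and pmap_delayed_invl_wait() interact, again not part of the patch: C11 atomics stand in for the kernel atomic(9) operations, the tracker list is collapsed to a single thread-local generation, and the waiter's spin loop is reduced to a predicate so the program runs to completion on one thread.

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong global_gen;		/* pmap_invl_gen stand-in */
static unsigned long thread_gen;	/* td_md.md_invl_gen.gen stand-in */

struct page {				/* md_page stand-in */
	atomic_ulong invl_gen;
};

/* Open a DI block: single-block case, so go one past the global count. */
static void
di_started(void)
{
	assert(thread_gen == 0);
	thread_gen = atomic_load(&global_gen) + 1;
}

/* Mark a page: raise its generation to at least the current block's. */
static void
di_page(struct page *m)
{
	unsigned long old;

	if (thread_gen == 0)
		return;
	old = atomic_load(&m->invl_gen);
	while (old < thread_gen &&
	    !atomic_compare_exchange_weak(&m->invl_gen, &old, thread_gen))
		;
}

/* Close the block: publish its generation (no older blocks to defer to). */
static void
di_finished(void)
{
	atomic_store(&global_gen, thread_gen);
	thread_gen = 0;
}

/* The condition pmap_delayed_invl_wait() spins on. */
static int
waiter_must_spin(struct page *m)
{
	return (atomic_load(&m->invl_gen) > atomic_load(&global_gen));
}

int
main(void)
{
	struct page m = { 0 };

	di_started();
	di_page(&m);
	assert(waiter_must_spin(&m));	/* block open: waiters spin */
	di_finished();
	assert(!waiter_must_spin(&m));	/* block closed: waiters proceed */
	printf("waiters released at generation %lu\n",
	    (unsigned long)atomic_load(&m.invl_gen));
	return (0);
}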
/*
* Crashdump maps.
*/
@@ -886,9 +1005,9 @@
kernel_pmap->pm_flags = pmap_flags;
/*
- * Initialize the global pv list lock.
+ * Initialize the TLB invalidations generation number lock.
*/
- rw_init(&pvh_global_lock, "pmap pv global");
+ mtx_init(&invl_gen_mtx, "invlgn", NULL, MTX_DEF);
/*
* Reserve some special page table entries/VA space for temporary
@@ -2313,9 +2432,7 @@
if (lockp != NULL) {
RELEASE_PV_LIST_LOCK(lockp);
PMAP_UNLOCK(pmap);
- rw_runlock(&pvh_global_lock);
VM_WAIT;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
}
@@ -2719,7 +2836,6 @@
uint64_t inuse;
int bit, field, freed;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
pmap = NULL;
@@ -2727,6 +2843,7 @@
PG_G = PG_A = PG_M = PG_RW = 0;
SLIST_INIT(&free);
TAILQ_INIT(&new_tail);
+ pmap_delayed_invl_started();
mtx_lock(&pv_chunks_mutex);
while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) {
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
@@ -2737,6 +2854,8 @@
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
+ pmap_delayed_invl_finished();
+ pmap_delayed_invl_started();
pmap = pc->pc_pmap;
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap) {
@@ -2790,6 +2909,7 @@
PGA_WRITEABLE);
}
}
+ pmap_delayed_invl_page(m);
pc->pc_map[field] |= 1UL << bit;
pmap_unuse_pt(pmap, va, *pde, &free);
freed++;
@@ -2831,6 +2951,7 @@
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
+ pmap_delayed_invl_finished();
if (m_pc == NULL && !SLIST_EMPTY(&free)) {
m_pc = SLIST_FIRST(&free);
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
@@ -2851,7 +2972,6 @@
struct pv_chunk *pc;
int idx, field, bit;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_frees, 1));
PV_STAT(atomic_add_int(&pv_entry_spare, 1));
@@ -2908,7 +3028,6 @@
struct pv_chunk *pc;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
@@ -3004,7 +3123,6 @@
int avail, free;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
@@ -3074,7 +3192,6 @@
{
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
@@ -3101,7 +3218,6 @@
vm_page_t m;
int bit, field;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 2mpage aligned"));
@@ -3168,7 +3284,6 @@
vm_offset_t va_last;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 2mpage aligned"));
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
@@ -3221,7 +3336,6 @@
{
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
@@ -3245,7 +3359,6 @@
struct md_page *pvh;
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
@@ -3503,6 +3616,7 @@
if (TAILQ_EMPTY(&m->md.pv_list) &&
TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
+ pmap_delayed_invl_page(m);
}
}
if (pmap == kernel_pmap) {
@@ -3556,6 +3670,7 @@
if (TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
+ pmap_delayed_invl_page(m);
}
return (pmap_unuse_pt(pmap, va, ptepde, free));
}
@@ -3614,7 +3729,7 @@
anyvalid = 0;
SLIST_INIT(&free);
- rw_rlock(&pvh_global_lock);
+ pmap_delayed_invl_started();
PMAP_LOCK(pmap);
/*
@@ -3729,8 +3844,8 @@
out:
if (anyvalid)
pmap_invalidate_all(pmap);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ pmap_delayed_invl_finished();
pmap_free_zero_pages(&free);
}
@@ -3753,30 +3868,55 @@
struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
+ struct rwlock *lock;
pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW;
pd_entry_t *pde;
vm_offset_t va;
struct spglist free;
+ int pvh_gen, md_gen;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
SLIST_INIT(&free);
- rw_wlock(&pvh_global_lock);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+retry:
+ rw_wlock(lock);
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
va = pv->pv_va;
pde = pmap_pde(pmap, va);
- (void)pmap_demote_pde(pmap, pde, va);
+ (void)pmap_demote_pde_locked(pmap, pde, va, &lock);
PMAP_UNLOCK(pmap);
}
small_mappings:
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ md_gen = m->md.pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
PG_A = pmap_accessed_bit(pmap);
PG_M = pmap_modified_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
@@ -3804,7 +3944,8 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_wunlock(&pvh_global_lock);
+ rw_wunlock(lock);
+ pmap_delayed_invl_wait(m);
pmap_free_zero_pages(&free);
}
@@ -3864,7 +4005,7 @@
pdp_entry_t *pdpe;
pd_entry_t ptpaddr, *pde;
pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V;
- boolean_t anychanged, pv_lists_locked;
+ boolean_t anychanged;
KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
if (prot == VM_PROT_NONE) {
@@ -3880,8 +4021,6 @@
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
- pv_lists_locked = FALSE;
-resume:
anychanged = FALSE;
PMAP_LOCK(pmap);
@@ -3932,25 +4071,11 @@
if (pmap_protect_pde(pmap, pde, sva, prot))
anychanged = TRUE;
continue;
- } else {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- if (anychanged)
- pmap_invalidate_all(
- pmap);
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto resume;
- }
- }
- if (!pmap_demote_pde(pmap, pde, sva)) {
- /*
- * The large page mapping was
- * destroyed.
- */
- continue;
- }
+ } else if (!pmap_demote_pde(pmap, pde, sva)) {
+ /*
+ * The large page mapping was destroyed.
+ */
+ continue;
}
}
@@ -3990,8 +4115,6 @@
}
if (anychanged)
pmap_invalidate_all(pmap);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4135,6 +4258,11 @@
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
+ *
+ * Whenever this function destroys a mapping and its PV entry and the
+ * removal requires a TLB invalidation, it performs the invalidation
+ * before releasing the PV list lock, so pmap_delayed_invl_page()
+ * calls are not needed here.
*/
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
@@ -4196,7 +4324,6 @@
mpte = NULL;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
/*
@@ -4223,7 +4350,6 @@
if (mpte == NULL && nosleep) {
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_RESOURCE_SHORTAGE);
}
@@ -4356,7 +4482,6 @@
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_SUCCESS);
}
@@ -4377,7 +4502,6 @@
struct spglist free;
PG_V = pmap_valid_bit(pmap);
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) {
@@ -4469,7 +4593,6 @@
mpte = NULL;
m = m_start;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
@@ -4484,7 +4607,6 @@
}
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4503,12 +4625,10 @@
struct rwlock *lock;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4524,7 +4644,6 @@
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
PG_V = pmap_valid_bit(pmap);
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
@@ -4741,8 +4860,11 @@
* must have the wired attribute set. In contrast, invalid mappings
* cannot have the wired attribute set, so they are ignored.
*
- * The wired attribute of the page table entry is not a hardware feature,
- * so there is no need to invalidate any TLB entries.
+ * The wired attribute of the page table entry is not a hardware
+ * feature, so there is no need to invalidate any TLB entries.
+ * Since pmap_demote_pde() of a wired entry must never fail,
+ * pmap_delayed_invl_started()/finished() calls around this
+ * function are not needed.
*/
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
@@ -4752,11 +4874,8 @@
pdp_entry_t *pdpe;
pd_entry_t *pde;
pt_entry_t *pte, PG_V;
- boolean_t pv_lists_locked;
PG_V = pmap_valid_bit(pmap);
- pv_lists_locked = FALSE;
-resume:
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
pml4e = pmap_pml4e(pmap, sva);
@@ -4794,15 +4913,6 @@
PAGE_SIZE;
continue;
} else {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- /* Repeat sva. */
- goto resume;
- }
- }
if (!pmap_demote_pde(pmap, pde, sva))
panic("pmap_unwire: demotion failed");
}
@@ -4826,8 +4936,6 @@
pmap->pm_stats.wired_count--;
}
}
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4868,7 +4976,6 @@
return;
lock = NULL;
- rw_rlock(&pvh_global_lock);
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
@@ -5003,7 +5110,6 @@
out:
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -5116,7 +5222,6 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
@@ -5141,7 +5246,6 @@
}
}
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5163,7 +5267,6 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (0);
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
@@ -5208,7 +5311,6 @@
}
}
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (count);
}
@@ -5224,14 +5326,12 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5294,7 +5394,6 @@
PG_RW = pmap_rw_bit(pmap);
SLIST_INIT(&free);
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
allfree = 1;
@@ -5427,7 +5526,6 @@
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
pmap_free_zero_pages(&free);
}
@@ -5445,7 +5543,6 @@
boolean_t rv;
rv = FALSE;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
@@ -5514,7 +5611,6 @@
}
out:
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5608,7 +5704,6 @@
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry_pv_loop:
@@ -5675,7 +5770,7 @@
}
rw_wunlock(lock);
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_runlock(&pvh_global_lock);
+ pmap_delayed_invl_wait(m);
}
static __inline boolean_t
@@ -5736,12 +5831,12 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
+ pmap_delayed_invl_started();
SLIST_INIT(&free);
cleared = 0;
pa = VM_PAGE_TO_PHYS(m);
lock = PHYS_TO_PV_LIST_LOCK(pa);
pvh = pa_to_pvh(pa);
- rw_rlock(&pvh_global_lock);
rw_wlock(lock);
retry:
not_cleared = 0;
@@ -5901,7 +5996,7 @@
not_cleared < PMAP_TS_REFERENCED_MAX);
out:
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
+ pmap_delayed_invl_finished();
pmap_free_zero_pages(&free);
return (cleared + not_cleared);
}
@@ -5921,10 +6016,11 @@
pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V;
vm_offset_t va_next;
vm_page_t m;
- boolean_t anychanged, pv_lists_locked;
+ boolean_t anychanged;
if (advice != MADV_DONTNEED && advice != MADV_FREE)
return;
+ pmap_delayed_invl_started();
/*
* A/D bit emulation requires an alternate code path when clearing
@@ -5940,9 +6036,6 @@
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
-
- pv_lists_locked = FALSE;
-resume:
anychanged = FALSE;
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
@@ -5970,16 +6063,6 @@
else if ((oldpde & PG_PS) != 0) {
if ((oldpde & PG_MANAGED) == 0)
continue;
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- if (anychanged)
- pmap_invalidate_all(pmap);
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto resume;
- }
- }
lock = NULL;
if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
if (lock != NULL)
@@ -6039,9 +6122,8 @@
}
if (anychanged)
pmap_invalidate_all(pmap);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ pmap_delayed_invl_finished();
}
/*
@@ -6073,7 +6155,6 @@
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_wlock(lock);
restart:
@@ -6149,7 +6230,6 @@
PMAP_UNLOCK(pmap);
}
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
}
/*
@@ -6851,7 +6931,6 @@
vm_page_t m, mpte;
pd_entry_t *pde;
pt_entry_t *pte, PG_A, PG_M, PG_RW, PG_V;
- boolean_t pv_lists_locked;
KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE,
("pmap_emulate_accessed_dirty: invalid fault type %d", ftype));
@@ -6866,8 +6945,6 @@
rv = -1;
lock = NULL;
- pv_lists_locked = FALSE;
-retry:
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, va);
@@ -6918,14 +6995,6 @@
pmap_ps_enabled(pmap) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto retry;
- }
- }
pmap_promote_pde(pmap, pde, va, &lock);
#ifdef INVARIANTS
atomic_add_long(&ad_emulation_superpage_promotions, 1);
@@ -6941,8 +7010,6 @@
done:
if (lock != NULL)
rw_wunlock(lock);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (rv);
}
Index: sys/amd64/amd64/vm_machdep.c
===================================================================
--- sys/amd64/amd64/vm_machdep.c
+++ sys/amd64/amd64/vm_machdep.c
@@ -236,6 +236,7 @@
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+ td2->td_md.md_invl_gen.gen = 0;
/* As an i386, do not copy io permission bitmap. */
pcb2->pcb_tssp = NULL;
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -288,6 +288,7 @@
TAILQ_HEAD(,pv_entry) pv_list;
int pv_gen;
int pat_mode;
+ u_long invl_gen; /* long to avoid wraparounds */
};
enum pmap_type {
Index: sys/amd64/include/proc.h
===================================================================
--- sys/amd64/include/proc.h
+++ sys/amd64/include/proc.h
@@ -33,6 +33,7 @@
#ifndef _MACHINE_PROC_H_
#define _MACHINE_PROC_H_
+#include <sys/queue.h>
#include <machine/segments.h>
struct proc_ldt {
@@ -40,6 +41,11 @@
int ldt_refcnt;
};
+struct pmap_invl_gen {
+ u_long gen;
+ LIST_ENTRY(pmap_invl_gen) link;
+};
+
/*
* Machine-dependent part of the proc structure for AMD64.
*/
@@ -47,6 +53,7 @@
int md_spinlock_count; /* (k) */
register_t md_saved_flags; /* (k) */
register_t md_spurflt_addr; /* (k) Spurious page fault address. */
+ struct pmap_invl_gen md_invl_gen; /* (k) */
};
struct mdproc {