D5747.id16120.diff

Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -375,14 +375,13 @@
} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
static int pmap_initialized;
-static struct rwlock_padalign pvh_global_lock;
-
/*
* Data for the pv entry allocation mechanism
*/
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
+static u_long pv_invl_gen[NPV_LIST_LOCKS];
static struct md_page *pv_table;
/*
@@ -418,6 +417,144 @@
CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
"Count of saved TLB context on switch");
+static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
+ LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
+static struct mtx invl_gen_mtx;
+static u_long pmap_invl_gen = 0;
+
+#define PMAP_ASSERT_NOT_IN_DI() \
+ KASSERT(curthread->td_md.md_invl_gen.gen == 0, ("DI opened"));
+
+
+/*
+ * Start a new Delayed Invalidation (DI) block of code, executed by
+ * the current thread. Within a DI block, the current thread may
+ * destroy both the page table and PV list entries for a mapping and
+ * then release the corresponding PV list lock before ensuring that
+ * the mapping is flushed from the TLBs of any processors with the
+ * pmap active.
+ */
+static void
+pmap_delayed_invl_started(void)
+{
+ struct pmap_invl_gen *invl_gen;
+ u_long currgen;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ PMAP_ASSERT_NOT_IN_DI();
+ mtx_lock(&invl_gen_mtx);
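+ /*
+ * Pick a generation count one greater than that of the newest
+ * active DI block (the list head), or than the last finished
+ * generation if no other DI block is in progress.
+ */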
+ if (LIST_EMPTY(&pmap_invl_gen_tracker))
+ currgen = pmap_invl_gen;
+ else
+ currgen = LIST_FIRST(&pmap_invl_gen_tracker)->gen;
+ invl_gen->gen = currgen + 1;
+ LIST_INSERT_HEAD(&pmap_invl_gen_tracker, invl_gen, link);
+ mtx_unlock(&invl_gen_mtx);
+}
+
+/*
+ * Finish the DI block, previously started by the current thread. All
+ * required TLB flushes for the pages marked by
+ * pmap_delayed_invl_page() must be finished before this function is
+ * called. Other processors must no longer be able to access the
+ * removed mappings for the marked pages.
+ *
+ * The function works by bumping the global DI generation count to the
+ * current thread's generation count, unless an earlier DI block is
+ * still in progress. In the latter case the bump is postponed: the
+ * earlier block inherits the current thread's generation count and
+ * performs the bump when it finishes.
+ */
+static void
+pmap_delayed_invl_finished(void)
+{
+ struct pmap_invl_gen *invl_gen, *next;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ KASSERT(invl_gen->gen != 0, ("missed invl_started"));
+ mtx_lock(&invl_gen_mtx);
+ next = LIST_NEXT(invl_gen, link);
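+ /*
+ * Entries are inserted at the list head, so the next entry, if
+ * any, is an older DI block that is still in progress. Bumping
+ * the global generation now would let waiters proceed before that
+ * block has flushed its pages; instead, hand our generation count
+ * to the older block so that it performs the bump when it
+ * finishes.
+ */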
+ if (next == NULL)
+ atomic_store_rel_long(&pmap_invl_gen, invl_gen->gen);
+ else
+ next->gen = invl_gen->gen;
+ LIST_REMOVE(invl_gen, link);
+ mtx_unlock(&invl_gen_mtx);
+ invl_gen->gen = 0;
+}
+
+#ifdef PV_STATS
+static long invl_wait;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0,
+ "Number of times DI invalidation blocked pmap_remove_all/write");
+#endif
+
+static u_long *
+pmap_delayed_invl_genp(vm_page_t m)
+{
+
+ return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]);
+}
+
+/*
+ * The pmap_delayed_invl_wait() function ensures that all DI blocks
+ * that need to flush the TLB for the given page m, and that were in
+ * progress when the function was called, have finished their flushes
+ * by the time the function returns. If the page m has an empty PV
+ * list and pmap_delayed_invl_wait() has been called, no CPU can have
+ * a valid mapping for the page m, neither in an active page table
+ * nor cached in a TLB.
+ *
+ * The function works by spinning until the global invalidation
+ * generation is bumped up to, or beyond, the page's PV list
+ * generation by the threads leaving their DI blocks.
+ */
+static void
+pmap_delayed_invl_wait(vm_page_t m)
+{
+ u_long *m_gen;
+#ifdef PV_STATS
+ bool accounted = false;
+#endif
+
+ m_gen = pmap_delayed_invl_genp(m);
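+ /*
+ * Spin until the global generation catches up with the page's
+ * generation, i.e. until every DI block that may have marked this
+ * page has called pmap_delayed_invl_finished().
+ */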
+ while ((u_long)atomic_load_acq_long(m_gen) >
+ (u_long)atomic_load_acq_long(&pmap_invl_gen)) {
+#ifdef PV_STATS
+ if (!accounted) {
+ atomic_add_long(&invl_wait, 1);
+ accounted = true;
+ }
+#endif
+ if (mp_ncpus == 1)
+ pause("pmapdi", 1);
+ else
+ kern_yield(PRI_USER);
+ }
+}
+
+/*
+ * Mark the page m's PV list as participating in the current thread's
+ * DI block. Any waiter on the page's DI invalidation that observed
+ * the current block is not allowed to proceed until the block
+ * finishes.
+ *
+ * The function works by setting the invalidation generation count for
+ * the page's PV list to at least the generation count of the current
+ * thread. This forces a caller of pmap_delayed_invl_wait() to spin
+ * until the current thread has called pmap_delayed_invl_finished().
+ */
+static void
+pmap_delayed_invl_page(vm_page_t m)
+{
+ u_long gen, *m_gen;
+
+ rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED);
+ gen = curthread->td_md.md_invl_gen.gen;
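+ /* A zero generation means the current thread is not in a DI block. */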
+ if (gen == 0)
+ return;
+ m_gen = pmap_delayed_invl_genp(m);
+ if (*m_gen < gen)
+ atomic_store_rel_long(m_gen, gen);
+}
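+
+/*
+ * Sketch of the DI protocol as used elsewhere in this change: a
+ * remover such as pmap_remove() calls pmap_delayed_invl_started(),
+ * removes PTEs and PV entries under the pmap and PV list locks while
+ * marking each affected managed page with pmap_delayed_invl_page(),
+ * may drop the PV list lock before the TLB flush, and calls
+ * pmap_delayed_invl_finished() only after the flush is done.
+ * Consumers that must not observe a stale mapping, such as
+ * pmap_remove_all() and pmap_remove_write(), call
+ * pmap_delayed_invl_wait() on the page after releasing its PV list
+ * lock.
+ */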
+
/*
* Crashdump maps.
*/
@@ -886,9 +1023,9 @@
kernel_pmap->pm_flags = pmap_flags;
/*
- * Initialize the global pv list lock.
+ * Initialize the TLB invalidation generation number lock.
*/
- rw_init(&pvh_global_lock, "pmap pv global");
+ mtx_init(&invl_gen_mtx, "invlgn", NULL, MTX_DEF);
/*
* Reserve some special page table entries/VA space for temporary
@@ -2313,9 +2450,8 @@
if (lockp != NULL) {
RELEASE_PV_LIST_LOCK(lockp);
PMAP_UNLOCK(pmap);
- rw_runlock(&pvh_global_lock);
+ PMAP_ASSERT_NOT_IN_DI();
VM_WAIT;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
}
@@ -2719,7 +2855,6 @@
uint64_t inuse;
int bit, field, freed;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
pmap = NULL;
@@ -2727,6 +2862,7 @@
PG_G = PG_A = PG_M = PG_RW = 0;
SLIST_INIT(&free);
TAILQ_INIT(&new_tail);
+ pmap_delayed_invl_started();
mtx_lock(&pv_chunks_mutex);
while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) {
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
@@ -2737,6 +2873,8 @@
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
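+ /*
+ * Assuming the TLB of the previous pmap was flushed just above,
+ * the pages marked so far in this DI block are now safe; finish
+ * the block and start a fresh one before switching to the next
+ * pmap, so that waiters are not delayed for the whole reclaim.
+ */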
+ pmap_delayed_invl_finished();
+ pmap_delayed_invl_started();
pmap = pc->pc_pmap;
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap) {
@@ -2790,6 +2928,7 @@
PGA_WRITEABLE);
}
}
+ pmap_delayed_invl_page(m);
pc->pc_map[field] |= 1UL << bit;
pmap_unuse_pt(pmap, va, *pde, &free);
freed++;
@@ -2831,6 +2970,7 @@
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
+ pmap_delayed_invl_finished();
if (m_pc == NULL && !SLIST_EMPTY(&free)) {
m_pc = SLIST_FIRST(&free);
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
@@ -2851,7 +2991,6 @@
struct pv_chunk *pc;
int idx, field, bit;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_frees, 1));
PV_STAT(atomic_add_int(&pv_entry_spare, 1));
@@ -2908,7 +3047,6 @@
struct pv_chunk *pc;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
@@ -3004,7 +3142,6 @@
int avail, free;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
@@ -3074,7 +3211,6 @@
{
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
@@ -3101,7 +3237,6 @@
vm_page_t m;
int bit, field;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 2mpage aligned"));
@@ -3168,7 +3303,6 @@
vm_offset_t va_last;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 2mpage aligned"));
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
@@ -3221,7 +3355,6 @@
{
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
@@ -3245,7 +3378,6 @@
struct md_page *pvh;
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
@@ -3503,6 +3635,7 @@
if (TAILQ_EMPTY(&m->md.pv_list) &&
TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
+ pmap_delayed_invl_page(m);
}
}
if (pmap == kernel_pmap) {
@@ -3556,6 +3689,7 @@
if (TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
+ pmap_delayed_invl_page(m);
}
return (pmap_unuse_pt(pmap, va, ptepde, free));
}
@@ -3614,7 +3748,7 @@
anyvalid = 0;
SLIST_INIT(&free);
- rw_rlock(&pvh_global_lock);
+ pmap_delayed_invl_started();
PMAP_LOCK(pmap);
/*
@@ -3729,8 +3863,8 @@
out:
if (anyvalid)
pmap_invalidate_all(pmap);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ pmap_delayed_invl_finished();
pmap_free_zero_pages(&free);
}
@@ -3753,30 +3887,55 @@
struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
+ struct rwlock *lock;
pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW;
pd_entry_t *pde;
vm_offset_t va;
struct spglist free;
+ int pvh_gen, md_gen;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
SLIST_INIT(&free);
- rw_wlock(&pvh_global_lock);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+retry:
+ rw_wlock(lock);
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
va = pv->pv_va;
pde = pmap_pde(pmap, va);
- (void)pmap_demote_pde(pmap, pde, va);
+ (void)pmap_demote_pde_locked(pmap, pde, va, &lock);
PMAP_UNLOCK(pmap);
}
small_mappings:
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ md_gen = m->md.pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
PG_A = pmap_accessed_bit(pmap);
PG_M = pmap_modified_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
@@ -3804,7 +3963,8 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_wunlock(&pvh_global_lock);
+ rw_wunlock(lock);
+ pmap_delayed_invl_wait(m);
pmap_free_zero_pages(&free);
}
@@ -3864,7 +4024,7 @@
pdp_entry_t *pdpe;
pd_entry_t ptpaddr, *pde;
pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V;
- boolean_t anychanged, pv_lists_locked;
+ boolean_t anychanged;
KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
if (prot == VM_PROT_NONE) {
@@ -3880,8 +4040,6 @@
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
- pv_lists_locked = FALSE;
-resume:
anychanged = FALSE;
PMAP_LOCK(pmap);
@@ -3932,25 +4090,11 @@
if (pmap_protect_pde(pmap, pde, sva, prot))
anychanged = TRUE;
continue;
- } else {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- if (anychanged)
- pmap_invalidate_all(
- pmap);
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto resume;
- }
- }
- if (!pmap_demote_pde(pmap, pde, sva)) {
- /*
- * The large page mapping was
- * destroyed.
- */
- continue;
- }
+ } else if (!pmap_demote_pde(pmap, pde, sva)) {
+ /*
+ * The large page mapping was destroyed.
+ */
+ continue;
}
}
@@ -3990,8 +4134,6 @@
}
if (anychanged)
pmap_invalidate_all(pmap);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4135,6 +4277,11 @@
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
+ *
+ * In each case where destroying a mapping and its PV entry requires
+ * a TLB invalidation, the function performs the invalidation before
+ * releasing the PV list lock, so pmap_delayed_invl_page() calls are
+ * not needed here.
*/
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
@@ -4196,7 +4343,6 @@
mpte = NULL;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
/*
@@ -4223,7 +4369,6 @@
if (mpte == NULL && nosleep) {
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_RESOURCE_SHORTAGE);
}
@@ -4356,7 +4501,6 @@
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_SUCCESS);
}
@@ -4377,7 +4521,6 @@
struct spglist free;
PG_V = pmap_valid_bit(pmap);
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) {
@@ -4469,7 +4612,6 @@
mpte = NULL;
m = m_start;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
@@ -4484,7 +4626,6 @@
}
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4503,12 +4644,10 @@
struct rwlock *lock;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4524,7 +4663,6 @@
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
PG_V = pmap_valid_bit(pmap);
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
@@ -4741,8 +4879,11 @@
* must have the wired attribute set. In contrast, invalid mappings
* cannot have the wired attribute set, so they are ignored.
*
- * The wired attribute of the page table entry is not a hardware feature,
- * so there is no need to invalidate any TLB entries.
+ * The wired attribute of the page table entry is not a hardware
+ * feature, so there is no need to invalidate any TLB entries.
+ * Since pmap_demote_pde() for a wired entry must never fail,
+ * pmap_delayed_invl_started()/finished() calls around the
+ * function are not needed.
*/
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
@@ -4752,11 +4893,8 @@
pdp_entry_t *pdpe;
pd_entry_t *pde;
pt_entry_t *pte, PG_V;
- boolean_t pv_lists_locked;
PG_V = pmap_valid_bit(pmap);
- pv_lists_locked = FALSE;
-resume:
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
pml4e = pmap_pml4e(pmap, sva);
@@ -4793,19 +4931,8 @@
pmap->pm_stats.wired_count -= NBPDR /
PAGE_SIZE;
continue;
- } else {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- /* Repeat sva. */
- goto resume;
- }
- }
- if (!pmap_demote_pde(pmap, pde, sva))
- panic("pmap_unwire: demotion failed");
- }
+ } else if (!pmap_demote_pde(pmap, pde, sva))
+ panic("pmap_unwire: demotion failed");
}
if (va_next > eva)
va_next = eva;
@@ -4826,8 +4953,6 @@
pmap->pm_stats.wired_count--;
}
}
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4868,7 +4993,6 @@
return;
lock = NULL;
- rw_rlock(&pvh_global_lock);
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
@@ -5003,7 +5127,6 @@
out:
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -5116,7 +5239,6 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
@@ -5141,7 +5263,6 @@
}
}
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5163,7 +5284,6 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (0);
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
@@ -5208,7 +5328,6 @@
}
}
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (count);
}
@@ -5224,14 +5343,12 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5294,7 +5411,6 @@
PG_RW = pmap_rw_bit(pmap);
SLIST_INIT(&free);
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
allfree = 1;
@@ -5427,7 +5543,6 @@
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
pmap_free_zero_pages(&free);
}
@@ -5445,7 +5560,6 @@
boolean_t rv;
rv = FALSE;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
@@ -5514,7 +5628,6 @@
}
out:
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5608,7 +5721,6 @@
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry_pv_loop:
@@ -5675,7 +5787,7 @@
}
rw_wunlock(lock);
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_runlock(&pvh_global_lock);
+ pmap_delayed_invl_wait(m);
}
static __inline boolean_t
@@ -5718,6 +5830,10 @@
* XXX: The exact number of bits to check and clear is a matter that
* should be tested and standardized at some point in the future for
* optimal aging of shared pages.
+ *
+ * A DI block around this function is not needed, since
+ * invalidations are performed before the PV list lock is
+ * released.
*/
int
pmap_ts_referenced(vm_page_t m)
@@ -5741,7 +5857,6 @@
pa = VM_PAGE_TO_PHYS(m);
lock = PHYS_TO_PV_LIST_LOCK(pa);
pvh = pa_to_pvh(pa);
- rw_rlock(&pvh_global_lock);
rw_wlock(lock);
retry:
not_cleared = 0;
@@ -5901,7 +6016,6 @@
not_cleared < PMAP_TS_REFERENCED_MAX);
out:
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
pmap_free_zero_pages(&free);
return (cleared + not_cleared);
}
@@ -5921,10 +6035,11 @@
pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V;
vm_offset_t va_next;
vm_page_t m;
- boolean_t anychanged, pv_lists_locked;
+ boolean_t anychanged;
if (advice != MADV_DONTNEED && advice != MADV_FREE)
return;
+ pmap_delayed_invl_started();
/*
* A/D bit emulation requires an alternate code path when clearing
@@ -5940,9 +6055,6 @@
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
-
- pv_lists_locked = FALSE;
-resume:
anychanged = FALSE;
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
@@ -5970,16 +6082,6 @@
else if ((oldpde & PG_PS) != 0) {
if ((oldpde & PG_MANAGED) == 0)
continue;
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- if (anychanged)
- pmap_invalidate_all(pmap);
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto resume;
- }
- }
lock = NULL;
if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
if (lock != NULL)
@@ -6039,9 +6141,8 @@
}
if (anychanged)
pmap_invalidate_all(pmap);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ pmap_delayed_invl_finished();
}
/*
@@ -6073,7 +6174,6 @@
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_wlock(lock);
restart:
@@ -6149,7 +6249,6 @@
PMAP_UNLOCK(pmap);
}
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
}
/*
@@ -6851,7 +6950,6 @@
vm_page_t m, mpte;
pd_entry_t *pde;
pt_entry_t *pte, PG_A, PG_M, PG_RW, PG_V;
- boolean_t pv_lists_locked;
KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE,
("pmap_emulate_accessed_dirty: invalid fault type %d", ftype));
@@ -6866,8 +6964,6 @@
rv = -1;
lock = NULL;
- pv_lists_locked = FALSE;
-retry:
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, va);
@@ -6918,14 +7014,6 @@
pmap_ps_enabled(pmap) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto retry;
- }
- }
pmap_promote_pde(pmap, pde, va, &lock);
#ifdef INVARIANTS
atomic_add_long(&ad_emulation_superpage_promotions, 1);
@@ -6941,8 +7029,6 @@
done:
if (lock != NULL)
rw_wunlock(lock);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (rv);
}
Index: sys/amd64/amd64/vm_machdep.c
===================================================================
--- sys/amd64/amd64/vm_machdep.c
+++ sys/amd64/amd64/vm_machdep.c
@@ -236,6 +236,7 @@
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
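+ /* The child does not inherit the parent's pmap DI state. */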
+ td2->td_md.md_invl_gen.gen = 0;
/* As an i386, do not copy io permission bitmap. */
pcb2->pcb_tssp = NULL;
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -284,9 +284,13 @@
struct pv_entry;
struct pv_chunk;
+/*
+ * Locks
+ * (p) PV list lock
+ */
struct md_page {
- TAILQ_HEAD(,pv_entry) pv_list;
- int pv_gen;
+ TAILQ_HEAD(, pv_entry) pv_list; /* (p) */
+ int pv_gen; /* (p) */
int pat_mode;
};
Index: sys/amd64/include/proc.h
===================================================================
--- sys/amd64/include/proc.h
+++ sys/amd64/include/proc.h
@@ -33,6 +33,7 @@
#ifndef _MACHINE_PROC_H_
#define _MACHINE_PROC_H_
+#include <sys/queue.h>
#include <machine/segments.h>
struct proc_ldt {
@@ -40,6 +41,11 @@
int ldt_refcnt;
};
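+
+/*
+ * Per-thread state of the pmap delayed invalidation (DI) machinery:
+ * the generation count of the thread's current DI block, zero when
+ * the thread is not inside a DI block, and the linkage into the
+ * global DI tracker list.
+ */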
+struct pmap_invl_gen {
+ u_long gen;
+ LIST_ENTRY(pmap_invl_gen) link;
+};
+
/*
* Machine-dependent part of the proc structure for AMD64.
*/
@@ -47,6 +53,7 @@
int md_spinlock_count; /* (k) */
register_t md_saved_flags; /* (k) */
register_t md_spurflt_addr; /* (k) Spurious page fault address. */
+ struct pmap_invl_gen md_invl_gen; /* (k) */
};
struct mdproc {
