D5747: Eliminate pvh_global_lock.
D5747.id16120.diff (22 KB)
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -375,14 +375,13 @@
} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
static int pmap_initialized;
-static struct rwlock_padalign pvh_global_lock;
-
/*
* Data for the pv entry allocation mechanism
*/
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
+static u_long pv_invl_gen[NPV_LIST_LOCKS];
static struct md_page *pv_table;
/*
@@ -418,6 +417,144 @@
CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
"Count of saved TLB context on switch");
+static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
+ LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
+static struct mtx invl_gen_mtx;
+static u_long pmap_invl_gen = 0;
+
+#define PMAP_ASSERT_NOT_IN_DI() \
+ KASSERT(curthread->td_md.md_invl_gen.gen == 0, ("DI opened"));
+
+
+/*
+ * Start a new Delayed Invalidation (DI) block of code, executed by
+ * the current thread. Within a DI block, the current thread may
+ * destroy both the page table and PV list entries for a mapping and
+ * then release the corresponding PV list lock before ensuring that
+ * the mapping is flushed from the TLBs of any processors with the
+ * pmap active.
+ */
+static void
+pmap_delayed_invl_started(void)
+{
+ struct pmap_invl_gen *invl_gen;
+ u_long currgen;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ PMAP_ASSERT_NOT_IN_DI();
+ mtx_lock(&invl_gen_mtx);
+ if (LIST_EMPTY(&pmap_invl_gen_tracker))
+ currgen = pmap_invl_gen;
+ else
+ currgen = LIST_FIRST(&pmap_invl_gen_tracker)->gen;
+ invl_gen->gen = currgen + 1;
+ LIST_INSERT_HEAD(&pmap_invl_gen_tracker, invl_gen, link);
+ mtx_unlock(&invl_gen_mtx);
+}
+
+/*
+ * Finish the DI block, previously started by the current thread. All
+ * required TLB flushes for the pages marked by
+ * pmap_delayed_invl_page() must be finished before this function is
+ * called.  At that point, no processor may access the removed
+ * mappings for the marked pages any longer.
+ *
+ * The function works by bumping the global DI generation count to the
+ * current thread's generation count, unless a thread which entered
+ * the DI block earlier is still executing it.  In the latter case,
+ * the bump is postponed and delegated to that earlier thread.
+ */
+static void
+pmap_delayed_invl_finished(void)
+{
+ struct pmap_invl_gen *invl_gen, *next;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ KASSERT(invl_gen->gen != 0, ("missed invl_started"));
+ mtx_lock(&invl_gen_mtx);
+ next = LIST_NEXT(invl_gen, link);
+ if (next == NULL)
+ atomic_store_rel_long(&pmap_invl_gen, invl_gen->gen);
+ else
+ next->gen = invl_gen->gen;
+ LIST_REMOVE(invl_gen, link);
+ mtx_unlock(&invl_gen_mtx);
+ invl_gen->gen = 0;
+}
+
+#ifdef PV_STATS
+static long invl_wait;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0,
+ "Number of times DI invalidation blocked pmap_remove_all/write");
+#endif
+
+static u_long *
+pmap_delayed_invl_genp(vm_page_t m)
+{
+
+ return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]);
+}
+
+/*
+ * The pmap_delayed_invl_wait() function ensures that all currently
+ * executing DI blocks that need to flush the TLB for the given page m
+ * have actually flushed it by the time the function returns.  If the
+ * page m has an empty PV list and pmap_delayed_invl_wait() was
+ * called, then no CPU can have a valid mapping for the page m, either
+ * in an active page table or cached in a TLB.
+ *
+ * The function works by spinning until the global invalidation
+ * generation is bumped to, or beyond, the generation recorded for the
+ * page's pv list by the threads leaving their DI blocks.
+ */
+static void
+pmap_delayed_invl_wait(vm_page_t m)
+{
+ u_long *m_gen;
+#ifdef PV_STATS
+ bool accounted = false;
+#endif
+
+ m_gen = pmap_delayed_invl_genp(m);
+ while ((u_long)atomic_load_acq_long(m_gen) >
+ (u_long)atomic_load_acq_long(&pmap_invl_gen)) {
+#ifdef PV_STATS
+ if (!accounted) {
+ atomic_add_long(&invl_wait, 1);
+ accounted = true;
+ }
+#endif
+ if (mp_ncpus == 1)
+ pause("pmapdi", 1);
+ else
+ kern_yield(PRI_USER);
+ }
+}
+
+/*
+ * Mark the page m's pv list as participating in the current thread's
+ * DI block.  A waiter on the page's DI invalidation that observed the
+ * current block is not allowed to proceed until the block finishes.
+ *
+ * The function works by setting the invalidation generation count for
+ * the page's pv list to at least the generation count of the current
+ * thread.  This forces a caller of pmap_delayed_invl_wait() to spin
+ * until the current thread calls pmap_delayed_invl_finished().
+ */
+static void
+pmap_delayed_invl_page(vm_page_t m)
+{
+ u_long gen, *m_gen;
+
+ rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED);
+ gen = curthread->td_md.md_invl_gen.gen;
+ if (gen == 0)
+ return;
+ m_gen = pmap_delayed_invl_genp(m);
+ if (*m_gen < gen)
+ atomic_store_rel_long(m_gen, gen);
+}
+
/*
* Crashdump maps.
*/
@@ -886,9 +1023,9 @@
kernel_pmap->pm_flags = pmap_flags;
/*
- * Initialize the global pv list lock.
+ * Initialize the TLB invalidations generation number lock.
*/
- rw_init(&pvh_global_lock, "pmap pv global");
+ mtx_init(&invl_gen_mtx, "invlgn", NULL, MTX_DEF);
/*
* Reserve some special page table entries/VA space for temporary
@@ -2313,9 +2450,8 @@
if (lockp != NULL) {
RELEASE_PV_LIST_LOCK(lockp);
PMAP_UNLOCK(pmap);
- rw_runlock(&pvh_global_lock);
+ PMAP_ASSERT_NOT_IN_DI();
VM_WAIT;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
}
@@ -2719,7 +2855,6 @@
uint64_t inuse;
int bit, field, freed;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
pmap = NULL;
@@ -2727,6 +2862,7 @@
PG_G = PG_A = PG_M = PG_RW = 0;
SLIST_INIT(&free);
TAILQ_INIT(&new_tail);
+ pmap_delayed_invl_started();
mtx_lock(&pv_chunks_mutex);
while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) {
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
@@ -2737,6 +2873,8 @@
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
+ pmap_delayed_invl_finished();
+ pmap_delayed_invl_started();
pmap = pc->pc_pmap;
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap) {
@@ -2790,6 +2928,7 @@
PGA_WRITEABLE);
}
}
+ pmap_delayed_invl_page(m);
pc->pc_map[field] |= 1UL << bit;
pmap_unuse_pt(pmap, va, *pde, &free);
freed++;
@@ -2831,6 +2970,7 @@
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
}
+ pmap_delayed_invl_finished();
if (m_pc == NULL && !SLIST_EMPTY(&free)) {
m_pc = SLIST_FIRST(&free);
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
@@ -2851,7 +2991,6 @@
struct pv_chunk *pc;
int idx, field, bit;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_frees, 1));
PV_STAT(atomic_add_int(&pv_entry_spare, 1));
@@ -2908,7 +3047,6 @@
struct pv_chunk *pc;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
@@ -3004,7 +3142,6 @@
int avail, free;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
@@ -3074,7 +3211,6 @@
{
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
@@ -3101,7 +3237,6 @@
vm_page_t m;
int bit, field;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_demote_pde: pa is not 2mpage aligned"));
@@ -3168,7 +3303,6 @@
vm_offset_t va_last;
vm_page_t m;
- rw_assert(&pvh_global_lock, RA_LOCKED);
KASSERT((pa & PDRMASK) == 0,
("pmap_pv_promote_pde: pa is not 2mpage aligned"));
CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
@@ -3221,7 +3355,6 @@
{
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
@@ -3245,7 +3378,6 @@
struct md_page *pvh;
pv_entry_t pv;
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/* Pass NULL instead of the lock pointer to disable reclamation. */
if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
@@ -3503,6 +3635,7 @@
if (TAILQ_EMPTY(&m->md.pv_list) &&
TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
+ pmap_delayed_invl_page(m);
}
}
if (pmap == kernel_pmap) {
@@ -3556,6 +3689,7 @@
if (TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
+ pmap_delayed_invl_page(m);
}
return (pmap_unuse_pt(pmap, va, ptepde, free));
}
@@ -3614,7 +3748,7 @@
anyvalid = 0;
SLIST_INIT(&free);
- rw_rlock(&pvh_global_lock);
+ pmap_delayed_invl_started();
PMAP_LOCK(pmap);
/*
@@ -3729,8 +3863,8 @@
out:
if (anyvalid)
pmap_invalidate_all(pmap);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ pmap_delayed_invl_finished();
pmap_free_zero_pages(&free);
}
@@ -3753,30 +3887,55 @@
struct md_page *pvh;
pv_entry_t pv;
pmap_t pmap;
+ struct rwlock *lock;
pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW;
pd_entry_t *pde;
vm_offset_t va;
struct spglist free;
+ int pvh_gen, md_gen;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
SLIST_INIT(&free);
- rw_wlock(&pvh_global_lock);
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+retry:
+ rw_wlock(lock);
if ((m->flags & PG_FICTITIOUS) != 0)
goto small_mappings;
- pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
va = pv->pv_va;
pde = pmap_pde(pmap, va);
- (void)pmap_demote_pde(pmap, pde, va);
+ (void)pmap_demote_pde_locked(pmap, pde, va, &lock);
PMAP_UNLOCK(pmap);
}
small_mappings:
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
- PMAP_LOCK(pmap);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ md_gen = m->md.pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
+ rw_wunlock(lock);
+ PMAP_UNLOCK(pmap);
+ goto retry;
+ }
+ }
PG_A = pmap_accessed_bit(pmap);
PG_M = pmap_modified_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
@@ -3804,7 +3963,8 @@
PMAP_UNLOCK(pmap);
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_wunlock(&pvh_global_lock);
+ rw_wunlock(lock);
+ pmap_delayed_invl_wait(m);
pmap_free_zero_pages(&free);
}
@@ -3864,7 +4024,7 @@
pdp_entry_t *pdpe;
pd_entry_t ptpaddr, *pde;
pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V;
- boolean_t anychanged, pv_lists_locked;
+ boolean_t anychanged;
KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
if (prot == VM_PROT_NONE) {
@@ -3880,8 +4040,6 @@
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
- pv_lists_locked = FALSE;
-resume:
anychanged = FALSE;
PMAP_LOCK(pmap);
@@ -3932,25 +4090,11 @@
if (pmap_protect_pde(pmap, pde, sva, prot))
anychanged = TRUE;
continue;
- } else {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- if (anychanged)
- pmap_invalidate_all(
- pmap);
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto resume;
- }
- }
- if (!pmap_demote_pde(pmap, pde, sva)) {
- /*
- * The large page mapping was
- * destroyed.
- */
- continue;
- }
+ } else if (!pmap_demote_pde(pmap, pde, sva)) {
+ /*
+ * The large page mapping was destroyed.
+ */
+ continue;
}
}
@@ -3990,8 +4134,6 @@
}
if (anychanged)
pmap_invalidate_all(pmap);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4135,6 +4277,11 @@
* NB: This is the only routine which MAY NOT lazy-evaluate
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
+ *
+ * Whenever destroying a mapping and its PV entry requires a TLB
+ * invalidation, the function performs the invalidation before
+ * releasing the PV list lock, so no pmap_delayed_invl_page()
+ * calls are needed here.
*/
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
@@ -4196,7 +4343,6 @@
mpte = NULL;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
/*
@@ -4223,7 +4369,6 @@
if (mpte == NULL && nosleep) {
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_RESOURCE_SHORTAGE);
}
@@ -4356,7 +4501,6 @@
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (KERN_SUCCESS);
}
@@ -4377,7 +4521,6 @@
struct spglist free;
PG_V = pmap_valid_bit(pmap);
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
if ((mpde = pmap_allocpde(pmap, va, NULL)) == NULL) {
@@ -4469,7 +4612,6 @@
mpte = NULL;
m = m_start;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
@@ -4484,7 +4626,6 @@
}
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4503,12 +4644,10 @@
struct rwlock *lock;
lock = NULL;
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4524,7 +4663,6 @@
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
PG_V = pmap_valid_bit(pmap);
- rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
/*
@@ -4741,8 +4879,11 @@
* must have the wired attribute set. In contrast, invalid mappings
* cannot have the wired attribute set, so they are ignored.
*
- * The wired attribute of the page table entry is not a hardware feature,
- * so there is no need to invalidate any TLB entries.
+ * The wired attribute of the page table entry is not a hardware
+ * feature, so there is no need to invalidate any TLB entries.
+ * Since pmap_demote_pde() for the wired entry must never fail,
+ * pmap_delayed_invl_started()/finished() calls around the
+ * function are not needed.
*/
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
@@ -4752,11 +4893,8 @@
pdp_entry_t *pdpe;
pd_entry_t *pde;
pt_entry_t *pte, PG_V;
- boolean_t pv_lists_locked;
PG_V = pmap_valid_bit(pmap);
- pv_lists_locked = FALSE;
-resume:
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
pml4e = pmap_pml4e(pmap, sva);
@@ -4793,19 +4931,8 @@
pmap->pm_stats.wired_count -= NBPDR /
PAGE_SIZE;
continue;
- } else {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- /* Repeat sva. */
- goto resume;
- }
- }
- if (!pmap_demote_pde(pmap, pde, sva))
- panic("pmap_unwire: demotion failed");
- }
+ } else if (!pmap_demote_pde(pmap, pde, sva))
+ panic("pmap_unwire: demotion failed");
}
if (va_next > eva)
va_next = eva;
@@ -4826,8 +4953,6 @@
pmap->pm_stats.wired_count--;
}
}
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
@@ -4868,7 +4993,6 @@
return;
lock = NULL;
- rw_rlock(&pvh_global_lock);
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
@@ -5003,7 +5127,6 @@
out:
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -5116,7 +5239,6 @@
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_page_exists_quick: page %p is not managed", m));
rv = FALSE;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
@@ -5141,7 +5263,6 @@
}
}
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5163,7 +5284,6 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (0);
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
@@ -5208,7 +5328,6 @@
}
}
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (count);
}
@@ -5224,14 +5343,12 @@
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
rv = !TAILQ_EMPTY(&m->md.pv_list) ||
((m->flags & PG_FICTITIOUS) == 0 &&
!TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list));
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5294,7 +5411,6 @@
PG_RW = pmap_rw_bit(pmap);
SLIST_INIT(&free);
- rw_rlock(&pvh_global_lock);
PMAP_LOCK(pmap);
TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
allfree = 1;
@@ -5427,7 +5543,6 @@
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
pmap_free_zero_pages(&free);
}
@@ -5445,7 +5560,6 @@
boolean_t rv;
rv = FALSE;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_rlock(lock);
restart:
@@ -5514,7 +5628,6 @@
}
out:
rw_runlock(lock);
- rw_runlock(&pvh_global_lock);
return (rv);
}
@@ -5608,7 +5721,6 @@
VM_OBJECT_ASSERT_WLOCKED(m->object);
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry_pv_loop:
@@ -5675,7 +5787,7 @@
}
rw_wunlock(lock);
vm_page_aflag_clear(m, PGA_WRITEABLE);
- rw_runlock(&pvh_global_lock);
+ pmap_delayed_invl_wait(m);
}
static __inline boolean_t
@@ -5718,6 +5830,10 @@
* XXX: The exact number of bits to check and clear is a matter that
* should be tested and standardized at some point in the future for
* optimal aging of shared pages.
+ *
+ * The DI block around the function is not needed, since
+ * invalidations are performed before the PV list lock is
+ * released.
*/
int
pmap_ts_referenced(vm_page_t m)
@@ -5741,7 +5857,6 @@
pa = VM_PAGE_TO_PHYS(m);
lock = PHYS_TO_PV_LIST_LOCK(pa);
pvh = pa_to_pvh(pa);
- rw_rlock(&pvh_global_lock);
rw_wlock(lock);
retry:
not_cleared = 0;
@@ -5901,7 +6016,6 @@
not_cleared < PMAP_TS_REFERENCED_MAX);
out:
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
pmap_free_zero_pages(&free);
return (cleared + not_cleared);
}
@@ -5921,10 +6035,11 @@
pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V;
vm_offset_t va_next;
vm_page_t m;
- boolean_t anychanged, pv_lists_locked;
+ boolean_t anychanged;
if (advice != MADV_DONTNEED && advice != MADV_FREE)
return;
+ pmap_delayed_invl_started();
/*
* A/D bit emulation requires an alternate code path when clearing
@@ -5940,9 +6055,6 @@
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
-
- pv_lists_locked = FALSE;
-resume:
anychanged = FALSE;
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
@@ -5970,16 +6082,6 @@
else if ((oldpde & PG_PS) != 0) {
if ((oldpde & PG_MANAGED) == 0)
continue;
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- if (anychanged)
- pmap_invalidate_all(pmap);
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto resume;
- }
- }
lock = NULL;
if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) {
if (lock != NULL)
@@ -6039,9 +6141,8 @@
}
if (anychanged)
pmap_invalidate_all(pmap);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
+ pmap_delayed_invl_finished();
}
/*
@@ -6073,7 +6174,6 @@
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
- rw_rlock(&pvh_global_lock);
lock = VM_PAGE_TO_PV_LIST_LOCK(m);
rw_wlock(lock);
restart:
@@ -6149,7 +6249,6 @@
PMAP_UNLOCK(pmap);
}
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
}
/*
@@ -6851,7 +6950,6 @@
vm_page_t m, mpte;
pd_entry_t *pde;
pt_entry_t *pte, PG_A, PG_M, PG_RW, PG_V;
- boolean_t pv_lists_locked;
KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE,
("pmap_emulate_accessed_dirty: invalid fault type %d", ftype));
@@ -6866,8 +6964,6 @@
rv = -1;
lock = NULL;
- pv_lists_locked = FALSE;
-retry:
PMAP_LOCK(pmap);
pde = pmap_pde(pmap, va);
@@ -6918,14 +7014,6 @@
pmap_ps_enabled(pmap) &&
(m->flags & PG_FICTITIOUS) == 0 &&
vm_reserv_level_iffullpop(m) == 0) {
- if (!pv_lists_locked) {
- pv_lists_locked = TRUE;
- if (!rw_try_rlock(&pvh_global_lock)) {
- PMAP_UNLOCK(pmap);
- rw_rlock(&pvh_global_lock);
- goto retry;
- }
- }
pmap_promote_pde(pmap, pde, va, &lock);
#ifdef INVARIANTS
atomic_add_long(&ad_emulation_superpage_promotions, 1);
@@ -6941,8 +7029,6 @@
done:
if (lock != NULL)
rw_wunlock(lock);
- if (pv_lists_locked)
- rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
return (rv);
}
Index: sys/amd64/amd64/vm_machdep.c
===================================================================
--- sys/amd64/amd64/vm_machdep.c
+++ sys/amd64/amd64/vm_machdep.c
@@ -236,6 +236,7 @@
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+ td2->td_md.md_invl_gen.gen = 0;
/* As an i386, do not copy io permission bitmap. */
pcb2->pcb_tssp = NULL;
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -284,9 +284,13 @@
struct pv_entry;
struct pv_chunk;
+/*
+ * Locks
+ * (p) PV list lock
+ */
struct md_page {
- TAILQ_HEAD(,pv_entry) pv_list;
- int pv_gen;
+ TAILQ_HEAD(, pv_entry) pv_list; /* (p) */
+ int pv_gen; /* (p) */
int pat_mode;
};
Index: sys/amd64/include/proc.h
===================================================================
--- sys/amd64/include/proc.h
+++ sys/amd64/include/proc.h
@@ -33,6 +33,7 @@
#ifndef _MACHINE_PROC_H_
#define _MACHINE_PROC_H_
+#include <sys/queue.h>
#include <machine/segments.h>
struct proc_ldt {
@@ -40,6 +41,11 @@
int ldt_refcnt;
};
+struct pmap_invl_gen {
+ u_long gen;
+ LIST_ENTRY(pmap_invl_gen) link;
+};
+
/*
* Machine-dependent part of the proc structure for AMD64.
*/
@@ -47,6 +53,7 @@
int md_spinlock_count; /* (k) */
register_t md_saved_flags; /* (k) */
register_t md_spurflt_addr; /* (k) Spurious page fault address. */
+ struct pmap_invl_gen md_invl_gen; /* (k) */
};
struct mdproc {
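
As an aid to reviewers, here is a minimal user-space sketch of the delayed-invalidation generation scheme implemented by the pmap_delayed_invl_started()/finished()/page()/wait() functions in the diff above. It is not part of the patch: the names di_start, di_finish, di_page, di_wait, global_gen and page_gen are invented for illustration, a pthread mutex and C11 atomics stand in for the kernel's invl_gen_mtx and atomic_store_rel_long(), and sched_yield() replaces kern_yield()/pause().

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <sys/queue.h>

struct invl_gen {
        unsigned long gen;              /* generation taken by this DI block */
        LIST_ENTRY(invl_gen) link;      /* tracker list linkage */
};

static LIST_HEAD(, invl_gen) tracker = LIST_HEAD_INITIALIZER(tracker);
static pthread_mutex_t tracker_mtx = PTHREAD_MUTEX_INITIALIZER;
static _Atomic unsigned long global_gen;        /* analogue of pmap_invl_gen */
static _Atomic unsigned long page_gen;          /* one slot of pv_invl_gen[] */

/* Enter a DI block: take a generation one past the newest active block. */
static void
di_start(struct invl_gen *ig)
{
        pthread_mutex_lock(&tracker_mtx);
        ig->gen = (LIST_EMPTY(&tracker) ? global_gen :
            LIST_FIRST(&tracker)->gen) + 1;
        LIST_INSERT_HEAD(&tracker, ig, link);
        pthread_mutex_unlock(&tracker_mtx);
}

/* Leave a DI block: publish our generation or delegate the bump. */
static void
di_finish(struct invl_gen *ig)
{
        struct invl_gen *next;

        pthread_mutex_lock(&tracker_mtx);
        next = LIST_NEXT(ig, link);
        if (next == NULL)
                atomic_store(&global_gen, ig->gen); /* oldest active block */
        else
                next->gen = ig->gen;    /* an earlier block is still active;
                                           it will publish our gen later */
        LIST_REMOVE(ig, link);
        pthread_mutex_unlock(&tracker_mtx);
        ig->gen = 0;
}

/* Tag the page as touched by the current DI block. */
static void
di_page(struct invl_gen *ig)
{
        if (ig->gen != 0 && atomic_load(&page_gen) < ig->gen)
                atomic_store(&page_gen, ig->gen);
}

/* Wait until every DI block that tagged the page has finished. */
static void
di_wait(void)
{
        while (atomic_load(&page_gen) > atomic_load(&global_gen))
                sched_yield();
}

int
main(void)
{
        struct invl_gen ig = { .gen = 0 };

        di_start(&ig);          /* enter a DI block */
        di_page(&ig);           /* the block touched the page's PV list */
        di_finish(&ig);         /* "TLB flush" done, publish the generation */
        di_wait();              /* a waiter on the page no longer spins */
        return (0);
}

The key property mirrored here is that the global generation only catches up with a block's generation once every block that started earlier has also finished, so a waiter spinning on page_gen > global_gen cannot be released before the block that tagged the page completes its TLB flush.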