Page Menu · Home · FreeBSD

D15983.diff
No One · Temporary

D15983.diff

Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -114,6 +114,7 @@
#include <sys/bitstring.h>
#include <sys/bus.h>
#include <sys/systm.h>
+#include <sys/epoch.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
@@ -349,6 +350,7 @@
vm_paddr_t dmaplimit;
vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
pt_entry_t pg_nx;
+static epoch_t pmap_epoch;
static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
@@ -386,13 +388,10 @@
/*
* Data for the pv entry allocation mechanism.
- * Updates to pv_invl_gen are protected by the pv_list_locks[]
- * elements, but reads are not.
*/
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx __exclusive_cache_line pv_chunks_mutex;
static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS];
-static u_long pv_invl_gen[NPV_LIST_LOCKS];
static struct md_page *pv_table;
static struct md_page pv_dummy;
@@ -438,14 +437,13 @@
CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
"Count of saved TLB context on switch");
-static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
- LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
-static struct mtx invl_gen_mtx;
-static u_long pmap_invl_gen = 0;
-/* Fake lock object to satisfy turnstiles interface. */
-static struct lock_object invl_gen_ts = {
- .lo_name = "invlts",
-};
+static void
+pmap_epoch_init(void *arg __unused)
+{
+
+ pmap_epoch = epoch_alloc(EPOCH_PREEMPT|EPOCH_LOCKED);
+}
+SYSINIT(epoch, SI_SUB_TASKQ + 1, SI_ORDER_ANY, pmap_epoch_init, NULL);
static bool
pmap_not_in_di(void)
@@ -466,62 +464,24 @@
* pmap active.
*/
static void
-pmap_delayed_invl_started(void)
+pmap_delayed_invl_started(epoch_tracker_t et)
{
- struct pmap_invl_gen *invl_gen;
- u_long currgen;
- invl_gen = &curthread->td_md.md_invl_gen;
- PMAP_ASSERT_NOT_IN_DI();
- mtx_lock(&invl_gen_mtx);
- if (LIST_EMPTY(&pmap_invl_gen_tracker))
- currgen = pmap_invl_gen;
- else
- currgen = LIST_FIRST(&pmap_invl_gen_tracker)->gen;
- invl_gen->gen = currgen + 1;
- LIST_INSERT_HEAD(&pmap_invl_gen_tracker, invl_gen, link);
- mtx_unlock(&invl_gen_mtx);
+ epoch_enter_preempt(pmap_epoch, et);
+ curthread->td_md.md_invl_gen.gen = 1;
}
/*
- * Finish the DI block, previously started by the current thread. All
- * required TLB flushes for the pages marked by
- * pmap_delayed_invl_page() must be finished before this function is
- * called.
- *
- * This function works by bumping the global DI generation number to
- * the generation number of the current thread's DI, unless there is a
- * pending DI that started earlier. In the latter case, bumping the
- * global DI generation number would incorrectly signal that the
- * earlier DI had finished. Instead, this function bumps the earlier
- * DI's generation number to match the generation number of the
- * current thread's DI.
+ * This function works by checking that there are either no callers
+ * within a DI block or if there are that a grace period elapses for
+ * any callers in an epoch section when it is initially called.
*/
static void
-pmap_delayed_invl_finished(void)
-{
- struct pmap_invl_gen *invl_gen, *next;
- struct turnstile *ts;
-
- invl_gen = &curthread->td_md.md_invl_gen;
- KASSERT(invl_gen->gen != 0, ("missed invl_started"));
- mtx_lock(&invl_gen_mtx);
- next = LIST_NEXT(invl_gen, link);
- if (next == NULL) {
- turnstile_chain_lock(&invl_gen_ts);
- ts = turnstile_lookup(&invl_gen_ts);
- pmap_invl_gen = invl_gen->gen;
- if (ts != NULL) {
- turnstile_broadcast(ts, TS_SHARED_QUEUE);
- turnstile_unpend(ts);
- }
- turnstile_chain_unlock(&invl_gen_ts);
- } else {
- next->gen = invl_gen->gen;
- }
- LIST_REMOVE(invl_gen, link);
- mtx_unlock(&invl_gen_mtx);
- invl_gen->gen = 0;
+pmap_delayed_invl_finished(epoch_tracker_t et)
+{
+
+ curthread->td_md.md_invl_gen.gen = 0;
+ epoch_exit_preempt(pmap_epoch, et);
}
#ifdef PV_STATS
@@ -530,13 +490,6 @@
"Number of times DI invalidation blocked pmap_remove_all/write");
#endif
-static u_long *
-pmap_delayed_invl_genp(vm_page_t m)
-{
-
- return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]);
-}
-
/*
* Ensure that all currently executing DI blocks, that need to flush
* TLB for the given page m, actually flushed the TLB at the time the
@@ -554,51 +507,8 @@
static void
pmap_delayed_invl_wait(vm_page_t m)
{
- struct turnstile *ts;
- u_long *m_gen;
-#ifdef PV_STATS
- bool accounted = false;
-#endif
- m_gen = pmap_delayed_invl_genp(m);
- while (*m_gen > pmap_invl_gen) {
-#ifdef PV_STATS
- if (!accounted) {
- atomic_add_long(&invl_wait, 1);
- accounted = true;
- }
-#endif
- ts = turnstile_trywait(&invl_gen_ts);
- if (*m_gen > pmap_invl_gen)
- turnstile_wait(ts, NULL, TS_SHARED_QUEUE);
- else
- turnstile_cancel(ts);
- }
-}
-
-/*
- * Mark the page m's PV list as participating in the current thread's
- * DI block. Any threads concurrently using m's PV list to remove or
- * restrict all mappings to m will wait for the current thread's DI
- * block to complete before proceeding.
- *
- * The function works by setting the DI generation number for m's PV
- * list to at least the DI generation number of the current thread.
- * This forces a caller of pmap_delayed_invl_wait() to block until
- * current thread calls pmap_delayed_invl_finished().
- */
-static void
-pmap_delayed_invl_page(vm_page_t m)
-{
- u_long gen, *m_gen;
-
- rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED);
- gen = curthread->td_md.md_invl_gen.gen;
- if (gen == 0)
- return;
- m_gen = pmap_delayed_invl_genp(m);
- if (*m_gen < gen)
- *m_gen = gen;
+ epoch_wait_preempt(pmap_epoch);
}
/*
@@ -1130,11 +1040,6 @@
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
kernel_pmap->pm_flags = pmap_flags;
- /*
- * Initialize the TLB invalidations generation number lock.
- */
- mtx_init(&invl_gen_mtx, "invlgn", NULL, MTX_DEF);
-
/*
* Reserve some special page table entries/VA space for temporary
* mapping of pages.
@@ -3160,7 +3065,8 @@
#endif
static void
-reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked_pmap, bool start_di)
+reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked_pmap, bool start_di,
+ epoch_tracker_t et)
{
if (pmap == NULL)
@@ -3169,7 +3075,7 @@
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
if (start_di)
- pmap_delayed_invl_finished();
+ pmap_delayed_invl_finished(et);
}
/*
@@ -3200,6 +3106,7 @@
uint64_t inuse;
int bit, field, freed;
bool start_di;
+ struct epoch_tracker et;
static int active_reclaims = 0;
PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
@@ -3245,20 +3152,20 @@
*/
if (pmap != next_pmap) {
reclaim_pv_chunk_leave_pmap(pmap, locked_pmap,
- start_di);
+ start_di, &et);
pmap = next_pmap;
/* Avoid deadlock and lock recursion. */
if (pmap > locked_pmap) {
RELEASE_PV_LIST_LOCK(lockp);
PMAP_LOCK(pmap);
if (start_di)
- pmap_delayed_invl_started();
+ pmap_delayed_invl_started(&et);
mtx_lock(&pv_chunks_mutex);
continue;
} else if (pmap != locked_pmap) {
if (PMAP_TRYLOCK(pmap)) {
if (start_di)
- pmap_delayed_invl_started();
+ pmap_delayed_invl_started(&et);
mtx_lock(&pv_chunks_mutex);
continue;
} else {
@@ -3271,7 +3178,7 @@
goto next_chunk;
}
} else if (start_di)
- pmap_delayed_invl_started();
+ pmap_delayed_invl_started(&et);
PG_G = pmap_global_bit(pmap);
PG_A = pmap_accessed_bit(pmap);
PG_M = pmap_modified_bit(pmap);
@@ -3313,7 +3220,6 @@
PGA_WRITEABLE);
}
}
- pmap_delayed_invl_page(m);
pc->pc_map[field] |= 1UL << bit;
pmap_unuse_pt(pmap, va, *pde, &free);
freed++;
@@ -3368,7 +3274,7 @@
TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
active_reclaims--;
mtx_unlock(&pv_chunks_mutex);
- reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di);
+ reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di, &et);
if (m_pc == NULL && !SLIST_EMPTY(&free)) {
m_pc = SLIST_FIRST(&free);
SLIST_REMOVE_HEAD(&free, plinks.s.ss);
@@ -4040,7 +3946,6 @@
if (TAILQ_EMPTY(&m->md.pv_list) &&
TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
- pmap_delayed_invl_page(m);
}
}
if (pmap == kernel_pmap) {
@@ -4092,7 +3997,6 @@
if (TAILQ_EMPTY(&pvh->pv_list))
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
- pmap_delayed_invl_page(m);
}
return (pmap_unuse_pt(pmap, va, ptepde, free));
}
@@ -4175,6 +4079,7 @@
pd_entry_t ptpaddr, *pde;
pt_entry_t PG_G, PG_V;
struct spglist free;
+ struct epoch_tracker et;
int anyvalid;
PG_G = pmap_global_bit(pmap);
@@ -4189,7 +4094,7 @@
anyvalid = 0;
SLIST_INIT(&free);
- pmap_delayed_invl_started();
+ pmap_delayed_invl_started(&et);
PMAP_LOCK(pmap);
/*
@@ -4285,7 +4190,7 @@
if (anyvalid)
pmap_invalidate_all(pmap);
PMAP_UNLOCK(pmap);
- pmap_delayed_invl_finished();
+ pmap_delayed_invl_finished(&et);
vm_page_free_pages_toq(&free, true);
}
@@ -4724,9 +4629,6 @@
* or lose information. That is, this routine must actually
* insert this page into the given map NOW.
*
- * When destroying both a page table and PV entry, this function
- * performs the TLB invalidation before releasing the PV list
- * lock, so we do not need pmap_delayed_invl_page() calls here.
*/
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
@@ -5032,6 +4934,7 @@
pd_entry_t oldpde, *pde;
pt_entry_t PG_G, PG_RW, PG_V;
vm_page_t mt, pdpg;
+ struct epoch_tracker et;
PG_G = pmap_global_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
@@ -5071,11 +4974,11 @@
if ((oldpde & PG_G) == 0)
pmap_invalidate_pde_page(pmap, va, oldpde);
} else {
- pmap_delayed_invl_started();
+ pmap_delayed_invl_started(&et);
if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free,
lockp))
pmap_invalidate_all(pmap);
- pmap_delayed_invl_finished();
+ pmap_delayed_invl_finished(&et);
}
vm_page_free_pages_toq(&free, true);
if (va >= VM_MAXUSER_ADDRESS) {
@@ -6589,6 +6492,7 @@
vm_offset_t va, va_next;
vm_page_t m;
boolean_t anychanged;
+ struct epoch_tracker et;
if (advice != MADV_DONTNEED && advice != MADV_FREE)
return;
@@ -6608,7 +6512,7 @@
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
anychanged = FALSE;
- pmap_delayed_invl_started();
+ pmap_delayed_invl_started(&et);
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
pml4e = pmap_pml4e(pmap, sva);
@@ -6705,7 +6609,7 @@
if (anychanged)
pmap_invalidate_all(pmap);
PMAP_UNLOCK(pmap);
- pmap_delayed_invl_finished();
+ pmap_delayed_invl_finished(&et);
}
/*

File Metadata

Mime Type
text/plain
Expires
Fri, Mar 20, 4:32 AM (1 h, 36 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29998567
Default Alt Text
D15983.diff (10 KB)

Event Timeline