Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F157859438
D19630.id55618.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
D19630.id55618.diff
View Options
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -468,21 +468,45 @@
static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
static struct mtx invl_gen_mtx;
-static u_long pmap_invl_gen = 0;
/* Fake lock object to satisfy turnstiles interface. */
static struct lock_object invl_gen_ts = {
.lo_name = "invlts",
};
+/*
+ * Head entry of the singly-linked DI list walked by the lock-free
+ * (_u) routines.  It starts at generation 1 with no successor.
+ */
+struct pmap_invl_gen pmap_invl_gen_head = {
+ .gen = 1,
+ .next = NULL,
+};
+static u_long pmap_invl_gen = 1;
+
+#define PMAP_ASSERT_NOT_IN_DI() \
+ KASSERT(pmap_not_in_di(), ("DI already started"))
+
+static bool pmap_not_in_di_l(void);
+static bool pmap_not_in_di_u(void);
+/*
+ * Resolver for pmap_not_in_di(): selects pmap_not_in_di_l when the
+ * CPUID_CX8 bit is clear in cpu_feature, and pmap_not_in_di_u
+ * otherwise.
+ * NOTE(review): the _u code path relies on cmpxchg16b (see
+ * pmap_di_load_invl() and pmap_di_store_invl()).  Confirm that
+ * CPUID_CX8 is the right feature test; the name suggests the 8-byte
+ * cmpxchg8b bit rather than the CX16 bit.
+ */
+DEFINE_IFUNC(, bool, pmap_not_in_di, (void), static)
+{
+
+ return ((cpu_feature & CPUID_CX8) == 0 ? pmap_not_in_di_l :
+ pmap_not_in_di_u);
+}
+/*
+ * Locked-variant check: the current thread is outside a DI block
+ * when the generation in its md_invl_gen is zero.
+ */
static bool
-pmap_not_in_di(void)
+pmap_not_in_di_l(void)
{
+ struct pmap_invl_gen *invl_gen;
- return (curthread->td_md.md_invl_gen.gen == 0);
+ invl_gen = &curthread->td_md.md_invl_gen;
+ return (invl_gen->gen == 0);
}
-#define PMAP_ASSERT_NOT_IN_DI() \
- KASSERT(pmap_not_in_di(), ("DI already started"))
+/*
+ * Locked-variant initialization of a thread's DI state: set the
+ * generation to zero, the value pmap_not_in_di_l() treats as "not in
+ * a DI block".
+ */
+static void
+pmap_thread_init_invl_gen_l(struct thread *td)
+{
+ struct pmap_invl_gen *invl_gen;
+
+ invl_gen = &td->td_md.md_invl_gen;
+ invl_gen->gen = 0;
+}
/*
* Start a new Delayed Invalidation (DI) block of code, executed by
@@ -493,7 +517,7 @@
* pmap active.
*/
static void
-pmap_delayed_invl_started(void)
+pmap_delayed_invl_started_l(void)
{
struct pmap_invl_gen *invl_gen;
u_long currgen;
@@ -525,7 +549,7 @@
* current thread's DI.
*/
static void
-pmap_delayed_invl_finished(void)
+pmap_delayed_invl_finished_l(void)
{
struct pmap_invl_gen *invl_gen, *next;
struct turnstile *ts;
@@ -551,6 +575,180 @@
invl_gen->gen = 0;
}
+/*
+ * Lock-free-variant check: the current thread is outside a DI block
+ * when PMAP_INVL_GEN_NEXT_INVALID is set in its md_invl_gen.next.
+ */
+static bool
+pmap_not_in_di_u(void)
+{
+ struct pmap_invl_gen *invl_gen;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ return (((uintptr_t)invl_gen->next & PMAP_INVL_GEN_NEXT_INVALID) != 0);
+}
+
+/*
+ * Lock-free-variant initialization of a thread's DI state: zero the
+ * generation and store PMAP_INVL_GEN_NEXT_INVALID in next, so that
+ * pmap_not_in_di_u() reports the thread as not being in a DI block.
+ */
+static void
+pmap_thread_init_invl_gen_u(struct thread *td)
+{
+ struct pmap_invl_gen *invl_gen;
+
+ invl_gen = &td->td_md.md_invl_gen;
+ invl_gen->gen = 0;
+ invl_gen->next = (void *)PMAP_INVL_GEN_NEXT_INVALID;
+}
+
+/*
+ * Atomically read the 16-byte (gen, next) pair at *ptr into *out.
+ *
+ * The read is performed with a locked cmpxchg16b whose expected and
+ * replacement values are both 0:0, so the value stored at *ptr is the
+ * same afterwards whether or not the comparison succeeds.  On a
+ * mismatch the instruction leaves the current contents in rdx:rax
+ * (old_high:old_low); on a match the contents are known to be 0:0.
+ *
+ * Returns false, without writing *out, when the loaded next pointer
+ * has PMAP_INVL_GEN_NEXT_INVALID set.  Returns true otherwise.
+ */
+static bool
+pmap_di_load_invl(struct pmap_invl_gen *ptr, struct pmap_invl_gen *out)
+{
+ uint64_t new_high, new_low, old_high, old_low;
+ char res;
+
+ old_low = new_low = 0;
+ old_high = new_high = (uintptr_t)0;
+
+ __asm volatile("lock;cmpxchg16b\t%1;sete\t%0"
+ : "=r" (res), "+m" (*ptr), "+a" (old_low), "+d" (old_high)
+ : "b"(new_low), "c" (new_high)
+ : "memory", "cc");
+ /* res == 0: comparison failed, old_* now hold the value read. */
+ if (res == 0) {
+ if ((old_high & PMAP_INVL_GEN_NEXT_INVALID) != 0)
+ return (false);
+ out->gen = old_low;
+ out->next = (void *)old_high;
+ } else {
+ /* Comparison succeeded, so *ptr held 0:0. */
+ out->gen = new_low;
+ out->next = (void *)new_high;
+ }
+ return (true);
+}
+
+/*
+ * 16-byte compare-and-swap on the (gen, next) pair at *ptr: if it
+ * equals *old_val it is replaced with *new_val.  Returns true when
+ * the swap happened (sete captures ZF from cmpxchg16b) and false
+ * otherwise.  *old_val itself is not updated on failure; the value
+ * observed by the instruction only lands in the local copies.
+ */
+static bool
+pmap_di_store_invl(struct pmap_invl_gen *ptr, struct pmap_invl_gen *old_val,
+ struct pmap_invl_gen *new_val)
+{
+
+ uint64_t new_high, new_low, old_high, old_low;
+ char res;
+
+ new_low = new_val->gen;
+ new_high = (uintptr_t)new_val->next;
+ old_low = old_val->gen;
+ old_high = (uintptr_t)old_val->next;
+
+ __asm volatile("lock;cmpxchg16b\t%1;sete\t%0"
+ : "=r" (res), "+m" (*ptr), "+a" (old_low), "+d" (old_high)
+ : "b"(new_low), "c" (new_high)
+ : "memory", "cc");
+ return (res);
+}
+
+/*
+ * Read-only statistics for the lock-free DI code, built only with
+ * PV_STATS:
+ *   invl_start_restart  - restarts taken in pmap_delayed_invl_started_u()
+ *   invl_finish_restart - restarts taken in pmap_delayed_invl_finished_u()
+ *   invl_max_qlen       - largest number of list entries visited by
+ *                         one walk in pmap_delayed_invl_started_u()
+ */
+#ifdef PV_STATS
+static long invl_start_restart;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_start_restart, CTLFLAG_RD,
+ &invl_start_restart, 0,
+ "");
+static long invl_finish_restart;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_finish_restart, CTLFLAG_RD,
+ &invl_finish_restart, 0,
+ "");
+static int invl_max_qlen;
+SYSCTL_INT(_vm_pmap, OID_AUTO, invl_max_qlen, CTLFLAG_RD,
+ &invl_max_qlen, 0,
+ "");
+#endif
+
+/*
+ * Lock-free variant of the DI start.  Walks the list from
+ * pmap_invl_gen_head to its tail (the entry whose next is NULL),
+ * assigns the current thread the tail's generation plus one, and
+ * links the thread's entry after the tail with a 16-byte
+ * compare-and-swap.  The walk restarts from the head whenever an
+ * entry marked PMAP_INVL_GEN_NEXT_INVALID is met or the
+ * compare-and-swap fails.
+ */
+static void
+pmap_delayed_invl_started_u(void)
+{
+ struct pmap_invl_gen *invl_gen, *p, prev, new_prev;
+ /*
+  * PV_STATS (not PV_STAT) is the option that guards invl_max_qlen
+  * and the other counters, so the locals used by the PV_STAT()
+  * statements must be declared under the same symbol.
+  */
+#ifdef PV_STATS
+ int i, ii;
+#endif
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ PMAP_ASSERT_NOT_IN_DI();
+
+again:
+ PV_STAT(i = 0);
+ for (p = &pmap_invl_gen_head;; p = prev.next) {
+ PV_STAT(i++);
+ if (!pmap_di_load_invl(p, &prev)) {
+ PV_STAT(atomic_add_long(&invl_start_restart, 1));
+ goto again;
+ }
+ if (prev.next == NULL)
+ break;
+ }
+#ifdef PV_STATS
+ /* Record the longest walk seen; losing the race is harmless. */
+ if ((ii = invl_max_qlen) < i)
+ atomic_cmpset_int(&invl_max_qlen, ii, i);
+#endif
+
+ new_prev.gen = prev.gen;
+ new_prev.next = invl_gen;
+ invl_gen->gen = prev.gen + 1;
+
+ /*
+ * ABA for *p is not a problem here, since p->gen can only
+ * increase. So if the *p thread finished its di, then
+ * started a new one and got inserted into the list at the
+ * same place, its gen is still greater than the generation
+ * we loaded into prev.
+ */
+ if (!pmap_di_store_invl(p, &prev, &new_prev)) {
+ PV_STAT(atomic_add_long(&invl_start_restart, 1));
+ goto again;
+ }
+
+ /*
+ * Here we clear PMAP_INVL_GEN_NEXT_INVALID in
+ * invl_gen->next, allowing other threads to iterate past us.
+ * pmap_di_store_invl() provides a fence between the write of
+ * the generation and the update of next.
+ */
+ invl_gen->next = NULL;
+}
+
+/*
+ * Lock-free variant of the DI finish.  Marks the current thread's
+ * entry with PMAP_INVL_GEN_NEXT_INVALID, walks the list from
+ * pmap_invl_gen_head to find the entry whose next points at it, and
+ * replaces that predecessor's (gen, next) pair with this thread's
+ * generation and successor using a 16-byte compare-and-swap.  The
+ * walk restarts from the head whenever an invalid entry is met or
+ * the compare-and-swap fails.
+ */
+static void
+pmap_delayed_invl_finished_u(void)
+{
+ struct pmap_invl_gen *invl_gen, *p, prev, new_prev;
+ u_long mygen;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ KASSERT(invl_gen->gen != 0, ("missed invl_start: gen 0"));
+ KASSERT(((uintptr_t)invl_gen->next & PMAP_INVL_GEN_NEXT_INVALID) == 0,
+ ("missed invl_start: INVALID"));
+
+ atomic_set_ptr((uintptr_t *)&invl_gen->next,
+ PMAP_INVL_GEN_NEXT_INVALID);
+
+ /*
+ * Load invl_gen->gen after setting PMAP_INVL_GEN_NEXT_INVALID
+ * in invl_gen->next. This prevents larger generations from
+ * propagating to our invl_gen->gen. The lock prefix in
+ * atomic_set_ptr() works as a seq_cst fence.
+ */
+ mygen = atomic_load_long(&invl_gen->gen);
+
+again:
+ for (p = &pmap_invl_gen_head; p != NULL; p = prev.next) {
+ if (!pmap_di_load_invl(p, &prev)) {
+ PV_STAT(atomic_add_long(&invl_finish_restart, 1));
+ goto again;
+ }
+ if (prev.next == invl_gen)
+ break;
+ }
+ KASSERT(p != NULL, ("not found myself on the di list"));
+ KASSERT(prev.gen < mygen,
+ ("invalid di gen sequence %lu %lu", prev.gen, mygen));
+ /* The predecessor inherits our generation and our successor. */
+ new_prev.gen = mygen;
+ new_prev.next = (void *)((uintptr_t)invl_gen->next &
+ ~PMAP_INVL_GEN_NEXT_INVALID);
+ atomic_thread_fence_rel();
+ if (!pmap_di_store_invl(p, &prev, &new_prev)) {
+ PV_STAT(atomic_add_long(&invl_finish_restart, 1));
+ goto again;
+ }
+}
+
#ifdef PV_STATS
static long invl_wait;
SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0,
@@ -579,7 +777,7 @@
* processor.
*/
static void
-pmap_delayed_invl_wait(vm_page_t m)
+pmap_delayed_invl_wait_l(vm_page_t m)
{
struct turnstile *ts;
u_long *m_gen;
@@ -603,6 +801,54 @@
}
}
+/*
+ * Lock-free variant of the DI wait.  Yields the CPU with
+ * kern_yield(PRI_USER) for as long as page m's DI generation is
+ * greater than the generation stored in pmap_invl_gen_head.  With
+ * PV_STATS, invl_wait is incremented once per call that had to wait.
+ */
+static void
+pmap_delayed_invl_wait_u(vm_page_t m)
+{
+ u_long *m_gen;
+#ifdef PV_STATS
+ bool accounted = false;
+#endif
+
+ m_gen = pmap_delayed_invl_genp(m);
+ while (*m_gen > atomic_load_long(&pmap_invl_gen_head.gen)) {
+#ifdef PV_STATS
+ if (!accounted) {
+ atomic_add_long(&invl_wait, 1);
+ accounted = true;
+ }
+#endif
+ kern_yield(PRI_USER);
+ }
+}
+
+/*
+ * Resolver for pmap_thread_init_invl_gen(): the _l variant when the
+ * CPUID_CX8 bit is clear in cpu_feature, the _u variant otherwise.
+ * NOTE(review): the _u code path relies on cmpxchg16b; confirm that
+ * CPUID_CX8 (rather than a CX16 bit) is the intended feature test.
+ */
+DEFINE_IFUNC(, void, pmap_thread_init_invl_gen, (struct thread *), static)
+{
+
+ return ((cpu_feature & CPUID_CX8) == 0 ? pmap_thread_init_invl_gen_l :
+ pmap_thread_init_invl_gen_u);
+}
+
+/*
+ * Resolver for pmap_delayed_invl_started(): the _l variant when the
+ * CPUID_CX8 bit is clear in cpu_feature, the _u variant otherwise.
+ * NOTE(review): the _u variant executes cmpxchg16b; confirm that
+ * CPUID_CX8 (rather than a CX16 bit) is the intended feature test.
+ */
+DEFINE_IFUNC(static, void, pmap_delayed_invl_started, (void), static)
+{
+
+ return ((cpu_feature & CPUID_CX8) == 0 ? pmap_delayed_invl_started_l :
+ pmap_delayed_invl_started_u);
+}
+
+/*
+ * Resolver for pmap_delayed_invl_finished(): the _l variant when the
+ * CPUID_CX8 bit is clear in cpu_feature, the _u variant otherwise.
+ * NOTE(review): the _u variant executes cmpxchg16b; confirm that
+ * CPUID_CX8 (rather than a CX16 bit) is the intended feature test.
+ */
+DEFINE_IFUNC(static, void, pmap_delayed_invl_finished, (void), static)
+{
+
+ return ((cpu_feature & CPUID_CX8) == 0 ? pmap_delayed_invl_finished_l :
+ pmap_delayed_invl_finished_u);
+}
+
+/*
+ * Resolver for pmap_delayed_invl_wait(): the _l variant when the
+ * CPUID_CX8 bit is clear in cpu_feature, the _u variant otherwise.
+ * NOTE(review): the other _u routines rely on cmpxchg16b; confirm
+ * that CPUID_CX8 (rather than a CX16 bit) is the intended feature
+ * test.
+ */
+DEFINE_IFUNC(static, void, pmap_delayed_invl_wait, (vm_page_t), static)
+{
+
+ return ((cpu_feature & CPUID_CX8) == 0 ? pmap_delayed_invl_wait_l :
+ pmap_delayed_invl_wait_u);
+}
+
/*
* Mark the page m's PV list as participating in the current thread's
* DI block. Any threads concurrently using m's PV list to remove or
@@ -2854,6 +3100,7 @@
pmap_pinit0(pmap_t pmap)
{
struct proc *p;
+ struct thread *td;
int i;
PMAP_LOCK_INIT(pmap);
@@ -2872,12 +3119,14 @@
pmap->pm_pcids[i].pm_gen = 1;
}
pmap_activate_boot(pmap);
+ td = curthread;
if (pti) {
- p = curproc;
+ p = td->td_proc;
PROC_LOCK(p);
p->p_md.md_flags |= P_MD_KPTI;
PROC_UNLOCK(p);
}
+ pmap_thread_init_invl_gen(td);
if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
pmap_pkru_ranges_zone = uma_zcreate("pkru ranges",
Index: sys/amd64/amd64/trap.c
===================================================================
--- sys/amd64/amd64/trap.c
+++ sys/amd64/amd64/trap.c
@@ -1183,7 +1183,7 @@
KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
("System call %s returning with mangled pcb_save",
syscallname(td->td_proc, td->td_sa.code)));
- KASSERT(td->td_md.md_invl_gen.gen == 0,
+ KASSERT(pmap_not_in_di(),
("System call %s returning with leaked invl_gen %lu",
syscallname(td->td_proc, td->td_sa.code),
td->td_md.md_invl_gen.gen));
Index: sys/amd64/amd64/vm_machdep.c
===================================================================
--- sys/amd64/amd64/vm_machdep.c
+++ sys/amd64/amd64/vm_machdep.c
@@ -228,7 +228,7 @@
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
- td2->td_md.md_invl_gen.gen = 0;
+ pmap_thread_init_invl_gen(td2);
/* As an i386, do not copy io permission bitmap. */
pcb2->pcb_tssp = NULL;
@@ -544,6 +544,7 @@
/* Setup to release spin count in fork_exit(). */
td->td_md.md_spinlock_count = 1;
td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+ pmap_thread_init_invl_gen(td);
}
/*
Index: sys/amd64/conf/X
===================================================================
--- sys/amd64/conf/X
+++ sys/amd64/conf/X
@@ -86,3 +86,4 @@
device uart
device random
+options PV_STATS
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -441,6 +441,7 @@
void *pmap_mapdev(vm_paddr_t, vm_size_t);
void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
void *pmap_mapdev_pciecfg(vm_paddr_t pa, vm_size_t size);
+bool pmap_not_in_di(void);
boolean_t pmap_page_is_mapped(vm_page_t m);
void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
void pmap_pinit_pml4(vm_page_t);
@@ -465,6 +466,7 @@
int pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
u_int keyidx, int flags);
+void pmap_thread_init_invl_gen(struct thread *td);
int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
#endif /* _KERNEL */
Index: sys/amd64/include/proc.h
===================================================================
--- sys/amd64/include/proc.h
+++ sys/amd64/include/proc.h
@@ -50,10 +50,14 @@
int ldt_refcnt;
};
+/*
+ * Flag kept in the low bit of pmap_invl_gen.next.  The pmap code
+ * treats an entry with this bit set as not traversable.  gen and next
+ * together form the 16-byte value operated on with cmpxchg16b, which
+ * requires a 16-byte aligned operand, hence __aligned(16) below.
+ */
+#define PMAP_INVL_GEN_NEXT_INVALID 0x1ULL
struct pmap_invl_gen {
u_long gen; /* (k) */
- LIST_ENTRY(pmap_invl_gen) link; /* (pp) */
-};
+ union {
+ LIST_ENTRY(pmap_invl_gen) link; /* (pp) */
+ struct pmap_invl_gen *next;
+ };
+} __aligned(16);
/*
* Machine-dependent part of the proc structure for AMD64.
Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -84,7 +84,7 @@
"struct thread KBI td_pflags");
_Static_assert(offsetof(struct thread, td_frame) == 0x478,
"struct thread KBI td_frame");
-_Static_assert(offsetof(struct thread, td_emuldata) == 0x530,
+_Static_assert(offsetof(struct thread, td_emuldata) == 0x548,
"struct thread KBI td_emuldata");
_Static_assert(offsetof(struct proc, p_flag) == 0xb0,
"struct proc KBI p_flag");
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, May 26, 9:49 PM (12 h, 43 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33539237
Default Alt Text
D19630.id55618.diff (11 KB)
Attached To
Mode
D19630: amd64 pmap: rework di removing global mutex
Attached
Detach File
Event Timeline
Log In to Comment