Page MenuHomeFreeBSD

D32569.id97210.diff
No OneTemporary

D32569.id97210.diff

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1495,6 +1495,15 @@
pmap->pm_stats.resident_count += count;
}
+static __inline void
+pmap_pt_page_count_pinit(pmap_t pmap, int count)
+{
+ KASSERT(pmap->pm_stats.resident_count + count >= 0,
+ ("pmap %p resident count underflow %ld %d", pmap,
+ pmap->pm_stats.resident_count, count));
+ pmap->pm_stats.resident_count += count;
+}
+
static __inline void
pmap_pt_page_count_adj(pmap_t pmap, int count)
{
@@ -4344,13 +4353,24 @@
vm_paddr_t pmltop_phys;
int i;
+ bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+
/*
- * Allocate the page directory page. Pass NULL instead of a pointer to
- * the pmap here to avoid recording this page in the resident count, as
- * optimizations in pmap_remove() depend on this.
+ * Allocate the page directory page. Pass NULL instead of a
+ * pointer to the pmap here to avoid calling
+ * pmap_resident_count_adj() through pmap_pt_page_count_adj(),
+ * since that requires the pmap lock. Instead, do the accounting
+ * manually.
+ *
+ * Note that the optimization in the final pmap_remove() call
+ * that checks for a zero resident_count is effectively disabled
+ * by accounting for the top-level page. However, that
+ * optimization has not been effective since we started using a
+ * non-managed mapping of the shared page.
*/
pmltop_pg = pmap_alloc_pt_page(NULL, 0, VM_ALLOC_WIRED | VM_ALLOC_ZERO |
VM_ALLOC_WAITOK);
+ pmap_pt_page_count_pinit(pmap, 1);
pmltop_phys = VM_PAGE_TO_PHYS(pmltop_pg);
pmap->pm_pmltop = (pml5_entry_t *)PHYS_TO_DMAP(pmltop_phys);
@@ -4380,11 +4400,13 @@
pmap_pinit_pml4(pmltop_pg);
if ((curproc->p_md.md_flags & P_MD_KPTI) != 0) {
/*
- * As with pmltop_pg, pass NULL instead of a pointer to
- * the pmap to ensure that the PTI page isn't counted.
+ * As with pmltop_pg, pass NULL instead of a
+ * pointer to the pmap, and count the PTI
+ * page explicitly.
*/
pmltop_pgu = pmap_alloc_pt_page(NULL, 0,
VM_ALLOC_WIRED | VM_ALLOC_WAITOK);
+ pmap_pt_page_count_pinit(pmap, 1);
pmap->pm_pmltopu = (pml4_entry_t *)PHYS_TO_DMAP(
VM_PAGE_TO_PHYS(pmltop_pgu));
if (pmap_is_la57(pmap))
@@ -4407,7 +4429,6 @@
vm_radix_init(&pmap->pm_root);
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
- bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
pmap->pm_flags = flags;
pmap->pm_eptgen = 0;
@@ -4799,9 +4820,6 @@
vm_page_t m;
int i;
- KASSERT(pmap->pm_stats.resident_count == 0,
- ("pmap_release: pmap %p resident count %ld != 0",
- pmap, pmap->pm_stats.resident_count));
KASSERT(vm_radix_is_empty(&pmap->pm_root),
("pmap_release: pmap %p has reserved page table page(s)",
pmap));
@@ -4834,15 +4852,21 @@
}
pmap_free_pt_page(NULL, m, true);
+ pmap_pt_page_count_pinit(pmap, -1);
if (pmap->pm_pmltopu != NULL) {
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->
pm_pmltopu));
pmap_free_pt_page(NULL, m, false);
+ pmap_pt_page_count_pinit(pmap, -1);
}
if (pmap->pm_type == PT_X86 &&
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0)
rangeset_fini(&pmap->pm_pkru);
+
+ KASSERT(pmap->pm_stats.resident_count == 0,
+ ("pmap_release: pmap %p resident count %ld != 0",
+ pmap, pmap->pm_stats.resident_count));
}
static int
@@ -6254,9 +6278,14 @@
PG_V = pmap_valid_bit(pmap);
/*
+ * If there are no resident pages besides the top-level page
+ * table page(s), there is nothing to do. The kernel pmap always
+ * accounts the whole preloaded area as resident, which makes its
+ * resident count > 2.
* Perform an unsynchronized read. This is, however, safe.
*/
- if (pmap->pm_stats.resident_count == 0)
+ if (pmap->pm_stats.resident_count <= 1 +
+ (int)(pmap->pm_pmltopu != NULL))
return;
anyvalid = 0;
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1070,6 +1070,30 @@
umtx_exec(td->td_proc);
}
+/*
+ * This is an optional optimization that removes the unmanaged shared
+ * page mapping. In combination with pmap_remove_pages(), which cleans
+ * all managed mappings in the process' vmspace pmap, it should leave
+ * no work for pmap_remove(min, max).
+ */
+void
+exec_free_abi_mappings(struct proc *p)
+{
+ struct vmspace *vmspace;
+ struct sysentvec *sv;
+
+ vmspace = p->p_vmspace;
+ if (refcount_load(&vmspace->vm_refcnt) != 1)
+ return;
+
+ sv = p->p_sysent;
+ if (sv->sv_shared_page_obj == NULL)
+ return;
+
+ pmap_remove(vmspace_pmap(vmspace), sv->sv_shared_page_base,
+ sv->sv_shared_page_base + sv->sv_shared_page_len);
+}
+
/*
* Destroy old address space, and allocate a new stack.
* The new stack is only sgrowsiz large because it is grown
@@ -1112,6 +1136,7 @@
vm_map_min(map) == sv_minuser &&
vm_map_max(map) == sv->sv_maxuser &&
cpu_exec_vmspace_reuse(p, map)) {
+ exec_free_abi_mappings(p);
shmexit(vmspace);
pmap_remove_pages(vmspace_pmap(vmspace));
vm_map_remove(map, vm_map_min(map), vm_map_max(map));
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -417,6 +417,7 @@
mtx_unlock(&ppeers_lock);
}
+ exec_free_abi_mappings(p);
vmspace_exit(td);
(void)acct_process(td);
diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h
--- a/sys/sys/sysent.h
+++ b/sys/sys/sysent.h
@@ -324,6 +324,7 @@
void exec_inittk(void);
void exit_onexit(struct proc *p);
+void exec_free_abi_mappings(struct proc *p);
void exec_onexec_old(struct thread *td);
#define INIT_SYSENTVEC(name, sv) \

File Metadata

Mime Type
text/plain
Expires
Tue, May 19, 12:17 AM (8 h, 31 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33279034
Default Alt Text
D32569.id97210.diff (5 KB)

Event Timeline