D17070: pmap_large_map() KPI
D17070.id48452.diff (23 KB)
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -143,6 +143,7 @@
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>
+#include <sys/_vmem.h>
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
@@ -409,6 +410,10 @@
static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */
+static vmem_t large_vmem;
+static struct mtx lm_lock;
+static u_int lm_ents;
+
int pmap_pcid_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?");
@@ -655,6 +660,7 @@
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
+static vm_page_t pmap_large_map_getpage_unlocked(void);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask);
#if VM_NRESERVLEVEL > 0
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
@@ -1310,7 +1316,7 @@
pmap_init(void)
{
struct pmap_preinit_mapping *ppim;
- vm_page_t mpte;
+ vm_page_t m, mpte;
vm_size_t s;
int error, i, pv_npg, ret, skz63;
@@ -1437,6 +1443,27 @@
(vmem_addr_t *)&qframe);
if (error != 0)
panic("qframe allocation failed");
+
+ lm_ents = 8;
+ TUNABLE_INT_FETCH("vm.pmap.large_map_pml4_entries", &lm_ents);
+ if (lm_ents > LMEPML4I - LMSPML4I + 1)
+ lm_ents = LMEPML4I - LMSPML4I + 1;
+ if (bootverbose)
+ printf("pmap: large map %u PML4 slots (%lu Gb)\n",
+ lm_ents, (u_long)lm_ents * (NBPML4 / 1024 / 1024 / 1024));
+ if (lm_ents != 0 && vmem_init(&large_vmem, "large",
+ LARGEMAP_MIN_ADDRESS, (vmem_size_t)lm_ents * NBPML4, PAGE_SIZE, 0,
+ M_WAITOK) == NULL) {
+ printf("pmap: cannot create large map, %d PML4 slots\n",
+ lm_ents);
+ lm_ents = 0;
+ }
+ for (i = 0; i < lm_ents; i++) {
+ m = pmap_large_map_getpage_unlocked();
+ kernel_pmap->pm_pml4[LMSPML4I + i] = X86_PG_V | X86_PG_RW |
+ X86_PG_A | X86_PG_M | pg_nx | VM_PAGE_TO_PHYS(m);
+ }
+ mtx_init(&lm_lock, "lm", NULL, MTX_DEF);
}
static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
@@ -2312,14 +2339,6 @@
{
sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
- if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
- /*
- * The supplied range is bigger than 2MB.
- * Globally invalidate cache.
- */
- pmap_invalidate_cache();
- return;
- }
/*
* XXX: Some CPUs fault, hang, or trash the local APIC
@@ -2403,6 +2422,66 @@
}
}
+void
+pmap_flush_cache_range(vm_offset_t sva, vm_offset_t eva)
+{
+
+ pmap_invalidate_cache_range_check_align(sva, eva);
+
+ if ((cpu_stdext_feature & CPUID_STDEXT_CLWB) == 0) {
+ pmap_force_invalidate_cache_range(sva, eva);
+ return;
+ }
+
+ if (pmap_kextract(sva) == lapic_paddr)
+ return;
+
+ sfence();
+ for (; sva < eva; sva += cpu_clflush_line_size) {
+ clwb(sva);
+ maybe_yield();
+ }
+ sfence();
+}
+
+void
+pmap_flush_cache_phys_range(vm_paddr_t spa, vm_paddr_t epa, vm_memattr_t mattr)
+{
+ pt_entry_t *pte;
+ vm_offset_t vaddr;
+ int error, pte_bits;
+
+ KASSERT((spa & PAGE_MASK) == 0,
+ ("pmap_flush_cache_phys_range: spa not page-aligned"));
+ KASSERT((epa & PAGE_MASK) == 0,
+ ("pmap_flush_cache_phys_range: epa not page-aligned"));
+
+	if (spa < dmaplimit) {
+		pmap_flush_cache_range(PHYS_TO_DMAP(spa),
+		    PHYS_TO_DMAP(MIN(dmaplimit, epa)));
+		if (dmaplimit >= epa)
+			return;
+		spa = dmaplimit;
+	}
+
+ pte_bits = pmap_cache_bits(kernel_pmap, mattr, 0) | X86_PG_RW |
+ X86_PG_V;
+ error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK,
+ &vaddr);
+ KASSERT(error == 0, ("vmem_alloc failed: %d", error));
+ pte = vtopte(vaddr);
+ for (; spa < epa; spa += PAGE_SIZE) {
+ sched_pin();
+ pte_store(pte, spa | pte_bits);
+ invlpg(vaddr);
+ /* XXXKIB sfences inside flush_cache_range are excessive */
+ pmap_flush_cache_range(vaddr, vaddr + PAGE_SIZE);
+ sched_unpin();
+ maybe_yield();
+ }
+ vmem_free(kernel_arena, vaddr, PAGE_SIZE);
+}
+
/*
* Routine: pmap_extract
* Function:
@@ -2809,6 +2888,10 @@
/* install self-referential address mapping entry(s) */
pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW |
X86_PG_A | X86_PG_M;
+
+ /* install large map entries if configured */
+ for (i = 0; i < lm_ents; i++)
+ pm_pml4[LMSPML4I + i] = kernel_pmap->pm_pml4[LMSPML4I + i];
}
static void
@@ -3155,6 +3238,8 @@
for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
pmap->pm_pml4[DMPML4I + i] = 0;
pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */
+ for (i = 0; i < lm_ents; i++) /* Large Map */
+ pmap->pm_pml4[LMSPML4I + i] = 0;
vm_page_unwire_noq(m);
vm_page_free_zero(m);
@@ -8098,6 +8183,406 @@
mtx_unlock_spin(&qframe_mtx);
}
+static vm_page_t
+pmap_large_map_getpage_unlocked(void)
+{
+ vm_page_t m;
+
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_NOBUSY | VM_ALLOC_ZERO);
+ if (m != NULL && (m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ return (m);
+}
+
+static vm_page_t
+pmap_large_map_getpage(void)
+{
+ vm_page_t m;
+
+ mtx_assert(&lm_lock, MA_OWNED);
+ for (;;) {
+ m = pmap_large_map_getpage_unlocked();
+ if (m != NULL)
+ return (m);
+ mtx_unlock(&lm_lock);
+ vm_wait(NULL);
+ mtx_lock(&lm_lock);
+ }
+}
+
+static pdp_entry_t *
+pmap_large_map_pdpe(vm_offset_t va)
+{
+ pdp_entry_t *pdpe;
+ vm_pindex_t pml4_idx;
+ vm_paddr_t mphys;
+
+ pml4_idx = pmap_pml4e_index(va);
+ KASSERT(LMSPML4I <= pml4_idx && pml4_idx < LMSPML4I + lm_ents,
+ ("pmap_large_map_pdpe: va %#jx out of range idx %#jx LMSPML4I "
+ "%#jx lm_ents %d",
+ (uintmax_t)va, (uintmax_t)pml4_idx, LMSPML4I, lm_ents));
+ KASSERT((kernel_pmap->pm_pml4[pml4_idx] & X86_PG_V) != 0,
+ ("pmap_large_map_pdpe: invalid pml4 for va %#jx idx %#jx "
+ "LMSPML4I %#jx lm_ents %d",
+ (uintmax_t)va, (uintmax_t)pml4_idx, LMSPML4I, lm_ents));
+ mphys = kernel_pmap->pm_pml4[pml4_idx] & ~(PAGE_MASK | pg_nx);
+ pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys) + pmap_pdpe_index(va);
+ return (pdpe);
+}
+
+static pd_entry_t *
+pmap_large_map_pde(vm_offset_t va)
+{
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde;
+ vm_page_t m;
+ vm_pindex_t pd_idx;
+ vm_paddr_t mphys;
+
+ pdpe = pmap_large_map_pdpe(va);
+ if (*pdpe == 0) {
+ m = pmap_large_map_getpage();
+ mphys = VM_PAGE_TO_PHYS(m);
+ *pdpe = mphys | X86_PG_RW | X86_PG_V | X86_PG_G | pg_nx;
+ } else {
+ MPASS((*pdpe & X86_PG_PS) == 0);
+ mphys = *pdpe & ~(PAGE_MASK | pg_nx);
+ }
+
+ pde = (pd_entry_t *)PHYS_TO_DMAP(mphys);
+ pd_idx = pmap_pde_index(va);
+ pde += pd_idx;
+ return (pde);
+}
+
+static pt_entry_t *
+pmap_large_map_pte(vm_offset_t va)
+{
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ vm_page_t m;
+ vm_paddr_t mphys;
+
+ pde = pmap_large_map_pde(va);
+ if (*pde == 0) {
+ m = pmap_large_map_getpage();
+ mphys = VM_PAGE_TO_PHYS(m);
+ *pde = mphys | X86_PG_RW | X86_PG_V | X86_PG_G | pg_nx;
+ PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde))->wire_count++;
+ } else {
+ MPASS((*pde & X86_PG_PS) == 0);
+ mphys = *pde & ~(PAGE_MASK | pg_nx);
+ }
+
+ pte = (pt_entry_t *)PHYS_TO_DMAP(mphys);
+ pte += pmap_pte_index(va);
+
+ return (pte);
+}
+
+static int
+pmap_large_map_getva(vm_size_t len, vm_offset_t align, vm_offset_t phase,
+ vmem_addr_t *vmem_res)
+{
+
+ return (vmem_xalloc(&large_vmem, len, align, phase, 0, VMEM_ADDR_MIN,
+ VMEM_ADDR_MAX, M_NOWAIT | M_FIRSTFIT, vmem_res));
+}
+
+int
+pmap_large_map(vm_paddr_t spa, vm_size_t len, void **addr,
+ vm_memattr_t mattr)
+{
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ vm_offset_t va, inc;
+ vmem_addr_t vmem_res;
+ vm_paddr_t pa;
+ int error;
+
+ if (len == 0 || spa + len < spa)
+ return (EINVAL);
+
+ /* See if DMAP can serve. */
+ if (spa + len <= dmaplimit) {
+ va = PHYS_TO_DMAP(spa);
+ pmap_change_attr(va, len, mattr);
+ *addr = (void *)va;
+ return (0);
+ }
+
+	/*
+	 * No, allocate KVA.  Fit the address with the best possible
+	 * alignment for superpages; fall back to a worse alignment if
+	 * that fails.
+	 */
+ error = ENOMEM;
+ if ((amd_feature & AMDID_PAGE1GB) != 0 && rounddown2(spa + len,
+ NBPDP) >= roundup2(spa, NBPDP) + NBPDP)
+ error = pmap_large_map_getva(len, NBPDP, spa & PDPMASK,
+ &vmem_res);
+ if (error != 0 && rounddown2(spa + len, NBPDR) >= roundup2(spa,
+ NBPDR) + NBPDR)
+ error = pmap_large_map_getva(len, NBPDR, spa & PDRMASK,
+ &vmem_res);
+ if (error != 0)
+ error = pmap_large_map_getva(len, PAGE_SIZE, 0, &vmem_res);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Fill pagetable. PG_M is not pre-set, we scan modified bits
+ * in the pagetable to minimize flushing. No need to
+ * invalidate TLB, since we only update invalid entries.
+ */
+ mtx_lock(&lm_lock);
+ for (pa = spa, va = vmem_res; len > 0; pa += inc, va += inc,
+ len -= inc) {
+ if ((amd_feature & AMDID_PAGE1GB) != 0 && len >= NBPDP &&
+ (pa & PDPMASK) == 0 && (va & PDPMASK) == 0) {
+ pdpe = pmap_large_map_pdpe(va);
+ MPASS(*pdpe == 0);
+ *pdpe = pa | pg_g | X86_PG_PS | X86_PG_RW |
+ X86_PG_V | X86_PG_A | pg_nx |
+ pmap_cache_bits(kernel_pmap, mattr, TRUE);
+ inc = NBPDP;
+ } else if (len >= NBPDR && (pa & PDRMASK) == 0 &&
+ (va & PDRMASK) == 0) {
+ pde = pmap_large_map_pde(va);
+ MPASS(*pde == 0);
+ *pde = pa | pg_g | X86_PG_PS | X86_PG_RW |
+ X86_PG_V | X86_PG_A | pg_nx |
+ pmap_cache_bits(kernel_pmap, mattr, TRUE);
+ PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde))->
+ wire_count++;
+ inc = NBPDR;
+ } else {
+ pte = pmap_large_map_pte(va);
+ MPASS(*pte == 0);
+ *pte = pa | pg_g | X86_PG_RW | X86_PG_V |
+ X86_PG_A | pg_nx | pmap_cache_bits(kernel_pmap,
+ mattr, FALSE);
+ PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte))->
+ wire_count++;
+ inc = PAGE_SIZE;
+ }
+ }
+ mtx_unlock(&lm_lock);
+ MPASS(len == 0);
+
+ *addr = (void *)vmem_res;
+ return (0);
+}
+
+void
+pmap_large_unmap(void *svaa, vm_size_t len)
+{
+ vm_offset_t sva, va;
+ vm_size_t inc;
+ pdp_entry_t *pdpe, pdp;
+ pd_entry_t *pde, pd;
+ pt_entry_t *pte;
+ vm_page_t m;
+ struct spglist spgf;
+
+ sva = (vm_offset_t)svaa;
+ if (len == 0 || sva + len < sva || (sva >= DMAP_MIN_ADDRESS &&
+ sva < DMAP_MIN_ADDRESS + dmaplimit))
+ return;
+
+ SLIST_INIT(&spgf);
+	KASSERT(LARGEMAP_MIN_ADDRESS <= sva &&
+	    sva + len <= LARGEMAP_MIN_ADDRESS + NBPML4 * (u_long)lm_ents,
+	    ("not largemap range %#lx %#lx", (u_long)svaa, (u_long)svaa + len));
+ mtx_lock(&lm_lock);
+ for (va = sva; va < sva + len; va += inc) {
+ pdpe = pmap_large_map_pdpe(va);
+ pdp = *pdpe;
+ KASSERT((pdp & X86_PG_V) != 0,
+ ("invalid pdp va %#lx pdpe %#lx pdp %#lx", va,
+ (u_long)pdpe, pdp));
+ if ((pdp & X86_PG_PS) != 0) {
+ KASSERT((amd_feature & AMDID_PAGE1GB) != 0,
+ ("no 1G pages, va %#lx pdpe %#lx pdp %#lx", va,
+ (u_long)pdpe, pdp));
+ KASSERT((va & PDPMASK) == 0,
+ ("PDPMASK bit set, va %#lx pdpe %#lx pdp %#lx", va,
+ (u_long)pdpe, pdp));
+ KASSERT(len <= NBPDP,
+ ("len < NBPDP, sva %#lx va %#lx pdpe %#lx pdp %#lx "
+ "len %#lx", sva, va, (u_long)pdpe, pdp, len));
+ inc = NBPDP;
+ *pdpe = 0;
+ continue;
+ }
+ pde = pmap_large_map_pde(va);
+ pd = *pde;
+ KASSERT((pd & X86_PG_V) != 0,
+ ("invalid pd va %#lx pde %#lx pd %#lx", va,
+ (u_long)pde, pd));
+ if ((pd & X86_PG_PS) != 0) {
+ KASSERT((va & PDRMASK) == 0,
+ ("PDRMASK bit set, va %#lx pde %#lx pd %#lx", va,
+ (u_long)pde, pd));
+ KASSERT(len <= NBPDR,
+ ("len < NBPDR, sva %#lx va %#lx pde %#lx pd %#lx "
+ "len %#lx", sva, va, (u_long)pde, pd, len));
+ pde_store(pde, 0);
+ inc = NBPDR;
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pde));
+ m->wire_count--;
+ if (m->wire_count == 0) {
+ *pdpe = 0;
+ SLIST_INSERT_HEAD(&spgf, m, plinks.s.ss);
+ }
+ continue;
+ }
+ pte = pmap_large_map_pte(va);
+ KASSERT((*pte & X86_PG_V) != 0,
+ ("invalid pte va %#lx pte %#lx pt %#lx", va,
+ (u_long)pte, *pte));
+ pte_clear(pte);
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pte));
+ m->wire_count--;
+ if (m->wire_count == 0) {
+ *pde = 0;
+ SLIST_INSERT_HEAD(&spgf, m, plinks.s.ss);
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pde));
+ m->wire_count--;
+ if (m->wire_count == 0) {
+ *pdpe = 0;
+ SLIST_INSERT_HEAD(&spgf, m, plinks.s.ss);
+ }
+ }
+ inc = PAGE_SIZE;
+ }
+ mtx_unlock(&lm_lock);
+	pmap_invalidate_range(kernel_pmap, sva, sva + len);
+ vm_page_free_pages_toq(&spgf, false);
+ vmem_free(&large_vmem, sva, len);
+}
+
+static void
+pmap_large_map_wb_fence(void)
+{
+
+	if (cpu_vendor_id != CPU_VENDOR_INTEL)
+		mfence();
+	else if ((cpu_stdext_feature & (CPUID_STDEXT_CLWB |
+	    CPUID_STDEXT_CLFLUSHOPT)) != 0)
+		sfence();
+	/* Otherwise only clflush is used, which is strongly ordered enough. */
+}
+
+static void
+pmap_large_map_flush_range(vm_offset_t va, vm_size_t len)
+{
+
+ for (; len > 0; len -= cpu_clflush_line_size,
+ va += cpu_clflush_line_size) {
+ if ((cpu_stdext_feature & CPUID_STDEXT_CLWB) != 0)
+ clwb(va);
+ else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0)
+ clflushopt(va);
+ else if ((cpu_feature & CPUID_CLFSH) != 0)
+ clflush(va);
+ }
+}
+
+/*
+ * Write-back cache lines for the given address range.
+ *
+ * Must be called only on the range or sub-range returned from
+ * pmap_large_map(). Must not be called on the coalesced ranges.
+ *
+ * Does nothing on CPUs without CLWB, CLFLUSHOPT, or CLFLUSH
+ * instruction support.
+ */
+void
+pmap_large_map_wb(vm_offset_t sva, vm_size_t len)
+{
+ volatile u_long *pe;
+ u_long p;
+ vm_offset_t va, eva;
+ vm_size_t inc;
+ bool seen_other;
+
+ KASSERT(sva >= LARGEMAP_MIN_ADDRESS &&
+ sva + len < LARGEMAP_MIN_ADDRESS + lm_ents * NBPML4,
+ ("pmap_large_map_wb: not largemap %#lx %#lx", sva, len));
+ if ((cpu_feature & CPUID_CLFSH) == 0)
+ return; /* Interface is of no use */
+ eva = sva + len;
+ pmap_large_map_wb_fence();
+ for (va = sva; va < eva; va += inc) {
+ inc = 0;
+ if ((amd_feature & AMDID_PAGE1GB) != 0) {
+ pe = (volatile u_long *)pmap_large_map_pdpe(va);
+ p = *pe;
+ if ((p & X86_PG_PS) != 0)
+ inc = NBPDP;
+ }
+ if (inc == 0) {
+ pe = (volatile u_long *)pmap_large_map_pde(va);
+ p = *pe;
+ if ((p & X86_PG_PS) != 0)
+ inc = NBPDR;
+ }
+ if (inc == 0) {
+ pe = (volatile u_long *)pmap_large_map_pte(va);
+ p = *pe;
+ inc = PAGE_SIZE;
+ }
+ seen_other = false;
+ for (;;) {
+ if ((p & X86_PG_AVAIL1) != 0) {
+ /*
+ * Spin-wait for the end of a parallel
+ * write-back.
+ */
+ cpu_spinwait();
+ p = *pe;
+
+ /*
+ * If we saw other write-back
+				 * occurring, we cannot rely on PG_M to
+ * indicate state of the cache. The
+ * PG_M bit is cleared before the
+ * flush to avoid ignoring new writes,
+ * and writes which are relevant for
+ * us might happen after.
+ */
+ seen_other = true;
+ continue;
+ }
+
+ if ((p & X86_PG_M) != 0 || seen_other) {
+ if (!atomic_fcmpset_long(pe, &p,
+ (p & ~X86_PG_M) | X86_PG_AVAIL1))
+ /*
+ * If we saw PG_M without
+ * PG_AVAIL1, and then on the
+					 * next attempt we observe
+					 * neither PG_M nor PG_AVAIL1,
+					 * another write-back
+ * started after us and
+ * finished before us. We can
+ * rely on it doing our work.
+ */
+ continue;
+ pmap_large_map_flush_range(va, inc);
+ atomic_clear_long(pe, X86_PG_AVAIL1);
+ }
+ break;
+ }
+ maybe_yield();
+ }
+ pmap_large_map_wb_fence();
+}
+
static vm_page_t
pmap_pti_alloc_page(void)
{
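
Editor's aside: the pmap.c hunks above add the pmap_large_map() KPI.  As a
minimal sketch (not part of the diff), the code below shows how a caller
might use the three new entry points declared later in pmap.h.  The
function name example_writeback_region and the physical range are
hypothetical, and the sketch assumes the range lies above dmaplimit so
that the large map, not the direct map, backs it.

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/pmap.h>

static int
example_writeback_region(vm_paddr_t pa, vm_size_t len)	/* hypothetical */
{
	void *va;
	int error;

	/* Map the physical range cacheable; DMAP may serve it directly. */
	error = pmap_large_map(pa, len, &va, VM_MEMATTR_WRITE_BACK);
	if (error != 0)
		return (error);

	/* ... CPU stores into the mapping go here ... */

	/*
	 * Write back the dirtied lines.  Per the comment in the diff, the
	 * range must be the mapping (or a sub-range of it) returned by
	 * pmap_large_map(), never a coalesced range.  In this revision
	 * pmap_large_map_wb() asserts a large-map address, hence the
	 * above-dmaplimit assumption.
	 */
	pmap_large_map_wb((vm_offset_t)va, len);

	/* Tear down the mapping; a DMAP-backed address is left in place. */
	pmap_large_unmap(va, len);
	return (0);
}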
Index: sys/amd64/include/cpufunc.h
===================================================================
--- sys/amd64/include/cpufunc.h
+++ sys/amd64/include/cpufunc.h
@@ -115,6 +115,13 @@
__asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr));
}
+static __inline void
+clwb(u_long addr)
+{
+
+ __asm __volatile("clwb %0" : : "m" (*(char *)addr));
+}
+
static __inline void
clts(void)
{
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -216,6 +216,10 @@
#define KPML4I (NPML4EPG-1)
#define KPDPI (NPDPEPG-2) /* kernbase at -2GB */
+/* Large map: index of the first and max last pml4 entry */
+#define LMSPML4I (PML4PML4I + 1)
+#define LMEPML4I (DMPML4I - 1)
+
/*
* XXX doesn't really belong here I guess...
*/
@@ -413,11 +417,16 @@
int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde);
int pmap_change_attr(vm_offset_t, vm_size_t, int);
void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate);
+void pmap_flush_cache_range(vm_offset_t, vm_offset_t);
+void pmap_flush_cache_phys_range(vm_paddr_t, vm_paddr_t, vm_memattr_t);
void pmap_init_pat(void);
void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
void *pmap_kenter_temporary(vm_paddr_t pa, int i);
vm_paddr_t pmap_kextract(vm_offset_t);
void pmap_kremove(vm_offset_t);
+int pmap_large_map(vm_paddr_t, vm_size_t, void **, vm_memattr_t);
+void pmap_large_map_wb(vm_offset_t sva, vm_size_t len);
+void pmap_large_unmap(void *sva, vm_size_t len);
void *pmap_mapbios(vm_paddr_t, vm_size_t);
void *pmap_mapdev(vm_paddr_t, vm_size_t);
void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
Index: sys/amd64/include/vmparam.h
===================================================================
--- sys/amd64/include/vmparam.h
+++ sys/amd64/include/vmparam.h
@@ -156,7 +156,9 @@
* 0x0000000000000000 - 0x00007fffffffffff user map
* 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole)
* 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot)
- * 0xffff804020101000 - 0xfffff7ffffffffff unused
+ * 0xffff804020101000 - 0xffff807fffffffff unused
+ * 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up)
+ * 0xffff848000000000 - 0xfffff7ffffffffff unused (large map extends there)
* 0xfffff80000000000 - 0xfffffbffffffffff 4TB direct map
* 0xfffffc0000000000 - 0xfffffdffffffffff unused
* 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map
@@ -173,6 +175,9 @@
#define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0)
#define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
+#define LARGEMAP_MIN_ADDRESS KVADDR(LMSPML4I, 0, 0, 0)
+#define LARGEMAP_MAX_ADDRESS KVADDR(LMEPML4I + 1, 0, 0, 0)
+
#define KERNBASE KVADDR(KPML4I, KPDPI, 0, 0)
#define UPT_MAX_ADDRESS KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)
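
Editor's aside: the window documented in the vmparam.h comment above falls
out of the new constants.  On amd64 PML4PML4I is 256, so LMSPML4I is 257;
one PML4 slot spans NBPML4 = 512GB; and pmap_init() defaults lm_ents to 8.
The small user-space sketch below (illustrative only, not part of the
diff) recomputes the boundary addresses.

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define	NBPML4		(1ULL << 39)	/* 512GB covered by one PML4 slot */
#define	LMSPML4I	257		/* PML4PML4I (256) + 1 */

/* KVADDR(i, 0, 0, 0): PML4 slot i, sign-extended to a canonical address. */
static uint64_t
kvaddr(uint64_t pml4_idx)
{
	uint64_t va;

	va = pml4_idx << 39;
	if (va & (1ULL << 47))
		va |= 0xffff000000000000ULL;
	return (va);
}

int
main(void)
{
	uint64_t lm_ents = 8;		/* default set in pmap_init() */

	/* Expected: 0xffff808000000000 and 0xffff848000000000. */
	printf("LARGEMAP_MIN_ADDRESS = %#" PRIx64 "\n", kvaddr(LMSPML4I));
	printf("large map end with %ju slots = %#" PRIx64 "\n",
	    (uintmax_t)lm_ents, kvaddr(LMSPML4I) + lm_ents * NBPML4);
	return (0);
}

The printed values match the 0xffff808000000000 - 0xffff847fffffffff
window in the comment; the vm.pmap.large_map_pml4_entries tunable can
raise lm_ents up to LMEPML4I - LMSPML4I + 1, extending the map into the
region marked as unused.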
Index: sys/kern/subr_vmem.c
===================================================================
--- sys/kern/subr_vmem.c
+++ sys/kern/subr_vmem.c
@@ -73,22 +73,13 @@
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
+#include <sys/_vmem.h>
#include <vm/vm_phys.h>
#include <vm/vm_pagequeue.h>
#include <vm/uma_int.h>
int vmem_startup_count(void);
-#define VMEM_OPTORDER 5
-#define VMEM_OPTVALUE (1 << VMEM_OPTORDER)
-#define VMEM_MAXORDER \
- (VMEM_OPTVALUE - 1 + sizeof(vmem_size_t) * NBBY - VMEM_OPTORDER)
-
-#define VMEM_HASHSIZE_MIN 16
-#define VMEM_HASHSIZE_MAX 131072
-
-#define VMEM_QCACHE_IDX_MAX 16
-
#define VMEM_FITMASK (M_BESTFIT | M_FIRSTFIT)
#define VMEM_FLAGS \
@@ -96,8 +87,6 @@
#define BT_FLAGS (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)
-#define QC_NAME_MAX 16
-
/*
* Data structures private to vmem.
*/
@@ -105,59 +94,8 @@
typedef struct vmem_btag bt_t;
-TAILQ_HEAD(vmem_seglist, vmem_btag);
-LIST_HEAD(vmem_freelist, vmem_btag);
-LIST_HEAD(vmem_hashlist, vmem_btag);
-
-struct qcache {
- uma_zone_t qc_cache;
- vmem_t *qc_vmem;
- vmem_size_t qc_size;
- char qc_name[QC_NAME_MAX];
-};
-typedef struct qcache qcache_t;
#define QC_POOL_TO_QCACHE(pool) ((qcache_t *)(pool->pr_qcache))
-#define VMEM_NAME_MAX 16
-
-/* vmem arena */
-struct vmem {
- struct mtx_padalign vm_lock;
- struct cv vm_cv;
- char vm_name[VMEM_NAME_MAX+1];
- LIST_ENTRY(vmem) vm_alllist;
- struct vmem_hashlist vm_hash0[VMEM_HASHSIZE_MIN];
- struct vmem_freelist vm_freelist[VMEM_MAXORDER];
- struct vmem_seglist vm_seglist;
- struct vmem_hashlist *vm_hashlist;
- vmem_size_t vm_hashsize;
-
- /* Constant after init */
- vmem_size_t vm_qcache_max;
- vmem_size_t vm_quantum_mask;
- vmem_size_t vm_import_quantum;
- int vm_quantum_shift;
-
- /* Written on alloc/free */
- LIST_HEAD(, vmem_btag) vm_freetags;
- int vm_nfreetags;
- int vm_nbusytag;
- vmem_size_t vm_inuse;
- vmem_size_t vm_size;
- vmem_size_t vm_limit;
-
- /* Used on import. */
- vmem_import_t *vm_importfn;
- vmem_release_t *vm_releasefn;
- void *vm_arg;
-
- /* Space exhaustion callback. */
- vmem_reclaim_t *vm_reclaimfn;
-
- /* quantum cache */
- qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX];
-};
-
/* boundary tag */
struct vmem_btag {
TAILQ_ENTRY(vmem_btag) bt_seglist;
Index: sys/sys/_vmem.h
===================================================================
--- /dev/null
+++ sys/sys/_vmem.h
@@ -0,0 +1,101 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
+ * Copyright (c) 2013 EMC Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__VMEM_H_
+#define _SYS__VMEM_H_
+
+#define QC_NAME_MAX 16
+
+struct qcache {
+ uma_zone_t qc_cache;
+ vmem_t *qc_vmem;
+ vmem_size_t qc_size;
+ char qc_name[QC_NAME_MAX];
+};
+
+typedef struct qcache qcache_t;
+
+TAILQ_HEAD(vmem_seglist, vmem_btag);
+LIST_HEAD(vmem_freelist, vmem_btag);
+LIST_HEAD(vmem_hashlist, vmem_btag);
+
+#define VMEM_OPTORDER 5
+#define VMEM_OPTVALUE (1 << VMEM_OPTORDER)
+#define VMEM_MAXORDER \
+ (VMEM_OPTVALUE - 1 + sizeof(vmem_size_t) * NBBY - VMEM_OPTORDER)
+
+#define VMEM_QCACHE_IDX_MAX 16
+
+#define VMEM_HASHSIZE_MIN 16
+#define VMEM_HASHSIZE_MAX 131072
+
+#define VMEM_NAME_MAX 16
+
+/* vmem arena */
+struct vmem {
+ struct mtx_padalign vm_lock;
+ struct cv vm_cv;
+ char vm_name[VMEM_NAME_MAX+1];
+ LIST_ENTRY(vmem) vm_alllist;
+ struct vmem_hashlist vm_hash0[VMEM_HASHSIZE_MIN];
+ struct vmem_freelist vm_freelist[VMEM_MAXORDER];
+ struct vmem_seglist vm_seglist;
+ struct vmem_hashlist *vm_hashlist;
+ vmem_size_t vm_hashsize;
+
+ /* Constant after init */
+ vmem_size_t vm_qcache_max;
+ vmem_size_t vm_quantum_mask;
+ vmem_size_t vm_import_quantum;
+ int vm_quantum_shift;
+
+ /* Written on alloc/free */
+ LIST_HEAD(, vmem_btag) vm_freetags;
+ int vm_nfreetags;
+ int vm_nbusytag;
+ vmem_size_t vm_inuse;
+ vmem_size_t vm_size;
+ vmem_size_t vm_limit;
+
+ /* Used on import. */
+ vmem_import_t *vm_importfn;
+ vmem_release_t *vm_releasefn;
+ void *vm_arg;
+
+ /* Space exhaustion callback. */
+ vmem_reclaim_t *vm_reclaimfn;
+
+ /* quantum cache */
+ qcache_t vm_qcache[VMEM_QCACHE_IDX_MAX];
+};
+
+
+#endif /* !_SYS__VMEM_H_ */
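
Editor's aside: the new sys/_vmem.h exists so that struct vmem is visible
to code that wants to embed an arena statically and set it up with
vmem_init(), as pmap_init() does with large_vmem above, rather than
allocating one at run time with vmem_create().  A minimal sketch of that
pattern follows; the names and bounds are hypothetical, and it assumes the
same include environment as pmap.c, i.e. sys/vmem.h and vm/uma.h are
already included before sys/_vmem.h.

#include <sys/_vmem.h>

static vmem_t example_arena;			/* statically embedded */

#define	EXAMPLE_BASE	0xffffa00000000000UL	/* hypothetical KVA window */
#define	EXAMPLE_SIZE	(64UL << 30)		/* 64GB */

static void
example_arena_init(void)
{

	/* Same vmem_init() call shape as the large_vmem setup in pmap_init(). */
	if (vmem_init(&example_arena, "example", EXAMPLE_BASE, EXAMPLE_SIZE,
	    PAGE_SIZE, 0, M_WAITOK) == NULL)
		panic("example arena initialization failed");
}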