D17070.id49178.diff

Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -409,6 +409,9 @@
static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */
+static vmem_t *large_vmem;
+static u_int lm_ents;
+
int pmap_pcid_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
&pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?");
@@ -655,6 +658,7 @@
static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va,
pd_entry_t pde);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
+static vm_page_t pmap_large_map_getptp_unlocked(void);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask);
#if VM_NRESERVLEVEL > 0
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
@@ -1313,7 +1317,7 @@
pmap_init(void)
{
struct pmap_preinit_mapping *ppim;
- vm_page_t mpte;
+ vm_page_t m, mpte;
vm_size_t s;
int error, i, pv_npg, ret, skz63;
@@ -1440,6 +1444,28 @@
(vmem_addr_t *)&qframe);
if (error != 0)
panic("qframe allocation failed");
+
+ lm_ents = 8;
+ TUNABLE_INT_FETCH("vm.pmap.large_map_pml4_entries", &lm_ents);
+ if (lm_ents > LMEPML4I - LMSPML4I + 1)
+ lm_ents = LMEPML4I - LMSPML4I + 1;
+ if (bootverbose)
+ printf("pmap: large map %u PML4 slots (%lu Gb)\n",
+ lm_ents, (u_long)lm_ents * (NBPML4 / 1024 / 1024 / 1024));
+ if (lm_ents != 0) {
+ large_vmem = vmem_create("large", LARGEMAP_MIN_ADDRESS,
+ (vmem_size_t)lm_ents * NBPML4, PAGE_SIZE, 0, M_WAITOK);
+ if (large_vmem == NULL) {
+ printf("pmap: cannot create large map\n");
+ lm_ents = 0;
+ }
+ for (i = 0; i < lm_ents; i++) {
+ m = pmap_large_map_getptp_unlocked();
+ kernel_pmap->pm_pml4[LMSPML4I + i] = X86_PG_V |
+ X86_PG_RW | X86_PG_A | X86_PG_M | pg_nx |
+ VM_PAGE_TO_PHYS(m);
+ }
+ }
}
static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
@@ -2315,14 +2341,6 @@
{
sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1);
- if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) {
- /*
- * The supplied range is bigger than 2MB.
- * Globally invalidate cache.
- */
- pmap_invalidate_cache();
- return;
- }
/*
* XXX: Some CPUs fault, hang, or trash the local APIC
@@ -2406,6 +2424,64 @@
}
}
+void
+pmap_flush_cache_range(vm_offset_t sva, vm_offset_t eva)
+{
+
+ pmap_invalidate_cache_range_check_align(sva, eva);
+
+ if ((cpu_stdext_feature & CPUID_STDEXT_CLWB) == 0) {
+ pmap_force_invalidate_cache_range(sva, eva);
+ return;
+ }
+
+ /* See comment in pmap_force_invalidate_cache_range(). */
+ if (pmap_kextract(sva) == lapic_paddr)
+ return;
+
+ sfence();
+ for (; sva < eva; sva += cpu_clflush_line_size)
+ clwb(sva);
+ sfence();
+}
+
+void
+pmap_flush_cache_phys_range(vm_paddr_t spa, vm_paddr_t epa, vm_memattr_t mattr)
+{
+ pt_entry_t *pte;
+ vm_offset_t vaddr;
+ int error, pte_bits;
+
+ KASSERT((spa & PAGE_MASK) == 0,
+ ("pmap_flush_cache_phys_range: spa not page-aligned"));
+ KASSERT((epa & PAGE_MASK) == 0,
+ ("pmap_flush_cache_phys_range: epa not page-aligned"));
+
+ if (spa < dmaplimit) {
+ pmap_flush_cache_range(PHYS_TO_DMAP(spa), PHYS_TO_DMAP(MIN(
+ dmaplimit, epa)));
+ if (dmaplimit >= epa)
+ return;
+ spa = dmaplimit;
+ }
+
+ pte_bits = pmap_cache_bits(kernel_pmap, mattr, 0) | X86_PG_RW |
+ X86_PG_V;
+ error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK,
+ &vaddr);
+ KASSERT(error == 0, ("vmem_alloc failed: %d", error));
+ pte = vtopte(vaddr);
+ for (; spa < epa; spa += PAGE_SIZE) {
+ sched_pin();
+ pte_store(pte, spa | pte_bits);
+ invlpg(vaddr);
+ /* XXXKIB sfences inside flush_cache_range are excessive */
+ pmap_flush_cache_range(vaddr, vaddr + PAGE_SIZE);
+ sched_unpin();
+ }
+ vmem_free(kernel_arena, vaddr, PAGE_SIZE);
+}
+
/*
* Routine: pmap_extract
* Function:
@@ -2812,6 +2888,10 @@
/* install self-referential address mapping entry(s) */
pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW |
X86_PG_A | X86_PG_M;
+
+ /* install large map entries if configured */
+ for (i = 0; i < lm_ents; i++)
+ pm_pml4[LMSPML4I + i] = kernel_pmap->pm_pml4[LMSPML4I + i];
}
static void
@@ -3158,6 +3238,8 @@
for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
pmap->pm_pml4[DMPML4I + i] = 0;
pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */
+ for (i = 0; i < lm_ents; i++) /* Large Map */
+ pmap->pm_pml4[LMSPML4I + i] = 0;
vm_page_unwire_noq(m);
vm_page_free_zero(m);
@@ -8101,6 +8183,487 @@
mtx_unlock_spin(&qframe_mtx);
}
+/*
+ * Pdp pages from the large map are managed differently from either
+ * kernel or user page table pages. They are permanently allocated at
+ * initialization time, and their wire count is permanently set to
+ * zero. The pml4 entries pointing to those pages are copied into
+ * each allocated pmap.
+ *
+ * In contrast, pd and pt pages are managed like user page table
+ * pages. They are dynamically allocated, and their wire count
+ * represents the number of valid entries within the page.
+ */
+static vm_page_t
+pmap_large_map_getptp_unlocked(void)
+{
+ vm_page_t m;
+
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_ZERO);
+ if (m != NULL && (m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ return (m);
+}
+
+static vm_page_t
+pmap_large_map_getptp(void)
+{
+ vm_page_t m;
+
+ PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
+ m = pmap_large_map_getptp_unlocked();
+ if (m == NULL) {
+ PMAP_UNLOCK(kernel_pmap);
+ vm_wait(NULL);
+ PMAP_LOCK(kernel_pmap);
+ /* Callers retry. */
+ }
+ return (m);
+}
+
+static pdp_entry_t *
+pmap_large_map_pdpe(vm_offset_t va)
+{
+ vm_pindex_t pml4_idx;
+ vm_paddr_t mphys;
+
+ pml4_idx = pmap_pml4e_index(va);
+ KASSERT(LMSPML4I <= pml4_idx && pml4_idx < LMSPML4I + lm_ents,
+ ("pmap_large_map_pdpe: va %#jx out of range idx %#jx LMSPML4I "
+ "%#jx lm_ents %d",
+ (uintmax_t)va, (uintmax_t)pml4_idx, LMSPML4I, lm_ents));
+ KASSERT((kernel_pmap->pm_pml4[pml4_idx] & X86_PG_V) != 0,
+ ("pmap_large_map_pdpe: invalid pml4 for va %#jx idx %#jx "
+ "LMSPML4I %#jx lm_ents %d",
+ (uintmax_t)va, (uintmax_t)pml4_idx, LMSPML4I, lm_ents));
+ mphys = kernel_pmap->pm_pml4[pml4_idx] & PG_FRAME;
+ return ((pdp_entry_t *)PHYS_TO_DMAP(mphys) + pmap_pdpe_index(va));
+}
+
+static pd_entry_t *
+pmap_large_map_pde(vm_offset_t va)
+{
+ pdp_entry_t *pdpe;
+ vm_page_t m;
+ vm_paddr_t mphys;
+
+ m = NULL;
+retry:
+ pdpe = pmap_large_map_pdpe(va);
+ if (*pdpe == 0) {
+ if (m == NULL) {
+ m = pmap_large_map_getptp();
+ goto retry;
+ }
+ mphys = VM_PAGE_TO_PHYS(m);
+ m = NULL;
+ *pdpe = mphys | X86_PG_A | X86_PG_RW | X86_PG_V | pg_nx;
+ } else {
+ MPASS((*pdpe & X86_PG_PS) == 0);
+ mphys = *pdpe & PG_FRAME;
+ }
+ if (m != NULL)
+ vm_page_free_toq(m);
+ return ((pd_entry_t *)PHYS_TO_DMAP(mphys) + pmap_pde_index(va));
+}
+
+static pt_entry_t *
+pmap_large_map_pte(vm_offset_t va)
+{
+ pd_entry_t *pde;
+ vm_page_t m;
+ vm_paddr_t mphys;
+
+ m = NULL;
+retry:
+ pde = pmap_large_map_pde(va);
+ if (*pde == 0) {
+ if (m == NULL) {
+ m = pmap_large_map_getptp();
+ goto retry;
+ }
+ mphys = VM_PAGE_TO_PHYS(m);
+ m = NULL;
+ *pde = mphys | X86_PG_A | X86_PG_RW | X86_PG_V | pg_nx;
+ PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde))->wire_count++;
+ } else {
+ MPASS((*pde & X86_PG_PS) == 0);
+ mphys = *pde & PG_FRAME;
+ }
+ if (m != NULL)
+ vm_page_free_toq(m);
+ return ((pt_entry_t *)PHYS_TO_DMAP(mphys) + pmap_pte_index(va));
+}
+
+static int
+pmap_large_map_getva(vm_size_t len, vm_offset_t align, vm_offset_t phase,
+ vmem_addr_t *vmem_res)
+{
+
+ /*
+ * Large mappings are all but static. Consequently, there
+ * is no point in waiting for an earlier allocation to be
+ * freed.
+ */
+ return (vmem_xalloc(large_vmem, len, align, phase, 0, VMEM_ADDR_MIN,
+ VMEM_ADDR_MAX, M_NOWAIT | M_BESTFIT, vmem_res));
+}
+
+int
+pmap_large_map(vm_paddr_t spa, vm_size_t len, void **addr,
+ vm_memattr_t mattr)
+{
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+ vm_offset_t va, inc;
+ vmem_addr_t vmem_res;
+ vm_paddr_t pa;
+ int error;
+
+ if (len == 0 || spa + len < spa)
+ return (EINVAL);
+
+ /* See if DMAP can serve. */
+ if (spa + len <= dmaplimit) {
+ va = PHYS_TO_DMAP(spa);
+ *addr = (void *)va;
+ return (pmap_change_attr(va, len, mattr));
+ }
+
+ /*
+ * No, allocate KVA. Fit the address with best possible
+ * alignment for superpages. Fall back to worse align if
+ * failed.
+ */
+ error = ENOMEM;
+ if ((amd_feature & AMDID_PAGE1GB) != 0 && rounddown2(spa + len,
+ NBPDP) >= roundup2(spa, NBPDP) + NBPDP)
+ error = pmap_large_map_getva(len, NBPDP, spa & PDPMASK,
+ &vmem_res);
+ if (error != 0 && rounddown2(spa + len, NBPDR) >= roundup2(spa,
+ NBPDR) + NBPDR)
+ error = pmap_large_map_getva(len, NBPDR, spa & PDRMASK,
+ &vmem_res);
+ if (error != 0)
+ error = pmap_large_map_getva(len, PAGE_SIZE, 0, &vmem_res);
+ if (error != 0)
+ return (error);
+
+ /*
+ * Fill pagetable. PG_M is not pre-set, we scan modified bits
+ * in the pagetable to minimize flushing. No need to
+ * invalidate TLB, since we only update invalid entries.
+ */
+ PMAP_LOCK(kernel_pmap);
+ for (pa = spa, va = vmem_res; len > 0; pa += inc, va += inc,
+ len -= inc) {
+ if ((amd_feature & AMDID_PAGE1GB) != 0 && len >= NBPDP &&
+ (pa & PDPMASK) == 0 && (va & PDPMASK) == 0) {
+ pdpe = pmap_large_map_pdpe(va);
+ MPASS(*pdpe == 0);
+ *pdpe = pa | pg_g | X86_PG_PS | X86_PG_RW |
+ X86_PG_V | X86_PG_A | pg_nx |
+ pmap_cache_bits(kernel_pmap, mattr, TRUE);
+ inc = NBPDP;
+ } else if (len >= NBPDR && (pa & PDRMASK) == 0 &&
+ (va & PDRMASK) == 0) {
+ pde = pmap_large_map_pde(va);
+ MPASS(*pde == 0);
+ *pde = pa | pg_g | X86_PG_PS | X86_PG_RW |
+ X86_PG_V | X86_PG_A | pg_nx |
+ pmap_cache_bits(kernel_pmap, mattr, TRUE);
+ PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde))->
+ wire_count++;
+ inc = NBPDR;
+ } else {
+ pte = pmap_large_map_pte(va);
+ MPASS(*pte == 0);
+ *pte = pa | pg_g | X86_PG_RW | X86_PG_V |
+ X86_PG_A | pg_nx | pmap_cache_bits(kernel_pmap,
+ mattr, FALSE);
+ PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte))->
+ wire_count++;
+ inc = PAGE_SIZE;
+ }
+ }
+ PMAP_UNLOCK(kernel_pmap);
+ MPASS(len == 0);
+
+ *addr = (void *)vmem_res;
+ return (0);
+}
+
+void
+pmap_large_unmap(void *svaa, vm_size_t len)
+{
+ vm_offset_t sva, va;
+ vm_size_t inc;
+ pdp_entry_t *pdpe, pdp;
+ pd_entry_t *pde, pd;
+ pt_entry_t *pte;
+ vm_page_t m;
+ struct spglist spgf;
+
+ sva = (vm_offset_t)svaa;
+ if (len == 0 || sva + len < sva || (sva >= DMAP_MIN_ADDRESS &&
+ sva + len <= DMAP_MIN_ADDRESS + dmaplimit))
+ return;
+
+ SLIST_INIT(&spgf);
+ KASSERT(LARGEMAP_MIN_ADDRESS <= sva && sva + len <=
+ LARGEMAP_MAX_ADDRESS + NBPML4 * (u_long)lm_ents,
+ ("not largemap range %#lx %#lx", (u_long)svaa, (u_long)svaa + len));
+ PMAP_LOCK(kernel_pmap);
+ for (va = sva; va < sva + len; va += inc) {
+ pdpe = pmap_large_map_pdpe(va);
+ pdp = *pdpe;
+ KASSERT((pdp & X86_PG_V) != 0,
+ ("invalid pdp va %#lx pdpe %#lx pdp %#lx", va,
+ (u_long)pdpe, pdp));
+ if ((pdp & X86_PG_PS) != 0) {
+ KASSERT((amd_feature & AMDID_PAGE1GB) != 0,
+ ("no 1G pages, va %#lx pdpe %#lx pdp %#lx", va,
+ (u_long)pdpe, pdp));
+ KASSERT((va & PDPMASK) == 0,
+ ("PDPMASK bit set, va %#lx pdpe %#lx pdp %#lx", va,
+ (u_long)pdpe, pdp));
+ KASSERT(len <= NBPDP,
+ ("len < NBPDP, sva %#lx va %#lx pdpe %#lx pdp %#lx "
+ "len %#lx", sva, va, (u_long)pdpe, pdp, len));
+ *pdpe = 0;
+ inc = NBPDP;
+ continue;
+ }
+ pde = pmap_pdpe_to_pde(pdpe, va);
+ pd = *pde;
+ KASSERT((pd & X86_PG_V) != 0,
+ ("invalid pd va %#lx pde %#lx pd %#lx", va,
+ (u_long)pde, pd));
+ if ((pd & X86_PG_PS) != 0) {
+ KASSERT((va & PDRMASK) == 0,
+ ("PDRMASK bit set, va %#lx pde %#lx pd %#lx", va,
+ (u_long)pde, pd));
+ KASSERT(len <= NBPDR,
+ ("len < NBPDR, sva %#lx va %#lx pde %#lx pd %#lx "
+ "len %#lx", sva, va, (u_long)pde, pd, len));
+ pde_store(pde, 0);
+ inc = NBPDR;
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pde));
+ m->wire_count--;
+ if (m->wire_count == 0) {
+ *pdpe = 0;
+ SLIST_INSERT_HEAD(&spgf, m, plinks.s.ss);
+ }
+ continue;
+ }
+ pte = pmap_pde_to_pte(pde, va);
+ KASSERT((*pte & X86_PG_V) != 0,
+ ("invalid pte va %#lx pte %#lx pt %#lx", va,
+ (u_long)pte, *pte));
+ pte_clear(pte);
+ inc = PAGE_SIZE;
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pte));
+ m->wire_count--;
+ if (m->wire_count == 0) {
+ *pde = 0;
+ SLIST_INSERT_HEAD(&spgf, m, plinks.s.ss);
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pde));
+ m->wire_count--;
+ if (m->wire_count == 0) {
+ *pdpe = 0;
+ SLIST_INSERT_HEAD(&spgf, m, plinks.s.ss);
+ }
+ }
+ }
+ pmap_invalidate_range(kernel_pmap, sva, sva + len);
+ PMAP_UNLOCK(kernel_pmap);
+ vm_page_free_pages_toq(&spgf, false);
+ vmem_free(large_vmem, sva, len);
+}
+
+static void
+pmap_large_map_wb_fence_mfence(void)
+{
+
+ mfence();
+}
+
+static void
+pmap_large_map_wb_fence_sfence(void)
+{
+
+ sfence();
+}
+
+static void
+pmap_large_map_wb_fence_nop(void)
+{
+}
+
+DEFINE_IFUNC(static, void, pmap_large_map_wb_fence, (void), static)
+{
+
+ if (cpu_vendor_id != CPU_VENDOR_INTEL)
+ return (pmap_large_map_wb_fence_mfence);
+ else if ((cpu_stdext_feature & (CPUID_STDEXT_CLWB |
+ CPUID_STDEXT_CLFLUSHOPT)) == 0)
+ return (pmap_large_map_wb_fence_sfence);
+ else
+ /* clflush is ordered strongly enough */
+ return (pmap_large_map_wb_fence_nop);
+}
+
+static void
+pmap_large_map_flush_range_clwb(vm_offset_t va, vm_size_t len)
+{
+
+ for (; len > 0; len -= cpu_clflush_line_size,
+ va += cpu_clflush_line_size)
+ clwb(va);
+}
+
+static void
+pmap_large_map_flush_range_clflushopt(vm_offset_t va, vm_size_t len)
+{
+
+ for (; len > 0; len -= cpu_clflush_line_size,
+ va += cpu_clflush_line_size)
+ clflushopt(va);
+}
+
+static void
+pmap_large_map_flush_range_clflush(vm_offset_t va, vm_size_t len)
+{
+
+ for (; len > 0; len -= cpu_clflush_line_size,
+ va += cpu_clflush_line_size)
+ clflush(va);
+}
+
+static void
+pmap_large_map_flush_range_nop(vm_offset_t sva __unused, vm_size_t len __unused)
+{
+}
+
+DEFINE_IFUNC(static, void, pmap_large_map_flush_range, (vm_offset_t, vm_size_t),
+ static)
+{
+
+ if ((cpu_stdext_feature & CPUID_STDEXT_CLWB) != 0)
+ return (pmap_large_map_flush_range_clwb);
+ else if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0)
+ return (pmap_large_map_flush_range_clflushopt);
+ else if ((cpu_feature & CPUID_CLFSH) != 0)
+ return (pmap_large_map_flush_range_clflush);
+ else
+ return (pmap_large_map_flush_range_nop);
+}
+
+static void
+pmap_large_map_wb_large(vm_offset_t sva, vm_offset_t eva)
+{
+ volatile u_long *pe;
+ u_long p;
+ vm_offset_t va;
+ vm_size_t inc;
+ bool seen_other;
+
+ for (va = sva; va < eva; va += inc) {
+ inc = 0;
+ if ((amd_feature & AMDID_PAGE1GB) != 0) {
+ pe = (volatile u_long *)pmap_large_map_pdpe(va);
+ p = *pe;
+ if ((p & X86_PG_PS) != 0)
+ inc = NBPDP;
+ }
+ if (inc == 0) {
+ pe = (volatile u_long *)pmap_large_map_pde(va);
+ p = *pe;
+ if ((p & X86_PG_PS) != 0)
+ inc = NBPDR;
+ }
+ if (inc == 0) {
+ pe = (volatile u_long *)pmap_large_map_pte(va);
+ p = *pe;
+ inc = PAGE_SIZE;
+ }
+ seen_other = false;
+ for (;;) {
+ if ((p & X86_PG_AVAIL1) != 0) {
+ /*
+ * Spin-wait for the end of a parallel
+ * write-back.
+ */
+ cpu_spinwait();
+ p = *pe;
+
+ /*
+ * If we saw other write-back
+ * occurring, we cannot rely on PG_M to
+ * indicate state of the cache. The
+ * PG_M bit is cleared before the
+ * flush to avoid ignoring new writes,
+ * and writes which are relevant for
+ * us might happen after.
+ */
+ seen_other = true;
+ continue;
+ }
+
+ if ((p & X86_PG_M) != 0 || seen_other) {
+ if (!atomic_fcmpset_long(pe, &p,
+ (p & ~X86_PG_M) | X86_PG_AVAIL1))
+ /*
+ * If we saw PG_M without
+ * PG_AVAIL1, and then on the
+ * next attempt we do not
+ * observe either PG_M or
+ * PG_AVAIL1, the other
+ * write-back started after us
+ * and finished before us. We
+ * can rely on it doing our
+ * work.
+ */
+ continue;
+ pmap_large_map_flush_range(va, inc);
+ atomic_clear_long(pe, X86_PG_AVAIL1);
+ }
+ break;
+ }
+ maybe_yield();
+ }
+}
+
+/*
+ * Write-back cache lines for the given address range.
+ *
+ * Must be called only on the range or sub-range returned from
+ * pmap_large_map(). Must not be called on the coalesced ranges.
+ *
+ * Does nothing on CPUs without CLWB, CLFLUSHOPT, or CLFLUSH
+ * instruction support.
+ */
+void
+pmap_large_map_wb(void *svap, vm_size_t len)
+{
+ vm_offset_t eva, sva;
+
+ sva = (vm_offset_t)svap;
+ eva = sva + len;
+ pmap_large_map_wb_fence();
+ if (sva >= DMAP_MIN_ADDRESS && eva <= DMAP_MIN_ADDRESS + dmaplimit) {
+ pmap_large_map_flush_range(sva, len);
+ } else {
+ KASSERT(sva >= LARGEMAP_MIN_ADDRESS &&
+ eva <= LARGEMAP_MIN_ADDRESS + lm_ents * NBPML4,
+ ("pmap_large_map_wb: not largemap %#lx %#lx", sva, len));
+ pmap_large_map_wb_large(sva, eva);
+ }
+ pmap_large_map_wb_fence();
+}
+
static vm_page_t
pmap_pti_alloc_page(void)
{
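
The hunks above add a small kernel-internal API: pmap_large_map() returns a kernel virtual mapping for a physical range (falling back to the direct map when the range fits under dmaplimit), pmap_large_map_wb() writes back dirty cache lines for a range returned by pmap_large_map(), and pmap_large_unmap() tears the mapping down. A minimal caller sketch, assuming a page-aligned physical destination and length; the helper name, the source buffer, and the choice of write-back attribute are illustrative assumptions, not part of the patch:

/*
 * Hypothetical caller of the new KPI: map a physical window write-back,
 * copy into it, push the dirty lines to memory, then drop the mapping.
 */
static int
example_copy_to_phys(vm_paddr_t dst_pa, const void *src, vm_size_t len)
{
	void *va;
	int error;

	error = pmap_large_map(dst_pa, len, &va, VM_MEMATTR_WRITE_BACK);
	if (error != 0)
		return (error);
	memcpy(va, src, len);
	/* Must cover only the range (or a sub-range) pmap_large_map() returned. */
	pmap_large_map_wb(va, len);
	pmap_large_unmap(va, len);
	return (0);
}
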
Index: sys/amd64/include/cpufunc.h
===================================================================
--- sys/amd64/include/cpufunc.h
+++ sys/amd64/include/cpufunc.h
@@ -115,6 +115,13 @@
__asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr));
}
+static __inline void
+clwb(u_long addr)
+{
+
+ __asm __volatile("clwb %0" : : "m" (*(char *)addr));
+}
+
static __inline void
clts(void)
{
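
CLWB is weakly ordered with respect to ordinary stores, so users of the new clwb() inline bracket their flush loops with sfence(), exactly as pmap_flush_cache_range() does in the pmap.c hunk above. A minimal sketch; the helper name is an assumption, and cpu_clflush_line_size comes from machine/md_var.h:

static __inline void
example_clwb_range(vm_offset_t sva, vm_offset_t eva)
{

	/* Order the CLWBs against surrounding stores. */
	sfence();
	for (; sva < eva; sva += cpu_clflush_line_size)
		clwb(sva);
	sfence();
}
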
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -216,6 +216,10 @@
#define KPML4I (NPML4EPG-1)
#define KPDPI (NPDPEPG-2) /* kernbase at -2GB */
+/* Large map: index of the first and max last pml4 entry */
+#define LMSPML4I (PML4PML4I + 1)
+#define LMEPML4I (DMPML4I - 1)
+
/*
* XXX doesn't really belong here I guess...
*/
@@ -413,11 +417,16 @@
int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde);
int pmap_change_attr(vm_offset_t, vm_size_t, int);
void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate);
+void pmap_flush_cache_range(vm_offset_t, vm_offset_t);
+void pmap_flush_cache_phys_range(vm_paddr_t, vm_paddr_t, vm_memattr_t);
void pmap_init_pat(void);
void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
void *pmap_kenter_temporary(vm_paddr_t pa, int i);
vm_paddr_t pmap_kextract(vm_offset_t);
void pmap_kremove(vm_offset_t);
+int pmap_large_map(vm_paddr_t, vm_size_t, void **, vm_memattr_t);
+void pmap_large_map_wb(void *sva, vm_size_t len);
+void pmap_large_unmap(void *sva, vm_size_t len);
void *pmap_mapbios(vm_paddr_t, vm_size_t);
void *pmap_mapdev(vm_paddr_t, vm_size_t);
void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
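
pmap_flush_cache_phys_range() asserts that both endpoints are page aligned and handles physical addresses beyond the direct map with a temporary mapping, so a caller only needs to round its window. A hedged wrapper sketch; the function name is hypothetical:

static void
example_flush_phys(vm_paddr_t pa, vm_size_t len)
{

	pmap_flush_cache_phys_range(trunc_page(pa), round_page(pa + len),
	    VM_MEMATTR_DEFAULT);
}
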
Index: sys/amd64/include/vmparam.h
===================================================================
--- sys/amd64/include/vmparam.h
+++ sys/amd64/include/vmparam.h
@@ -156,7 +156,9 @@
* 0x0000000000000000 - 0x00007fffffffffff user map
* 0x0000800000000000 - 0xffff7fffffffffff does not exist (hole)
* 0xffff800000000000 - 0xffff804020100fff recursive page table (512GB slot)
- * 0xffff804020101000 - 0xfffff7ffffffffff unused
+ * 0xffff804020100fff - 0xffff807fffffffff unused
+ * 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up)
+ * 0xffff848000000000 - 0xfffff7ffffffffff unused (large map extends there)
* 0xfffff80000000000 - 0xfffffbffffffffff 4TB direct map
* 0xfffffc0000000000 - 0xfffffdffffffffff unused
* 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map
@@ -173,6 +175,9 @@
#define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0)
#define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
+#define LARGEMAP_MIN_ADDRESS KVADDR(LMSPML4I, 0, 0, 0)
+#define LARGEMAP_MAX_ADDRESS KVADDR(LMEPML4I + 1, 0, 0, 0)
+
#define KERNBASE KVADDR(KPML4I, KPDPI, 0, 0)
#define UPT_MAX_ADDRESS KVADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)
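
A quick userland check of the window documented above; the constants restate the default of eight PML4 slots set in pmap_init() and the 512GB (NBPML4) covered by each slot, so this is an illustration rather than kernel code:

#include <stdio.h>

int
main(void)
{
	unsigned long nbpml4 = 1UL << 39;	/* bytes covered by one PML4 slot */
	unsigned long lm_start = 0xffff808000000000UL; /* LARGEMAP_MIN_ADDRESS */
	unsigned int lm_ents = 8;	/* default vm.pmap.large_map_pml4_entries */

	/* Prints: large map: 0xffff808000000000 - 0xffff847fffffffff (4096 GB) */
	printf("large map: %#lx - %#lx (%lu GB)\n", lm_start,
	    lm_start + lm_ents * nbpml4 - 1, lm_ents * (nbpml4 >> 30));
	return (0);
}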
