D45766.id140859.diff
Index: share/man/man7/arch.7
===================================================================
--- share/man/man7/arch.7
+++ share/man/man7/arch.7
@@ -218,7 +218,7 @@
.Ss Page Size
.Bl -column -offset indent "Architecture" "Page Sizes"
.It Sy Architecture Ta Sy Page Sizes
-.It aarch64 Ta 4K, 2M, 1G
+.It aarch64 Ta 4K, 64K, 2M, 1G
.It amd64 Ta 4K, 2M, 1G
.It armv7 Ta 4K, 1M
.It i386 Ta 4K, 2M (PAE), 4M
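
For reference, the new 64K entry corresponds to the L3C (ATTR_CONTIGUOUS) mappings added by the pmap changes below. With the default 4 KB translation granule the four sizes decompose as follows (a summary, using the constant names from sys/arm64/include/pte.h):

	4K  = base page                 (one L3 PTE)
	64K = 16  x 4K  = L3C_SIZE      (L3C_ENTRIES contiguous L3 PTEs)
	2M  = 512 x 4K  = L2_SIZE       (one L2 block entry)
	1G  = 512 x 2M  = L1_SIZE       (one L1 block entry)
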
Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -1631,11 +1631,14 @@
if (superpages_enabled) {
KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
("pmap_init: can't assign to pagesizes[1]"));
- pagesizes[1] = L2_SIZE;
+ pagesizes[1] = L3C_SIZE;
+ KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
+ ("pmap_init: can't assign to pagesizes[2]"));
+ pagesizes[2] = L2_SIZE;
if (L1_BLOCKS_SUPPORTED) {
- KASSERT(MAXPAGESIZES > 2 && pagesizes[2] == 0,
- ("pmap_init: can't assign to pagesizes[2]"));
- pagesizes[2] = L1_SIZE;
+ KASSERT(MAXPAGESIZES > 3 && pagesizes[3] == 0,
+ ("pmap_init: can't assign to pagesizes[3]"));
+ pagesizes[3] = L1_SIZE;
}
}
@@ -4959,7 +4962,7 @@
pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t pte, int flags,
int psind)
{
- pd_entry_t *l0p, *l1p, *l2p, newpte, origpte;
+ pd_entry_t *l0p, *l1p, *l2p, *l3p, newpte, origpte, *tl3p;
vm_page_t mp;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -4973,9 +4976,11 @@
newpte = pte;
if (!pmap_bti_same(pmap, va, va + pagesizes[psind], &newpte))
return (KERN_PROTECTION_FAILURE);
- if (psind == 2) {
+ if (psind == 3) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
+ KASSERT(pagesizes[psind] == L1_SIZE,
+ ("pagesizes[%d] != L1_SIZE", psind));
l0p = pmap_l0(pmap, va);
if ((pmap_load(l0p) & ATTR_DESCR_VALID) == 0) {
mp = _pmap_alloc_l3(pmap, pmap_l0_pindex(va), NULL);
@@ -5005,7 +5010,9 @@
("va %#lx changing 1G phys page l1 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(l1p, newpte);
- } else /* (psind == 1) */ {
+ } else if (psind == 2) {
+ KASSERT(pagesizes[psind] == L2_SIZE,
+ ("pagesizes[%d] != L2_SIZE", psind));
l2p = pmap_l2(pmap, va);
if (l2p == NULL) {
mp = _pmap_alloc_l3(pmap, pmap_l1_pindex(va), NULL);
@@ -5034,6 +5041,40 @@
("va %#lx changing 2M phys page l2 %#lx newpte %#lx",
va, origpte, newpte));
pmap_store(l2p, newpte);
+ } else /* (psind == 1) */ {
+ KASSERT(pagesizes[psind] == L3C_SIZE,
+ ("pagesizes[%d] != L3C_SIZE", psind));
+ l2p = pmap_l2(pmap, va);
+ if (l2p == NULL || (pmap_load(l2p) & ATTR_DESCR_VALID) == 0) {
+ mp = _pmap_alloc_l3(pmap, pmap_l2_pindex(va), NULL);
+ if (mp == NULL) {
+ if ((flags & PMAP_ENTER_NOSLEEP) != 0)
+ return (KERN_RESOURCE_SHORTAGE);
+ PMAP_UNLOCK(pmap);
+ vm_wait(NULL);
+ PMAP_LOCK(pmap);
+ goto restart;
+ }
+ mp->ref_count += L3C_ENTRIES - 1;
+ l3p = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp));
+ l3p = &l3p[pmap_l3_index(va)];
+ } else {
+ l3p = pmap_l2_to_l3(l2p, va);
+ if ((pmap_load(l3p) & ATTR_DESCR_VALID) == 0) {
+ mp = PTE_TO_VM_PAGE(pmap_load(l2p));
+ mp->ref_count += L3C_ENTRIES;
+ }
+ }
+ for (tl3p = l3p; tl3p < &l3p[L3C_ENTRIES]; tl3p++) {
+ origpte = pmap_load(tl3p);
+ KASSERT((origpte & ATTR_DESCR_VALID) == 0 ||
+ ((origpte & ATTR_CONTIGUOUS) != 0 &&
+ PTE_TO_PHYS(origpte) == PTE_TO_PHYS(newpte)),
+ ("va %#lx changing 64K phys page l3 %#lx newpte %#lx",
+ va, origpte, newpte));
+ pmap_store(tl3p, newpte);
+ newpte += L3_SIZE;
+ }
}
dsb(ishst);
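
In summary, pmap_enter_largepage() now dispatches on the renumbered psind values as follows (sizes assume the 4 KB granule):

	psind 3 -> one L1 block entry                      (1 GB)
	psind 2 -> one L2 block entry                      (2 MB)
	psind 1 -> L3C_ENTRIES L3 PTEs, each carrying
	           ATTR_CONTIGUOUS                         (64 KB)
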
@@ -5072,7 +5113,7 @@
vm_paddr_t opa, pa;
vm_page_t mpte, om;
bool nosleep;
- int lvl, rv;
+ int full_lvl, lvl, rv;
KASSERT(ADDR_IS_CANONICAL(va),
("%s: Address not in canonical form: %lx", __func__, va));
@@ -5128,24 +5169,47 @@
if ((flags & PMAP_ENTER_LARGEPAGE) != 0) {
KASSERT((m->oflags & VPO_UNMANAGED) != 0,
("managed largepage va %#lx flags %#x", va, flags));
- new_l3 &= ~L3_PAGE;
- if (psind == 2) {
+ if (psind == 3) {
PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
+ new_l3 &= ~L3_PAGE;
new_l3 |= L1_BLOCK;
- } else /* (psind == 1) */
+ } else if (psind == 2) {
+ new_l3 &= ~L3_PAGE;
new_l3 |= L2_BLOCK;
+ } else /* (psind == 1) */
+ new_l3 |= ATTR_CONTIGUOUS;
rv = pmap_enter_largepage(pmap, va, new_l3, flags, psind);
goto out;
}
- if (psind == 1) {
+ if (psind == 2) {
/* Assert the required virtual and physical alignment. */
KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va unaligned"));
- KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
+ KASSERT(m->psind > 1, ("pmap_enter: m->psind < psind"));
rv = pmap_enter_l2(pmap, va, (new_l3 & ~L3_PAGE) | L2_BLOCK,
flags, m, &lock);
goto out;
}
mpte = NULL;
+ if (psind == 1) {
+ KASSERT((va & L3C_OFFSET) == 0, ("pmap_enter: va unaligned"));
+ KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
+ rv = pmap_enter_l3c(pmap, va, new_l3 | ATTR_CONTIGUOUS, flags,
+ m, &mpte, &lock);
+#if VM_NRESERVLEVEL > 0
+ /*
+ * Attempt L2 promotion, if both the PTP and a level 1
+ * reservation are fully populated.
+ */
+ if (rv == KERN_SUCCESS &&
+ (mpte == NULL || mpte->ref_count == NL3PG) &&
+ (m->flags & PG_FICTITIOUS) == 0 &&
+ vm_reserv_level_iffullpop(m) == 1) {
+ pde = pmap_l2(pmap, va);
+ (void)pmap_promote_l2(pmap, pde, va, mpte, &lock);
+ }
+#endif
+ goto out;
+ }
/*
* In the case that a page table page is not
@@ -5365,15 +5429,14 @@
* are aligned with each other and an underlying reservation has the
* neighboring L3 pages allocated. The first condition is simply an
* optimization that recognizes some eventual promotion failures early
- * at a lower run-time cost. Then, if both the page table page and
- * the reservation are fully populated, attempt L2 promotion.
+ * at a lower run-time cost. Then, if both a level 1 reservation and
+ * the PTP are fully populated, attempt L2 promotion.
*/
if ((va & L3C_OFFSET) == (pa & L3C_OFFSET) &&
(m->flags & PG_FICTITIOUS) == 0 &&
- vm_reserv_is_populated(m, L3C_ENTRIES) &&
+ (full_lvl = vm_reserv_level_iffullpop(m)) >= 0 &&
pmap_promote_l3c(pmap, l3, va) &&
- (mpte == NULL || mpte->ref_count == NL3PG) &&
- vm_reserv_level_iffullpop(m) == 0)
+ full_lvl == 1 && (mpte == NULL || mpte->ref_count == NL3PG))
(void)pmap_promote_l2(pmap, pde, va, mpte, &lock);
#endif
@@ -5667,6 +5730,8 @@
("pmap_enter_l3c: va is not aligned"));
KASSERT(!VA_IS_CLEANMAP(va) || (l3e & ATTR_SW_MANAGED) == 0,
("pmap_enter_l3c: managed mapping within the clean submap"));
+ KASSERT((l3e & ATTR_CONTIGUOUS) != 0,
+ ("pmap_enter_l3c: l3e is missing ATTR_CONTIGUOUS"));
/*
* If the L3 PTP is not resident, we attempt to create it here.
@@ -5873,14 +5938,12 @@
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end &&
- m->psind == 1 && pmap_ps_enabled(pmap) &&
+ m->psind == 2 && pmap_ps_enabled(pmap) &&
((rv = pmap_enter_l2_rx(pmap, va, m, prot, &lock)) ==
KERN_SUCCESS || rv == KERN_NO_SPACE))
m = &m[L2_SIZE / PAGE_SIZE - 1];
else if ((va & L3C_OFFSET) == 0 && va + L3C_SIZE <= end &&
- (VM_PAGE_TO_PHYS(m) & L3C_OFFSET) == 0 &&
- vm_reserv_is_populated(m, L3C_ENTRIES) &&
- pmap_ps_enabled(pmap) &&
+ m->psind >= 1 && pmap_ps_enabled(pmap) &&
((rv = pmap_enter_l3c_rx(pmap, va, m, &mpte, prot,
&lock)) == KERN_SUCCESS || rv == KERN_NO_SPACE))
m = &m[L3C_ENTRIES - 1];
@@ -5932,7 +5995,7 @@
{
pt_entry_t *l1, *l2, *l3, l3_val;
vm_paddr_t pa;
- int lvl;
+ int full_lvl, lvl;
KASSERT(!VA_IS_CLEANMAP(va) ||
(m->oflags & VPO_UNMANAGED) != 0,
@@ -6063,18 +6126,17 @@
* are aligned with each other and an underlying reservation has the
* neighboring L3 pages allocated. The first condition is simply an
* optimization that recognizes some eventual promotion failures early
- * at a lower run-time cost. Then, attempt L2 promotion, if both the
- * PTP and the reservation are fully populated.
+ * at a lower run-time cost. Then, attempt L2 promotion, if both a
+ * level 1 reservation and the PTP are fully populated.
*/
if ((prot & VM_PROT_NO_PROMOTE) == 0 &&
(va & L3C_OFFSET) == (pa & L3C_OFFSET) &&
(m->flags & PG_FICTITIOUS) == 0 &&
- vm_reserv_is_populated(m, L3C_ENTRIES) &&
+ (full_lvl = vm_reserv_level_iffullpop(m)) >= 0 &&
pmap_promote_l3c(pmap, l3, va) &&
- (mpte == NULL || mpte->ref_count == NL3PG) &&
- vm_reserv_level_iffullpop(m) == 0) {
+ full_lvl == 1 && (mpte == NULL || mpte->ref_count == NL3PG)) {
if (l2 == NULL)
- l2 = pmap_pde(pmap, va, &lvl);
+ l2 = pmap_l2(pmap, va);
/*
* If promotion succeeds, then the next call to this function
@@ -8566,7 +8628,7 @@
{
pt_entry_t *pte, tpte;
vm_paddr_t mask, pa;
- int lvl, val;
+ int lvl, psind, val;
bool managed;
PMAP_ASSERT_STAGE1(pmap);
@@ -8578,21 +8640,22 @@
switch (lvl) {
case 3:
mask = L3_OFFSET;
+ psind = (tpte & ATTR_CONTIGUOUS) != 0 ? 1 : 0;
break;
case 2:
mask = L2_OFFSET;
+ psind = 2;
break;
case 1:
mask = L1_OFFSET;
+ psind = 3;
break;
default:
panic("pmap_mincore: invalid level %d", lvl);
}
managed = (tpte & ATTR_SW_MANAGED) != 0;
- val = MINCORE_INCORE;
- if (lvl != 3)
- val |= MINCORE_PSIND(3 - lvl);
+ val = MINCORE_INCORE | MINCORE_PSIND(psind);
if ((managed && pmap_pte_dirty(pmap, tpte)) || (!managed &&
(tpte & ATTR_S1_AP_RW_BIT) == ATTR_S1_AP(ATTR_S1_AP_RW)))
val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
@@ -9128,18 +9191,37 @@
{
vm_offset_t superpage_offset;
- if (size < L2_SIZE)
+ if (size < L3C_SIZE)
return;
if (object != NULL && (object->flags & OBJ_COLORED) != 0)
offset += ptoa(object->pg_color);
+
+ /*
+ * Considering the object's physical alignment, is the mapping large
+ * enough to encompass an L2 (2MB/32MB) superpage ...
+ */
superpage_offset = offset & L2_OFFSET;
- if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
- (*addr & L2_OFFSET) == superpage_offset)
+ if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) >= L2_SIZE) {
+ /*
+ * If the virtual and physical alignments differ, then
+ * increase the virtual address so that the alignments match.
+ */
+ if ((*addr & L2_OFFSET) < superpage_offset)
+ *addr = (*addr & ~L2_OFFSET) + superpage_offset;
+ else if ((*addr & L2_OFFSET) > superpage_offset)
+ *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) +
+ superpage_offset;
return;
- if ((*addr & L2_OFFSET) < superpage_offset)
- *addr = (*addr & ~L2_OFFSET) + superpage_offset;
- else
- *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
+ }
+ /* ... or an L3C (64KB/2MB) superpage? */
+ superpage_offset = offset & L3C_OFFSET;
+ if (size - ((L3C_SIZE - superpage_offset) & L3C_OFFSET) >= L3C_SIZE) {
+ if ((*addr & L3C_OFFSET) < superpage_offset)
+ *addr = (*addr & ~L3C_OFFSET) + superpage_offset;
+ else if ((*addr & L3C_OFFSET) > superpage_offset)
+ *addr = ((*addr + L3C_OFFSET) & ~L3C_OFFSET) +
+ superpage_offset;
+ }
}
/**
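
A worked example of the new fall-through behavior in pmap_align_superpage(), assuming the 4 KB granule (L2_SIZE = 2 MB, L3C_SIZE = 64 KB) and an object whose relevant offset is superpage aligned:

	size = 100 KB
	L2 test:  100 KB >= 2 MB ?   no  -> skip 2 MB alignment, fall through
	L3C test: 100 KB >= 64 KB ?  yes -> adjust *addr so that
	          (*addr & L3C_OFFSET) matches (offset & L3C_OFFSET)

Previously such a mapping received no alignment help at all, because only the single L2 test existed.
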
Index: sys/arm64/include/param.h
===================================================================
--- sys/arm64/include/param.h
+++ sys/arm64/include/param.h
@@ -97,7 +97,7 @@
#define PAGE_SIZE (1 << PAGE_SHIFT)
#define PAGE_MASK (PAGE_SIZE - 1)
-#define MAXPAGESIZES 3 /* maximum number of supported page sizes */
+#define MAXPAGESIZES 4 /* maximum number of supported page sizes */
#ifndef KSTACK_PAGES
#if defined(KASAN) || defined(KMSAN)
Index: sys/arm64/include/vmparam.h
===================================================================
--- sys/arm64/include/vmparam.h
+++ sys/arm64/include/vmparam.h
@@ -114,25 +114,34 @@
#endif
/*
- * Enable superpage reservations: 1 level.
+ * Enable superpage reservations: 2 levels.
*/
#ifndef VM_NRESERVLEVEL
-#define VM_NRESERVLEVEL 1
+#define VM_NRESERVLEVEL 2
#endif
/*
- * Level 0 reservations consist of 512 pages when PAGE_SIZE is 4KB, and
- * 2048 pages when PAGE_SIZE is 16KB.
+ * Level 0 reservations consist of 16 pages when PAGE_SIZE is 4KB, and 128
+ * pages when PAGE_SIZE is 16KB. Level 1 reservations consist of 32 64KB
+ * pages when PAGE_SIZE is 4KB, and 16 2M pages when PAGE_SIZE is 16KB.
*/
-#ifndef VM_LEVEL_0_ORDER
#if PAGE_SIZE == PAGE_SIZE_4K
-#define VM_LEVEL_0_ORDER 9
+#ifndef VM_LEVEL_0_ORDER
+#define VM_LEVEL_0_ORDER 4
+#endif
+#ifndef VM_LEVEL_1_ORDER
+#define VM_LEVEL_1_ORDER 5
+#endif
#elif PAGE_SIZE == PAGE_SIZE_16K
-#define VM_LEVEL_0_ORDER 11
+#ifndef VM_LEVEL_0_ORDER
+#define VM_LEVEL_0_ORDER 7
+#endif
+#ifndef VM_LEVEL_1_ORDER
+#define VM_LEVEL_1_ORDER 4
+#endif
#else
#error Unsupported page size
#endif
-#endif
/**
* Address space layout.
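
The new orders decompose as follows (derived from the comment above):

	PAGE_SIZE 4K:  level 0 = 2^4 pages  = 16  x 4 KB  = 64 KB  (one L3C run)
	               level 1 = 2^5 chunks = 32  x 64 KB = 2 MB   (one L2 block)
	PAGE_SIZE 16K: level 0 = 2^7 pages  = 128 x 16 KB = 2 MB   (one L3C run)
	               level 1 = 2^4 chunks = 16  x 2 MB  = 32 MB  (one L2 block)
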
Index: sys/kern/imgact_elf.c
===================================================================
--- sys/kern/imgact_elf.c
+++ sys/kern/imgact_elf.c
@@ -1360,8 +1360,12 @@
if ((map->flags & MAP_ASLR) != 0) {
maxv1 = maxv / 2 + addr / 2;
error = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1,
- (MAXPAGESIZES > 1 && pagesizes[1] != 0) ?
- pagesizes[1] : pagesizes[0], &anon_loc);
+#if VM_NRESERVLEVEL > 0
+ pagesizes[VM_NRESERVLEVEL] != 0 ?
+ /* Align anon_loc to the largest superpage size. */
+ pagesizes[VM_NRESERVLEVEL] :
+#endif
+ pagesizes[0], &anon_loc);
if (error != 0)
goto ret;
map->anon_loc = anon_loc;
Index: sys/kern/kern_mib.c
===================================================================
--- sys/kern/kern_mib.c
+++ sys/kern/kern_mib.c
@@ -58,6 +58,8 @@
#include <sys/systm.h>
#include <sys/unistd.h>
+#include <vm/vm_param.h>
+
SYSCTL_ROOT_NODE(0, sysctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Sysctl internal magic");
SYSCTL_ROOT_NODE(CTL_KERN, kern, CTLFLAG_RW | CTLFLAG_CAPRD | CTLFLAG_MPSAFE, 0,
@@ -242,7 +244,11 @@
SYSCTL_LONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0,
"Amount of physical memory (in pages)");
-u_long pagesizes[MAXPAGESIZES] = { PAGE_SIZE };
+#if VM_NRESERVLEVEL > 0
+_Static_assert(MAXPAGESIZES > VM_NRESERVLEVEL, "MAXPAGESIZES is too small");
+#endif
+
+u_long __read_mostly pagesizes[MAXPAGESIZES] = { PAGE_SIZE };
static int
sysctl_hw_pagesizes(SYSCTL_HANDLER_ARGS)
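
With pagesizes[] populated as in the pmap_init() hunk above, the additional size becomes visible to userland through the existing hw.pagesizes sysctl and the getpagesizes(3) wrapper around it. A minimal sketch (illustrative only); on aarch64 it would be expected to print 4096, 65536, 2097152 and, where L1 blocks are supported, 1073741824:

	#include <sys/mman.h>
	#include <stdio.h>

	int
	main(void)
	{
		size_t ps[8];
		int i, n;

		/* getpagesizes(3) reports the page sizes the kernel supports. */
		n = getpagesizes(ps, 8);
		if (n == -1)
			return (1);
		for (i = 0; i < n; i++)
			printf("%zu\n", ps[i]);
		return (0);
	}
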
Index: sys/kern/kern_proc.c
===================================================================
--- sys/kern/kern_proc.c
+++ sys/kern/kern_proc.c
@@ -2542,6 +2542,7 @@
vm_offset_t addr;
vm_paddr_t pa;
vm_pindex_t pi, pi_adv, pindex;
+ int incore;
*super = false;
*resident_count = 0;
@@ -2577,10 +2578,15 @@
}
m_adv = NULL;
if (m->psind != 0 && addr + pagesizes[1] <= entry->end &&
- (addr & (pagesizes[1] - 1)) == 0 &&
- (pmap_mincore(map->pmap, addr, &pa) & MINCORE_SUPER) != 0) {
+ (addr & (pagesizes[1] - 1)) == 0 && (incore =
+ pmap_mincore(map->pmap, addr, &pa) & MINCORE_SUPER) != 0) {
*super = true;
- pi_adv = atop(pagesizes[1]);
+ /*
+ * The virtual page might be smaller than the physical
+ * page, so we use the page size reported by the pmap
+ * rather than m->psind.
+ */
+ pi_adv = atop(pagesizes[incore >> MINCORE_PSIND_SHIFT]);
} else {
/*
* We do not test the found page on validity.
Index: sys/kern/uipc_shm.c
===================================================================
--- sys/kern/uipc_shm.c
+++ sys/kern/uipc_shm.c
@@ -1589,9 +1589,20 @@
if (align == 0) {
align = pagesizes[shmfd->shm_lp_psind];
} else if (align == MAP_ALIGNED_SUPER) {
- if (shmfd->shm_lp_psind != 1)
+ /*
+ * MAP_ALIGNED_SUPER is only supported on superpage sizes,
+ * i.e., [1, VM_NRESERVLEVEL]. shmfd->shm_lp_psind < 1 is
+ * handled above.
+ */
+ if (
+#if VM_NRESERVLEVEL > 0
+ shmfd->shm_lp_psind > VM_NRESERVLEVEL
+#else
+ shmfd->shm_lp_psind > 1
+#endif
+ )
return (EINVAL);
- align = pagesizes[1];
+ align = pagesizes[shmfd->shm_lp_psind];
} else {
align >>= MAP_ALIGNMENT_SHIFT;
align = 1ULL << align;
Index: sys/sys/mman.h
===================================================================
--- sys/sys/mman.h
+++ sys/sys/mman.h
@@ -175,7 +175,9 @@
#define MINCORE_REFERENCED_OTHER 0x8 /* Page has been referenced */
#define MINCORE_MODIFIED_OTHER 0x10 /* Page has been modified */
#define MINCORE_SUPER 0x60 /* Page is a "super" page */
-#define MINCORE_PSIND(i) (((i) << 5) & MINCORE_SUPER) /* Page size */
+#define MINCORE_PSIND_SHIFT 5
+#define MINCORE_PSIND(i) (((i) << MINCORE_PSIND_SHIFT) & MINCORE_SUPER)
+ /* Page size */
/*
* Anonymous object constant for shm_open().
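
With MINCORE_PSIND_SHIFT exposed, a mincore(2) caller can recover the page-size index from a vector byte instead of only testing MINCORE_SUPER as a flag. A minimal sketch (the helper name is illustrative, not part of this patch):

	#include <sys/mman.h>

	/* Decode the pagesizes[] index (0..3) from one mincore() vector entry. */
	static inline int
	mincore_psind(unsigned char vec_entry)
	{
		return ((vec_entry & MINCORE_SUPER) >> MINCORE_PSIND_SHIFT);
	}

This mirrors the kern_proc.c change above, which indexes pagesizes[] with incore >> MINCORE_PSIND_SHIFT.
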
Index: sys/vm/vm_domainset.c
===================================================================
--- sys/vm/vm_domainset.c
+++ sys/vm/vm_domainset.c
@@ -77,6 +77,9 @@
* reservation boundary.
*/
pindex += obj->pg_color;
+#if VM_NRESERVLEVEL > 1
+ pindex >>= VM_LEVEL_1_ORDER;
+#endif
pindex >>= VM_LEVEL_0_ORDER;
} else
#endif
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -380,12 +380,10 @@
psind = 0;
#if VM_NRESERVLEVEL > 0
if ((m->flags & PG_FICTITIOUS) == 0 &&
- (m_super = vm_reserv_to_superpage(m)) != NULL &&
- rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start &&
- roundup2(vaddr + 1, pagesizes[m_super->psind]) <= fs->entry->end &&
- (vaddr & (pagesizes[m_super->psind] - 1)) == (VM_PAGE_TO_PHYS(m) &
- (pagesizes[m_super->psind] - 1)) &&
- pmap_ps_enabled(fs->map->pmap)) {
+ (m_super = vm_reserv_to_superpage(m)) != NULL) {
+ psind = m_super->psind;
+ KASSERT(psind > 0,
+ ("psind %d of m_super %p < 1", psind, m_super));
flags = PS_ALL_VALID;
if ((fs->prot & VM_PROT_WRITE) != 0) {
/*
@@ -398,9 +396,23 @@
if ((fs->first_object->flags & OBJ_UNMANAGED) == 0)
flags |= PS_ALL_DIRTY;
}
- if (vm_page_ps_test(m_super, flags, m)) {
+ while (rounddown2(vaddr, pagesizes[psind]) < fs->entry->start ||
+ roundup2(vaddr + 1, pagesizes[psind]) > fs->entry->end ||
+ (vaddr & (pagesizes[psind] - 1)) !=
+ (VM_PAGE_TO_PHYS(m) & (pagesizes[psind] - 1)) ||
+ !vm_page_ps_test(m_super, psind, flags, m) ||
+ !pmap_ps_enabled(fs->map->pmap)) {
+ psind--;
+ if (psind == 0)
+ break;
+ m_super += rounddown2(m - m_super,
+ atop(pagesizes[psind]));
+ KASSERT(m_super->psind >= psind,
+ ("psind %d of m_super %p < %d", m_super->psind,
+ m_super, psind));
+ }
+ if (psind > 0) {
m_map = m_super;
- psind = m_super->psind;
vaddr = rounddown2(vaddr, pagesizes[psind]);
/* Preset the modified bit for dirty superpages. */
if ((flags & PS_ALL_DIRTY) != 0)
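
A worked example of the psind fallback loop above: suppose m lies within a fully populated 2 MB reservation (psind starts at 2) but the 2 MB-aligned virtual range would spill past fs->entry->end. psind drops to 1, and if m is, say, at index 137 of the reservation, m_super is advanced by rounddown2(137, atop(pagesizes[1])) = rounddown2(137, 16) = 128, i.e., to the base page of the 64 KB run containing m; the loop then retries the alignment, vm_page_ps_test(), and pmap_ps_enabled() checks at the smaller size.
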
@@ -615,10 +627,10 @@
vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset;
psind = m->psind;
- if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 ||
+ while (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 ||
pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last ||
!pmap_ps_enabled(fs->map->pmap)))
- psind = 0;
+ psind--;
npages = atop(pagesizes[psind]);
for (i = 0; i < npages; i++) {
Index: sys/vm/vm_glue.c
===================================================================
--- sys/vm/vm_glue.c
+++ sys/vm/vm_glue.c
@@ -106,7 +106,10 @@
#include <machine/cpu.h>
-#if VM_NRESERVLEVEL > 0
+#if VM_NRESERVLEVEL > 1
+#define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_1_ORDER + VM_LEVEL_0_ORDER + \
+ PAGE_SHIFT)
+#elif VM_NRESERVLEVEL > 0
#define KVA_KSTACK_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
#else
#define KVA_KSTACK_QUANTUM_SHIFT (8 + PAGE_SHIFT)
Index: sys/vm/vm_kern.c
===================================================================
--- sys/vm/vm_kern.c
+++ sys/vm/vm_kern.c
@@ -120,7 +120,10 @@
#endif
"Max kernel address");
-#if VM_NRESERVLEVEL > 0
+#if VM_NRESERVLEVEL > 1
+#define KVA_QUANTUM_SHIFT (VM_LEVEL_1_ORDER + VM_LEVEL_0_ORDER + \
+ PAGE_SHIFT)
+#elif VM_NRESERVLEVEL > 0
#define KVA_QUANTUM_SHIFT (VM_LEVEL_0_ORDER + PAGE_SHIFT)
#else
/* On non-superpage architectures we want large import sizes. */
Index: sys/vm/vm_map.c
===================================================================
--- sys/vm/vm_map.c
+++ sys/vm/vm_map.c
@@ -1993,8 +1993,15 @@
return (result);
}
+#if VM_NRESERVLEVEL == 1
static const int aslr_pages_rnd_64[2] = {0x1000, 0x10};
static const int aslr_pages_rnd_32[2] = {0x100, 0x4};
+#elif VM_NRESERVLEVEL == 2
+static const int aslr_pages_rnd_64[3] = {0x1000, 0x1000, 0x10};
+static const int aslr_pages_rnd_32[3] = {0x100, 0x100, 0x4};
+#else
+#error "Unsupported VM_NRESERVLEVEL"
+#endif
static int cluster_anon = 1;
SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW,
@@ -2190,9 +2197,23 @@
* Find space for allocation, including
* gap needed for later randomization.
*/
- pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 &&
- (find_space == VMFS_SUPER_SPACE || find_space ==
- VMFS_OPTIMAL_SPACE) ? 1 : 0;
+ pidx = 0;
+#if VM_NRESERVLEVEL > 0
+ if ((find_space == VMFS_SUPER_SPACE ||
+ find_space == VMFS_OPTIMAL_SPACE) &&
+ pagesizes[VM_NRESERVLEVEL] != 0) {
+ /*
+ * Do not pointlessly increase the space that
+ * is requested from vm_map_findspace().
+ * pmap_align_superpage() will only change a
+ * mapping's alignment if that mapping is at
+ * least a superpage in size.
+ */
+ pidx = VM_NRESERVLEVEL;
+ while (pidx > 0 && length < pagesizes[pidx])
+ pidx--;
+ }
+#endif
gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR &&
(max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ?
aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx];
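
A short trace of the new pidx selection on aarch64 (pagesizes[] = { 4K, 64K, 2M, 1G }, VM_NRESERVLEVEL = 2), for a 1 MB VMFS_OPTIMAL_SPACE request:

	pidx = 2:  1 MB <  pagesizes[2] (2 MB)  -> pidx--
	pidx = 1:  1 MB >= pagesizes[1] (64 KB) -> stop

so the ASLR gap counts are taken from index 1 of the aslr_pages_rnd arrays; a mapping smaller than a given superpage size no longer pays for alignment slack at that size.
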
@@ -2656,6 +2677,7 @@
vm_offset_t start;
vm_page_t p, p_start;
vm_pindex_t mask, psize, threshold, tmpidx;
+ int psind;
if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
return;
@@ -2710,13 +2732,17 @@
p_start = p;
}
/* Jump ahead if a superpage mapping is possible. */
- if (p->psind > 0 && ((addr + ptoa(tmpidx)) &
- (pagesizes[p->psind] - 1)) == 0) {
- mask = atop(pagesizes[p->psind]) - 1;
- if (tmpidx + mask < psize &&
- vm_page_ps_test(p, PS_ALL_VALID, NULL)) {
- p += mask;
- threshold += mask;
+ for (psind = p->psind; psind > 0; psind--) {
+ if (((addr + ptoa(tmpidx)) &
+ (pagesizes[psind] - 1)) == 0) {
+ mask = atop(pagesizes[psind]) - 1;
+ if (tmpidx + mask < psize &&
+ vm_page_ps_test(p, psind,
+ PS_ALL_VALID, NULL)) {
+ p += mask;
+ threshold += mask;
+ break;
+ }
}
}
} else if (p_start != NULL) {
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -657,7 +657,7 @@
bool vm_page_pqstate_commit(vm_page_t m, vm_page_astate_t *old,
vm_page_astate_t new);
vm_page_t vm_page_prev(vm_page_t m);
-bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m);
+bool vm_page_ps_test(vm_page_t m, int psind, int flags, vm_page_t skip_m);
void vm_page_putfake(vm_page_t m);
void vm_page_readahead_finish(vm_page_t m);
int vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low,
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -5562,7 +5562,7 @@
* (super)page and false otherwise.
*/
bool
-vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m)
+vm_page_ps_test(vm_page_t m, int psind, int flags, vm_page_t skip_m)
{
vm_object_t object;
int i, npages;
@@ -5571,7 +5571,9 @@
if (skip_m != NULL && skip_m->object != object)
return (false);
VM_OBJECT_ASSERT_LOCKED(object);
- npages = atop(pagesizes[m->psind]);
+ KASSERT(psind <= m->psind,
+ ("psind %d > psind %d of m %p", psind, m->psind, m));
+ npages = atop(pagesizes[psind]);
/*
* The physically contiguous pages that make up a superpage, i.e., a
Index: sys/vm/vm_reserv.c
===================================================================
--- sys/vm/vm_reserv.c
+++ sys/vm/vm_reserv.c
@@ -77,6 +77,29 @@
#if VM_NRESERVLEVEL > 0
+/*
+ * Temporarily simulate two-level reservations. Effectively, VM_LEVEL_0_* is
+ * level 1, and VM_SUBLEVEL_0_* is level 0.
+ */
+#if VM_NRESERVLEVEL == 2
+#undef VM_NRESERVLEVEL
+#define VM_NRESERVLEVEL 1
+#if VM_LEVEL_0_ORDER == 4
+#undef VM_LEVEL_0_ORDER
+#define VM_LEVEL_0_ORDER (4 + VM_LEVEL_1_ORDER)
+#define VM_SUBLEVEL_0_NPAGES (1 << 4)
+#elif VM_LEVEL_0_ORDER == 7
+#undef VM_LEVEL_0_ORDER
+#define VM_LEVEL_0_ORDER (7 + VM_LEVEL_1_ORDER)
+#define VM_SUBLEVEL_0_NPAGES (1 << 7)
+#else
+#error "Unsupported level 0 reservation size"
+#endif
+#define VM_LEVEL_0_PSIND 2
+#else
+#define VM_LEVEL_0_PSIND 1
+#endif
+
#ifndef VM_LEVEL_0_ORDER_MAX
#define VM_LEVEL_0_ORDER_MAX VM_LEVEL_0_ORDER
#endif
@@ -381,6 +404,27 @@
vm_reserv_object_unlock(object);
}
+#ifdef VM_SUBLEVEL_0_NPAGES
+static inline bool
+vm_reserv_is_sublevel_full(vm_reserv_t rv, int index)
+{
+ _Static_assert(VM_SUBLEVEL_0_NPAGES == 16 ||
+ VM_SUBLEVEL_0_NPAGES == 128,
+ "vm_reserv_is_sublevel_full: unsupported VM_SUBLEVEL_0_NPAGES");
+ /* An equivalent bit_ntest() compiles to more instructions. */
+ switch (VM_SUBLEVEL_0_NPAGES) {
+ case 16:
+ return (((uint16_t *)rv->popmap)[index / 16] == UINT16_MAX);
+ case 128:
+ index = rounddown2(index, 128) / 64;
+ return (((uint64_t *)rv->popmap)[index] == UINT64_MAX &&
+ ((uint64_t *)rv->popmap)[index + 1] == UINT64_MAX);
+ default:
+ __unreachable();
+ }
+}
+#endif
+
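
A worked example of the sublevel-full test for the 4 KB granule case:

	VM_SUBLEVEL_0_NPAGES = 16, index = 37
	37 / 16 = 2                      -> test ((uint16_t *)rv->popmap)[2]
	word 2 covers indices 32..47     -> one 64 KB-aligned L3C run
	word == UINT16_MAX               -> every page of the run is populated

In the 128-page (16 KB granule) case the same test spans two adjacent uint64_t popmap words.
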
/*
* Reduces the given reservation's population count. If the population count
* becomes zero, the reservation is destroyed. Additionally, moves the
@@ -406,11 +450,15 @@
("vm_reserv_depopulate: reserv %p's domain is corrupted %d",
rv, rv->domain));
if (rv->popcnt == VM_LEVEL_0_NPAGES) {
- KASSERT(rv->pages->psind == 1,
+ KASSERT(rv->pages->psind == VM_LEVEL_0_PSIND,
("vm_reserv_depopulate: reserv %p is already demoted",
rv));
- rv->pages->psind = 0;
+ rv->pages->psind = VM_LEVEL_0_PSIND - 1;
}
+#ifdef VM_SUBLEVEL_0_NPAGES
+ if (vm_reserv_is_sublevel_full(rv, index))
+ rv->pages[rounddown2(index, VM_SUBLEVEL_0_NPAGES)].psind = 0;
+#endif
bit_clear(rv->popmap, index);
rv->popcnt--;
if ((unsigned)(ticks - rv->lasttick) >= PARTPOPSLOP ||
@@ -522,12 +570,17 @@
index));
KASSERT(rv->popcnt < VM_LEVEL_0_NPAGES,
("vm_reserv_populate: reserv %p is already full", rv));
- KASSERT(rv->pages->psind == 0,
+ KASSERT(rv->pages->psind >= 0 &&
+ rv->pages->psind < VM_LEVEL_0_PSIND,
("vm_reserv_populate: reserv %p is already promoted", rv));
KASSERT(rv->domain < vm_ndomains,
("vm_reserv_populate: reserv %p's domain is corrupted %d",
rv, rv->domain));
bit_set(rv->popmap, index);
+#ifdef VM_SUBLEVEL_0_NPAGES
+ if (vm_reserv_is_sublevel_full(rv, index))
+ rv->pages[rounddown2(index, VM_SUBLEVEL_0_NPAGES)].psind = 1;
+#endif
rv->popcnt++;
if ((unsigned)(ticks - rv->lasttick) < PARTPOPSLOP &&
rv->inpartpopq && rv->popcnt != VM_LEVEL_0_NPAGES)
@@ -542,10 +595,10 @@
rv->inpartpopq = TRUE;
TAILQ_INSERT_TAIL(&vm_rvd[rv->domain].partpop, rv, partpopq);
} else {
- KASSERT(rv->pages->psind == 0,
+ KASSERT(rv->pages->psind == VM_LEVEL_0_PSIND - 1,
("vm_reserv_populate: reserv %p is already promoted",
rv));
- rv->pages->psind = 1;
+ rv->pages->psind = VM_LEVEL_0_PSIND;
}
vm_reserv_domain_unlock(rv->domain);
}
@@ -889,13 +942,18 @@
static void
vm_reserv_break(vm_reserv_t rv)
{
+ vm_page_t m;
int hi, lo, pos;
vm_reserv_assert_locked(rv);
CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
__FUNCTION__, rv, rv->object, rv->popcnt, rv->inpartpopq);
vm_reserv_remove(rv);
- rv->pages->psind = 0;
+ m = rv->pages;
+#ifdef VM_SUBLEVEL_0_NPAGES
+ for (; m < rv->pages + VM_LEVEL_0_NPAGES; m += VM_SUBLEVEL_0_NPAGES)
+#endif
+ m->psind = 0;
hi = lo = -1;
pos = 0;
for (;;) {
@@ -1089,7 +1147,11 @@
vm_reserv_t rv;
rv = vm_reserv_from_page(m);
+#ifdef VM_SUBLEVEL_0_NPAGES
+ return (rv->object != NULL ? 1 : -1);
+#else
return (rv->object != NULL ? 0 : -1);
+#endif
}
/*
@@ -1102,7 +1164,15 @@
vm_reserv_t rv;
rv = vm_reserv_from_page(m);
- return (rv->popcnt == VM_LEVEL_0_NPAGES ? 0 : -1);
+ if (rv->popcnt == VM_LEVEL_0_NPAGES) {
+#ifdef VM_SUBLEVEL_0_NPAGES
+ return (1);
+ } else if (rv->pages != NULL &&
+ vm_reserv_is_sublevel_full(rv, m - rv->pages)) {
+#endif
+ return (0);
+ }
+ return (-1);
}
/*
@@ -1357,6 +1427,10 @@
switch (level) {
case 0:
+#ifdef VM_SUBLEVEL_0_NPAGES
+ return (VM_SUBLEVEL_0_NPAGES * PAGE_SIZE);
+ case 1:
+#endif
return (VM_LEVEL_0_SIZE);
case -1:
return (PAGE_SIZE);
@@ -1432,12 +1506,16 @@
VM_OBJECT_ASSERT_LOCKED(m->object);
rv = vm_reserv_from_page(m);
- if (rv->object == m->object && rv->popcnt == VM_LEVEL_0_NPAGES)
- m = rv->pages;
- else
- m = NULL;
-
- return (m);
+ if (rv->object == m->object) {
+ if (rv->popcnt == VM_LEVEL_0_NPAGES)
+ return (rv->pages);
+#ifdef VM_SUBLEVEL_0_NPAGES
+ if (vm_reserv_is_sublevel_full(rv, m - rv->pages))
+ return (rv->pages + rounddown2(m - rv->pages,
+ VM_SUBLEVEL_0_NPAGES));
+#endif
+ }
+ return (NULL);
}
#endif /* VM_NRESERVLEVEL > 0 */
