D20907.id59613.diff

Index: sys/arm64/arm64/pmap.c
===================================================================
--- sys/arm64/arm64/pmap.c
+++ sys/arm64/arm64/pmap.c
@@ -217,6 +217,14 @@
#define VM_PAGE_TO_PV_LIST_LOCK(m) \
PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+/*
+ * The presence of this flag indicates that the mapping is writeable.
+ * If the ATTR_AP_RO bit is also set, then the mapping is clean, otherwise it is
+ * dirty. This bit should be preemptively set on unmanaged mappings to avoid
+ * unnecessary faults.
+ */
+static pt_entry_t ATTR_SW_DBM;
+
struct pmap kernel_pmap_store;
/* Used for mapping ACPI memory before VM is initialized */
@@ -315,10 +323,9 @@
* They need to be atomic as the System MMU may write to the table at
* the same time as the CPU.
*/
-#define pmap_load_store(table, entry) atomic_swap_64(table, entry)
-#define pmap_set(table, mask) atomic_set_64(table, mask)
-#define pmap_load_clear(table) atomic_swap_64(table, 0)
-#define pmap_load(table) (*table)
+#define pmap_load_store(table, entry) atomic_swap_64(table, entry)
+#define pmap_load_clear(table) atomic_swap_64(table, 0)
+#define pmap_load(table) (*table)
/********************/
/* Inline functions */
@@ -530,16 +537,15 @@
CTASSERT(L1_BLOCK == L2_BLOCK);
-/*
- * Checks if the page is dirty. We currently lack proper tracking of this on
- * arm64 so for now assume is a page mapped as rw was accessed it is.
- */
static inline int
-pmap_page_dirty(pt_entry_t pte)
+pmap_pte_dirty(pt_entry_t pte)
{
- return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
- (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
+ KASSERT((pte & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) != 0,
+ ("pte %#lx is writeable and missing ATTR_SW_DBM", pte));
+
+ return ((pte & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) ==
+ (ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM));
}
static __inline void
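
The hunk above, together with the ATTR_SW_DBM comment near the top of the file, defines the encoding that pmap_pte_dirty() relies on: a mapping is writeable when the DBM marker is present, and dirty only when the read-only AP bit has additionally been cleared. A minimal userland sketch of those three states, using placeholder bit positions rather than the kernel's definitions:

/*
 * Illustrative-only model of the PTE states distinguished by the new
 * pmap_pte_dirty().  Bit positions are assumptions, not pte.h values.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t ex_pt_entry_t;

#define EX_ATTR_AP_RO	(UINT64_C(1) << 7)	/* stands in for ATTR_AP(ATTR_AP_RO) */
#define EX_ATTR_SW_DBM	(UINT64_C(1) << 57)	/* stands in for ATTR_SW_DBM */

/* Writeable at all: the DBM marker is present. */
static bool
ex_pte_writeable(ex_pt_entry_t pte)
{
	return ((pte & EX_ATTR_SW_DBM) != 0);
}

/* Dirty: writeable (DBM set) and the read-only bit has been cleared. */
static bool
ex_pte_dirty(ex_pt_entry_t pte)
{
	return ((pte & (EX_ATTR_AP_RO | EX_ATTR_SW_DBM)) == EX_ATTR_SW_DBM);
}

int
main(void)
{
	ex_pt_entry_t ro = EX_ATTR_AP_RO;			/* truly read-only */
	ex_pt_entry_t clean = EX_ATTR_AP_RO | EX_ATTR_SW_DBM;	/* writeable, clean */
	ex_pt_entry_t dirty = EX_ATTR_SW_DBM;			/* writeable, dirty */

	printf("ro:    writeable=%d dirty=%d\n", ex_pte_writeable(ro), ex_pte_dirty(ro));
	printf("clean: writeable=%d dirty=%d\n", ex_pte_writeable(clean), ex_pte_dirty(clean));
	printf("dirty: writeable=%d dirty=%d\n", ex_pte_writeable(dirty), ex_pte_dirty(dirty));
	return (0);
}
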
@@ -644,7 +650,8 @@
l2_slot = pmap_l2_index(va);
KASSERT(l2_slot != 0, ("..."));
pmap_load_store(&l2[l2_slot],
- (pa & ~L2_OFFSET) | ATTR_DEFAULT | ATTR_XN |
+ (pa & ~L2_OFFSET) | ATTR_DEFAULT |
+ ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM | ATTR_XN |
ATTR_IDX(CACHED_MEMORY) | L2_BLOCK);
}
KASSERT(va == (pa - dmap_phys_base + DMAP_MIN_ADDRESS),
@@ -656,7 +663,8 @@
pa += L1_SIZE, va += L1_SIZE) {
l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
pmap_load_store(&pagetable_dmap[l1_slot],
- (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_XN |
+ (pa & ~L1_OFFSET) | ATTR_DEFAULT |
+ ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM | ATTR_XN |
ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
}
@@ -681,7 +689,8 @@
pa += L2_SIZE, va += L2_SIZE) {
l2_slot = pmap_l2_index(va);
pmap_load_store(&l2[l2_slot],
- (pa & ~L2_OFFSET) | ATTR_DEFAULT | ATTR_XN |
+ (pa & ~L2_OFFSET) | ATTR_DEFAULT |
+ ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM | ATTR_XN |
ATTR_IDX(CACHED_MEMORY) | L2_BLOCK);
}
}
@@ -764,13 +773,18 @@
vm_size_t kernlen)
{
u_int l1_slot, l2_slot;
- uint64_t kern_delta;
pt_entry_t *l2;
vm_offset_t va, freemempos;
vm_offset_t dpcpu, msgbufpv;
vm_paddr_t start_pa, pa, min_pa;
+ uint64_t kern_delta, reg;
int i;
+ /* Determine whether the hardware implements DBM management. */
+ reg = READ_SPECIALREG(ID_AA64MMFR1_EL1);
+ ATTR_SW_DBM = ID_AA64MMFR1_HAFDBS(reg) == ID_AA64MMFR1_HAFDBS_AF_DBS ?
+ ATTR_DBM : _ATTR_SW_DBM;
+
kern_delta = KERNBASE - kernstart;
printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
@@ -1167,7 +1181,8 @@
KASSERT((size & PAGE_MASK) == 0,
("pmap_kenter: Mapping is not page-sized"));
- attr = ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE;
+ attr = ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM | ATTR_IDX(mode) |
+ L3_PAGE;
if (mode == DEVICE_MEMORY)
attr |= ATTR_XN;
@@ -1285,7 +1300,7 @@
m = ma[i];
pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
- ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
+ ATTR_SW_DBM | ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
if (m->md.pv_memattr == DEVICE_MEMORY)
pa |= ATTR_XN;
pte = pmap_l2_to_l3(pde, va);
@@ -1951,7 +1966,7 @@
tpte = pmap_load_clear(pte);
pmap_invalidate_page(pmap, va);
m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
- if (pmap_page_dirty(tpte))
+ if (pmap_pte_dirty(tpte))
vm_page_dirty(m);
if ((tpte & ATTR_AF) != 0)
vm_page_aflag_set(m, PGA_REFERENCED);
@@ -2448,7 +2463,7 @@
eva = sva + L2_SIZE;
for (va = sva, m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK);
va < eva; va += PAGE_SIZE, m++) {
- if (pmap_page_dirty(old_l2))
+ if (pmap_pte_dirty(old_l2))
vm_page_dirty(m);
if (old_l2 & ATTR_AF)
vm_page_aflag_set(m, PGA_REFERENCED);
@@ -2477,7 +2492,7 @@
/*
* pmap_remove_l3: do the things to unmap a page in a process
*/
-static int
+static int __unused
pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
{
@@ -2493,7 +2508,7 @@
pmap_resident_count_dec(pmap, 1);
if (old_l3 & ATTR_SW_MANAGED) {
m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
- if (pmap_page_dirty(old_l3))
+ if (pmap_pte_dirty(old_l3))
vm_page_dirty(m);
if (old_l3 & ATTR_AF)
vm_page_aflag_set(m, PGA_REFERENCED);
@@ -2541,7 +2556,7 @@
pmap_resident_count_dec(pmap, 1);
if ((old_l3 & ATTR_SW_MANAGED) != 0) {
m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
- if (pmap_page_dirty(old_l3))
+ if (pmap_pte_dirty(old_l3))
vm_page_dirty(m);
if ((old_l3 & ATTR_AF) != 0)
vm_page_aflag_set(m, PGA_REFERENCED);
@@ -2771,7 +2786,7 @@
/*
* Update the vm_page_t clean and reference bits.
*/
- if (pmap_page_dirty(tpte))
+ if (pmap_pte_dirty(tpte))
vm_page_dirty(m);
pmap_unuse_pt(pmap, pv->pv_va, tpde, &free);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
@@ -2788,7 +2803,8 @@
* pmap_protect_l2: do the things to protect a 2MB page in a pmap
*/
static void
-pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t nbits)
+pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask,
+ pt_entry_t nbits)
{
pd_entry_t old_l2;
vm_page_t m, mt;
@@ -2804,7 +2820,8 @@
* Return if the L2 entry already has the desired access restrictions
* in place.
*/
- if ((old_l2 | nbits) == old_l2)
+retry:
+ if ((old_l2 & mask) == nbits)
return;
/*
@@ -2812,15 +2829,15 @@
* update the dirty field of each of the superpage's constituent 4KB
* pages.
*/
- if ((nbits & ATTR_AP(ATTR_AP_RO)) != 0 &&
- (old_l2 & ATTR_SW_MANAGED) != 0 &&
- pmap_page_dirty(old_l2)) {
+ if ((old_l2 & ATTR_SW_MANAGED) != 0 &&
+ (nbits & ATTR_AP(ATTR_AP_RO)) != 0 && pmap_pte_dirty(old_l2)) {
m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK);
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
vm_page_dirty(mt);
}
- pmap_set(l2, nbits);
+ if (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits))
+ goto retry;
/*
* Since a promotion must break the 4KB page mappings before making
@@ -2838,7 +2855,7 @@
{
vm_offset_t va, va_next;
pd_entry_t *l0, *l1, *l2;
- pt_entry_t *l3p, l3, nbits;
+ pt_entry_t *l3p, l3, mask, nbits;
KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
if (prot == VM_PROT_NONE) {
@@ -2846,12 +2863,16 @@
return;
}
- nbits = 0;
- if ((prot & VM_PROT_WRITE) == 0)
+ mask = nbits = 0;
+ if ((prot & VM_PROT_WRITE) == 0) {
+ mask |= ATTR_AP_RW_BIT | ATTR_SW_DBM;
nbits |= ATTR_AP(ATTR_AP_RO);
- if ((prot & VM_PROT_EXECUTE) == 0)
+ }
+ if ((prot & VM_PROT_EXECUTE) == 0) {
+ mask |= ATTR_XN;
nbits |= ATTR_XN;
- if (nbits == 0)
+ }
+ if (mask == 0)
return;
PMAP_LOCK(pmap);
@@ -2883,7 +2904,7 @@
if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
if (sva + L2_SIZE == va_next && eva >= va_next) {
- pmap_protect_l2(pmap, l2, sva, nbits);
+ pmap_protect_l2(pmap, l2, sva, mask, nbits);
continue;
} else if (pmap_demote_l2(pmap, l2, sva) == NULL)
continue;
@@ -2897,6 +2918,8 @@
va = va_next;
for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
sva += L3_SIZE) {
+ l3 = pmap_load(l3p);
+retry:
/*
* Go to the next L3 entry if the current one is
* invalid or already has the desired access
@@ -2905,27 +2928,27 @@
* workload, almost 1 out of 4 L3 entries already
* have the desired restrictions.)
*/
- l3 = pmap_load(l3p);
- if (!pmap_l3_valid(l3) || (l3 | nbits) == l3) {
+ if (!pmap_l3_valid(l3) || (l3 & mask) == nbits) {
if (va != va_next) {
pmap_invalidate_range(pmap, va, sva);
va = va_next;
}
continue;
}
- if (va == va_next)
- va = sva;
- /*
- * When a dirty read/write mapping is write protected,
- * update the page's dirty field.
- */
- if ((nbits & ATTR_AP(ATTR_AP_RO)) != 0 &&
- (l3 & ATTR_SW_MANAGED) != 0 &&
- pmap_page_dirty(l3))
+ /*
+ * When a dirty read/write mapping is write protected,
+ * update the page's dirty field.
+ */
+ if ((l3 & ATTR_SW_MANAGED) != 0 &&
+ (nbits & ATTR_AP(ATTR_AP_RO)) != 0 &&
+ pmap_pte_dirty(l3))
vm_page_dirty(PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK));
- pmap_set(l3p, nbits);
+ if (!atomic_fcmpset_64(l3p, &l3, (l3 & ~mask) | nbits))
+ goto retry;
+ if (va == va_next)
+ va = sva;
}
if (va != va_next)
pmap_invalidate_range(pmap, va, sva);
@@ -3145,7 +3168,11 @@
pa = VM_PAGE_TO_PHYS(m);
new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
L3_PAGE);
- if ((prot & VM_PROT_WRITE) == 0)
+ if ((prot & VM_PROT_WRITE) != 0)
+ new_l3 |= ATTR_SW_DBM;
+ if ((flags & VM_PROT_WRITE) != 0)
+ new_l3 |= ATTR_AP(ATTR_AP_RW);
+ else
new_l3 |= ATTR_AP(ATTR_AP_RO);
if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
new_l3 |= ATTR_XN;
@@ -3266,6 +3293,7 @@
/*
* The physical page has changed.
+ * XXX need to reload orig_l3 for hardware DBM.
*/
(void)pmap_load_clear(l3);
KASSERT((orig_l3 & ~ATTR_MASK) == opa,
@@ -3278,7 +3306,7 @@
* concurrent calls to pmap_page_test_mappings() and
* pmap_ts_referenced().
*/
- if (pmap_page_dirty(orig_l3))
+ if (pmap_pte_dirty(orig_l3))
vm_page_dirty(om);
if ((orig_l3 & ATTR_AF) != 0)
vm_page_aflag_set(om, PGA_REFERENCED);
@@ -3341,9 +3369,10 @@
KASSERT(opa == pa, ("pmap_enter: invalid update"));
if ((orig_l3 & ~ATTR_AF) != (new_l3 & ~ATTR_AF)) {
/* same PA, different attributes */
+ /* XXXMJ need to reload orig_l3 for hardware DBM. */
pmap_load_store(l3, new_l3);
pmap_invalidate_page(pmap, va);
- if (pmap_page_dirty(orig_l3) &&
+ if (pmap_pte_dirty(orig_l3) &&
(orig_l3 & ATTR_SW_MANAGED) != 0)
vm_page_dirty(m);
} else {
@@ -3694,7 +3723,7 @@
pmap_resident_count_inc(pmap, 1);
pa = VM_PAGE_TO_PHYS(m);
- l3_val = pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
+ l3_val = pa | (ATTR_DEFAULT & ~ATTR_AF) | ATTR_IDX(m->md.pv_memattr) |
ATTR_AP(ATTR_AP_RO) | L3_PAGE;
if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
l3_val |= ATTR_XN;
@@ -3841,7 +3870,7 @@
struct rwlock *lock;
struct spglist free;
pd_entry_t *l0, *l1, *l2, srcptepaddr;
- pt_entry_t *dst_pte, ptetemp, *src_pte;
+ pt_entry_t *dst_pte, mask, ptetemp, *src_pte;
vm_offset_t addr, end_addr, va_next;
vm_page_t dst_l2pg, dstmpte, srcmpte;
@@ -3892,8 +3921,10 @@
((srcptepaddr & ATTR_SW_MANAGED) == 0 ||
pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr,
PMAP_ENTER_NORECLAIM, &lock))) {
- (void)pmap_load_store(l2, srcptepaddr &
- ~ATTR_SW_WIRED);
+ mask = ATTR_AF | ATTR_SW_WIRED;
+ if ((srcptepaddr & ATTR_SW_DBM) != 0)
+ mask |= ATTR_AP_RW_BIT;
+ (void)pmap_load_store(l2, srcptepaddr & ~mask);
pmap_resident_count_inc(dst_pmap, L2_SIZE /
PAGE_SIZE);
atomic_add_long(&pmap_l2_mappings, 1);
@@ -3937,11 +3968,11 @@
/*
* Clear the wired, modified, and accessed
* (referenced) bits during the copy.
- *
- * XXX not yet
*/
- (void)pmap_load_store(dst_pte, ptetemp &
- ~ATTR_SW_WIRED);
+ mask = ATTR_AF | ATTR_SW_WIRED;
+ if ((ptetemp & ATTR_SW_DBM) != 0)
+ mask |= ATTR_AP_RW_BIT;
+ (void)pmap_load_store(dst_pte, ptetemp & ~mask);
pmap_resident_count_inc(dst_pmap, 1);
} else {
SLIST_INIT(&free);
@@ -4287,8 +4318,7 @@
/*
* Update the vm_page_t clean/reference bits.
*/
- if ((tpte & ATTR_AP_RW_BIT) ==
- ATTR_AP(ATTR_AP_RW)) {
+ if (pmap_pte_dirty(tpte)) {
switch (lvl) {
case 1:
for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
@@ -4563,7 +4593,7 @@
}
va = pv->pv_va;
pte = pmap_pte(pmap, pv->pv_va, &lvl);
- if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
+ if ((pmap_load(pte) & ATTR_SW_DBM) != 0)
(void)pmap_demote_l2_locked(pmap, pte, va, &lock);
KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
("inconsistent pv lock %p %p for page %p",
@@ -4586,13 +4616,14 @@
}
}
pte = pmap_pte(pmap, pv->pv_va, &lvl);
-retry:
oldpte = pmap_load(pte);
- if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
- if (!atomic_cmpset_long(pte, oldpte,
- oldpte | ATTR_AP(ATTR_AP_RO)))
+retry:
+ if ((oldpte & ATTR_SW_DBM) != 0) {
+ if (!atomic_fcmpset_long(pte, &oldpte,
+ (oldpte | ATTR_AP_RW_BIT) & ~ATTR_SW_DBM))
goto retry;
- if ((oldpte & ATTR_AF) != 0)
+ if ((oldpte & ATTR_AP(ATTR_AP_RW)) ==
+ ATTR_AP(ATTR_AP_RW))
vm_page_dirty(m);
pmap_invalidate_page(pmap, pv->pv_va);
}
@@ -4602,13 +4633,6 @@
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
-static __inline boolean_t
-safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
-{
-
- return (FALSE);
-}
-
/*
* pmap_ts_referenced:
*
@@ -4634,12 +4658,10 @@
struct rwlock *lock;
pd_entry_t *pde, tpde;
pt_entry_t *pte, tpte;
- pt_entry_t *l3;
vm_offset_t va;
vm_paddr_t pa;
- int cleared, md_gen, not_cleared, lvl, pvh_gen;
+ int cleared, md_gen, lvl, pvh_gen;
struct spglist free;
- bool demoted;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_ts_referenced: page %p is not managed", m));
@@ -4650,7 +4672,6 @@
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
rw_wlock(lock);
retry:
- not_cleared = 0;
if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
goto small_mappings;
pv = pvf;
@@ -4678,7 +4699,7 @@
("pmap_ts_referenced: found an invalid l1 table"));
pte = pmap_l1_to_l2(pde, pv->pv_va);
tpte = pmap_load(pte);
- if (pmap_page_dirty(tpte)) {
+ if (pmap_pte_dirty(tpte)) {
/*
* Although "tpte" is mapping a 2MB page, because
* this function is called at a 4KB page granularity,
@@ -4686,63 +4707,29 @@
*/
vm_page_dirty(m);
}
- if ((tpte & ATTR_AF) != 0) {
- /*
- * Since this reference bit is shared by 512 4KB
- * pages, it should not be cleared every time it is
- * tested. Apply a simple "hash" function on the
- * physical page number, the virtual superpage number,
- * and the pmap address to select one 4KB page out of
- * the 512 on which testing the reference bit will
- * result in clearing that reference bit. This
- * function is designed to avoid the selection of the
- * same 4KB page for every 2MB page mapping.
- *
- * On demotion, a mapping that hasn't been referenced
- * is simply destroyed. To avoid the possibility of a
- * subsequent page fault on a demoted wired mapping,
- * always leave its reference bit set. Moreover,
- * since the superpage is wired, the current state of
- * its reference bit won't affect page replacement.
- */
- if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^
- (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
- (tpte & ATTR_SW_WIRED) == 0) {
- if (safe_to_clear_referenced(pmap, tpte)) {
- /*
- * TODO: We don't handle the access
- * flag at all. We need to be able
- * to set it in the exception handler.
- */
- panic("ARM64TODO: "
- "safe_to_clear_referenced\n");
- } else if (pmap_demote_l2_locked(pmap, pte,
- pv->pv_va, &lock) != NULL) {
- demoted = true;
- va += VM_PAGE_TO_PHYS(m) -
- (tpte & ~ATTR_MASK);
- l3 = pmap_l2_to_l3(pte, va);
- pmap_remove_l3(pmap, l3, va,
- pmap_load(pte), NULL, &lock);
- } else
- demoted = true;
-
- if (demoted) {
- /*
- * The superpage mapping was removed
- * entirely and therefore 'pv' is no
- * longer valid.
- */
- if (pvf == pv)
- pvf = NULL;
- pv = NULL;
- }
- cleared++;
- KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
- ("inconsistent pv lock %p %p for page %p",
- lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
- } else
- not_cleared++;
+
+ /*
+ * Since this reference bit is shared by 512 4KB pages, it
+ * should not be cleared every time it is tested. Apply a
+ * simple "hash" function on the physical page number, the
+ * virtual superpage number, and the pmap address to select one
+ * 4KB page out of the 512 on which testing the reference bit
+ * will result in clearing that reference bit. This function is
+ * designed to avoid the selection of the same 4KB page for
+ * every 2MB page mapping.
+ *
+ * On demotion, a mapping that hasn't been referenced is simply
+ * destroyed. To avoid the possibility of a subsequent page
+ * fault on a demoted wired mapping, always leave its reference
+ * bit set. Moreover, since the superpage is wired, the current
+ * state of its reference bit won't affect page replacement.
+ */
+ if ((tpte & ATTR_AF) != 0 && (((pa >> PAGE_SHIFT) ^
+ (pv->pv_va >> L2_SHIFT) ^ (uintptr_t)pmap) &
+ (Ln_ENTRIES - 1)) == 0 && (tpte & ATTR_SW_WIRED) == 0) {
+ atomic_clear_64(pte, ATTR_AF);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ cleared++;
}
PMAP_UNLOCK(pmap);
/* Rotate the PV list if it has more than one entry. */
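
The selection "hash" described in the comment above can be checked in isolation: over the 512 consecutive 4KB frames backing one 2MB mapping, exactly one frame number makes the XOR of frame number, superpage index, and pmap address zero modulo 512. A small sketch with made-up addresses (all constants and values here are illustrative, not taken from a real system):

/* Demonstrates the 1-of-512 property of the reference-bit selection hash. */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT	12
#define EX_L2_SHIFT	21
#define EX_Ln_ENTRIES	512

int
main(void)
{
	uint64_t pa_base = UINT64_C(0x40200000);		/* 2MB-aligned example PA */
	uint64_t va = UINT64_C(0x0000aabbcc200000);		/* example superpage VA */
	uint64_t pmap_addr = UINT64_C(0xffff000040123480);	/* example pmap pointer */
	uint64_t pa;
	int hits = 0;

	for (pa = pa_base;
	    pa < pa_base + ((uint64_t)EX_Ln_ENTRIES << EX_PAGE_SHIFT);
	    pa += UINT64_C(1) << EX_PAGE_SHIFT) {
		if ((((pa >> EX_PAGE_SHIFT) ^ (va >> EX_L2_SHIFT) ^ pmap_addr) &
		    (EX_Ln_ENTRIES - 1)) == 0)
			hits++;
	}
	printf("frames selected out of %d: %d\n", EX_Ln_ENTRIES, hits);	/* prints 1 */
	return (0);
}
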
@@ -4751,7 +4738,7 @@
TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
pvh->pv_gen++;
}
- if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX)
+ if (cleared >= PMAP_TS_REFERENCED_MAX)
goto out;
} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
small_mappings:
@@ -4782,34 +4769,12 @@
("pmap_ts_referenced: found an invalid l2 table"));
pte = pmap_l2_to_l3(pde, pv->pv_va);
tpte = pmap_load(pte);
- if (pmap_page_dirty(tpte))
+ if (pmap_pte_dirty(tpte))
vm_page_dirty(m);
if ((tpte & ATTR_AF) != 0) {
- if (safe_to_clear_referenced(pmap, tpte)) {
- /*
- * TODO: We don't handle the access flag
- * at all. We need to be able to set it in
- * the exception handler.
- */
- panic("ARM64TODO: safe_to_clear_referenced\n");
- } else if ((tpte & ATTR_SW_WIRED) == 0) {
- /*
- * Wired pages cannot be paged out so
- * doing accessed bit emulation for
- * them is wasted effort. We do the
- * hard work for unwired pages only.
- */
- pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
- &free, &lock);
- cleared++;
- if (pvf == pv)
- pvf = NULL;
- pv = NULL;
- KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
- ("inconsistent pv lock %p %p for page %p",
- lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
- } else
- not_cleared++;
+ atomic_clear_64(pte, ATTR_AF);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ cleared++;
}
PMAP_UNLOCK(pmap);
/* Rotate the PV list if it has more than one entry. */
@@ -4818,12 +4783,12 @@
TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
}
- } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
- not_cleared < PMAP_TS_REFERENCED_MAX);
+ } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared <
+ PMAP_TS_REFERENCED_MAX);
out:
rw_wunlock(lock);
vm_page_free_pages_toq(&free, true);
- return (cleared + not_cleared);
+ return (cleared);
}
/*
@@ -4842,6 +4807,14 @@
void
pmap_clear_modify(vm_page_t m)
{
+ struct md_page *pvh;
+ struct rwlock *lock;
+ pmap_t pmap;
+ pv_entry_t next_pv, pv;
+ pd_entry_t *l2, oldl2;
+ pt_entry_t *l3, oldl3;
+ vm_offset_t va;
+ int md_gen, pvh_gen;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_clear_modify: page %p is not managed", m));
@@ -4850,14 +4823,81 @@
("pmap_clear_modify: page %p is exclusive busied", m));
/*
- * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
- * If the object containing the page is locked and the page is not
+ * If the page is not PGA_WRITEABLE, then no PTEs can have ATTR_SW_DBM
+ * set. If the object containing the page is locked and the page is not
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
*/
if ((m->aflags & PGA_WRITEABLE) == 0)
return;
-
- /* ARM64TODO: We lack support for tracking if a page is modified */
+ pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
+ pa_to_pvh(VM_PAGE_TO_PHYS(m));
+ lock = VM_PAGE_TO_PV_LIST_LOCK(m);
+ rw_wlock(lock);
+restart:
+ TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
+ pmap = PV_PMAP(pv);
+ if (!PMAP_TRYLOCK(pmap)) {
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto restart;
+ }
+ }
+ va = pv->pv_va;
+ l2 = pmap_l2(pmap, va);
+ oldl2 = pmap_load(l2);
+ if ((oldl2 & ATTR_SW_DBM) != 0) {
+ if (pmap_demote_l2_locked(pmap, l2, va, &lock)) {
+ if ((oldl2 & ATTR_SW_WIRED) == 0) {
+ /*
+ * Write protect the mapping to a
+ * single page so that a subsequent
+ * write access may repromote.
+ */
+ va += VM_PAGE_TO_PHYS(m) -
+ (oldl2 & ~ATTR_MASK);
+ l3 = pmap_l2_to_l3(l2, va);
+ oldl3 = pmap_load(l3);
+ if (pmap_l3_valid(oldl3)) {
+ while (!atomic_fcmpset_long(l3,
+ &oldl3, (oldl3 & ~ATTR_SW_DBM) |
+ ATTR_AP(ATTR_AP_RO)))
+ cpu_spinwait();
+ vm_page_dirty(m);
+ pmap_invalidate_page(pmap, va);
+ }
+ }
+ }
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
+ pmap = PV_PMAP(pv);
+ if (!PMAP_TRYLOCK(pmap)) {
+ md_gen = m->md.pv_gen;
+ pvh_gen = pvh->pv_gen;
+ rw_wunlock(lock);
+ PMAP_LOCK(pmap);
+ rw_wlock(lock);
+ if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
+ PMAP_UNLOCK(pmap);
+ goto restart;
+ }
+ }
+ l2 = pmap_l2(pmap, pv->pv_va);
+ l3 = pmap_l2_to_l3(l2, pv->pv_va);
+ oldl3 = pmap_load(l3);
+ if (pmap_l3_valid(oldl3) &&
+ (oldl3 & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM) {
+ atomic_clear_64(l3, ATTR_AP_RW_BIT);
+ pmap_invalidate_page(pmap, pv->pv_va);
+ }
+ PMAP_UNLOCK(pmap);
+ }
+ rw_wunlock(lock);
}
void *
@@ -4936,8 +4976,8 @@
/* Insert L2_BLOCK */
l2 = pmap_l1_to_l2(pde, va);
pmap_load_store(l2,
- pa | ATTR_DEFAULT | ATTR_XN |
- ATTR_IDX(CACHED_MEMORY) | L2_BLOCK);
+ pa | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM |
+ ATTR_XN | ATTR_IDX(CACHED_MEMORY) | L2_BLOCK);
va += L2_SIZE;
pa += L2_SIZE;
@@ -5448,7 +5488,7 @@
val = MINCORE_INCORE;
if (lvl != 3)
val |= MINCORE_SUPER;
- if (pmap_page_dirty(tpte))
+ if (pmap_pte_dirty(tpte))
val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
if ((tpte & ATTR_AF) == ATTR_AF)
val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
@@ -5561,22 +5601,54 @@
int
pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
{
-#ifdef SMP
+ pt_entry_t *pte;
register_t intr;
- uint64_t par;
+ uint64_t ec, par;
+ int lvl, rv;
- switch (ESR_ELx_EXCEPTION(esr)) {
+ rv = KERN_FAILURE;
+
+ ec = ESR_ELx_EXCEPTION(esr);
+ switch (ec) {
case EXCP_INSN_ABORT_L:
case EXCP_INSN_ABORT:
case EXCP_DATA_ABORT_L:
case EXCP_DATA_ABORT:
break;
default:
- return (KERN_FAILURE);
+ return (rv);
}
- /* Data and insn aborts use same encoding for FCS field. */
+ /* Data and insn aborts use same encoding for FSC field. */
switch (esr & ISS_DATA_DFSC_MASK) {
+ case ISS_DATA_DFSC_AFF_L1:
+ case ISS_DATA_DFSC_AFF_L2:
+ case ISS_DATA_DFSC_AFF_L3:
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, far, &lvl);
+ if (pte != NULL && (pmap_load(pte) & ATTR_AF) == 0) {
+ atomic_set_64(pte, ATTR_AF);
+ rv = KERN_SUCCESS;
+ }
+ PMAP_UNLOCK(pmap);
+ break;
+ case ISS_DATA_DFSC_PF_L1:
+ case ISS_DATA_DFSC_PF_L2:
+ case ISS_DATA_DFSC_PF_L3:
+ if ((ec != EXCP_DATA_ABORT_L && ec != EXCP_DATA_ABORT) ||
+ (esr & ISS_DATA_WnR) == 0)
+ return (rv);
+ PMAP_LOCK(pmap);
+ pte = pmap_pte(pmap, far, &lvl);
+ if (pte != NULL &&
+ (pmap_load(pte) & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) ==
+ (ATTR_AP(ATTR_AP_RO) | ATTR_SW_DBM)) {
+ atomic_clear_64(pte, ATTR_AP_RW_BIT);
+ pmap_invalidate_page(pmap, trunc_page(far));
+ rv = KERN_SUCCESS;
+ }
+ PMAP_UNLOCK(pmap);
+ break;
case ISS_DATA_DFSC_TF_L0:
case ISS_DATA_DFSC_TF_L1:
case ISS_DATA_DFSC_TF_L2:
@@ -5597,14 +5669,11 @@
* return success to the trap handler.
*/
if (PAR_SUCCESS(par))
- return (KERN_SUCCESS);
- break;
- default:
+ rv = KERN_SUCCESS;
break;
}
-#endif
- return (KERN_FAILURE);
+ return (rv);
}
/*
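
The pmap_fault() changes above handle two fault classes in software when hardware AF/DBM management is absent: an access-flag fault simply sets ATTR_AF, and a write permission fault on a clean but writeable (read-only plus ATTR_SW_DBM) mapping clears the read-only bit, marking the page dirty. A minimal userland sketch of those two transitions, again with assumed bit positions rather than the kernel's:

/*
 * Sketch of the software AF/DBM fault transitions; bit positions are
 * assumptions and this is not the kernel code itself.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t ex_pt_entry_t;

#define EX_ATTR_AF	(UINT64_C(1) << 10)	/* accessed flag (assumed) */
#define EX_ATTR_AP_RO	(UINT64_C(1) << 7)	/* read-only bit (assumed) */
#define EX_ATTR_SW_DBM	(UINT64_C(1) << 57)	/* "writeable" marker (assumed) */

/* Access-flag fault: record the reference by setting AF. */
static void
ex_handle_access_fault(ex_pt_entry_t *pte)
{
	*pte |= EX_ATTR_AF;
}

/* Write permission fault: upgrade a clean, writeable mapping to dirty. */
static void
ex_handle_write_fault(ex_pt_entry_t *pte)
{
	if ((*pte & (EX_ATTR_AP_RO | EX_ATTR_SW_DBM)) ==
	    (EX_ATTR_AP_RO | EX_ATTR_SW_DBM))
		*pte &= ~EX_ATTR_AP_RO;		/* now read/write, i.e. dirty */
}

int
main(void)
{
	/* A clean, not-yet-referenced, writeable mapping. */
	ex_pt_entry_t pte = EX_ATTR_AP_RO | EX_ATTR_SW_DBM;

	ex_handle_access_fault(&pte);
	printf("referenced=%d\n", (pte & EX_ATTR_AF) != 0);	/* 1 */
	ex_handle_write_fault(&pte);
	printf("dirty=%d\n", (pte & EX_ATTR_AP_RO) == 0);	/* 1 */
	return (0);
}
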
Index: sys/arm64/arm64/trap.c
===================================================================
--- sys/arm64/arm64/trap.c
+++ sys/arm64/arm64/trap.c
@@ -192,32 +192,16 @@
}
/*
- * The call to pmap_fault can be dangerous when coming from the
- * kernel as it may be not be able to lock the pmap to check if
- * the address is now valid. Because of this we filter the cases
- * when we are not going to see superpage activity.
+ * Try to handle translation, access flag, and permission faults.
+ * Translation faults may occur as a result of the required
+ * break-before-make sequence used when promoting or demoting
+ * superpages. Such faults must not occur while holding the pmap lock,
+ * or pmap_fault() will recurse on that lock.
*/
- if (!lower) {
- /*
- * We may fault in a DMAP region due to a superpage being
- * unmapped when the access took place.
- */
- if (map == kernel_map && !VIRT_IN_DMAP(far))
- goto no_pmap_fault;
- /*
- * We can also fault in the userspace handling functions,
- * e.g. copyin. In these cases we will have set a fault
- * handler so we can check if this is set before calling
- * pmap_fault.
- */
- if (map != kernel_map && pcb->pcb_onfault == 0)
- goto no_pmap_fault;
- }
-
- if (pmap_fault(map->pmap, esr, far) == KERN_SUCCESS)
+ if ((lower || map == kernel_map || pcb->pcb_onfault != 0) &&
+ pmap_fault(map->pmap, esr, far) == KERN_SUCCESS)
return;
-no_pmap_fault:
KASSERT(td->td_md.md_spinlock_count == 0,
("data abort with spinlock held"));
if (td->td_critnest != 0 || WITNESS_CHECK(WARN_SLEEPOK |
@@ -229,9 +213,11 @@
}
va = trunc_page(far);
- ftype = ((esr >> 6) & 1) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ;
if (exec)
- ftype |= VM_PROT_EXECUTE;
+ ftype = VM_PROT_EXECUTE;
+ else
+ ftype = (esr & ISS_DATA_WnR) == 0 ? VM_PROT_READ :
+ VM_PROT_READ | VM_PROT_WRITE;
/* Fault in the page. */
error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
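
With the rewritten logic above, the fault type passed to vm_fault() is derived from the exception class and the WnR bit: instruction aborts request execute permission, while data aborts request read, or read plus write when ESR_ELx.WnR is set. A small sketch of that mapping with placeholder constants (the real definitions live in armreg.h and the VM headers):

/* Sketch of fault-type selection from the ESR, with assumed constants. */
#include <stdint.h>
#include <stdio.h>

#define EX_VM_PROT_READ		0x01
#define EX_VM_PROT_WRITE	0x02
#define EX_VM_PROT_EXECUTE	0x04

#define EX_ISS_DATA_WnR		(UINT64_C(1) << 6)	/* write-not-read, as in armreg.h */

static int
ex_fault_type(uint64_t esr, int exec)
{
	if (exec)
		return (EX_VM_PROT_EXECUTE);
	return ((esr & EX_ISS_DATA_WnR) == 0 ? EX_VM_PROT_READ :
	    EX_VM_PROT_READ | EX_VM_PROT_WRITE);
}

int
main(void)
{
	printf("instruction fetch: %#x\n", ex_fault_type(0, 1));		/* 0x4 */
	printf("data read:         %#x\n", ex_fault_type(0, 0));		/* 0x1 */
	printf("data write:        %#x\n", ex_fault_type(EX_ISS_DATA_WnR, 0));	/* 0x3 */
	return (0);
}
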
Index: sys/arm64/include/armreg.h
===================================================================
--- sys/arm64/include/armreg.h
+++ sys/arm64/include/armreg.h
@@ -91,10 +91,10 @@
#define ISS_DATA_SF (0x01 << 15)
#define ISS_DATA_AR (0x01 << 14)
#define ISS_DATA_FnV (0x01 << 10)
-#define ISS_DATa_EA (0x01 << 9)
-#define ISS_DATa_CM (0x01 << 8)
-#define ISS_INSN_S1PTW (0x01 << 7)
-#define ISS_DATa_WnR (0x01 << 6)
+#define ISS_DATA_EA (0x01 << 9)
+#define ISS_DATA_CM (0x01 << 8)
+#define ISS_DATA_S1PTW (0x01 << 7)
+#define ISS_DATA_WnR (0x01 << 6)
#define ISS_DATA_DFSC_MASK (0x3f << 0)
#define ISS_DATA_DFSC_ASF_L0 (0x00 << 0)
#define ISS_DATA_DFSC_ASF_L1 (0x01 << 0)
Index: sys/arm64/include/pte.h
===================================================================
--- sys/arm64/include/pte.h
+++ sys/arm64/include/pte.h
@@ -39,11 +39,12 @@
#endif
/* Block and Page attributes */
-/* TODO: Add the upper attributes */
#define ATTR_MASK_H UINT64_C(0xfff0000000000000)
#define ATTR_MASK_L UINT64_C(0x0000000000000fff)
#define ATTR_MASK (ATTR_MASK_H | ATTR_MASK_L)
/* Bits 58:55 are reserved for software */
+#define ATTR_SW_UNUSED (1UL << 58)
+#define _ATTR_SW_DBM (1UL << 57)
#define ATTR_SW_MANAGED (1UL << 56)
#define ATTR_SW_WIRED (1UL << 55)
#define ATTR_UXN (1UL << 54)
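
The software bit added above (_ATTR_SW_DBM at bit 57) pairs with the pmap_bootstrap() change earlier in this diff, which points ATTR_SW_DBM at either the hardware DBM descriptor bit or the software fallback depending on ID_AA64MMFR1_EL1.HAFDBS. A sketch of that selection, assuming the architectural field encoding (0 = none, 1 = AF only, 2 = AF and dirty state) and bit 51 for the hardware DBM bit:

/*
 * Sketch of the boot-time choice between hardware and software DBM;
 * field encoding and bit positions are stated assumptions.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_ATTR_DBM		(UINT64_C(1) << 51)	/* hardware DBM descriptor bit */
#define EX__ATTR_SW_DBM		(UINT64_C(1) << 57)	/* software-reserved fallback */

#define EX_HAFDBS_AF_DBS	2	/* hardware manages AF and dirty state */

/* Pick the bit pmap will treat as "DBM", given the HAFDBS field value. */
static uint64_t
ex_choose_dbm_bit(unsigned int hafdbs)
{
	return (hafdbs == EX_HAFDBS_AF_DBS ? EX_ATTR_DBM : EX__ATTR_SW_DBM);
}

int
main(void)
{
	printf("no hardware DBM: bit %#llx\n",
	    (unsigned long long)ex_choose_dbm_bit(0));
	printf("hardware AF+DBS: bit %#llx\n",
	    (unsigned long long)ex_choose_dbm_bit(EX_HAFDBS_AF_DBS));
	return (0);
}
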
