sys/arm64/arm64/pmap.c
[211 lines elided]
        if (*_lockp != NULL) {  \
            rw_wunlock(*_lockp); \
            *_lockp = NULL; \
        } \
} while (0)

#define VM_PAGE_TO_PV_LIST_LOCK(m) \
    PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+/*
+ * The presence of this flag indicates that the mapping is writeable.
+ * If the ATTR_AP_RO bit is also set, then the mapping is clean, otherwise
+ * it is dirty.  This bit should be preemptively set on unmanaged mappings
+ * to avoid unnecessary faults.
+ */
+static pt_entry_t ATTR_SW_DBM;

alc: I would consider restricting the use of ATTR_SW_DBM to managed mappings.
markj [Done]: I had thought about it a bit and didn't see a strong reason to go either way. I'll change it.
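For illustration, a minimal sketch of how a data-abort handler might transition a clean, writeable PTE to dirty under the software-DBM scheme. The helper name is hypothetical and not part of this change; pmap_load() and atomic_fcmpset_64() are the primitives used elsewhere in this file.

static bool
pte_mark_dirty(pt_entry_t *pte)
{
    pt_entry_t old;

    old = pmap_load(pte);
    do {
        /* Not marked writeable: this is a genuine protection fault. */
        if ((old & ATTR_SW_DBM) == 0)
            return (false);
    } while (!atomic_fcmpset_64(pte, &old, old & ~ATTR_AP_RW_BIT));
    /* Clearing the AP read-only bit leaves the mapping RW, i.e. dirty. */
    return (true);
}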
struct pmap kernel_pmap_store;

/* Used for mapping ACPI memory before VM is initialized */
#define PMAP_PREINIT_MAPPING_COUNT  32
#define PMAP_PREINIT_MAPPING_SIZE   (PMAP_PREINIT_MAPPING_COUNT * L2_SIZE)
static vm_offset_t preinit_map_va;  /* Start VA of pre-init mapping space */
static int vm_initialized = 0;      /* No need to use pre-init maps when set */
[82 lines elided]
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define pmap_clear(table)              atomic_store_64(table, 0)
-#define pmap_load_store(table, entry)  atomic_swap_64(table, entry)
-#define pmap_set(table, mask)          atomic_set_64(table, mask)
-#define pmap_load_clear(table)         atomic_swap_64(table, 0)
#define pmap_load(table)               (*table)
+#define pmap_load_clear(table)         atomic_swap_64(table, 0)
+#define pmap_load_store(table, entry)  atomic_swap_64(table, entry)

alc [Not Done]: I would suggest that you go ahead and commit these style changes.
alc [Done]: Given that you are editing these definitions, maybe put them in alphabetical order.
alc [Not Done]: To be honest, I would just as well see these macros deleted, and have us use atomics directly on PTEs.
andrew [Not Done]: I created them so I could find where we operate on the page table.
markj [Done]: Hmm, I rather like them. It's been helpful to be able to quickly locate all the places where we modify a PTE. I was going to add pmap_set_bits() and pmap_clear_bits() here to replace some of the inline atomics that I added. I did that on RISC-V too.
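markj mentions pmap_set_bits() and pmap_clear_bits(); one minimal form those wrappers could take, sketched here from the comment rather than taken from this revision, following the style of the macros above:

#define pmap_set_bits(table, bits)     atomic_set_64(table, bits)
#define pmap_clear_bits(table, bits)   atomic_clear_64(table, bits)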
/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{
[194 lines elided]
{
    return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}

CTASSERT(L1_BLOCK == L2_BLOCK);
-/*
- * Checks if the page is dirty. We currently lack proper tracking of this on
- * arm64 so for now assume is a page mapped as rw was accessed it is.
- */

alc [Done]: I would retain the first sentence, replacing the word "page" with the acronym "PTE". (The way that a "dirty bit" is implemented on this architecture is unusual enough that sentences like this may help the person reading this code for the first time.)

static inline int
pmap_pte_dirty(pt_entry_t pte)

alc [Not Done]: I would suggest that you go ahead and commit a change that renames this function.

{
-    return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
-        (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
+    KASSERT((pte & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) != 0,
+        ("pte %#lx is writeable and missing ATTR_SW_DBM", pte));
+    return ((pte & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) ==
+        (ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM));
}
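To make the encoding concrete, the complementary predicate would look like this (hypothetical helper, illustration only): a PTE is "clean but writeable" when ATTR_SW_DBM is set and the AP read-only bit is still in place, so the first write faults and redirties it.

static inline int
pmap_pte_clean(pt_entry_t pte)
{
    return ((pte & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) ==
        (ATTR_AP(ATTR_AP_RO) | ATTR_SW_DBM));
}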
static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{
    PMAP_LOCK_ASSERT(pmap, MA_OWNED);
    pmap->pm_stats.resident_count += count;
[88 lines elided; context: if ((pa & L1_OFFSET) != 0) {]
             * create a level 1 block
             */
            if ((pa & L1_OFFSET) == 0)
                break;

            l2_slot = pmap_l2_index(va);
            KASSERT(l2_slot != 0, ("..."));
            pmap_load_store(&l2[l2_slot],
-                (pa & ~L2_OFFSET) | ATTR_DEFAULT | ATTR_XN |
+                (pa & ~L2_OFFSET) | ATTR_DEFAULT |
+                ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM | ATTR_XN |
                ATTR_IDX(CACHED_MEMORY) | L2_BLOCK);
        }
        KASSERT(va == (pa - dmap_phys_base + DMAP_MIN_ADDRESS),
            ("..."));
    }

    for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1] &&
        (physmap[i + 1] - pa) >= L1_SIZE;
        pa += L1_SIZE, va += L1_SIZE) {
        l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
        pmap_load_store(&pagetable_dmap[l1_slot],
-            (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_XN |
+            (pa & ~L1_OFFSET) | ATTR_DEFAULT |
+            ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM | ATTR_XN |
            ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
    }

    /* Create L2 mappings at the end of the region */
    if (pa < physmap[i + 1]) {
        l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
        if (l1_slot != prev_l1_slot) {
            prev_l1_slot = l1_slot;
            l2 = (pt_entry_t *)freemempos;
            l2_pa = pmap_early_vtophys(kern_l1,
                (vm_offset_t)l2);
            freemempos += PAGE_SIZE;
            pmap_load_store(&pagetable_dmap[l1_slot],
                (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE);
            memset(l2, 0, PAGE_SIZE);
        }
        KASSERT(l2 != NULL,
            ("pmap_bootstrap_dmap: NULL l2 map"));
        for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1];
            pa += L2_SIZE, va += L2_SIZE) {
            l2_slot = pmap_l2_index(va);
            pmap_load_store(&l2[l2_slot],
-                (pa & ~L2_OFFSET) | ATTR_DEFAULT | ATTR_XN |
+                (pa & ~L2_OFFSET) | ATTR_DEFAULT |
+                ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM | ATTR_XN |
                ATTR_IDX(CACHED_MEMORY) | L2_BLOCK);
        }
    }

    if (pa > dmap_phys_max) {
        dmap_phys_max = pa;
        dmap_max_addr = va;
    }
[66 lines elided]
/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
    vm_size_t kernlen)
{
    u_int l1_slot, l2_slot;
-    uint64_t kern_delta;
    pt_entry_t *l2;
    vm_offset_t va, freemempos;
    vm_offset_t dpcpu, msgbufpv;
    vm_paddr_t start_pa, pa, min_pa;
+    uint64_t kern_delta, reg;
    int i;

+    /* Determine whether the hardware implements DBM management. */
+    reg = READ_SPECIALREG(ID_AA64MMFR1_EL1);
+    ATTR_SW_DBM = ID_AA64MMFR1_HAFDBS(reg) == ID_AA64MMFR1_HAFDBS_AF_DBS ?
+        ATTR_DBM : _ATTR_SW_DBM;
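    /*
     * [Illustrative note, not part of the patch: ID_AA64MMFR1_EL1.HAFDBS
     * reports hardware Access flag/dirty state support; a value of 1 means
     * only the Access flag is hardware-managed, while the value named
     * ID_AA64MMFR1_HAFDBS_AF_DBS adds hardware dirty-state management, so
     * only in that case can the architectural ATTR_DBM bit stand in for
     * the software-defined _ATTR_SW_DBM bit.]
     */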
    kern_delta = KERNBASE - kernstart;

    printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
    printf("%lx\n", l1pt);
    printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

    /* Set this early so we can use the pagetable walking functions */
    kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
[380 lines elided; context: pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)]
    KASSERT((pa & L3_OFFSET) == 0,
        ("pmap_kenter: Invalid physical address"));
    KASSERT((sva & L3_OFFSET) == 0,
        ("pmap_kenter: Invalid virtual address"));
    KASSERT((size & PAGE_MASK) == 0,
        ("pmap_kenter: Mapping is not page-sized"));

-    attr = ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE;
+    attr = ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM |
+        ATTR_IDX(mode) | L3_PAGE;
    if (mode == DEVICE_MEMORY)
        attr |= ATTR_XN;

    va = sva;
    while (size != 0) {
        pde = pmap_pde(kernel_pmap, va, &lvl);
        KASSERT(pde != NULL,
            ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
[101 lines elided; context: for (i = 0; i < count; i++) {]
        pde = pmap_pde(kernel_pmap, va, &lvl);
        KASSERT(pde != NULL,
            ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
        KASSERT(lvl == 2,
            ("pmap_qenter: Invalid level %d", lvl));

        m = ma[i];
        pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
-            ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
+            ATTR_SW_DBM | ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
        if (m->md.pv_memattr == DEVICE_MEMORY)
            pa |= ATTR_XN;
        pte = pmap_l2_to_l3(pde, va);
        pmap_load_store(pte, pa);
        va += L3_SIZE;
    }
    pmap_invalidate_range(kernel_pmap, sva, va);
[1,175 lines elided; context: if (pmap == kernel_pmap) {]
        }
    }
    return (pmap_unuse_pt(pmap, sva, l1e, free));
}

/*
 * pmap_remove_l3: do the things to unmap a page in a process
 */
-static int
+static int __unused
pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
{
    struct md_page *pvh;
    pt_entry_t old_l3;
    vm_page_t m;

    PMAP_LOCK_ASSERT(pmap, MA_OWNED);
[293 lines elided; context: retry:]
    rw_wunlock(lock);
    vm_page_free_pages_toq(&free, true);
}

/*
 * pmap_protect_l2: do the things to protect a 2MB page in a pmap
 */
static void
-pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t nbits)
+pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask,
+    pt_entry_t nbits)
{
    pd_entry_t old_l2;
    vm_page_t m, mt;

    PMAP_LOCK_ASSERT(pmap, MA_OWNED);
    KASSERT((sva & L2_OFFSET) == 0,
        ("pmap_protect_l2: sva is not 2mpage aligned"));
    old_l2 = pmap_load(l2);
    KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK,
        ("pmap_protect_l2: L2e %lx is not a block mapping", old_l2));

    /*
     * Return if the L2 entry already has the desired access restrictions
     * in place.
     */
+retry:
-    if ((old_l2 | nbits) == old_l2)
+    if ((old_l2 & mask) == nbits)
        return;

    /*
     * When a dirty read/write superpage mapping is write protected,
     * update the dirty field of each of the superpage's constituent 4KB
     * pages.
     */
-    if ((nbits & ATTR_AP(ATTR_AP_RO)) != 0 &&
-        (old_l2 & ATTR_SW_MANAGED) != 0 &&
-        pmap_pte_dirty(old_l2)) {
+    if ((old_l2 & ATTR_SW_MANAGED) != 0 &&
+        (nbits & ATTR_AP(ATTR_AP_RO)) != 0 && pmap_pte_dirty(old_l2)) {
        m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK);

alc [Not Done]: This could be committed now. It doesn't really depend on the rest of the patch.

        for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
            vm_page_dirty(mt);
    }
-    pmap_set(l2, nbits);
+    if (!atomic_fcmpset_64(l2, &old_l2, (old_l2 & ~mask) | nbits))
+        goto retry;

    /*
     * Since a promotion must break the 4KB page mappings before making
     * the 2MB page mapping, a pmap_invalidate_page() suffices.
     */
    pmap_invalidate_page(pmap, sva);
}
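For reference, a sketch of how the new (mask, nbits) interface reads at a call site: the function atomically clears every bit in mask and sets nbits, so write-protecting a dirty-trackable 2MB mapping would look like this (mirroring the pmap_protect() changes below; illustration only):

    pmap_protect_l2(pmap, l2, sva, ATTR_AP_RW_BIT | ATTR_SW_DBM,
        ATTR_AP(ATTR_AP_RO));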
/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
    vm_offset_t va, va_next;
    pd_entry_t *l0, *l1, *l2;
-    pt_entry_t *l3p, l3, nbits;
+    pt_entry_t *l3p, l3, mask, nbits;

    KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
    if (prot == VM_PROT_NONE) {
        pmap_remove(pmap, sva, eva);
        return;
    }

-    nbits = 0;
-    if ((prot & VM_PROT_WRITE) == 0)
+    mask = nbits = 0;
+    if ((prot & VM_PROT_WRITE) == 0) {
+        mask |= ATTR_AP_RW_BIT | ATTR_SW_DBM;
        nbits |= ATTR_AP(ATTR_AP_RO);
-    if ((prot & VM_PROT_EXECUTE) == 0)
+    }
+    if ((prot & VM_PROT_EXECUTE) == 0) {
+        mask |= ATTR_XN;
        nbits |= ATTR_XN;
-    if (nbits == 0)
+    }
+    if (mask == 0)
        return;

    PMAP_LOCK(pmap);
    for (; sva < eva; sva = va_next) {
        l0 = pmap_l0(pmap, sva);
        if (pmap_load(l0) == 0) {
            va_next = (sva + L0_SIZE) & ~L0_OFFSET;
[15 lines elided; context: if (va_next < sva)]
            va_next = eva;

        l2 = pmap_l1_to_l2(l1, sva);
        if (pmap_load(l2) == 0)
            continue;

        if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
            if (sva + L2_SIZE == va_next && eva >= va_next) {
-                pmap_protect_l2(pmap, l2, sva, nbits);
+                pmap_protect_l2(pmap, l2, sva, mask, nbits);
                continue;
            } else if (pmap_demote_l2(pmap, l2, sva) == NULL)
                continue;
        }
        KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
            ("pmap_protect: Invalid L2 entry after demotion"));

        if (va_next > eva)
            va_next = eva;

        va = va_next;
        for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
            sva += L3_SIZE) {
+            l3 = pmap_load(l3p);
+retry:
            /*
             * Go to the next L3 entry if the current one is
             * invalid or already has the desired access
             * restrictions in place.  (The latter case occurs
             * frequently.  For example, in a "buildworld"
             * workload, almost 1 out of 4 L3 entries already
             * have the desired restrictions.)
             */
-            l3 = pmap_load(l3p);
-            if (!pmap_l3_valid(l3) || (l3 | nbits) == l3) {
+            if (!pmap_l3_valid(l3) || (l3 & mask) == nbits) {
                if (va != va_next) {
                    pmap_invalidate_range(pmap, va, sva);
                    va = va_next;
                }
                continue;
            }
-            if (va == va_next)
-                va = sva;

            /*
             * When a dirty read/write mapping is write protected,
             * update the page's dirty field.
             */
-            if ((nbits & ATTR_AP(ATTR_AP_RO)) != 0 &&
-                (l3 & ATTR_SW_MANAGED) != 0 &&
+            if ((l3 & ATTR_SW_MANAGED) != 0 &&
+                (nbits & ATTR_AP(ATTR_AP_RO)) != 0 &&
                pmap_pte_dirty(l3))

alc [Done]: This looks like tabs got replaced by spaces.
alc [Not Done]: This could be committed now. It doesn't really depend on the rest of the patch.

                vm_page_dirty(PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK));

-            pmap_set(l3p, nbits);
+            if (!atomic_fcmpset_64(l3p, &l3, (l3 & ~mask) | nbits))
+                goto retry;
+            if (va == va_next)
+                va = sva;
        }
        if (va != va_next)
            pmap_invalidate_range(pmap, va, sva);
    }
    PMAP_UNLOCK(pmap);
}
/*
[203 lines elided; context: pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,]
    int lvl, rv;

    va = trunc_page(va);
    if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
        VM_OBJECT_ASSERT_LOCKED(m->object);
    pa = VM_PAGE_TO_PHYS(m);
    new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
        L3_PAGE);
-    if ((prot & VM_PROT_WRITE) == 0)
+    if ((prot & VM_PROT_WRITE) != 0)
+        new_l3 |= ATTR_SW_DBM;
+    if ((flags & VM_PROT_WRITE) != 0)
+        new_l3 |= ATTR_AP(ATTR_AP_RW);
+    else
        new_l3 |= ATTR_AP(ATTR_AP_RO);

alc [Done]: This doesn't appear to write-protect unmanaged mappings. I think that you should restore:

    if ((prot & VM_PROT_WRITE) == 0)
        new_l3 |= ATTR_AP(ATTR_AP_RO);

here and below ...

    if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
        new_l3 |= ATTR_XN;
    if ((flags & PMAP_ENTER_WIRED) != 0)
        new_l3 |= ATTR_SW_WIRED;
    if (va < VM_MAXUSER_ADDRESS)
        new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
    if ((m->oflags & VPO_UNMANAGED) == 0)
        new_l3 |= ATTR_SW_MANAGED;

    CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);

alc [Done]: ... change this to

    if ((prot & VM_PROT_WRITE) != 0) {
        new_l3 |= ATTR_SW_DBM…

    lock = NULL;
    PMAP_LOCK(pmap);
    if (psind == 1) {
        /* Assert the required virtual and physical alignment. */
        KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va unaligned"));
        KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind"));
        rv = pmap_enter_l2(pmap, va, (new_l3 & ~L3_PAGE) | L2_BLOCK,
[93 lines elided; context: if (opa == pa) {]
                vm_page_aflag_set(m, PGA_WRITEABLE);
            }
        }
        goto validate;
    }

    /*
     * The physical page has changed.  Temporarily invalidate
     * the mapping.

alc [Not Done]: I fixed this one in r349866.

     */
    orig_l3 = pmap_load_clear(l3);
    KASSERT((orig_l3 & ~ATTR_MASK) == opa,
        ("pmap_enter: unexpected pa update for %#lx", va));
    if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
        om = PHYS_TO_VM_PAGE(opa);

        /*
[59 lines elided; context: validate:]
    /*
     * Update the L3 entry
     */
    if (pmap_l3_valid(orig_l3)) {
        KASSERT(opa == pa, ("pmap_enter: invalid update"));
        if ((orig_l3 & ~ATTR_AF) != (new_l3 & ~ATTR_AF)) {
            /* same PA, different attributes */
+            /* XXXMJ need to reload orig_l3 for hardware DBM. */
            pmap_load_store(l3, new_l3);
            pmap_invalidate_page(pmap, va);
            if (pmap_pte_dirty(orig_l3) &&
                (orig_l3 & ATTR_SW_MANAGED) != 0)

markj [Done]: We should reverse the order of these tests.

                vm_page_dirty(m);
        } else {
            /*
             * orig_l3 == new_l3
             * This can happen if multiple threads simultaneously
             * access a page that is not yet mapped.  This is bad
             * for performance since it can cause a full
             * demotion-NOP-promotion cycle.
[40 lines elided]
static bool
pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    struct rwlock **lockp)
{
    pd_entry_t new_l2;

    PMAP_LOCK_ASSERT(pmap, MA_OWNED);

-    new_l2 = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT |
+    new_l2 = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | (ATTR_DEFAULT & ~ATTR_AF) |

alc [Done]: This is also a speculative mapping, so ATTR_AF should be cleared, just like pmap_enter_quick_locked().
alc [Not Done]: Strictly speaking, I don't think that we should clear ATTR_AF until we know that this is a managed mapping, which is below.
markj [Done]: For consistency, should the amd64 and other pmaps set PG_A on unmanaged mappings?
alc [Not Done]: Yes.

        ATTR_IDX(m->md.pv_memattr) | ATTR_AP(ATTR_AP_RO) | L2_BLOCK);
    if ((m->oflags & VPO_UNMANAGED) == 0)
        new_l2 |= ATTR_SW_MANAGED;
    if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
        new_l2 |= ATTR_XN;
    if (va < VM_MAXUSER_ADDRESS)
        new_l2 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;
    return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP |
[276 lines elided; context: pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,]
    }

    /*
     * Increment counters
     */
    pmap_resident_count_inc(pmap, 1);

    pa = VM_PAGE_TO_PHYS(m);
-    l3_val = pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
+    l3_val = pa | (ATTR_DEFAULT & ~ATTR_AF) | ATTR_IDX(m->md.pv_memattr) |

alc [Done]: Strictly speaking, I don't think that we should clear ATTR_AF until we know that this is a managed mapping, which is below.

        ATTR_AP(ATTR_AP_RO) | L3_PAGE;
    if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
        l3_val |= ATTR_XN;
    else if (va < VM_MAXUSER_ADDRESS)
        l3_val |= ATTR_PXN;

    /*
     * Now validate mapping with RO protection
[130 lines elided]
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
    struct rwlock *lock;
    struct spglist free;
    pd_entry_t *l0, *l1, *l2, srcptepaddr;
-    pt_entry_t *dst_pte, ptetemp, *src_pte;
+    pt_entry_t *dst_pte, mask, ptetemp, *src_pte;
    vm_offset_t addr, end_addr, va_next;
    vm_page_t dst_l2pg, dstmpte, srcmpte;

    if (dst_addr != src_addr)
        return;

    end_addr = src_addr + len;
    lock = NULL;
    if (dst_pmap < src_pmap) {
[34 lines elided; context: if ((srcptepaddr & ATTR_DESCR_MASK) == L2_BLOCK) {]
                break;
            l2 = (pd_entry_t *)
                PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dst_l2pg));
            l2 = &l2[pmap_l2_index(addr)];
            if (pmap_load(l2) == 0 &&
                ((srcptepaddr & ATTR_SW_MANAGED) == 0 ||
                pmap_pv_insert_l2(dst_pmap, addr, srcptepaddr,
                PMAP_ENTER_NORECLAIM, &lock))) {
-                (void)pmap_load_store(l2, srcptepaddr &
-                    ~ATTR_SW_WIRED);
+                mask = ATTR_AF | ATTR_SW_WIRED;
+                if ((srcptepaddr & ATTR_SW_DBM) != 0)
+                    mask |= ATTR_AP_RW_BIT;

alc [Done]: This seems backwards. You want to set the bit, not clear it.

+                (void)pmap_load_store(l2, srcptepaddr & ~mask);
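                /*
                 * [Illustrative sketch of the fix alc suggests, not part
                 * of this revision: a set ATTR_AP_RW_BIT means read-only,
                 * so a clean copy should set the bit rather than clear it,
                 * e.g. keep mask = ATTR_AF | ATTR_SW_WIRED and accumulate
                 * the bit separately in a hypothetical new local:
                 *
                 *    nbits = 0;
                 *    if ((srcptepaddr & ATTR_SW_DBM) != 0)
                 *        nbits |= ATTR_AP_RW_BIT;
                 *    (void)pmap_load_store(l2,
                 *        (srcptepaddr & ~mask) | nbits);
                 * ]
                 */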
                pmap_resident_count_inc(dst_pmap, L2_SIZE /
                    PAGE_SIZE);
                atomic_add_long(&pmap_l2_mappings, 1);
            } else
                dst_l2pg->wire_count--;
            continue;
        }
        KASSERT((srcptepaddr & ATTR_DESCR_MASK) == L2_TABLE,
[27 lines elided; context: for (; addr < va_next; addr += PAGE_SIZE, src_pte++) {]
                PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
            dst_pte = &dst_pte[pmap_l3_index(addr)];
            if (pmap_load(dst_pte) == 0 &&
                pmap_try_insert_pv_entry(dst_pmap, addr,
                PHYS_TO_VM_PAGE(ptetemp & ~ATTR_MASK), &lock)) {
                /*
                 * Clear the wired, modified, and accessed
                 * (referenced) bits during the copy.
-                 *
-                 * XXX not yet
                 */
-                (void)pmap_load_store(dst_pte, ptetemp &
-                    ~ATTR_SW_WIRED);
+                mask = ATTR_AF | ATTR_SW_WIRED;
+                if ((ptetemp & ATTR_SW_DBM) != 0)
+                    mask |= ATTR_AP_RW_BIT;

alc [Done]: This seems backwards. You want to set the bit, not clear it.

+                (void)pmap_load_store(dst_pte, ptetemp & ~mask);
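                /*
                 * [Same illustrative fix as the L2 copy above: set
                 * ATTR_AP_RW_BIT through a separate nbits value instead
                 * of adding it to mask, so the copied PTE ends up clean
                 * rather than writeable-and-dirty.]
                 */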
                pmap_resident_count_inc(dst_pmap, 1);
            } else {
                SLIST_INIT(&free);
                if (pmap_unwire_l3(dst_pmap, addr, dstmpte,
                    &free)) {
                    /*
                     * Although "addr" is not mapped,
                     * paging-structure caches could
[334 lines elided]
             * processors, the dirty bit cannot have
             * changed state since we last loaded pte.
             */
            pmap_clear(pte);

            /*
             * Update the vm_page_t clean/reference bits.
             */
-            if ((tpte & ATTR_AP_RW_BIT) ==
-                ATTR_AP(ATTR_AP_RW)) {
+            if (pmap_pte_dirty(tpte)) {

alc [Not Done]: This could be committed now. It doesn't really depend on the rest of the patch.

                switch (lvl) {
                case 1:
                    for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
                        vm_page_dirty(mt);
                    break;
                case 2:
                    vm_page_dirty(m);
                    break;
[258 lines elided; context: if (!PMAP_TRYLOCK(pmap)) {]
            if (pvh_gen != pvh->pv_gen) {
                PMAP_UNLOCK(pmap);
                rw_wunlock(lock);
                goto retry_pv_loop;
            }
        }
        va = pv->pv_va;
        pte = pmap_pte(pmap, pv->pv_va, &lvl);
-        if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
+        if ((pmap_load(pte) & ATTR_SW_DBM) != 0)
            (void)pmap_demote_l2_locked(pmap, pte, va, &lock);
        KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
            ("inconsistent pv lock %p %p for page %p",
            lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
        PMAP_UNLOCK(pmap);
    }
    TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
        pmap = PV_PMAP(pv);
        if (!PMAP_TRYLOCK(pmap)) {
            pvh_gen = pvh->pv_gen;
            md_gen = m->md.pv_gen;
            rw_wunlock(lock);
            PMAP_LOCK(pmap);
            rw_wlock(lock);
            if (pvh_gen != pvh->pv_gen ||
                md_gen != m->md.pv_gen) {
                PMAP_UNLOCK(pmap);
                rw_wunlock(lock);
                goto retry_pv_loop;
            }
        }
        pte = pmap_pte(pmap, pv->pv_va, &lvl);
-retry:
        oldpte = pmap_load(pte);
-        if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
-            if (!atomic_cmpset_long(pte, oldpte,
-                oldpte | ATTR_AP(ATTR_AP_RO)))
+retry:
+        if ((oldpte & ATTR_SW_DBM) != 0) {
+            if (!atomic_fcmpset_long(pte, &oldpte,
+                (oldpte | ATTR_AP_RW_BIT) & ~ATTR_SW_DBM))
                goto retry;
-            if ((oldpte & ATTR_AF) != 0)
+            if ((oldpte & ATTR_AP(ATTR_AP_RW)) ==
+                ATTR_AP(ATTR_AP_RW))

alc [Done]: Shouldn't this be:

    if ((oldpte & ATTR_AP_RW_BIT) ==

                vm_page_dirty(m);
            pmap_invalidate_page(pmap, pv->pv_va);
        }
        PMAP_UNLOCK(pmap);
    }
    rw_wunlock(lock);
    vm_page_aflag_clear(m, PGA_WRITEABLE);
}
-static __inline boolean_t
-safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
-{
-
-    return (FALSE);
-}
/*
 * pmap_ts_referenced:
 *
 *    Return a count of reference bits for a page, clearing those bits.
 *    It is not necessary for every reference bit to be cleared, but it
 *    is necessary that 0 only be returned when there are truly no
 *    reference bits set.
 *
[9 lines elided]
pmap_ts_referenced(vm_page_t m)
{
    struct md_page *pvh;
    pv_entry_t pv, pvf;
    pmap_t pmap;
    struct rwlock *lock;
    pd_entry_t *pde, tpde;
    pt_entry_t *pte, tpte;
-    pt_entry_t *l3;
    vm_offset_t va;
    vm_paddr_t pa;
-    int cleared, md_gen, not_cleared, lvl, pvh_gen;
+    int lvl, md_gen, pvh_gen, refs;
    struct spglist free;
-    bool demoted;

    KASSERT((m->oflags & VPO_UNMANAGED) == 0,
        ("pmap_ts_referenced: page %p is not managed", m));
    SLIST_INIT(&free);
-    cleared = 0;
+    refs = 0;
    pa = VM_PAGE_TO_PHYS(m);
    lock = PHYS_TO_PV_LIST_LOCK(pa);
    pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
    rw_wlock(lock);
retry:
-    not_cleared = 0;

alc [Done, unsubmitted]: Suppose that there are two superpage mappings and one 4KB page mapping. Further, suppose that because of the "hash", we clear the accessed bit for one of the superpage mappings but not the other. The first increments cleared and the second increments not_cleared. Then, as we try to process the 4KB page mapping, we are forced to retry. Consequently, we will reexamine the superpage mappings. For the one that had its accessed bit cleared, there is no issue. However, for the one that did not have its accessed bit clear, we should beware of "double counting". This is why not_cleared was reset on a retry. In contrast, in this new version refs is being incremented both times that we examine the superpage mapping that doesn't have its accessed bit cleared.
markj (author) [Done, unsubmitted]: I see. I made the same mistake in the risc-v pmap.
    if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
        goto small_mappings;
    pv = pvf;
    do {
        if (pvf == NULL)
            pvf = pv;
        pmap = PV_PMAP(pv);
        if (!PMAP_TRYLOCK(pmap)) {
[19 lines elided; context: do {]
        if (pmap_pte_dirty(tpte)) {
            /*
             * Although "tpte" is mapping a 2MB page, because
             * this function is called at a 4KB page granularity,
             * we only update the 4KB page under test.
             */
            vm_page_dirty(m);
        }
        if ((tpte & ATTR_AF) != 0) {
            /*
             * Since this reference bit is shared by 512 4KB pages,
             * it should not be cleared every time it is tested.
             * Apply a simple "hash" function on the physical page
             * number, the virtual superpage number, and the pmap
             * address to select one 4KB page out of the 512 on
             * which testing the reference bit will result in
             * clearing that reference bit.  This function is
             * designed to avoid the selection of the same 4KB page
             * for every 2MB page mapping.
             *
             * On demotion, a mapping that hasn't been referenced is
             * simply destroyed.  To avoid the possibility of a
             * subsequent page fault on a demoted wired mapping,
             * always leave its reference bit set.  Moreover, since
             * the superpage is wired, the current state of its
             * reference bit won't affect page replacement.
             */

alc [Done]: Actually, this comment isn't describing the code that was deleted. It is about the code that remains. It should be restored, and I should revert the i386 commit. :-(
markj [Done]: Hmm. Well, we want to leave the access flag set independent of whether or not the mapping may be demoted, since otherwise a subsequent access will trigger a soft fault or a hw page table walk depending on the platform, and we wish to avoid that for wired mappings. Indeed, we have the same check for ATTR_SW_WIRED when examining small mappings of the page. So, I would argue that the deleted comment is too specific anyway. I will restore it for the purpose of this commit, though.
alc [Not Done]: On native (as opposed to bhyve EPT virtualized) amd64, we were and are still clearing PG_A for 4KB wired mappings. This comment was written before the bhyve integration, and I think it made more sense then, because we were clearly handling PG_A differently for 2MB and 4KB mappings. Here is what I think makes this code and comment confusing. On early EPT hardware, which required software accessed and dirty bit emulation, the emulation had limitations (compared to arm64 :-) ). Specifically, I believe that we couldn't have a PTE that was dirty but not accessed, and so we have to destroy the mapping in that case. Thus, in that case, on a future access, we don't just exercise the pmap-level emulation code, but invoke vm_fault(). We don't want to invoke vm_fault() (and potentially sleep for an indeterminate period) if the mapping is wired, so we do nothing to the PTE. However, in other cases, even for wired mappings, I believe that we do clear the emulated accessed bit and let the pmap-level emulation code turn the accessed bit back on on a future access. In other words, we are not trying to prevent emulation exceptions on wired mappings. We are only trying to prevent vm_fault() calls. arm64 doesn't have this issue found in the old EPT hardware, so we could choose to unconditionally clear the accessed bit on wired 4KB page mappings.
alc [Done]: This paragraph describes code that has been (correctly) deleted, so it too can be deleted.
markj [Done]: I note that it is present in the i386 pmap.

            if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^
                (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
                (tpte & ATTR_SW_WIRED) == 0) {
-                if (safe_to_clear_referenced(pmap, tpte)) {
-                    /*
-                     * TODO: We don't handle the access
-                     * flag at all. We need to be able
-                     * to set it in the exception handler.
-                     */
-                    panic("ARM64TODO: "
-                        "safe_to_clear_referenced\n");
-                } else if (pmap_demote_l2_locked(pmap, pte,
-                    pv->pv_va, &lock) != NULL) {
-                    demoted = true;
-                    va += VM_PAGE_TO_PHYS(m) -
-                        (tpte & ~ATTR_MASK);
-                    l3 = pmap_l2_to_l3(pte, va);
-                    pmap_remove_l3(pmap, l3, va,
-                        pmap_load(pte), NULL, &lock);
-                } else
-                    demoted = true;
-                if (demoted) {
-                    /*
-                     * The superpage mapping was removed
-                     * entirely and therefore 'pv' is no
-                     * longer valid.
-                     */
-                    if (pvf == pv)
-                        pvf = NULL;
-                    pv = NULL;
-                }
-                cleared++;
-                KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
-                    ("inconsistent pv lock %p %p for page %p",
-                    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
-            } else
-                not_cleared++;
+                atomic_clear_64(pte, ATTR_AF);
+                pmap_invalidate_page(pmap, pv->pv_va);
+            }
+            refs++;

alc [Not Done]: I don't think that you should be removing "not_cleared". It exists for reasons unrelated to safe_to_clear_referenced(). Suppose that the only mapping of "m" is as part of a superpage mapping and that "m" is not the 4KB page within the superpage that is chosen by the "hash" to trigger the actual clearing of the reference bit. We still want pmap_ts_referenced() to return 1, not 0, to indicate that "m" has been referenced.

        }
        PMAP_UNLOCK(pmap);
        /* Rotate the PV list if it has more than one entry. */
        if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
            TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
            TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
            pvh->pv_gen++;
        }
-        if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX)
+        if (refs >= PMAP_TS_REFERENCED_MAX)
            goto out;
    } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
small_mappings:
    if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
        goto out;
    pv = pvf;
    do {
        if (pvf == NULL)
[17 lines elided; context: do {]
        tpde = pmap_load(pde);
        KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
            ("pmap_ts_referenced: found an invalid l2 table"));
        pte = pmap_l2_to_l3(pde, pv->pv_va);
        tpte = pmap_load(pte);
        if (pmap_pte_dirty(tpte))
            vm_page_dirty(m);
        if ((tpte & ATTR_AF) != 0) {
-            if (safe_to_clear_referenced(pmap, tpte)) {
-                /*
-                 * TODO: We don't handle the access flag
-                 * at all. We need to be able to set it in
-                 * the exception handler.
-                 */
-                panic("ARM64TODO: safe_to_clear_referenced\n");
-            } else if ((tpte & ATTR_SW_WIRED) == 0) {
-                /*
-                 * Wired pages cannot be paged out so
-                 * doing accessed bit emulation for
-                 * them is wasted effort. We do the
-                 * hard work for unwired pages only.
-                 */
-                pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
-                    &free, &lock);
-                cleared++;
-                if (pvf == pv)
-                    pvf = NULL;
-                pv = NULL;
-                KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
-                    ("inconsistent pv lock %p %p for page %p",
-                    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
-            } else
-                not_cleared++;
+            if ((tpte & ATTR_SW_WIRED) == 0) {
+                atomic_clear_64(pte, ATTR_AF);
+                pmap_invalidate_page(pmap, pv->pv_va);
+            }
+            refs++;
        }
        PMAP_UNLOCK(pmap);
        /* Rotate the PV list if it has more than one entry. */
        if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
            TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
            TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
            m->md.pv_gen++;
        }
-    } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
-        not_cleared < PMAP_TS_REFERENCED_MAX);
+    } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && refs <
+        PMAP_TS_REFERENCED_MAX);
out:
    rw_wunlock(lock);
    vm_page_free_pages_toq(&free, true);
-    return (cleared + not_cleared);
+    return (refs);
}
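The page-selection "hash" above is easier to audit in isolation. A sketch (hypothetical helper; the expression is the same inline test used in the loop): for a given 2MB mapping, va >> L2_SHIFT and the pmap address are fixed, while pa >> PAGE_SHIFT varies across the 512 constituent 4KB pages, so exactly one of them selects for clearing.

static inline bool
ts_referenced_hash_selects(vm_paddr_t pa, vm_offset_t va, pmap_t pmap)
{
    return ((((pa >> PAGE_SHIFT) ^ (va >> L2_SHIFT) ^
        (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0);
}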
/*
 * Apply the given advice to the specified range of addresses within the
 * given pmap.  Depending on the advice, clear the referenced and/or
 * modified flags in each mapping and set the mapped page's dirty field.
 */
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
}

/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
struct md_page *pvh; | |||||
struct rwlock *lock; | |||||
pmap_t pmap; | |||||
pv_entry_t next_pv, pv; | |||||
pd_entry_t *l2, oldl2; | |||||
pt_entry_t *l3, oldl3; | |||||
vm_offset_t va; | |||||
int md_gen, pvh_gen; | |||||
KASSERT((m->oflags & VPO_UNMANAGED) == 0, | KASSERT((m->oflags & VPO_UNMANAGED) == 0, | ||||
("pmap_clear_modify: page %p is not managed", m)); | ("pmap_clear_modify: page %p is not managed", m)); | ||||
VM_OBJECT_ASSERT_WLOCKED(m->object); | VM_OBJECT_ASSERT_WLOCKED(m->object); | ||||
KASSERT(!vm_page_xbusied(m), | KASSERT(!vm_page_xbusied(m), | ||||
("pmap_clear_modify: page %p is exclusive busied", m)); | ("pmap_clear_modify: page %p is exclusive busied", m)); | ||||
/* | /* | ||||
* If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. | * If the page is not PGA_WRITEABLE, then no PTEs can have ATTR_SW_DBM | ||||
* If the object containing the page is locked and the page is not | * set. If the object containing the page is locked and the page is not | ||||
* exclusive busied, then PGA_WRITEABLE cannot be concurrently set. | * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. | ||||
*/ | */ | ||||
if ((m->aflags & PGA_WRITEABLE) == 0) | if ((m->aflags & PGA_WRITEABLE) == 0) | ||||
return; | return; | ||||
pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : | |||||
/* ARM64TODO: We lack support for tracking if a page is modified */ | pa_to_pvh(VM_PAGE_TO_PHYS(m)); | ||||
lock = VM_PAGE_TO_PV_LIST_LOCK(m); | |||||
rw_wlock(lock); | |||||
restart: | |||||
TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { | |||||
pmap = PV_PMAP(pv); | |||||
if (!PMAP_TRYLOCK(pmap)) { | |||||
pvh_gen = pvh->pv_gen; | |||||
rw_wunlock(lock); | |||||
PMAP_LOCK(pmap); | |||||
rw_wlock(lock); | |||||
if (pvh_gen != pvh->pv_gen) { | |||||
PMAP_UNLOCK(pmap); | |||||
goto restart; | |||||
} | } | ||||
} | |||||
va = pv->pv_va; | |||||
l2 = pmap_l2(pmap, va); | |||||
oldl2 = pmap_load(l2); | |||||
if ((oldl2 & ATTR_SW_DBM) != 0) { | |||||
if (pmap_demote_l2_locked(pmap, l2, va, &lock)) { | |||||
if ((oldl2 & ATTR_SW_WIRED) == 0) { | |||||
/* | |||||
* Write protect the mapping to a | |||||
* single page so that a subsequent | |||||
* write access may repromote. | |||||
*/ | |||||
va += VM_PAGE_TO_PHYS(m) - | |||||
(oldl2 & ~ATTR_MASK); | |||||
l3 = pmap_l2_to_l3(l2, va); | |||||
oldl3 = pmap_load(l3); | |||||
if (pmap_l3_valid(oldl3)) { | |||||
Not Done Inline ActionsJust an FYI, not a request for a change ... I really don't remember why I included this validity test when I wrote the amd64 and i386 versions. On a successful demotion, this PTE has to be valid. alc: Just an FYI, not a request for a change ... I really don't remember why I included this… | |||||
Done Inline ActionsI can't see a reason for it either. markj: I can't see a reason for it either. | |||||
while (!atomic_fcmpset_long(l3, | |||||
&oldl3, (oldl3 & ~ATTR_SW_DBM) | | |||||
ATTR_AP(ATTR_AP_RO))) | |||||
cpu_spinwait(); | |||||
vm_page_dirty(m); | |||||
pmap_invalidate_page(pmap, va); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
PMAP_UNLOCK(pmap); | |||||
} | |||||
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { | |||||
pmap = PV_PMAP(pv); | |||||
if (!PMAP_TRYLOCK(pmap)) { | |||||
md_gen = m->md.pv_gen; | |||||
pvh_gen = pvh->pv_gen; | |||||
rw_wunlock(lock); | |||||
PMAP_LOCK(pmap); | |||||
rw_wlock(lock); | |||||
if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { | |||||
PMAP_UNLOCK(pmap); | |||||
goto restart; | |||||
} | |||||
} | |||||
l2 = pmap_l2(pmap, pv->pv_va); | |||||
l3 = pmap_l2_to_l3(l2, pv->pv_va); | |||||
oldl3 = pmap_load(l3); | |||||
if (pmap_l3_valid(oldl3) && | |||||
(oldl3 & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) == ATTR_SW_DBM) { | |||||
atomic_clear_64(l3, ATTR_AP_RW_BIT); | |||||
Done Inline ActionsShouldn't this be pmap_set_bits(l3, ATTR_AP_RW_BIT)? Or its equivalent pmap_set_bits(l3, ATTR_AP(ATTR_AP_RO))? alc: Shouldn't this be pmap_set_bits(l3, ATTR_AP_RW_BIT)? Or its equivalent pmap_set_bits(l3… | |||||
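The suggestion above turns on the inverted AP encoding: ATTR_AP(ATTR_AP_RO) and ATTR_AP_RW_BIT name the same bit, which is set for read-only and clear for read-write. The loop has already established that the entry is dirty (DBM set, AP bit clear), so clearing the bit is a no-op; the entry must instead be made read-only to mark it clean. A sketch of the corrected pair of lines, using atomic_set_64 directly on the assumption that pmap_set_bits wraps it:

```
/* Mark the mapping clean: set the AP bit so the PTE becomes
 * read-only; the next write will fault and redirty it. */
atomic_set_64(l3, ATTR_AP(ATTR_AP_RO));
pmap_invalidate_page(pmap, pv->pv_va);
```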
pmap_invalidate_page(pmap, pv->pv_va); | |||||
} | |||||
PMAP_UNLOCK(pmap); | |||||
} | |||||
rw_wunlock(lock); | |||||
} | |||||
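pmap_pte_dirty(), used later in the pmap_mincore() hunk, is not visible in this diff. Given the encoding this change establishes, it presumably reduces to something like the following sketch:

```
/*
 * Presumed shape of pmap_pte_dirty(): a mapping is dirty when it
 * participates in DBM emulation and is currently writeable, i.e.
 * the inverted AP (read-only) bit is clear.
 */
static __inline int
pmap_pte_dirty(pt_entry_t pte)
{

	return ((pte & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) ==
	    (ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM));
}
```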
void * | void * | ||||
pmap_mapbios(vm_paddr_t pa, vm_size_t size) | pmap_mapbios(vm_paddr_t pa, vm_size_t size) | ||||
{ | { | ||||
struct pmap_preinit_mapping *ppim; | struct pmap_preinit_mapping *ppim; | ||||
vm_offset_t va, offset; | vm_offset_t va, offset; | ||||
pd_entry_t *pde; | pd_entry_t *pde; | ||||
pt_entry_t *l2; | pt_entry_t *l2; | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | for (i = 0; i < l2_blocks; i++) { | ||||
("pmap_mapbios: Invalid page entry, va: 0x%lx", | ("pmap_mapbios: Invalid page entry, va: 0x%lx", | ||||
va)); | va)); | ||||
KASSERT(lvl == 1, | KASSERT(lvl == 1, | ||||
("pmap_mapbios: Invalid level %d", lvl)); | ("pmap_mapbios: Invalid level %d", lvl)); | ||||
/* Insert L2_BLOCK */ | /* Insert L2_BLOCK */ | ||||
l2 = pmap_l1_to_l2(pde, va); | l2 = pmap_l1_to_l2(pde, va); | ||||
pmap_load_store(l2, | pmap_load_store(l2, | ||||
pa | ATTR_DEFAULT | ATTR_XN | | pa | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM | | ||||
ATTR_IDX(CACHED_MEMORY) | L2_BLOCK); | ATTR_XN | ATTR_IDX(CACHED_MEMORY) | L2_BLOCK); | ||||
va += L2_SIZE; | va += L2_SIZE; | ||||
pa += L2_SIZE; | pa += L2_SIZE; | ||||
} | } | ||||
pmap_invalidate_all(kernel_pmap); | pmap_invalidate_all(kernel_pmap); | ||||
va = preinit_map_va + (start_idx * L2_SIZE); | va = preinit_map_va + (start_idx * L2_SIZE); | ||||
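The new attribute word above pre-sets ATTR_SW_DBM together with ATTR_AP(ATTR_AP_RW), so this unmanaged mapping is created writeable and already dirty; since nothing tracks dirty state for unmanaged pages, taking a DBM permission fault on them would be pure overhead (an inference, not stated in this hunk). Spelled out with the names used in this diff:

```
/* An unmanaged, cacheable, non-executable 2MB block that is born
 * writeable and dirty, so no ATTR_SW_DBM fault is ever taken on it. */
pt_entry_t attr = ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_SW_DBM |
    ATTR_XN | ATTR_IDX(CACHED_MEMORY) | L2_BLOCK;
```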
▲ Show 20 Lines • Show All 377 Lines • ▼ Show 20 Lines | if (ml3 == NULL) { | ||||
goto fail; | goto fail; | ||||
} | } | ||||
if (va < VM_MAXUSER_ADDRESS) { | if (va < VM_MAXUSER_ADDRESS) { | ||||
ml3->wire_count = NL3PG; | ml3->wire_count = NL3PG; | ||||
pmap_resident_count_inc(pmap, 1); | pmap_resident_count_inc(pmap, 1); | ||||
} | } | ||||
} | } | ||||
Done Inline ActionsCan you please assert here that the page is dirty if it is writeable. alc: Can you please assert here that the page is dirty if it is writeable. | |||||
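A sketch of the assertion being requested, using this change's inverted encoding (the exact message wording is an assumption); it would sit just ahead of the demotion proper, where oldl2 still holds the L2 block entry:

```
/* A writeable (DBM) L2 block must be dirty here, i.e. it must not
 * still have the read-only AP bit set. */
KASSERT((oldl2 & (ATTR_SW_DBM | ATTR_AP_RW_BIT)) !=
    (ATTR_SW_DBM | ATTR_AP(ATTR_AP_RO)),
    ("pmap_demote_l2: oldl2 is writeable but not dirty"));
```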
l3phys = VM_PAGE_TO_PHYS(ml3); | l3phys = VM_PAGE_TO_PHYS(ml3); | ||||
l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys); | l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys); | ||||
/* The physical address that the range points at */ | /* The physical address that the range points at */ | ||||
phys = oldl2 & ~ATTR_MASK; | phys = oldl2 & ~ATTR_MASK; | ||||
/* The attributes from the old l2 table to be copied */ | /* The attributes from the old l2 table to be copied */ | ||||
newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE; | newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE; | ||||
/* | /* | ||||
Done Inline ActionsShouldn't this be: ... != (ATTR_AP_RW_BIT | ATTR_SW_DBM)? In other words, that DBM is set, but our "inverted" dirty bit is still 1? alc: Shouldn't this be: ... != (ATTR_AP_RW_BIT | ATTR_SW_DBM)? In other words, that DBM is set, but… | |||||
Done Inline ActionsI'm a bit confused. On amd64, when promoting a PDE, we clear PG_RW on the PTEs if they are not dirty. In particular, we will not create a writeable clean superpage, which is why the equivalent assertion is there in pmap_demote_pde(). On arm64, pmap_promote_l2() may create a clean, writeable L2 entry, presumably because without this change all writeable entries were considered dirty. Shouldn't we also implement amd64's logic here for that assertion to be correct? markj: I'm a bit confused. On amd64, when promoting a PDE, we clear PG_RW on the PTEs if they are not… | |||||
Done Inline ActionsYes, we should. alc: Yes, we should. | |||||
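For reference, one way to implement the amd64 behavior agreed on above, sketched under the assumption that the promotion scan keeps the first L3 entry's image in a local (firstl3 and newl2 are illustrative names; the promotion code is not part of this hunk):

```
/*
 * Hypothetical promotion-side handling: if the candidate PTE is
 * clean but writeable under DBM, strip ATTR_SW_DBM so the L2 block
 * is promoted read-only; a later write fault can redirty and
 * repromote it.  Never create a writeable, clean superpage.
 */
newl2 = pmap_load(firstl3);
while ((newl2 & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) ==
    (ATTR_AP(ATTR_AP_RO) | ATTR_SW_DBM)) {
	if (atomic_fcmpset_64(firstl3, &newl2, newl2 & ~ATTR_SW_DBM)) {
		newl2 &= ~ATTR_SW_DBM;
		break;
	}
}
```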
* If the page table page is not leftover from an earlier promotion, | * If the page table page is not leftover from an earlier promotion, | ||||
* initialize it. | * initialize it. | ||||
*/ | */ | ||||
if (ml3->valid == 0) { | if (ml3->valid == 0) { | ||||
for (i = 0; i < Ln_ENTRIES; i++) { | for (i = 0; i < Ln_ENTRIES; i++) { | ||||
l3[i] = newl3 | phys; | l3[i] = newl3 | phys; | ||||
phys += L3_SIZE; | phys += L3_SIZE; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 88 Lines • ▼ Show 20 Lines | case 2: | ||||
break; | break; | ||||
case 1: | case 1: | ||||
mask = L1_OFFSET; | mask = L1_OFFSET; | ||||
break; | break; | ||||
default: | default: | ||||
panic("pmap_mincore: invalid level %d", lvl); | panic("pmap_mincore: invalid level %d", lvl); | ||||
} | } | ||||
val = MINCORE_INCORE; | val = MINCORE_INCORE; | ||||
Done Inline ActionsSince this line is moving, I would suggest rewriting it in a more conventional way: managed = (tpte & ATTR_SW_MANAGED) != 0; alc: Since this line is moving, I would suggest rewriting it in a more conventional way: managed = (tpte & ATTR_SW_MANAGED)… | |||||
if (lvl != 3) | if (lvl != 3) | ||||
val |= MINCORE_SUPER; | val |= MINCORE_SUPER; | ||||
if (pmap_pte_dirty(tpte)) | if (pmap_pte_dirty(tpte)) | ||||
val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; | val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; | ||||
if ((tpte & ATTR_AF) == ATTR_AF) | if ((tpte & ATTR_AF) == ATTR_AF) | ||||
val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; | val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; | ||||
managed = (tpte & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; | managed = (tpte & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; | ||||
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines | while (sz != 0) { | ||||
len = imin(PAGE_SIZE, sz); | len = imin(PAGE_SIZE, sz); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
int | int | ||||
pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far) | pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far) | ||||
{ | { | ||||
#ifdef SMP | pt_entry_t *pte; | ||||
register_t intr; | register_t intr; | ||||
uint64_t par; | uint64_t ec, par; | ||||
int lvl, rv; | |||||
switch (ESR_ELx_EXCEPTION(esr)) { | rv = KERN_FAILURE; | ||||
ec = ESR_ELx_EXCEPTION(esr); | |||||
switch (ec) { | |||||
case EXCP_INSN_ABORT_L: | case EXCP_INSN_ABORT_L: | ||||
case EXCP_INSN_ABORT: | case EXCP_INSN_ABORT: | ||||
case EXCP_DATA_ABORT_L: | case EXCP_DATA_ABORT_L: | ||||
case EXCP_DATA_ABORT: | case EXCP_DATA_ABORT: | ||||
break; | break; | ||||
default: | default: | ||||
return (KERN_FAILURE); | return (rv); | ||||
} | } | ||||
/* Data and insn aborts use same encoding for FCS field. */ | /* Data and insn aborts use same encoding for FSC field. */ | ||||
switch (esr & ISS_DATA_DFSC_MASK) { | switch (esr & ISS_DATA_DFSC_MASK) { | ||||
case ISS_DATA_DFSC_AFF_L1: | |||||
case ISS_DATA_DFSC_AFF_L2: | |||||
case ISS_DATA_DFSC_AFF_L3: | |||||
PMAP_LOCK(pmap); | |||||
pte = pmap_pte(pmap, far, &lvl); | |||||
if (pte != NULL) { | |||||
atomic_set_64(pte, ATTR_AF); | |||||
rv = KERN_SUCCESS; | |||||
Done Inline ActionsAs an optimization, I think we could check whether this fault represents a write that would dirty the page, and dirty the page preemptively. AF faults have higher priority than permission faults, according to the ARM. markj: As an optimization, I think we could check whether this fault represents a write that would… | |||||
Done Inline ActionsI would suggest adding an XXX comment about this optimization. In other words, I don't think that we need this optimization in place before committing this change. alc: I would suggest adding an XXX comment about this optimization. In other words, I don't think… | |||||
} | |||||
PMAP_UNLOCK(pmap); | |||||
Done Inline ActionsSuppose that we have two concurrent calls on the same address to this function because of a missing ATTR_AF bit. Isn't the caller who loses the race to acquire the pmap lock going to receive KERN_FAILURE, and as a result call vm_fault()? alc: Suppose that we have two concurrent calls on the same address to this function because of a… | |||||
break; | |||||
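A hypothetical shape for the optimization discussed above (the one the review agrees to note as XXX): when the access-flag fault is a write to a DBM mapping, set ATTR_AF and make the entry dirty in one pass so the store does not immediately take a second, permission fault. pte is non-NULL here; oldpte is an illustrative local:

```
pt_entry_t oldpte;

oldpte = pmap_load(pte);
if ((ec == EXCP_DATA_ABORT || ec == EXCP_DATA_ABORT_L) &&
    (esr & ISS_DATA_WnR) != 0 && (oldpte & ATTR_SW_DBM) != 0) {
	/* Referenced and dirty: set ATTR_AF, clear the RO AP bit. */
	while (!atomic_fcmpset_64(pte, &oldpte,
	    (oldpte | ATTR_AF) & ~ATTR_AP(ATTR_AP_RO)))
		cpu_spinwait();
	pmap_invalidate_page(pmap, trunc_page(far));
} else
	atomic_set_64(pte, ATTR_AF);
rv = KERN_SUCCESS;
```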
case ISS_DATA_DFSC_PF_L1: | |||||
case ISS_DATA_DFSC_PF_L2: | |||||
case ISS_DATA_DFSC_PF_L3: | |||||
if ((ec != EXCP_DATA_ABORT_L && ec != EXCP_DATA_ABORT) || | |||||
(esr & ISS_DATA_WnR) == 0) | |||||
return (rv); | |||||
PMAP_LOCK(pmap); | |||||
pte = pmap_pte(pmap, far, &lvl); | |||||
if (pte != NULL && | |||||
(pmap_load(pte) & (ATTR_AP_RW_BIT | ATTR_SW_DBM)) == | |||||
(ATTR_AP(ATTR_AP_RO) | ATTR_SW_DBM)) { | |||||
atomic_clear_64(pte, ATTR_AP_RW_BIT); | |||||
pmap_invalidate_page(pmap, trunc_page(far)); | |||||
rv = KERN_SUCCESS; | |||||
} | |||||
PMAP_UNLOCK(pmap); | |||||
break; | |||||
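On the race noted above: a caller that loses the permission-fault race finds the PTE already dirty, fails the equality test, and returns KERN_FAILURE, taking a needless trip through vm_fault(). A tolerant variant (an assumption, not part of this revision) would treat an already-dirty DBM entry as success:

```
/* Either we dirty the clean PTE ourselves, or another CPU already
 * did; both outcomes resolve this fault. */
if (pte != NULL && (pmap_load(pte) & ATTR_SW_DBM) != 0) {
	if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RO)) {
		atomic_clear_64(pte, ATTR_AP_RW_BIT);
		pmap_invalidate_page(pmap, trunc_page(far));
	}
	rv = KERN_SUCCESS;
}
```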
case ISS_DATA_DFSC_TF_L0: | case ISS_DATA_DFSC_TF_L0: | ||||
case ISS_DATA_DFSC_TF_L1: | case ISS_DATA_DFSC_TF_L1: | ||||
case ISS_DATA_DFSC_TF_L2: | case ISS_DATA_DFSC_TF_L2: | ||||
case ISS_DATA_DFSC_TF_L3: | case ISS_DATA_DFSC_TF_L3: | ||||
PMAP_LOCK(pmap); | PMAP_LOCK(pmap); | ||||
/* Ask the MMU to check the address */ | /* Ask the MMU to check the address */ | ||||
intr = intr_disable(); | intr = intr_disable(); | ||||
if (pmap == kernel_pmap) | if (pmap == kernel_pmap) | ||||
par = arm64_address_translate_s1e1r(far); | par = arm64_address_translate_s1e1r(far); | ||||
else | else | ||||
par = arm64_address_translate_s1e0r(far); | par = arm64_address_translate_s1e0r(far); | ||||
intr_restore(intr); | intr_restore(intr); | ||||
PMAP_UNLOCK(pmap); | PMAP_UNLOCK(pmap); | ||||
/* | /* | ||||
* If the translation was successful, the fault was caused by a | * If the translation was successful, the fault was caused by a | ||||
* break-before-make sequence that has since completed, so the | * break-before-make sequence that has since completed, so the | ||||
* address is valid again and we can report success to the trap | * address is valid again and we can report success to the trap | ||||
* handler. | * handler. | ||||
*/ | */ | ||||
if (PAR_SUCCESS(par)) | if (PAR_SUCCESS(par)) | ||||
return (KERN_SUCCESS); | rv = KERN_SUCCESS; | ||||
break; | break; | ||||
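For readers unfamiliar with the sequence this case tolerates, a minimal sketch of a break-before-make update on the writer side, using the load/store macros this file defines; pte, newpte, and va are illustrative:

```
/*
 * Break-before-make: the old entry is invalidated before the new
 * one is published, so another CPU can transiently fault on an
 * address that is in fact mapped.  pmap_fault() above absorbs
 * exactly that window.
 */
pmap_clear(pte);			/* break: entry now invalid */
pmap_invalidate_page(pmap, va);		/* flush stale TLB entries */
pmap_load_store(pte, newpte);		/* make: publish new entry */
```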
default: | |||||
break; | |||||
} | } | ||||
#endif | |||||
return (KERN_FAILURE); | return (rv); | ||||
} | } | ||||
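For context, a hypothetical caller-side pattern (the real logic lives in the arm64 data-abort handler, which is not part of this change): pmap_fault() gets the first chance to resolve AF/DBM updates and spurious break-before-make faults before the expensive path is taken. map, esr, far, ftype, and error are assumed from that handler's context:

```
/* Cheap pmap-level fixup first; fall back to the VM fault handler. */
if (pmap_fault(map->pmap, esr, far) == KERN_SUCCESS)
	return;		/* handled without a full VM fault */
error = vm_fault(map, trunc_page(far), ftype, VM_FAULT_NORMAL);
```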
/* | /* | ||||
* Increase the starting virtual address of the given mapping if a | * Increase the starting virtual address of the given mapping if a | ||||
* different alignment might result in more superpage mappings. | * different alignment might result in more superpage mappings. | ||||
*/ | */ | ||||
void | void | ||||
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, | pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, | ||||
▲ Show 20 Lines • Show All 100 Lines • Show Last 20 Lines |