Changeset View
Standalone View
sys/amd64/amd64/pmap.c
Show First 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||||
* SUCH DAMAGE. | * SUCH DAMAGE. | ||||
* | * | ||||
* from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 | * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 | ||||
*/ | */ | ||||
/*- | /*- | ||||
* Copyright (c) 2003 Networks Associates Technology, Inc. | * Copyright (c) 2003 Networks Associates Technology, Inc. | ||||
* Copyright (c) 2014-2018 The FreeBSD Foundation | * Copyright (c) 2014-2019 The FreeBSD Foundation | ||||
* All rights reserved. | * All rights reserved. | ||||
* | * | ||||
* This software was developed for the FreeBSD Project by Jake Burkholder, | * This software was developed for the FreeBSD Project by Jake Burkholder, | ||||
* Safeport Network Services, and Network Associates Laboratories, the | * Safeport Network Services, and Network Associates Laboratories, the | ||||
* Security Research Division of Network Associates, Inc. under | * Security Research Division of Network Associates, Inc. under | ||||
* DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA | * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA | ||||
* CHATS research program. | * CHATS research program. | ||||
* | * | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | |||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/rangeset.h> | |||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/sx.h> | #include <sys/sx.h> | ||||
#include <sys/turnstile.h> | #include <sys/turnstile.h> | ||||
#include <sys/vmem.h> | #include <sys/vmem.h> | ||||
#include <sys/vmmeter.h> | #include <sys/vmmeter.h> | ||||
#include <sys/sched.h> | #include <sys/sched.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
Show All 18 Lines | |||||
#include <machine/cpu.h> | #include <machine/cpu.h> | ||||
#include <machine/cputypes.h> | #include <machine/cputypes.h> | ||||
#include <machine/md_var.h> | #include <machine/md_var.h> | ||||
#include <machine/pcb.h> | #include <machine/pcb.h> | ||||
#include <machine/specialreg.h> | #include <machine/specialreg.h> | ||||
#ifdef SMP | #ifdef SMP | ||||
#include <machine/smp.h> | #include <machine/smp.h> | ||||
#endif | #endif | ||||
#include <machine/sysarch.h> | |||||
#include <machine/tss.h> | #include <machine/tss.h> | ||||
static __inline boolean_t | static __inline boolean_t | ||||
pmap_type_guest(pmap_t pmap) | pmap_type_guest(pmap_t pmap) | ||||
{ | { | ||||
return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI)); | return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI)); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 114 Lines • ▼ Show 20 Lines | case PT_EPT: | ||||
break; | break; | ||||
default: | default: | ||||
panic("pmap_modified_bit: invalid pm_type %d", pmap->pm_type); | panic("pmap_modified_bit: invalid pm_type %d", pmap->pm_type); | ||||
} | } | ||||
return (mask); | return (mask); | ||||
} | } | ||||
static __inline pt_entry_t | |||||
pmap_pku_mask_bit(pmap_t pmap) | |||||
{ | |||||
return (pmap->pm_type == PT_X86 ? X86_PG_PKU_MASK : 0); | |||||
} | |||||
#if !defined(DIAGNOSTIC) | #if !defined(DIAGNOSTIC) | ||||
#ifdef __GNUC_GNU_INLINE__ | #ifdef __GNUC_GNU_INLINE__ | ||||
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline | #define PMAP_INLINE __attribute__((__gnu_inline__)) inline | ||||
#else | #else | ||||
#define PMAP_INLINE extern inline | #define PMAP_INLINE extern inline | ||||
#endif | #endif | ||||
#else | #else | ||||
#define PMAP_INLINE | #define PMAP_INLINE | ||||
▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines | |||||
SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, | SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, | ||||
&pti, 0, | &pti, 0, | ||||
"Page Table Isolation enabled"); | "Page Table Isolation enabled"); | ||||
static vm_object_t pti_obj; | static vm_object_t pti_obj; | ||||
static pml4_entry_t *pti_pml4; | static pml4_entry_t *pti_pml4; | ||||
static vm_pindex_t pti_pg_idx; | static vm_pindex_t pti_pg_idx; | ||||
static bool pti_finalized; | static bool pti_finalized; | ||||
/*
 * Description of a protection-key (PKRU) assignment over a range of user
 * virtual addresses.  Nodes are kept in the per-pmap rangeset pm_pkru,
 * keyed by the embedded rangeset element.
 */
struct pmap_pkru_range {
	struct rs_el	pkru_rs_el;	/* rangeset linkage and [start, end) */
	u_int		pkru_keyidx;	/* key index programmed into the PTEs */
	int		pkru_flags;	/* AMD64_PKRU_* flags (e.g. PERSIST) */
};

/* UMA zone backing struct pmap_pkru_range allocations. */
static uma_zone_t pmap_pkru_ranges_zone;
static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
static pt_entry_t pmap_pkru_get(pmap_t pmap, vm_offset_t va);
static void pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
static void *pkru_dup_range(void *ctx, void *data);
static void pkru_free_range(void *ctx, void *node);
static int pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap);
static int pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
static void pmap_pkru_deassign_all(pmap_t pmap);
static int | static int | ||||
pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS) | pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
int i; | int i; | ||||
uint64_t res; | uint64_t res; | ||||
res = 0; | res = 0; | ||||
CPU_FOREACH(i) { | CPU_FOREACH(i) { | ||||
▲ Show 20 Lines • Show All 2,422 Lines • ▼ Show 20 Lines | pmap_pinit0(pmap_t pmap) | ||||
TAILQ_INIT(&pmap->pm_pvchunk); | TAILQ_INIT(&pmap->pm_pvchunk); | ||||
bzero(&pmap->pm_stats, sizeof pmap->pm_stats); | bzero(&pmap->pm_stats, sizeof pmap->pm_stats); | ||||
pmap->pm_flags = pmap_flags; | pmap->pm_flags = pmap_flags; | ||||
CPU_FOREACH(i) { | CPU_FOREACH(i) { | ||||
pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN + 1; | pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN + 1; | ||||
pmap->pm_pcids[i].pm_gen = 1; | pmap->pm_pcids[i].pm_gen = 1; | ||||
} | } | ||||
pmap_activate_boot(pmap); | pmap_activate_boot(pmap); | ||||
if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) { | |||||
pmap_pkru_ranges_zone = uma_zcreate("pkru ranges", | |||||
sizeof(struct pmap_pkru_range), NULL, NULL, NULL, NULL, | |||||
UMA_ALIGN_PTR, UMA_ZONE_NOFREE); | |||||
markj: Why is it NOFREE?
Extra braces. | |||||
Done Inline ActionsIt does not need to, before ENOMEM restart I tried to minimize the chances of failing allocations. What do you mean by extra braces? {} are optional by style, and I put them around multiline statements. kib: It does not need to, before ENOMEM restart I tried to minimize the chances of failing… | |||||
Done Inline ActionsI would avoid using NOFREE for zones whose usage can be easily influenced by userspace. The fragmentation that they cause is quite problematic, and their use should be strongly discouraged IMO. Hmm, ok. I thought we tried to strictly avoid redundant braces in VM/pmap code (even for multiline statements). I'll stop commenting on that. markj: I would avoid using NOFREE for zones whose usage can be easily influenced by userspace. The… | |||||
Done Inline ActionsYes, I already removed the NOFREE. kib: Yes, I already removed the NOFREE. | |||||
} | } | ||||
} | |||||
void | void | ||||
pmap_pinit_pml4(vm_page_t pml4pg) | pmap_pinit_pml4(vm_page_t pml4pg) | ||||
{ | { | ||||
pml4_entry_t *pm_pml4; | pml4_entry_t *pm_pml4; | ||||
int i; | int i; | ||||
pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); | pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | if (pm_type == PT_X86) { | ||||
if (pti) { | if (pti) { | ||||
pml4pgu = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | | pml4pgu = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | | ||||
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_WAITOK); | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_WAITOK); | ||||
pmap->pm_pml4u = (pml4_entry_t *)PHYS_TO_DMAP( | pmap->pm_pml4u = (pml4_entry_t *)PHYS_TO_DMAP( | ||||
VM_PAGE_TO_PHYS(pml4pgu)); | VM_PAGE_TO_PHYS(pml4pgu)); | ||||
pmap_pinit_pml4_pti(pml4pgu); | pmap_pinit_pml4_pti(pml4pgu); | ||||
pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pml4pgu); | pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pml4pgu); | ||||
} | } | ||||
if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) { | |||||
rangeset_init(&pmap->pm_pkru, pkru_dup_range, | |||||
pkru_free_range, pmap, M_NOWAIT); | |||||
Done Inline ActionsFrom context it looks like we should be able to use M_WAITOK here. Extra braces. markj: From context it looks like we should be able to use M_WAITOK here.
Extra braces. | |||||
Done Inline ActionsThe flag is not used during rangeset_init(), it is saved and used during the rangeset operations like _remove() when we execute under pmap lock. kib: The flag is not used during rangeset_init(), it is saved and used during the rangeset… | |||||
} | } | ||||
} | |||||
pmap->pm_root.rt_root = 0; | pmap->pm_root.rt_root = 0; | ||||
CPU_ZERO(&pmap->pm_active); | CPU_ZERO(&pmap->pm_active); | ||||
TAILQ_INIT(&pmap->pm_pvchunk); | TAILQ_INIT(&pmap->pm_pvchunk); | ||||
bzero(&pmap->pm_stats, sizeof pmap->pm_stats); | bzero(&pmap->pm_stats, sizeof pmap->pm_stats); | ||||
pmap->pm_flags = flags; | pmap->pm_flags = flags; | ||||
pmap->pm_eptgen = 0; | pmap->pm_eptgen = 0; | ||||
▲ Show 20 Lines • Show All 279 Lines • ▼ Show 20 Lines | pmap_release(pmap_t pmap) | ||||
vm_page_unwire_noq(m); | vm_page_unwire_noq(m); | ||||
vm_page_free_zero(m); | vm_page_free_zero(m); | ||||
if (pmap->pm_pml4u != NULL) { | if (pmap->pm_pml4u != NULL) { | ||||
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4u)); | m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4u)); | ||||
vm_page_unwire_noq(m); | vm_page_unwire_noq(m); | ||||
vm_page_free(m); | vm_page_free(m); | ||||
} | } | ||||
if (pmap->pm_type == PT_X86 && | |||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) | |||||
rangeset_fini(&pmap->pm_pkru); | |||||
} | } | ||||
static int | static int | ||||
kvm_size(SYSCTL_HANDLER_ARGS) | kvm_size(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; | unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; | ||||
return sysctl_handle_long(oidp, &ksize, 0, req); | return sysctl_handle_long(oidp, &ksize, 0, req); | ||||
▲ Show 20 Lines • Show All 814 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
static boolean_t | static boolean_t | ||||
pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | ||||
struct rwlock **lockp) | struct rwlock **lockp) | ||||
{ | { | ||||
pd_entry_t newpde, oldpde; | pd_entry_t newpde, oldpde; | ||||
pt_entry_t *firstpte, newpte; | pt_entry_t *firstpte, newpte; | ||||
pt_entry_t PG_A, PG_G, PG_M, PG_RW, PG_V; | pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V; | ||||
vm_paddr_t mptepa; | vm_paddr_t mptepa; | ||||
vm_page_t mpte; | vm_page_t mpte; | ||||
struct spglist free; | struct spglist free; | ||||
vm_offset_t sva; | vm_offset_t sva; | ||||
int PG_PTE_CACHE; | int PG_PTE_CACHE; | ||||
PG_G = pmap_global_bit(pmap); | PG_G = pmap_global_bit(pmap); | ||||
PG_A = pmap_accessed_bit(pmap); | PG_A = pmap_accessed_bit(pmap); | ||||
PG_M = pmap_modified_bit(pmap); | PG_M = pmap_modified_bit(pmap); | ||||
PG_RW = pmap_rw_bit(pmap); | PG_RW = pmap_rw_bit(pmap); | ||||
PG_V = pmap_valid_bit(pmap); | PG_V = pmap_valid_bit(pmap); | ||||
PG_PTE_CACHE = pmap_cache_mask(pmap, 0); | PG_PTE_CACHE = pmap_cache_mask(pmap, 0); | ||||
PG_PKU_MASK = pmap_pku_mask_bit(pmap); | |||||
Done Inline ActionsThis var is write-only. markj: This var is write-only. | |||||
Done Inline ActionsIt is used by PG_PTE_PROMOTE. kib: It is used by PG_PTE_PROMOTE. | |||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | PMAP_LOCK_ASSERT(pmap, MA_OWNED); | ||||
oldpde = *pde; | oldpde = *pde; | ||||
KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), | KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), | ||||
("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); | ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); | ||||
if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == | if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == | ||||
NULL) { | NULL) { | ||||
KASSERT((oldpde & PG_W) == 0, | KASSERT((oldpde & PG_W) == 0, | ||||
▲ Show 20 Lines • Show All 416 Lines • ▼ Show 20 Lines | for (; sva < eva; sva = va_next) { | ||||
if (pmap_remove_ptes(pmap, sva, va_next, pde, &free, &lock)) | if (pmap_remove_ptes(pmap, sva, va_next, pde, &free, &lock)) | ||||
anyvalid = 1; | anyvalid = 1; | ||||
} | } | ||||
if (lock != NULL) | if (lock != NULL) | ||||
rw_wunlock(lock); | rw_wunlock(lock); | ||||
out: | out: | ||||
if (anyvalid) | if (anyvalid) | ||||
pmap_invalidate_all(pmap); | pmap_invalidate_all(pmap); | ||||
pmap_pkru_on_remove(pmap, sva, eva); | |||||
PMAP_UNLOCK(pmap); | PMAP_UNLOCK(pmap); | ||||
pmap_delayed_invl_finished(); | pmap_delayed_invl_finished(); | ||||
vm_page_free_pages_toq(&free, true); | vm_page_free_pages_toq(&free, true); | ||||
} | } | ||||
/* | /* | ||||
* Routine: pmap_remove_all | * Routine: pmap_remove_all | ||||
* Function: | * Function: | ||||
▲ Show 20 Lines • Show All 295 Lines • ▼ Show 20 Lines | |||||
* identical characteristics. | * identical characteristics. | ||||
*/ | */ | ||||
static void | static void | ||||
pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, | ||||
struct rwlock **lockp) | struct rwlock **lockp) | ||||
{ | { | ||||
pd_entry_t newpde; | pd_entry_t newpde; | ||||
pt_entry_t *firstpte, oldpte, pa, *pte; | pt_entry_t *firstpte, oldpte, pa, *pte; | ||||
pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V; | pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V, PG_PKU_MASK; | ||||
vm_page_t mpte; | vm_page_t mpte; | ||||
int PG_PTE_CACHE; | int PG_PTE_CACHE; | ||||
PG_A = pmap_accessed_bit(pmap); | PG_A = pmap_accessed_bit(pmap); | ||||
PG_G = pmap_global_bit(pmap); | PG_G = pmap_global_bit(pmap); | ||||
PG_M = pmap_modified_bit(pmap); | PG_M = pmap_modified_bit(pmap); | ||||
PG_V = pmap_valid_bit(pmap); | PG_V = pmap_valid_bit(pmap); | ||||
PG_RW = pmap_rw_bit(pmap); | PG_RW = pmap_rw_bit(pmap); | ||||
PG_PKU_MASK = pmap_pku_mask_bit(pmap); | |||||
Done Inline ActionsWrite-only. markj: Write-only. | |||||
Done Inline ActionsNo, it is used by PG_PTE_PROMOTE definition. kib: No, it is used by PG_PTE_PROMOTE definition. | |||||
PG_PTE_CACHE = pmap_cache_mask(pmap, 0); | PG_PTE_CACHE = pmap_cache_mask(pmap, 0); | ||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | PMAP_LOCK_ASSERT(pmap, MA_OWNED); | ||||
/* | /* | ||||
* Examine the first PTE in the specified PTP. Abort if this PTE is | * Examine the first PTE in the specified PTP. Abort if this PTE is | ||||
* either invalid, unused, or does not map the first 4KB physical page | * either invalid, unused, or does not map the first 4KB physical page | ||||
* within a 2MB page. | * within a 2MB page. | ||||
▲ Show 20 Lines • Show All 171 Lines • ▼ Show 20 Lines | pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, | ||||
if ((m->oflags & VPO_UNMANAGED) != 0) { | if ((m->oflags & VPO_UNMANAGED) != 0) { | ||||
if ((newpte & PG_RW) != 0) | if ((newpte & PG_RW) != 0) | ||||
newpte |= PG_M; | newpte |= PG_M; | ||||
} else | } else | ||||
newpte |= PG_MANAGED; | newpte |= PG_MANAGED; | ||||
lock = NULL; | lock = NULL; | ||||
PMAP_LOCK(pmap); | PMAP_LOCK(pmap); | ||||
if (va < VM_MAXUSER_ADDRESS) | |||||
newpte |= pmap_pkru_get(pmap, va); | |||||
if (psind == 1) { | if (psind == 1) { | ||||
/* Assert the required virtual and physical alignment. */ | /* Assert the required virtual and physical alignment. */ | ||||
KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned")); | KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned")); | ||||
KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); | KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); | ||||
/* | |||||
* If pkru is not same for the whole pde range, return | |||||
* failure and let vm_fault() cope. | |||||
*/ | |||||
if (!pmap_pkru_same(pmap, va, va + pagesizes[psind])) | |||||
return (KERN_FAILURE); | |||||
Done Inline ActionsThe same condition may occur in pmap_enter_object(). Shouldn't this condition also block superpage promotion? markj: The same condition may occur in pmap_enter_object().
Shouldn't this condition also block… | |||||
Done Inline ActionsYes, I missed pmap_enter_object(). Promotion is handled by the updated definition of PG_PTE_PROMOTE. kib: Yes, I missed pmap_enter_object().
Promotion is handled by the updated definition of… | |||||
Done Inline ActionsWhy not move the check into pmap_enter_pde()? markj: Why not move the check into pmap_enter_pde()? | |||||
Done Inline ActionsThis is very good question, because pmap_enter_pde() might sleep waiting for new pdp page and then the pmap_pkru_same() check is invalidated. So the check must be moved to pmap_enter_pde() after pdp allocation, and pmap_enter_object() can avoid the call. kib: This is very good question, because pmap_enter_pde() might sleep waiting for new pdp page and… | |||||
Done Inline ActionsHrm. What if the user changes the keyid for the range during the sleep? markj: Hrm. What if the user changes the keyid for the range during the sleep? | |||||
Done Inline ActionsI moved pmap_pkru_get() down into pmap_enter_pde(). Then it allowed to remove the cleaning of the key bits for small page insertion. kib: I moved pmap_pkru_get() down into pmap_enter_pde(). Then it allowed to remove the cleaning of… | |||||
rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m, &lock); | rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m, &lock); | ||||
goto out; | goto out; | ||||
} | } | ||||
mpte = NULL; | mpte = NULL; | ||||
/* | /* | ||||
* In the case that a page table page is not | * In the case that a page table page is not | ||||
* resident, we are creating it here. | * resident, we are creating it here. | ||||
Show All 14 Lines | if (pde != NULL && (*pde & PG_V) != 0 && ((*pde & PG_PS) == 0 || | ||||
*/ | */ | ||||
nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; | nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; | ||||
mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), | mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), | ||||
nosleep ? NULL : &lock); | nosleep ? NULL : &lock); | ||||
if (mpte == NULL && nosleep) { | if (mpte == NULL && nosleep) { | ||||
rv = KERN_RESOURCE_SHORTAGE; | rv = KERN_RESOURCE_SHORTAGE; | ||||
goto out; | goto out; | ||||
} | } | ||||
if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86) { | |||||
Done Inline ActionsThe comparison with VM_MAXUSER_ADDRESS is redundant. markj: The comparison with VM_MAXUSER_ADDRESS is redundant. | |||||
newpte &= ~X86_PG_PKU_MASK; | |||||
newpte |= pmap_pkru_get(pmap, va); | |||||
} | |||||
goto retry; | goto retry; | ||||
} else | } else | ||||
panic("pmap_enter: invalid page directory va=%#lx", va); | panic("pmap_enter: invalid page directory va=%#lx", va); | ||||
origpte = *pte; | origpte = *pte; | ||||
pv = NULL; | pv = NULL; | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 332 Lines • ▼ Show 20 Lines | pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, | ||||
mpte = NULL; | mpte = NULL; | ||||
m = m_start; | m = m_start; | ||||
lock = NULL; | lock = NULL; | ||||
PMAP_LOCK(pmap); | PMAP_LOCK(pmap); | ||||
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { | while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { | ||||
va = start + ptoa(diff); | va = start + ptoa(diff); | ||||
if ((va & PDRMASK) == 0 && va + NBPDR <= end && | if ((va & PDRMASK) == 0 && va + NBPDR <= end && | ||||
m->psind == 1 && pmap_ps_enabled(pmap) && | m->psind == 1 && pmap_ps_enabled(pmap) && | ||||
pmap_pkru_same(pmap, va, va + NBPDR) && | |||||
pmap_enter_2mpage(pmap, va, m, prot, &lock)) | pmap_enter_2mpage(pmap, va, m, prot, &lock)) | ||||
m = &m[NBPDR / PAGE_SIZE - 1]; | m = &m[NBPDR / PAGE_SIZE - 1]; | ||||
else | else | ||||
mpte = pmap_enter_quick_locked(pmap, va, m, prot, | mpte = pmap_enter_quick_locked(pmap, va, m, prot, | ||||
mpte, &lock); | mpte, &lock); | ||||
m = TAILQ_NEXT(m, listq); | m = TAILQ_NEXT(m, listq); | ||||
} | } | ||||
if (lock != NULL) | if (lock != NULL) | ||||
▲ Show 20 Lines • Show All 120 Lines • ▼ Show 20 Lines | pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, | ||||
newpte = VM_PAGE_TO_PHYS(m) | PG_V | | newpte = VM_PAGE_TO_PHYS(m) | PG_V | | ||||
pmap_cache_bits(pmap, m->md.pat_mode, 0); | pmap_cache_bits(pmap, m->md.pat_mode, 0); | ||||
if ((m->oflags & VPO_UNMANAGED) == 0) | if ((m->oflags & VPO_UNMANAGED) == 0) | ||||
newpte |= PG_MANAGED; | newpte |= PG_MANAGED; | ||||
if ((prot & VM_PROT_EXECUTE) == 0) | if ((prot & VM_PROT_EXECUTE) == 0) | ||||
newpte |= pg_nx; | newpte |= pg_nx; | ||||
if (va < VM_MAXUSER_ADDRESS) | if (va < VM_MAXUSER_ADDRESS) | ||||
newpte |= PG_U; | newpte |= PG_U | pmap_pkru_get(pmap, va); | ||||
pte_store(pte, newpte); | pte_store(pte, newpte); | ||||
return (mpte); | return (mpte); | ||||
} | } | ||||
/* | /* | ||||
* Make a temporary mapping for a physical address. This is only intended | * Make a temporary mapping for a physical address. This is only intended | ||||
* to be used for panic dumps. | * to be used for panic dumps. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 359 Lines • ▼ Show 20 Lines | pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, | ||||
} | } | ||||
out: | out: | ||||
if (lock != NULL) | if (lock != NULL) | ||||
rw_wunlock(lock); | rw_wunlock(lock); | ||||
PMAP_UNLOCK(src_pmap); | PMAP_UNLOCK(src_pmap); | ||||
PMAP_UNLOCK(dst_pmap); | PMAP_UNLOCK(dst_pmap); | ||||
} | } | ||||
/*
 * Copy MD address-space metadata (currently only the PKRU key ranges)
 * from src_pmap into dst_pmap.  There is nothing to do unless both
 * pmaps use native x86 page tables and the CPU implements protection
 * keys.  Returns 0 on success or an errno value from pmap_pkru_copy().
 */
int
pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap)
{
	int error;

	if (dst_pmap->pm_type != src_pmap->pm_type ||
	    dst_pmap->pm_type != PT_X86 ||
	    (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
		return (0);
	/*
	 * Take both pmap locks in address order so that two threads
	 * copying in opposite directions cannot deadlock.
	 */
	if (dst_pmap < src_pmap) {
		PMAP_LOCK(dst_pmap);
		PMAP_LOCK(src_pmap);
	} else {
		PMAP_LOCK(src_pmap);
		PMAP_LOCK(dst_pmap);
	}
	error = pmap_pkru_copy(dst_pmap, src_pmap);
	PMAP_UNLOCK(src_pmap);
	PMAP_UNLOCK(dst_pmap);
	return (error);
}
/* | /* | ||||
* Zero the specified hardware page. | * Zero the specified hardware page. | ||||
*/ | */ | ||||
void | void | ||||
pmap_zero_page(vm_page_t m) | pmap_zero_page(vm_page_t m) | ||||
{ | { | ||||
vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); | vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); | ||||
▲ Show 20 Lines • Show All 383 Lines • ▼ Show 20 Lines | */ | ||||
if (allfree) { | if (allfree) { | ||||
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); | ||||
free_pv_chunk(pc); | free_pv_chunk(pc); | ||||
} | } | ||||
} | } | ||||
if (lock != NULL) | if (lock != NULL) | ||||
rw_wunlock(lock); | rw_wunlock(lock); | ||||
pmap_invalidate_all(pmap); | pmap_invalidate_all(pmap); | ||||
pmap_pkru_deassign_all(pmap); | |||||
PMAP_UNLOCK(pmap); | PMAP_UNLOCK(pmap); | ||||
vm_page_free_pages_toq(&free, true); | vm_page_free_pages_toq(&free, true); | ||||
} | } | ||||
static boolean_t | static boolean_t | ||||
pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) | pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) | ||||
{ | { | ||||
struct rwlock *lock; | struct rwlock *lock; | ||||
▲ Show 20 Lines • Show All 2,618 Lines • ▼ Show 20 Lines | for (va = sva; va < eva; va += PAGE_SIZE) { | ||||
KASSERT((*pte & X86_PG_V) != 0, | KASSERT((*pte & X86_PG_V) != 0, | ||||
("invalid pte va %#lx pte %#lx pt %#lx", va, | ("invalid pte va %#lx pte %#lx pt %#lx", va, | ||||
(u_long)pte, *pte)); | (u_long)pte, *pte)); | ||||
pte_clear(pte); | pte_clear(pte); | ||||
pmap_pti_unwire_pte(pte, va); | pmap_pti_unwire_pte(pte, va); | ||||
} | } | ||||
pmap_invalidate_range(kernel_pmap, sva, eva); | pmap_invalidate_range(kernel_pmap, sva, eva); | ||||
VM_OBJECT_WUNLOCK(pti_obj); | VM_OBJECT_WUNLOCK(pti_obj); | ||||
} | |||||
static void * | |||||
pkru_dup_range(void *ctx, void *data) | |||||
Done Inline Actionsctx is __unused. markj: ctx is __unused. | |||||
{ | |||||
struct pmap_pkru_range *node, *new_node; | |||||
new_node = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT); | |||||
if (new_node == NULL) | |||||
return (NULL); | |||||
node = data; | |||||
memcpy(new_node, node, sizeof(*node)); | |||||
return (new_node); | |||||
} | |||||
static void | |||||
pkru_free_range(void *ctx, void *node) | |||||
Done Inline Actionsctx is __unused. markj: ctx is __unused. | |||||
{ | |||||
uma_zfree(pmap_pkru_ranges_zone, node); | |||||
} | |||||
static int | |||||
pmap_pkru_assign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx, | |||||
int flags) | |||||
{ | |||||
struct pmap_pkru_range *ppr; | |||||
int error; | |||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | |||||
MPASS(pmap->pm_type == PT_X86); | |||||
MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0); | |||||
ppr = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT); | |||||
if (ppr == NULL) | |||||
return (ENOMEM); | |||||
ppr->pkru_keyidx = keyidx; | |||||
ppr->pkru_flags = flags; | |||||
Done Inline ActionsIs ppr leaked if rangeset_insert() fails? markj: Is ppr leaked if rangeset_insert() fails? | |||||
Done Inline ActionsYes, should be fixed. kib: Yes, should be fixed. | |||||
error = rangeset_insert(&pmap->pm_pkru, sva, eva, ppr); | |||||
if (error != 0) | |||||
uma_zfree(pmap_pkru_ranges_zone, ppr); | |||||
return (error); | |||||
} | |||||
/*
 * Remove any protection-key assignment recorded for [sva, eva).
 * May fail (e.g. with ENOMEM) when the removal requires splitting an
 * existing range and the split allocation fails -- see
 * rangeset_remove().
 */
static int
pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	MPASS(pmap->pm_type == PT_X86);
	MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
	return (rangeset_remove(&pmap->pm_pkru, sva, eva));
}
static void | |||||
pmap_pkru_deassign_all(pmap_t pmap) | |||||
{ | |||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | |||||
if (pmap->pm_type == PT_X86 && | |||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) | |||||
rangeset_remove_all(&pmap->pm_pkru); | |||||
} | |||||
static bool | |||||
pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) | |||||
{ | |||||
struct pmap_pkru_range *ppr, *prev_ppr; | |||||
vm_offset_t va; | |||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | |||||
if (pmap->pm_type != PT_X86 || | |||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 || | |||||
sva >= VM_MAXUSER_ADDRESS) | |||||
return (true); | |||||
Done Inline ActionsPerhaps assert that eva < VM_MAXUSER_ADDRESS? markj: Perhaps assert that eva < VM_MAXUSER_ADDRESS? | |||||
Done Inline ActionsYou mean, in addition to the sva check above ? kib: You mean, in addition to the sva check above ? | |||||
for (va = sva, prev_ppr = NULL; va < eva;) { | |||||
ppr = rangeset_get(&pmap->pm_pkru, va); | |||||
if ((ppr == NULL) ^ (prev_ppr == NULL)) | |||||
return (false); | |||||
if (ppr == NULL) { | |||||
va += PAGE_SIZE; | |||||
continue; | |||||
} | |||||
if (prev_ppr->pkru_keyidx != ppr->pkru_keyidx) | |||||
return (false); | |||||
va = ppr->pkru_rs_el.re_end; | |||||
} | |||||
return (true); | |||||
} | |||||
static pt_entry_t | |||||
pmap_pkru_get(pmap_t pmap, vm_offset_t va) | |||||
{ | |||||
struct pmap_pkru_range *ppr; | |||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | |||||
if (pmap->pm_type != PT_X86 || | |||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 || | |||||
va >= VM_MAXUSER_ADDRESS) | |||||
return (0); | |||||
ppr = rangeset_get(&pmap->pm_pkru, va); | |||||
if (ppr != NULL) | |||||
return (X86_PG_PKU(ppr->pkru_keyidx)); | |||||
return (0); | |||||
} | |||||
static bool | |||||
pred_pkru_on_remove(void *ctx __unused, void *r) | |||||
{ | |||||
struct pmap_pkru_range *ppr; | |||||
ppr = r; | |||||
return ((ppr->pkru_flags & AMD64_PKRU_PERSIST) == 0); | |||||
} | |||||
static void | |||||
pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) | |||||
{ | |||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | |||||
if (pmap->pm_type == PT_X86 && | |||||
(cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) { | |||||
rangeset_remove_pred(&pmap->pm_pkru, sva, eva, | |||||
pred_pkru_on_remove); | |||||
} | |||||
} | |||||
/*
 * Copy all protection-key ranges from src_pmap into dst_pmap (fork
 * path, via pmap_vmspace_copy()).  Both pmaps must be locked, native
 * x86, with PKU available.  Returns 0 on success or an errno value
 * from rangeset_copy().
 */
static int
pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap)
{

	PMAP_LOCK_ASSERT(dst_pmap, MA_OWNED);
	PMAP_LOCK_ASSERT(src_pmap, MA_OWNED);
	MPASS(dst_pmap->pm_type == PT_X86);
	MPASS(src_pmap->pm_type == PT_X86);
	MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
	/*
	 * NOTE(review): this peeks at rangeset internals; presumably a
	 * NULL rs_data_ctx means the source holds no ranges and there
	 * is nothing to copy -- confirm against sys/sys/rangeset.h.
	 */
	if (src_pmap->pm_pkru.rs_data_ctx == NULL)
		return (0);
	return (rangeset_copy(&dst_pmap->pm_pkru, &src_pmap->pm_pkru));
}
static void | |||||
pmap_pkru_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, | |||||
u_int keyidx) | |||||
{ | |||||
pml4_entry_t *pml4e; | |||||
pdp_entry_t *pdpe; | |||||
pd_entry_t newpde, ptpaddr, *pde; | |||||
pt_entry_t newpte, *ptep, pte; | |||||
vm_offset_t va_next; | |||||
bool changed; | |||||
PMAP_LOCK_ASSERT(pmap, MA_OWNED); | |||||
MPASS(pmap->pm_type == PT_X86); | |||||
MPASS(keyidx <= PMAP_MAX_PKRU_IDX); | |||||
for (changed = false; sva < eva; sva = va_next) { | |||||
pml4e = pmap_pml4e(pmap, sva); | |||||
if ((*pml4e & X86_PG_V) == 0) { | |||||
va_next = (sva + NBPML4) & ~PML4MASK; | |||||
if (va_next < sva) | |||||
va_next = eva; | |||||
continue; | |||||
} | |||||
pdpe = pmap_pml4e_to_pdpe(pml4e, sva); | |||||
if ((*pdpe & X86_PG_V) == 0) { | |||||
va_next = (sva + NBPDP) & ~PDPMASK; | |||||
if (va_next < sva) | |||||
va_next = eva; | |||||
continue; | |||||
} | |||||
va_next = (sva + NBPDR) & ~PDRMASK; | |||||
if (va_next < sva) | |||||
va_next = eva; | |||||
pde = pmap_pdpe_to_pde(pdpe, sva); | |||||
ptpaddr = *pde; | |||||
if (ptpaddr == 0) | |||||
continue; | |||||
MPASS((ptpaddr & X86_PG_V) != 0); | |||||
if ((ptpaddr & PG_PS) != 0) { | |||||
if (sva + NBPDR == va_next && eva >= va_next) { | |||||
newpde = (ptpaddr & ~X86_PG_PKU_MASK) | | |||||
X86_PG_PKU(keyidx); | |||||
if (newpde != ptpaddr) { | |||||
*pde = newpde; | |||||
changed = true; | |||||
} | |||||
continue; | |||||
} else if (!pmap_demote_pde(pmap, pde, sva)) { | |||||
continue; | |||||
} | |||||
} | |||||
if (va_next > eva) | |||||
va_next = eva; | |||||
for (ptep = pmap_pde_to_pte(pde, sva); sva != va_next; ptep++, | |||||
sva += PAGE_SIZE) { | |||||
pte = *ptep; | |||||
if ((pte & X86_PG_V) == 0) | |||||
continue; | |||||
newpte = (pte & ~X86_PG_PKU_MASK) | X86_PG_PKU(keyidx); | |||||
if (newpte != pte) { | |||||
*ptep = newpte; | |||||
changed = true; | |||||
Done Inline ActionsWhy not do the TLB invalidation in this function? markj: Why not do the TLB invalidation in this function? | |||||
} | |||||
} | |||||
} | |||||
if (changed) | |||||
pmap_invalidate_range(pmap, sva, eva); | |||||
Done Inline Actionssva is modified during the loop, the original value needs to be saved. markj: sva is modified during the loop, the original value needs to be saved. | |||||
Done Inline ActionsThis was the reason why invalidate_range was done outside the function. kib: This was the reason why invalidate_range was done outside the function. | |||||
Done Inline ActionsIn general it seems that pmap functions that operate on a range of VAs will handle invalidation themselves, so I suggested the change to be consistent. I don't see why msva shouldn't just be called va. markj: In general it seems that pmap functions that operate on a range of VAs will handle invalidation… | |||||
} | |||||
/*
 * Validate the user-supplied arguments of the pkru set/clear
 * operations.  Returns 0 when the request is acceptable; EINVAL for a
 * non-native pmap, an out-of-range key index, or unknown flags; EFAULT
 * for an empty range or one extending beyond user space; and ENOTSUP
 * when the CPU does not implement protection keys.
 */
static int
pmap_pkru_check_uargs(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
    u_int keyidx, int flags)
{

	if (pmap->pm_type != PT_X86 || keyidx > PMAP_MAX_PKRU_IDX ||
	    (flags & ~(AMD64_PKRU_PERSIST)) != 0)
		return (EINVAL);
	if (eva <= sva || eva > VM_MAXUSER_ADDRESS)
		return (EFAULT);
	if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
		return (ENOTSUP);
	return (0);
}
int | |||||
pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx, | |||||
int flags) | |||||
{ | |||||
int error; | |||||
sva = trunc_page(sva); | |||||
eva = round_page(eva); | |||||
Done Inline ActionsIf you split alloc/initialization and insertion of the pkru, you can use M_WAITOK for the allocation. I believe we generally try to avoid syscall failures due to transient ENOMEM conditions. Hmm, I see that the rangeset operations may fail with ENOMEM anyway. This is a design flaw IMHO. markj: If you split alloc/initialization and insertion of the pkru, you can use M_WAITOK for the… | |||||
Done Inline ActionsIt is more complicated than just split of the new entry. Problem is that removal might need to split and then it also needs to allocate. During the initial stages of the development, I tried to use per-pmap sx lock to protect the pkru rangeset, but it is too inconsistent and I decided that the only sane approach is to protect it with the same lock as the page tables, ie. pmap mutex. I will think about providing pre-allocated elements for rangeset operations, but it even brief evaluation shows that rangeset interface will become extra-ugly. I do not like M_NOWAIT failures as well, sure. kib: It is more complicated than just split of the new entry. Problem is that removal might need to… | |||||
error = pmap_pkru_check_uargs(pmap, sva, eva, keyidx, flags); | |||||
if (error != 0) | |||||
return (error); | |||||
PMAP_LOCK(pmap); | |||||
error = pmap_pkru_assign(pmap, sva, eva, keyidx, flags); | |||||
if (error == 0) | |||||
pmap_pkru_update_range(pmap, sva, eva, keyidx); | |||||
PMAP_UNLOCK(pmap); | |||||
return (error); | |||||
} | |||||
int | |||||
pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) | |||||
{ | |||||
int error; | |||||
sva = trunc_page(sva); | |||||
eva = round_page(eva); | |||||
error = pmap_pkru_check_uargs(pmap, sva, eva, 0, 0); | |||||
if (error != 0) | |||||
return (error); | |||||
PMAP_LOCK(pmap); | |||||
error = pmap_pkru_deassign(pmap, sva, eva); | |||||
if (error == 0) | |||||
pmap_pkru_update_range(pmap, sva, eva, 0); | |||||
PMAP_UNLOCK(pmap); | |||||
return (error); | |||||
} | } | ||||
#include "opt_ddb.h" | #include "opt_ddb.h" | ||||
#ifdef DDB | #ifdef DDB | ||||
#include <sys/kdb.h> | #include <sys/kdb.h> | ||||
#include <ddb/ddb.h> | #include <ddb/ddb.h> | ||||
DB_SHOW_COMMAND(pte, pmap_print_pte) | DB_SHOW_COMMAND(pte, pmap_print_pte) | ||||
▲ Show 20 Lines • Show All 54 Lines • Show Last 20 Lines |
Why is it NOFREE?
Extra braces.