Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F137264164
D18893.id53208.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
45 KB
Referenced Files
None
Subscribers
None
D18893.id53208.diff
View Options
Index: lib/libc/amd64/Symbol.map
===================================================================
--- lib/libc/amd64/Symbol.map
+++ lib/libc/amd64/Symbol.map
@@ -44,6 +44,13 @@
vfork;
};
+FBSD_1.6 {
+ x86_pkru_get_perm;
+ x86_pkru_set_perm;
+ x86_pkru_protect_range;
+ x86_pkru_unprotect_range;
+};
+
/*
*
* FreeBSD private ABI
Index: lib/libc/i386/Symbol.map
===================================================================
--- lib/libc/i386/Symbol.map
+++ lib/libc/i386/Symbol.map
@@ -46,6 +46,13 @@
___tls_get_addr;
};
+FBSD_1.6 {
+ x86_pkru_get_perm;
+ x86_pkru_set_perm;
+ x86_pkru_protect_range;
+ x86_pkru_unprotect_range;
+};
+
FBSDprivate_1.0 {
/* PSEUDO syscalls */
_getlogin;
Index: lib/libc/x86/sys/Makefile.inc
===================================================================
--- lib/libc/x86/sys/Makefile.inc
+++ lib/libc/x86/sys/Makefile.inc
@@ -3,7 +3,8 @@
.PATH: ${LIBC_SRCTOP}/x86/sys
SRCS+= \
- __vdso_gettc.c
+ __vdso_gettc.c \
+ pkru.c
.if ${MACHINE_CPUARCH} == "amd64" && ${MK_HYPERV} != "no"
CFLAGS+= -DWANT_HYPERV
Index: lib/libc/x86/sys/pkru.c
===================================================================
--- /dev/null
+++ lib/libc/x86/sys/pkru.c
@@ -0,0 +1,138 @@
+/*-
+ * Copyright (c) 2019 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <machine/cpufunc.h>
+#include <machine/specialreg.h>
+#include <machine/sysarch.h>
+#include <x86/ifunc.h>
+#include <errno.h>
+#include <string.h>
+
+#define MAX_PKRU_IDX 0xf
+#ifdef __i386__
+#define X86_SET_PKRU I386_SET_PKRU
+#define X86_CLEAR_PKRU I386_CLEAR_PKRU
+#else
+#define X86_SET_PKRU AMD64_SET_PKRU
+#define X86_CLEAR_PKRU AMD64_CLEAR_PKRU
+#endif
+
+static int
+x86_pkru_get_perm_unsup(u_int keyidx, int *access, int *modify)
+{
+
+ errno = EOPNOTSUPP;
+ return (-1);
+}
+
+static int
+x86_pkru_get_perm_hw(u_int keyidx, int *access, int *modify)
+{
+ uint32_t pkru;
+
+ if (keyidx > MAX_PKRU_IDX) {
+ errno = EINVAL;
+ return (-1);
+ }
+ keyidx *= 2;
+ pkru = rdpkru();
+	*access = (pkru & (1u << keyidx)) == 0;
+	*modify = (pkru & (2u << keyidx)) == 0;
+ return (0);
+}
+
+DEFINE_UIFUNC(, int, x86_pkru_get_perm, (u_int, int *, int *), static)
+{
+
+ return ((cpu_stdext_feature2 & CPUID_STDEXT2_OSPKE) == 0 ?
+ x86_pkru_get_perm_unsup : x86_pkru_get_perm_hw);
+}
+
+static int
+x86_pkru_set_perm_unsup(u_int keyidx, int access, int modify)
+{
+
+ errno = EOPNOTSUPP;
+ return (-1);
+}
+
+static int
+x86_pkru_set_perm_hw(u_int keyidx, int access, int modify)
+{
+ uint32_t pkru;
+
+ if (keyidx > MAX_PKRU_IDX) {
+ errno = EINVAL;
+ return (-1);
+ }
+ keyidx *= 2;
+ pkru = rdpkru();
+	pkru &= ~(3u << keyidx);
+	if (!access)
+		pkru |= 1u << keyidx;
+	if (!modify)
+		pkru |= 2u << keyidx;
+ wrpkru(pkru);
+ return (0);
+}
+
+DEFINE_UIFUNC(, int, x86_pkru_set_perm, (u_int, int, int), static)
+{
+
+ return ((cpu_stdext_feature2 & CPUID_STDEXT2_OSPKE) == 0 ?
+ x86_pkru_set_perm_unsup : x86_pkru_set_perm_hw);
+}
+
+int
+x86_pkru_protect_range(void *addr, unsigned long len, u_int keyidx, int flags)
+{
+ struct amd64_set_pkru a64pkru;
+
+ memset(&a64pkru, 0, sizeof(a64pkru));
+ a64pkru.addr = addr;
+ a64pkru.len = len;
+ a64pkru.keyidx = keyidx;
+ a64pkru.flags = flags;
+ return (sysarch(X86_SET_PKRU, &a64pkru));
+}
+
+int
+x86_pkru_unprotect_range(void *addr, unsigned long len)
+{
+ struct amd64_set_pkru a64pkru;
+
+ memset(&a64pkru, 0, sizeof(a64pkru));
+ a64pkru.addr = addr;
+ a64pkru.len = len;
+ return (sysarch(X86_CLEAR_PKRU, &a64pkru));
+}
Index: sys/amd64/amd64/initcpu.c
===================================================================
--- sys/amd64/amd64/initcpu.c
+++ sys/amd64/amd64/initcpu.c
@@ -233,6 +233,9 @@
if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
cr4 |= CR4_FSGSBASE;
+ if (cpu_stdext_feature2 & CPUID_STDEXT2_PKU)
+ cr4 |= CR4_PKE;
+
/*
* Postpone enabling the SMEP on the boot CPU until the page
* tables are switched from the boot loader identity mapping
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -48,7 +48,7 @@
*/
/*-
* Copyright (c) 2003 Networks Associates Technology, Inc.
- * Copyright (c) 2014-2018 The FreeBSD Foundation
+ * Copyright (c) 2014-2019 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jake Burkholder,
@@ -121,6 +121,7 @@
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/rangeset.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/turnstile.h>
@@ -155,6 +156,7 @@
#ifdef SMP
#include <machine/smp.h>
#endif
+#include <machine/sysarch.h>
#include <machine/tss.h>
static __inline boolean_t
@@ -285,6 +287,13 @@
return (mask);
}
+static __inline pt_entry_t
+pmap_pku_mask_bit(pmap_t pmap)
+{
+
+ return (pmap->pm_type == PT_X86 ? X86_PG_PKU_MASK : 0);
+}
+
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
@@ -428,6 +437,22 @@
static vm_pindex_t pti_pg_idx;
static bool pti_finalized;
+struct pmap_pkru_range {
+ struct rs_el pkru_rs_el;
+ u_int pkru_keyidx;
+ int pkru_flags;
+};
+
+static uma_zone_t pmap_pkru_ranges_zone;
+static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static pt_entry_t pmap_pkru_get(pmap_t pmap, vm_offset_t va);
+static void pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static void *pkru_dup_range(void *ctx, void *data);
+static void pkru_free_range(void *ctx, void *node);
+static int pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap);
+static int pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static void pmap_pkru_deassign_all(pmap_t pmap);
+
static int
pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
{
@@ -2866,6 +2891,12 @@
pmap->pm_pcids[i].pm_gen = 1;
}
pmap_activate_boot(pmap);
+
+ if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
+ pmap_pkru_ranges_zone = uma_zcreate("pkru ranges",
+ sizeof(struct pmap_pkru_range), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ }
}
void
@@ -2954,6 +2985,10 @@
pmap_pinit_pml4_pti(pml4pgu);
pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pml4pgu);
}
+ if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
+ rangeset_init(&pmap->pm_pkru, pkru_dup_range,
+ pkru_free_range, pmap, M_NOWAIT);
+ }
}
pmap->pm_root.rt_root = 0;
@@ -3250,6 +3285,9 @@
vm_page_unwire_noq(m);
vm_page_free(m);
}
+ if (pmap->pm_type == PT_X86 &&
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0)
+ rangeset_fini(&pmap->pm_pkru);
}
static int
@@ -4080,7 +4118,7 @@
{
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
- pt_entry_t PG_A, PG_G, PG_M, PG_RW, PG_V;
+ pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
vm_paddr_t mptepa;
vm_page_t mpte;
struct spglist free;
@@ -4093,6 +4131,7 @@
PG_RW = pmap_rw_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
+ PG_PKU_MASK = pmap_pku_mask_bit(pmap);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
oldpde = *pde;
@@ -4525,6 +4564,7 @@
out:
if (anyvalid)
pmap_invalidate_all(pmap);
+ pmap_pkru_on_remove(pmap, sva, eva);
PMAP_UNLOCK(pmap);
pmap_delayed_invl_finished();
vm_page_free_pages_toq(&free, true);
@@ -4836,7 +4876,7 @@
{
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
- pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V;
+ pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V, PG_PKU_MASK;
vm_page_t mpte;
int PG_PTE_CACHE;
@@ -4845,6 +4885,7 @@
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
+ PG_PKU_MASK = pmap_pku_mask_bit(pmap);
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -5032,6 +5073,8 @@
lock = NULL;
PMAP_LOCK(pmap);
+ if (va < VM_MAXUSER_ADDRESS)
+ newpte |= pmap_pkru_get(pmap, va);
if (psind == 1) {
/* Assert the required virtual and physical alignment. */
KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned"));
@@ -5066,6 +5109,10 @@
rv = KERN_RESOURCE_SHORTAGE;
goto out;
}
+ if (pmap->pm_type == PT_X86) {
+ newpte &= ~X86_PG_PKU_MASK;
+ newpte |= pmap_pkru_get(pmap, va);
+ }
goto retry;
} else
panic("pmap_enter: invalid page directory va=%#lx", va);
@@ -5291,6 +5338,21 @@
" in pmap %p", va, pmap);
return (KERN_RESOURCE_SHORTAGE);
}
+
+ /*
+ * If pkru is not same for the whole pde range, return failure
+ * and let vm_fault() cope. Check after pde allocation, since
+ * it could sleep.
+ */
+ if (!pmap_pkru_same(pmap, va, va + NBPDR)) {
+ SLIST_INIT(&free);
+ if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
+ pmap_invalidate_page(pmap, va);
+ vm_page_free_pages_toq(&free, true);
+ }
+ return (KERN_FAILURE);
+ }
+
pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
pde = &pde[pmap_pde_index(va)];
oldpde = *pde;
@@ -5550,7 +5612,7 @@
if ((prot & VM_PROT_EXECUTE) == 0)
newpte |= pg_nx;
if (va < VM_MAXUSER_ADDRESS)
- newpte |= PG_U;
+ newpte |= PG_U | pmap_pkru_get(pmap, va);
pte_store(pte, newpte);
return (mpte);
}
@@ -5926,6 +5988,33 @@
PMAP_UNLOCK(dst_pmap);
}
+int
+pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap)
+{
+ int error;
+
+ if (dst_pmap->pm_type != src_pmap->pm_type ||
+ dst_pmap->pm_type != PT_X86 ||
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
+ return (0);
+ for (;;) {
+ if (dst_pmap < src_pmap) {
+ PMAP_LOCK(dst_pmap);
+ PMAP_LOCK(src_pmap);
+ } else {
+ PMAP_LOCK(src_pmap);
+ PMAP_LOCK(dst_pmap);
+ }
+ error = pmap_pkru_copy(dst_pmap, src_pmap);
+ PMAP_UNLOCK(src_pmap);
+ PMAP_UNLOCK(dst_pmap);
+ if (error != ENOMEM)
+ break;
+ vm_wait(NULL);
+ }
+ return (error);
+}
+
/*
* Zero the specified hardware page.
*/
@@ -6325,6 +6414,7 @@
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
+ pmap_pkru_deassign_all(pmap);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, true);
}
@@ -8961,6 +9051,285 @@
VM_OBJECT_WUNLOCK(pti_obj);
}
+static void *
+pkru_dup_range(void *ctx __unused, void *data)
+{
+ struct pmap_pkru_range *node, *new_node;
+
+ new_node = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT);
+ if (new_node == NULL)
+ return (NULL);
+ node = data;
+ memcpy(new_node, node, sizeof(*node));
+ return (new_node);
+}
+
+static void
+pkru_free_range(void *ctx __unused, void *node)
+{
+
+ uma_zfree(pmap_pkru_ranges_zone, node);
+}
+
+static int
+pmap_pkru_assign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx,
+ int flags)
+{
+ struct pmap_pkru_range *ppr;
+ int error;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ MPASS(pmap->pm_type == PT_X86);
+ MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
+ if ((flags & AMD64_PKRU_EXCL) != 0 &&
+ !rangeset_check_empty(&pmap->pm_pkru, sva, eva))
+ return (EBUSY);
+ ppr = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT);
+ if (ppr == NULL)
+ return (ENOMEM);
+ ppr->pkru_keyidx = keyidx;
+ ppr->pkru_flags = flags & AMD64_PKRU_PERSIST;
+ error = rangeset_insert(&pmap->pm_pkru, sva, eva, ppr);
+ if (error != 0)
+ uma_zfree(pmap_pkru_ranges_zone, ppr);
+ return (error);
+}
+
+static int
+pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ MPASS(pmap->pm_type == PT_X86);
+ MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
+ return (rangeset_remove(&pmap->pm_pkru, sva, eva));
+}
+
+static void
+pmap_pkru_deassign_all(pmap_t pmap)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (pmap->pm_type == PT_X86 &&
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0)
+ rangeset_remove_all(&pmap->pm_pkru);
+}
+
+static bool
+pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ struct pmap_pkru_range *ppr, *prev_ppr;
+ vm_offset_t va;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (pmap->pm_type != PT_X86 ||
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 ||
+ sva >= VM_MAXUSER_ADDRESS)
+ return (true);
+ MPASS(eva <= VM_MAXUSER_ADDRESS);
+	for (va = sva, prev_ppr = NULL; va < eva; prev_ppr = ppr) {
+		ppr = rangeset_get(&pmap->pm_pkru, va);
+		if (va != sva && ((ppr == NULL) ^ (prev_ppr == NULL)))
+			return (false);
+		if (ppr == NULL) {
+			va += PAGE_SIZE;
+			continue;
+		}
+		if (prev_ppr != NULL && prev_ppr->pkru_keyidx != ppr->pkru_keyidx)
+			return (false);
+		va = ppr->pkru_rs_el.re_end;
+	}
+ return (true);
+}
+
+static pt_entry_t
+pmap_pkru_get(pmap_t pmap, vm_offset_t va)
+{
+ struct pmap_pkru_range *ppr;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (pmap->pm_type != PT_X86 ||
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 ||
+ va >= VM_MAXUSER_ADDRESS)
+ return (0);
+ ppr = rangeset_get(&pmap->pm_pkru, va);
+ if (ppr != NULL)
+ return (X86_PG_PKU(ppr->pkru_keyidx));
+ return (0);
+}
+
+static bool
+pred_pkru_on_remove(void *ctx __unused, void *r)
+{
+ struct pmap_pkru_range *ppr;
+
+ ppr = r;
+ return ((ppr->pkru_flags & AMD64_PKRU_PERSIST) == 0);
+}
+
+static void
+pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (pmap->pm_type == PT_X86 &&
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
+ rangeset_remove_pred(&pmap->pm_pkru, sva, eva,
+ pred_pkru_on_remove);
+ }
+}
+
+static int
+pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap)
+{
+
+ PMAP_LOCK_ASSERT(dst_pmap, MA_OWNED);
+ PMAP_LOCK_ASSERT(src_pmap, MA_OWNED);
+ MPASS(dst_pmap->pm_type == PT_X86);
+ MPASS(src_pmap->pm_type == PT_X86);
+ MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
+ if (src_pmap->pm_pkru.rs_data_ctx == NULL)
+ return (0);
+ return (rangeset_copy(&dst_pmap->pm_pkru, &src_pmap->pm_pkru));
+}
+
+static void
+pmap_pkru_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+ u_int keyidx)
+{
+ pml4_entry_t *pml4e;
+ pdp_entry_t *pdpe;
+ pd_entry_t newpde, ptpaddr, *pde;
+ pt_entry_t newpte, *ptep, pte;
+ vm_offset_t msva, va_next;
+ bool changed;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ MPASS(pmap->pm_type == PT_X86);
+ MPASS(keyidx <= PMAP_MAX_PKRU_IDX);
+
+ for (changed = false, msva = sva; msva < eva; msva = va_next) {
+ pml4e = pmap_pml4e(pmap, msva);
+ if ((*pml4e & X86_PG_V) == 0) {
+ va_next = (msva + NBPML4) & ~PML4MASK;
+ if (va_next < msva)
+ va_next = eva;
+ continue;
+ }
+
+ pdpe = pmap_pml4e_to_pdpe(pml4e, msva);
+ if ((*pdpe & X86_PG_V) == 0) {
+ va_next = (msva + NBPDP) & ~PDPMASK;
+ if (va_next < msva)
+ va_next = eva;
+ continue;
+ }
+
+ va_next = (msva + NBPDR) & ~PDRMASK;
+ if (va_next < msva)
+ va_next = eva;
+
+ pde = pmap_pdpe_to_pde(pdpe, msva);
+ ptpaddr = *pde;
+ if (ptpaddr == 0)
+ continue;
+
+ MPASS((ptpaddr & X86_PG_V) != 0);
+ if ((ptpaddr & PG_PS) != 0) {
+ if (msva + NBPDR == va_next && eva >= va_next) {
+ newpde = (ptpaddr & ~X86_PG_PKU_MASK) |
+ X86_PG_PKU(keyidx);
+ if (newpde != ptpaddr) {
+ *pde = newpde;
+ changed = true;
+ }
+ continue;
+ } else if (!pmap_demote_pde(pmap, pde, msva)) {
+ continue;
+ }
+ }
+
+ if (va_next > eva)
+ va_next = eva;
+
+ for (ptep = pmap_pde_to_pte(pde, msva); msva != va_next;
+ ptep++, msva += PAGE_SIZE) {
+ pte = *ptep;
+ if ((pte & X86_PG_V) == 0)
+ continue;
+ newpte = (pte & ~X86_PG_PKU_MASK) | X86_PG_PKU(keyidx);
+ if (newpte != pte) {
+ *ptep = newpte;
+ changed = true;
+ }
+ }
+ }
+ if (changed)
+ pmap_invalidate_range(pmap, sva, eva);
+}
+
+static int
+pmap_pkru_check_uargs(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+ u_int keyidx, int flags)
+{
+
+ if (pmap->pm_type != PT_X86 || keyidx > PMAP_MAX_PKRU_IDX ||
+ (flags & ~(AMD64_PKRU_PERSIST | AMD64_PKRU_EXCL)) != 0)
+ return (EINVAL);
+ if (eva <= sva || eva > VM_MAXUSER_ADDRESS)
+ return (EFAULT);
+ if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
+ return (ENOTSUP);
+ return (0);
+}
+
+int
+pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx,
+ int flags)
+{
+ int error;
+
+ sva = trunc_page(sva);
+ eva = round_page(eva);
+ error = pmap_pkru_check_uargs(pmap, sva, eva, keyidx, flags);
+ if (error != 0)
+ return (error);
+ for (;;) {
+ PMAP_LOCK(pmap);
+ error = pmap_pkru_assign(pmap, sva, eva, keyidx, flags);
+ if (error == 0)
+ pmap_pkru_update_range(pmap, sva, eva, keyidx);
+ PMAP_UNLOCK(pmap);
+ if (error != ENOMEM)
+ break;
+ vm_wait(NULL);
+ }
+ return (error);
+}
+
+int
+pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ int error;
+
+ sva = trunc_page(sva);
+ eva = round_page(eva);
+ error = pmap_pkru_check_uargs(pmap, sva, eva, 0, 0);
+ if (error != 0)
+ return (error);
+ for (;;) {
+ PMAP_LOCK(pmap);
+ error = pmap_pkru_deassign(pmap, sva, eva);
+ if (error == 0)
+ pmap_pkru_update_range(pmap, sva, eva, 0);
+ PMAP_UNLOCK(pmap);
+ if (error != ENOMEM)
+ break;
+ vm_wait(NULL);
+ }
+ return (error);
+}
+
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kdb.h>
Index: sys/amd64/amd64/sys_machdep.c
===================================================================
--- sys/amd64/amd64/sys_machdep.c
+++ sys/amd64/amd64/sys_machdep.c
@@ -44,6 +44,7 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/pcpu.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
@@ -53,6 +54,7 @@
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h> /* for kernel_map */
+#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <machine/frame.h>
@@ -170,13 +172,16 @@
int
sysarch(struct thread *td, struct sysarch_args *uap)
{
- int error = 0;
- struct pcb *pcb = curthread->td_pcb;
+ struct pcb *pcb;
+ struct vm_map *map;
uint32_t i386base;
uint64_t a64base;
struct i386_ioperm_args iargs;
struct i386_get_xfpustate i386xfpu;
+ struct i386_set_pkru i386pkru;
struct amd64_get_xfpustate a64xfpu;
+ struct amd64_set_pkru a64pkru;
+ int error;
#ifdef CAPABILITY_MODE
/*
@@ -194,11 +199,15 @@
case I386_GET_GSBASE:
case I386_SET_GSBASE:
case I386_GET_XFPUSTATE:
+ case I386_SET_PKRU:
+ case I386_CLEAR_PKRU:
case AMD64_GET_FSBASE:
case AMD64_SET_FSBASE:
case AMD64_GET_GSBASE:
case AMD64_SET_GSBASE:
case AMD64_GET_XFPUSTATE:
+ case AMD64_SET_PKRU:
+ case AMD64_CLEAR_PKRU:
break;
case I386_SET_IOPERM:
@@ -214,6 +223,10 @@
if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
return (sysarch_ldt(td, uap, UIO_USERSPACE));
+
+ error = 0;
+ pcb = td->td_pcb;
+
/*
* XXXKIB check that the BSM generation code knows to encode
* the op argument.
@@ -233,11 +246,27 @@
a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr;
a64xfpu.len = i386xfpu.len;
break;
+ case I386_SET_PKRU:
+ case I386_CLEAR_PKRU:
+ if ((error = copyin(uap->parms, &i386pkru,
+ sizeof(struct i386_set_pkru))) != 0)
+ return (error);
+ a64pkru.addr = (void *)(uintptr_t)i386pkru.addr;
+ a64pkru.len = i386pkru.len;
+ a64pkru.keyidx = i386pkru.keyidx;
+ a64pkru.flags = i386pkru.flags;
+ break;
case AMD64_GET_XFPUSTATE:
if ((error = copyin(uap->parms, &a64xfpu,
sizeof(struct amd64_get_xfpustate))) != 0)
return (error);
break;
+ case AMD64_SET_PKRU:
+ case AMD64_CLEAR_PKRU:
+ if ((error = copyin(uap->parms, &a64pkru,
+ sizeof(struct amd64_set_pkru))) != 0)
+ return (error);
+ break;
default:
break;
}
@@ -326,6 +355,30 @@
a64xfpu.addr, a64xfpu.len);
break;
+ case I386_SET_PKRU:
+ case AMD64_SET_PKRU:
+ map = &td->td_proc->p_vmspace->vm_map;
+ vm_map_lock_read(map);
+ error = pmap_pkru_set(PCPU_GET(curpmap),
+ (vm_offset_t)a64pkru.addr, (vm_offset_t)a64pkru.addr +
+ a64pkru.len, a64pkru.keyidx, a64pkru.flags);
+ vm_map_unlock_read(map);
+ break;
+
+ case I386_CLEAR_PKRU:
+ case AMD64_CLEAR_PKRU:
+ if (a64pkru.flags != 0 || a64pkru.keyidx != 0) {
+ error = EINVAL;
+ break;
+ }
+ map = &td->td_proc->p_vmspace->vm_map;
+ vm_map_lock_read(map);
+ error = pmap_pkru_clear(PCPU_GET(curpmap),
+ (vm_offset_t)a64pkru.addr,
+ (vm_offset_t)a64pkru.addr + a64pkru.len);
+		vm_map_unlock_read(map);
+ break;
+
default:
error = EINVAL;
break;
Index: sys/amd64/amd64/trap.c
===================================================================
--- sys/amd64/amd64/trap.c
+++ sys/amd64/amd64/trap.c
@@ -807,6 +807,20 @@
return (-1);
}
+ /*
+ * User-mode protection key violation (PKU). May happen
+ * either from usermode or from kernel if copyin accessed
+ * key-protected mapping.
+ */
+ if ((frame->tf_err & PGEX_PK) != 0) {
+ if (eva > VM_MAXUSER_ADDRESS) {
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+ rv = KERN_PROTECTION_FAILURE;
+ goto after_vmfault;
+ }
+
/*
* If nx protection of the usermode portion of kernel page
* tables caused trap, panic.
@@ -842,6 +856,7 @@
#endif
return (0);
}
+after_vmfault:
if (!usermode) {
if (td->td_intr_nesting_level == 0 &&
curpcb->pcb_onfault != NULL) {
@@ -885,10 +900,12 @@
#endif
if (type == T_PAGEFLT) {
printf("fault virtual address = 0x%lx\n", eva);
- printf("fault code = %s %s %s, %s\n",
+ printf("fault code = %s %s %s%s%s, %s\n",
code & PGEX_U ? "user" : "supervisor",
code & PGEX_W ? "write" : "read",
code & PGEX_I ? "instruction" : "data",
+		    code & PGEX_PK ? " prot key" : "",
+		    code & PGEX_SGX ? " SGX" : "",
code & PGEX_RSV ? "reserved bits in PTE" :
code & PGEX_P ? "protection violation" : "page not present");
}
Index: sys/amd64/include/cpufunc.h
===================================================================
--- sys/amd64/include/cpufunc.h
+++ sys/amd64/include/cpufunc.h
@@ -627,6 +627,22 @@
__asm __volatile("mwait" : : "a" (hints), "c" (extensions));
}
+static __inline uint32_t
+rdpkru(void)
+{
+ uint32_t res;
+
+ __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx");
+ return (res);
+}
+
+static __inline void
+wrpkru(uint32_t mask)
+{
+
+ __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0));
+}
+
#ifdef _KERNEL
/* This is defined in <machine/specialreg.h> but is too painful to get to */
#ifndef MSR_FSBASE
Index: sys/amd64/include/pmap.h
===================================================================
--- sys/amd64/include/pmap.h
+++ sys/amd64/include/pmap.h
@@ -66,6 +66,7 @@
#define X86_PG_AVAIL2 0x400 /* < programmers use */
#define X86_PG_AVAIL3 0x800 /* \ */
#define X86_PG_PDE_PAT 0x1000 /* PAT PAT index */
+#define X86_PG_PKU(idx) ((pt_entry_t)idx << 59)
#define X86_PG_NX (1ul<<63) /* No-execute */
#define X86_PG_AVAIL(x) (1ul << (x))
@@ -73,6 +74,10 @@
#define X86_PG_PDE_CACHE (X86_PG_PDE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
#define X86_PG_PTE_CACHE (X86_PG_PTE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
+/* Protection keys indexes */
+#define PMAP_MAX_PKRU_IDX 0xf
+#define X86_PG_PKU_MASK X86_PG_PKU(PMAP_MAX_PKRU_IDX)
+
/*
* Intel extended page table (EPT) bit definitions.
*/
@@ -120,7 +125,7 @@
* (PTE) page mappings have identical settings for the following fields:
*/
#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \
- PG_M | PG_A | PG_U | PG_RW | PG_V)
+ PG_M | PG_A | PG_U | PG_RW | PG_V | PG_PKU_MASK)
/*
* Page Protection Exception bits
@@ -131,6 +136,8 @@
#define PGEX_U 0x04 /* access from User mode (UPL) */
#define PGEX_RSV 0x08 /* reserved PTE field is non-zero */
#define PGEX_I 0x10 /* during an instruction fetch */
+#define PGEX_PK 0x20 /* protection key violation */
+#define PGEX_SGX 0x40 /* SGX-related */
/*
* undef the PG_xx macros that define bits in the regular x86 PTEs that
@@ -240,6 +247,8 @@
#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/_pctrie.h>
+#include <sys/_rangeset.h>
#include <vm/_vm_radix.h>
@@ -334,6 +343,7 @@
long pm_eptgen; /* EPT pmap generation id */
int pm_flags;
struct pmap_pcids pm_pcids[MAXCPU];
+ struct rangeset pm_pkru;
};
/* flags */
@@ -452,6 +462,9 @@
void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
vm_offset_t eva);
+int pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+ u_int keyidx, int flags);
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3909,6 +3909,7 @@
kern/subr_power.c standard
kern/subr_prf.c standard
kern/subr_prof.c standard
+kern/subr_rangeset.c standard
kern/subr_rman.c standard
kern/subr_rtc.c standard
kern/subr_sbuf.c standard
Index: sys/i386/include/cpufunc.h
===================================================================
--- sys/i386/include/cpufunc.h
+++ sys/i386/include/cpufunc.h
@@ -700,6 +700,22 @@
write_eflags(eflags);
}
+static __inline uint32_t
+rdpkru(void)
+{
+ uint32_t res;
+
+ __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx");
+ return (res);
+}
+
+static __inline void
+wrpkru(uint32_t mask)
+{
+
+ __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0));
+}
+
#else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */
int breakpoint(void);
Index: sys/i386/include/pmap.h
===================================================================
--- sys/i386/include/pmap.h
+++ sys/i386/include/pmap.h
@@ -372,6 +372,13 @@
#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
+static inline int
+pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
+{
+
+ return (0);
+}
+
/*
* Only the following functions or macros may be used before pmap_bootstrap()
* is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), vtophys(), and
Index: sys/kern/subr_rangeset.c
===================================================================
--- /dev/null
+++ sys/kern/subr_rangeset.c
@@ -0,0 +1,370 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/pctrie.h>
+#include <sys/rangeset.h>
+#include <vm/uma.h>
+
+#ifdef DIAGNOSTIC
+static void rangeset_check(struct rangeset *rs);
+#else
+#define rangeset_check(rs)
+#endif
+
+static uma_zone_t rs_node_zone;
+static void
+rs_rangeset_init(void *arg __unused)
+{
+
+ rs_node_zone = uma_zcreate("rangeset pctrie nodes",
+ pctrie_node_size(), NULL, NULL, pctrie_zone_init, NULL,
+ UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+}
+SYSINIT(rs, SI_SUB_LOCK, SI_ORDER_ANY, rs_rangeset_init, NULL);
+
+static void *
+rs_node_alloc(struct pctrie *ptree)
+{
+ struct rangeset *rs;
+
+ rs = __containerof(ptree, struct rangeset, rs_trie);
+ return (uma_zalloc(rs_node_zone, rs->rs_alloc_flags));
+}
+
+static void
+rs_node_free(struct pctrie *ptree __unused, void *node)
+{
+
+ uma_zfree(rs_node_zone, node);
+}
+
+void
+rangeset_init(struct rangeset *rs, rs_dup_data_t dup_data,
+ rs_free_data_t free_data, void *data_ctx, u_int alloc_flags)
+{
+
+ pctrie_init(&rs->rs_trie);
+ rs->rs_dup_data = dup_data;
+ rs->rs_free_data = free_data;
+ rs->rs_data_ctx = data_ctx;
+ rs->rs_alloc_flags = alloc_flags;
+}
+
+void
+rangeset_fini(struct rangeset *rs)
+{
+
+ rangeset_check(rs);
+ rangeset_remove_all(rs);
+}
+
+bool
+rangeset_check_empty(struct rangeset *rs, uint64_t start, uint64_t end)
+{
+ struct rs_el *r;
+ uint64_t *r1;
+
+ rangeset_check(rs);
+ r1 = pctrie_lookup_ge(&rs->rs_trie, start);
+ if (r1 != NULL) {
+ r = __containerof(r1, struct rs_el, re_start);
+ if (r->re_start < end)
+ return (false);
+ }
+	r1 = pctrie_lookup_le(&rs->rs_trie, end - 1);
+ if (r1 != NULL) {
+ r = __containerof(r1, struct rs_el, re_start);
+ if (r->re_end > start)
+ return (false);
+ }
+ return (true);
+}
+
+int
+rangeset_insert(struct rangeset *rs, uint64_t start, uint64_t end,
+ void *data)
+{
+ struct rs_el *r;
+ int error;
+
+ rangeset_check(rs);
+ error = rangeset_remove(rs, start, end);
+ if (error != 0)
+ return (error);
+ r = data;
+ r->re_start = start;
+ r->re_end = end;
+ error = pctrie_insert(&rs->rs_trie, &r->re_start, rs_node_alloc);
+ rangeset_check(rs);
+ return (error);
+}
+
+int
+rangeset_remove_pred(struct rangeset *rs, uint64_t start, uint64_t end,
+ rs_pred_t pred)
+{
+ struct rs_el *r, *rn;
+ uint64_t *r1;
+ int error;
+
+ rangeset_check(rs);
+ error = 0;
+ for (; end > 0 && start < end;) {
+ r1 = pctrie_lookup_le(&rs->rs_trie, end - 1);
+ if (r1 == NULL)
+ break;
+ r = __containerof(r1, struct rs_el, re_start);
+
+ /*
+ * ------============================--|-------|----
+ * rs re s e
+ */
+ if (r->re_end <= start)
+ break;
+
+ if (r->re_end <= end) {
+ if (r->re_start < start) {
+ /*
+ * ------========|==============-------|----
+ * rs s re e
+ */
+ if (pred(rs->rs_data_ctx, r))
+ r->re_end = start;
+ break;
+ }
+
+ /*
+ * ------|--------===================----------|----
+ * s rs re e
+ */
+ end = r->re_start;
+ if (pred(rs->rs_data_ctx, r)) {
+ pctrie_remove(&rs->rs_trie, r->re_start,
+ rs_node_free);
+ rs->rs_free_data(rs->rs_data_ctx, r);
+ }
+ continue;
+ }
+
+ /*
+ * ------|--------====================|==========----
+ * s rs e re
+ */
+ if (r->re_start >= start) {
+ if (pred(rs->rs_data_ctx, r)) {
+ pctrie_remove(&rs->rs_trie, r->re_start,
+ rs_node_free);
+ r->re_start = end;
+ error = pctrie_insert(&rs->rs_trie,
+ &r->re_start, rs_node_alloc);
+ /*
+ * The insert above must succeed
+ * because rs_node zone is marked
+ * nofree and we freed one element
+ * just before.
+ */
+ MPASS(error == 0);
+ } else {
+ end = r->re_start;
+ }
+ continue;
+ }
+
+ /*
+ * ------=========|===================|==========----
+ * rs s e re
+ */
+ if (pred(rs->rs_data_ctx, r)) {
+ /*
+ * Split. Can only happen once, and then if
+ * any allocation fails, the rangeset is kept
+ * intact.
+ */
+ rn = rs->rs_dup_data(rs->rs_data_ctx, r);
+ if (rn == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ rn->re_start = end;
+ rn->re_end = r->re_end;
+ error = pctrie_insert(&rs->rs_trie, &rn->re_start,
+ rs_node_alloc);
+ if (error != 0) {
+ rs->rs_free_data(rs->rs_data_ctx, rn);
+ break;
+ }
+ r->re_end = start;
+ }
+ break;
+ }
+ rangeset_check(rs);
+ return (error);
+}
+
+static bool
+rangeset_true_pred(void *ctx __unused, void *r __unused)
+{
+
+ return (true);
+}
+
+int
+rangeset_remove(struct rangeset *rs, uint64_t start, uint64_t end)
+{
+
+ return (rangeset_remove_pred(rs, start, end, rangeset_true_pred));
+}
+
+void
+rangeset_remove_all(struct rangeset *rs)
+{
+ struct rs_el *r;
+ uint64_t *r1;
+
+ for (;;) {
+ r1 = pctrie_lookup_ge(&rs->rs_trie, 0);
+ if (r1 == NULL)
+ break;
+ r = __containerof(r1, struct rs_el, re_start);
+ pctrie_remove(&rs->rs_trie, r->re_start, rs_node_free);
+ rs->rs_free_data(rs->rs_data_ctx, r);
+ }
+}
+
+void *
+rangeset_get(struct rangeset *rs, uint64_t place)
+{
+ struct rs_el *r;
+ uint64_t *r1;
+
+ rangeset_check(rs);
+ r1 = pctrie_lookup_le(&rs->rs_trie, place);
+ if (r1 == NULL)
+ return (NULL);
+ r = __containerof(r1, struct rs_el, re_start);
+ if (r->re_end <= place)
+ return (NULL);
+ return (r);
+}
+
+int
+rangeset_copy(struct rangeset *dst_rs, struct rangeset *src_rs)
+{
+ struct rs_el *src_r, *dst_r;
+ uint64_t cursor, *r1;
+ int error;
+
+ MPASS(pctrie_is_empty(&dst_rs->rs_trie));
+ rangeset_check(src_rs);
+ MPASS(dst_rs->rs_dup_data == src_rs->rs_dup_data);
+
+ error = 0;
+ for (cursor = 0;; cursor = src_r->re_start + 1) {
+ r1 = pctrie_lookup_ge(&src_rs->rs_trie, cursor);
+ if (r1 == NULL)
+ break;
+ src_r = __containerof(r1, struct rs_el, re_start);
+ dst_r = dst_rs->rs_dup_data(dst_rs->rs_data_ctx, src_r);
+ if (dst_r == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ error = pctrie_insert(&dst_rs->rs_trie, &dst_r->re_start,
+ rs_node_alloc);
+ if (error != 0)
+ break;
+ }
+ if (error != 0)
+ rangeset_remove_all(dst_rs);
+ return (error);
+}
+
+#ifdef DIAGNOSTIC
+static void
+rangeset_check(struct rangeset *rs)
+{
+ struct rs_el *r, *rp;
+ uint64_t cursor, *r1;
+
+ for (cursor = 0, rp = NULL;; cursor = r->re_start + 1, rp = r) {
+ r1 = pctrie_lookup_ge(&rs->rs_trie, cursor);
+ if (r1 == NULL)
+ break;
+ r = __containerof(r1, struct rs_el, re_start);
+ KASSERT(r->re_start < r->re_end,
+ ("invalid interval rs %p elem %p (%#jx, %#jx)",
+ rs, r, (uintmax_t)r->re_start, (uintmax_t)r->re_end));
+ if (rp != NULL) {
+ KASSERT(rp->re_end <= r->re_start,
+ ("non-ascending neighbors rs %p "
+ "prev elem %p (%#jx, %#jx) elem %p (%#jx, %#jx)",
+ rs, rp, (uintmax_t)rp->re_start,
+ (uintmax_t)rp->re_end, r, (uintmax_t)r->re_start,
+ (uintmax_t)r->re_end));
+ }
+ }
+}
+#endif
+
+#include "opt_ddb.h"
+#ifdef DDB
+#include <sys/kernel.h>
+#include <ddb/ddb.h>
+
+DB_SHOW_COMMAND(rangeset, rangeset_show_fn)
+{
+ struct rangeset *rs;
+ struct rs_el *r;
+ uint64_t cursor, *r1;
+
+ if (!have_addr) {
+ db_printf("show rangeset addr\n");
+ return;
+ }
+
+ rs = (struct rangeset *)addr;
+ db_printf("rangeset %p\n", rs);
+ for (cursor = 0;; cursor = r->re_start + 1) {
+ r1 = pctrie_lookup_ge(&rs->rs_trie, cursor);
+ if (r1 == NULL)
+ break;
+ r = __containerof(r1, struct rs_el, re_start);
+ db_printf(" el %p start %#jx end %#jx\n",
+ r, r->re_start, r->re_end);
+ }
+}
+#endif
Index: sys/sys/_rangeset.h
===================================================================
--- /dev/null
+++ sys/sys/_rangeset.h
@@ -0,0 +1,51 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__RANGESET_H
+#define _SYS__RANGESET_H
+
+#include <sys/_pctrie.h>
+
+typedef void *(*rs_dup_data_t)(void *ctx, void *data); /* duplicate an element; NULL return is treated as ENOMEM */
+typedef void (*rs_free_data_t)(void *ctx, void *data); /* release an element's data */
+
+struct rangeset {
+ struct pctrie rs_trie; /* elements, keyed by rs_el re_start */
+ rs_dup_data_t rs_dup_data; /* copy callback, used by rangeset_copy() */
+ rs_free_data_t rs_free_data; /* destructor callback */
+ void *rs_data_ctx; /* opaque context passed to the callbacks */
+ u_int rs_alloc_flags; /* allocation flags; presumably M_WAITOK/M_NOWAIT — confirm in rangeset.c */
+};
+
+#endif
+
Index: sys/sys/rangeset.h
===================================================================
--- /dev/null
+++ sys/sys/rangeset.h
@@ -0,0 +1,88 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_RANGESET_H
+#define _SYS_RANGESET_H
+
+#ifdef _KERNEL
+
+#include <sys/_rangeset.h>
+
+typedef bool (*rs_pred_t)(void *ctx, void *r); /* element predicate, used by rangeset_remove_pred() */
+
+/*
+ * This structure must be embedded at the start of the rangeset element.
+ */
+struct rs_el {
+ uint64_t re_start; /* pctrie key */
+ uint64_t re_end; /* range end; invariant re_start < re_end, neighbors may touch (prev re_end <= re_start) */
+};
+
+void rangeset_init(struct rangeset *rs, rs_dup_data_t dup_data,
+ rs_free_data_t free_data, void *rs_data_ctx, u_int alloc_flags);
+void rangeset_fini(struct rangeset *rs);
+
+bool rangeset_check_empty(struct rangeset *rs, uint64_t start,
+ uint64_t end);
+
+/*
+ * r points to the app data with a struct rs_el at its beginning.
+ */
+int rangeset_insert(struct rangeset *rs, uint64_t start, uint64_t end,
+ void *r);
+
+/*
+ * Guarantees that on error the rangeset is not modified. Remove
+ * might need to split an element if its range completely covers the
+ * removed range, in which case ENOMEM might be returned.
+ */
+void rangeset_remove_all(struct rangeset *rs);
+int rangeset_remove(struct rangeset *rs, uint64_t start, uint64_t end);
+int rangeset_remove_pred(struct rangeset *rs, uint64_t start,
+ uint64_t end, rs_pred_t pred);
+
+/*
+ * Returns the pointer to the app data with the struct rs_el embedded
+ * at the beginning.
+ */
+void *rangeset_get(struct rangeset *rs, uint64_t place);
+
+/*
+ * Copies src_rs entries into dst_rs. dst_rs must be empty.
+ * Leaves dst_rs empty on failure.
+ */
+int rangeset_copy(struct rangeset *dst_rs, struct rangeset *src_rs);
+
+#endif
+
+#endif
Index: sys/vm/pmap.h
===================================================================
--- sys/vm/pmap.h
+++ sys/vm/pmap.h
@@ -168,6 +168,7 @@
void pmap_unwire(pmap_t pmap, vm_offset_t start, vm_offset_t end);
void pmap_zero_page(vm_page_t);
void pmap_zero_page_area(vm_page_t, int off, int size);
+int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
#define pmap_resident_count(pm) ((pm)->pm_stats.resident_count)
#define pmap_wired_count(pm) ((pm)->pm_stats.wired_count)
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -479,8 +479,20 @@
fault_flags, true);
}
VM_OBJECT_WUNLOCK(fs->first_object);
- pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type | (wired ?
- PMAP_ENTER_WIRED : 0), psind);
+ rv = pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type |
+ (wired ? PMAP_ENTER_WIRED : 0), psind);
+#if defined(__amd64__)
+ if (psind > 0 && rv == KERN_FAILURE) {
+ for (i = 0; i < npages; i++) {
+ rv = pmap_enter(fs->map->pmap, vaddr + ptoa(i),
+ &m[i], prot, fault_type |
+ (wired ? PMAP_ENTER_WIRED : 0), 0);
+ MPASS(rv == KERN_SUCCESS);
+ }
+ }
+#else
+ MPASS(rv == KERN_SUCCESS);
+#endif
VM_OBJECT_WLOCK(fs->first_object);
m_mtx = NULL;
for (i = 0; i < npages; i++) {
Index: sys/vm/vm_map.c
===================================================================
--- sys/vm/vm_map.c
+++ sys/vm/vm_map.c
@@ -3424,7 +3424,7 @@
vm_map_t new_map, old_map;
vm_map_entry_t new_entry, old_entry;
vm_object_t object;
- int locked;
+ int error, locked;
vm_inherit_t inh;
old_map = &vm1->vm_map;
@@ -3433,6 +3433,7 @@
pmap_pinit);
if (vm2 == NULL)
return (NULL);
+
vm2->vm_taddr = vm1->vm_taddr;
vm2->vm_daddr = vm1->vm_daddr;
vm2->vm_maxsaddr = vm1->vm_maxsaddr;
@@ -3443,6 +3444,14 @@
locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
KASSERT(locked, ("vmspace_fork: lock failed"));
+ error = pmap_vmspace_copy(new_map->pmap, old_map->pmap);
+ if (error != 0) {
+ sx_xunlock(&old_map->lock);
+ sx_xunlock(&new_map->lock);
+ vmspace_free(vm2);
+ return (NULL);
+ }
+
old_entry = old_map->header.next;
while (old_entry != &old_map->header) {
Index: sys/x86/include/specialreg.h
===================================================================
--- sys/x86/include/specialreg.h
+++ sys/x86/include/specialreg.h
@@ -77,6 +77,7 @@
#define CR4_XSAVE 0x00040000 /* XSETBV/XGETBV */
#define CR4_SMEP 0x00100000 /* Supervisor-Mode Execution Prevention */
#define CR4_SMAP 0x00200000 /* Supervisor-Mode Access Prevention */
+#define CR4_PKE 0x00400000 /* Protection Keys Enable */
/*
* Bits in AMD64 special registers. EFER is 64 bits wide.
Index: sys/x86/include/sysarch.h
===================================================================
--- sys/x86/include/sysarch.h
+++ sys/x86/include/sysarch.h
@@ -52,6 +52,8 @@
#define I386_GET_GSBASE 9
#define I386_SET_GSBASE 10
#define I386_GET_XFPUSTATE 11
+#define I386_SET_PKRU 12
+#define I386_CLEAR_PKRU 13
/* Leave space for 0-127 for to avoid translating syscalls */
#define AMD64_GET_FSBASE 128
@@ -59,6 +61,12 @@
#define AMD64_GET_GSBASE 130
#define AMD64_SET_GSBASE 131
#define AMD64_GET_XFPUSTATE 132
+#define AMD64_SET_PKRU 133
+#define AMD64_CLEAR_PKRU 134
+
+/* Flags for AMD64_SET_PKRU */
+#define AMD64_PKRU_EXCL 0x0001
+#define AMD64_PKRU_PERSIST 0x0002
struct i386_ioperm_args {
unsigned int start;
@@ -94,12 +102,26 @@
int len;
};
+struct i386_set_pkru {
+ unsigned int addr; /* start of the address range */
+ unsigned int len; /* length of the range, in bytes */
+ unsigned int keyidx; /* PKRU protection key index */
+ int flags; /* presumably the AMD64_PKRU_* flags — confirm they are shared with i386 */
+};
+
struct amd64_get_xfpustate {
void *addr;
int len;
};
#endif
+struct amd64_set_pkru {
+ void *addr; /* start of the address range */
+ unsigned long len; /* length of the range, in bytes */
+ unsigned int keyidx; /* PKRU protection key index */
+ int flags; /* AMD64_PKRU_EXCL, AMD64_PKRU_PERSIST */
+};
+
#ifndef _KERNEL
union descriptor;
struct dbreg;
@@ -120,6 +142,11 @@
int amd64_get_gsbase(void **);
int amd64_set_fsbase(void *);
int amd64_set_gsbase(void *);
+int x86_pkru_get_perm(unsigned int keyidx, int *access, int *modify);
+int x86_pkru_set_perm(unsigned int keyidx, int access, int modify);
+int x86_pkru_protect_range(void *addr, unsigned long len, unsigned int keyidx,
+ int flag);
+int x86_pkru_unprotect_range(void *addr, unsigned long len);
int sysarch(int, void *);
__END_DECLS
#else
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Nov 22, 9:59 PM (9 h, 2 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25954998
Default Alt Text
D18893.id53208.diff (45 KB)
Attached To
Mode
D18893: Add support for Intel userspace protection keys feature on Skylake Xeons.
Attached
Detach File
Event Timeline
Log In to Comment