# Per-process KPTI control for FreeBSD/amd64 (unified diff).
#
# What the hunks below do:
#  - sys/x86/include/procctl.h (new) defines the MD procctl verbs
#    PROC_KPTI_CTL and PROC_KPTI_STATUS, based at PROC_PROCCTL_MD_MIN,
#    which sys/sys/procctl.h now defines.  The amd64 and i386
#    machine/procctl.h wrappers (new) include the x86 header.
#  - sys_procctl() and its freebsd32 counterpart forward every verb that is
#    >= PROC_PROCCTL_MD_MIN to the new cpu_procctl().
#  - struct mdproc gains md_flags with the P_MD_KPTI bit.  pmap_pinit0()
#    sets the bit on curproc when the global `pti` is on, and the PT_X86
#    pmap setup now keys the user page table allocation off curproc's bit
#    instead of the global.
#  - exec reuses the existing vmspace only if cpu_exec_vmspace_reuse()
#    agrees, i.e. the process flag and the presence of pm_ucr3 match.
#  - The pmap_activate_sw_* variants take the thread so that
#    pmap_activate_sw_pti_post() can set tss_rsp0 to either the PTI
#    trampoline stack or td_pcb; cpu_switch.S makes the same choice with
#    cmove instead of a branch.
#  - proccontrol(1) gains a "kpti" mode when PROC_KPTI_CTL is defined.
#
# Review notes:
#  - NOTE(review): this copy of the patch had lost its line structure.  Line
#    breaks, indentation and blank lines were rebuilt so that every hunk
#    matches the old/new counts in its @@ header; confirm the context lines
#    against the target tree before applying (patch -l may be needed).
#  - NOTE(review): every #include target was missing.  Targets on added
#    lines were restored from what the patch itself uses (priv_check/PRIV_IO,
#    PROC_KPTI_*, P_PID, the new MD headers) -- confirm.  Context #include
#    lines could not be recovered and are left without a target, so the
#    first vm_machdep.c hunk and the sys/sys/procctl.h hunk will not apply
#    until they are filled in.
#  - NOTE(review): the amd64 proc.h lock-list hunk replaces the "k" line
#    with text that differs only in whitespace; the exact spacing is a
#    guess.
#  - cpu_exec_vmspace_reuse() tests curproc rather than its `p` argument;
#    `p` is now marked __unused, the same way this patch marks unused
#    parameters in pmap.c.
#  - NOTE(review): within this diff only amd64 defines cpu_procctl() and
#    cpu_exec_vmspace_reuse(), and only amd64/i386 get machine/procctl.h,
#    while kern_exec.c, kern_procctl.c and sys/sys/procctl.h use them
#    unconditionally.  Presumably the other architectures (and i386's
#    vm_machdep.c) need matching stubs/headers -- confirm.
#
Index: sys/amd64/amd64/cpu_switch.S
===================================================================
--- sys/amd64/amd64/cpu_switch.S
+++ sys/amd64/amd64/cpu_switch.S
@@ -209,14 +209,11 @@
 done_tss:
 	movq	%r8,PCPU(RSP0)
 	movq	%r8,PCPU(CURPCB)
-	/* Update the TSS_RSP0 pointer for the next interrupt */
-	cmpq	$~0,PCPU(UCR3)
-	je	1f
 	movq	PCPU(PTI_RSP0),%rax
+	cmpq	$~0,PCPU(UCR3)
+	cmove	%r8,%rax
 	movq	%rax,TSS_RSP0(%rdx)
-	jmp	2f
-1:	movq	%r8,TSS_RSP0(%rdx)
-2:	movq	%r12,PCPU(CURTHREAD)		/* into next thread */
+	movq	%r12,PCPU(CURTHREAD)		/* into next thread */
 
 	/* Test if debug registers should be restored. */
 	testl	$PCB_DBREGS,PCB_FLAGS(%r8)
Index: sys/amd64/amd64/pmap.c
===================================================================
--- sys/amd64/amd64/pmap.c
+++ sys/amd64/amd64/pmap.c
@@ -2853,6 +2853,7 @@
 void
 pmap_pinit0(pmap_t pmap)
 {
+	struct proc *p;
 	int i;
 
 	PMAP_LOCK_INIT(pmap);
@@ -2871,6 +2872,12 @@
 		pmap->pm_pcids[i].pm_gen = 1;
 	}
 	pmap_activate_boot(pmap);
+	if (pti) {
+		p = curproc;
+		PROC_LOCK(p);
+		p->p_md.md_flags |= P_MD_KPTI;
+		PROC_UNLOCK(p);
+	}
 
 	if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
 		pmap_pkru_ranges_zone = uma_zcreate("pkru ranges",
@@ -2957,7 +2964,7 @@
 	if (pm_type == PT_X86) {
 		pmap->pm_cr3 = pml4phys;
 		pmap_pinit_pml4(pml4pg);
-		if (pti) {
+		if ((curproc->p_md.md_flags & P_MD_KPTI) != 0) {
 			pml4pgu = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
 			    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_WAITOK);
 			pmap->pm_pml4u = (pml4_entry_t *)PHYS_TO_DMAP(
@@ -7759,12 +7766,11 @@
 }
 
 static void
-pmap_activate_sw_pti_post(pmap_t pmap)
+pmap_activate_sw_pti_post(struct thread *td, pmap_t pmap)
 {
 
-	if (pmap->pm_ucr3 != PMAP_NO_CR3)
-		PCPU_GET(tssp)->tss_rsp0 = ((vm_offset_t)PCPU_PTR(pti_stack) +
-		    PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful;
+	PCPU_GET(tssp)->tss_rsp0 = pmap->pm_ucr3 != PMAP_NO_CR3 ?
+	    PCPU_GET(pti_rsp0) : (uintptr_t)td->td_pcb;
 }
 
 static void inline
@@ -7811,15 +7817,16 @@
 }
 
 static void
-pmap_activate_sw_pcid_invpcid_pti(pmap_t pmap, u_int cpuid)
+pmap_activate_sw_pcid_invpcid_pti(struct thread *td, pmap_t pmap, u_int cpuid)
 {
 
 	pmap_activate_sw_pcid_pti(pmap, cpuid, true);
-	pmap_activate_sw_pti_post(pmap);
+	pmap_activate_sw_pti_post(td, pmap);
 }
 
 static void
-pmap_activate_sw_pcid_noinvpcid_pti(pmap_t pmap, u_int cpuid)
+pmap_activate_sw_pcid_noinvpcid_pti(struct thread *td, pmap_t pmap,
+    u_int cpuid)
 {
 	register_t rflags;
 
@@ -7843,11 +7850,12 @@
 	rflags = intr_disable();
 	pmap_activate_sw_pcid_pti(pmap, cpuid, false);
 	intr_restore(rflags);
-	pmap_activate_sw_pti_post(pmap);
+	pmap_activate_sw_pti_post(td, pmap);
 }
 
 static void
-pmap_activate_sw_pcid_nopti(pmap_t pmap, u_int cpuid)
+pmap_activate_sw_pcid_nopti(struct thread *td __unused, pmap_t pmap,
+    u_int cpuid)
 {
 	uint64_t cached, cr3;
 
@@ -7862,17 +7870,19 @@
 }
 
 static void
-pmap_activate_sw_pcid_noinvpcid_nopti(pmap_t pmap, u_int cpuid)
+pmap_activate_sw_pcid_noinvpcid_nopti(struct thread *td __unused, pmap_t pmap,
+    u_int cpuid)
 {
 	register_t rflags;
 
 	rflags = intr_disable();
-	pmap_activate_sw_pcid_nopti(pmap, cpuid);
+	pmap_activate_sw_pcid_nopti(td, pmap, cpuid);
 	intr_restore(rflags);
 }
 
 static void
-pmap_activate_sw_nopcid_nopti(pmap_t pmap, u_int cpuid __unused)
+pmap_activate_sw_nopcid_nopti(struct thread *td __unused, pmap_t pmap,
+    u_int cpuid __unused)
 {
 
 	load_cr3(pmap->pm_cr3);
@@ -7880,16 +7890,18 @@
 }
 
 static void
-pmap_activate_sw_nopcid_pti(pmap_t pmap, u_int cpuid __unused)
+pmap_activate_sw_nopcid_pti(struct thread *td, pmap_t pmap,
+    u_int cpuid __unused)
 {
 
-	pmap_activate_sw_nopcid_nopti(pmap, cpuid);
+	pmap_activate_sw_nopcid_nopti(td, pmap, cpuid);
 	PCPU_SET(kcr3, pmap->pm_cr3);
 	PCPU_SET(ucr3, pmap->pm_ucr3);
-	pmap_activate_sw_pti_post(pmap);
+	pmap_activate_sw_pti_post(td, pmap);
 }
 
-DEFINE_IFUNC(static, void, pmap_activate_sw_mode, (pmap_t, u_int), static)
+DEFINE_IFUNC(static, void, pmap_activate_sw_mode, (struct thread *, pmap_t,
+    u_int), static)
 {
 
 	if (pmap_pcid_enabled && pti && invpcid_works)
@@ -7922,7 +7934,7 @@
 #else
 	CPU_SET(cpuid, &pmap->pm_active);
 #endif
-	pmap_activate_sw_mode(pmap, cpuid);
+	pmap_activate_sw_mode(td, pmap, cpuid);
 #ifdef SMP
 	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
 #else
Index: sys/amd64/amd64/vm_machdep.c
===================================================================
--- sys/amd64/amd64/vm_machdep.c
+++ sys/amd64/amd64/vm_machdep.c
@@ -59,13 +59,16 @@
 #include
 #include
 #include
+#include <sys/priv.h>
 #include
+#include <sys/procctl.h>
 #include
 #include
 #include
 #include
 #include
 #include
+#include <sys/wait.h>
 
 #include
 #include
@@ -369,6 +372,74 @@
 	cpu_thread_clean(td);
 }
 
+bool
+cpu_exec_vmspace_reuse(struct proc *p __unused, vm_map_t map)
+{
+
+	return (((curproc->p_md.md_flags & P_MD_KPTI) != 0) ==
+	    (vm_map_pmap(map)->pm_ucr3 != PMAP_NO_CR3));
+}
+
+static void
+cpu_procctl_kpti(struct proc *p, int com, int *val)
+{
+
+	if (com == PROC_KPTI_CTL) {
+		if (pti && *val == PROC_KPTI_CTL_ENABLE_ON_EXEC)
+			p->p_md.md_flags |= P_MD_KPTI;
+		if (*val == PROC_KPTI_CTL_DISABLE_ON_EXEC)
+			p->p_md.md_flags &= ~P_MD_KPTI;
+	} else /* PROC_KPTI_STATUS */ {
+		*val = (p->p_md.md_flags & P_MD_KPTI) != 0 ?
+		    PROC_KPTI_CTL_ENABLE_ON_EXEC:
+		    PROC_KPTI_CTL_DISABLE_ON_EXEC;
+		if (vmspace_pmap(p->p_vmspace)->pm_ucr3 != PMAP_NO_CR3)
+			*val |= PROC_KPTI_STATUS_ACTIVE;
+	}
+}
+
+int
+cpu_procctl(struct thread *td, int idtype, id_t id, int com, void *data)
+{
+	struct proc *p;
+	int error, val;
+
+	switch (com) {
+	case PROC_KPTI_CTL:
+	case PROC_KPTI_STATUS:
+		if (idtype != P_PID) {
+			error = EINVAL;
+			break;
+		}
+		if (com == PROC_KPTI_CTL) {
+			/* sad but true and not a joke */
+			error = priv_check(td, PRIV_IO);
+			if (error != 0)
+				break;
+			error = copyin(data, &val, sizeof(val));
+			if (error != 0)
+				break;
+			if (val != PROC_KPTI_CTL_ENABLE_ON_EXEC &&
+			    val != PROC_KPTI_CTL_DISABLE_ON_EXEC) {
+				error = EINVAL;
+				break;
+			}
+		}
+		error = pget(id, PGET_CANSEE | PGET_NOTWEXIT | PGET_NOTID, &p);
+		if (error == 0) {
+			cpu_procctl_kpti(p, com, &val);
+			PROC_UNLOCK(p);
+			if (com == PROC_KPTI_STATUS)
+				error = copyout(&val, data, sizeof(val));
+		}
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	return (error);
+}
+
 void
 cpu_set_syscall_retval(struct thread *td, int error)
 {
Index: sys/amd64/include/proc.h
===================================================================
--- sys/amd64/include/proc.h
+++ sys/amd64/include/proc.h
@@ -40,7 +40,8 @@
 
 /*
  * List of locks
- *	k - only accessed by curthread
+ *	c  - proc lock
+ *	k  - only accessed by curthread
  *	pp - pmap.c:invl_gen_mtx
  */
 
@@ -69,8 +70,11 @@
 struct mdproc {
 	struct proc_ldt *md_ldt;	/* (t) per-process ldt */
 	struct system_segment_descriptor md_ldt_sd;
+	u_int md_flags;			/* (c) md process flags P_MD */
 };
 
+#define	P_MD_KPTI		0x00000001	/* Enable KPTI on exec */
+
 #define	KINFO_PROC_SIZE 1088
 #define	KINFO_PROC32_SIZE 768
 
Index: sys/amd64/include/procctl.h
===================================================================
--- /dev/null
+++ sys/amd64/include/procctl.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/procctl.h>
Index: sys/compat/freebsd32/freebsd32_misc.c
===================================================================
--- sys/compat/freebsd32/freebsd32_misc.c
+++ sys/compat/freebsd32/freebsd32_misc.c
@@ -3327,6 +3327,10 @@
 	} x32;
 	int error, error1, flags, signum;
 
+	if (uap->com >= PROC_PROCCTL_MD_MIN)
+		return (cpu_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id),
+		    uap->com, PTRIN(uap->data)));
+
 	switch (uap->com) {
 	case PROC_ASLR_CTL:
 	case PROC_SPROTECT:
Index: sys/i386/include/procctl.h
===================================================================
--- /dev/null
+++ sys/i386/include/procctl.h
@@ -0,0 +1,6 @@
+/*-
+ * This file is in the public domain.
+ */
+/* $FreeBSD$ */
+
+#include <x86/procctl.h>
Index: sys/kern/kern_exec.c
===================================================================
--- sys/kern/kern_exec.c
+++ sys/kern/kern_exec.c
@@ -1100,7 +1100,8 @@
 	else
 		sv_minuser = MAX(sv->sv_minuser, PAGE_SIZE);
 	if (vmspace->vm_refcnt == 1 && vm_map_min(map) == sv_minuser &&
-	    vm_map_max(map) == sv->sv_maxuser) {
+	    vm_map_max(map) == sv->sv_maxuser &&
+	    cpu_exec_vmspace_reuse(p, map)) {
 		shmexit(vmspace);
 		pmap_remove_pages(vmspace_pmap(vmspace));
 		vm_map_remove(map, vm_map_min(map), vm_map_max(map));
Index: sys/kern/kern_procctl.c
===================================================================
--- sys/kern/kern_procctl.c
+++ sys/kern/kern_procctl.c
@@ -494,6 +494,10 @@
 	} x;
 	int error, error1, flags, signum;
 
+	if (uap->com >= PROC_PROCCTL_MD_MIN)
+		return (cpu_procctl(td, uap->idtype, uap->id,
+		    uap->com, uap->data));
+
 	switch (uap->com) {
 	case PROC_ASLR_CTL:
 	case PROC_SPROTECT:
Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -94,7 +94,7 @@
     "struct proc KBI p_filemon");
 _Static_assert(offsetof(struct proc, p_comm) == 0x3e8,
     "struct proc KBI p_comm");
-_Static_assert(offsetof(struct proc, p_emuldata) == 0x4c0,
+_Static_assert(offsetof(struct proc, p_emuldata) == 0x4c8,
     "struct proc KBI p_emuldata");
 #endif
 #ifdef __i386__
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -1093,9 +1093,12 @@
 void	cpu_exit(struct thread *);
 void	exit1(struct thread *, int, int) __dead2;
 void	cpu_copy_thread(struct thread *td, struct thread *td0);
+bool	cpu_exec_vmspace_reuse(struct proc *p, struct vm_map *map);
 int	cpu_fetch_syscall_args(struct thread *td);
 void	cpu_fork(struct thread *, struct proc *, struct thread *, int);
 void	cpu_fork_kthread_handler(struct thread *, void (*)(void *), void *);
+int	cpu_procctl(struct thread *td, int idtype, id_t id, int com,
+	    void *data);
 void	cpu_set_syscall_retval(struct thread *, int);
 void	cpu_set_upcall(struct thread *, void (*)(void *), void *,
 	    stack_t *);
Index: sys/sys/procctl.h
===================================================================
--- sys/sys/procctl.h
+++ sys/sys/procctl.h
@@ -41,6 +41,10 @@
 #include
 #endif
 
+/* MD PROCCTL verbs start at 0x10000000 */
+#define	PROC_PROCCTL_MD_MIN	0x10000000
+#include <machine/procctl.h>
+
 #define	PROC_SPROTECT		1	/* set protected state */
 #define	PROC_REAP_ACQUIRE	2	/* reaping enable */
 #define	PROC_REAP_RELEASE	3	/* reaping disable */
Index: sys/x86/include/procctl.h
===================================================================
--- /dev/null
+++ sys/x86/include/procctl.h
@@ -0,0 +1,43 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _X86_PROCCTL_H
+#define	_X86_PROCCTL_H
+
+#define	PROC_KPTI_CTL		(PROC_PROCCTL_MD_MIN + 0)
+#define	PROC_KPTI_STATUS	(PROC_PROCCTL_MD_MIN + 1)
+
+#define	PROC_KPTI_CTL_ENABLE_ON_EXEC	1
+#define	PROC_KPTI_CTL_DISABLE_ON_EXEC	2
+#define	PROC_KPTI_STATUS_ACTIVE		0x80000000
+
+#endif
Index: usr.bin/proccontrol/proccontrol.c
===================================================================
--- usr.bin/proccontrol/proccontrol.c
+++ usr.bin/proccontrol/proccontrol.c
@@ -43,6 +43,9 @@
 	MODE_INVALID,
 	MODE_TRACE,
 	MODE_TRAPCAP,
+#ifdef PROC_KPTI_CTL
+	MODE_KPTI,
+#endif
 };
 
 static pid_t
@@ -59,11 +62,18 @@
 	return (res);
 }
 
+#ifdef PROC_KPTI_CTL
+#define	KPTI_USAGE "|kpti"
+#else
+#define	KPTI_USAGE
+#endif
+
 static void __dead2
 usage(void)
 {
 
-	fprintf(stderr, "Usage: proccontrol -m (aslr|trace|trapcap) [-q] "
+	fprintf(stderr, "Usage: proccontrol -m (aslr|trace|trapcap"
+	    KPTI_USAGE") [-q] "
 	    "[-s (enable|disable)] [-p pid | command]\n");
 	exit(1);
 }
@@ -88,6 +98,10 @@
 				mode = MODE_TRACE;
 			else if (strcmp(optarg, "trapcap") == 0)
 				mode = MODE_TRAPCAP;
+#ifdef PROC_KPTI_CTL
+			else if (strcmp(optarg, "kpti") == 0)
+				mode = MODE_KPTI;
+#endif
 			else
 				usage();
 			break;
@@ -133,6 +147,11 @@
 		case MODE_TRAPCAP:
 			error = procctl(P_PID, pid, PROC_TRAPCAP_STATUS, &arg);
 			break;
+#ifdef PROC_KPTI_CTL
+		case MODE_KPTI:
+			error = procctl(P_PID, pid, PROC_KPTI_STATUS, &arg);
+			break;
+#endif
 		default:
 			usage();
 			break;
@@ -175,6 +194,22 @@
 				break;
 			}
 			break;
+#ifdef PROC_KPTI_CTL
+		case MODE_KPTI:
+			switch (arg & ~PROC_KPTI_STATUS_ACTIVE) {
+			case PROC_KPTI_CTL_ENABLE_ON_EXEC:
+				printf("enabled");
+				break;
+			case PROC_KPTI_CTL_DISABLE_ON_EXEC:
+				printf("disabled");
+				break;
+			}
+			if ((arg & PROC_KPTI_STATUS_ACTIVE) != 0)
+				printf(", active\n");
+			else
+				printf(", not active\n");
+			break;
+#endif
 		}
 	} else {
 		switch (mode) {
@@ -193,6 +228,13 @@
 			    PROC_TRAPCAP_CTL_DISABLE;
 			error = procctl(P_PID, pid, PROC_TRAPCAP_CTL, &arg);
 			break;
+#ifdef PROC_KPTI_CTL
+		case MODE_KPTI:
+			arg = enable ? PROC_KPTI_CTL_ENABLE_ON_EXEC :
+			    PROC_KPTI_CTL_DISABLE_ON_EXEC;
+			error = procctl(P_PID, pid, PROC_KPTI_CTL, &arg);
+			break;
+#endif
 		default:
 			usage();
 			break;