Changeset View
Changeset View
Standalone View
Standalone View
sys/kern/kern_rseq.c
- This file was added.
/*- | |||||
* Copyright (c) 2021 The FreeBSD Foundation | |||||
* | |||||
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation. | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
#include <sys/param.h> | |||||
#include <sys/systm.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/mutex.h> | |||||
#include <sys/proc.h> | |||||
#include <sys/sysproto.h> | |||||
#include <sys/rseq.h> | |||||
static void | |||||
rseq_inactivate(struct thread *td) | |||||
{ | |||||
td->td_rseq_abi = NULL; | |||||
} | |||||
static void | |||||
rseq_inactivate_sig(struct thread *td, void *addr, int si_code) | |||||
{ | |||||
ksiginfo_t ksi; | |||||
rseq_inactivate(td); | |||||
ksiginfo_init_trap(&ksi); | |||||
ksi.ksi_signo = SIGSEGV; | |||||
ksi.ksi_code = si_code; | |||||
ksi.ksi_trapno = 0; | |||||
ksi.ksi_addr = addr; | |||||
trapsignal(td, &ksi); | |||||
} | |||||
static bool | |||||
rseq_check_signature(struct thread *td, const struct rseq_cs *rc) | |||||
{ | |||||
void *usig_addr; | |||||
uint32_t usig; | |||||
int error; | |||||
usig_addr = (void *)(rc->abort_ip - sizeof(usig)); | |||||
error = copyin(usig_addr, &usig, sizeof(usig)); | |||||
if (error != 0) { | |||||
rseq_inactivate_sig(td, usig_addr, SEGV_RSEQ_R); | |||||
return (true); | |||||
} | |||||
if (usig != td->td_rseq_sig) { | |||||
rseq_inactivate_sig(td, usig_addr, SEGV_RSEQ_SIG); | |||||
return (true); | |||||
} | |||||
return (false); | |||||
} | |||||
static void | |||||
ast_rseq(struct thread *td, int tda __unused) | |||||
{ | |||||
struct rseq rs; | |||||
struct rseq_cs rc; | |||||
register_t pc; | |||||
int cpu, error; | |||||
bool clear_cs; | |||||
if (td->td_rseq_abi == NULL) | |||||
return; | |||||
/* | |||||
* We cannot enter critical section there to keep td_oncpu | |||||
* valid due to userspace access. We do not even want to | |||||
* sched_pin() for the same reason. | |||||
* | |||||
* It is fine to get a context switch after reading td_oncpu, | |||||
* since this would cause new AST pending and we re-enter this | |||||
* function to update rseq cpu number. | |||||
* | |||||
* Microoptimize 64bit architectures by doing single 64bit | |||||
* write for cpu ids. For instance, on SMAP-enabled amd64 | |||||
* this saves two serialization instructions STAC/CLAC. | |||||
*/ | |||||
cpu = td->td_oncpu; | |||||
#ifdef __LP64__ | |||||
rs.cpu_id_start = cpu; | |||||
rs.cpu_id = cpu; | |||||
error = suword64((char *)td->td_rseq_abi + offsetof(struct rseq, | |||||
cpu_id_start), *(uint64_t *)(char *)&rs.cpu_id_start); | |||||
#else | |||||
error = suword((char *)td->td_rseq_abi + offsetof(struct rseq, | |||||
cpu_id_start), cpu); | |||||
if (error == 0) { | |||||
error = suword((char *)td->td_rseq_abi + | |||||
offsetof(struct rseq, cpu_id), cpu); | |||||
} | |||||
#endif | |||||
if (error != 0) { | |||||
rseq_inactivate_sig(td, td->td_rseq_abi, SEGV_RSEQ_W); | |||||
return; | |||||
} | |||||
error = copyin(td->td_rseq_abi, &rs, sizeof(rs)); | |||||
if (error != 0) { | |||||
rseq_inactivate_sig(td, td->td_rseq_abi, SEGV_RSEQ_R); | |||||
return; | |||||
} | |||||
if (rs.rseq_cs.ptr64 == 0) | |||||
return; | |||||
clear_cs = false; | |||||
critical_enter(); | |||||
if ((td->td_flags & (TDF_RSEQ_CLRCS | TDF_RSEQ_MB)) == 0 && | |||||
(rs.flags & RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT) != 0 && | |||||
((rs.flags & RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE) != 0 || | |||||
td->td_oncpu == td->td_lastcpu)) | |||||
return; | |||||
critical_exit(); | |||||
error = copyin((void *)rs.rseq_cs.ptr64, &rc, sizeof(rc)); | |||||
if (error != 0) { | |||||
rseq_inactivate_sig(td, (void *)rs.rseq_cs.ptr64, SEGV_RSEQ_R); | |||||
return; | |||||
} | |||||
if (rc.version != 0) { | |||||
rseq_inactivate_sig(td, (void *)rs.rseq_cs.ptr64, SEGV_RSEQ_R); | |||||
return; | |||||
} | |||||
critical_enter(); | |||||
if ((td->td_flags & (TDF_RSEQ_CLRCS | TDF_RSEQ_MB)) == 0 && | |||||
(rc.flags & RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT) != 0 && | |||||
((rc.flags & RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE) != 0 || | |||||
td->td_oncpu == td->td_lastcpu)) | |||||
return; | |||||
critical_exit(); | |||||
if ((td->td_flags & (TDF_RSEQ_CLRCS | TDF_RSEQ_MB)) != 0) { | |||||
if ((td->td_flags & TDF_RSEQ_CLRCS) != 0) | |||||
clear_cs = true; | |||||
thread_lock(td); | |||||
td->td_flags &= ~(TDF_RSEQ_CLRCS | TDF_RSEQ_MB); | |||||
thread_unlock(td); | |||||
} | |||||
pc = TRAPF_PC(td->td_frame); | |||||
if (!clear_cs && | |||||
pc >= rc.start_ip && pc < rc.start_ip + rc.post_commit_offset) { | |||||
if (rseq_check_signature(td, &rc)) | |||||
return; | |||||
TRAPF_PC(td->td_frame) = rc.abort_ip; | |||||
clear_cs = true; | |||||
} | |||||
if (clear_cs) { | |||||
if (suword64((char *)td->td_rseq_abi + offsetof(struct rseq, | |||||
rseq_cs.ptr), 0) == -1) { | |||||
rseq_inactivate_sig(td, (char *)td->td_rseq_abi + | |||||
offsetof(struct rseq, rseq_cs.ptr), | |||||
SEGV_RSEQ_W); | |||||
return; | |||||
} | |||||
} | |||||
} | |||||
void | |||||
rseq_before_sig(struct thread *td) | |||||
{ | |||||
struct rseq rs; | |||||
struct rseq_cs rc; | |||||
int error; | |||||
td->td_pflags2 &= ~TDP2_RSEQ_SIG; | |||||
if (td->td_rseq_abi == NULL) | |||||
return; | |||||
error = copyin(td->td_rseq_abi, &rs, sizeof(rs)); | |||||
if (error != 0) { | |||||
rseq_inactivate_sig(td, td->td_rseq_abi, SEGV_RSEQ_R); | |||||
return; | |||||
} | |||||
if (rs.rseq_cs.ptr64 == 0 || | |||||
(rs.flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) != 0) | |||||
return; | |||||
error = copyin((void *)rs.rseq_cs.ptr64, &rc, sizeof(rc)); | |||||
if (error != 0) { | |||||
rseq_inactivate_sig(td, (void *)rs.rseq_cs.ptr64, SEGV_RSEQ_R); | |||||
return; | |||||
} | |||||
if ((rc.flags & RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL) != 0) | |||||
return; | |||||
if (rseq_check_signature(td, &rc)) | |||||
return; | |||||
td->td_pflags2 |= TDP2_RSEQ_SIG; | |||||
td->td_rseq_start_ip = rc.start_ip; | |||||
td->td_rseq_end_ip = rc.start_ip + rc.post_commit_offset; | |||||
td->td_rseq_abort_ip = rc.abort_ip; | |||||
} | |||||
void | |||||
rseq_on_sig(struct thread *td) | |||||
{ | |||||
register_t pc; | |||||
if ((td->td_pflags2 & TDP2_RSEQ_SIG) == 0) | |||||
return; | |||||
td->td_pflags2 &= ~TDP2_RSEQ_SIG; | |||||
pc = TRAPF_PC(td->td_frame); | |||||
if (pc >= td->td_rseq_start_ip && pc < td->td_rseq_end_ip) { | |||||
TRAPF_PC(td->td_frame) = td->td_rseq_abort_ip; | |||||
thread_lock(td); | |||||
ast_sched_locked(td, TDA_RSEQ); | |||||
td->td_flags |= TDF_RSEQ_CLRCS; | |||||
thread_unlock(td); | |||||
} | |||||
} | |||||
static int | |||||
kern_rseq(struct thread *td, uintptr_t rseq, uint32_t rseqlen, int flags, | |||||
uint32_t sig) | |||||
{ | |||||
if (rseqlen != sizeof(struct rseq)) | |||||
return (EINVAL); | |||||
if (flags == RSEQ_FLAG_UNREGISTER) { | |||||
if (rseq != 0 || td->td_rseq_abi == NULL) | |||||
return (EINVAL); | |||||
if (sig != td->td_rseq_sig) | |||||
return (EPERM); | |||||
rseq_inactivate(td); | |||||
return (0); | |||||
} | |||||
if (td->td_rseq_abi != NULL) | |||||
return (EBUSY); | |||||
if (flags != 0 || rseq == 0 || | |||||
trunc_page(rseq) != trunc_page(rseq + rseqlen)) | |||||
return (EINVAL); | |||||
td->td_rseq_abi = (void *)rseq; | |||||
td->td_rseq_sig = sig; | |||||
ast_sched(td, TDA_RSEQ); | |||||
return (0); | |||||
} | |||||
int | |||||
sys_rseq(struct thread *td, struct rseq_args *uap) | |||||
{ | |||||
return (kern_rseq(td, (uintptr_t)uap->rseq, uap->rseqlen, | |||||
uap->flags, uap->sig)); | |||||
} | |||||
static void | |||||
rseq_init(void *arg __unused) | |||||
{ | |||||
ast_register(TDA_RSEQ, ASTR_ASTF_REQUIRED, 0, ast_rseq); | |||||
} | |||||
SYSINIT(rseq, SI_SUB_P1003_1B, SI_ORDER_ANY, rseq_init, NULL); |