Changeset View
Changeset View
Standalone View
Standalone View
sys/amd64/linux/linux_sysvec.c
Show First 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | |||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/module.h> | #include <sys/module.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/resourcevar.h> | #include <sys/resourcevar.h> | ||||
#include <sys/stddef.h> | |||||
#include <sys/signalvar.h> | #include <sys/signalvar.h> | ||||
#include <sys/syscallsubr.h> | #include <sys/syscallsubr.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/sysent.h> | #include <sys/sysent.h> | ||||
#include <sys/sysproto.h> | #include <sys/sysproto.h> | ||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#include <sys/eventhandler.h> | #include <sys/eventhandler.h> | ||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
#include <vm/pmap.h> | #include <vm/pmap.h> | ||||
#include <vm/vm_extern.h> | #include <vm/vm_extern.h> | ||||
#include <vm/vm_map.h> | #include <vm/vm_map.h> | ||||
#include <vm/vm_object.h> | #include <vm/vm_object.h> | ||||
#include <vm/vm_page.h> | #include <vm/vm_page.h> | ||||
#include <vm/vm_param.h> | #include <vm/vm_param.h> | ||||
#include <machine/cpu.h> | #include <machine/cpu.h> | ||||
#include <machine/md_var.h> | #include <machine/md_var.h> | ||||
#include <machine/pcb.h> | #include <machine/pcb.h> | ||||
#include <machine/specialreg.h> | #include <machine/specialreg.h> | ||||
#include <machine/trap.h> | #include <machine/trap.h> | ||||
#include <x86/linux/linux_x86.h> | |||||
#include <amd64/linux/linux.h> | #include <amd64/linux/linux.h> | ||||
#include <amd64/linux/linux_proto.h> | #include <amd64/linux/linux_proto.h> | ||||
#include <compat/linux/linux_emul.h> | #include <compat/linux/linux_emul.h> | ||||
#include <compat/linux/linux_ioctl.h> | #include <compat/linux/linux_ioctl.h> | ||||
#include <compat/linux/linux_mib.h> | #include <compat/linux/linux_mib.h> | ||||
#include <compat/linux/linux_misc.h> | #include <compat/linux/linux_misc.h> | ||||
#include <compat/linux/linux_signal.h> | #include <compat/linux/linux_signal.h> | ||||
#include <compat/linux/linux_sysproto.h> | #include <compat/linux/linux_sysproto.h> | ||||
#include <compat/linux/linux_util.h> | #include <compat/linux/linux_util.h> | ||||
#include <compat/linux/linux_vdso.h> | #include <compat/linux/linux_vdso.h> | ||||
MODULE_VERSION(linux64, 1); | MODULE_VERSION(linux64, 1); | ||||
#define LINUX_VDSOPAGE_SIZE PAGE_SIZE * 2 | |||||
#define LINUX_VDSOPAGE_LA48 (VM_MAXUSER_ADDRESS_LA48 - \ | |||||
LINUX_VDSOPAGE_SIZE) | |||||
#define LINUX_SHAREDPAGE_LA48 (LINUX_VDSOPAGE_LA48 - PAGE_SIZE) | |||||
/* | |||||
* PAGE_SIZE - the size | |||||
* of the native SHAREDPAGE | |||||
*/ | |||||
#define LINUX_USRSTACK_LA48 LINUX_SHAREDPAGE_LA48 | |||||
#define LINUX_PS_STRINGS_LA48 (LINUX_USRSTACK_LA48 - \ | |||||
sizeof(struct ps_strings)) | |||||
static int linux_szsigcode; | static int linux_szsigcode; | ||||
static vm_object_t linux_shared_page_obj; | static vm_object_t linux_vdso_obj; | ||||
static char *linux_shared_page_mapping; | static char *linux_vdso_mapping; | ||||
extern char _binary_linux_locore_o_start; | extern char _binary_linux_vdso_so_o_start; | ||||
extern char _binary_linux_locore_o_end; | extern char _binary_linux_vdso_so_o_end; | ||||
static vm_offset_t linux_vdso_base; | |||||
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; | extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; | ||||
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); | SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); | ||||
static int linux_copyout_strings(struct image_params *imgp, | static int linux_copyout_strings(struct image_params *imgp, | ||||
uintptr_t *stack_base); | uintptr_t *stack_base); | ||||
static int linux_fixup_elf(uintptr_t *stack_base, | static int linux_fixup_elf(uintptr_t *stack_base, | ||||
struct image_params *iparams); | struct image_params *iparams); | ||||
static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); | static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); | ||||
static void linux_vdso_install(void *param); | static void linux_vdso_install(void *param); | ||||
static void linux_vdso_deinstall(void *param); | static void linux_vdso_deinstall(void *param); | ||||
static void linux_vdso_reloc(char *mapping, Elf_Addr offset); | |||||
static void linux_set_syscall_retval(struct thread *td, int error); | static void linux_set_syscall_retval(struct thread *td, int error); | ||||
static int linux_fetch_syscall_args(struct thread *td); | static int linux_fetch_syscall_args(struct thread *td); | ||||
static void linux_exec_setregs(struct thread *td, struct image_params *imgp, | static void linux_exec_setregs(struct thread *td, struct image_params *imgp, | ||||
uintptr_t stack); | uintptr_t stack); | ||||
static void linux_exec_sysvec_init(void *param); | |||||
static int linux_on_exec_vmspace(struct proc *p, | static int linux_on_exec_vmspace(struct proc *p, | ||||
struct image_params *imgp); | struct image_params *imgp); | ||||
static int linux_vsyscall(struct thread *td); | static int linux_vsyscall(struct thread *td); | ||||
#define LINUX_T_UNKNOWN 255 | #define LINUX_T_UNKNOWN 255 | ||||
static int _bsd_to_linux_trapcode[] = { | static int _bsd_to_linux_trapcode[] = { | ||||
LINUX_T_UNKNOWN, /* 0 */ | LINUX_T_UNKNOWN, /* 0 */ | ||||
6, /* 1 T_PRIVINFLT */ | 6, /* 1 T_PRIVINFLT */ | ||||
Show All 29 Lines | |||||
}; | }; | ||||
#define bsd_to_linux_trapcode(code) \ | #define bsd_to_linux_trapcode(code) \ | ||||
((code)<nitems(_bsd_to_linux_trapcode)? \ | ((code)<nitems(_bsd_to_linux_trapcode)? \ | ||||
_bsd_to_linux_trapcode[(code)]: \ | _bsd_to_linux_trapcode[(code)]: \ | ||||
LINUX_T_UNKNOWN) | LINUX_T_UNKNOWN) | ||||
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode); | LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode); | ||||
LINUX_VDSO_SYM_CHAR(linux_platform); | LINUX_VDSO_SYM_CHAR(linux_platform); | ||||
LINUX_VDSO_SYM_INTPTR(kern_timekeep_base); | |||||
LINUX_VDSO_SYM_INTPTR(kern_tsc_selector); | |||||
/* | /* | ||||
* If FreeBSD & Linux have a difference of opinion about what a trap | * If FreeBSD & Linux have a difference of opinion about what a trap | ||||
* means, deal with it here. | * means, deal with it here. | ||||
* | * | ||||
* MPSAFE | * MPSAFE | ||||
*/ | */ | ||||
static int | static int | ||||
▲ Show 20 Lines • Show All 96 Lines • ▼ Show 20 Lines | linux_copyout_auxargs(struct image_params *imgp, uintptr_t base) | ||||
int error, issetugid; | int error, issetugid; | ||||
p = imgp->proc; | p = imgp->proc; | ||||
args = (Elf64_Auxargs *)imgp->auxargs; | args = (Elf64_Auxargs *)imgp->auxargs; | ||||
argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, | argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, | ||||
M_WAITOK | M_ZERO); | M_WAITOK | M_ZERO); | ||||
issetugid = p->p_flag & P_SUGID ? 1 : 0; | issetugid = p->p_flag & P_SUGID ? 1 : 0; | ||||
AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, | AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, linux_vdso_base); | ||||
imgp->proc->p_sysent->sv_shared_page_base); | |||||
AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); | AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); | ||||
AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); | AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); | ||||
AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); | AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); | ||||
AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); | AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); | ||||
AUXARGS_ENTRY(pos, AT_PHENT, args->phent); | AUXARGS_ENTRY(pos, AT_PHENT, args->phent); | ||||
AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); | AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); | ||||
AUXARGS_ENTRY(pos, AT_BASE, args->base); | AUXARGS_ENTRY(pos, AT_BASE, args->base); | ||||
AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); | AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); | ||||
▲ Show 20 Lines • Show All 457 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
struct sysentvec elf_linux_sysvec = { | struct sysentvec elf_linux_sysvec = { | ||||
.sv_size = LINUX_SYS_MAXSYSCALL, | .sv_size = LINUX_SYS_MAXSYSCALL, | ||||
.sv_table = linux_sysent, | .sv_table = linux_sysent, | ||||
.sv_transtrap = linux_translate_traps, | .sv_transtrap = linux_translate_traps, | ||||
.sv_fixup = linux_fixup_elf, | .sv_fixup = linux_fixup_elf, | ||||
.sv_sendsig = linux_rt_sendsig, | .sv_sendsig = linux_rt_sendsig, | ||||
.sv_sigcode = &_binary_linux_locore_o_start, | .sv_sigcode = &_binary_linux_vdso_so_o_start, | ||||
.sv_szsigcode = &linux_szsigcode, | .sv_szsigcode = &linux_szsigcode, | ||||
.sv_name = "Linux ELF64", | .sv_name = "Linux ELF64", | ||||
.sv_coredump = elf64_coredump, | .sv_coredump = elf64_coredump, | ||||
.sv_imgact_try = linux_exec_imgact_try, | .sv_imgact_try = linux_exec_imgact_try, | ||||
.sv_minsigstksz = LINUX_MINSIGSTKSZ, | .sv_minsigstksz = LINUX_MINSIGSTKSZ, | ||||
.sv_minuser = VM_MIN_ADDRESS, | .sv_minuser = VM_MIN_ADDRESS, | ||||
.sv_maxuser = VM_MAXUSER_ADDRESS_LA48, | .sv_maxuser = VM_MAXUSER_ADDRESS_LA48, | ||||
.sv_usrstack = USRSTACK_LA48, | .sv_usrstack = LINUX_USRSTACK_LA48, | ||||
.sv_psstrings = PS_STRINGS_LA48, | .sv_psstrings = LINUX_PS_STRINGS_LA48, | ||||
.sv_stackprot = VM_PROT_ALL, | .sv_stackprot = VM_PROT_ALL, | ||||
.sv_copyout_auxargs = linux_copyout_auxargs, | .sv_copyout_auxargs = linux_copyout_auxargs, | ||||
.sv_copyout_strings = linux_copyout_strings, | .sv_copyout_strings = linux_copyout_strings, | ||||
.sv_setregs = linux_exec_setregs, | .sv_setregs = linux_exec_setregs, | ||||
.sv_fixlimit = NULL, | .sv_fixlimit = NULL, | ||||
.sv_maxssiz = NULL, | .sv_maxssiz = NULL, | ||||
.sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP | SV_SIG_DISCIGN | | .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP | SV_SIG_DISCIGN | | ||||
SV_SIG_WAITNDQ, | SV_SIG_WAITNDQ | SV_TIMEKEEP, | ||||
.sv_set_syscall_retval = linux_set_syscall_retval, | .sv_set_syscall_retval = linux_set_syscall_retval, | ||||
.sv_fetch_syscall_args = linux_fetch_syscall_args, | .sv_fetch_syscall_args = linux_fetch_syscall_args, | ||||
.sv_syscallnames = NULL, | .sv_syscallnames = NULL, | ||||
.sv_shared_page_base = SHAREDPAGE_LA48, | .sv_shared_page_base = LINUX_SHAREDPAGE_LA48, | ||||
.sv_shared_page_len = PAGE_SIZE, | .sv_shared_page_len = PAGE_SIZE, | ||||
.sv_schedtail = linux_schedtail, | .sv_schedtail = linux_schedtail, | ||||
.sv_thread_detach = linux_thread_detach, | .sv_thread_detach = linux_thread_detach, | ||||
.sv_trap = linux_vsyscall, | .sv_trap = linux_vsyscall, | ||||
.sv_onexec = linux_on_exec_vmspace, | .sv_onexec = linux_on_exec_vmspace, | ||||
.sv_onexit = linux_on_exit, | .sv_onexit = linux_on_exit, | ||||
.sv_ontdexit = linux_thread_dtor, | .sv_ontdexit = linux_thread_dtor, | ||||
.sv_setid_allowed = &linux_setid_allowed_query, | .sv_setid_allowed = &linux_setid_allowed_query, | ||||
}; | }; | ||||
static int | static int | ||||
linux_on_exec_vmspace(struct proc *p, struct image_params *imgp) | linux_on_exec_vmspace(struct proc *p, struct image_params *imgp) | ||||
{ | { | ||||
int error; | |||||
error = linux_map_vdso(p, linux_vdso_obj, linux_vdso_base, | |||||
LINUX_VDSOPAGE_SIZE, imgp); | |||||
if (error == 0) | |||||
linux_on_exec(p, imgp); | linux_on_exec(p, imgp); | ||||
return (0); | return (error); | ||||
} | } | ||||
static void | static void | ||||
linux_vdso_install(void *param) | linux_exec_sysvec_init(void *param) | ||||
{ | { | ||||
l_uintptr_t *ktimekeep_base, *ktsc_selector; | |||||
struct sysentvec *sv; | |||||
ptrdiff_t tkoff; | |||||
amd64_lower_shared_page(&elf_linux_sysvec); | sv = param; | ||||
amd64_lower_shared_page(sv); | |||||
/* Fill timekeep_base */ | |||||
exec_sysvec_init(sv); | |||||
linux_szsigcode = (&_binary_linux_locore_o_end - | tkoff = kern_timekeep_base - linux_vdso_base; | ||||
&_binary_linux_locore_o_start); | ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff); | ||||
*ktimekeep_base = sv->sv_timekeep_base; | |||||
if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) | tkoff = kern_tsc_selector - linux_vdso_base; | ||||
panic("Linux invalid vdso size\n"); | ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff); | ||||
*ktsc_selector = linux_vdso_tsc_selector_idx(); | |||||
if (bootverbose) | |||||
printf("Linux x86-64 vDSO tsc_selector: %lu\n", *ktsc_selector); | |||||
} | |||||
SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC, SI_ORDER_ANY, | |||||
linux_exec_sysvec_init, &elf_linux_sysvec); | |||||
__elfN(linux_vdso_fixup)(&elf_linux_sysvec); | static void | ||||
linux_vdso_install(void *param) | |||||
{ | |||||
char *vdso_start = &_binary_linux_vdso_so_o_start; | |||||
char *vdso_end = &_binary_linux_vdso_so_o_end; | |||||
linux_shared_page_obj = __elfN(linux_shared_page_init) | linux_szsigcode = vdso_end - vdso_start; | ||||
(&linux_shared_page_mapping); | MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE); | ||||
__elfN(linux_vdso_reloc)(&elf_linux_sysvec); | linux_vdso_base = LINUX_VDSOPAGE_LA48; | ||||
if (hw_lower_amd64_sharedpage != 0) | |||||
linux_vdso_base -= PAGE_SIZE; | |||||
bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, | __elfN(linux_vdso_fixup)(vdso_start, linux_vdso_base); | ||||
linux_szsigcode); | |||||
elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; | linux_vdso_obj = __elfN(linux_shared_page_init) | ||||
(&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE); | |||||
bcopy(vdso_start, linux_vdso_mapping, linux_szsigcode); | |||||
linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base); | |||||
} | } | ||||
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, | SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_FIRST, | ||||
linux_vdso_install, NULL); | linux_vdso_install, NULL); | ||||
static void | static void | ||||
linux_vdso_deinstall(void *param) | linux_vdso_deinstall(void *param) | ||||
{ | { | ||||
__elfN(linux_shared_page_fini)(linux_shared_page_obj, | __elfN(linux_shared_page_fini)(linux_vdso_obj, | ||||
linux_shared_page_mapping); | linux_vdso_mapping, LINUX_VDSOPAGE_SIZE); | ||||
} | } | ||||
SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, | SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, | ||||
linux_vdso_deinstall, NULL); | linux_vdso_deinstall, NULL); | ||||
static void | |||||
linux_vdso_reloc(char *mapping, Elf_Addr offset) | |||||
{ | |||||
const Elf_Ehdr *ehdr; | |||||
const Elf_Shdr *shdr; | |||||
Elf64_Addr *where, val; | |||||
Elf_Size rtype, symidx; | |||||
const Elf_Rela *rela; | |||||
Elf_Addr addr, addend; | |||||
int relacnt; | |||||
int i, j; | |||||
MPASS(offset != 0); | |||||
relacnt = 0; | |||||
ehdr = (const Elf_Ehdr *)mapping; | |||||
shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff); | |||||
for (i = 0; i < ehdr->e_shnum; i++) | |||||
{ | |||||
switch (shdr[i].sh_type) { | |||||
case SHT_REL: | |||||
printf("Linux x86_64 vDSO: unexpected Rel section\n"); | |||||
break; | |||||
case SHT_RELA: | |||||
rela = (const Elf_Rela *)(mapping + shdr[i].sh_offset); | |||||
relacnt = shdr[i].sh_size / sizeof(*rela); | |||||
} | |||||
} | |||||
for (j = 0; j < relacnt; j++, rela++) { | |||||
where = (Elf_Addr *)(mapping + rela->r_offset); | |||||
addend = rela->r_addend; | |||||
rtype = ELF_R_TYPE(rela->r_info); | |||||
symidx = ELF_R_SYM(rela->r_info); | |||||
switch (rtype) { | |||||
case R_X86_64_NONE: /* none */ | |||||
break; | |||||
case R_X86_64_RELATIVE: /* B + A */ | |||||
addr = (Elf_Addr)(offset + addend); | |||||
val = addr; | |||||
if (*where != val) | |||||
*where = val; | |||||
break; | |||||
case R_X86_64_IRELATIVE: | |||||
printf("Linux x86_64 vDSO: unexpected ifunc relocation, " | |||||
"symbol index %ld\n", symidx); | |||||
break; | |||||
default: | |||||
printf("Linux x86_64 vDSO: unexpected relocation type %ld, " | |||||
"symbol index %ld\n", rtype, symidx); | |||||
} | |||||
} | |||||
} | |||||
static char GNULINUX_ABI_VENDOR[] = "GNU"; | static char GNULINUX_ABI_VENDOR[] = "GNU"; | ||||
static int GNULINUX_ABI_DESC = 0; | static int GNULINUX_ABI_DESC = 0; | ||||
static bool | static bool | ||||
linux_trans_osrel(const Elf_Note *note, int32_t *osrel) | linux_trans_osrel(const Elf_Note *note, int32_t *osrel) | ||||
{ | { | ||||
const Elf32_Word *desc; | const Elf32_Word *desc; | ||||
▲ Show 20 Lines • Show All 129 Lines • Show Last 20 Lines |