diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -112,6 +112,11 @@
 	jnz	store_dr			/* static predict not taken */
 done_store_dr:
 
+	/* Clear Linux TLS GDT slots */
+	testl	$(PCB_LNX_TLS1 | PCB_LNX_TLS2 | PCB_LNX_TLS3),PCB_FLAGS(%r8)
+	jnz	clear_lnx_tls
+done_clear_lnx_tls:
+
 	/* have we used fp, and need a save? */
 	cmpq	%rdi,PCPU(FPCURTHREAD)
 	jne	ctx_switch_fpusave_done
@@ -210,6 +215,11 @@
 	jnz	load_dr				/* static predict not taken */
 done_load_dr:
 
+	/* Handle Linux TLS GDT slots */
+	testl	$(PCB_LNX_TLS1 | PCB_LNX_TLS2 | PCB_LNX_TLS3),PCB_FLAGS(%r8)
+	jnz	load_lnx_tls
+done_load_lnx_tls:
+
 	/* Restore context. */
 	movq	PCB_R15(%r8),%r15
 	movq	PCB_R14(%r8),%r14
@@ -296,6 +306,44 @@
 	movl	$LDTSEL,%eax
 	jmp	ld_ldt
 
+/*
+ * Zero the per-CPU GDT slot of every Linux TLS descriptor flagged in the
+ * pcb at %r8.  Uses %rax and %r15 as scratch registers.
+ */
+clear_lnx_tls:
+	PCPU_ADDR(GDT,%r15)
+	xorl	%eax,%eax
+	testl	$PCB_LNX_TLS1,PCB_FLAGS(%r8)
+	je	1f
+	movq	%rax,(GLNX_TLS1_SEL * 8)(%r15)
+1:	testl	$PCB_LNX_TLS2,PCB_FLAGS(%r8)
+	je	2f
+	movq	%rax,(GLNX_TLS2_SEL * 8)(%r15)
+2:	testl	$PCB_LNX_TLS3,PCB_FLAGS(%r8)
+	je	3f
+	movq	%rax,(GLNX_TLS3_SEL * 8)(%r15)
+3:	jmp	done_clear_lnx_tls
+
+/*
+ * Copy every Linux TLS descriptor flagged in the pcb at %r8 from
+ * pcb_lnx32_tls[] into its per-CPU GDT slot.  Scratches %rax and %rdx.
+ */
+load_lnx_tls:
+	PCPU_ADDR(GDT,%rdx)
+	testl	$PCB_LNX_TLS1,PCB_FLAGS(%r8)
+	je	1f
+	movq	PCB_LNX32_TLS(%r8),%rax
+	movq	%rax,(GLNX_TLS1_SEL * 8)(%rdx)
+1:	testl	$PCB_LNX_TLS2,PCB_FLAGS(%r8)
+	je	2f
+	movq	(PCB_LNX32_TLS + 8)(%r8),%rax
+	movq	%rax,(GLNX_TLS2_SEL * 8)(%rdx)
+2:	testl	$PCB_LNX_TLS3,PCB_FLAGS(%r8)
+	je	3f
+	movq	(PCB_LNX32_TLS + 16)(%r8),%rax
+	movq	%rax,(GLNX_TLS3_SEL * 8)(%rdx)
+3:	jmp	done_load_lnx_tls
+
 	.globl	ctx_switch_xsave32
 ctx_switch_xsave32:
 	xsave	(%r9)
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -149,10 +149,14 @@
 ASSYM(PCB_LSTAR, offsetof(struct pcb, pcb_lstar));
 ASSYM(PCB_CSTAR, offsetof(struct pcb, pcb_cstar));
 ASSYM(PCB_SFMASK, offsetof(struct pcb, pcb_sfmask));
+ASSYM(PCB_LNX32_TLS, offsetof(struct pcb, pcb_lnx32_tls));
 ASSYM(PCB_SIZE, sizeof(struct pcb));
 ASSYM(PCB_FULL_IRET, PCB_FULL_IRET);
 ASSYM(PCB_DBREGS, PCB_DBREGS);
 ASSYM(PCB_32BIT, PCB_32BIT);
+ASSYM(PCB_LNX_TLS1, PCB_LNX_TLS1);
+ASSYM(PCB_LNX_TLS2, PCB_LNX_TLS2);
+ASSYM(PCB_LNX_TLS3, PCB_LNX_TLS3);
 
 ASSYM(TSS_RSP0, offsetof(struct amd64tss, tss_rsp0));
 
@@ -260,6 +264,7 @@
 ASSYM(PC_MDS_TMP, offsetof(struct pcpu, pc_mds_tmp));
 ASSYM(PC_MDS_BUF, offsetof(struct pcpu, pc_mds_buf));
 ASSYM(PC_MDS_BUF64, offsetof(struct pcpu, pc_mds_buf64));
+ASSYM(PC_GDT, offsetof(struct pcpu, pc_gdt));
 
 ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
 ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL);
@@ -274,6 +279,9 @@
 ASSYM(TSSSEL, GSEL(GPROC0_SEL, SEL_KPL));
 ASSYM(LDTSEL, GSEL(GUSERLDT_SEL, SEL_KPL));
 ASSYM(SEL_RPL_MASK, SEL_RPL_MASK);
+ASSYM(GLNX_TLS1_SEL, GLNX_TLS1_SEL);
+ASSYM(GLNX_TLS2_SEL, GLNX_TLS2_SEL);
+ASSYM(GLNX_TLS3_SEL, GLNX_TLS3_SEL);
 
 ASSYM(__FreeBSD_version, __FreeBSD_version);
 
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -471,6 +471,33 @@
 	.ssd_long = 0,
 	.ssd_def32 = 0,
 	.ssd_gran = 0		},
+[GLNX_TLS1_SEL] = {	/* 13 Linux32 TLS slot */
+	.ssd_base = 0x0,
+	.ssd_limit = 0x0,
+	.ssd_type = 0,
+	.ssd_dpl = 0,
+	.ssd_p = 0,
+	.ssd_long = 0,
+	.ssd_def32 = 0,
+	.ssd_gran = 0		},
+[GLNX_TLS2_SEL] = {	/* 14 Linux32 TLS slot */
+	.ssd_base = 0x0,
+	.ssd_limit = 0x0,
+	.ssd_type = 0,
+	.ssd_dpl = 0,
+	.ssd_p = 0,
+	.ssd_long = 0,
+	.ssd_def32 = 0,
+	.ssd_gran = 0		},
+[GLNX_TLS3_SEL] = {	/* 15 Linux32 TLS slot */
+	.ssd_base = 0x0,
+	.ssd_limit = 0x0,
+	.ssd_type = 0,
+	.ssd_dpl = 0,
+	.ssd_p = 0,
+	.ssd_long = 0,
+	.ssd_def32 = 0,
+	.ssd_gran = 0		},
 };
 _Static_assert(nitems(gdt_segs) == NGDT, "Stale NGDT");
 
diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h
--- a/sys/amd64/include/pcb.h
+++ b/sys/amd64/include/pcb.h
@@ -84,6 +84,9 @@
 #define	PCB_KERNFPU_THR	0x0020	/* fpu_kern_thread() */
 #define	PCB_32BIT	0x0040	/* process has 32 bit context (segs etc) */
 #define	PCB_FPUNOSAVE	0x0080	/* no save area for current FPU ctx */
+#define	PCB_LNX_TLS1	0x0100	/* pcb_lnx32_tls[0] is in use */
+#define	PCB_LNX_TLS2	0x0200	/* pcb_lnx32_tls[1] is in use */
+#define	PCB_LNX_TLS3	0x0400	/* pcb_lnx32_tls[2] is in use */
 
 	uint16_t	pcb_initial_fpucw;
 
@@ -104,7 +107,8 @@
 
 	struct savefpu	*pcb_save;
 
-	uint64_t	pcb_pad[5];
+	struct user_segment_descriptor pcb_lnx32_tls[3];
+	uint64_t	pcb_pad[2];
 };
 
 /* Per-CPU state saved during suspend and resume. */
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
--- a/sys/amd64/linux32/linux32_machdep.c
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -53,6 +53,8 @@
 #include
 
 static void	bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru);
+static int	linux_control_tls(struct thread *td2, void *desc, int *selp,
+		    bool ignore_busy);
 
 struct l_old_select_argv {
 	l_int		nfds;
@@ -266,28 +268,17 @@
 }
 
+/*
+ * Install the TLS descriptor described by desc into td2 and point the
+ * %gs selector of td2 at the GDT slot that was chosen.
+ */
 int
-linux_set_cloned_tls(struct thread *td, void *desc)
+linux_set_cloned_tls(struct thread *td2, void *desc)
 {
-	struct l_user_desc info;
-	struct pcb *pcb;
-	int error;
-
-	error = copyin(desc, &info, sizeof(struct l_user_desc));
-	if (error) {
-		linux_msg(td, "set_cloned_tls copyin info failed!");
-	} else {
-		/* We might copy out the entry_number as GUGS32_SEL. */
-		info.entry_number = GUGS32_SEL;
-		error = copyout(&info, desc, sizeof(struct l_user_desc));
-		if (error)
-			linux_msg(td, "set_cloned_tls copyout info failed!");
-
-		pcb = td->td_pcb;
-		update_pcb_bases(pcb);
-		pcb->pcb_gsbase = (register_t)info.base_addr;
-		td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL);
-	}
+	int error, sel;
 
+	error = linux_control_tls(td2, desc, &sel, true);
+	if (error == 0)
+		td2->td_frame->tf_gs = GSEL(sel, SEL_UPL);
 	return (error);
 }
 
@@ -462,68 +453,197 @@
 	return (error);
 }
 
-int
-linux_set_thread_area(struct thread *td,
-    struct linux_set_thread_area_args *args)
+/*
+ * Install or clear one of the three Linux TLS descriptors kept in the pcb
+ * of td2, as described by the l_user_desc at desc.  An entry_number of -1
+ * selects the first unused slot; when all three are in use the request
+ * fails with EINVAL unless ignore_busy is set, in which case the first
+ * slot is overwritten.  The GDT index used is stored in *selp and is also
+ * copied out in entry_number.
+ */
+static int
+linux_control_tls(struct thread *td2, void *desc, int *selp, bool ignore_busy)
 {
 	struct l_user_desc info;
+	struct soft_segment_descriptor ssd;
 	struct pcb *pcb;
+	struct user_segment_descriptor *seg;
+	u_int bit, bits, idx;
 	int error;
+	bool clear;
 
-	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
-	if (error)
+	error = copyin(desc, &info, sizeof(struct l_user_desc));
+	if (error != 0)
 		return (error);
 
-	/*
-	 * Semantics of Linux version: every thread in the system has array
-	 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown.
-	 * This syscall loads one of the selected TLS descriptors with a value
-	 * and also loads GDT descriptors 6, 7 and 8 with the content of
-	 * the per-thread descriptors.
-	 *
-	 * Semantics of FreeBSD version: I think we can ignore that Linux has
-	 * three per-thread descriptors and use just the first one.
-	 * The tls_array[] is used only in [gs]et_thread_area() syscalls and
-	 * for loading the GDT descriptors. We use just one GDT descriptor
-	 * for TLS, so we will load just one.
-	 *
-	 * XXX: This doesn't work when a user space process tries to use more
-	 * than one TLS segment. Comment in the Linux source says wine might
-	 * do this.
-	 */
+	clear = info.read_exec_only == 1 && info.seg_not_present == 1 &&
+	    info.base_addr == 0 && info.limit == 0 && info.seg_32bit == 0 &&
+	    info.contents == 0 && info.limit_in_pages == 0 &&
+	    info.useable == 0;
 
-	/*
-	 * GLIBC reads current %gs and call set_thread_area() with it.
-	 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because
-	 * we use these segments.
-	 */
-	switch (info.entry_number) {
-	case GUGS32_SEL:
-	case GUDATA_SEL:
-	case 6:
+	pcb = td2->td_pcb;
+
+	idx = info.entry_number;
+	switch (idx) {
+	case GLNX_TLS1_SEL:
+	case 6: /* XXXKIB */
+		bit = PCB_LNX_TLS1;
+		seg = &pcb->pcb_lnx32_tls[0];
+		idx = GLNX_TLS1_SEL;
+		break;
+	case GLNX_TLS2_SEL:
+	case 7:
+		bit = PCB_LNX_TLS2;
+		seg = &pcb->pcb_lnx32_tls[1];
+		idx = GLNX_TLS2_SEL;
+		break;
+	case GLNX_TLS3_SEL:
+	case 8:
+		bit = PCB_LNX_TLS3;
+		seg = &pcb->pcb_lnx32_tls[2];
+		idx = GLNX_TLS3_SEL;
+		break;
 	case -1:
-		info.entry_number = GUGS32_SEL;
+		bits = pcb->pcb_flags & (PCB_LNX_TLS1 | PCB_LNX_TLS2 |
+		    PCB_LNX_TLS3);
+		if (__bitcount(bits) == 3) {
+			/* No free slot, for "clear" requests as well. */
+			if (!ignore_busy)
+				return (EINVAL);
+			bit = PCB_LNX_TLS1;
+			seg = &pcb->pcb_lnx32_tls[0];
+			idx = GLNX_TLS1_SEL;
+			break;
+		}
+		if ((bits & PCB_LNX_TLS1) == 0) {
+			bit = PCB_LNX_TLS1;
+			seg = &pcb->pcb_lnx32_tls[0];
+			idx = GLNX_TLS1_SEL;
+		} else if ((bits & PCB_LNX_TLS2) == 0) {
+			bit = PCB_LNX_TLS2;
+			seg = &pcb->pcb_lnx32_tls[1];
+			idx = GLNX_TLS2_SEL;
+		} else {
+			MPASS((bits & PCB_LNX_TLS3) == 0);
+			bit = PCB_LNX_TLS3;
+			seg = &pcb->pcb_lnx32_tls[2];
+			idx = GLNX_TLS3_SEL;
+		}
 		break;
 	default:
 		return (EINVAL);
 	}
 
-	/*
-	 * We have to copy out the GDT entry we use.
-	 *
-	 * XXX: What if a user space program does not check the return value
-	 * and tries to use 6, 7 or 8?
-	 */
-	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
-	if (error)
+	if (!clear) {
+		ssd.ssd_base = info.base_addr;
+		ssd.ssd_limit = info.limit;
+		ssd.ssd_gran = info.limit_in_pages;
+		ssd.ssd_type = 1 | (1 << 4); /* code/data, accessed */
+		ssd.ssd_type |= (info.read_exec_only == 0) << 1;
+		ssd.ssd_type |= info.contents << 2;
+		ssd.ssd_dpl = SEL_UPL;
+		ssd.ssd_long = 0;
+		ssd.ssd_def32 = info.seg_32bit;
+		ssd.ssd_p = info.seg_not_present == 0;
+		ssdtosd(&ssd, seg);
+		seg->sd_xx = info.useable;
+		set_pcb_flags(pcb, bit);
+		info.entry_number = idx;
+	} else {
+		clear_pcb_flags(pcb, bit);
+		memset(seg, 0, sizeof(*seg));
+		memset(&info, 0, sizeof(info));
+		info.entry_number = idx;
+	}
+	if (td2 == curthread) {
+		critical_enter();
+		memcpy((char *)PCPU_PTR(gdt) + idx * sizeof(*seg), seg,
+		    sizeof(*seg));
+		critical_exit();
+	}
+	set_pcb_flags(pcb, PCB_FULL_IRET);
+	*selp = idx;
+
+	error = copyout(&info, desc, sizeof(struct l_user_desc));
+	return (error);
+}
+
+/*
+ * Update one TLS descriptor of td.  Asking for a free slot (-1) fails
+ * with EINVAL when all three slots are in use.
+ */
+int
+linux_set_thread_area(struct thread *td,
+    struct linux_set_thread_area_args *args)
+{
+	int sel;
+
+	return (linux_control_tls(td, args->desc, &sel, false));
+}
+
+/*
+ * Copy out the Linux TLS descriptor named by entry_number of the
+ * l_user_desc at args->desc.  A slot that is not in use is reported
+ * as all zeroes, except for entry_number.
+ */
+int
+linux_get_thread_area(struct thread *td,
+    struct linux_get_thread_area_args *args)
+{
+	struct l_user_desc info;
+	struct soft_segment_descriptor ssd;
+	struct pcb *pcb;
+	struct user_segment_descriptor *seg;
+	u_int bit, idx;
+	int error;
+
+	error = copyin(args->desc, &info, sizeof(struct l_user_desc));
+	if (error != 0)
 		return (error);
 
 	pcb = td->td_pcb;
-	update_pcb_bases(pcb);
-	pcb->pcb_gsbase = (register_t)info.base_addr;
-	update_gdt_gsbase(td, info.base_addr);
 
-	return (0);
+	idx = info.entry_number;
+	switch (idx) {
+	case GLNX_TLS1_SEL:
+	case 6: /* XXXKIB */
+		bit = PCB_LNX_TLS1;
+		seg = &pcb->pcb_lnx32_tls[0];
+		idx = GLNX_TLS1_SEL;
+		break;
+	case GLNX_TLS2_SEL:
+	case 7:
+		bit = PCB_LNX_TLS2;
+		seg = &pcb->pcb_lnx32_tls[1];
+		idx = GLNX_TLS2_SEL;
+		break;
+	case GLNX_TLS3_SEL:
+	case 8:
+		bit = PCB_LNX_TLS3;
+		seg = &pcb->pcb_lnx32_tls[2];
+		idx = GLNX_TLS3_SEL;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	memset(&info, 0, sizeof(info));
+	info.entry_number = idx;
+	if ((pcb->pcb_flags & bit) != 0) {
+		sdtossd(seg, &ssd);
+		info.base_addr = ssd.ssd_base;
+		info.limit = ssd.ssd_limit;
+		info.limit_in_pages = ssd.ssd_gran;
+		/* Descriptor bit 1 is "writable", i.e. !read_exec_only. */
+		info.read_exec_only = (ssd.ssd_type & 0x2) == 0;
+		info.contents = (ssd.ssd_type >> 2) & 0x3;
+		info.seg_32bit = ssd.ssd_def32;
+		info.seg_not_present = ssd.ssd_p == 0;
+		info.useable = seg->sd_xx;
+	}
+
+	error = copyout(&info, args->desc, sizeof(struct l_user_desc));
+	return (error);
 }
 
 void
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -572,6 +572,7 @@
 {
 	struct trapframe *regs = td->td_frame;
 	struct pcb *pcb = td->td_pcb;
+	struct user_segment_descriptor *sd;
 	register_t saved_rflags;
 
 	regs = td->td_frame;
@@ -593,7 +594,7 @@
 	regs->tf_rip = imgp->entry_addr;
 	regs->tf_rsp = stack;
 	regs->tf_rflags = PSL_USER | saved_rflags;
-	regs->tf_gs = _ugssel;
+	regs->tf_gs = GSEL(GLNX_TLS1_SEL, SEL_UPL);
 	regs->tf_fs = _ufssel;
 	regs->tf_es = _udatasel;
 	regs->tf_ds = _udatasel;
@@ -602,6 +603,24 @@
 	regs->tf_cs = _ucode32sel;
 	regs->tf_rbx = (register_t)imgp->ps_strings;
 
+	memset(&pcb->pcb_lnx32_tls[0], 0, sizeof(pcb->pcb_lnx32_tls));
+	sd = &pcb->pcb_lnx32_tls[0];
+	sd->sd_lolimit = 0xffff;
+	sd->sd_type = SDT_MEMRWA;
+	sd->sd_dpl = SEL_UPL;
+	sd->sd_p = 1;
+	sd->sd_hilimit = 0xf;
+	sd->sd_def32 = 1;
+	sd->sd_gran = 1;
+	/* Only slot 1 is live now; cpu_switch keys off these flags. */
+	clear_pcb_flags(pcb, PCB_LNX_TLS2 | PCB_LNX_TLS3);
+	set_pcb_flags(pcb, PCB_LNX_TLS1);
+	critical_enter();
+	memcpy((char *)PCPU_PTR(gdt) + GLNX_TLS1_SEL *
+	    sizeof(struct user_segment_descriptor), &pcb->pcb_lnx32_tls[0],
+	    sizeof(pcb->pcb_lnx32_tls));
+	critical_exit();
+
 	x86_clear_dbregs(pcb);
 	fpstate_drop(td);
 
diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master
--- a/sys/amd64/linux32/syscalls.master
+++ b/sys/amd64/linux32/syscalls.master
@@ -1346,7 +1346,11 @@
 		    struct l_user_desc *desc
 		);
 	}
-244	AUE_NULL	UNIMPL	linux_get_thread_area
+244	AUE_NULL	STD {
+		int linux_get_thread_area(
+		    struct l_user_desc *desc
+		);
+	}
 245	AUE_NULL	UNIMPL	linux_io_setup
 246	AUE_NULL	UNIMPL	linux_io_destroy
 247	AUE_NULL	UNIMPL	linux_io_getevents
diff --git a/sys/x86/include/segments.h b/sys/x86/include/segments.h
--- a/sys/x86/include/segments.h
+++ b/sys/x86/include/segments.h
@@ -267,7 +267,10 @@
 /* slot 10 is second half of GPROC0_SEL */
 #define	GUSERLDT_SEL	11	/* LDT */
 /* slot 12 is second half of GUSERLDT_SEL */
-#define	NGDT 		13
+#define	GLNX_TLS1_SEL	13	/* Linux32 TLS slot */
+#define	GLNX_TLS2_SEL	14	/* Linux32 TLS slot */
+#define	GLNX_TLS3_SEL	15	/* Linux32 TLS slot */
+#define	NGDT 		16
 #endif /* __i386__ */
 
 #endif /* !_X86_SEGMENTS_H_ */