Index: head/sys/arm/arm/genassym.c =================================================================== --- head/sys/arm/arm/genassym.c +++ head/sys/arm/arm/genassym.c @@ -81,6 +81,9 @@ ASSYM(PCB_SP, offsetof(struct pcb, pcb_regs.sf_sp)); ASSYM(PCB_LR, offsetof(struct pcb, pcb_regs.sf_lr)); ASSYM(PCB_PC, offsetof(struct pcb, pcb_regs.sf_pc)); +#if __ARM_ARCH >= 6 +ASSYM(PCB_TPIDRURW, offsetof(struct pcb, pcb_regs.sf_tpidrurw)); +#endif ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); @@ -100,8 +103,8 @@ ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); -ASSYM(MD_TP, offsetof(struct mdthread, md_tp)); #if __ARM_ARCH < 6 +ASSYM(MD_TP, offsetof(struct mdthread, md_tp)); ASSYM(MD_RAS_START, offsetof(struct mdthread, md_ras_start)); ASSYM(MD_RAS_END, offsetof(struct mdthread, md_ras_end)); #endif Index: head/sys/arm/arm/swtch-v6.S =================================================================== --- head/sys/arm/arm/swtch-v6.S +++ head/sys/arm/arm/swtch-v6.S @@ -291,6 +291,8 @@ ldr r3, [r0, #(TD_PCB)] add r3, #(PCB_R4) stmia r3, {r4-r12, sp, lr, pc} + mrc CP15_TPIDRURW(r4) + str r4, [r3, #(PCB_TPIDRURW - PCB_R4)] #ifdef INVARIANTS cmp r1, #0 /* new thread? */ @@ -437,9 +439,6 @@ cmp r3, r6 beq 1b #endif - /* Set the new tls */ - ldr r0, [r11, #(TD_MD + MD_TP)] - mcr CP15_TPIDRURO(r0) /* write tls thread reg 2 */ /* We have a new curthread now so make a note it */ str r11, [r8, #PC_CURTHREAD] @@ -452,7 +451,14 @@ * Restore all saved registers and return. Note that some saved * registers can be changed when either cpu_fork(), cpu_copy_thread(), * cpu_fork_kthread_handler(), or makectx() was called. - */ + * + * The value of TPIDRURW is also written into TPIDRURO, as + * userspace still uses TPIDRURO, modifying it through + * sysarch(ARM_SET_TP, addr). + */ + ldr r3, [r7, #PCB_TPIDRURW] + mcr CP15_TPIDRURW(r3) /* write tls thread reg 2 */ + mcr CP15_TPIDRURO(r3) /* write tls thread reg 3 */ add r3, r7, #PCB_R4 ldmia r3, {r4-r12, sp, pc} Index: head/sys/arm/arm/sys_machdep.c =================================================================== --- head/sys/arm/arm/sys_machdep.c +++ head/sys/arm/arm/sys_machdep.c @@ -166,10 +166,10 @@ arm32_set_tp(struct thread *td, void *args) { - td->td_md.md_tp = (register_t)args; #if __ARM_ARCH >= 6 set_tls(args); #else + td->td_md.md_tp = (register_t)args; *(register_t *)ARM_TP_ADDRESS = (register_t)args; #endif return (0); @@ -180,7 +180,7 @@ { #if __ARM_ARCH >= 6 - td->td_retval[0] = td->td_md.md_tp; + td->td_retval[0] = (register_t)get_tls(); #else td->td_retval[0] = *(register_t *)ARM_TP_ADDRESS; #endif Index: head/sys/arm/arm/vm_machdep.c =================================================================== --- head/sys/arm/arm/vm_machdep.c +++ head/sys/arm/arm/vm_machdep.c @@ -82,8 +82,8 @@ * struct switchframe and trapframe must both be a multiple of 8 * for correct stack alignment. */ -CTASSERT(sizeof(struct switchframe) == 48); -CTASSERT(sizeof(struct trapframe) == 80); +_Static_assert((sizeof(struct switchframe) % 8) == 0, "Bad alignment"); +_Static_assert((sizeof(struct trapframe) % 8) == 0, "Bad alignment"); uint32_t initial_fpscr = VFPSCR_DN | VFPSCR_FZ; @@ -134,6 +134,9 @@ pcb2->pcb_regs.sf_r5 = (register_t)td2; pcb2->pcb_regs.sf_lr = (register_t)fork_trampoline; pcb2->pcb_regs.sf_sp = STACKALIGN(td2->td_frame); +#if __ARM_ARCH >= 6 + pcb2->pcb_regs.sf_tpidrurw = (register_t)get_tls(); +#endif pcb2->pcb_vfpcpu = -1; pcb2->pcb_vfpstate.fpscr = initial_fpscr; @@ -147,9 +150,7 @@ /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_cspr = PSR_SVC32_MODE; -#if __ARM_ARCH >= 6 - td2->td_md.md_tp = td1->td_md.md_tp; -#else +#if __ARM_ARCH < 6 td2->td_md.md_tp = *(register_t *)ARM_TP_ADDRESS; #endif } @@ -272,16 +273,18 @@ cpu_set_user_tls(struct thread *td, void *tls_base) { - td->td_md.md_tp = (register_t)tls_base; - if (td == curthread) { - critical_enter(); #if __ARM_ARCH >= 6 + td->td_pcb->pcb_regs.sf_tpidrurw = (register_t)tls_base; + if (td == curthread) set_tls(tls_base); #else + td->td_md.md_tp = (register_t)tls_base; + if (td == curthread) { + critical_enter(); *(register_t *)ARM_TP_ADDRESS = (register_t)tls_base; -#endif critical_exit(); } +#endif return (0); } Index: head/sys/arm/include/frame.h =================================================================== --- head/sys/arm/include/frame.h +++ head/sys/arm/include/frame.h @@ -117,6 +117,10 @@ register_t sf_sp; register_t sf_lr; register_t sf_pc; +#if __ARM_ARCH >= 6 + register_t sf_tpidrurw; + register_t sf_spare0; +#endif }; Index: head/sys/arm/include/pcpu.h =================================================================== --- head/sys/arm/include/pcpu.h +++ head/sys/arm/include/pcpu.h @@ -103,7 +103,8 @@ { void *tls; - __asm __volatile("mrc p15, 0, %0, c13, c0, 3" : "=r" (tls)); + /* TPIDRURW contains the authoritative value. */ + __asm __volatile("mrc p15, 0, %0, c13, c0, 2" : "=r" (tls)); return (tls); } @@ -111,7 +112,15 @@ set_tls(void *tls) { - __asm __volatile("mcr p15, 0, %0, c13, c0, 3" : : "r" (tls)); + /* + * Update both TPIDRURW and TPIDRURO. TPIDRURW needs to be written + * first to ensure that a context switch between the two writes will + * still give the desired result of updating both. + */ + __asm __volatile( + "mcr p15, 0, %0, c13, c0, 2\n" + "mcr p15, 0, %0, c13, c0, 3\n" + : : "r" (tls)); } #define curthread get_curthread() Index: head/sys/arm/include/proc.h =================================================================== --- head/sys/arm/include/proc.h +++ head/sys/arm/include/proc.h @@ -53,8 +53,8 @@ int md_ptrace_addr; int md_ptrace_instr_alt; int md_ptrace_addr_alt; - register_t md_tp; #if __ARM_ARCH < 6 + register_t md_tp; void *md_ras_start; void *md_ras_end; #endif