Index: head/libexec/rtld-elf/aarch64/reloc.c
===================================================================
--- head/libexec/rtld-elf/aarch64/reloc.c
+++ head/libexec/rtld-elf/aarch64/reloc.c
@@ -49,7 +49,8 @@
  * This is not the correct prototype, but we only need it for
  * a function pointer to a simple asm function.
  */
-void *_rtld_tlsdesc(void *);
+void *_rtld_tlsdesc_static(void *);
+void *_rtld_tlsdesc_undef(void *);
 void *_rtld_tlsdesc_dynamic(void *);

 void _exit(int);
@@ -122,78 +123,58 @@
 }

 struct tls_data {
-	int64_t index;
-	Obj_Entry *obj;
-	const Elf_Rela *rela;
+	Elf_Addr	dtv_gen;
+	int		tls_index;
+	Elf_Addr	tls_offs;
 };

-int64_t rtld_tlsdesc_handle(struct tls_data *tlsdesc, int flags);
-
-static struct tls_data *
-reloc_tlsdesc_alloc(Obj_Entry *obj, const Elf_Rela *rela)
+static Elf_Addr
+reloc_tlsdesc_alloc(int tlsindex, Elf_Addr tlsoffs)
 {
 	struct tls_data *tlsdesc;

 	tlsdesc = xmalloc(sizeof(struct tls_data));
-	tlsdesc->index = -1;
-	tlsdesc->obj = obj;
-	tlsdesc->rela = rela;
+	tlsdesc->dtv_gen = tls_dtv_generation;
+	tlsdesc->tls_index = tlsindex;
+	tlsdesc->tls_offs = tlsoffs;

-	return (tlsdesc);
+	return ((Elf_Addr)tlsdesc);
 }

-/*
- * Look up the symbol to find its tls index
- */
-static int64_t
-rtld_tlsdesc_handle_locked(struct tls_data *tlsdesc, int flags,
-    RtldLockState *lockstate)
+static void
+reloc_tlsdesc(const Obj_Entry *obj, const Elf_Rela *rela, Elf_Addr *where,
+    int flags, RtldLockState *lockstate)
 {
-	const Elf_Rela *rela;
 	const Elf_Sym *def;
 	const Obj_Entry *defobj;
-	Obj_Entry *obj;
+	Elf_Addr offs;

-	rela = tlsdesc->rela;
-	obj = tlsdesc->obj;
-	def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj, flags, NULL,
-	    lockstate);
-	if (def == NULL)
-		rtld_die();
+	offs = 0;
+	if (ELF_R_SYM(rela->r_info) != 0) {
+		def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj, flags,
+		    NULL, lockstate);
+		if (def == NULL)
+			rtld_die();
+		offs = def->st_value;
+		obj = defobj;
+		if (def->st_shndx == SHN_UNDEF) {
+			/* Weak undefined thread variable */
+			where[0] = (Elf_Addr)_rtld_tlsdesc_undef;
+			where[1] = rela->r_addend;
+			return;
+		}
+	}
+	offs += rela->r_addend;

-	tlsdesc->index = defobj->tlsoffset + def->st_value + rela->r_addend;
-
-	return (tlsdesc->index);
-}
-
-int64_t
-rtld_tlsdesc_handle(struct tls_data *tlsdesc, int flags)
-{
-	RtldLockState lockstate;
-
-	/* We have already found the index, return it */
-	if (tlsdesc->index >= 0)
-		return (tlsdesc->index);
-
-	wlock_acquire(rtld_bind_lock, &lockstate);
-	/* tlsdesc->index may have been set by another thread */
-	if (tlsdesc->index == -1)
-		rtld_tlsdesc_handle_locked(tlsdesc, flags, &lockstate);
-	lock_release(rtld_bind_lock, &lockstate);
-
-	return (tlsdesc->index);
-}
-
-static void
-reloc_tlsdesc(Obj_Entry *obj, const Elf_Rela *rela, Elf_Addr *where)
-{
-	if (ELF_R_SYM(rela->r_info) == 0) {
-		where[0] = (Elf_Addr)_rtld_tlsdesc;
-		where[1] = obj->tlsoffset + rela->r_addend;
+	if (obj->tlsoffset != 0) {
+		/* Variable is in initially allocated TLS segment */
+		where[0] = (Elf_Addr)_rtld_tlsdesc_static;
+		where[1] = obj->tlsoffset + offs;
 	} else {
+		/* TLS offset is unknown at load time, use dynamic resolving */
 		where[0] = (Elf_Addr)_rtld_tlsdesc_dynamic;
-		where[1] = (Elf_Addr)reloc_tlsdesc_alloc(obj, rela);
+		where[1] = reloc_tlsdesc_alloc(obj->tlsindex, offs);
 	}
 }

@@ -201,7 +182,7 @@
  * Process the PLT relocations.
 */
 int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags, RtldLockState *lockstate)
 {
 	const Elf_Rela *relalim;
 	const Elf_Rela *rela;
@@ -218,7 +199,8 @@
 			*where += (Elf_Addr)obj->relocbase;
 			break;
 		case R_AARCH64_TLSDESC:
-			reloc_tlsdesc(obj, rela, where);
+			reloc_tlsdesc(obj, rela, where, SYMLOOK_IN_PLT | flags,
+			    lockstate);
 			break;
 		case R_AARCH64_IRELATIVE:
 			obj->irelative = true;
@@ -458,7 +440,7 @@
 			}
 			break;
 		case R_AARCH64_TLSDESC:
-			reloc_tlsdesc(obj, rela, where);
+			reloc_tlsdesc(obj, rela, where, flags, lockstate);
 			break;
 		case R_AARCH64_TLS_TPREL64:
 			/*
@@ -478,9 +460,25 @@
 					return (-1);
 				}
 			}
-
-			*where = def->st_value + rela->r_addend +
-			    defobj->tlsoffset;
+			/* Test weak undefined thread variable */
+			if (def->st_shndx != SHN_UNDEF) {
+				*where = def->st_value + rela->r_addend +
+				    defobj->tlsoffset;
+			} else {
+				/*
+				 * XXX We should relocate undefined thread
+				 * weak variable address to NULL, but how?
+				 * Can we return error in this situation?
+				 */
+				rtld_printf("%s: Unable to relocate undefined "
+				    "weak TLS variable\n", obj->path);
+#if 0
+				return (-1);
+#else
+				*where = def->st_value + rela->r_addend +
+				    defobj->tlsoffset;
+#endif
+			}
 			break;

 		/*
Index: head/libexec/rtld-elf/aarch64/rtld_start.S
===================================================================
--- head/libexec/rtld-elf/aarch64/rtld_start.S
+++ head/libexec/rtld-elf/aarch64/rtld_start.S
@@ -118,55 +118,145 @@
 END(_rtld_bind_start)

 /*
- * uint64_t _rtld_tlsdesc(struct tlsdesc *);
+ * struct rel_tlsdesc {
+ *  uint64_t resolver_fnc;
+ *  uint64_t resolver_arg;
  *
- * struct tlsdesc {
- *  uint64_t ptr;
- *  uint64_t data;
- * };
  *
- * Returns the data.
+ * uint64_t _rtld_tlsdesc_static(struct rel_tlsdesc *);
+ *
+ * Resolver function for TLS symbols resolved at load time
  */
-ENTRY(_rtld_tlsdesc)
+ENTRY(_rtld_tlsdesc_static)
+	.cfi_startproc
 	ldr	x0, [x0, #8]
 	ret
-END(_rtld_tlsdesc)
+	.cfi_endproc
+END(_rtld_tlsdesc_static)

 /*
- * uint64_t _rtld_tlsdesc_dynamic(struct tlsdesc *);
+ * uint64_t _rtld_tlsdesc_undef(void);
  *
- * TODO: We could lookup the saved index here to skip saving the entire stack.
+ * Resolver function for weak and undefined TLS symbols
  */
+ENTRY(_rtld_tlsdesc_undef)
+	.cfi_startproc
+	str	x1, [sp, #-16]!
+	.cfi_adjust_cfa_offset	16
+
+	mrs	x1, tpidr_el0
+	ldr	x0, [x0, #8]
+	sub	x0, x0, x1
+
+	ldr	x1, [sp], #16
+	.cfi_adjust_cfa_offset	-16
+	.cfi_endproc
+	ret
+END(_rtld_tlsdesc_undef)
+
+/*
+ * uint64_t _rtld_tlsdesc_dynamic(struct rel_tlsdesc *);
+ *
+ * Resolver function for TLS symbols from dlopen()
+ */
 ENTRY(_rtld_tlsdesc_dynamic)
-	/* Store any registers we may use in rtld_tlsdesc_handle */
-	stp	x29, x30, [sp, #-(10 * 16)]!
+	.cfi_startproc
+
+	/* Save registers used in fast path */
+	stp	x1, x2, [sp, #(-2 * 16)]!
+	stp	x3, x4, [sp, #(1 * 16)]
+	.cfi_adjust_cfa_offset	2 * 16
+	.cfi_rel_offset	x1, 0
+	.cfi_rel_offset	x2, 8
+	.cfi_rel_offset	x3, 16
+	.cfi_rel_offset	x4, 24
+
+	/* Test fastpath - inlined version of tls_get_addr_common(). */
+	ldr	x1, [x0, #8]		/* tlsdesc ptr */
+	mrs	x4, tpidr_el0
+	ldr	x0, [x4]		/* DTV pointer */
+	ldr	x2, [x0]		/* dtv[0] (generation count) */
+	ldr	x3, [x1]		/* tlsdesc->dtv_gen */
+	cmp	x2, x3
+	b.ne	1f			/* dtv[0] != tlsdesc->dtv_gen */
+
+	ldr	w2, [x1, #8]		/* tlsdesc->tls_index */
+	add	w2, w2, #1
+	ldr	x3, [x0, w2, sxtw #3]	/* dtv[tlsdesc->tls_index + 1] */
+	cbz	x3, 1f
+
+	/* Return (dtv[tlsdesc->tls_index + 1] + tlsdesc->tls_offs - tp) */
+	ldr	x2, [x1, #16]		/* tlsdesc->tls_offs */
+	add	x2, x2, x3
+	sub	x0, x2, x4
+	/* Restore registers and return */
+	ldp	x3, x4, [sp, #(1 * 16)]
+	ldp	x1, x2, [sp], #(2 * 16)
+	.cfi_adjust_cfa_offset	-2 * 16
+	ret
+
+	/*
+	 * Slow path
+	 * return(
+	 *    tls_get_addr_common(tp, tlsdesc->tls_index, tlsdesc->tls_offs));
+	 *
+	 */
+1:
+	/* Save all integer registers */
+	stp	x29, x30, [sp, #-(8 * 16)]!
+	.cfi_adjust_cfa_offset	8 * 16
+	.cfi_rel_offset	x29, 0
+	.cfi_rel_offset	x30, 8
+
 	mov	x29, sp
-	stp	x1, x2, [sp, #(1 * 16)]
-	stp	x3, x4, [sp, #(2 * 16)]
-	stp	x5, x6, [sp, #(3 * 16)]
-	stp	x7, x8, [sp, #(4 * 16)]
-	stp	x9, x10, [sp, #(5 * 16)]
-	stp	x11, x12, [sp, #(6 * 16)]
-	stp	x13, x14, [sp, #(7 * 16)]
-	stp	x15, x16, [sp, #(8 * 16)]
-	stp	x17, x18, [sp, #(9 * 16)]
+	stp	x5, x6, [sp, #(1 * 16)]
+	stp	x7, x8, [sp, #(2 * 16)]
+	stp	x9, x10, [sp, #(3 * 16)]
+	stp	x11, x12, [sp, #(4 * 16)]
+	stp	x13, x14, [sp, #(5 * 16)]
+	stp	x15, x16, [sp, #(6 * 16)]
+	stp	x17, x18, [sp, #(7 * 16)]
+	.cfi_rel_offset	x5, 16
+	.cfi_rel_offset	x6, 24
+	.cfi_rel_offset	x7, 32
+	.cfi_rel_offset	x8, 40
+	.cfi_rel_offset	x9, 48
+	.cfi_rel_offset	x10, 56
+	.cfi_rel_offset	x11, 64
+	.cfi_rel_offset	x12, 72
+	.cfi_rel_offset	x13, 80
+	.cfi_rel_offset	x14, 88
+	.cfi_rel_offset	x15, 96
+	.cfi_rel_offset	x16, 104
+	.cfi_rel_offset	x17, 112
+	.cfi_rel_offset	x18, 120

 	/* Find the tls offset */
-	ldr	x0, [x0, #8]
-	mov	x1, #1
-	bl	rtld_tlsdesc_handle
+	mov	x0, x4			/* tp */
+	mov	x3, x1			/* tlsdesc ptr */
+	ldr	w1, [x3, #8]		/* tlsdesc->tls_index */
+	ldr	x2, [x3, #16]		/* tlsdesc->tls_offs */
+	bl	tls_get_addr_common
+	mrs	x1, tpidr_el0
+	sub	x0, x0, x1

-	/* Restore the registers */
-	ldp	x17, x18, [sp, #(9 * 16)]
-	ldp	x15, x16, [sp, #(8 * 16)]
-	ldp	x13, x14, [sp, #(7 * 16)]
-	ldp	x11, x12, [sp, #(6 * 16)]
-	ldp	x9, x10, [sp, #(5 * 16)]
-	ldp	x7, x8, [sp, #(4 * 16)]
-	ldp	x5, x6, [sp, #(3 * 16)]
-	ldp	x3, x4, [sp, #(2 * 16)]
-	ldp	x1, x2, [sp, #(1 * 16)]
-	ldp	x29, x30, [sp], #(10 * 16)
+	/* Restore slow path registers */
+	ldp	x17, x18, [sp, #(7 * 16)]
+	ldp	x15, x16, [sp, #(6 * 16)]
+	ldp	x13, x14, [sp, #(5 * 16)]
+	ldp	x11, x12, [sp, #(4 * 16)]
+	ldp	x9, x10, [sp, #(3 * 16)]
+	ldp	x7, x8, [sp, #(2 * 16)]
+	ldp	x5, x6, [sp, #(1 * 16)]
+	ldp	x29, x30, [sp], #(8 * 16)
+	.cfi_adjust_cfa_offset	-8 * 16
+	.cfi_restore	x29
+	.cfi_restore	x30
+
+	/* Restore fast path registers and return */
+	ldp	x3, x4, [sp, #16]
+	ldp	x1, x2, [sp], #(2 * 16)
+	.cfi_adjust_cfa_offset	-2 * 16
+	.cfi_endproc
 	ret
 END(_rtld_tlsdesc_dynamic)
Index: head/libexec/rtld-elf/amd64/reloc.c
===================================================================
--- head/libexec/rtld-elf/amd64/reloc.c
+++ head/libexec/rtld-elf/amd64/reloc.c
@@ -323,7 +323,7 @@

 /* Process the PLT relocations. */
 int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
 {
 	const Elf_Rela *relalim;
 	const Elf_Rela *rela;
Index: head/libexec/rtld-elf/arm/reloc.c
===================================================================
--- head/libexec/rtld-elf/arm/reloc.c
+++ head/libexec/rtld-elf/arm/reloc.c
@@ -389,7 +389,7 @@
  * * Process the PLT relocations.
  * */
 int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
 {
 	const Elf_Rel *rellim;
 	const Elf_Rel *rel;
Index: head/libexec/rtld-elf/i386/reloc.c
===================================================================
--- head/libexec/rtld-elf/i386/reloc.c
+++ head/libexec/rtld-elf/i386/reloc.c
@@ -274,7 +274,7 @@

 /* Process the PLT relocations. */
 int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
 {
 	const Elf_Rel *rellim;
 	const Elf_Rel *rel;
Index: head/libexec/rtld-elf/mips/reloc.c
===================================================================
--- head/libexec/rtld-elf/mips/reloc.c
+++ head/libexec/rtld-elf/mips/reloc.c
@@ -652,7 +652,7 @@
  * Process the PLT relocations.
  */
 int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
 {
 	const Elf_Rel *rellim;
 	const Elf_Rel *rel;
Index: head/libexec/rtld-elf/powerpc/reloc.c
===================================================================
--- head/libexec/rtld-elf/powerpc/reloc.c
+++ head/libexec/rtld-elf/powerpc/reloc.c
@@ -402,7 +402,7 @@
 * Process the PLT relocations.
 */
 int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
 {
 	const Elf_Rela *relalim;
 	const Elf_Rela *rela;
Index: head/libexec/rtld-elf/powerpc64/reloc.c
===================================================================
--- head/libexec/rtld-elf/powerpc64/reloc.c
+++ head/libexec/rtld-elf/powerpc64/reloc.c
@@ -376,7 +376,7 @@
 * Process the PLT relocations.
 */
 int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
 {
 	const Elf_Rela *relalim;
 	const Elf_Rela *rela;
Index: head/libexec/rtld-elf/riscv/reloc.c
===================================================================
--- head/libexec/rtld-elf/riscv/reloc.c
+++ head/libexec/rtld-elf/riscv/reloc.c
@@ -145,7 +145,7 @@
 * Process the PLT relocations.
 */
 int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
 {
 	const Elf_Rela *relalim;
 	const Elf_Rela *rela;
Index: head/libexec/rtld-elf/rtld.h
===================================================================
--- head/libexec/rtld-elf/rtld.h
+++ head/libexec/rtld-elf/rtld.h
@@ -400,7 +400,7 @@
 int do_copy_relocations(Obj_Entry *);
 int reloc_non_plt(Obj_Entry *, Obj_Entry *, int flags,
     struct Struct_RtldLockState *);
-int reloc_plt(Obj_Entry *);
+int reloc_plt(Obj_Entry *, int flags, struct Struct_RtldLockState *);
 int reloc_jmpslots(Obj_Entry *, int flags, struct Struct_RtldLockState *);
 int reloc_iresolve(Obj_Entry *, struct Struct_RtldLockState *);
 int reloc_gnu_ifunc(Obj_Entry *, int flags, struct Struct_RtldLockState *);
Index: head/libexec/rtld-elf/rtld.c
===================================================================
--- head/libexec/rtld-elf/rtld.c
+++ head/libexec/rtld-elf/rtld.c
@@ -2890,7 +2890,7 @@
 	init_pltgot(obj);

 	/* Process the PLT relocations. */
-	if (reloc_plt(obj) == -1)
+	if (reloc_plt(obj, flags, lockstate) == -1)
 		return (-1);
 	/* Relocate the jump slots if we are doing immediate binding. */
 	if ((obj->bind_now || bind_now) && reloc_jmpslots(obj, flags,
Index: head/libexec/rtld-elf/sparc64/reloc.c
===================================================================
--- head/libexec/rtld-elf/sparc64/reloc.c
+++ head/libexec/rtld-elf/sparc64/reloc.c
@@ -487,7 +487,8 @@
 }

 int
-reloc_plt(Obj_Entry *obj __unused)
+reloc_plt(Obj_Entry *obj __unused, int flags __unused,
+    RtldLockState *lockstate __unused)
 {
 #if 0
 	const Obj_Entry *defobj;