Index: head/libexec/rtld-elf/aarch64/rtld_machdep.h =================================================================== --- head/libexec/rtld-elf/aarch64/rtld_machdep.h +++ head/libexec/rtld-elf/aarch64/rtld_machdep.h @@ -69,6 +69,8 @@ #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) +#define calculate_tls_post_size(align) \ + round(TLS_TCB_SIZE, align) - TLS_TCB_SIZE #define TLS_TCB_SIZE 16 typedef struct { Index: head/libexec/rtld-elf/arm/rtld_machdep.h =================================================================== --- head/libexec/rtld-elf/arm/rtld_machdep.h +++ head/libexec/rtld-elf/arm/rtld_machdep.h @@ -69,6 +69,8 @@ #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) +#define calculate_tls_post_size(align) \ + round(TLS_TCB_SIZE, align) - TLS_TCB_SIZE extern void *__tls_get_addr(tls_index *ti); Index: head/libexec/rtld-elf/mips/rtld_machdep.h =================================================================== --- head/libexec/rtld-elf/mips/rtld_machdep.h +++ head/libexec/rtld-elf/mips/rtld_machdep.h @@ -64,10 +64,11 @@ #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ - round(TLS_TCB_SIZE, align) + TLS_TCB_SIZE #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) +#define calculate_tls_post_size(align) 0 extern void *__tls_get_addr(tls_index *ti); Index: head/libexec/rtld-elf/powerpc/rtld_machdep.h =================================================================== --- head/libexec/rtld-elf/powerpc/rtld_machdep.h +++ head/libexec/rtld-elf/powerpc/rtld_machdep.h @@ -74,10 +74,11 @@ #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define 
calculate_first_tls_offset(size, align) \ - round(8, align) + TLS_TCB_SIZE #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) +#define calculate_tls_post_size(align) 0 typedef struct { unsigned long ti_module; Index: head/libexec/rtld-elf/powerpc64/rtld_machdep.h =================================================================== --- head/libexec/rtld-elf/powerpc64/rtld_machdep.h +++ head/libexec/rtld-elf/powerpc64/rtld_machdep.h @@ -66,10 +66,11 @@ #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ - round(16, align) + TLS_TCB_SIZE #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) +#define calculate_tls_post_size(align) 0 typedef struct { unsigned long ti_module; Index: head/libexec/rtld-elf/riscv/rtld_machdep.h =================================================================== --- head/libexec/rtld-elf/riscv/rtld_machdep.h +++ head/libexec/rtld-elf/riscv/rtld_machdep.h @@ -89,10 +89,11 @@ #define round(size, align) \ (((size) + (align) - 1) & ~((align) - 1)) #define calculate_first_tls_offset(size, align) \ - round(16, align) + TLS_TCB_SIZE #define calculate_tls_offset(prev_offset, prev_size, size, align) \ round(prev_offset + prev_size, align) #define calculate_tls_end(off, size) ((off) + (size)) +#define calculate_tls_post_size(align) 0 typedef struct { unsigned long ti_module; Index: head/libexec/rtld-elf/rtld.c =================================================================== --- head/libexec/rtld-elf/rtld.c +++ head/libexec/rtld-elf/rtld.c @@ -4693,47 +4693,87 @@ defined(__powerpc__) || defined(__riscv) /* + * Return pointer to allocated TLS block + */ +static void * +get_tls_block_ptr(void *tcb, size_t tcbsize) +{ + size_t extra_size, post_size, pre_size, tls_block_size; 
+ size_t tls_init_align; + + tls_init_align = MAX(obj_main->tlsalign, 1); + + /* Compute fragments sizes. */ + extra_size = tcbsize - TLS_TCB_SIZE; + post_size = calculate_tls_post_size(tls_init_align); + tls_block_size = tcbsize + post_size; + pre_size = roundup2(tls_block_size, tls_init_align) - tls_block_size; + + return ((char *)tcb - pre_size - extra_size); +} + +/* * Allocate Static TLS using the Variant I method. + * + * For details on the layout, see lib/libc/gen/tls.c. + * + * NB: rtld's tls_static_space variable includes TLS_TCB_SIZE and post_size as + * it is based on tls_last_offset, and TLS offsets here are really TCB + * offsets, whereas libc's tls_static_space is just the executable's static + * TLS segment. */ void * allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) { Obj_Entry *obj; - char *tcb; - Elf_Addr **tls; - Elf_Addr *dtv; + char *tls_block; + Elf_Addr *dtv, **tcb; Elf_Addr addr; int i; + size_t extra_size, maxalign, post_size, pre_size, tls_block_size; + size_t tls_init_align; if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) return (oldtcb); assert(tcbsize >= TLS_TCB_SIZE); - tcb = xcalloc(1, tls_static_space - TLS_TCB_SIZE + tcbsize); - tls = (Elf_Addr **)(tcb + tcbsize - TLS_TCB_SIZE); + maxalign = MAX(tcbalign, tls_static_max_align); + tls_init_align = MAX(obj_main->tlsalign, 1); + /* Compute fragments sizes. 
*/ + extra_size = tcbsize - TLS_TCB_SIZE; + post_size = calculate_tls_post_size(tls_init_align); + tls_block_size = tcbsize + post_size; + pre_size = roundup2(tls_block_size, tls_init_align) - tls_block_size; + tls_block_size += pre_size + tls_static_space - TLS_TCB_SIZE - post_size; + + /* Allocate whole TLS block */ + tls_block = malloc_aligned(tls_block_size, maxalign); + tcb = (Elf_Addr **)(tls_block + pre_size + extra_size); + if (oldtcb != NULL) { - memcpy(tls, oldtcb, tls_static_space); - free(oldtcb); + memcpy(tls_block, get_tls_block_ptr(oldtcb, tcbsize), + tls_static_space); + free_aligned(get_tls_block_ptr(oldtcb, tcbsize)); /* Adjust the DTV. */ - dtv = tls[0]; + dtv = tcb[0]; for (i = 0; i < dtv[1]; i++) { if (dtv[i+2] >= (Elf_Addr)oldtcb && dtv[i+2] < (Elf_Addr)oldtcb + tls_static_space) { - dtv[i+2] = dtv[i+2] - (Elf_Addr)oldtcb + (Elf_Addr)tls; + dtv[i+2] = dtv[i+2] - (Elf_Addr)oldtcb + (Elf_Addr)tcb; } } } else { dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); - tls[0] = dtv; + tcb[0] = dtv; dtv[0] = tls_dtv_generation; dtv[1] = tls_max_index; for (obj = globallist_curr(objs); obj != NULL; obj = globallist_next(obj)) { if (obj->tlsoffset > 0) { - addr = (Elf_Addr)tls + obj->tlsoffset; + addr = (Elf_Addr)tcb + obj->tlsoffset; if (obj->tlsinitsize > 0) memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); if (obj->tlssize > obj->tlsinitsize) @@ -4752,14 +4792,19 @@ { Elf_Addr *dtv; Elf_Addr tlsstart, tlsend; - int dtvsize, i; + size_t post_size; + size_t dtvsize, i, tls_init_align; assert(tcbsize >= TLS_TCB_SIZE); + tls_init_align = MAX(obj_main->tlsalign, 1); - tlsstart = (Elf_Addr)tcb + tcbsize - TLS_TCB_SIZE; - tlsend = tlsstart + tls_static_space; + /* Compute fragments sizes. 
*/ + post_size = calculate_tls_post_size(tls_init_align); - dtv = *(Elf_Addr **)tlsstart; + tlsstart = (Elf_Addr)tcb + TLS_TCB_SIZE + post_size; + tlsend = (Elf_Addr)tcb + tls_static_space; + + dtv = *(Elf_Addr **)tcb; dtvsize = dtv[1]; for (i = 0; i < dtvsize; i++) { if (dtv[i+2] && (dtv[i+2] < tlsstart || dtv[i+2] >= tlsend)) { @@ -4767,7 +4812,7 @@ } } free(dtv); - free(tcb); + free_aligned(get_tls_block_ptr(tcb, tcbsize)); } #endif