Changeset View
Standalone View
libexec/rtld-elf/rtld.c
Show First 20 Lines • Show All 1,440 Lines • ▼ Show 20 Lines | case PT_DYNAMIC: | ||||
obj->dynamic = (const Elf_Dyn *)(ph->p_vaddr + obj->relocbase); | obj->dynamic = (const Elf_Dyn *)(ph->p_vaddr + obj->relocbase); | ||||
break; | break; | ||||
case PT_TLS: | case PT_TLS: | ||||
obj->tlsindex = 1; | obj->tlsindex = 1; | ||||
obj->tlssize = ph->p_memsz; | obj->tlssize = ph->p_memsz; | ||||
obj->tlsalign = ph->p_align; | obj->tlsalign = ph->p_align; | ||||
obj->tlsinitsize = ph->p_filesz; | obj->tlsinitsize = ph->p_filesz; | ||||
obj->tlsinit = (void*)(ph->p_vaddr + obj->relocbase); | obj->tlsinit = (void*)(ph->p_vaddr + obj->relocbase); | ||||
obj->tlspoffset = ph->p_offset; | |||||
bdragon: As per the inline patch I just dumped into my last comment: `obj->tlspoffset = ph->p_offset;` needs to be set here as well, to handle an unaligned TLS offset in the main program. | |||||
break; | break; | ||||
case PT_GNU_STACK: | case PT_GNU_STACK: | ||||
obj->stack_flags = ph->p_flags; | obj->stack_flags = ph->p_flags; | ||||
break; | break; | ||||
case PT_GNU_RELRO: | case PT_GNU_RELRO: | ||||
obj->relro_page = obj->relocbase + trunc_page(ph->p_vaddr); | obj->relro_page = obj->relocbase + trunc_page(ph->p_vaddr); | ||||
▲ Show 20 Lines • Show All 3,339 Lines • ▼ Show 20 Lines | |||||
allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) | allocate_tls(Obj_Entry *objs, void *oldtcb, size_t tcbsize, size_t tcbalign) | ||||
{ | { | ||||
Obj_Entry *obj; | Obj_Entry *obj; | ||||
char *tls_block; | char *tls_block; | ||||
Elf_Addr *dtv, **tcb; | Elf_Addr *dtv, **tcb; | ||||
Elf_Addr addr; | Elf_Addr addr; | ||||
Elf_Addr i; | Elf_Addr i; | ||||
size_t extra_size, maxalign, post_size, pre_size, tls_block_size; | size_t extra_size, maxalign, post_size, pre_size, tls_block_size; | ||||
size_t tls_init_align; | size_t tls_init_align, tls_init_offset; | ||||
if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) | if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE) | ||||
return (oldtcb); | return (oldtcb); | ||||
assert(tcbsize >= TLS_TCB_SIZE); | assert(tcbsize >= TLS_TCB_SIZE); | ||||
maxalign = MAX(tcbalign, tls_static_max_align); | maxalign = MAX(tcbalign, tls_static_max_align); | ||||
tls_init_align = MAX(obj_main->tlsalign, 1); | tls_init_align = MAX(obj_main->tlsalign, 1); | ||||
/* Compute fragment sizes. */ | /* Compute fragment sizes. */ | ||||
extra_size = tcbsize - TLS_TCB_SIZE; | extra_size = tcbsize - TLS_TCB_SIZE; | ||||
post_size = calculate_tls_post_size(tls_init_align); | post_size = calculate_tls_post_size(tls_init_align); | ||||
tls_block_size = tcbsize + post_size; | tls_block_size = tcbsize + post_size; | ||||
pre_size = roundup2(tls_block_size, tls_init_align) - tls_block_size; | pre_size = roundup2(tls_block_size, tls_init_align) - tls_block_size; | ||||
tls_block_size += pre_size + tls_static_space - TLS_TCB_SIZE - post_size; | tls_block_size += pre_size + tls_static_space - TLS_TCB_SIZE - post_size; | ||||
/* Allocate whole TLS block */ | /* Allocate whole TLS block */ | ||||
tls_block = malloc_aligned(tls_block_size, maxalign); | tls_block = malloc_aligned(tls_block_size, maxalign, 0); | ||||
Not Done Inline Actions — bdragon: Missing param on Variant I (this malloc_aligned() call also needs the new third argument). | |||||
tcb = (Elf_Addr **)(tls_block + pre_size + extra_size); | tcb = (Elf_Addr **)(tls_block + pre_size + extra_size); | ||||
if (oldtcb != NULL) { | if (oldtcb != NULL) { | ||||
memcpy(tls_block, get_tls_block_ptr(oldtcb, tcbsize), | memcpy(tls_block, get_tls_block_ptr(oldtcb, tcbsize), | ||||
tls_static_space); | tls_static_space); | ||||
free_aligned(get_tls_block_ptr(oldtcb, tcbsize)); | free_aligned(get_tls_block_ptr(oldtcb, tcbsize)); | ||||
/* Adjust the DTV. */ | /* Adjust the DTV. */ | ||||
dtv = tcb[0]; | dtv = tcb[0]; | ||||
for (i = 0; i < dtv[1]; i++) { | for (i = 0; i < dtv[1]; i++) { | ||||
if (dtv[i+2] >= (Elf_Addr)oldtcb && | if (dtv[i+2] >= (Elf_Addr)oldtcb && | ||||
dtv[i+2] < (Elf_Addr)oldtcb + tls_static_space) { | dtv[i+2] < (Elf_Addr)oldtcb + tls_static_space) { | ||||
dtv[i+2] = dtv[i+2] - (Elf_Addr)oldtcb + (Elf_Addr)tcb; | dtv[i+2] = dtv[i+2] - (Elf_Addr)oldtcb + (Elf_Addr)tcb; | ||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); | dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); | ||||
tcb[0] = dtv; | tcb[0] = dtv; | ||||
dtv[0] = tls_dtv_generation; | dtv[0] = tls_dtv_generation; | ||||
dtv[1] = tls_max_index; | dtv[1] = tls_max_index; | ||||
for (obj = globallist_curr(objs); obj != NULL; | for (obj = globallist_curr(objs); obj != NULL; | ||||
obj = globallist_next(obj)) { | obj = globallist_next(obj)) { | ||||
Not Done Inline Actions — bdragon: I *think* the problem I'm having is that this bit needs to compensate for the difference between the file offset and the first legal address according to the padding, so that it copies the preset variables to the correct offset instead of copying them to the beginning of the TLS memory. | |||||
Not Done Inline Actions — bdragon: Example, thinking out loud:

Broken:
```
header:
     TLS off    0x0000000000000cd0 vaddr 0x0000000010020cd0 paddr 0x0000000010020cd0 align 2**6
         filesz 0x0000000000000004 memsz 0x0000000000000030 flags r--
binary:
00000cc0  4e 80 04 20 00 00 00 00 00 02 02 50 4b ff ff c4  |N.. .......PK...|
00000cd0 [00 00 b6 12]00 00 00 00 ff ff ff ff ff ff ff ff  |................|
00000ce0  00 00 00 00 00 00 00 00 ff ff ff ff ff ff ff ff  |................|
00000cf0  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  |................|
```

Working:
```
header:
     TLS off    0x0000000000000c40 vaddr 0x0000000010010c40 paddr 0x0000000010010c40 align 2**6
         filesz 0x0000000000000004 memsz 0x0000000000000004 flags r--
binary:
00000c10  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  |................|
*
00000c40 [00 00 b6 12]00 00 00 00 ff ff ff ff ff ff ff ff  |................|
00000c50  00 00 00 00 00 00 00 00 ff ff ff ff ff ff ff ff  |................|
00000c60  00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  |................|
00000c70  00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 01  |................|
```

In the broken case, the binary has a 0x10 offset built in to reference the thread variable, because it expects the init data to start at the difference between off (0xcd0) and the base according to the alignment (0xcc0). But since the init data was being copied to the start of the area instead of to an offset into it, the variable ends up 0x10 too low.

So I guess another memset is needed here to zero the bytes between the base and the offset, and the memcpy should start writing after that. | |||||
Not Done Inline Actions — bdragon: Something like this seems to work for my test case. (Remote editing; sorry in advance about the patch being copy-pasted from a terminal.)

```
Index: local-src/libexec/rtld-elf/rtld.c
===================================================================
--- local-src.orig/libexec/rtld-elf/rtld.c
+++ local-src/libexec/rtld-elf/rtld.c
@@ -1447,6 +1447,7 @@ digest_phdr(const Elf_Phdr *phdr, int ph
 	    obj->tlsalign = ph->p_align;
 	    obj->tlsinitsize = ph->p_filesz;
 	    obj->tlsinit = (void*)(ph->p_vaddr + obj->relocbase);
+	    obj->tlspoffset = ph->p_offset;
 	    break;
 
 	case PT_GNU_STACK:
@@ -4802,7 +4803,7 @@ allocate_tls(Obj_Entry *objs, void *oldt
     Elf_Addr addr;
     Elf_Addr i;
     size_t extra_size, maxalign, post_size, pre_size, tls_block_size;
-    size_t tls_init_align;
+    size_t tls_init_align, tls_init_offset;
 
     if (oldtcb != NULL && tcbsize == TLS_TCB_SIZE)
 	return (oldtcb);
@@ -4843,13 +4844,17 @@ allocate_tls(Obj_Entry *objs, void *oldt
 
 	for (obj = globallist_curr(objs); obj != NULL;
 	  obj = globallist_next(obj)) {
+	    tls_init_offset = obj->tlspoffset & (obj->tlsalign - 1);
+	    if (tls_init_offset > 0)
+		memset((void*) addr, 0, tls_init_offset);
 	    if (obj->tlsoffset > 0) {
 		addr = (Elf_Addr)tcb + obj->tlsoffset;
 		if (obj->tlsinitsize > 0)
-		    memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize);
+		    memcpy((void*)(addr + tls_init_offset), obj->tlsinit, obj->tlsinitsize);
+
 		if (obj->tlssize > obj->tlsinitsize)
-		    memset((void*)(addr + obj->tlsinitsize), 0,
-			   obj->tlssize - obj->tlsinitsize);
+		    memset((void*)(addr + tls_init_offset + obj->tlsinitsize), 0,
+			   obj->tlssize - obj->tlsinitsize - tls_init_offset);
 		dtv[obj->tlsindex + 1] = addr;
 	    }
 	}
```
| |||||
tls_init_offset = obj->tlspoffset & (obj->tlsalign - 1); | |||||
if (tls_init_offset > 0) | |||||
memset((void*) addr, 0, tls_init_offset); | |||||
if (obj->tlsoffset > 0) { | if (obj->tlsoffset > 0) { | ||||
addr = (Elf_Addr)tcb + obj->tlsoffset; | addr = (Elf_Addr)tcb + obj->tlsoffset; | ||||
if (obj->tlsinitsize > 0) | if (obj->tlsinitsize > 0) { | ||||
memcpy((void*) addr, obj->tlsinit, obj->tlsinitsize); | memcpy((void *)(addr + tls_init_offset), obj->tlsinit, | ||||
if (obj->tlssize > obj->tlsinitsize) | obj->tlsinitsize); | ||||
memset((void*)(addr + obj->tlsinitsize), 0, | } | ||||
obj->tlssize - obj->tlsinitsize); | if (obj->tlssize > obj->tlsinitsize) { | ||||
memset((void *)(addr + tls_init_offset + obj->tlsinitsize), | |||||
0, obj->tlssize - obj->tlsinitsize - tls_init_offset); | |||||
} | |||||
dtv[obj->tlsindex + 1] = addr; | dtv[obj->tlsindex + 1] = addr; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
return (tcb); | return (tcb); | ||||
} | } | ||||
void | void | ||||
free_tls(void *tcb, size_t tcbsize, size_t tcbalign __unused) | free_tls(void *tcb, size_t tcbsize, size_t tcbalign __unused) | ||||
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | allocate_tls(Obj_Entry *objs, void *oldtls, size_t tcbsize, size_t tcbalign) | ||||
size_t i; | size_t i; | ||||
ralign = tcbalign; | ralign = tcbalign; | ||||
if (tls_static_max_align > ralign) | if (tls_static_max_align > ralign) | ||||
ralign = tls_static_max_align; | ralign = tls_static_max_align; | ||||
size = round(tls_static_space, ralign) + round(tcbsize, ralign); | size = round(tls_static_space, ralign) + round(tcbsize, ralign); | ||||
assert(tcbsize >= 2*sizeof(Elf_Addr)); | assert(tcbsize >= 2*sizeof(Elf_Addr)); | ||||
tls = malloc_aligned(size, ralign); | tls = malloc_aligned(size, ralign, 0 /* XXX */); | ||||
dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); | dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); | ||||
segbase = (Elf_Addr)(tls + round(tls_static_space, ralign)); | segbase = (Elf_Addr)(tls + round(tls_static_space, ralign)); | ||||
((Elf_Addr*)segbase)[0] = segbase; | ((Elf_Addr*)segbase)[0] = segbase; | ||||
((Elf_Addr*)segbase)[1] = (Elf_Addr) dtv; | ((Elf_Addr*)segbase)[1] = (Elf_Addr) dtv; | ||||
dtv[0] = tls_dtv_generation; | dtv[0] = tls_dtv_generation; | ||||
dtv[1] = tls_max_index; | dtv[1] = tls_max_index; | ||||
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | |||||
#endif | #endif | ||||
/* | /* | ||||
* Allocate TLS block for module with given index. | * Allocate TLS block for module with given index. | ||||
*/ | */ | ||||
void * | void * | ||||
allocate_module_tls(int index) | allocate_module_tls(int index) | ||||
{ | { | ||||
Obj_Entry* obj; | Obj_Entry *obj; | ||||
char* p; | char *p; | ||||
TAILQ_FOREACH(obj, &obj_list, next) { | TAILQ_FOREACH(obj, &obj_list, next) { | ||||
if (obj->marker) | if (obj->marker) | ||||
continue; | continue; | ||||
if (obj->tlsindex == index) | if (obj->tlsindex == index) | ||||
break; | break; | ||||
} | } | ||||
if (!obj) { | if (obj == NULL) { | ||||
_rtld_error("Can't find module with TLS index %d", index); | _rtld_error("Can't find module with TLS index %d", index); | ||||
rtld_die(); | rtld_die(); | ||||
} | } | ||||
p = malloc_aligned(obj->tlssize, obj->tlsalign); | p = malloc_aligned(obj->tlssize, obj->tlsalign, obj->tlspoffset); | ||||
bdragon (Unsubmitted, Not Done) — Inline Actions: This might be the bit I am hunting for. I suspect this should be obj->tlspoffset & (obj… | |||||
bdragon (Unsubmitted, Done) — Inline Actions: Nope, it's not, as malloc_aligned already takes care of masking it off. | |||||
memcpy(p, obj->tlsinit, obj->tlsinitsize); | memcpy(p, obj->tlsinit, obj->tlsinitsize); | ||||
memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize); | memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize); | ||||
return (p); | |||||
return p; | |||||
} | } | ||||
bool | bool | ||||
allocate_tls_offset(Obj_Entry *obj) | allocate_tls_offset(Obj_Entry *obj) | ||||
{ | { | ||||
size_t off; | size_t off; | ||||
if (obj->tls_done) | if (obj->tls_done) | ||||
▲ Show 20 Lines • Show All 676 Lines • Show Last 20 Lines |
As per the inline patch I just dumped into my last comment: `obj->tlspoffset = ph->p_offset;` needs to be set here as well, to handle an unaligned TLS offset in the main program.