diff --git a/lib/libthr/thread/thr_malloc.c b/lib/libthr/thread/thr_malloc.c --- a/lib/libthr/thread/thr_malloc.c +++ b/lib/libthr/thread/thr_malloc.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "thr_private.h" @@ -134,6 +135,28 @@ return (res); } +void * +__thr_calloc_aligned_cacheline(size_t nitems, size_t nbytes) +{ /* Zeroed allocation of nitems * nbytes bytes aligned to CACHE_LINE_SIZE; NULL on size overflow or allocation failure. */ + struct pthread *curthread; + void *res; + size_t cnt; + + cnt = nitems * nbytes; /* may wrap around; the division check below detects it */ + if (nbytes != 0 && cnt / nbytes != nitems) { + /* size_t overflow. */ + return (NULL); + } + + curthread = _get_curthread(); + thr_malloc_lock(curthread); + res = __crt_aligned_alloc_offset(CACHE_LINE_SIZE, cnt, 0); /* called between thr_malloc_lock() and thr_malloc_unlock() */ + thr_malloc_unlock(curthread); + if (res != NULL) + memset(res, 0, cnt); /* zero the block after the lock is released */ + return (res); +} + void __thr_malloc_prefork(struct pthread *curthread) { diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c --- a/lib/libthr/thread/thr_mutex.c +++ b/lib/libthr/thread/thr_mutex.c @@ -289,8 +289,8 @@ if (error != 0) return (error); } - if ((pmutex = (pthread_mutex_t)calloc_cb(1, - sizeof(struct pthread_mutex))) == NULL) + pmutex = calloc_cb(1, sizeof(struct pthread_mutex)); + if (pmutex == NULL) return (ENOMEM); mutex_init_body(pmutex, attr); *mutex = pmutex; @@ -306,10 +306,10 @@ if (*mutex == THR_MUTEX_INITIALIZER) ret = mutex_init(mutex, &_pthread_mutexattr_default, - __thr_calloc); + __thr_calloc_aligned_cacheline); else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER) ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default, - __thr_calloc); + __thr_calloc_aligned_cacheline); else ret = 0; THR_LOCK_RELEASE(thread, &_mutex_static_lock); @@ -393,7 +393,7 @@ (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) { __thr_malloc_init(); return (mutex_init(mutex, mutex_attr ? 
*mutex_attr : NULL, - __thr_calloc)); + __thr_calloc_aligned_cacheline)); } pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1); if (pmtx == NULL) diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h --- a/lib/libthr/thread/thr_private.h +++ b/lib/libthr/thread/thr_private.h @@ -1020,6 +1020,7 @@ void *__thr_malloc(size_t nbytes); void *__thr_realloc(void *cp, size_t nbytes); void __thr_malloc_init(void); +void *__thr_calloc_aligned_cacheline(size_t nitmes, size_t nbytes); void __thr_malloc_prefork(struct pthread *curthread); void __thr_malloc_postfork(struct pthread *curthread); diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h --- a/libexec/rtld-elf/rtld.h +++ b/libexec/rtld-elf/rtld.h @@ -361,8 +361,7 @@ void *xcalloc(size_t, size_t); void *xmalloc(size_t); char *xstrdup(const char *); -void *malloc_aligned(size_t size, size_t align, size_t offset); -void free_aligned(void *ptr); +void *xmalloc_aligned(size_t size, size_t align, size_t offset); extern Elf_Addr _GLOBAL_OFFSET_TABLE_[]; extern Elf_Sym sym_zero; /* For resolving undefined weak refs. */ extern bool ld_bind_not; diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -5254,13 +5254,13 @@ tls_block_size += pre_size + tls_static_space - TLS_TCB_SIZE - post_size; /* Allocate whole TLS block */ - tls_block = malloc_aligned(tls_block_size, maxalign, 0); + tls_block = xmalloc_aligned(tls_block_size, maxalign, 0); tcb = (Elf_Addr **)(tls_block + pre_size + extra_size); if (oldtcb != NULL) { memcpy(tls_block, get_tls_block_ptr(oldtcb, tcbsize), tls_static_space); - free_aligned(get_tls_block_ptr(oldtcb, tcbsize)); + free(get_tls_block_ptr(oldtcb, tcbsize)); /* Adjust the DTV. 
*/ dtv = tcb[0]; @@ -5324,7 +5324,7 @@ } } free(dtv); - free_aligned(get_tls_block_ptr(tcb, tcbsize)); + free(get_tls_block_ptr(tcb, tcbsize)); } #endif /* TLS_VARIANT_I */ @@ -5350,7 +5350,7 @@ size = roundup(tls_static_space, ralign) + roundup(tcbsize, ralign); assert(tcbsize >= 2*sizeof(Elf_Addr)); - tls = malloc_aligned(size, ralign, 0 /* XXX */); + tls = xmalloc_aligned(size, ralign, 0 /* XXX */); dtv = xcalloc(tls_max_index + 2, sizeof(Elf_Addr)); segbase = (Elf_Addr)(tls + roundup(tls_static_space, ralign)); @@ -5429,11 +5429,11 @@ for (i = 0; i < dtvsize; i++) { if (dtv[i + 2] != 0 && (dtv[i + 2] < tlsstart || dtv[i + 2] > tlsend)) { - free_aligned((void *)dtv[i + 2]); + free((void *)dtv[i + 2]); } } - free_aligned((void *)tlsstart); + free((void *)tlsstart); free((void *)dtv); } @@ -5470,7 +5470,7 @@ obj->tls_dynamic = true; - p = malloc_aligned(obj->tlssize, obj->tlsalign, obj->tlspoffset); + p = xmalloc_aligned(obj->tlssize, obj->tlsalign, obj->tlspoffset); memcpy(p, obj->tlsinit, obj->tlsinitsize); memset(p + obj->tlsinitsize, 0, obj->tlssize - obj->tlsinitsize); return (p); diff --git a/libexec/rtld-elf/rtld_malloc.h b/libexec/rtld-elf/rtld_malloc.h --- a/libexec/rtld-elf/rtld_malloc.h +++ b/libexec/rtld-elf/rtld_malloc.h @@ -32,6 +32,7 @@ #ifndef RTLD_MALLOC_H #define RTLD_MALLOC_H +void *__crt_aligned_alloc_offset(size_t align, size_t size, size_t offset); /* result is released with __crt_free() */ void *__crt_calloc(size_t num, size_t size); void __crt_free(void *cp); void *__crt_malloc(size_t nbytes); diff --git a/libexec/rtld-elf/rtld_malloc.c b/libexec/rtld-elf/rtld_malloc.c --- a/libexec/rtld-elf/rtld_malloc.c +++ b/libexec/rtld-elf/rtld_malloc.c @@ -75,8 +75,8 @@ union overhead { union overhead *ov_next; /* when free */ struct { - u_char ovu_magic; /* magic number */ - u_char ovu_index; /* bucket # */ + uint16_t ovu_index; /* bucket #; in an AMAGIC header, the byte distance __crt_free() subtracts from cp */ + uint8_t ovu_magic; /* magic number */ } ovu; #define ov_magic ovu.ovu_magic #define ov_index ovu.ovu_index @@ -86,13 +86,15 @@ static int 
morepages(int n); #define MAGIC 0xef /* magic # on accounting info */ +#define AMAGIC 0xdf /* magic # for aligned alloc */ /* * nextf[i] is the pointer to the next free block of size * (FIRST_BUCKET_SIZE << i). The overhead information precedes the data * area returned to the user. */ -#define FIRST_BUCKET_SIZE 8 +#define LOW_BITS 3 +#define FIRST_BUCKET_SIZE (1U << LOW_BITS) #define NBUCKETS 30 static union overhead *nextf[NBUCKETS]; @@ -106,10 +108,10 @@ * increasing order. */ -static union overhead * +static void * cp2op(void *cp) { - return ((union overhead *)((caddr_t)cp - sizeof(union overhead))); + return (((caddr_t)cp - sizeof(union overhead))); } void * @@ -169,6 +171,28 @@ return (ret); } +void * +__crt_aligned_alloc_offset(size_t align, size_t size, size_t offset) +{ /* Returns an address x for size bytes such that x minus the masked offset is a multiple of align, or NULL if __crt_malloc() fails; the result is released with __crt_free(). */ + void *mem, *ov; + union overhead ov1; + uintptr_t x; + + if (align < FIRST_BUCKET_SIZE) + align = FIRST_BUCKET_SIZE; + offset &= align - 1; /* keep only the part of offset below the alignment */ + mem = __crt_malloc(size + align + offset + sizeof(union overhead)); /* room for worst-case padding and the marker header */ + if (mem == NULL) + return (NULL); + x = roundup2((uintptr_t)mem + sizeof(union overhead), align); + x += offset; + ov = cp2op((void *)x); /* marker header sits directly before the returned address */ + ov1.ov_magic = AMAGIC; + ov1.ov_index = x - (uintptr_t)mem + sizeof(union overhead); /* distance from x back to the header that precedes mem; __crt_free() subtracts it from cp and then expects MAGIC there */ + memcpy(ov, &ov1, sizeof(ov1)); /* NOTE(review): ov_index is uint16_t in this patch; confirm no caller needs a distance of 64 KiB or more */ + return ((void *)x); +} + /* * Allocate more memory to the indicated bucket. */ @@ -210,12 +234,16 @@ void __crt_free(void *cp) { + union overhead *op, op1; + void *opx; int size; - union overhead *op; if (cp == NULL) return; - op = cp2op(cp); + opx = cp2op(cp); + memcpy(&op1, opx, sizeof(op1)); + op = op1.ov_magic == AMAGIC ? 
(void *)((caddr_t)cp - op1.ov_index) : + opx; /* AMAGIC header: step back ov_index bytes from cp; otherwise use the header directly before cp */ if (op->ov_magic != MAGIC) return; /* sanity */ size = op->ov_index; diff --git a/libexec/rtld-elf/xmalloc.c b/libexec/rtld-elf/xmalloc.c --- a/libexec/rtld-elf/xmalloc.c +++ b/libexec/rtld-elf/xmalloc.c @@ -75,34 +75,18 @@ } void * -malloc_aligned(size_t size, size_t align, size_t offset) +xmalloc_aligned(size_t size, size_t align, size_t offset) { - char *mem, *res; - uintptr_t x; + void *res; offset &= align - 1; if (align < sizeof(void *)) align = sizeof(void *); - mem = xmalloc(size + 3 * align + offset); - x = roundup((uintptr_t)mem + sizeof(void *), align); - x += offset; - res = (void *)x; - x -= sizeof(void *); - memcpy((void *)x, &mem, sizeof(mem)); + res = __crt_aligned_alloc_offset(align, size, offset); /* note the argument order: align first, then size */ + if (res == NULL) { + rtld_fdputstr(STDERR_FILENO, "Out of memory\n"); + _exit(1); /* allocation failure terminates the process, so callers never see NULL */ + } return (res); } - -void -free_aligned(void *ptr) -{ - void *mem; - uintptr_t x; - - if (ptr == NULL) - return; - x = (uintptr_t)ptr; - x -= sizeof(void *); - memcpy(&mem, (void *)x, sizeof(mem)); - free(mem); -}