diff --git a/lib/libthr/thread/thr_malloc.c b/lib/libthr/thread/thr_malloc.c --- a/lib/libthr/thread/thr_malloc.c +++ b/lib/libthr/thread/thr_malloc.c @@ -34,6 +34,7 @@ #include #include +#include #include #include "thr_private.h" @@ -136,6 +137,33 @@ return (res); } +/* + * Zeroed allocation of nitems * nbytes bytes, requested from + * __crt_aligned_alloc() with CACHE_LINE_SIZE alignment. Returns NULL + * if the product overflows size_t or the allocation fails. + */ +void * +__thr_calloc_aligned_cacheline(size_t nitems, size_t nbytes) +{ + struct pthread *curthread; + void *res; + size_t cnt; + + cnt = nitems * nbytes; + if (nbytes != 0 && cnt / nbytes != nitems) { + /* size_t overflow. */ + return (NULL); + } + + curthread = _get_curthread(); + thr_malloc_lock(curthread); + res = __crt_aligned_alloc(CACHE_LINE_SIZE, cnt); + thr_malloc_unlock(curthread); + if (res != NULL) + memset(res, 0, cnt); + return (res); +} + void __thr_malloc_prefork(struct pthread *curthread) { diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c --- a/lib/libthr/thread/thr_mutex.c +++ b/lib/libthr/thread/thr_mutex.c @@ -291,8 +291,8 @@ if (error != 0) return (error); } - if ((pmutex = (pthread_mutex_t)calloc_cb(1, - sizeof(struct pthread_mutex))) == NULL) + pmutex = calloc_cb(1, sizeof(struct pthread_mutex)); + if (pmutex == NULL) return (ENOMEM); mutex_init_body(pmutex, attr); *mutex = pmutex; @@ -308,10 +308,10 @@ if (*mutex == THR_MUTEX_INITIALIZER) ret = mutex_init(mutex, &_pthread_mutexattr_default, - __thr_calloc); + __thr_calloc_aligned_cacheline); else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER) ret = mutex_init(mutex, &_pthread_mutexattr_adaptive_default, - __thr_calloc); + __thr_calloc_aligned_cacheline); else ret = 0; THR_LOCK_RELEASE(thread, &_mutex_static_lock); @@ -395,7 +395,7 @@ (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) { __thr_malloc_init(); return (mutex_init(mutex, mutex_attr ?
*mutex_attr : NULL, - __thr_calloc)); + __thr_calloc_aligned_cacheline)); } pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1); if (pmtx == NULL) diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h --- a/lib/libthr/thread/thr_private.h +++ b/lib/libthr/thread/thr_private.h @@ -1022,6 +1022,7 @@ void *__thr_malloc(size_t nbytes); void *__thr_realloc(void *cp, size_t nbytes); void __thr_malloc_init(void); +void *__thr_calloc_aligned_cacheline(size_t nitems, size_t nbytes); void __thr_malloc_prefork(struct pthread *curthread); void __thr_malloc_postfork(struct pthread *curthread); diff --git a/libexec/rtld-elf/rtld_malloc.h b/libexec/rtld-elf/rtld_malloc.h --- a/libexec/rtld-elf/rtld_malloc.h +++ b/libexec/rtld-elf/rtld_malloc.h @@ -34,6 +34,7 @@ #ifndef RTLD_MALLOC_H #define RTLD_MALLOC_H +void *__crt_aligned_alloc(size_t align, size_t size); void *__crt_calloc(size_t num, size_t size); void __crt_free(void *cp); void *__crt_malloc(size_t nbytes); diff --git a/libexec/rtld-elf/rtld_malloc.c b/libexec/rtld-elf/rtld_malloc.c --- a/libexec/rtld-elf/rtld_malloc.c +++ b/libexec/rtld-elf/rtld_malloc.c @@ -75,8 +75,8 @@ union overhead { union overhead *ov_next; /* when free */ struct { - u_char ovu_magic; /* magic number */ - u_char ovu_index; /* bucket # */ + uint16_t ovu_index; /* bucket # */ + uint8_t ovu_magic; /* magic number */ } ovu; #define ov_magic ovu.ovu_magic #define ov_index ovu.ovu_index @@ -86,13 +86,15 @@ static int morepages(int n); #define MAGIC 0xef /* magic # on accounting info */ +#define AMAGIC 0xdf /* magic # for aligned alloc */ /* * nextf[i] is the pointer to the next free block of size * (FIRST_BUCKET_SIZE << i). The overhead information precedes the data * area returned to the user.
*/ -#define FIRST_BUCKET_SIZE 8 +#define LOW_BITS 3 +#define FIRST_BUCKET_SIZE (1U << LOW_BITS) #define NBUCKETS 30 static union overhead *nextf[NBUCKETS]; @@ -169,6 +171,34 @@ return (ret); } +/* + * Return size bytes aligned to align (raised to FIRST_BUCKET_SIZE if + * smaller). A second header tagged AMAGIC is written at cp2op() of the + * returned pointer. Its index holds, in FIRST_BUCKET_SIZE units, the + * distance from the returned pointer back to the header of the block + * obtained from __crt_malloc(); the free path subtracts that distance. + * Assumes cp2op(p) is p - sizeof(union overhead). + */ +void * +__crt_aligned_alloc(size_t align, size_t size) +{ + void *mem; + union overhead *ov; + uintptr_t x; + + if (align < FIRST_BUCKET_SIZE) + align = FIRST_BUCKET_SIZE; + mem = __crt_malloc(size + align + sizeof(union overhead)); + if (mem == NULL) + return (NULL); + x = roundup((uintptr_t)mem + sizeof(union overhead), align); + ov = cp2op((void *)x); + ov->ov_magic = AMAGIC; + ov->ov_index = (x - (uintptr_t)mem + sizeof(union overhead)) >> + LOW_BITS; + return ((void *)x); +} + /* * Allocate more memory to the indicated bucket. */ @@ -216,6 +246,8 @@ if (cp == NULL) return; op = cp2op(cp); + if (op->ov_magic == AMAGIC) + op = (void *)((caddr_t)cp - (op->ov_index << LOW_BITS)); if (op->ov_magic != MAGIC) return; /* sanity */ size = op->ov_index;