diff --git a/lib/libthr/thread/thr_malloc.c b/lib/libthr/thread/thr_malloc.c
--- a/lib/libthr/thread/thr_malloc.c
+++ b/lib/libthr/thread/thr_malloc.c
@@ -34,6 +34,7 @@
 #include
 #include
+#include <string.h>
 #include
 
 #include "thr_private.h"
@@ -136,6 +137,21 @@
 	return (res);
 }
 
+void *
+__thr_calloc_aligned_cacheline(size_t nitems, size_t nbytes)
+{
+	struct pthread *curthread;
+	void *res;
+
+	curthread = _get_curthread();
+	thr_malloc_lock(curthread);
+	res = __crt_aligned_alloc(CACHE_LINE_SIZE, nitems * nbytes);
+	thr_malloc_unlock(curthread);
+	if (res != NULL)
+		memset(res, 0, nitems * nbytes);
+	return (res);
+}
+
 void
 __thr_malloc_prefork(struct pthread *curthread)
 {
diff --git a/lib/libthr/thread/thr_mutex.c b/lib/libthr/thread/thr_mutex.c
--- a/lib/libthr/thread/thr_mutex.c
+++ b/lib/libthr/thread/thr_mutex.c
@@ -291,8 +291,8 @@
 		if (error != 0)
 			return (error);
 	}
-	if ((pmutex = (pthread_mutex_t)
-	    calloc_cb(1, sizeof(struct pthread_mutex))) == NULL)
+	pmutex = calloc_cb(1, sizeof(struct pthread_mutex));
+	if (pmutex == NULL)
 		return (ENOMEM);
 	mutex_init_body(pmutex, attr);
 	*mutex = pmutex;
@@ -308,10 +308,10 @@
 	if (*mutex == THR_MUTEX_INITIALIZER)
 		ret = mutex_init(mutex, &_pthread_mutexattr_default,
-		    __thr_calloc);
+		    __thr_calloc_aligned_cacheline);
 	else if (*mutex == THR_ADAPTIVE_MUTEX_INITIALIZER)
 		ret = mutex_init(mutex,
 		    &_pthread_mutexattr_adaptive_default,
-		    __thr_calloc);
+		    __thr_calloc_aligned_cacheline);
 	else
 		ret = 0;
 	THR_LOCK_RELEASE(thread, &_mutex_static_lock);
@@ -395,7 +395,7 @@
 	    (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) {
 		__thr_malloc_init();
 		return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL,
-		    __thr_calloc));
+		    __thr_calloc_aligned_cacheline));
 	}
 	pmtx = __thr_pshared_offpage(__DECONST(void *, mutex), 1);
 	if (pmtx == NULL)
diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h
--- a/lib/libthr/thread/thr_private.h
+++ b/lib/libthr/thread/thr_private.h
@@ -1022,6 +1022,7 @@
 void	*__thr_malloc(size_t nbytes);
 void	*__thr_realloc(void *cp, size_t nbytes);
 void	__thr_malloc_init(void);
+void	*__thr_calloc_aligned_cacheline(size_t nitems, size_t nbytes);
 void	__thr_malloc_prefork(struct pthread *curthread);
 void	__thr_malloc_postfork(struct pthread *curthread);
 
diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h
--- a/libexec/rtld-elf/rtld.h
+++ b/libexec/rtld-elf/rtld.h
@@ -419,9 +419,4 @@
 void init_pltgot(Obj_Entry *);
 void allocate_initial_tls(Obj_Entry *);
 
-void *__crt_calloc(size_t num, size_t size);
-void __crt_free(void *cp);
-void *__crt_malloc(size_t nbytes);
-void *__crt_realloc(void *cp, size_t nbytes);
-
 #endif /* } */
diff --git a/libexec/rtld-elf/rtld_malloc.h b/libexec/rtld-elf/rtld_malloc.h
--- a/libexec/rtld-elf/rtld_malloc.h
+++ b/libexec/rtld-elf/rtld_malloc.h
@@ -34,6 +34,7 @@
 #ifndef RTLD_MALLOC_H
 #define	RTLD_MALLOC_H
 
+void *__crt_aligned_alloc(size_t align, size_t size);
 void *__crt_calloc(size_t num, size_t size);
 void __crt_free(void *cp);
 void *__crt_malloc(size_t nbytes);
diff --git a/libexec/rtld-elf/rtld_malloc.c b/libexec/rtld-elf/rtld_malloc.c
--- a/libexec/rtld-elf/rtld_malloc.c
+++ b/libexec/rtld-elf/rtld_malloc.c
@@ -53,9 +53,12 @@
 #include
 #include
 #include
+#ifdef IN_RTLD
 #include "rtld.h"
 #include "rtld_printf.h"
 #include "rtld_paths.h"
+#endif
+#include "rtld_malloc.h"
 
 /*
  * Pre-allocate mmap'ed pages
@@ -68,10 +71,6 @@
  * contains a pointer to the next free block, and the bottom two bits must
  * be zero.  When in use, the first byte is set to MAGIC, and the second
  * byte is the size index.  The remaining bytes are for alignment.
- * If range checking is enabled then a second word holds the size of the
- * requested block, less 1, rounded up to a multiple of sizeof(RMAGIC).
- * The order of elements is critical: ov_magic must overlay the low order
- * bits of ov_next, and ov_magic can not be a valid ov_next bit pattern.
  */
 union overhead {
 	union overhead	*ov_next;	/* when free */
@@ -87,13 +86,15 @@
 static int morepages(int n);
 
 #define	MAGIC		0xef		/* magic # on accounting info */
+#define	AMAGIC		0xdf		/* magic # for aligned alloc */
 
 /*
  * nextf[i] is the pointer to the next free block of size
  * (FIRST_BUCKET_SIZE << i).  The overhead information precedes the data
  * area returned to the user.
  */
-#define	FIRST_BUCKET_SIZE	8
+#define	LOW_BITS		3
+#define	FIRST_BUCKET_SIZE	(1U << LOW_BITS)
 #define	NBUCKETS 30
 static union overhead *nextf[NBUCKETS];
 
@@ -107,6 +108,12 @@
  * increasing order.
  */
 
+static union overhead *
+cp2op(void *cp)
+{
+	return ((union overhead *)((caddr_t)cp - sizeof(union overhead)));
+}
+
 void *
 __crt_malloc(size_t nbytes)
 {
@@ -164,6 +171,25 @@
 	return (ret);
 }
 
+void *
+__crt_aligned_alloc(size_t align, size_t size)
+{
+	void *mem;
+	union overhead *ov;
+	uintptr_t x;
+
+	if (align < FIRST_BUCKET_SIZE)
+		align = FIRST_BUCKET_SIZE;
+	mem = __crt_malloc(size + 2 * align);
+	if (mem == NULL)
+		return (NULL);
+	x = roundup((uintptr_t)mem + sizeof(union overhead), align);
+	ov = cp2op((void *)x);
+	ov->ov_magic = AMAGIC;
+	ov->ov_index = (x - (uintptr_t)mem) >> LOW_BITS;
+	return ((void *)x);
+}
+
 /*
  * Allocate more memory to the indicated bucket.
  */
@@ -210,7 +236,11 @@
 
 	if (cp == NULL)
 		return;
-	op = (union overhead *)((caddr_t)cp - sizeof (union overhead));
+	op = cp2op(cp);
+	if (op->ov_magic == AMAGIC) {
+		cp = (caddr_t)cp - (op->ov_index << LOW_BITS);
+		op = cp2op(cp);
+	}
 	if (op->ov_magic != MAGIC)
 		return;				/* sanity */
 	size = op->ov_index;
@@ -228,7 +258,7 @@
 
 	if (cp == NULL)
 		return (__crt_malloc(nbytes));
-	op = (union overhead *)((caddr_t)cp - sizeof (union overhead));
+	op = cp2op(cp);
 	if (op->ov_magic != MAGIC)
 		return (NULL);		/* Double-free or bad argument */
 	i = op->ov_index;