diff --git a/lib/libthr/Makefile b/lib/libthr/Makefile
--- a/lib/libthr/Makefile
+++ b/lib/libthr/Makefile
@@ -44,6 +44,14 @@
 CFLAGS+=-D_PTHREAD_FORCED_UNWIND
 .endif
 
+.if ${MACHINE_ABI:Mlong64}
+# Specify the number of elements in the per-thread pshared lock lookup
+# cache (set it to zero to disable).  This directly affects the size
+# of struct pthread, so keep it small, and prime for an even key
+# distribution across the cache.
+CFLAGS+=-D_PTHREAD_PSC_SIZE=11
+.endif
+
 LDFLAGS+=-Wl,-znodelete
 
 VERSION_DEF=${SRCTOP}/lib/libc/Versions.def
diff --git a/lib/libthr/thread/thr_init.c b/lib/libthr/thread/thr_init.c
--- a/lib/libthr/thread/thr_init.c
+++ b/lib/libthr/thread/thr_init.c
@@ -340,9 +340,13 @@
 			PANIC("Can't allocate initial thread");
 		init_main_thread(curthread);
 	} else {
+#if (_PTHREAD_PSC_SIZE > 0)
+		/* Invalidate the pshared lookup cache after a fork. */
+		memset(curthread->psc_ht, 0, sizeof(curthread->psc_ht));
+#endif
 		first = 0;
 	}
-
+
 	/*
 	 * Add the thread to the thread list queue.
 	 */
diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h
--- a/lib/libthr/thread/thr_private.h
+++ b/lib/libthr/thread/thr_private.h
@@ -361,6 +361,17 @@
 	void	(*destructor)(void *);
 };
 
+#if (_PTHREAD_PSC_SIZE > 0)
+/*
+ * Per-thread pshared lock lookup cache entry.
+ */
+struct pthread_psc_entry {
+	u_long	htgen;
+	void	*key;
+	void	*val;
+};
+#endif
+
 /*
  * lwpid_t is 32bit but kernel thr API exports tid as long type
  * to preserve the ABI for M:N model in very early date (r131431).
@@ -532,6 +543,13 @@
 	int	unwind_disabled;
 #endif
 
+#if (_PTHREAD_PSC_SIZE > 0)
+	/*
+	 * Per-thread pshared lock lookup cache hash table.
+	 */
+	struct pthread_psc_entry psc_ht[_PTHREAD_PSC_SIZE];
+#endif
+
 	/*
 	 * Magic value to help recognize a valid thread structure
 	 * from an invalid one:
diff --git a/lib/libthr/thread/thr_pshared.c b/lib/libthr/thread/thr_pshared.c
--- a/lib/libthr/thread/thr_pshared.c
+++ b/lib/libthr/thread/thr_pshared.c
@@ -52,6 +52,39 @@
 static struct urwlock pshared_lock = DEFAULT_URWLOCK;
 static int page_size;
 
+#if (_PTHREAD_PSC_SIZE > 0)
+/*
+ * The pshared_hash[] generation count, pshared_htgen, is bumped each
+ * time an entry is removed from the hash table.  A thread that looks
+ * up a key after that key has been removed must never return a stale
+ * value from its private lookup cache.  To ensure this, each remove
+ * is preceded by a call to PSHARED_HTGEN_INVALIDATE(), which
+ * increments pshared_htgen twice while the pshared write lock is
+ * held.  The second increment employs store+release semantics, so
+ * any other thread performing a load+acquire of pshared_htgen after
+ * the second increment is guaranteed to observe at least the first
+ * one, marking its private lookup cache as invalid.
+ *
+ * Note that a thread looking up a key concurrently with another
+ * thread removing that key can find and return a stale value for it
+ * regardless of whether the pshared lock cache is enabled.  Such an
+ * application is in error and will likely either segfault or corrupt
+ * memory.
+ */
+static u_long pshared_htgen;
+
+_Static_assert(sizeof(pshared_htgen) * NBBY >= 64,
+    "pshared_htgen must be at least 64 bits wide to avoid overflow");
+
+#define	PSHARED_HTGEN_INVALIDATE()				\
+	do {							\
+		atomic_add_long(&pshared_htgen, 1);		\
+		atomic_add_rel_long(&pshared_htgen, 1);		\
+	} while (0)
+#else
+#define	PSHARED_HTGEN_INVALIDATE()
+#endif
+
 void
 __thr_pshared_init(void)
 {
@@ -116,6 +149,7 @@
 		    h->val, NULL);
 		if (error == 0)
 			continue;
+		PSHARED_HTGEN_INVALIDATE();
 		LIST_REMOVE(h, link);
 		munmap(h->val, page_size);
 		free(h);
@@ -125,17 +159,49 @@
 }
 
 static void *
-pshared_lookup(void *key)
+pshared_lookup(struct pthread *curthread, void *key)
 {
 	struct pshared_hash_head *hd;
 	struct psh *h;
+	void *val;
+
+#if (_PTHREAD_PSC_SIZE > 0)
+	struct pthread_psc_entry *entry;
+	u_long htgen;
+
+	/*
+	 * The load+acquire of pshared_htgen here synchronizes with the
+	 * store+release issued by PSHARED_HTGEN_INVALIDATE(); see the
+	 * comment at the top of this file for details.
+	 */
+	entry = &curthread->psc_ht[(uintptr_t)key % _PTHREAD_PSC_SIZE];
+	htgen = atomic_load_acq_long(&pshared_htgen);
+	if ((htgen & 1UL) == 0) {
+		if (__predict_true(entry->htgen == htgen && entry->key == key))
+			return (entry->val);
+	}
+#endif
 
+	pshared_rlock(curthread);
 	hd = &pshared_hash[PSHARED_KEY_HASH(key)];
+	val = NULL;
 	LIST_FOREACH(h, hd, link) {
-		if (h->key == key)
-			return (h->val);
+		if (h->key == key) {
+			val = h->val;
+			break;
+		}
 	}
-	return (NULL);
+	pshared_unlock(curthread);
+
+#if (_PTHREAD_PSC_SIZE > 0)
+	if (val != NULL) {
+		entry->htgen = htgen;
+		entry->key = key;
+		entry->val = val;
+	}
+#endif
+
+	return (val);
 }
 
 static int
@@ -195,6 +261,7 @@
 	hd = &pshared_hash[PSHARED_KEY_HASH(key)];
 	LIST_FOREACH(h, hd, link) {
 		if (h->key == key) {
+			PSHARED_HTGEN_INVALIDATE();
 			LIST_REMOVE(h, link);
 			val = h->val;
 			free(h);
@@ -232,13 +299,11 @@
 	int fd, ins_done;
 
 	curthread = _get_curthread();
-	if (doalloc) {
+	if (__predict_false(doalloc)) {
 		pshared_destroy(curthread, key);
 		res = NULL;
 	} else {
-		pshared_rlock(curthread);
-		res = pshared_lookup(key);
-		pshared_unlock(curthread);
+		res = pshared_lookup(curthread, key);
 		if (res != NULL)
 			return (res);
 	}
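
For reference, the invalidation scheme above is a seqlock-style generation
protocol.  Below is a minimal standalone sketch of that protocol, using C11
<stdatomic.h> and _Thread_local in place of FreeBSD's atomic(9) primitives
(atomic_add_long()/atomic_add_rel_long()/atomic_load_acq_long()) and the
struct pthread storage used by the patch; the names psc_entry, psc_cache,
psc_invalidate(), and psc_lookup() are illustrative only and do not appear
in the patch:

#include <stdatomic.h>
#include <stdint.h>

#define	PSC_SIZE	11	/* per-thread cache slots; a small prime */

struct psc_entry {
	unsigned long	htgen;	/* generation at which the entry was cached */
	void		*key;
	void		*val;
};

/* Thread-private cache: the fast path never writes shared memory. */
static _Thread_local struct psc_entry psc_cache[PSC_SIZE];

/* Global generation count; an odd value means a removal is in flight. */
static atomic_ulong psc_htgen;

/*
 * Called with the table's write lock held, before removing an entry.
 * The first bump makes the count odd (removal in progress); the second
 * bump with release semantics guarantees that a reader whose acquire
 * load observes the final value also observes the first bump.
 */
void
psc_invalidate(void)
{
	atomic_fetch_add_explicit(&psc_htgen, 1, memory_order_relaxed);
	atomic_fetch_add_explicit(&psc_htgen, 1, memory_order_release);
}

/*
 * Fast-path lookup: on a cache hit, return without taking any locks;
 * on a miss, fall back to the caller's locked lookup and refill the slot.
 */
void *
psc_lookup(void *key, void *(*locked_lookup)(void *))
{
	struct psc_entry *e;
	unsigned long gen;
	void *val;

	e = &psc_cache[(uintptr_t)key % PSC_SIZE];
	gen = atomic_load_explicit(&psc_htgen, memory_order_acquire);
	if ((gen & 1UL) == 0 && e->htgen == gen && e->key == key)
		return (e->val);

	val = locked_lookup(key);	/* takes the table's read lock */
	if (val != NULL) {
		/*
		 * gen may already be stale here; that only causes a
		 * future miss, never a stale hit, because a hit needs
		 * equality with the current generation.
		 */
		e->htgen = gen;
		e->key = key;
		e->val = val;
	}
	return (val);
}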
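
Two properties fall out of this protocol.  An odd generation means a removal
is between its two increments, so readers skip the cache and take the read
lock; because the count only grows (the 64-bit _Static_assert rules out
wraparound), an odd value stored into a slot can never match a later load
and so never produces a hit.  Likewise, a generation that goes stale between
the acquire load and the locked lookup can at worst be cached and cost one
extra miss, never a stale hit, since a hit requires equality with the
current count.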