diff --git a/lib/libthr/Makefile b/lib/libthr/Makefile
--- a/lib/libthr/Makefile
+++ b/lib/libthr/Makefile
@@ -44,6 +44,14 @@
 CFLAGS+=-D_PTHREAD_FORCED_UNWIND
 .endif
 
+.if ${MACHINE_ABI:Mlong64}
+# Specify the number of elements in the per-thread pshared lock lookup
+# cache (set it to zero to disable).  This directly affects the size of
+# struct pthread, so keep it small; a prime size hashes keys more evenly.
+#
+CFLAGS+=-D_PTHREAD_PSC_SIZE=11
+.endif
+
 LDFLAGS+=-Wl,-znodelete
 
 VERSION_DEF=${SRCTOP}/lib/libc/Versions.def
diff --git a/lib/libthr/thread/thr_init.c b/lib/libthr/thread/thr_init.c
--- a/lib/libthr/thread/thr_init.c
+++ b/lib/libthr/thread/thr_init.c
@@ -340,9 +340,13 @@
 			PANIC("Can't allocate initial thread");
 		init_main_thread(curthread);
 	} else {
+#if (_PTHREAD_PSC_SIZE > 0)
+		/* Invalidate the pshared lookup cache after a fork. */
+		memset(curthread->psc_ht, 0, sizeof(curthread->psc_ht));
+#endif
 		first = 0;
 	}
-	
+
 	/*
 	 * Add the thread to the thread list queue.
 	 */
diff --git a/lib/libthr/thread/thr_private.h b/lib/libthr/thread/thr_private.h
--- a/lib/libthr/thread/thr_private.h
+++ b/lib/libthr/thread/thr_private.h
@@ -361,6 +361,17 @@
 	void	(*destructor)(void *);
 };
 
+#if (_PTHREAD_PSC_SIZE > 0)
+/*
+ * Per-thread pshared lock lookup cache entry.
+ */
+struct pthread_psc_entry {
+	u_long	htgen;
+	void	*key;
+	void	*val;
+};
+#endif
+
 /*
  * lwpid_t is 32bit but kernel thr API exports tid as long type
  * to preserve the ABI for M:N model in very early date (r131431).
@@ -532,6 +543,13 @@
 	int	unwind_disabled;
 #endif
 
+#if (_PTHREAD_PSC_SIZE > 0)
+	/*
+	 * Per-thread pshared lock lookup cache hash table.
+	 */
+	struct pthread_psc_entry psc_ht[_PTHREAD_PSC_SIZE];
+#endif
+
 	/*
 	 * Magic value to help recognize a valid thread structure
 	 * from an invalid one:
diff --git a/lib/libthr/thread/thr_pshared.c b/lib/libthr/thread/thr_pshared.c
--- a/lib/libthr/thread/thr_pshared.c
+++ b/lib/libthr/thread/thr_pshared.c
@@ -52,6 +52,59 @@
 static struct urwlock pshared_lock = DEFAULT_URWLOCK;
 static int page_size;
 
+#if (_PTHREAD_PSC_SIZE > 0)
+/*
+ * A thread performing a lookup of a key AFTER that key has been
+ * removed from the pshared_hash[] hash table must never return
+ * a stale/invalid value from its lookup cache.  To track this,
+ * we maintain a generation counter (pshared_htgen) which is
+ * bumped each time an entry is removed.
+ *
+ * Each thread maintains a small cache of previously looked up keys,
+ * where each cache entry keeps a copy of the generation count valid
+ * at the time the entry was last updated.  If, during a lookup, we
+ * find that the affected cache entry's generation count does not
+ * match pshared_htgen, then the entry is potentially stale and must
+ * be disregarded.
+ *
+ * Note that a thread looking up a key concurrently with another
+ * thread removing that key could find and return a stale value for
+ * that key, regardless of whether the pshared lock cache is enabled.
+ * In this case the application is in error, and will likely either
+ * segfault or corrupt memory.
+ */
+static u_long pshared_htgen;
+
+static void
+pshared_htgen_invalidate_begin(void)
+{
+
+	pshared_htgen++;
+	atomic_thread_fence_rel();
+}
+
+static void
+pshared_htgen_invalidate_end(void)
+{
+
+	atomic_thread_fence_rel();
+	pshared_htgen++;
+}
+
+static u_long
+pshared_htgen_read(void)
+{
+
+	atomic_thread_fence_acq();
+	return (pshared_htgen);
+}
+
+#else
+#define pshared_htgen_invalidate_begin()
+#define pshared_htgen_invalidate_end()
+#define pshared_htgen_read()
+#endif
+
 void
 __thr_pshared_init(void)
 {
@@ -116,7 +169,9 @@
 			    h->val, NULL);
 			if (error == 0)
 				continue;
+			pshared_htgen_invalidate_begin();
 			LIST_REMOVE(h, link);
+			pshared_htgen_invalidate_end();
 			munmap(h->val, page_size);
 			free(h);
 		}
@@ -125,17 +180,48 @@
 }
 
 static void *
-pshared_lookup(void *key)
+pshared_lookup(struct pthread *curthread, void *key)
 {
 	struct pshared_hash_head *hd;
 	struct psh *h;
+	void *val;
+
+#if (_PTHREAD_PSC_SIZE > 0)
+	struct pthread_psc_entry *entry;
+	u_long htgen;
+
+	/*
+	 * The fence+load in pshared_htgen_read() synchronizes with the
+	 * store+fence pairs in pshared_htgen_invalidate_begin()/_end().
+	 */
+	entry = &curthread->psc_ht[(uintptr_t)key % _PTHREAD_PSC_SIZE];
+	htgen = pshared_htgen_read();
+	if (__predict_true((htgen & 1ul) == 0)) {
+		if (__predict_true(entry->htgen == htgen && entry->key == key))
+			return (entry->val);
+	}
+#endif
 
+	pshared_rlock(curthread);
 	hd = &pshared_hash[PSHARED_KEY_HASH(key)];
+	val = NULL;
 	LIST_FOREACH(h, hd, link) {
-		if (h->key == key)
-			return (h->val);
+		if (h->key == key) {
+			val = h->val;
+			break;
+		}
 	}
+	pshared_unlock(curthread);
+
+#if (_PTHREAD_PSC_SIZE > 0)
+	if (val != NULL) {
+		entry->htgen = htgen;
+		entry->key = key;
+		entry->val = val;
+	}
+#endif
+
+	return (val);
 }
 
 static int
@@ -195,7 +281,9 @@
 	hd = &pshared_hash[PSHARED_KEY_HASH(key)];
 	LIST_FOREACH(h, hd, link) {
 		if (h->key == key) {
+			pshared_htgen_invalidate_begin();
 			LIST_REMOVE(h, link);
+			pshared_htgen_invalidate_end();
 			val = h->val;
 			free(h);
 			return (val);
@@ -232,13 +320,11 @@
 	int fd, ins_done;
 
 	curthread = _get_curthread();
-	if (doalloc) {
+	if (__predict_false(doalloc)) {
 		pshared_destroy(curthread, key);
 		res = NULL;
 	} else {
-		pshared_rlock(curthread);
-		res = pshared_lookup(key);
-		pshared_unlock(curthread);
+		res = pshared_lookup(curthread, key);
 		if (res != NULL)
 			return (res);
 	}
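For reference, the invalidation protocol above can be read in isolation.  The
sketch below mirrors the fence placement of pshared_htgen_invalidate_begin()/
_end() and pshared_htgen_read() using C11 <stdatomic.h>:
atomic_thread_fence(memory_order_release/acquire) stands in for FreeBSD's
atomic_thread_fence_rel()/atomic_thread_fence_acq(), and the counter is an
atomic_ulong with relaxed increments (the patch uses a plain u_long, relying
on atomic(9) semantics rather than ISO C).  All names here (PSC_SIZE, psc_*,
htgen_*) are illustrative stand-ins, not part of libthr.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PSC_SIZE 11			/* small prime, as _PTHREAD_PSC_SIZE above */

struct psc_entry {
	unsigned long htgen;		/* generation seen when entry was filled */
	void *key;
	void *val;
};

/* Global generation counter: even while stable, odd while a removal runs. */
static atomic_ulong htgen;

/* Per-thread cache, standing in for psc_ht[] in struct pthread. */
static _Thread_local struct psc_entry psc[PSC_SIZE];

static void
htgen_invalidate_begin(void)
{
	/* Make the odd count visible before the removal's stores. */
	atomic_fetch_add_explicit(&htgen, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);
}

static void
htgen_invalidate_end(void)
{
	/* Order the removal's stores before the closing increment. */
	atomic_thread_fence(memory_order_release);
	atomic_fetch_add_explicit(&htgen, 1, memory_order_relaxed);
}

static unsigned long
htgen_read(void)
{
	atomic_thread_fence(memory_order_acquire);
	return (atomic_load_explicit(&htgen, memory_order_relaxed));
}

/* Fast path: consult the cache; NULL means "fall back to the locked lookup". */
static void *
psc_lookup(void *key, unsigned long *genp)
{
	struct psc_entry *e = &psc[(uintptr_t)key % PSC_SIZE];

	*genp = htgen_read();
	if ((*genp & 1ul) == 0 && e->htgen == *genp && e->key == key)
		return (e->val);
	return (NULL);
}

/* Slow path succeeded: remember the translation for next time. */
static void
psc_fill(void *key, void *val, unsigned long gen)
{
	struct psc_entry *e = &psc[(uintptr_t)key % PSC_SIZE];

	e->htgen = gen;
	e->key = key;
	e->val = val;
}

int
main(void)
{
	unsigned long gen;
	int k, v;

	if (psc_lookup(&k, &gen) == NULL)	/* miss: locked lookup runs... */
		psc_fill(&k, &v, gen);		/* ...and refills the cache */
	printf("hit: %d\n", psc_lookup(&k, &gen) == (void *)&v);

	htgen_invalidate_begin();		/* removal in progress... */
	htgen_invalidate_end();			/* ...done; all caches stale */
	printf("stale: %d\n", psc_lookup(&k, &gen) == NULL);
	return (0);
}

The writer holds the counter at an odd value for the duration of a removal, so
a reader that observes either an odd count or a count different from the one
recorded in its cache entry disregards the entry and falls back to the locked
hash-table lookup.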
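For context on the path this cache accelerates: every operation on a
PTHREAD_PROCESS_SHARED lock resolves the user-visible object to its off-page
shared copy via __thr_pshared_offpage() and hence pshared_lookup(), which
previously always took pshared_rlock().  A minimal single-process exerciser of
that path (illustrative only, error handling abbreviated):

#include <pthread.h>
#include <stdlib.h>

int
main(void)
{
	pthread_mutexattr_t attr;
	pthread_mutex_t m;
	int i;

	if (pthread_mutexattr_init(&attr) != 0 ||
	    pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) != 0 ||
	    pthread_mutex_init(&m, &attr) != 0)
		abort();
	pthread_mutexattr_destroy(&attr);

	/*
	 * Every lock/unlock of a pshared mutex resolves 'm' to its
	 * off-page copy; only the first resolution should miss the
	 * per-thread cache.
	 */
	for (i = 0; i < 1000000; i++) {
		pthread_mutex_lock(&m);
		pthread_mutex_unlock(&m);
	}

	pthread_mutex_destroy(&m);
	return (0);
}

With _PTHREAD_PSC_SIZE=11 as set above, every resolution of 'm' after the
first should be satisfied from the per-thread cache without touching
pshared_lock.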