Index: sys/kern/init_main.c
===================================================================
--- sys/kern/init_main.c
+++ sys/kern/init_main.c
@@ -541,7 +541,10 @@
 	/* End hack. creds get properly set later with thread_cow_get_proc */
 	curthread->td_ucred = NULL;
 	newcred->cr_prison = &prison0;
+	newcred->cr_users++; /* avoid assertion failure */
 	proc_set_cred_init(p, newcred);
+	newcred->cr_users--;
+	crfree(newcred);
 #ifdef AUDIT
 	audit_cred_kproc0(newcred);
 #endif
@@ -810,8 +813,8 @@
 #endif
 	proc_set_cred(initproc, newcred);
 	td = FIRST_THREAD_IN_PROC(initproc);
-	crfree(td->td_ucred);
-	td->td_ucred = crhold(initproc->p_ucred);
+	crcowfree(td);
+	td->td_ucred = crcowget(initproc->p_ucred);
 	PROC_UNLOCK(initproc);
 	sx_xunlock(&proctree_lock);
 	crfree(oldcred);
Index: sys/kern/kern_exit.c
===================================================================
--- sys/kern/kern_exit.c
+++ sys/kern/kern_exit.c
@@ -952,8 +952,7 @@
 	/*
 	 * Free credentials, arguments, and sigacts.
 	 */
-	crfree(p->p_ucred);
-	proc_set_cred(p, NULL);
+	proc_unset_cred(p);
 	pargs_drop(p->p_args);
 	p->p_args = NULL;
 	sigacts_free(p->p_sigacts);
Index: sys/kern/kern_fork.c
===================================================================
--- sys/kern/kern_fork.c
+++ sys/kern/kern_fork.c
@@ -969,7 +969,7 @@
 	 * XXX: This is ugly; when we copy resource usage, we need to bump
 	 * per-cred resource counters.
 	 */
-	proc_set_cred_init(newproc, crhold(td->td_ucred));
+	proc_set_cred_init(newproc, td->td_ucred);
 
 	/*
 	 * Initialize resource accounting for the child process.
Index: sys/kern/kern_prot.c
===================================================================
--- sys/kern/kern_prot.c
+++ sys/kern/kern_prot.c
@@ -1841,6 +1841,114 @@
 	return (0);
 }
 
+/*
+ * struct ucred reference counting description
+ *
+ * struct ucred objects are rarely allocated but gain and lose references all
+ * the time (e.g., on struct file alloc/dealloc) turning refcount updates into
+ * a significant source of cache-line ping ponging. The problem is largely
+ * worked around by modifying a thread-local counter instead if the cred to
+ * operate on matches td_ucred.
+ *
+ * The counter is split into 2 parts:
+ * - cr_users -- total count of all struct proc and struct thread objects
+ *   which have the given cred in p_ucred and td_ucred respectively
+ * - cr_ref -- the actual ref count
+ *
+ * If users == 0 then cr_ref behaves similarly to refcount(9) but uses mutexes,
+ * in particular if the count reaches 0 the object is freeable.
+ * If users > 0 and curthread->td_ucred == cred, then updates are performed
+ * against td_ucredref.
+ * Changing td_ucred into something else decrements cr_users and transfers
+ * accumulated updates in td_ucredref. Should the new cr_users count be 0,
+ * the resulting cr_ref represents the total count of all references.
+ */
+
+/*
+ * Add a user (a struct proc or a struct thread) to a cred which already has
+ * at least one and take the reference held on behalf of that user.
+ */
+struct ucred *
+crcowget(struct ucred *cr)
+{
+
+	mtx_lock(&cr->cr_mtx);
+	KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	cr->cr_users++;
+	cr->cr_ref++;
+	mtx_unlock(&cr->cr_mtx);
+	return (cr);
+}
+
+/*
+ * Stop using td_ucred: fold the updates batched in td_ucredref into cr_ref
+ * and drop td from cr_users.  Returns the cred if td was its last user, in
+ * which case the caller still has to crfree() it, or NULL if the reference
+ * held by td was dropped here.
+ */
+static struct ucred *
+crunuse(struct thread *td)
+{
+	struct ucred *cr, *crold;
+
+	cr = td->td_ucred;
+	mtx_lock(&cr->cr_mtx);
+	cr->cr_ref += td->td_ucredref;
+	td->td_ucredref = 0;
+	KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	cr->cr_users--;
+	if (cr->cr_users == 0) {
+		KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
+		    __func__, cr->cr_ref, cr));
+		crold = cr;
+	} else {
+		cr->cr_ref--;
+		crold = NULL;
+	}
+	mtx_unlock(&cr->cr_mtx);
+	return (crold);
+}
+
+/*
+ * Stop using the cred of td and crfree() it if td was the last user.
+ */
+void
+crcowfree(struct thread *td)
+{
+	struct ucred *cr;
+
+	cr = crunuse(td);
+	if (cr != NULL)
+		crfree(cr);
+}
+
+/*
+ * Make curthread use the cred of its process; the proc lock must be held.
+ * Returns the old cred if curthread was its last user, in which case the
+ * caller has to crfree() it, or NULL otherwise.
+ */
+struct ucred *
+crcowsync(void)
+{
+	struct thread *td;
+	struct proc *p;
+	struct ucred *crnew, *crold;
+
+	td = curthread;
+	p = td->td_proc;
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+
+	if (td->td_ucred == p->p_ucred)
+		return (NULL);
+
+	crnew = crcowget(p->p_ucred);
+	crold = crunuse(td);
+	td->td_ucred = crnew;
+	return (crold);
+}
+
 /*
  * Allocate a zeroed cred structure.
  */
@@ -1850,7 +1958,8 @@
 	struct ucred *cr;
 
 	cr = malloc(sizeof(*cr), M_CRED, M_WAITOK | M_ZERO);
-	refcount_init(&cr->cr_ref, 1);
+	mtx_init(&cr->cr_mtx, "cred", NULL, MTX_DEF);
+	cr->cr_ref = 1;
 #ifdef AUDIT
 	audit_cred_init(cr);
 #endif
@@ -1869,8 +1978,19 @@
 struct ucred *
 crhold(struct ucred *cr)
 {
+	struct thread *td;
 
-	refcount_acquire(&cr->cr_ref);
+	td = curthread;
+	if (__predict_true(td->td_ucred == cr)) {
+		KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+		    __func__, cr->cr_users, cr));
+		/* Batched locally; crunuse() folds it into cr_ref. */
+		td->td_ucredref++;
+		return (cr);
+	}
+	mtx_lock(&cr->cr_mtx);
+	cr->cr_ref++;
+	mtx_unlock(&cr->cr_mtx);
 	return (cr);
 }
 
@@ -1880,36 +2000,61 @@
 void
 crfree(struct ucred *cr)
 {
+	struct thread *td;
 
-	KASSERT(cr->cr_ref > 0, ("bad ucred refcount: %d", cr->cr_ref));
-	KASSERT(cr->cr_ref != 0xdeadc0de, ("dangling reference to ucred"));
-	if (refcount_release(&cr->cr_ref)) {
-		/*
-		 * Some callers of crget(), such as nfs_statfs(),
-		 * allocate a temporary credential, but don't
-		 * allocate a uidinfo structure.
-		 */
-		if (cr->cr_uidinfo != NULL)
-			uifree(cr->cr_uidinfo);
-		if (cr->cr_ruidinfo != NULL)
-			uifree(cr->cr_ruidinfo);
-		/*
-		 * Free a prison, if any.
-		 */
-		if (cr->cr_prison != NULL)
-			prison_free(cr->cr_prison);
-		if (cr->cr_loginclass != NULL)
-			loginclass_free(cr->cr_loginclass);
+	td = curthread;
+	if (td->td_ucred == cr) {
+		KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+		    __func__, cr->cr_users, cr));
+		/* May go negative; only the sum with cr_ref matters. */
+		td->td_ucredref--;
+		return;
+	}
+	KASSERT(cr->cr_users >= 0, ("%s: users %d not >= 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	mtx_lock(&cr->cr_mtx);
+	cr->cr_ref--;
+	/*
+	 * As long as the cred has users, updates batched by them may be
+	 * missing from cr_ref; it is freed only after the last user is gone.
+	 */
+	if (cr->cr_users > 0) {
+		mtx_unlock(&cr->cr_mtx);
+		return;
+	}
+	KASSERT(cr->cr_ref >= 0, ("%s: ref %d not >= 0 on cred %p",
+	    __func__, cr->cr_ref, cr));
+	if (cr->cr_ref > 0) {
+		mtx_unlock(&cr->cr_mtx);
+		return;
+	}
+	/*
+	 * Some callers of crget(), such as nfs_statfs(),
+	 * allocate a temporary credential, but don't
+	 * allocate a uidinfo structure.
+	 */
+	if (cr->cr_uidinfo != NULL)
+		uifree(cr->cr_uidinfo);
+	if (cr->cr_ruidinfo != NULL)
+		uifree(cr->cr_ruidinfo);
+	/*
+	 * Free a prison, if any.
+	 */
+	if (cr->cr_prison != NULL)
+		prison_free(cr->cr_prison);
+	if (cr->cr_loginclass != NULL)
+		loginclass_free(cr->cr_loginclass);
 #ifdef AUDIT
-		audit_cred_destroy(cr);
+	audit_cred_destroy(cr);
 #endif
 #ifdef MAC
-		mac_cred_destroy(cr);
+	mac_cred_destroy(cr);
 #endif
-		if (cr->cr_groups != cr->cr_smallgroups)
-			free(cr->cr_groups, M_CRED);
-		free(cr, M_CRED);
-	}
+	/* cr_mtx is still held; mutex(9) permits destroying it that way. */
+	mtx_destroy(&cr->cr_mtx);
+	if (cr->cr_groups != cr->cr_smallgroups)
+		free(cr->cr_groups, M_CRED);
+	free(cr, M_CRED);
 }
 
 /*
@@ -1983,7 +2128,7 @@
 proc_set_cred_init(struct proc *p, struct ucred *newcred)
 {
 
-	p->p_ucred = newcred;
+	p->p_ucred = crcowget(newcred);
 }
 
 /*
@@ -1999,16 +2144,46 @@
 void
 proc_set_cred(struct proc *p, struct ucred *newcred)
 {
+	struct ucred *cr;
 
-	MPASS(p->p_ucred != NULL);
-	if (newcred == NULL)
-		MPASS(p->p_state == PRS_ZOMBIE);
-	else
-		PROC_LOCK_ASSERT(p, MA_OWNED);
-
+	cr = p->p_ucred;
+	MPASS(cr != NULL);
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	KASSERT(newcred->cr_users == 0, ("%s: users %d not 0 on cred %p",
+	    __func__, newcred->cr_users, newcred));
+	/* The old cred may have other users, so cr_users needs cr_mtx. */
+	mtx_lock(&cr->cr_mtx);
+	KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	cr->cr_users--;
+	mtx_unlock(&cr->cr_mtx);
 	p->p_ucred = newcred;
-	if (newcred != NULL)
-		PROC_UPDATE_COW(p);
+	newcred->cr_users = 1;
+	PROC_UPDATE_COW(p);
+}
+
+/*
+ * Drop the cred of a zombie process: the process stops being a user of the
+ * cred and the reference it held is released.
+ */
+void
+proc_unset_cred(struct proc *p)
+{
+	struct ucred *cr;
+
+	MPASS(p->p_state == PRS_ZOMBIE);
+	cr = p->p_ucred;
+	/* Do not leave a pointer to a cred which may be freed below. */
+	p->p_ucred = NULL;
+	mtx_lock(&cr->cr_mtx);
+	KASSERT(cr->cr_users > 0, ("%s: users %d not > 0 on cred %p",
+	    __func__, cr->cr_users, cr));
+	cr->cr_users--;
+	if (cr->cr_users == 0)
+		KASSERT(cr->cr_ref > 0, ("%s: ref %d not > 0 on cred %p",
+		    __func__, cr->cr_ref, cr));
+	mtx_unlock(&cr->cr_mtx);
+	crfree(cr);
 }
 
 struct ucred *
Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -465,7 +465,7 @@
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	newtd->td_ucred = crhold(p->p_ucred);
+	newtd->td_ucred = crcowget(p->p_ucred);
 	newtd->td_limit = lim_hold(p->p_limit);
 	newtd->td_cowgen = p->p_cowgen;
 }
@@ -474,7 +474,7 @@
 thread_cow_get(struct thread *newtd, struct thread *td)
 {
 
-	newtd->td_ucred = crhold(td->td_ucred);
+	newtd->td_ucred = crcowget(td->td_ucred);
 	newtd->td_limit = lim_hold(td->td_limit);
 	newtd->td_cowgen = td->td_cowgen;
 }
@@ -484,7 +484,7 @@
 {
 
 	if (td->td_ucred != NULL)
-		crfree(td->td_ucred);
+		crcowfree(td);
 	if (td->td_limit != NULL)
 		lim_free(td->td_limit);
 }
@@ -497,13 +497,9 @@
 	struct plimit *oldlimit;
 
 	p = td->td_proc;
-	oldcred = NULL;
 	oldlimit = NULL;
 	PROC_LOCK(p);
-	if (td->td_ucred != p->p_ucred) {
-		oldcred = td->td_ucred;
-		td->td_ucred = crhold(p->p_ucred);
-	}
+	oldcred = crcowsync();
 	if (td->td_limit != p->p_limit) {
 		oldlimit = td->td_limit;
 		td->td_limit = lim_hold(p->p_limit);
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -306,6 +306,7 @@
 	int		td_errno;	/* (k) Error from last syscall. */
 	size_t		td_vslock_sz;	/* (k) amount of vslock-ed space */
 	struct kcov_info *td_kcov_info;	/* (*) Kernel code coverage data */
+	int		td_ucredref;	/* batched td_ucred refs, can be < 0 */
 #define	td_endzero td_sigmask
 
 /* Copied during fork1() or create_thread(). */
Index: sys/sys/ucred.h
===================================================================
--- sys/sys/ucred.h
+++ sys/sys/ucred.h
@@ -35,6 +35,8 @@
 #ifndef _SYS_UCRED_H_
 #define	_SYS_UCRED_H_
 
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
 #include <bsm/audit.h>
 
 struct loginclass;
@@ -49,7 +51,9 @@
  */
 #if defined(_KERNEL) || defined(_WANT_UCRED)
 struct ucred {
+	struct mtx	cr_mtx;		/* guards cr_ref and cr_users */
 	u_int		cr_ref;		/* reference count */
+	u_int		cr_users;	/* procs + threads using cred */
 #define	cr_startcopy cr_uid
 	uid_t		cr_uid;		/* effective user id */
 	uid_t		cr_ruid;	/* real user id */
@@ -113,9 +117,13 @@
 void	crextend(struct ucred *cr, int n);
 void	proc_set_cred_init(struct proc *p, struct ucred *cr);
 void	proc_set_cred(struct proc *p, struct ucred *cr);
+void	proc_unset_cred(struct proc *p);
 void	crfree(struct ucred *cr);
+struct ucred	*crcowsync(void);
 struct ucred	*crget(void);
 struct ucred	*crhold(struct ucred *cr);
+struct ucred	*crcowget(struct ucred *cr);
+void	crcowfree(struct thread *td);
 void	cru2x(struct ucred *cr, struct xucred *xcr);
 void	cru2xt(struct thread *td, struct xucred *xcr);
 void	crsetgroups(struct ucred *cr, int n, gid_t *groups);