Index: sys/kern/vfs_cache.c
===================================================================
--- sys/kern/vfs_cache.c
+++ sys/kern/vfs_cache.c
@@ -48,7 +48,7 @@
 #include
 #include
 #include
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
 #include
 #include
 #include
@@ -176,14 +176,24 @@
 
 struct	nchstats nchstats;	/* cache effectiveness statistics */
 
-static struct rwlock cache_lock;
-RW_SYSINIT(vfscache, &cache_lock, "Name Cache");
+static struct rmlock cache_lock;
+RM_SYSINIT(vfscache, &cache_lock, "Name Cache");
 
-#define	CACHE_UPGRADE_LOCK()	rw_try_upgrade(&cache_lock)
-#define	CACHE_RLOCK()		rw_rlock(&cache_lock)
-#define	CACHE_RUNLOCK()		rw_runlock(&cache_lock)
-#define	CACHE_WLOCK()		rw_wlock(&cache_lock)
-#define	CACHE_WUNLOCK()		rw_wunlock(&cache_lock)
+/*
+ * Assert that the caller holds the cache write lock where required.
+ */
+#define	CACHE_WASSERT(msg)	KASSERT(rm_wowned(&cache_lock), (msg))
+
+/*
+ * An rmlock (read-mostly lock) is used so that priority propagation is
+ * possible.  An rmlock cannot be upgraded in this version of the OS, so
+ * report failure here and let the existing code fall back to the slow way.
+ */
+#define	CACHE_UPGRADE_LOCK()	(0)
+
+#define	CACHE_RLOCK(tracker)	rm_rlock(&cache_lock, (tracker))
+#define	CACHE_RUNLOCK(tracker)	rm_runlock(&cache_lock, (tracker))
+#define	CACHE_WLOCK()		rm_wlock(&cache_lock)
+#define	CACHE_WUNLOCK()		rm_wunlock(&cache_lock)
 
 /*
  * UMA zones for the VFS cache.
@@ -306,8 +316,13 @@
 
 static void	cache_zap(struct namecache *ncp);
+/*
+ * If vn_vptocnp_locked() returns 0, the cache lock is still held; if it
+ * returns an error, the lock has been released.  It may also drop and
+ * re-acquire the lock internally.
+ */
 static int	vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
-    u_int *buflen);
+    u_int *buflen, struct rm_priotracker *rm_tracker);
 static int	vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
     char *buf, char **retbuf, u_int buflen);
@@ -323,6 +338,7 @@
 static int
 sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
 {
+	struct rm_priotracker rm_tracker;
 	int error;
 	struct nchashhead *ncpp;
 	struct namecache *ncp;
@@ -335,12 +351,12 @@
 
 	/* Scan hash tables for applicable entries */
 	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
-		CACHE_RLOCK();
+		CACHE_RLOCK(&rm_tracker);
 		count = 0;
 		LIST_FOREACH(ncp, ncpp, nc_hash) {
 			count++;
 		}
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(&rm_tracker);
 		error = SYSCTL_OUT(req, &count, sizeof(count));
 		if (error)
 			return (error);
@@ -354,6 +370,7 @@
 static int
 sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
 {
+	struct rm_priotracker rm_tracker;
 	int error;
 	struct nchashhead *ncpp;
 	struct namecache *ncp;
@@ -370,11 +387,11 @@
 	/* Scan hash tables for applicable entries */
 	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
 		count = 0;
-		CACHE_RLOCK();
+		CACHE_RLOCK(&rm_tracker);
 		LIST_FOREACH(ncp, ncpp, nc_hash) {
 			count++;
 		}
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(&rm_tracker);
 		if (count)
 			used++;
 		if (maxlength < count)
@@ -413,7 +430,8 @@
 {
 	struct vnode *vp;
 
-	rw_assert(&cache_lock, RA_WLOCKED);
+	CACHE_WASSERT("Attempt to zap vfs cache without wlock");
+
 	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
 #ifdef KDTRACE_HOOKS
 	if (ncp->nc_vp != NULL) {
@@ -475,6 +493,7 @@
 	struct timespec *tsp;
 	int *ticksp;
 {
+	struct rm_priotracker rm_tracker;
 	struct namecache *ncp;
 	uint32_t hash;
 	int error, ltype, wlocked;
@@ -484,7 +503,7 @@
 		return (0);
 	}
 retry:
-	CACHE_RLOCK();
+	CACHE_RLOCK(&rm_tracker);
 	wlocked = 0;
 	numcalls++;
 	error = 0;
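
For readers unfamiliar with the rmlock API, the pattern this patch applies
throughout is sketched below.  The names (example_lock, example_value and the
two functions) are illustrative only and do not appear in vfs_cache.c; the
rm_* calls and RM_SYSINIT are the real <sys/rmlock.h> interface.  Each reader
supplies an rm_priotracker, normally on its own stack, which rm_rlock() links
into per-CPU state so a writer can find and wait out active readers.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

static struct rmlock example_lock;
RM_SYSINIT(example, &example_lock, "example");

static int example_value;		/* protected by example_lock */

static int
example_read(void)
{
	struct rm_priotracker tracker;	/* per-reader, lives on the stack */
	int v;

	rm_rlock(&example_lock, &tracker);
	v = example_value;
	rm_runlock(&example_lock, &tracker);
	return (v);
}

static void
example_write(int v)
{
	rm_wlock(&example_lock);
	/* The same check CACHE_WASSERT() performs, shown for illustration. */
	KASSERT(rm_wowned(&example_lock), ("example_lock not write-locked"));
	example_value = v;
	rm_wunlock(&example_lock);
}
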
@@ -624,7 +643,7 @@
 	 * We need to update the cache after our lookup, so upgrade to
 	 * a write lock and retry the operation.
 	 */
-	CACHE_RUNLOCK();
+	CACHE_RUNLOCK(&rm_tracker);
 	CACHE_WLOCK();
 	numupgrades++;
 	wlocked = 1;
@@ -640,7 +659,7 @@
 	if (wlocked)
 		CACHE_WUNLOCK();
 	else
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(&rm_tracker);
 	/*
 	 * When we lookup "." we still can be asked to lock it
 	 * differently...
@@ -669,7 +688,7 @@
 	if (wlocked)
 		CACHE_WUNLOCK();
 	else
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(&rm_tracker);
 	error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
@@ -694,7 +713,7 @@
 	if (wlocked)
 		CACHE_WUNLOCK();
 	else
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(&rm_tracker);
 	return (0);
 }
 
@@ -1184,18 +1203,19 @@
 int
 vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen)
 {
+	struct rm_priotracker rm_tracker;
 	int error;
 
-	CACHE_RLOCK();
-	error = vn_vptocnp_locked(vp, cred, buf, buflen);
+	CACHE_RLOCK(&rm_tracker);
+	error = vn_vptocnp_locked(vp, cred, buf, buflen, &rm_tracker);
 	if (error == 0)
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(&rm_tracker);
 	return (error);
 }
 
 static int
 vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
-    u_int *buflen)
+    u_int *buflen, struct rm_priotracker *rm_tracker)
 {
 	struct vnode *dvp;
 	struct namecache *ncp;
@@ -1207,7 +1227,7 @@
 	}
 	if (ncp != NULL) {
 		if (*buflen < ncp->nc_nlen) {
-			CACHE_RUNLOCK();
+			CACHE_RUNLOCK(rm_tracker);
			vrele(*vp);
 			numfullpathfail4++;
 			error = ENOMEM;
@@ -1222,14 +1242,14 @@
 		dvp = *vp;
 		*vp = ncp->nc_dvp;
 		vref(*vp);
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(rm_tracker);
 		vrele(dvp);
-		CACHE_RLOCK();
+		CACHE_RLOCK(rm_tracker);
 		return (0);
 	}
 	SDT_PROBE(vfs, namecache, fullpath, miss, vp, 0, 0, 0, 0);
 
-	CACHE_RUNLOCK();
+	CACHE_RUNLOCK(rm_tracker);
 	vn_lock(*vp, LK_SHARED | LK_RETRY);
 	error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen);
 	vput(*vp);
@@ -1241,10 +1261,10 @@
 	}
 	*vp = dvp;
 
-	CACHE_RLOCK();
+	CACHE_RLOCK(rm_tracker);
 	if (dvp->v_iflag & VI_DOOMED) {
 		/* forced unmount */
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(rm_tracker);
 		vrele(dvp);
 		error = ENOENT;
 		SDT_PROBE(vfs, namecache, fullpath, return, error, vp,
@@ -1265,6 +1285,7 @@
 vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
     char *buf, char **retbuf, u_int buflen)
 {
+	struct rm_priotracker rm_tracker;
 	int error, slash_prefixed;
 #ifdef KDTRACE_HOOKS
 	struct vnode *startvp = vp;
@@ -1279,13 +1300,14 @@
 	SDT_PROBE(vfs, namecache, fullpath, entry, vp, 0, 0, 0, 0);
 	numfullpathcalls++;
 	vref(vp);
-	CACHE_RLOCK();
+	CACHE_RLOCK(&rm_tracker);
 	if (vp->v_type != VDIR) {
-		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
+		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen,
+		    &rm_tracker);
 		if (error)
 			return (error);
 		if (buflen == 0) {
-			CACHE_RUNLOCK();
+			CACHE_RUNLOCK(&rm_tracker);
 			vrele(vp);
 			return (ENOMEM);
 		}
@@ -1295,7 +1317,7 @@
 	while (vp != rdir && vp != rootvnode) {
 		if (vp->v_vflag & VV_ROOT) {
 			if (vp->v_iflag & VI_DOOMED) {	/* forced unmount */
-				CACHE_RUNLOCK();
+				CACHE_RUNLOCK(&rm_tracker);
 				vrele(vp);
 				error = ENOENT;
 				SDT_PROBE(vfs, namecache, fullpath, return,
@@ -1304,14 +1326,14 @@
 			}
 			vp1 = vp->v_mount->mnt_vnodecovered;
 			vref(vp1);
-			CACHE_RUNLOCK();
+			CACHE_RUNLOCK(&rm_tracker);
 			vrele(vp);
 			vp = vp1;
-			CACHE_RLOCK();
+			CACHE_RLOCK(&rm_tracker);
 			continue;
 		}
 		if (vp->v_type != VDIR) {
-			CACHE_RUNLOCK();
+			CACHE_RUNLOCK(&rm_tracker);
 			vrele(vp);
 			numfullpathfail1++;
 			error = ENOTDIR;
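
Because CACHE_UPGRADE_LOCK() now always reports failure, cache_lookup()'s
pre-existing fallback (the @@ -624 hunk above) is what actually runs: drop the
read lock, take the write lock, and retry.  A minimal sketch of that pattern,
continuing the hypothetical example_* names from earlier:

static void
example_update(int v)
{
	struct rm_priotracker tracker;

	rm_rlock(&example_lock, &tracker);
	if (example_value == v) {
		/* Common case: the read lock alone was enough. */
		rm_runlock(&example_lock, &tracker);
		return;
	}
	/*
	 * An rmlock cannot be upgraded in place, so emulate the upgrade
	 * the slow way.  The lock is dropped entirely for a moment, so
	 * anything observed under the read lock must be re-checked once
	 * the write lock is held.
	 */
	rm_runlock(&example_lock, &tracker);
	rm_wlock(&example_lock);
	if (example_value != v)
		example_value = v;
	rm_wunlock(&example_lock);
}
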
@@ -1319,11 +1341,12 @@
 			    error, vp, NULL, 0, 0);
 			break;
 		}
-		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
+		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen,
+		    &rm_tracker);
 		if (error)
 			break;
 		if (buflen == 0) {
-			CACHE_RUNLOCK();
+			CACHE_RUNLOCK(&rm_tracker);
 			vrele(vp);
 			error = ENOMEM;
 			SDT_PROBE(vfs, namecache, fullpath, return, error,
@@ -1337,7 +1360,7 @@
 		return (error);
 	if (!slash_prefixed) {
 		if (buflen == 0) {
-			CACHE_RUNLOCK();
+			CACHE_RUNLOCK(&rm_tracker);
 			vrele(vp);
 			numfullpathfail4++;
 			SDT_PROBE(vfs, namecache, fullpath, return, ENOMEM,
@@ -1347,7 +1370,7 @@
 		buf[--buflen] = '/';
 	}
 	numfullpathfound++;
-	CACHE_RUNLOCK();
+	CACHE_RUNLOCK(&rm_tracker);
 	vrele(vp);
 	SDT_PROBE(vfs, namecache, fullpath, return, 0, startvp, buf + buflen,
@@ -1359,42 +1382,44 @@
 struct vnode *
 vn_dir_dd_ino(struct vnode *vp)
 {
+	struct rm_priotracker rm_tracker;
 	struct namecache *ncp;
 	struct vnode *ddvp;
 
 	ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino");
-	CACHE_RLOCK();
+	CACHE_RLOCK(&rm_tracker);
 	TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
 		VI_LOCK(ddvp);
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(&rm_tracker);
 		if (vget(ddvp, LK_INTERLOCK | LK_SHARED | LK_NOWAIT, curthread))
			return (NULL);
 		return (ddvp);
 	}
-	CACHE_RUNLOCK();
+	CACHE_RUNLOCK(&rm_tracker);
 	return (NULL);
 }
 
 int
 vn_commname(struct vnode *vp, char *buf, u_int buflen)
 {
+	struct rm_priotracker rm_tracker;
 	struct namecache *ncp;
 	int l;
 
-	CACHE_RLOCK();
+	CACHE_RLOCK(&rm_tracker);
 	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	if (ncp == NULL) {
-		CACHE_RUNLOCK();
+		CACHE_RUNLOCK(&rm_tracker);
 		return (ENOENT);
 	}
 	l = min(ncp->nc_nlen, buflen - 1);
 	memcpy(buf, nc_get_name(ncp), l);
-	CACHE_RUNLOCK();
+	CACHE_RUNLOCK(&rm_tracker);
 	buf[l] = '\0';
 	return (0);
 }
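
The reworked vn_vptocnp_locked() shows why the tracker must be passed down as
a parameter: the callee releases and re-acquires the caller's read lock around
sleepable operations, and its lock state on return depends on the return value
(held on 0, released on error).  A sketch of that convention under the same
hypothetical example_* names; the need_sleep and fail parameters stand in for
the real conditions vn_vptocnp_locked() tests:

static int
example_locked_op(struct rm_priotracker *tracker, int need_sleep, int fail)
{
	/* Entered with example_lock read-locked via *tracker. */
	if (need_sleep) {
		/*
		 * An rm_rlock() section must not sleep, so release the
		 * caller's read lock around sleepable work and take it
		 * back afterwards, using the caller's tracker.
		 */
		rm_runlock(&example_lock, tracker);
		/* ... sleepable work, e.g. vn_lock() or a VOP call ... */
		rm_rlock(&example_lock, tracker);
	}
	if (fail) {
		/* Error return: the convention leaves the lock released. */
		rm_runlock(&example_lock, tracker);
		return (ENOMEM);
	}
	return (0);			/* Success: the lock is still held. */
}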