Changeset View
Standalone View
sys/kern/vfs_vnops.c
Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | |||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/namei.h> | #include <sys/namei.h> | ||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#include <sys/bio.h> | #include <sys/bio.h> | ||||
#include <sys/buf.h> | #include <sys/buf.h> | ||||
#include <sys/filio.h> | #include <sys/filio.h> | ||||
#include <sys/resourcevar.h> | #include <sys/resourcevar.h> | ||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/prng.h> | |||||
#include <sys/sx.h> | #include <sys/sx.h> | ||||
#include <sys/sleepqueue.h> | #include <sys/sleepqueue.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/ttycom.h> | #include <sys/ttycom.h> | ||||
#include <sys/conf.h> | #include <sys/conf.h> | ||||
#include <sys/syslog.h> | #include <sys/syslog.h> | ||||
#include <sys/unistd.h> | #include <sys/unistd.h> | ||||
#include <sys/user.h> | #include <sys/user.h> | ||||
▲ Show 20 Lines • Show All 188 Lines • ▼ Show 20 Lines | #ifdef MAC | ||||
if (error == 0) | if (error == 0) | ||||
#endif | #endif | ||||
error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, | error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, | ||||
&ndp->ni_cnd, vap); | &ndp->ni_cnd, vap); | ||||
vput(ndp->ni_dvp); | vput(ndp->ni_dvp); | ||||
vn_finished_write(mp); | vn_finished_write(mp); | ||||
if (error) { | if (error) { | ||||
NDFREE(ndp, NDF_ONLY_PNBUF); | NDFREE(ndp, NDF_ONLY_PNBUF); | ||||
if (error == ERELOOKUP) | |||||
goto restart; | |||||
markj: After r367130 I believe you need a NDREINIT here. | |||||
return (error); | return (error); | ||||
} | } | ||||
fmode &= ~O_TRUNC; | fmode &= ~O_TRUNC; | ||||
vp = ndp->ni_vp; | vp = ndp->ni_vp; | ||||
} else { | } else { | ||||
if (ndp->ni_dvp == ndp->ni_vp) | if (ndp->ni_dvp == ndp->ni_vp) | ||||
vrele(ndp->ni_dvp); | vrele(ndp->ni_dvp); | ||||
else | else | ||||
▲ Show 20 Lines • Show All 1,233 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
struct mount *mp; | struct mount *mp; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
void *rl_cookie; | void *rl_cookie; | ||||
int error; | int error; | ||||
vp = fp->f_vnode; | vp = fp->f_vnode; | ||||
retry: | |||||
/* | /* | ||||
* Lock the whole range for truncation. Otherwise split i/o | * Lock the whole range for truncation. Otherwise split i/o | ||||
* might happen partly before and partly after the truncation. | * might happen partly before and partly after the truncation. | ||||
*/ | */ | ||||
rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); | rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); | ||||
error = vn_start_write(vp, &mp, V_WAIT | PCATCH); | error = vn_start_write(vp, &mp, V_WAIT | PCATCH); | ||||
if (error) | if (error) | ||||
goto out1; | goto out1; | ||||
Show All 10 Lines | |||||
#endif | #endif | ||||
error = vn_truncate_locked(vp, length, (fp->f_flag & O_FSYNC) != 0, | error = vn_truncate_locked(vp, length, (fp->f_flag & O_FSYNC) != 0, | ||||
fp->f_cred); | fp->f_cred); | ||||
out: | out: | ||||
VOP_UNLOCK(vp); | VOP_UNLOCK(vp); | ||||
vn_finished_write(mp); | vn_finished_write(mp); | ||||
out1: | out1: | ||||
vn_rangelock_unlock(vp, rl_cookie); | vn_rangelock_unlock(vp, rl_cookie); | ||||
if (error == ERELOOKUP) | |||||
goto retry; | |||||
return (error); | return (error); | ||||
} | } | ||||
/* | /* | ||||
* Truncate a file that is already locked. | * Truncate a file that is already locked. | ||||
*/ | */ | ||||
int | int | ||||
vn_truncate_locked(struct vnode *vp, off_t length, bool sync, | vn_truncate_locked(struct vnode *vp, off_t length, bool sync, | ||||
▲ Show 20 Lines • Show All 1,749 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
if (error != 0 || len == 0) | if (error != 0 || len == 0) | ||||
break; | break; | ||||
KASSERT(olen > len, ("Iteration did not make progress?")); | KASSERT(olen > len, ("Iteration did not make progress?")); | ||||
maybe_yield(); | maybe_yield(); | ||||
} | } | ||||
return (error); | return (error); | ||||
} | |||||
/* | |||||
* Lock pair of vnodes vp1, vp2, avoiding lock order | |||||
* reversal. vp1_locked indicates if vp1 is exclusively locked or not, | |||||
* vnode must not be shared-locked by the current thread. Same for | |||||
Done Inline ActionsThis sentence is a bit confusing since "vp1" and "vnode" refer to the same thing. Maybe, "vp1_locked indicates whether vp1 is exclusively locked; if not, vp1 must be unlocked." markj: This sentence is a bit confusing since "vp1" and "vnode" refer to the same thing. Maybe… | |||||
* vp2 and vp2_locked. One of the vnodes can be NULL. | |||||
* | |||||
* Function returns with both vnodes exclusively locked, and | |||||
Done Inline ActionsThe function markj: The function | |||||
* guarantees that it does not create lock order reversal with other | |||||
* threads during its execution. Both vnodes could be unlocked | |||||
* temporary (and reclaimed). | |||||
*/ | |||||
void | |||||
vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2, | |||||
bool vp2_locked) | |||||
{ | |||||
int error; | |||||
if (vp1 == NULL && vp2 == NULL) | |||||
return; | |||||
if (vp1 != NULL) { | |||||
if (vp1_locked) | |||||
ASSERT_VOP_ELOCKED(vp1, "vp1"); | |||||
else | |||||
ASSERT_VOP_UNLOCKED(vp1, "vp1"); | |||||
} else { | |||||
vp1_locked = true; | |||||
} | |||||
if (vp2 != NULL) { | |||||
if (vp2_locked) | |||||
ASSERT_VOP_ELOCKED(vp2, "vp2"); | |||||
else | |||||
ASSERT_VOP_UNLOCKED(vp2, "vp2"); | |||||
} else { | |||||
vp2_locked = true; | |||||
} | |||||
if (!vp1_locked && !vp2_locked) { | |||||
vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY); | |||||
vp1_locked = true; | |||||
} | |||||
for (;;) { | |||||
if (vp1_locked && vp2_locked) | |||||
break; | |||||
if (vp1_locked && vp2 != NULL) { | |||||
if (vp1 != NULL) { | |||||
error = VOP_LOCK1(vp2, LK_EXCLUSIVE | LK_NOWAIT, | |||||
__FILE__, __LINE__); | |||||
if (error == 0) | |||||
break; | |||||
VOP_UNLOCK(vp1); | |||||
vp1_locked = false; | |||||
pause("vlp1", prng32_bounded(100)); | |||||
Done Inline ActionsSo we may sleep up to 100ms, assuming hz=1000? That seems like a long time. I thought typical vnode lock hold times would be much smaller than that, unlike buf locks. It might be nicer to write 100 as a function of hz. hz=100 is a common setting in guest VMs. markj: So we may sleep up to 100ms, assuming hz=1000? That seems like a long time. I thought typical… | |||||
Done Inline ActionsWe should own the vnode lock while we own buffer lock. There are exceptions, like we do not lock devvp for io, and async io 'gets out of vnode lock' when buffer lock owner is reassigned LK_KERNPROC. But otherwise, we keep vnode lock for the duration of io. As consequence, vnode lock could be held for quite long time. For instance, on busy HDD 100 ms is completely normal, and I can regularly see peaks up to several seconds. On the other hand, devices with deep queues and low latency like good nvme provide very different vnode lock hold times, sure. kib: We should own the vnode lock while we own buffer lock. There are exceptions, like we do not… | |||||
Done Inline ActionsShould we perhaps add a debug counter for pause() calls? markj: Should we perhaps add a debug counter for pause() calls? | |||||
Done Inline ActionsMissing ws around / markj: Missing ws around `/` | |||||
} | |||||
vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY); | |||||
vp2_locked = true; | |||||
} | |||||
if (vp2_locked && vp1 != NULL) { | |||||
if (vp2 != NULL) { | |||||
error = VOP_LOCK1(vp1, LK_EXCLUSIVE | LK_NOWAIT, | |||||
__FILE__, __LINE__); | |||||
if (error == 0) | |||||
break; | |||||
VOP_UNLOCK(vp2); | |||||
vp2_locked = false; | |||||
pause("vlp2", prng32_bounded(100)); | |||||
Done Inline ActionsSame here. markj: Same here. | |||||
} | |||||
vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY); | |||||
vp1_locked = true; | |||||
} | |||||
} | |||||
if (vp1 != NULL) | |||||
ASSERT_VOP_ELOCKED(vp1, "vp1 ret"); | |||||
if (vp2 != NULL) | |||||
ASSERT_VOP_ELOCKED(vp2, "vp2 ret"); | |||||
} | } |
After r367130 I believe you need a NDREINIT here.