Changeset View
Standalone View
sys/kern/vfs_vnops.c
Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | |||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/namei.h> | #include <sys/namei.h> | ||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#include <sys/bio.h> | #include <sys/bio.h> | ||||
#include <sys/buf.h> | #include <sys/buf.h> | ||||
#include <sys/filio.h> | #include <sys/filio.h> | ||||
#include <sys/resourcevar.h> | #include <sys/resourcevar.h> | ||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/prng.h> | |||||
#include <sys/sx.h> | #include <sys/sx.h> | ||||
#include <sys/sleepqueue.h> | #include <sys/sleepqueue.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/ttycom.h> | #include <sys/ttycom.h> | ||||
#include <sys/conf.h> | #include <sys/conf.h> | ||||
#include <sys/syslog.h> | #include <sys/syslog.h> | ||||
#include <sys/unistd.h> | #include <sys/unistd.h> | ||||
#include <sys/user.h> | #include <sys/user.h> | ||||
▲ Show 20 Lines • Show All 189 Lines • ▼ Show 20 Lines | #ifdef MAC | ||||
if (error == 0) | if (error == 0) | ||||
#endif | #endif | ||||
error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, | error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, | ||||
&ndp->ni_cnd, vap); | &ndp->ni_cnd, vap); | ||||
vput(ndp->ni_dvp); | vput(ndp->ni_dvp); | ||||
vn_finished_write(mp); | vn_finished_write(mp); | ||||
if (error) { | if (error) { | ||||
NDFREE(ndp, NDF_ONLY_PNBUF); | NDFREE(ndp, NDF_ONLY_PNBUF); | ||||
if (error == ERELOOKUP) { | |||||
NDREINIT(ndp); | |||||
markj: After r367130, I believe you need an NDREINIT here. | |||||
goto restart; | |||||
} | |||||
return (error); | return (error); | ||||
} | } | ||||
fmode &= ~O_TRUNC; | fmode &= ~O_TRUNC; | ||||
vp = ndp->ni_vp; | vp = ndp->ni_vp; | ||||
} else { | } else { | ||||
if (ndp->ni_dvp == ndp->ni_vp) | if (ndp->ni_dvp == ndp->ni_vp) | ||||
vrele(ndp->ni_dvp); | vrele(ndp->ni_dvp); | ||||
else | else | ||||
▲ Show 20 Lines • Show All 1,233 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
struct mount *mp; | struct mount *mp; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
void *rl_cookie; | void *rl_cookie; | ||||
int error; | int error; | ||||
vp = fp->f_vnode; | vp = fp->f_vnode; | ||||
retry: | |||||
/* | /* | ||||
* Lock the whole range for truncation. Otherwise split i/o | * Lock the whole range for truncation. Otherwise split i/o | ||||
* might happen partly before and partly after the truncation. | * might happen partly before and partly after the truncation. | ||||
*/ | */ | ||||
rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); | rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); | ||||
error = vn_start_write(vp, &mp, V_WAIT | PCATCH); | error = vn_start_write(vp, &mp, V_WAIT | PCATCH); | ||||
if (error) | if (error) | ||||
goto out1; | goto out1; | ||||
Show All 10 Lines | |||||
#endif | #endif | ||||
error = vn_truncate_locked(vp, length, (fp->f_flag & O_FSYNC) != 0, | error = vn_truncate_locked(vp, length, (fp->f_flag & O_FSYNC) != 0, | ||||
fp->f_cred); | fp->f_cred); | ||||
out: | out: | ||||
VOP_UNLOCK(vp); | VOP_UNLOCK(vp); | ||||
vn_finished_write(mp); | vn_finished_write(mp); | ||||
out1: | out1: | ||||
vn_rangelock_unlock(vp, rl_cookie); | vn_rangelock_unlock(vp, rl_cookie); | ||||
if (error == ERELOOKUP) | |||||
goto retry; | |||||
return (error); | return (error); | ||||
} | } | ||||
/* | /* | ||||
* Truncate a file that is already locked. | * Truncate a file that is already locked. | ||||
*/ | */ | ||||
int | int | ||||
vn_truncate_locked(struct vnode *vp, off_t length, bool sync, | vn_truncate_locked(struct vnode *vp, off_t length, bool sync, | ||||
▲ Show 20 Lines • Show All 1,749 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
if (error != 0 || len == 0) | if (error != 0 || len == 0) | ||||
break; | break; | ||||
KASSERT(olen > len, ("Iteration did not make progress?")); | KASSERT(olen > len, ("Iteration did not make progress?")); | ||||
maybe_yield(); | maybe_yield(); | ||||
} | } | ||||
return (error); | return (error); | ||||
} | |||||
/* | |||||
* Lock pair of vnodes vp1, vp2, avoiding lock order reversal. | |||||
* vp1_locked indicates whether vp1 is exclusively locked; if not, vp1 | |||||
* must be unlocked. Same for vp2 and vp2_locked. One of the vnodes | |||||
Done Inline ActionsThis sentence is a bit confusing since "vp1" and "vnode" refer to the same thing. Maybe, "vp1_locked indicates whether vp1 is exclusively locked; if not, vp1 must be unlocked." markj: This sentence is a bit confusing since "vp1" and "vnode" refer to the same thing. Maybe… | |||||
* can be NULL. | |||||
* | |||||
* The function returns with both vnodes exclusively locked, and | |||||
Done Inline ActionsThe function markj: The function | |||||
* guarantees that it does not create lock order reversal with other | |||||
* threads during its execution. Both vnodes could be unlocked | |||||
* temporary (and reclaimed). | |||||
*/ | |||||
void | |||||
vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2, | |||||
bool vp2_locked) | |||||
{ | |||||
int error; | |||||
if (vp1 == NULL && vp2 == NULL) | |||||
return; | |||||
if (vp1 != NULL) { | |||||
if (vp1_locked) | |||||
ASSERT_VOP_ELOCKED(vp1, "vp1"); | |||||
else | |||||
ASSERT_VOP_UNLOCKED(vp1, "vp1"); | |||||
} else { | |||||
vp1_locked = true; | |||||
} | |||||
if (vp2 != NULL) { | |||||
if (vp2_locked) | |||||
ASSERT_VOP_ELOCKED(vp2, "vp2"); | |||||
else | |||||
ASSERT_VOP_UNLOCKED(vp2, "vp2"); | |||||
} else { | |||||
vp2_locked = true; | |||||
} | |||||
if (!vp1_locked && !vp2_locked) { | |||||
vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY); | |||||
vp1_locked = true; | |||||
} | |||||
for (;;) { | |||||
if (vp1_locked && vp2_locked) | |||||
break; | |||||
if (vp1_locked && vp2 != NULL) { | |||||
if (vp1 != NULL) { | |||||
error = VOP_LOCK1(vp2, LK_EXCLUSIVE | LK_NOWAIT, | |||||
__FILE__, __LINE__); | |||||
if (error == 0) | |||||
break; | |||||
VOP_UNLOCK(vp1); | |||||
vp1_locked = false; | |||||
pause("vlp1", prng32_bounded(hz/10)); | |||||
Done Inline ActionsSo we may sleep up to 100ms, assuming hz=1000? That seems like a long time. I thought typical vnode lock hold times would be much smaller than that, unlike buf locks. It might be nicer to write 100 as a function of hz. hz=100 is a common setting in guest VMs. markj: So we may sleep up to 100ms, assuming hz=1000? That seems like a long time. I thought typical… | |||||
Done Inline ActionsWe should own the vnode lock while we own buffer lock. There are exceptions, like we do not lock devvp for io, and async io 'gets out of vnode lock' when buffer lock owner is reassigned LK_KERNPROC. But otherwise, we keep vnode lock for the duration of io. As consequence, vnode lock could be held for quite long time. For instance, on busy HDD 100 ms is completely normal, and I can regularly see peaks up to several seconds. On the other hand, devices with deep queues and low latency like good nvme provide very different vnode lock hold times, sure. kib: We should own the vnode lock while we own buffer lock. There are exceptions, like we do not… | |||||
Done Inline ActionsShould we perhaps add a debug counter for pause() calls? markj: Should we perhaps add a debug counter for pause() calls? | |||||
markjUnsubmitted Done Inline ActionsMissing ws around / markj: Missing ws around `/` | |||||
} | |||||
vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY); | |||||
vp2_locked = true; | |||||
} | |||||
if (vp2_locked && vp1 != NULL) { | |||||
if (vp2 != NULL) { | |||||
error = VOP_LOCK1(vp1, LK_EXCLUSIVE | LK_NOWAIT, | |||||
__FILE__, __LINE__); | |||||
if (error == 0) | |||||
break; | |||||
VOP_UNLOCK(vp2); | |||||
vp2_locked = false; | |||||
pause("vlp2", prng32_bounded(hz/10)); | |||||
markjUnsubmitted Done Inline ActionsSame here. markj: Same here. | |||||
} | |||||
vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY); | |||||
vp1_locked = true; | |||||
} | |||||
} | |||||
if (vp1 != NULL) | |||||
ASSERT_VOP_ELOCKED(vp1, "vp1 ret"); | |||||
if (vp2 != NULL) | |||||
ASSERT_VOP_ELOCKED(vp2, "vp2 ret"); | |||||
} | } |
After r367130, I believe you need an NDREINIT here.