Changeset View
Standalone View
sys/kern/vfs_vnops.c
Show First 20 Lines • Show All 100 Lines • ▼ Show 20 Lines | |||||
/*
 * Forward declarations for the fo_* implementations that are private
 * to this file.  The remaining entries of vnops (vn_statfile,
 * vn_chmod, vn_chown, vn_sendfile, vn_seek, vn_fill_kinfo) are not
 * declared static here, so they are presumably defined elsewhere.
 */
static fo_rdwr_t	vn_io_fault;
static fo_truncate_t	vn_truncate;
static fo_ioctl_t	vn_ioctl;
static fo_poll_t	vn_poll;
static fo_kqfilter_t	vn_kqfilter;
static fo_close_t	vn_closefile;
static fo_mmap_t	vn_mmap;
static fo_fallocate_t	vn_fallocate;
static fo_fspacectl_t	vn_fspacectl;

/*
 * File operations table for vnode-backed file descriptors.  Both
 * fo_read and fo_write go through vn_io_fault(), which dispatches to
 * the actual read/write paths.  DFLAG_PASSABLE | DFLAG_SEEKABLE marks
 * such descriptors as passable over unix sockets and seekable.
 */
struct fileops vnops = {
	.fo_read = vn_io_fault,
	.fo_write = vn_io_fault,
	.fo_truncate = vn_truncate,
	.fo_ioctl = vn_ioctl,
	.fo_poll = vn_poll,
	.fo_kqfilter = vn_kqfilter,
	.fo_stat = vn_statfile,
	.fo_close = vn_closefile,
	.fo_chmod = vn_chmod,
	.fo_chown = vn_chown,
	.fo_sendfile = vn_sendfile,
	.fo_seek = vn_seek,
	.fo_fill_kinfo = vn_fill_kinfo,
	.fo_mmap = vn_mmap,
	.fo_fallocate = vn_fallocate,
	.fo_fspacectl = vn_fspacectl,
	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
};
/*
 * Knobs for the vn_io_fault() read/write path.
 * NOTE(review): io_hold_cnt appears to be a batching limit used by
 * the vn_io_fault machinery -- its consumer is outside this view,
 * confirm before relying on this description.
 */
const u_int io_hold_cnt = 16;
/* Enables the vn_io_fault lock-avoidance scheme (on by default). */
static int vn_io_fault_enable = 1;
SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RWTUN,
    &vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance");
/* Prefaulting of user pages is disabled by default. */
static int vn_io_fault_prefault = 0;
▲ Show 20 Lines • Show All 2,286 Lines • ▼ Show 20 Lines | vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end) | ||||
if ((object = vp->v_object) == NULL) | if ((object = vp->v_object) == NULL) | ||||
return; | return; | ||||
VM_OBJECT_WLOCK(object); | VM_OBJECT_WLOCK(object); | ||||
vm_object_page_remove(object, start, end, 0); | vm_object_page_remove(object, start, end, 0); | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
} | } | ||||
int | int | ||||
vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, | vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, | ||||
struct ucred *cred) | struct ucred *cred) | ||||
kib: I suggest to extract introduction of vn_bmap_seekhole_locked() into a separate commit and do it… | |||||
{ | { | ||||
struct vattr va; | struct vattr va; | ||||
daddr_t bn, bnp; | daddr_t bn, bnp; | ||||
uint64_t bsize; | uint64_t bsize; | ||||
off_t noff; | off_t noff; | ||||
int error; | int error; | ||||
KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, | KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, | ||||
("%s: Wrong command %lu", __func__, cmd)); | ("%s: Wrong command %lu", __func__, cmd)); | ||||
ASSERT_VOP_LOCKED(vp, "vn_bmap_seekhole_locked"); | ASSERT_VOP_LOCKED(vp, "vn_bmap_seekhole_locked"); | ||||
if (vp->v_type != VREG) { | if (vp->v_type != VREG) { | ||||
Not Done Inline ActionsYou should assert that the vnode is locked, then. kib: You should assert that the vnode is locked, then. | |||||
error = ENOTTY; | error = ENOTTY; | ||||
goto out; | goto out; | ||||
} | } | ||||
error = VOP_GETATTR(vp, &va, cred); | error = VOP_GETATTR(vp, &va, cred); | ||||
if (error != 0) | if (error != 0) | ||||
goto out; | goto out; | ||||
noff = *off; | noff = *off; | ||||
if (noff >= va.va_size) { | if (noff >= va.va_size) { | ||||
Show All 27 Lines | out: | ||||
return (error); | return (error); | ||||
} | } | ||||
int | int | ||||
vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) | vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) | ||||
{ | { | ||||
int error; | int error; | ||||
KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, | KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, | ||||
Not Done Inline ActionsThis assert should be either repeated in vn_bmap_seekhole_locked(), or moved to that function. kib: This assert should be either repeated in vn_bmap_seekhole_locked(), or moved to that function. | |||||
("%s: Wrong command %lu", __func__, cmd)); | ("%s: Wrong command %lu", __func__, cmd)); | ||||
if (vn_lock(vp, LK_SHARED) != 0) | if (vn_lock(vp, LK_SHARED) != 0) | ||||
return (EBADF); | return (EBADF); | ||||
error = vn_bmap_seekhole_locked(vp, cmd, off, cred); | error = vn_bmap_seekhole_locked(vp, cmd, off, cred); | ||||
VOP_UNLOCK(vp); | VOP_UNLOCK(vp); | ||||
return (error); | return (error); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 935 Lines • ▼ Show 20 Lines | #endif | ||||
if (olen + ooffset != offset + len) { | if (olen + ooffset != offset + len) { | ||||
panic("offset + len changed from %jx/%jx to %jx/%jx", | panic("offset + len changed from %jx/%jx to %jx/%jx", | ||||
ooffset, olen, offset, len); | ooffset, olen, offset, len); | ||||
} | } | ||||
if (error != 0 || len == 0) | if (error != 0 || len == 0) | ||||
break; | break; | ||||
KASSERT(olen > len, ("Iteration did not make progress?")); | KASSERT(olen > len, ("Iteration did not make progress?")); | ||||
maybe_yield(); | maybe_yield(); | ||||
} | |||||
return (error); | |||||
} | |||||
static int | |||||
vn_deallocate_impl(struct vnode *vp, off_t *offset, off_t *length, int flags, | |||||
int ioflg, struct ucred *active_cred, struct ucred *file_cred) | |||||
{ | |||||
struct mount *mp; | |||||
void *rl_cookie; | |||||
off_t off, len; | |||||
int error; | |||||
#ifdef AUDIT | |||||
bool audited_vnode1 = false; | |||||
#endif | |||||
Done Inline ActionsUse bool? kib: Use bool? | |||||
rl_cookie = NULL; | |||||
error = 0; | |||||
mp = NULL; | |||||
off = *offset; | |||||
len = *length; | |||||
if ((ioflg & (IO_NODELOCKED|IO_RANGELOCKED)) == 0) | |||||
rl_cookie = vn_rangelock_wlock(vp, off, off + len); | |||||
Done Inline Actionsif ((ioflg & (IO_NODELOCKED | IO_RANGELOCKED)) == 0) kib: ``` if ((ioflg & (IO_NODELOCKED | IO_RANGELOCKED)) == 0)``` | |||||
while (len > 0 && error == 0) { | |||||
/* | |||||
* Try to deallocate the longest range in one pass. | |||||
* In case a pass takes too long to be executed, it returns | |||||
* partial result. The residue will be proceeded in the next | |||||
* pass. | |||||
*/ | |||||
if ((ioflg & IO_NODELOCKED) == 0) { | |||||
bwillwrite(); | |||||
if ((error = vn_start_write(vp, &mp, | |||||
V_WAIT | PCATCH)) != 0) | |||||
goto out; | |||||
vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); | |||||
} | |||||
#ifdef AUDIT | |||||
if (!audited_vnode1) { | |||||
AUDIT_ARG_VNODE1(vp); | |||||
audited_vnode1 = true; | |||||
} | |||||
#endif | |||||
Done Inline ActionsImagine that vn_start_write() returned EINTR due to a signal + PCATCH. Also assume that this is second iteration of the while (len > 0) loop. Then you return EINTR while some data was already deallocated. That said, relocking ranglelock basically makes it useless. Rangelock only purpose is to establish atomicity WRT parallel reads and writes due to VOP dropping the vnode lock. In other words, you need to get the ranglelock once and for whole op. kib: Imagine that vn_start_write() returned EINTR due to a signal + PCATCH. Also assume that this… | |||||
#ifdef MAC | |||||
if ((ioflg & IO_NOMACCHECK) == 0) | |||||
error = mac_vnode_check_write(active_cred, file_cred, | |||||
vp); | |||||
#endif | |||||
if (error == 0) | |||||
error = VOP_DEALLOCATE(vp, &off, &len, flags, | |||||
active_cred); | |||||
if ((ioflg & IO_NODELOCKED) == 0) { | |||||
VOP_UNLOCK(vp); | |||||
if (mp != NULL) { | |||||
vn_finished_write(mp); | |||||
mp = NULL; | |||||
} | |||||
} | |||||
} | |||||
out: | |||||
if (rl_cookie != NULL) | |||||
vn_rangelock_unlock(vp, rl_cookie); | |||||
*offset = off; | |||||
*length = len; | |||||
return (error); | |||||
} | |||||
int | |||||
vn_deallocate(struct vnode *vp, off_t *offset, off_t *length, int flags, | |||||
int ioflg, struct ucred *active_cred, struct ucred *file_cred) | |||||
{ | |||||
if (*offset < 0 || *length <= 0 || *length > OFF_MAX - *offset || | |||||
flags != 0) | |||||
return (EINVAL); | |||||
if (vp->v_type != VREG) | |||||
Done Inline ActionsYou don't unlock the rangelock on iteration, but do relock it on each loop entry. I believe you would deadlock against yourself if short dealloc occurs. kib: You don't unlock the rangelock on iteration, but do relock it on each loop entry. I believe… | |||||
return (ENODEV); | |||||
return (vn_deallocate_impl(vp, offset, length, flags, ioflg, | |||||
active_cred, file_cred)); | |||||
} | |||||
/*
 * fo_fspacectl implementation for vnode-backed file descriptors.
 * Validates the command and range, then dispatches per command;
 * only SPACECTL_DEALLOC is currently supported.
 */
static int
vn_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags,
    struct ucred *active_cred, struct thread *td)
{
	int error;
	struct vnode *vp;

	vp = fp->f_vnode;

	/*
	 * Range must be non-negative, non-empty, and must not
	 * overflow off_t; no flags are defined.
	 */
	if (cmd != SPACECTL_DEALLOC || *offset < 0 || *length <= 0 ||
	    *length > OFF_MAX - *offset || flags != 0)
		return (EINVAL);
	if (vp->v_type != VREG)
		return (ENODEV);

	switch (cmd) {
	case SPACECTL_DEALLOC:
		/* ioflg 0: vn_deallocate_impl takes vnode and range locks. */
		error = vn_deallocate_impl(vp, offset, length, flags, 0,
		    active_cred, fp->f_cred);
		break;
	default:
		/* Unreachable: cmd was validated above; kept for new cmds. */
		panic("vn_fspacectl: unknown cmd %d", cmd);
	}

	return (error);
}
static u_long vn_lock_pair_pause_cnt; | static u_long vn_lock_pair_pause_cnt; | ||||
SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, | SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, | ||||
&vn_lock_pair_pause_cnt, 0, | &vn_lock_pair_pause_cnt, 0, | ||||
▲ Show 20 Lines • Show All 98 Lines • Show Last 20 Lines |
I suggest extracting the introduction of vn_bmap_seekhole_locked() into a separate commit and landing it in advance.