Changeset View
Standalone View
sys/kern/vfs_vnops.c
Show First 20 Lines • Show All 100 Lines • ▼ Show 20 Lines | |||||
static fo_truncate_t vn_truncate; | static fo_truncate_t vn_truncate; | ||||
static fo_ioctl_t vn_ioctl; | static fo_ioctl_t vn_ioctl; | ||||
static fo_poll_t vn_poll; | static fo_poll_t vn_poll; | ||||
static fo_kqfilter_t vn_kqfilter; | static fo_kqfilter_t vn_kqfilter; | ||||
static fo_stat_t vn_statfile; | static fo_stat_t vn_statfile; | ||||
static fo_close_t vn_closefile; | static fo_close_t vn_closefile; | ||||
static fo_mmap_t vn_mmap; | static fo_mmap_t vn_mmap; | ||||
static fo_fallocate_t vn_fallocate; | static fo_fallocate_t vn_fallocate; | ||||
static fo_fspacectl_t vn_fspacectl; | |||||
struct fileops vnops = { | struct fileops vnops = { | ||||
.fo_read = vn_io_fault, | .fo_read = vn_io_fault, | ||||
.fo_write = vn_io_fault, | .fo_write = vn_io_fault, | ||||
.fo_truncate = vn_truncate, | .fo_truncate = vn_truncate, | ||||
.fo_ioctl = vn_ioctl, | .fo_ioctl = vn_ioctl, | ||||
.fo_poll = vn_poll, | .fo_poll = vn_poll, | ||||
.fo_kqfilter = vn_kqfilter, | .fo_kqfilter = vn_kqfilter, | ||||
.fo_stat = vn_statfile, | .fo_stat = vn_statfile, | ||||
.fo_close = vn_closefile, | .fo_close = vn_closefile, | ||||
.fo_chmod = vn_chmod, | .fo_chmod = vn_chmod, | ||||
.fo_chown = vn_chown, | .fo_chown = vn_chown, | ||||
.fo_sendfile = vn_sendfile, | .fo_sendfile = vn_sendfile, | ||||
.fo_seek = vn_seek, | .fo_seek = vn_seek, | ||||
.fo_fill_kinfo = vn_fill_kinfo, | .fo_fill_kinfo = vn_fill_kinfo, | ||||
.fo_mmap = vn_mmap, | .fo_mmap = vn_mmap, | ||||
.fo_fallocate = vn_fallocate, | .fo_fallocate = vn_fallocate, | ||||
.fo_fspacectl = vn_fspacectl, | |||||
.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE | .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE | ||||
}; | }; | ||||
const u_int io_hold_cnt = 16; | const u_int io_hold_cnt = 16; | ||||
static int vn_io_fault_enable = 1; | static int vn_io_fault_enable = 1; | ||||
SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RWTUN, | SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RWTUN, | ||||
&vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance"); | &vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance"); | ||||
static int vn_io_fault_prefault = 0; | static int vn_io_fault_prefault = 0; | ||||
▲ Show 20 Lines • Show All 2,226 Lines • ▼ Show 20 Lines | vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end) | ||||
if ((object = vp->v_object) == NULL) | if ((object = vp->v_object) == NULL) | ||||
return; | return; | ||||
VM_OBJECT_WLOCK(object); | VM_OBJECT_WLOCK(object); | ||||
vm_object_page_remove(object, start, end, 0); | vm_object_page_remove(object, start, end, 0); | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
} | } | ||||
int | int | ||||
vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) | vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, | ||||
struct ucred *cred) | |||||
kib: I suggest to extract introduction of vn_bmap_seekhole_locked() into a separate commit and do it… | |||||
{ | { | ||||
struct vattr va; | struct vattr va; | ||||
daddr_t bn, bnp; | daddr_t bn, bnp; | ||||
uint64_t bsize; | uint64_t bsize; | ||||
off_t noff; | off_t noff; | ||||
int error; | int error; | ||||
KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, | |||||
("Wrong command %lu", cmd)); | |||||
if (vn_lock(vp, LK_SHARED) != 0) | |||||
Not Done Inline ActionsYou should assert that the vnode is locked, then. kib: You should assert that the vnode is locked, then. | |||||
return (EBADF); | |||||
if (vp->v_type != VREG) { | if (vp->v_type != VREG) { | ||||
error = ENOTTY; | error = ENOTTY; | ||||
goto unlock; | goto out; | ||||
} | } | ||||
error = VOP_GETATTR(vp, &va, cred); | error = VOP_GETATTR(vp, &va, cred); | ||||
if (error != 0) | if (error != 0) | ||||
goto unlock; | goto out; | ||||
noff = *off; | noff = *off; | ||||
if (noff >= va.va_size) { | if (noff >= va.va_size) { | ||||
error = ENXIO; | error = ENXIO; | ||||
goto unlock; | goto out; | ||||
} | } | ||||
bsize = vp->v_mount->mnt_stat.f_iosize; | bsize = vp->v_mount->mnt_stat.f_iosize; | ||||
for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize - | for (bn = noff / bsize; noff < va.va_size; bn++, noff += bsize - | ||||
noff % bsize) { | noff % bsize) { | ||||
error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL); | error = VOP_BMAP(vp, bn, NULL, &bnp, NULL, NULL); | ||||
if (error == EOPNOTSUPP) { | if (error == EOPNOTSUPP) { | ||||
error = ENOTTY; | error = ENOTTY; | ||||
goto unlock; | goto out; | ||||
} | } | ||||
if ((bnp == -1 && cmd == FIOSEEKHOLE) || | if ((bnp == -1 && cmd == FIOSEEKHOLE) || | ||||
(bnp != -1 && cmd == FIOSEEKDATA)) { | (bnp != -1 && cmd == FIOSEEKDATA)) { | ||||
noff = bn * bsize; | noff = bn * bsize; | ||||
if (noff < *off) | if (noff < *off) | ||||
noff = *off; | noff = *off; | ||||
goto unlock; | goto out; | ||||
} | } | ||||
} | } | ||||
if (noff > va.va_size) | if (noff > va.va_size) | ||||
noff = va.va_size; | noff = va.va_size; | ||||
/* noff == va.va_size. There is an implicit hole at the end of file. */ | /* noff == va.va_size. There is an implicit hole at the end of file. */ | ||||
if (cmd == FIOSEEKDATA) | if (cmd == FIOSEEKDATA) | ||||
error = ENXIO; | error = ENXIO; | ||||
unlock: | out: | ||||
VOP_UNLOCK(vp); | |||||
if (error == 0) | if (error == 0) | ||||
*off = noff; | *off = noff; | ||||
return (error); | return (error); | ||||
} | } | ||||
int | int | ||||
vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) | |||||
{ | |||||
int error; | |||||
KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, | |||||
Not Done Inline ActionsThis assert should be either repeated in vn_bmap_seekhole_locked(), or moved to that function. kib: This assert should be either repeated in vn_bmap_seekhole_locked(), or moved to that function. | |||||
("Wrong command %lu", cmd)); | |||||
if (vn_lock(vp, LK_SHARED) != 0) | |||||
return (EBADF); | |||||
error = vn_bmap_seekhole_locked(vp, cmd, off, cred); | |||||
VOP_UNLOCK(vp); | |||||
return (error); | |||||
} | |||||
int | |||||
vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) | vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) | ||||
{ | { | ||||
struct ucred *cred; | struct ucred *cred; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct vattr vattr; | struct vattr vattr; | ||||
off_t foffset, size; | off_t foffset, size; | ||||
int error, noneg; | int error, noneg; | ||||
▲ Show 20 Lines • Show All 919 Lines • ▼ Show 20 Lines | #endif | ||||
if (olen + ooffset != offset + len) { | if (olen + ooffset != offset + len) { | ||||
panic("offset + len changed from %jx/%jx to %jx/%jx", | panic("offset + len changed from %jx/%jx to %jx/%jx", | ||||
ooffset, olen, offset, len); | ooffset, olen, offset, len); | ||||
} | } | ||||
if (error != 0 || len == 0) | if (error != 0 || len == 0) | ||||
break; | break; | ||||
KASSERT(olen > len, ("Iteration did not make progress?")); | KASSERT(olen > len, ("Iteration did not make progress?")); | ||||
maybe_yield(); | maybe_yield(); | ||||
} | |||||
return (error); | |||||
} | |||||
static int | |||||
vn_deallocate_impl(struct vnode *vp, off_t offset, off_t len, int flags, int ioflg, | |||||
bool may_audit, struct ucred *active_cred, struct ucred *file_cred, | |||||
struct thread *td) | |||||
{ | |||||
struct mount *mp; | |||||
void *rl_cookie; | |||||
int lock_flags; | |||||
int error; | |||||
#ifdef AUDIT | |||||
int audited_vnode1 = 0; | |||||
Done Inline ActionsUse bool? kib: Use bool? | |||||
#endif | |||||
rl_cookie = NULL; | |||||
error = 0; | |||||
if (offset < 0 || len <= 0 || (flags & ~SPACECTL_F_SUPPORTED) != 0) | |||||
return (EINVAL); | |||||
if (vp->v_type != VREG) | |||||
return (ENODEV); | |||||
Done Inline Actionsif ((ioflg & (IO_NODELOCKED | IO_RANGELOCKED)) == 0) kib: ``` if ((ioflg & (IO_NODELOCKED | IO_RANGELOCKED)) == 0)``` | |||||
/* Take the maximum range if end offset overflows */ | |||||
len = omin(len, OFF_MAX - offset); | |||||
while (len > 0) { | |||||
/* | |||||
* Try to deallocate the longest range in one pass. | |||||
* In case a pass takes too long to be executed, it returns | |||||
* partial result. The residue will be proceeded in the next | |||||
* pass. | |||||
*/ | |||||
mp = NULL; | |||||
bwillwrite(); | |||||
if ((ioflg & IO_NODELOCKED) == 0) { | |||||
if ((ioflg & IO_RANGELOCKED) == 0 && | |||||
rl_cookie == NULL) { | |||||
rl_cookie = vn_rangelock_wlock(vp, offset, | |||||
offset + len); | |||||
if ((error = vn_start_write(vp, &mp, | |||||
Done Inline ActionsImagine that vn_start_write() returned EINTR due to a signal + PCATCH. Also assume that this is second iteration of the while (len > 0) loop. Then you return EINTR while some data was already deallocated. That said, relocking ranglelock basically makes it useless. Rangelock only purpose is to establish atomicity WRT parallel reads and writes due to VOP dropping the vnode lock. In other words, you need to get the ranglelock once and for whole op. kib: Imagine that vn_start_write() returned EINTR due to a signal + PCATCH. Also assume that this… | |||||
V_WAIT | PCATCH)) != 0) | |||||
goto out; | |||||
} | |||||
if ((flags & SPACECTL_F_CANEXTEND) == 0 && | |||||
(MNT_SHARED_WRITES(mp) || | |||||
(mp == NULL && MNT_SHARED_WRITES(vp->v_mount)))) { | |||||
lock_flags = LK_SHARED; | |||||
} else { | |||||
lock_flags = LK_EXCLUSIVE; | |||||
} | |||||
vn_lock(vp, lock_flags | LK_RETRY); | |||||
#ifdef AUDIT | |||||
if (may_audit && !audited_vnode1) { | |||||
AUDIT_ARG_VNODE1(vp); | |||||
audited_vnode1 = 1; | |||||
} | |||||
#endif | |||||
} | |||||
#ifdef MAC | |||||
if ((ioflg & IO_NOMACCHECK) == 0) | |||||
error = mac_vnode_check_write(active_cred, file_cred, | |||||
vp); | |||||
#endif | |||||
if (error == 0) | |||||
error = VOP_DEALLOCATE(vp, &offset, &len, flags, | |||||
active_cred); | |||||
if ((ioflg & IO_NODELOCKED) == 0) { | |||||
VOP_UNLOCK(vp); | |||||
if (mp != NULL) | |||||
vn_finished_write(mp); | |||||
Done Inline ActionsYou don't unlock the rangelock on iteration, but do relock it on each loop entry. I believe you would deadlock against yourself if short dealloc occurs. kib: You don't unlock the rangelock on iteration, but do relock it on each loop entry. I believe… | |||||
} | |||||
if (error != 0) | |||||
break; | |||||
} | |||||
out: | |||||
if (rl_cookie != NULL) | |||||
vn_rangelock_unlock(vp, rl_cookie); | |||||
return (error); | |||||
} | |||||
int | |||||
vn_deallocate(struct vnode *vp, off_t offset, off_t len, int flags, int ioflg, | |||||
struct ucred *active_cred, struct ucred *file_cred, struct thread *td) | |||||
{ | |||||
return (vn_deallocate_impl(vp, offset, len, flags, ioflg, false, | |||||
active_cred, file_cred, td)); | |||||
} | |||||
static int | |||||
vn_fspacectl(struct file *fp, int cmd, off_t offset, off_t len, int flags, | |||||
struct ucred *active_cred, struct thread *td) | |||||
{ | |||||
int error; | |||||
struct vnode *vp; | |||||
vp = fp->f_vnode; | |||||
if (cmd != SPACECTL_DEALLOC) | |||||
return (EINVAL); | |||||
switch (cmd) { | |||||
case SPACECTL_DEALLOC: | |||||
error = vn_deallocate_impl(vp, offset, len, flags, 0, true, | |||||
active_cred, fp->f_cred, td); | |||||
break; | |||||
default: | |||||
panic("vn_fspacectl: unknown cmd %d", cmd); | |||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
static u_long vn_lock_pair_pause_cnt; | static u_long vn_lock_pair_pause_cnt; | ||||
SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, | SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, | ||||
&vn_lock_pair_pause_cnt, 0, | &vn_lock_pair_pause_cnt, 0, | ||||
▲ Show 20 Lines • Show All 89 Lines • Show Last 20 Lines |
I suggest extracting the introduction of vn_bmap_seekhole_locked() into a separate commit and landing it in advance.