Changeset View
Standalone View
sys/kern/vfs_vnops.c
Show First 20 Lines • Show All 100 Lines • ▼ Show 20 Lines | |||||
/*
 * Forward declarations for the vnode-backed fileops handlers defined
 * later in this file.
 */
static fo_rdwr_t	vn_io_fault;
static fo_truncate_t	vn_truncate;
static fo_ioctl_t	vn_ioctl;
static fo_poll_t	vn_poll;
static fo_kqfilter_t	vn_kqfilter;
static fo_close_t	vn_closefile;
static fo_mmap_t	vn_mmap;
static fo_fallocate_t	vn_fallocate;
static fo_fspacectl_t	vn_fspacectl;

/*
 * Method table routing struct file operations to the vnode layer.
 * Reads and writes both go through vn_io_fault (page-fault-avoiding
 * I/O wrapper); descriptors of this type are passable over unix
 * sockets and seekable.
 */
struct fileops vnops = {
	.fo_read = vn_io_fault,
	.fo_write = vn_io_fault,
	.fo_truncate = vn_truncate,
	.fo_ioctl = vn_ioctl,
	.fo_poll = vn_poll,
	.fo_kqfilter = vn_kqfilter,
	.fo_stat = vn_statfile,
	.fo_close = vn_closefile,
	.fo_chmod = vn_chmod,
	.fo_chown = vn_chown,
	.fo_sendfile = vn_sendfile,
	.fo_seek = vn_seek,
	.fo_fill_kinfo = vn_fill_kinfo,
	.fo_mmap = vn_mmap,
	.fo_fallocate = vn_fallocate,
	.fo_fspacectl = vn_fspacectl,
	.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE
};
/*
 * NOTE(review): appears to bound the number of pages held per
 * vn_io_fault I/O chunk — the consumer is outside this view, confirm.
 */
const u_int io_hold_cnt = 16;
/* Tunable switch for the vn_io_fault lock-avoidance path (debug.vn_io_fault_enable). */
static int vn_io_fault_enable = 1;
SYSCTL_INT(_debug, OID_AUTO, vn_io_fault_enable, CTLFLAG_RWTUN,
    &vn_io_fault_enable, 0, "Enable vn_io_fault lock avoidance");
static int vn_io_fault_prefault = 0;
▲ Show 20 Lines • Show All 2,297 Lines • ▼ Show 20 Lines | vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end) | ||||
if ((object = vp->v_object) == NULL) | if ((object = vp->v_object) == NULL) | ||||
return; | return; | ||||
VM_OBJECT_WLOCK(object); | VM_OBJECT_WLOCK(object); | ||||
vm_object_page_remove(object, start, end, 0); | vm_object_page_remove(object, start, end, 0); | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
} | } | ||||
int | int | ||||
vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, | vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, | ||||
struct ucred *cred) | struct ucred *cred) | ||||
kib: I suggest to extract introduction of vn_bmap_seekhole_locked() into a separate commit and do it… | |||||
{ | { | ||||
struct vattr va; | struct vattr va; | ||||
daddr_t bn, bnp; | daddr_t bn, bnp; | ||||
uint64_t bsize; | uint64_t bsize; | ||||
off_t noff; | off_t noff; | ||||
int error; | int error; | ||||
KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, | KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, | ||||
("%s: Wrong command %lu", __func__, cmd)); | ("%s: Wrong command %lu", __func__, cmd)); | ||||
ASSERT_VOP_LOCKED(vp, "vn_bmap_seekhole_locked"); | ASSERT_VOP_LOCKED(vp, "vn_bmap_seekhole_locked"); | ||||
if (vp->v_type != VREG) { | if (vp->v_type != VREG) { | ||||
Not Done Inline ActionsYou should assert that the vnode is locked, then. kib: You should assert that the vnode is locked, then. | |||||
error = ENOTTY; | error = ENOTTY; | ||||
goto out; | goto out; | ||||
} | } | ||||
error = VOP_GETATTR(vp, &va, cred); | error = VOP_GETATTR(vp, &va, cred); | ||||
if (error != 0) | if (error != 0) | ||||
goto out; | goto out; | ||||
noff = *off; | noff = *off; | ||||
if (noff >= va.va_size) { | if (noff >= va.va_size) { | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) | ||||
return (error); | return (error); | ||||
} | } | ||||
int | int | ||||
vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) | vn_seek(struct file *fp, off_t offset, int whence, struct thread *td) | ||||
{ | { | ||||
struct ucred *cred; | struct ucred *cred; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct vattr vattr; | struct vattr vattr; | ||||
Not Done Inline ActionsThis assert should be either repeated in vn_bmap_seekhole_locked(), or moved to that function. kib: This assert should be either repeated in vn_bmap_seekhole_locked(), or moved to that function. | |||||
off_t foffset, size; | off_t foffset, size; | ||||
int error, noneg; | int error, noneg; | ||||
cred = td->td_ucred; | cred = td->td_ucred; | ||||
vp = fp->f_vnode; | vp = fp->f_vnode; | ||||
foffset = foffset_lock(fp, 0); | foffset = foffset_lock(fp, 0); | ||||
noneg = (vp->v_type != VCHR); | noneg = (vp->v_type != VCHR); | ||||
error = 0; | error = 0; | ||||
▲ Show 20 Lines • Show All 924 Lines • ▼ Show 20 Lines | #endif | ||||
if (olen + ooffset != offset + len) { | if (olen + ooffset != offset + len) { | ||||
panic("offset + len changed from %jx/%jx to %jx/%jx", | panic("offset + len changed from %jx/%jx to %jx/%jx", | ||||
ooffset, olen, offset, len); | ooffset, olen, offset, len); | ||||
} | } | ||||
if (error != 0 || len == 0) | if (error != 0 || len == 0) | ||||
break; | break; | ||||
KASSERT(olen > len, ("Iteration did not make progress?")); | KASSERT(olen > len, ("Iteration did not make progress?")); | ||||
maybe_yield(); | maybe_yield(); | ||||
} | |||||
return (error); | |||||
} | |||||
/*
 * Deallocate (hole-punch) the byte range [*offset, *offset + *length)
 * of vnode vp by looping over VOP_DEALLOCATE() until the whole range is
 * done or an error occurs.  On return, *offset and *length describe the
 * residual range, so the caller can observe partial progress even when
 * an error (e.g. EINTR from vn_start_write() with PCATCH) is returned.
 *
 * ioflg bits honored here:
 *   IO_NODELOCKED  - caller holds the vnode lock and write suspension;
 *                    skip bwillwrite/vn_start_write/vn_lock per pass.
 *   IO_RANGELOCKED - caller already holds the range lock.
 *   IO_NOMACCHECK  - skip the MAC write check.
 */
static int
vn_deallocate_impl(struct vnode *vp, off_t *offset, off_t *length, int flags,
    int ioflg, struct ucred *active_cred, struct ucred *file_cred)
{
	struct mount *mp;
	void *rl_cookie;
	off_t off, len;
	int lock_flags;
	int error;
#ifdef AUDIT
	bool audited_vnode1 = false;
#endif

	rl_cookie = NULL;
	error = 0;
	mp = NULL;
	off = *offset;
	len = *length;

	/*
	 * Take the range lock once for the whole operation, so the
	 * deallocation appears atomic to concurrent readers and writers
	 * even though the vnode lock is dropped and re-taken on every
	 * loop iteration below.
	 */
	if ((ioflg & (IO_NODELOCKED | IO_RANGELOCKED)) == 0)
		rl_cookie = vn_rangelock_wlock(vp, off, off + len);
	while (len > 0 && error == 0) {
		/*
		 * Try to deallocate the longest range in one pass.
		 * In case a pass takes too long to be executed, it returns
		 * partial result. The residue will be processed in the next
		 * pass.
		 */
		if ((ioflg & IO_NODELOCKED) == 0) {
			bwillwrite();
			if ((error = vn_start_write(vp, &mp,
			    V_WAIT | PCATCH)) != 0)
				goto out;
			/*
			 * A shared vnode lock is enough on filesystems
			 * that allow shared writes; mp may come back NULL
			 * from vn_start_write(), fall back to vp->v_mount
			 * in that case.
			 */
			if ((MNT_SHARED_WRITES(mp) ||
			    (mp == NULL && MNT_SHARED_WRITES(vp->v_mount))))
				lock_flags = LK_SHARED;
			else
				lock_flags = LK_EXCLUSIVE;
			vn_lock(vp, lock_flags | LK_RETRY);
		}
#ifdef AUDIT
		/* Record the vnode in the audit record at most once. */
		if (!audited_vnode1) {
			AUDIT_ARG_VNODE1(vp);
			audited_vnode1 = true;
		}
#endif
#ifdef MAC
		if ((ioflg & IO_NOMACCHECK) == 0)
			error = mac_vnode_check_write(active_cred, file_cred,
			    vp);
#endif
		/* VOP_DEALLOCATE() advances off and shrinks len in place. */
		if (error == 0)
			error = VOP_DEALLOCATE(vp, &off, &len, flags,
			    active_cred);
		if ((ioflg & IO_NODELOCKED) == 0) {
			VOP_UNLOCK(vp);
			if (mp != NULL) {
				vn_finished_write(mp);
				mp = NULL;
			}
		}
	}
out:
	if (rl_cookie != NULL)
		vn_rangelock_unlock(vp, rl_cookie);
	/* Report the residual range back to the caller. */
	*offset = off;
	*length = len;
	return (error);
}
int | |||||
vn_deallocate(struct vnode *vp, off_t *offset, off_t *length, int flags, | |||||
Done Inline ActionsYou don't unlock the rangelock on iteration, but do relock it on each loop entry. I believe you would deadlock against yourself if short dealloc occurs. kib: You don't unlock the rangelock on iteration, but do relock it on each loop entry. I believe… | |||||
int ioflg, struct ucred *active_cred, struct ucred *file_cred) | |||||
{ | |||||
if (*offset < 0 || *length <= 0 || *length > OFF_MAX - *offset || | |||||
flags != 0) | |||||
return (EINVAL); | |||||
if (vp->v_type != VREG) | |||||
return (ENODEV); | |||||
return (vn_deallocate_impl(vp, offset, length, flags, ioflg, | |||||
active_cred, file_cred)); | |||||
} | |||||
static int | |||||
vn_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags, | |||||
struct ucred *active_cred, struct thread *td) | |||||
{ | |||||
int error; | |||||
struct vnode *vp; | |||||
vp = fp->f_vnode; | |||||
if (cmd != SPACECTL_DEALLOC || *offset < 0 || *length <= 0 || | |||||
*length > OFF_MAX - *offset || flags != 0) | |||||
return (EINVAL); | |||||
if (vp->v_type != VREG) | |||||
return (ENODEV); | |||||
switch (cmd) { | |||||
case SPACECTL_DEALLOC: | |||||
error = vn_deallocate_impl(vp, offset, length, flags, 0, | |||||
active_cred, fp->f_cred); | |||||
break; | |||||
default: | |||||
panic("vn_fspacectl: unknown cmd %d", cmd); | |||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
static u_long vn_lock_pair_pause_cnt; | static u_long vn_lock_pair_pause_cnt; | ||||
SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, | SYSCTL_ULONG(_debug, OID_AUTO, vn_lock_pair_pause, CTLFLAG_RD, | ||||
&vn_lock_pair_pause_cnt, 0, | &vn_lock_pair_pause_cnt, 0, | ||||
▲ Show 20 Lines • Show All 89 Lines • Show Last 20 Lines |
I suggest extracting the introduction of vn_bmap_seekhole_locked() into a separate commit and doing it in advance.