diff --git a/sys/kern/kern_lockf.c b/sys/kern/kern_lockf.c
--- a/sys/kern/kern_lockf.c
+++ b/sys/kern/kern_lockf.c
@@ -1861,7 +1861,7 @@
 struct lockdesc {
 	STAILQ_ENTRY(lockdesc) link;
 	struct vnode *vp;
-	struct flock fl;
+	struct xlockf xl;
 };
 STAILQ_HEAD(lockdesclist, lockdesc);
 
@@ -1894,16 +1894,17 @@
 			    M_WAITOK);
 			ldesc->vp = lf->lf_vnode;
 			vref(ldesc->vp);
-			ldesc->fl.l_start = lf->lf_start;
+			ldesc->xl.xl_start = lf->lf_start;
 			if (lf->lf_end == OFF_MAX)
-				ldesc->fl.l_len = 0;
+				ldesc->xl.xl_len = 0;
 			else
-				ldesc->fl.l_len =
+				ldesc->xl.xl_len =
 					lf->lf_end - lf->lf_start + 1;
-			ldesc->fl.l_whence = SEEK_SET;
-			ldesc->fl.l_type = F_UNLCK;
-			ldesc->fl.l_pid = lf->lf_owner->lo_pid;
-			ldesc->fl.l_sysid = sysid;
+			ldesc->xl.xl_id = (kvaddr_t)lf->lf_owner->lo_id;
+			ldesc->xl.xl_whence = SEEK_SET;
+			ldesc->xl.xl_type = lf->lf_type;
+			ldesc->xl.xl_pid = lf->lf_owner->lo_pid;
+			ldesc->xl.xl_sysid = sysid;
 			STAILQ_INSERT_TAIL(&locks, ldesc, link);
 		}
 		sx_xunlock(&ls->ls_lock);
@@ -1919,7 +1920,7 @@
 	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
 		STAILQ_REMOVE_HEAD(&locks, link);
 		if (!error)
-			error = fn(ldesc->vp, &ldesc->fl, arg);
+			error = fn(ldesc->vp, &ldesc->xl, arg);
 		vrele(ldesc->vp);
 		free(ldesc, M_LOCKF);
 	}
@@ -1961,16 +1962,17 @@
 		    M_WAITOK);
 		ldesc->vp = lf->lf_vnode;
 		vref(ldesc->vp);
-		ldesc->fl.l_start = lf->lf_start;
+		ldesc->xl.xl_start = lf->lf_start;
 		if (lf->lf_end == OFF_MAX)
-			ldesc->fl.l_len = 0;
+			ldesc->xl.xl_len = 0;
 		else
-			ldesc->fl.l_len =
+			ldesc->xl.xl_len =
 				lf->lf_end - lf->lf_start + 1;
-		ldesc->fl.l_whence = SEEK_SET;
-		ldesc->fl.l_type = F_UNLCK;
-		ldesc->fl.l_pid = lf->lf_owner->lo_pid;
-		ldesc->fl.l_sysid = lf->lf_owner->lo_sysid;
+		ldesc->xl.xl_id = (kvaddr_t)lf->lf_owner->lo_id;
+		ldesc->xl.xl_whence = SEEK_SET;
+		ldesc->xl.xl_type = lf->lf_type;
+		ldesc->xl.xl_pid = lf->lf_owner->lo_pid;
+		ldesc->xl.xl_sysid = lf->lf_owner->lo_sysid;
 		STAILQ_INSERT_TAIL(&locks, ldesc, link);
 	}
 	sx_xunlock(&ls->ls_lock);
@@ -1989,7 +1991,7 @@
 	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
 		STAILQ_REMOVE_HEAD(&locks, link);
 		if (!error)
-			error = fn(ldesc->vp, &ldesc->fl, arg);
+			error = fn(ldesc->vp, &ldesc->xl, arg);
 		vrele(ldesc->vp);
 		free(ldesc, M_LOCKF);
 	}
@@ -1998,10 +2000,17 @@
 }
 
 static int
-lf_clearremotesys_iterator(struct vnode *vp, struct flock *fl, void *arg)
+lf_clearremotesys_iterator(struct vnode *vp, struct xlockf *xl, void *arg)
 {
+	struct flock fl;
 
-	VOP_ADVLOCK(vp, 0, F_UNLCK, fl, F_REMOTE);
+	fl.l_start = xl->xl_start;
+	fl.l_len = xl->xl_len;
+	fl.l_pid = xl->xl_pid;
+	fl.l_type = F_UNLCK;
+	fl.l_whence = xl->xl_whence;
+	fl.l_sysid = xl->xl_sysid;
+	VOP_ADVLOCK(vp, 0, F_UNLCK, &fl, F_REMOTE);
 	return (0);
 }
 
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -307,6 +307,12 @@
 
 static u_long vnlru_read_freevnodes(void);
 
+struct lockf_context {
+	struct sysctl_req *req;	/* sysctl request being served */
+	pid_t pid;		/* target pid taken from the OID */
+	size_t byte_size;	/* bytes needed, when only sizing */
+};
+
 /*
  * Note that no attempt is made to sanitize these parameters.
  */
@@ -4682,6 +4688,122 @@
     "");
 #endif
 
+/*
+ * lf_iteratelocks_vnode() callback for the kern.lockf sysctl: size or copy
+ * out one lock record.  Records owned by a local process that the
+ * requesting thread may not see are skipped.
+ */
+static int
+lockf_iterator(struct vnode *vn, struct xlockf *xl, void *arg)
+{
+	struct lockf_context *ctx = arg;
+	struct xlockf out;
+	struct proc *p;
+	int error, visible;
+
+	/*
+	 * Local pids, excluding the original one (already checked by
+	 * sysctl_lockf()), should be checked for visibility.
+	 */
+	if (xl->xl_sysid == 0 && xl->xl_pid != -1 && xl->xl_pid != ctx->pid) {
+		error = pget(xl->xl_pid, 0, &p);
+		if (error == 0) {
+			visible = !p_cansee(ctx->req->td, p);
+			PROC_UNLOCK(p);
+			if (!visible)
+				return (0);
+		}
+	}
+	if (ctx->req->oldptr == NULL) {
+		ctx->byte_size += sizeof(*xl);
+		return (0);
+	}
+
+	/*
+	 * Copy out a zero-filled local record instead of the caller's
+	 * buffer so that structure padding cannot carry stale kernel
+	 * memory out to userland.
+	 */
+	bzero(&out, sizeof(out));
+	out.xl_start = xl->xl_start;
+	out.xl_len = xl->xl_len;
+	out.xl_id = xl->xl_id;
+	out.xl_pid = xl->xl_pid;
+	out.xl_type = xl->xl_type;
+	out.xl_whence = xl->xl_whence;
+	out.xl_sysid = xl->xl_sysid;
+	return (SYSCTL_OUT(ctx->req, &out, sizeof(out)));
+}
+
+/*
+ * Handler for kern.lockf.<pid>.<fd>: report the byte-level advisory locks
+ * on the vnode behind file descriptor <fd> of process <pid> as an array
+ * of struct xlockf.  Writes are refused with EPERM; when no old buffer is
+ * supplied only the accumulated size is passed to SYSCTL_OUT().
+ */
+static int
+sysctl_lockf(SYSCTL_HANDLER_ARGS)
+{
+	int *name = (int *)arg1;
+	u_int namelen = arg2;
+	int error = 0;
+	pid_t pid;
+	int fd;
+	struct proc *p;
+	struct file *fp = NULL;
+	struct lockf_context ctx;
+
+	if (namelen != 2)
+		return (EINVAL);
+	if (req->newptr)
+		return (EPERM);
+
+	pid = name[0];
+	fd = name[1];
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error)
+		return (error);
+
+	ctx.byte_size = 0;
+	ctx.req = req;
+	ctx.pid = pid;
+
+	error = pget(pid, 0, &p);
+	if (error == 0) {
+		error = p_cansee(req->td, p);
+		if (error == 0) {
+			error = fget_unlocked(p->p_fd, fd, &cap_no_rights, &fp);
+		}
+		PROC_UNLOCK(p);
+		/*
+		 * Reject descriptors that are not vnode-backed (sockets,
+		 * pipes, ...) so that lf_iteratelocks_vnode() is never
+		 * handed a NULL vnode.
+		 */
+		if (error == 0 &&
+		    (fp->f_type != DTYPE_VNODE || fp->f_vnode == NULL))
+			error = EBADF;
+		if (error == 0) {
+			error = lf_iteratelocks_vnode(fp->f_vnode, lockf_iterator, &ctx);
+		}
+		if (fp != NULL)
+			fdrop(fp, curthread);
+	}
+	if (error)
+		return (error);
+
+	if (req->oldptr == NULL) {
+		return (SYSCTL_OUT(req, NULL, ctx.byte_size));
+	}
+
+	return (0);
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, lockf,
+    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_lockf,
+    "Array of struct xlockf (byte-level advisory locks) for the given process and file descriptor");
+
 static void
 unmount_or_warn(struct mount *mp)
 {
diff --git a/sys/nlm/nlm_advlock.c b/sys/nlm/nlm_advlock.c
--- a/sys/nlm/nlm_advlock.c
+++ b/sys/nlm/nlm_advlock.c
@@ -451,15 +451,19 @@
 }
 
 static int
-nlm_reclaim_free_lock(struct vnode *vp, struct flock *fl, void *arg)
+nlm_reclaim_free_lock(struct vnode *vp, struct xlockf *xl, void *arg)
 {
 	struct flock newfl;
 	struct thread *td = curthread;
 	struct ucred *oldcred;
 	int error;
 
-	newfl = *fl;
+	newfl.l_start = xl->xl_start;
+	newfl.l_len = xl->xl_len;
+	newfl.l_pid = xl->xl_pid;
 	newfl.l_type = F_UNLCK;
+	newfl.l_whence = xl->xl_whence;
+	newfl.l_sysid = xl->xl_sysid;
 
 	oldcred = td->td_ucred;
 	nlm_set_creds_for_lock(td, &newfl);
@@ -488,12 +492,13 @@
 };
 
 static int
-nlm_client_recover_lock(struct vnode *vp, struct flock *fl, void *arg)
+nlm_client_recover_lock(struct vnode *vp, struct xlockf *xl, void *arg)
 {
 	struct nlm_recovery_context *nr = (struct nlm_recovery_context *) arg;
 	struct thread *td = curthread;
 	struct ucred *oldcred;
 	int state, error;
+	struct flock fl;
 
 	/*
 	 * If the remote NSM state changes during recovery, the host
@@ -508,10 +513,16 @@
 	if (error)
 		return (error);
 
+	fl.l_start = xl->xl_start;
+	fl.l_len = xl->xl_len;
+	fl.l_pid = xl->xl_pid;
+	fl.l_type = F_UNLCK;
+	fl.l_whence = xl->xl_whence;
+	fl.l_sysid = xl->xl_sysid;
 	oldcred = td->td_ucred;
-	nlm_set_creds_for_lock(td, fl);
+	nlm_set_creds_for_lock(td, &fl);
 
-	error = nlm_advlock_internal(vp, NULL, F_SETLK, fl, F_REMOTE,
+	error = nlm_advlock_internal(vp, NULL, F_SETLK, &fl, F_REMOTE,
 	    TRUE, TRUE);
 
 	crfree(td->td_ucred);
diff --git a/sys/sys/lockf.h b/sys/sys/lockf.h
--- a/sys/sys/lockf.h
+++ b/sys/sys/lockf.h
@@ -122,7 +122,21 @@
 };
 LIST_HEAD(lockf_list, lockf);
 
-typedef int lf_iterator(struct vnode *, struct flock *, void *);
+/*
+ * Lock entry as exported to userland.
+ */
+struct xlockf {
+	off_t xl_start;		/* starting offset */
+	off_t xl_len;		/* xl_len = 0 means until end of file */
+	kvaddr_t xl_id;		/* address of struct file, when xl_pid is -1 */
+	pid_t xl_pid;		/* lock owner, or -1 to identify by xl_id instead */
+	short xl_type;		/* lock type: read/write, etc. */
+	short xl_whence;	/* type of xl_start */
+	int xl_sysid;		/* the namespace for xl_pid: remote system id or zero for local */
+};
+
+
+typedef int lf_iterator(struct vnode *, struct xlockf *, void *);
 
 int	 lf_advlock(struct vop_advlock_args *, struct lockf **, u_quad_t);
 int	 lf_advlockasync(struct vop_advlockasync_args *, struct lockf **, u_quad_t);
diff --git a/sys/sys/param.h b/sys/sys/param.h
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -76,7 +76,7 @@
  * cannot include sys/param.h and should only be updated here.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1400052
+#define __FreeBSD_version 1400053
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,