diff --git a/lib/libprocstat/Symbol.map b/lib/libprocstat/Symbol.map --- a/lib/libprocstat/Symbol.map +++ b/lib/libprocstat/Symbol.map @@ -44,3 +44,8 @@ procstat_get_pts_info; procstat_get_vnode_info; }; + +FBSD_1.7 { + procstat_getadvlock; + procstat_freeadvlock; +}; \ No newline at end of file diff --git a/lib/libprocstat/libprocstat.h b/lib/libprocstat/libprocstat.h --- a/lib/libprocstat/libprocstat.h +++ b/lib/libprocstat/libprocstat.h @@ -168,8 +168,33 @@ STAILQ_HEAD(filestat_list, filestat); +struct advlock { + int rw; /* PS_ADVLOCK_RO/RW */ + int type; /* PS_ADVLOCK_TYPE_ */ + int pid; + int sysid; + uint64_t file_fsid; + uint64_t file_rdev; + uint64_t file_fileid; + off_t start; + off_t len; /* len == 0 till the EOF */ + const char *path; + STAILQ_ENTRY(advlock) next; +}; + +#define PS_ADVLOCK_RO 0x01 +#define PS_ADVLOCK_RW 0x02 + +#define PS_ADVLOCK_TYPE_FLOCK 0x01 +#define PS_ADVLOCK_TYPE_PID 0x02 +#define PS_ADVLOCK_TYPE_REMOTE 0x03 + +STAILQ_HEAD(advlock_list, advlock); + __BEGIN_DECLS void procstat_close(struct procstat *procstat); +void procstat_freeadvlock(struct procstat *procstat, + struct advlock_list *advlocks); void procstat_freeargv(struct procstat *procstat); #ifndef ZFS void procstat_freeauxv(struct procstat *procstat, Elf_Auxinfo *auxv); @@ -185,6 +210,7 @@ struct ptrace_lwpinfo *pl); void procstat_freevmmap(struct procstat *procstat, struct kinfo_vmentry *vmmap); +struct advlock_list *procstat_getadvlock(struct procstat *procstat); struct filestat_list *procstat_getfiles(struct procstat *procstat, struct kinfo_proc *kp, int mmapped); struct kinfo_proc *procstat_getprocs(struct procstat *procstat, diff --git a/lib/libprocstat/libprocstat.3 b/lib/libprocstat/libprocstat.3 --- a/lib/libprocstat/libprocstat.3 +++ b/lib/libprocstat/libprocstat.3 @@ -24,11 +24,12 @@ .\" .\" $FreeBSD$ .\" -.Dd November 26, 2020 +.Dd April 3, 2022 .Dt LIBPROCSTAT 3 .Os .Sh NAME .Nm procstat_close , +.Nm procstat_freeadvlock , .Nm procstat_freeargv , .Nm 
procstat_freeauxv , .Nm procstat_freeenvv , @@ -44,6 +45,7 @@ .Nm procstat_get_shm_info , .Nm procstat_get_socket_info , .Nm procstat_get_vnode_info , +.Nm procstat_getadvlock , .Nm procstat_getargv , .Nm procstat_getauxv , .Nm procstat_getenvv , @@ -71,6 +73,8 @@ .Ft void .Fn procstat_close "struct procstat *procstat" .Ft void +.Fn procstat_freeadvlock "struct procstat *procstat" "struct advlock_list *advlocks" +.Ft void .Fo procstat_freeargv .Fa "struct procstat *procstat" .Fc @@ -152,6 +156,10 @@ .Fa "struct vnstat *vn" .Fa "char *errbuf" .Fc +.Ft "struct advlock_list *" +.Fo procstat_getadvlock +.Fa "struct procstat *procstat" +.Fc .Ft "char **" .Fo procstat_getargv .Fa "struct procstat *procstat" @@ -495,6 +503,26 @@ function call. .Pp The +.Fn procstat_getadvlock +function returns a dynamically allocated list of +.Va struct advlock +structures providing a snapshot of the advisory locks currently +acquired in the system. +Both locally acquired POSIX ( +.Xr fcntl 2 ) +and BSD-style ( +.Xr flock 2 ) +locks are reported, as well as locks established by remote file +system protocols. +For each lock, file identification information (mount point and +inode number) is guaranteed to be provided. +If a path for the locked file can be reconstructed, it is provided +as well. +The returned list must be freed with the +.Fn procstat_freeadvlock +function. 
+.Pp +The .Fn procstat_get_pipe_info , .Fn procstat_get_pts_info , .Fn procstat_get_sem_info , diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -2644,3 +2644,138 @@ free(kkstp); } + +static struct advlock_list * +procstat_getadvlock_sysctl(struct procstat *procstat __unused) +{ + struct advlock_list *res; + struct advlock *a; + void *buf; + char *c; + struct kinfo_lockf *kl; + size_t buf_len; + int error; + static const int kl_name[] = { CTL_KERN, KERN_LOCKF }; + + res = malloc(sizeof(*res)); + if (res == NULL) + return (NULL); + STAILQ_INIT(res); + buf = NULL; + + buf_len = 0; + error = sysctl(kl_name, nitems(kl_name), NULL, &buf_len, NULL, 0); + if (error != 0) { + warn("sysctl KERN_LOCKF size"); + goto fail; + } + buf_len *= 2; + buf = malloc(buf_len); + if (buf == NULL) { + warn("malloc"); + goto fail; + } + error = sysctl(kl_name, nitems(kl_name), buf, &buf_len, NULL, 0); + if (error != 0) { + warn("sysctl KERN_LOCKF data"); + goto fail; + } + + for (c = buf; (char *)c < (char *)buf + buf_len; + c += kl->kl_structsize) { + kl = (struct kinfo_lockf *)(void *)c; + if (sizeof(*kl) < (size_t)kl->kl_structsize) { + warn("ABI broken"); + goto fail; + } + a = malloc(sizeof(*a)); + if (a == NULL) { + warn("malloc advlock"); + goto fail; + } + switch (kl->kl_rw) { + case KLOCK_RW_READ: + a->rw = PS_ADVLOCK_RO; + break; + case KLOCK_RW_WRITE: + a->rw = PS_ADVLOCK_RW; + break; + default: + warn("ABI broken"); + free(a); + goto fail; + } + switch (kl->kl_type) { + case KLOCK_TYPE_FLOCK: + a->type = PS_ADVLOCK_TYPE_FLOCK; + break; + case KLOCK_TYPE_PID: + a->type = PS_ADVLOCK_TYPE_PID; + break; + case KLOCK_TYPE_REMOTE: + a->type = PS_ADVLOCK_TYPE_REMOTE; + break; + default: + warn("ABI broken"); + free(a); + goto fail; + } + a->pid = kl->kl_pid; + a->sysid = kl->kl_sysid; + a->file_fsid = kl->kl_file_fsid; + a->file_rdev = kl->kl_file_rdev; + a->file_fileid = 
kl->kl_file_fileid; + a->start = kl->kl_start; + a->len = kl->kl_len; + if (kl->kl_path[0] != '\0') { + a->path = strdup(kl->kl_path); + if (a->path == NULL) { + warn("malloc"); + free(a); + goto fail; + } + } else + a->path = NULL; + STAILQ_INSERT_TAIL(res, a, next); + } + + free(buf); + return (res); + +fail: + free(buf); + procstat_freeadvlock(procstat, res); + return (NULL); +} + +struct advlock_list * +procstat_getadvlock(struct procstat *procstat) +{ + switch(procstat->type) { + case PROCSTAT_KVM: + warnx("kvm method is not supported"); + return (NULL); + case PROCSTAT_SYSCTL: + return (procstat_getadvlock_sysctl(procstat)); + case PROCSTAT_CORE: + warnx("core method is not supported"); + return (NULL); + default: + warnx("unknown access method: %d", procstat->type); + return (NULL); + } +} + +void +procstat_freeadvlock(struct procstat *procstat __unused, + struct advlock_list *lst) +{ + struct advlock *a, *a1; + + STAILQ_FOREACH_SAFE(a, lst, next, a1) { + free(__DECONST(char *, a->path)); + free(a); + } + free(lst); +} + diff --git a/sys/kern/kern_lockf.c b/sys/kern/kern_lockf.c --- a/sys/kern/kern_lockf.c +++ b/sys/kern/kern_lockf.c @@ -68,14 +68,18 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include #include #include +#include #include #include #include @@ -85,11 +89,6 @@ #ifdef LOCKF_DEBUG #include -#include -#include -#include -#include - static int lockf_debug = 0; /* control debug output */ SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, ""); #endif @@ -571,13 +570,6 @@ vref(vp); } - /* - * XXX The problem is that VTOI is ufs specific, so it will - * break LOCKF_DEBUG for all other FS's other than UFS because - * it casts the vnode->data ptr to struct inode *. 
- */ -/* lock->lf_inode = VTOI(ap->a_vp); */ - lock->lf_inode = (struct inode *)0; lock->lf_type = fl->l_type; LIST_INIT(&lock->lf_outedges); LIST_INIT(&lock->lf_inedges); @@ -2471,6 +2463,140 @@ return (g); } +struct kinfo_lockf_linked { + struct kinfo_lockf kl; + struct vnode *vp; + STAILQ_ENTRY(kinfo_lockf_linked) link; +}; + +int +vfs_report_lockf(struct mount *mp, struct sbuf *sb) +{ + struct lockf *ls; + struct lockf_entry *lf; + struct kinfo_lockf_linked *klf; + struct vnode *vp; + struct ucred *ucred; + char *fullpath, *freepath; + struct stat stt; + fsid_t fsidx; + STAILQ_HEAD(, kinfo_lockf_linked) locks; + int error, gerror; + + STAILQ_INIT(&locks); + sx_slock(&lf_lock_states_lock); + LIST_FOREACH(ls, &lf_lock_states, ls_link) { + sx_slock(&ls->ls_lock); + LIST_FOREACH(lf, &ls->ls_active, lf_link) { + vp = lf->lf_vnode; + if (vp == NULL || VN_IS_DOOMED(vp) || + vp->v_mount != mp) + continue; + vhold(vp); + klf = malloc(sizeof(struct kinfo_lockf_linked), + M_LOCKF, M_WAITOK | M_ZERO); + klf->vp = vp; + klf->kl.kl_structsize = sizeof(struct kinfo_lockf); + klf->kl.kl_start = lf->lf_start; + klf->kl.kl_len = lf->lf_end == OFF_MAX ? 0 : + lf->lf_end - lf->lf_start + 1; + klf->kl.kl_rw = lf->lf_type == F_RDLCK ? 
KLOCK_RW_READ : + KLOCK_RW_WRITE; + if (lf->lf_owner->lo_sysid != 0) { + klf->kl.kl_pid = lf->lf_owner->lo_pid; + klf->kl.kl_sysid = lf->lf_owner->lo_sysid; + klf->kl.kl_type = KLOCK_TYPE_REMOTE; + } else if (lf->lf_owner->lo_pid == -1) { + klf->kl.kl_pid = -1; + klf->kl.kl_sysid = 0; + klf->kl.kl_type = KLOCK_TYPE_FLOCK; + } else { + klf->kl.kl_pid = lf->lf_owner->lo_pid; + klf->kl.kl_sysid = 0; + klf->kl.kl_type = KLOCK_TYPE_PID; + } + STAILQ_INSERT_TAIL(&locks, klf, link); + } + sx_sunlock(&ls->ls_lock); + } + sx_sunlock(&lf_lock_states_lock); + + gerror = 0; + ucred = curthread->td_ucred; + fsidx = mp->mnt_stat.f_fsid; + while ((klf = STAILQ_FIRST(&locks)) != NULL) { + STAILQ_REMOVE_HEAD(&locks, link); + vp = klf->vp; + if (gerror == 0 && vn_lock(vp, LK_SHARED | LK_RETRY) == 0) { + error = prison_canseemount(ucred, vp->v_mount); + if (error == 0) + error = VOP_STAT(vp, &stt, ucred, NOCRED); + VOP_UNLOCK(vp); + if (error == 0) { + memcpy(&klf->kl.kl_file_fsid, &fsidx, + sizeof(fsidx)); + klf->kl.kl_file_rdev = stt.st_rdev; + klf->kl.kl_file_fileid = stt.st_ino; + freepath = NULL; + fullpath = "-"; + error = vn_fullpath(vp, &fullpath, &freepath); + if (error == 0) + strlcpy(klf->kl.kl_path, fullpath, + sizeof(klf->kl.kl_path)); + free(freepath, M_TEMP); + if (sbuf_bcat(sb, &klf->kl, + klf->kl.kl_structsize) != 0) { + gerror = sbuf_error(sb); + } + } + } + vdrop(vp); + free(klf, M_LOCKF); + } + + return (gerror); +} + +static int +sysctl_kern_lockf_run(struct sbuf *sb) +{ + struct mount *mp; + int error; + + error = 0; + mtx_lock(&mountlist_mtx); + TAILQ_FOREACH(mp, &mountlist, mnt_list) { + error = vfs_busy(mp, MBF_MNTLSTLOCK); + if (error != 0) + continue; + error = mp->mnt_op->vfs_report_lockf(mp, sb); + mtx_lock(&mountlist_mtx); + vfs_unbusy(mp); + if (error != 0) + break; + } + mtx_unlock(&mountlist_mtx); + return (error); +} + +static int +sysctl_kern_lockf(SYSCTL_HANDLER_ARGS) +{ + struct sbuf sb; + int error, error2; + + sbuf_new_for_sysctl(&sb, NULL, 
sizeof(struct kinfo_lockf) * 5, req); + sbuf_clear_flags(&sb, SBUF_INCLUDENUL); + error = sysctl_kern_lockf_run(&sb); + error2 = sbuf_finish(&sb); + sbuf_delete(&sb); + return (error != 0 ? error : error2); +} +SYSCTL_PROC(_kern, KERN_LOCKF, lockf, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, + 0, 0, sysctl_kern_lockf, "S,lockf", + "Advisory locks table"); + #ifdef LOCKF_DEBUG /* * Print description of a lock owner @@ -2498,10 +2624,8 @@ printf("%s: lock %p for ", tag, (void *)lock); lf_print_owner(lock->lf_owner); - if (lock->lf_inode != (struct inode *)0) - printf(" in ino %ju on dev <%s>,", - (uintmax_t)lock->lf_inode->i_number, - devtoname(ITODEV(lock->lf_inode))); + printf("\nvnode %p", lock->lf_vnode); + VOP_PRINT(lock->lf_vnode); printf(" %s, start %jd, end ", lock->lf_type == F_RDLCK ? "shared" : lock->lf_type == F_WRLCK ? "exclusive" : @@ -2524,12 +2648,8 @@ struct lockf_entry *lf, *blk; struct lockf_edge *e; - if (lock->lf_inode == (struct inode *)0) - return; - - printf("%s: Lock list for ino %ju on dev <%s>:\n", - tag, (uintmax_t)lock->lf_inode->i_number, - devtoname(ITODEV(lock->lf_inode))); + printf("%s: Lock list for vnode %p:\n", + tag, lock->lf_vnode); LIST_FOREACH(lf, &lock->lf_vnode->v_lockf->ls_active, lf_link) { printf("\tlock %p for ",(void *)lf); lf_print_owner(lock->lf_owner); diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c --- a/sys/kern/vfs_init.c +++ b/sys/kern/vfs_init.c @@ -352,6 +352,17 @@ sigallowstop(prev_stops); } +static int +vfs_report_lockf_sigdefer(struct mount *mp, struct sbuf *sb) +{ + int prev_stops, rc; + + prev_stops = sigdeferstop(SIGDEFERSTOP_SILENT); + rc = (*mp->mnt_vfc->vfc_vfsops_sd->vfs_report_lockf)(mp, sb); + sigallowstop(prev_stops); + return (rc); +} + static struct vfsops vfsops_sigdefer = { .vfs_mount = vfs_mount_sigdefer, .vfs_unmount = vfs_unmount_sigdefer, @@ -369,7 +380,7 @@ .vfs_reclaim_lowervp = vfs_reclaim_lowervp_sigdefer, .vfs_unlink_lowervp = vfs_unlink_lowervp_sigdefer, .vfs_purge = 
vfs_purge_sigdefer, - + .vfs_report_lockf = vfs_report_lockf_sigdefer, }; /* Register a new filesystem type in the global table */ @@ -483,6 +494,8 @@ vfsops->vfs_extattrctl = vfs_stdextattrctl; if (vfsops->vfs_sysctl == NULL) vfsops->vfs_sysctl = vfs_stdsysctl; + if (vfsops->vfs_report_lockf == NULL) + vfsops->vfs_report_lockf = vfs_report_lockf; if ((vfc->vfc_flags & VFCF_SBDRY) != 0) { vfc->vfc_vfsops_sd = vfc->vfc_vfsops; diff --git a/sys/sys/lockf.h b/sys/sys/lockf.h --- a/sys/sys/lockf.h +++ b/sys/sys/lockf.h @@ -77,7 +77,6 @@ off_t lf_end; /* (s) Byte # of the end of the lock (OFF_MAX=EOF) */ struct lock_owner *lf_owner; /* (c) Owner of the lock */ struct vnode *lf_vnode; /* (c) File being locked (only valid for active lock) */ - struct inode *lf_inode; /* (c) Back pointer to the inode */ struct task *lf_async_task;/* (c) Async lock callback */ LIST_ENTRY(lockf_entry) lf_link; /* (s) Linkage for lock lists */ struct lockf_edge_list lf_outedges; /* (s) list of out-edges */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -804,6 +804,8 @@ typedef void vfs_susp_clean_t(struct mount *mp); typedef void vfs_notify_lowervp_t(struct mount *mp, struct vnode *lowervp); typedef void vfs_purge_t(struct mount *mp); +struct sbuf; +typedef int vfs_report_lockf_t(struct mount *mp, struct sbuf *sb); struct vfsops { vfs_mount_t *vfs_mount; @@ -825,6 +827,7 @@ vfs_notify_lowervp_t *vfs_reclaim_lowervp; vfs_notify_lowervp_t *vfs_unlink_lowervp; vfs_purge_t *vfs_purge; + vfs_report_lockf_t *vfs_report_lockf; vfs_mount_t *vfs_spare[6]; /* spares for ABI compat */ }; @@ -1039,6 +1042,7 @@ struct mount_upper_node *); void vfs_unregister_upper(struct mount *, struct mount_upper_node *); int vfs_remount_ro(struct mount *mp); +int vfs_report_lockf(struct mount *mp, struct sbuf *sb); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx_padalign mountlist_mtx; diff --git a/sys/sys/sysctl.h 
b/sys/sys/sysctl.h --- a/sys/sys/sysctl.h +++ b/sys/sys/sysctl.h @@ -976,6 +976,7 @@ #define KERN_HOSTUUID 36 /* string: host UUID identifier */ #define KERN_ARND 37 /* int: from arc4rand() */ #define KERN_MAXPHYS 38 /* int: MAXPHYS value */ +#define KERN_LOCKF 39 /* struct: lockf reports */ /* * KERN_PROC subtypes */ diff --git a/sys/sys/user.h b/sys/sys/user.h --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -452,6 +452,28 @@ char kf_path[PATH_MAX]; /* Path to file, if any. */ }; +struct kinfo_lockf { + int kl_structsize; /* Variable size of record. */ + int kl_rw; + int kl_type; + int kl_pid; + int kl_sysid; + int kl_pad0; + uint64_t kl_file_fsid; + uint64_t kl_file_rdev; + uint64_t kl_file_fileid; + off_t kl_start; + off_t kl_len; /* len == 0 till the EOF */ + char kl_path[PATH_MAX]; +}; + +#define KLOCK_RW_READ 0x01 +#define KLOCK_RW_WRITE 0x02 + +#define KLOCK_TYPE_FLOCK 0x01 +#define KLOCK_TYPE_PID 0x02 +#define KLOCK_TYPE_REMOTE 0x03 + /* * The KERN_PROC_VMMAP sysctl allows a process to dump the VM layout of * another process as a series of entries. diff --git a/sys/ufs/ufs/acl.h b/sys/ufs/ufs/acl.h --- a/sys/ufs/ufs/acl.h +++ b/sys/ufs/ufs/acl.h @@ -39,8 +39,12 @@ #ifdef _KERNEL -int ufs_getacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td); -int ufs_setacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td); +struct inode; + +int ufs_getacl_nfs4_internal(struct vnode *vp, struct acl *aclp, + struct thread *td); +int ufs_setacl_nfs4_internal(struct vnode *vp, struct acl *aclp, + struct thread *td); void ufs_sync_acl_from_inode(struct inode *ip, struct acl *acl); void ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip);