Changeset View
Standalone View
sys/kern/kern_lockf.c
Show First 20 Lines • Show All 62 Lines • ▼ Show 20 Lines | |||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_debug_lockf.h" | #include "opt_debug_lockf.h" | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/hash.h> | #include <sys/hash.h> | ||||
#include <sys/jail.h> | |||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/limits.h> | #include <sys/limits.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/mount.h> | #include <sys/mount.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/sbuf.h> | |||||
#include <sys/stat.h> | |||||
#include <sys/sx.h> | #include <sys/sx.h> | ||||
#include <sys/unistd.h> | #include <sys/unistd.h> | ||||
#include <sys/user.h> | |||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/fcntl.h> | #include <sys/fcntl.h> | ||||
#include <sys/lockf.h> | #include <sys/lockf.h> | ||||
#include <sys/taskqueue.h> | #include <sys/taskqueue.h> | ||||
#ifdef LOCKF_DEBUG | #ifdef LOCKF_DEBUG | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <ufs/ufs/extattr.h> | |||||
#include <ufs/ufs/quota.h> | |||||
#include <ufs/ufs/ufsmount.h> | |||||
#include <ufs/ufs/inode.h> | |||||
static int lockf_debug = 0; /* control debug output */ | static int lockf_debug = 0; /* control debug output */ | ||||
SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, ""); | SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, ""); | ||||
#endif | #endif | ||||
static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures"); | static MALLOC_DEFINE(M_LOCKF, "lockf", "Byte-range locking structures"); | ||||
struct owner_edge; | struct owner_edge; | ||||
struct owner_vertex; | struct owner_vertex; | ||||
▲ Show 20 Lines • Show All 465 Lines • ▼ Show 20 Lines | if (flags & F_REMOTE) { | ||||
/* | /* | ||||
* For remote locks, the caller may release its ref to | * For remote locks, the caller may release its ref to | ||||
* the vnode at any time - we have to ref it here to | * the vnode at any time - we have to ref it here to | ||||
* prevent it from being recycled unexpectedly. | * prevent it from being recycled unexpectedly. | ||||
*/ | */ | ||||
vref(vp); | vref(vp); | ||||
} | } | ||||
/* | |||||
* XXX The problem is that VTOI is ufs specific, so it will | |||||
* break LOCKF_DEBUG for all other FS's other than UFS because | |||||
* it casts the vnode->data ptr to struct inode *. | |||||
*/ | |||||
/* lock->lf_inode = VTOI(ap->a_vp); */ | |||||
lock->lf_inode = (struct inode *)0; | |||||
lock->lf_type = fl->l_type; | lock->lf_type = fl->l_type; | ||||
LIST_INIT(&lock->lf_outedges); | LIST_INIT(&lock->lf_outedges); | ||||
LIST_INIT(&lock->lf_inedges); | LIST_INIT(&lock->lf_inedges); | ||||
lock->lf_async_task = ap->a_task; | lock->lf_async_task = ap->a_task; | ||||
lock->lf_flags = ap->a_flags; | lock->lf_flags = ap->a_flags; | ||||
/* | /* | ||||
* Do the requested operation. First find our state structure | * Do the requested operation. First find our state structure | ||||
▲ Show 20 Lines • Show All 1,877 Lines • ▼ Show 20 Lines | graph_init(struct owner_graph *g) | ||||
g->g_size = 0; | g->g_size = 0; | ||||
g->g_space = 10; | g->g_space = 10; | ||||
g->g_indexbuf = malloc(g->g_space * sizeof(int), M_LOCKF, M_WAITOK); | g->g_indexbuf = malloc(g->g_space * sizeof(int), M_LOCKF, M_WAITOK); | ||||
g->g_gen = 0; | g->g_gen = 0; | ||||
return (g); | return (g); | ||||
} | } | ||||
struct kinfo_lockf_linked { | |||||
struct kinfo_lockf kl; | |||||
struct vnode *vp; | |||||
STAILQ_ENTRY(kinfo_lockf_linked) link; | |||||
}; | |||||
int | |||||
vfs_report_lockf(struct mount *mp, struct sbuf *sb) | |||||
{ | |||||
struct lockf *ls; | |||||
struct lockf_entry *lf; | |||||
struct kinfo_lockf_linked *klf; | |||||
struct vnode *vp; | |||||
struct ucred *ucred; | |||||
char *fullpath, *freepath; | |||||
struct stat stt; | |||||
fsid_t fsidx; | |||||
STAILQ_HEAD(, kinfo_lockf_linked) locks; | |||||
int error, gerror; | |||||
STAILQ_INIT(&locks); | |||||
sx_slock(&lf_lock_states_lock); | |||||
LIST_FOREACH(ls, &lf_lock_states, ls_link) { | |||||
sx_slock(&ls->ls_lock); | |||||
LIST_FOREACH(lf, &ls->ls_active, lf_link) { | |||||
vp = lf->lf_vnode; | |||||
if (VN_IS_DOOMED(vp) || vp->v_mount != mp) | |||||
markj: How is it possible to have `vp == NULL`? | |||||
continue; | |||||
vhold(vp); | |||||
Done Inline ActionsMy concern with this version is performance when there I'm almost tempted to suggest going back to your previous Anyhow, do whatever you think is best. rmacklem: My concern with this version is performance when there
are a lot of file systems. Peter… | |||||
Done Inline ActionsThis code is definitely not a hot path. In worst case, the report of the advisory locks list would take a long time. If somebody complains, we can do some optimizations, for now I prefer to keep your advise to allow for eventual proper reporting for private adv locking implementations. kib: This code is definitely not a hot path. In worst case, the report of the advisory locks list… | |||||
klf = malloc(sizeof(struct kinfo_lockf_linked), | |||||
M_LOCKF, M_WAITOK | M_ZERO); | |||||
klf->vp = vp; | |||||
klf->kl.kl_structsize = sizeof(struct kinfo_lockf); | |||||
klf->kl.kl_start = lf->lf_start; | |||||
klf->kl.kl_len = lf->lf_end == OFF_MAX ? 0 : | |||||
lf->lf_end - lf->lf_start + 1; | |||||
klf->kl.kl_rw = lf->lf_type == F_RDLCK ? KLOCK_RW_READ : | |||||
KLOCK_RW_WRITE; | |||||
if (lf->lf_owner->lo_sysid != 0) { | |||||
klf->kl.kl_pid = lf->lf_owner->lo_pid; | |||||
klf->kl.kl_sysid = lf->lf_owner->lo_sysid; | |||||
klf->kl.kl_type = KLOCK_TYPE_REMOTE; | |||||
} else if (lf->lf_owner->lo_pid == -1) { | |||||
klf->kl.kl_pid = -1; | |||||
klf->kl.kl_sysid = 0; | |||||
klf->kl.kl_type = KLOCK_TYPE_FLOCK; | |||||
} else { | |||||
klf->kl.kl_pid = lf->lf_owner->lo_pid; | |||||
klf->kl.kl_sysid = 0; | |||||
klf->kl.kl_type = KLOCK_TYPE_PID; | |||||
} | |||||
STAILQ_INSERT_TAIL(&locks, klf, link); | |||||
} | |||||
sx_sunlock(&ls->ls_lock); | |||||
} | |||||
sx_sunlock(&lf_lock_states_lock); | |||||
gerror = 0; | |||||
ucred = curthread->td_ucred; | |||||
fsidx = mp->mnt_stat.f_fsid; | |||||
Done Inline ActionsHow does fsidx get set. Maybe I just can't spot it, rmacklem: How does fsidx get set. Maybe I just can't spot it,
but I don't see it being set in the above… | |||||
Done Inline ActionsRight, it should has been moved to the loop below. kib: Right, it should has been moved to the loop below. | |||||
while ((klf = STAILQ_FIRST(&locks)) != NULL) { | |||||
STAILQ_REMOVE_HEAD(&locks, link); | |||||
vp = klf->vp; | |||||
if (gerror == 0 && vn_lock(vp, LK_SHARED) == 0) { | |||||
error = prison_canseemount(ucred, vp->v_mount); | |||||
Done Inline ActionsAlthough it would probably be less efficient, And, yes, you were correct. For the NFS client, it Or, maybe it could do the above to cover the local Anyhow, I'll leave it up to you. rmacklem: Although it would probably be less efficient,
due to making multiple passes through the above… | |||||
Done Inline ActionsDid you mean to include LK_RETRY here? vn_lock(LK_RETRY) never fails. markj: Did you mean to include LK_RETRY here? vn_lock(LK_RETRY) never fails. | |||||
if (error == 0) | |||||
error = VOP_STAT(vp, &stt, ucred, NOCRED); | |||||
VOP_UNLOCK(vp); | |||||
if (error == 0) { | |||||
memcpy(&klf->kl.kl_file_fsid, &fsidx, | |||||
sizeof(fsidx)); | |||||
klf->kl.kl_file_rdev = stt.st_rdev; | |||||
klf->kl.kl_file_fileid = stt.st_ino; | |||||
freepath = NULL; | |||||
fullpath = "-"; | |||||
error = vn_fullpath(vp, &fullpath, &freepath); | |||||
Done Inline ActionsWhat's the purpose of initializing fullpath? markj: What's the purpose of initializing `fullpath`? | |||||
Done Inline ActionsBecause it is done by other places. I am not sure at what point vn_fullpath() started doing something to fullpath in case of error. For now I kept it as is. If you insist, I would remove the initialization. kib: Because it is done by other places. I am not sure at what point vn_fullpath() started doing… | |||||
if (error == 0) | |||||
strlcpy(klf->kl.kl_path, fullpath, | |||||
sizeof(klf->kl.kl_path)); | |||||
free(freepath, M_TEMP); | |||||
if (sbuf_bcat(sb, &klf->kl, | |||||
klf->kl.kl_structsize) != 0) { | |||||
gerror = sbuf_error(sb); | |||||
} | |||||
} | |||||
} | |||||
vdrop(vp); | |||||
free(klf, M_LOCKF); | |||||
} | |||||
return (gerror); | |||||
} | |||||
static int | |||||
sysctl_kern_lockf_run(struct sbuf *sb) | |||||
{ | |||||
struct mount *mp; | |||||
int error; | |||||
error = 0; | |||||
mtx_lock(&mountlist_mtx); | |||||
TAILQ_FOREACH(mp, &mountlist, mnt_list) { | |||||
error = vfs_busy(mp, MBF_MNTLSTLOCK); | |||||
if (error != 0) | |||||
continue; | |||||
error = mp->mnt_op->vfs_report_lockf(mp, sb); | |||||
mtx_lock(&mountlist_mtx); | |||||
vfs_unbusy(mp); | |||||
if (error != 0) | |||||
break; | |||||
} | |||||
mtx_unlock(&mountlist_mtx); | |||||
return (error); | |||||
} | |||||
static int | |||||
sysctl_kern_lockf(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
struct sbuf sb; | |||||
int error, error2; | |||||
sbuf_new_for_sysctl(&sb, NULL, sizeof(struct kinfo_lockf) * 5, req); | |||||
sbuf_clear_flags(&sb, SBUF_INCLUDENUL); | |||||
error = sysctl_kern_lockf_run(&sb); | |||||
error2 = sbuf_finish(&sb); | |||||
sbuf_delete(&sb); | |||||
return (error != 0 ? error : error2); | |||||
} | |||||
SYSCTL_PROC(_kern, KERN_LOCKF, lockf, | |||||
CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, | |||||
0, 0, sysctl_kern_lockf, "S,lockf", | |||||
"Advisory locks table"); | |||||
#ifdef LOCKF_DEBUG | #ifdef LOCKF_DEBUG | ||||
/* | /* | ||||
* Print description of a lock owner | * Print description of a lock owner | ||||
*/ | */ | ||||
static void | static void | ||||
lf_print_owner(struct lock_owner *lo) | lf_print_owner(struct lock_owner *lo) | ||||
{ | { | ||||
Show All 11 Lines | |||||
* Print out a lock. | * Print out a lock. | ||||
*/ | */ | ||||
static void | static void | ||||
lf_print(char *tag, struct lockf_entry *lock) | lf_print(char *tag, struct lockf_entry *lock) | ||||
{ | { | ||||
printf("%s: lock %p for ", tag, (void *)lock); | printf("%s: lock %p for ", tag, (void *)lock); | ||||
lf_print_owner(lock->lf_owner); | lf_print_owner(lock->lf_owner); | ||||
if (lock->lf_inode != (struct inode *)0) | printf("\nvnode %p", lock->lf_vnode); | ||||
printf(" in ino %ju on dev <%s>,", | VOP_PRINT(lock->lf_vnode); | ||||
(uintmax_t)lock->lf_inode->i_number, | |||||
devtoname(ITODEV(lock->lf_inode))); | |||||
printf(" %s, start %jd, end ", | printf(" %s, start %jd, end ", | ||||
lock->lf_type == F_RDLCK ? "shared" : | lock->lf_type == F_RDLCK ? "shared" : | ||||
lock->lf_type == F_WRLCK ? "exclusive" : | lock->lf_type == F_WRLCK ? "exclusive" : | ||||
lock->lf_type == F_UNLCK ? "unlock" : "unknown", | lock->lf_type == F_UNLCK ? "unlock" : "unknown", | ||||
(intmax_t)lock->lf_start); | (intmax_t)lock->lf_start); | ||||
if (lock->lf_end == OFF_MAX) | if (lock->lf_end == OFF_MAX) | ||||
printf("EOF"); | printf("EOF"); | ||||
else | else | ||||
printf("%jd", (intmax_t)lock->lf_end); | printf("%jd", (intmax_t)lock->lf_end); | ||||
if (!LIST_EMPTY(&lock->lf_outedges)) | if (!LIST_EMPTY(&lock->lf_outedges)) | ||||
printf(" block %p\n", | printf(" block %p\n", | ||||
(void *)LIST_FIRST(&lock->lf_outedges)->le_to); | (void *)LIST_FIRST(&lock->lf_outedges)->le_to); | ||||
else | else | ||||
printf("\n"); | printf("\n"); | ||||
} | } | ||||
static void | static void | ||||
lf_printlist(char *tag, struct lockf_entry *lock) | lf_printlist(char *tag, struct lockf_entry *lock) | ||||
{ | { | ||||
struct lockf_entry *lf, *blk; | struct lockf_entry *lf, *blk; | ||||
struct lockf_edge *e; | struct lockf_edge *e; | ||||
if (lock->lf_inode == (struct inode *)0) | printf("%s: Lock list for vnode %p:\n", tag, lock->lf_vnode); | ||||
return; | |||||
printf("%s: Lock list for ino %ju on dev <%s>:\n", | |||||
tag, (uintmax_t)lock->lf_inode->i_number, | |||||
devtoname(ITODEV(lock->lf_inode))); | |||||
LIST_FOREACH(lf, &lock->lf_vnode->v_lockf->ls_active, lf_link) { | LIST_FOREACH(lf, &lock->lf_vnode->v_lockf->ls_active, lf_link) { | ||||
Inline comment (done) — markj: This fits on one line.
printf("\tlock %p for ",(void *)lf); | printf("\tlock %p for ",(void *)lf); | ||||
lf_print_owner(lock->lf_owner); | lf_print_owner(lock->lf_owner); | ||||
printf(", %s, start %jd, end %jd", | printf(", %s, start %jd, end %jd", | ||||
lf->lf_type == F_RDLCK ? "shared" : | lf->lf_type == F_RDLCK ? "shared" : | ||||
lf->lf_type == F_WRLCK ? "exclusive" : | lf->lf_type == F_WRLCK ? "exclusive" : | ||||
lf->lf_type == F_UNLCK ? "unlock" : | lf->lf_type == F_UNLCK ? "unlock" : | ||||
"unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end); | "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end); | ||||
LIST_FOREACH(e, &lf->lf_outedges, le_outlink) { | LIST_FOREACH(e, &lf->lf_outedges, le_outlink) { | ||||
Show All 16 Lines |
How is it possible to have vp == NULL?