diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -194,7 +194,6 @@ */ static int __exclusive_cache_line openfiles; /* actual number of open files */ struct mtx sigio_lock; /* mtx to protect pointers to sigio */ -void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp); /* * If low >= size, just return low. Otherwise find the first zero bit in the @@ -1413,11 +1412,8 @@ if (__predict_false(!TAILQ_EMPTY(&fdp->fd_kqlist))) knote_fdclose(td, fd); - /* - * We need to notify mqueue if the object is of type mqueue. - */ - if (__predict_false(fp->f_type == DTYPE_MQUEUE)) - mq_fdclose(td, fd, fp); + if (fp->f_ops->fo_fdclose != NULL) + fp->f_ops->fo_fdclose(fp, fd, td); FILEDESC_XUNLOCK(fdp); #ifdef AUDIT diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -90,6 +90,7 @@ static fo_kqfilter_t soo_kqfilter; static fo_stat_t soo_stat; static fo_close_t soo_close; +static fo_fdclose_t soo_fdclose; static fo_chmod_t soo_chmod; static fo_fill_kinfo_t soo_fill_kinfo; static fo_aio_queue_t soo_aio_queue; @@ -105,6 +106,7 @@ .fo_kqfilter = soo_kqfilter, .fo_stat = soo_stat, .fo_close = soo_close, + .fo_fdclose = soo_fdclose, .fo_chmod = soo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, @@ -362,6 +364,16 @@ return (error); } +static void +soo_fdclose(struct file *fp, int fd __unused, struct thread *td) +{ + struct socket *so; + + so = fp->f_data; + if (so->so_proto->pr_fdclose != NULL) + so->so_proto->pr_fdclose(so); +} + static int soo_chmod(struct file *fp, mode_t mode, struct ucred *cred, struct thread *td) { diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c --- a/sys/kern/uipc_mqueue.c +++ b/sys/kern/uipc_mqueue.c @@ -267,7 +267,6 @@ static int _mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo); static void mqueue_send_notification(struct mqueue *mq); -static void mqueue_fdclose(struct thread *td, int fd, struct file *fp); static void mq_proc_exit(void *arg, struct proc *p); /* @@ -688,7 +687,6 @@ mqfs_fixup_dir(root); exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL, EVENTHANDLER_PRI_ANY); - mq_fdclose = mqueue_fdclose; p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING); mqfs_osd_jail_slot = osd_jail_register(NULL, methods); return (0); @@ -2472,35 +2470,6 @@ return (kern_kmq_notify(td, uap->mqd, evp)); } -static void -mqueue_fdclose(struct thread *td, int fd, struct file *fp) -{ - struct mqueue *mq; -#ifdef INVARIANTS - struct filedesc *fdp; - - fdp = td->td_proc->p_fd; - FILEDESC_LOCK_ASSERT(fdp); -#endif - - if (fp->f_ops == &mqueueops) { - mq = FPTOMQ(fp); - mtx_lock(&mq->mq_mutex); - notifier_remove(td->td_proc, mq, fd); - - /* have to wakeup thread in same process */ - if (mq->mq_flags & MQ_RSEL) { - mq->mq_flags &= ~MQ_RSEL; - selwakeup(&mq->mq_rsel); - } - if (mq->mq_flags & MQ_WSEL) { - mq->mq_flags &= ~MQ_WSEL; - selwakeup(&mq->mq_wsel); - } - mtx_unlock(&mq->mq_mutex); - } -} - static void mq_proc_exit(void *arg __unused, struct proc *p) { @@ -2566,6 +2535,33 @@ return (0); } +static void +mqf_fdclose(struct file *fp, int fd, struct thread *td) +{ + struct mqueue *mq; +#ifdef INVARIANTS + struct filedesc *fdp; + + fdp = td->td_proc->p_fd; + FILEDESC_LOCK_ASSERT(fdp); +#endif + + mq = FPTOMQ(fp); + mtx_lock(&mq->mq_mutex); + notifier_remove(td->td_proc, mq, fd); + + /* have to wakeup thread in same process */ + if (mq->mq_flags & MQ_RSEL) { + mq->mq_flags &= ~MQ_RSEL; + selwakeup(&mq->mq_rsel); + } + if (mq->mq_flags & MQ_WSEL) { + mq->mq_flags &= ~MQ_WSEL; + selwakeup(&mq->mq_wsel); + } + mtx_unlock(&mq->mq_mutex); +} + static int mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred) { @@ -2694,6 +2690,7 @@ .fo_kqfilter = mqf_kqfilter, .fo_stat = mqf_stat, .fo_close = mqf_close, + .fo_fdclose = mqf_fdclose, .fo_chmod = mqf_chmod, .fo_chown = mqf_chown, .fo_sendfile = invfo_sendfile, diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -54,7 +54,8 @@ * consumer of a socket is starting to tear down the socket, and that the * protocol should terminate the connection. Historically, pr_abort() also * detached protocol state from the socket state, but this is no longer the - * case. + * case. pr_fdclose() is called when userspace invokes close(2) on a socket + * file descriptor. * * socreate() creates a socket and attaches protocol state. This is a public * interface that may be used by socket layer consumers to create new diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -792,13 +792,19 @@ return (0); } +static void +maybe_schedule_gc(void) +{ + if (atomic_load_int(&unp_rights) != 0) + taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); +} + static void uipc_detach(struct socket *so) { struct unpcb *unp, *unp2; struct mtx *vplock; struct vnode *vp; - int local_unp_rights; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); @@ -854,7 +860,6 @@ UNP_REF_LIST_UNLOCK(); UNP_PCB_LOCK(unp); - local_unp_rights = unp_rights; unp->unp_socket->so_pcb = NULL; unp->unp_socket = NULL; free(unp->unp_addr, M_SONAME); @@ -865,8 +870,7 @@ mtx_unlock(vplock); vrele(vp); } - if (local_unp_rights) - taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); + maybe_schedule_gc(); switch (so->so_type) { case SOCK_STREAM: @@ -902,6 +906,18 @@ return (0); } +static void +uipc_fdclose(struct socket *so __unused) +{ + /* + * Ensure that userspace can't create orphaned file descriptors without + * triggering garbage collection. Triggering GC from uipc_detach() is + * not sufficient, since that's only closed once a socket reference + * count drops to zero. + */ + maybe_schedule_gc(); +} + static int uipc_listen(struct socket *so, int backlog, struct thread *td) { @@ -4372,6 +4388,7 @@ .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, + .pr_fdclose = uipc_fdclose, .pr_listen = uipc_listen, .pr_peeraddr = uipc_peeraddr, .pr_send = uipc_sendfile, @@ -4402,6 +4419,7 @@ .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, + .pr_fdclose = uipc_fdclose, .pr_peeraddr = uipc_peeraddr, .pr_sosend = uipc_sosend_dgram, .pr_sense = uipc_sense, @@ -4426,6 +4444,7 @@ .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, + .pr_fdclose = uipc_fdclose, .pr_listen = uipc_listen, .pr_peeraddr = uipc_peeraddr, .pr_sense = uipc_sense, diff --git a/sys/sys/file.h b/sys/sys/file.h --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -116,6 +116,7 @@ typedef int fo_stat_t(struct file *fp, struct stat *sb, struct ucred *active_cred); typedef int fo_close_t(struct file *fp, struct thread *td); +typedef void fo_fdclose_t(struct file *fp, int fd, struct thread *td); typedef int fo_chmod_t(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td); typedef int fo_chown_t(struct file *fp, uid_t uid, gid_t gid, @@ -153,6 +154,7 @@ fo_kqfilter_t *fo_kqfilter; fo_stat_t *fo_stat; fo_close_t *fo_close; + fo_fdclose_t *fo_fdclose; fo_chmod_t *fo_chmod; fo_chown_t *fo_chown; fo_sendfile_t *fo_sendfile; diff --git a/sys/sys/mqueue.h b/sys/sys/mqueue.h --- a/sys/sys/mqueue.h +++ b/sys/sys/mqueue.h @@ -37,9 +37,4 @@ long __reserved[4]; /* Ignored for input, zeroed for output */ }; -#ifdef _KERNEL -struct thread; -struct file; -extern void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); -#endif #endif diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -94,6 +94,7 @@ typedef int pr_kqfilter_t(struct socket *, struct knote *); typedef void pr_sosetlabel_t(struct socket *); typedef void pr_close_t(struct socket *); +typedef void pr_fdclose_t(struct socket *); typedef int pr_bindat_t(int, struct socket *, struct sockaddr *, struct thread *); typedef int pr_connectat_t(int, struct socket *, struct sockaddr *, @@ -120,8 +121,8 @@ pr_detach_t *pr_detach; /* destruction: sofree() */ pr_connect_t *pr_connect; /* connect(2) */ pr_disconnect_t *pr_disconnect; /* sodisconnect() */ - pr_close_t *pr_close; /* close(2) */ - pr_shutdown_t *pr_shutdown; /* shutdown(2) */ + pr_close_t *pr_close; /* soclose(), socket refcount is 0 */ + pr_fdclose_t *pr_fdclose; /* close(2) */ pr_rcvd_t *pr_rcvd; /* soreceive_generic() if PR_WANTRCVD */ pr_aio_queue_t *pr_aio_queue; /* aio(9) */ /* Cache line #3 */ @@ -142,7 +143,9 @@ pr_sosetlabel_t *pr_sosetlabel; /* MAC, XXXGL: remove */ pr_setsbopt_t *pr_setsbopt; /* Socket buffer ioctls */ pr_chmod_t *pr_chmod; /* fchmod(2) */ +/* Cache line #5 */ pr_kqfilter_t *pr_kqfilter; /* kevent(2) */ + pr_shutdown_t *pr_shutdown; /* shutdown(2) */ }; #endif /* defined(_KERNEL) || defined(_WANT_PROTOSW) */ #ifdef _KERNEL