Page MenuHomeFreeBSD

D9770.id.diff
No OneTemporary

D9770.id.diff

Index: head/sys/cam/ctl/ctl_ha.c
===================================================================
--- head/sys/cam/ctl/ctl_ha.c
+++ head/sys/cam/ctl/ctl_ha.c
@@ -458,45 +458,20 @@
static int
ctl_ha_accept(struct ha_softc *softc)
{
- struct socket *so;
+ struct socket *lso, *so;
struct sockaddr *sap;
int error;
- ACCEPT_LOCK();
- if (softc->ha_lso->so_rcv.sb_state & SBS_CANTRCVMORE)
- softc->ha_lso->so_error = ECONNABORTED;
- if (softc->ha_lso->so_error) {
- error = softc->ha_lso->so_error;
- softc->ha_lso->so_error = 0;
- ACCEPT_UNLOCK();
+ lso = softc->ha_lso;
+ SOLISTEN_LOCK(lso);
+ error = solisten_dequeue(lso, &so, 0);
+ if (error == EWOULDBLOCK)
+ return (error);
+ if (error) {
printf("%s: socket error %d\n", __func__, error);
goto out;
}
- so = TAILQ_FIRST(&softc->ha_lso->so_comp);
- if (so == NULL) {
- ACCEPT_UNLOCK();
- return (EWOULDBLOCK);
- }
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&softc->ha_lso->so_comp, so, so_list);
- softc->ha_lso->so_qlen--;
- so->so_state |= SS_NBIO;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
-
sap = NULL;
error = soaccept(so, &sap);
if (error != 0) {
@@ -556,9 +531,6 @@
printf("%s: REUSEPORT setting failed %d\n",
__func__, error);
}
- SOCKBUF_LOCK(&softc->ha_lso->so_rcv);
- soupcall_set(softc->ha_lso, SO_RCV, ctl_ha_lupcall, softc);
- SOCKBUF_UNLOCK(&softc->ha_lso->so_rcv);
}
memcpy(&sa, &softc->ha_peer_in, sizeof(sa));
@@ -572,6 +544,10 @@
printf("%s: solisten() error %d\n", __func__, error);
goto out;
}
+ SOLISTEN_LOCK(softc->ha_lso);
+ softc->ha_lso->so_state |= SS_NBIO;
+ solisten_upcall_set(softc->ha_lso, ctl_ha_lupcall, softc);
+ SOLISTEN_UNLOCK(softc->ha_lso);
return (0);
out:
Index: head/sys/dev/iscsi/icl_soft_proxy.c
===================================================================
--- head/sys/dev/iscsi/icl_soft_proxy.c
+++ head/sys/dev/iscsi/icl_soft_proxy.c
@@ -92,7 +92,6 @@
struct icl_listen *ils_listen;
struct socket *ils_socket;
bool ils_running;
- bool ils_disconnecting;
int ils_id;
};
@@ -184,7 +183,9 @@
while (ils->ils_running) {
ICL_DEBUG("waiting for accept thread to terminate");
sx_xunlock(&il->il_lock);
- ils->ils_disconnecting = true;
+ SOLISTEN_LOCK(ils->ils_socket);
+ ils->ils_socket->so_error = ENOTCONN;
+ SOLISTEN_UNLOCK(ils->ils_socket);
wakeup(&ils->ils_socket->so_timeo);
pause("icl_unlisten", 1 * hz);
sx_xlock(&il->il_lock);
@@ -200,9 +201,9 @@
}
/*
- * XXX: Doing accept in a separate thread in each socket might not be the best way
- * to do stuff, but it's pretty clean and debuggable - and you probably won't
- * have hundreds of listening sockets anyway.
+ * XXX: Doing accept in a separate thread in each socket might not be the
+ * best way to do stuff, but it's pretty clean and debuggable - and you
+ * probably won't have hundreds of listening sockets anyway.
*/
static void
icl_accept_thread(void *arg)
@@ -218,55 +219,22 @@
ils->ils_running = true;
for (;;) {
- ACCEPT_LOCK();
- while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 && ils->ils_disconnecting == false) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
- head->so_error = ECONNABORTED;
- break;
- }
- error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
- "accept", 0);
- if (error) {
- ACCEPT_UNLOCK();
- ICL_WARN("msleep failed with error %d", error);
- continue;
- }
- if (ils->ils_disconnecting) {
- ACCEPT_UNLOCK();
- ICL_DEBUG("terminating");
- ils->ils_running = false;
- kthread_exit();
- return;
- }
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, 0);
+ if (error == ENOTCONN) {
+ /*
+ * XXXGL: ENOTCONN is our mark from icl_listen_free().
+ * Neither socket code, nor msleep(9) may return it.
+ */
+ ICL_DEBUG("terminating");
+ ils->ils_running = false;
+ kthread_exit();
+ return;
}
- if (head->so_error) {
- error = head->so_error;
- head->so_error = 0;
- ACCEPT_UNLOCK();
- ICL_WARN("socket error %d", error);
+ if (error) {
+ ICL_WARN("solisten_dequeue error %d", error);
continue;
}
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(so != NULL, ("NULL so"));
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
-
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
sa = NULL;
error = soaccept(so, &sa);
Index: head/sys/kern/sys_socket.c
===================================================================
--- head/sys/kern/sys_socket.c
+++ head/sys/kern/sys_socket.c
@@ -170,32 +170,36 @@
break;
case FIOASYNC:
- /*
- * XXXRW: This code separately acquires SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) even though they are the same
- * mutex to avoid introducing the assumption that they are
- * the same.
- */
if (*(int *)data) {
SOCK_LOCK(so);
so->so_state |= SS_ASYNC;
+ if (SOLISTENING(so)) {
+ so->sol_sbrcv_flags |= SB_ASYNC;
+ so->sol_sbsnd_flags |= SB_ASYNC;
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_flags |= SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_flags |= SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
SOCK_UNLOCK(so);
- SOCKBUF_LOCK(&so->so_rcv);
- so->so_rcv.sb_flags |= SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_flags |= SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_snd);
} else {
SOCK_LOCK(so);
so->so_state &= ~SS_ASYNC;
+ if (SOLISTENING(so)) {
+ so->sol_sbrcv_flags &= ~SB_ASYNC;
+ so->sol_sbsnd_flags &= ~SB_ASYNC;
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_flags &= ~SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_flags &= ~SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
SOCK_UNLOCK(so);
- SOCKBUF_LOCK(&so->so_rcv);
- so->so_rcv.sb_flags &= ~SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_flags &= ~SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_snd);
}
break;
@@ -706,7 +710,6 @@
sb->sb_flags &= ~SB_AIO_RUNNING;
SOCKBUF_UNLOCK(sb);
- ACCEPT_LOCK();
SOCK_LOCK(so);
sorele(so);
}
Index: head/sys/kern/uipc_accf.c
===================================================================
--- head/sys/kern/uipc_accf.c
+++ head/sys/kern/uipc_accf.c
@@ -173,13 +173,13 @@
error = EINVAL;
goto out;
}
- if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+ if (so->sol_accept_filter == NULL) {
error = EINVAL;
goto out;
}
- strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
- if (so->so_accf->so_accept_filter_str != NULL)
- strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
+ strcpy(afap->af_name, so->sol_accept_filter->accf_name);
+ if (so->sol_accept_filter_str != NULL)
+ strcpy(afap->af_arg, so->sol_accept_filter_str);
out:
SOCK_UNLOCK(so);
if (error == 0)
@@ -193,31 +193,57 @@
{
struct accept_filter_arg *afap;
struct accept_filter *afp;
- struct so_accf *newaf;
- int error = 0;
+ char *accept_filter_str = NULL;
+ void *accept_filter_arg = NULL;
+ int error;
/*
* Handle the simple delete case first.
*/
if (sopt == NULL || sopt->sopt_val == NULL) {
+ struct socket *sp, *sp1;
+ int wakeup;
+
SOCK_LOCK(so);
if ((so->so_options & SO_ACCEPTCONN) == 0) {
SOCK_UNLOCK(so);
return (EINVAL);
}
- if (so->so_accf != NULL) {
- struct so_accf *af = so->so_accf;
- if (af->so_accept_filter != NULL &&
- af->so_accept_filter->accf_destroy != NULL) {
- af->so_accept_filter->accf_destroy(so);
- }
- if (af->so_accept_filter_str != NULL)
- free(af->so_accept_filter_str, M_ACCF);
- free(af, M_ACCF);
- so->so_accf = NULL;
+ if (so->sol_accept_filter == NULL) {
+ SOCK_UNLOCK(so);
+ return (0);
}
+ if (so->sol_accept_filter->accf_destroy != NULL)
+ so->sol_accept_filter->accf_destroy(so);
+ if (so->sol_accept_filter_str != NULL)
+ free(so->sol_accept_filter_str, M_ACCF);
+ so->sol_accept_filter = NULL;
+ so->sol_accept_filter_arg = NULL;
+ so->sol_accept_filter_str = NULL;
so->so_options &= ~SO_ACCEPTFILTER;
- SOCK_UNLOCK(so);
+
+ /*
+ * Move from incomplete queue to complete only those
+ * connections, that are blocked by us.
+ */
+ wakeup = 0;
+ TAILQ_FOREACH_SAFE(sp, &so->sol_incomp, so_list, sp1) {
+ SOCK_LOCK(sp);
+ if (sp->so_options & SO_ACCEPTFILTER) {
+ TAILQ_REMOVE(&so->sol_incomp, sp, so_list);
+ TAILQ_INSERT_TAIL(&so->sol_comp, sp, so_list);
+ sp->so_qstate = SQ_COMP;
+ sp->so_options &= ~SO_ACCEPTFILTER;
+ so->sol_incqlen--;
+ so->sol_qlen++;
+ wakeup = 1;
+ }
+ SOCK_UNLOCK(sp);
+ }
+ if (wakeup)
+ solisten_wakeup(so); /* unlocks */
+ else
+ SOLISTEN_UNLOCK(so);
return (0);
}
@@ -238,17 +264,10 @@
free(afap, M_TEMP);
return (ENOENT);
}
- /*
- * Allocate the new accept filter instance storage. We may
- * have to free it again later if we fail to attach it. If
- * attached properly, 'newaf' is NULLed to avoid a free()
- * while in use.
- */
- newaf = malloc(sizeof(*newaf), M_ACCF, M_WAITOK | M_ZERO);
if (afp->accf_create != NULL && afap->af_name[0] != '\0') {
size_t len = strlen(afap->af_name) + 1;
- newaf->so_accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
- strcpy(newaf->so_accept_filter_str, afap->af_name);
+ accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
+ strcpy(accept_filter_str, afap->af_name);
}
/*
@@ -256,8 +275,8 @@
* without first removing it.
*/
SOCK_LOCK(so);
- if (((so->so_options & SO_ACCEPTCONN) == 0) ||
- (so->so_accf != NULL)) {
+ if ((so->so_options & SO_ACCEPTCONN) == 0 ||
+ so->sol_accept_filter != NULL) {
error = EINVAL;
goto out;
}
@@ -268,25 +287,20 @@
* can't block.
*/
if (afp->accf_create != NULL) {
- newaf->so_accept_filter_arg =
- afp->accf_create(so, afap->af_arg);
- if (newaf->so_accept_filter_arg == NULL) {
+ accept_filter_arg = afp->accf_create(so, afap->af_arg);
+ if (accept_filter_arg == NULL) {
error = EINVAL;
goto out;
}
}
- newaf->so_accept_filter = afp;
- so->so_accf = newaf;
+ so->sol_accept_filter = afp;
+ so->sol_accept_filter_arg = accept_filter_arg;
+ so->sol_accept_filter_str = accept_filter_str;
so->so_options |= SO_ACCEPTFILTER;
- newaf = NULL;
out:
SOCK_UNLOCK(so);
- if (newaf != NULL) {
- if (newaf->so_accept_filter_str != NULL)
- free(newaf->so_accept_filter_str, M_ACCF);
- free(newaf, M_ACCF);
- }
- if (afap != NULL)
- free(afap, M_TEMP);
+ if (accept_filter_str != NULL)
+ free(accept_filter_str, M_ACCF);
+ free(afap, M_TEMP);
return (error);
}
Index: head/sys/kern/uipc_debug.c
===================================================================
--- head/sys/kern/uipc_debug.c
+++ head/sys/kern/uipc_debug.c
@@ -448,8 +448,6 @@
db_printf(")\n");
db_print_indent(indent);
- db_printf("so_qstate: 0x%x (", so->so_qstate);
- db_print_soqstate(so->so_qstate);
db_printf(") ");
db_printf("so_pcb: %p ", so->so_pcb);
db_printf("so_proto: %p\n", so->so_proto);
@@ -458,24 +456,28 @@
db_print_protosw(so->so_proto, "so_proto", indent);
db_print_indent(indent);
- db_printf("so_head: %p ", so->so_head);
- db_printf("so_incomp first: %p ", TAILQ_FIRST(&so->so_incomp));
- db_printf("so_comp first: %p\n", TAILQ_FIRST(&so->so_comp));
+ if (so->so_options & SO_ACCEPTCONN) {
+ db_printf("sol_incomp first: %p ",
+ TAILQ_FIRST(&so->sol_incomp));
+ db_printf("sol_comp first: %p\n", TAILQ_FIRST(&so->sol_comp));
+ db_printf("sol_qlen: %d ", so->sol_qlen);
+ db_printf("sol_incqlen: %d ", so->sol_incqlen);
+ db_printf("sol_qlimit: %d ", so->sol_qlimit);
+ } else {
+ db_printf("so_qstate: 0x%x (", so->so_qstate);
+ db_print_soqstate(so->so_qstate);
+ db_printf("so_listen: %p ", so->so_listen);
+ /* so_list skipped */
+ db_printf("so_timeo: %d ", so->so_timeo);
+ db_printf("so_error: %d\n", so->so_error);
- db_print_indent(indent);
- /* so_list skipped */
- db_printf("so_qlen: %u ", so->so_qlen);
- db_printf("so_incqlen: %u ", so->so_incqlen);
- db_printf("so_qlimit: %u ", so->so_qlimit);
- db_printf("so_timeo: %d ", so->so_timeo);
- db_printf("so_error: %d\n", so->so_error);
+ db_print_indent(indent);
+ db_printf("so_sigio: %p ", so->so_sigio);
+ db_printf("so_oobmark: %lu ", so->so_oobmark);
- db_print_indent(indent);
- db_printf("so_sigio: %p ", so->so_sigio);
- db_printf("so_oobmark: %lu ", so->so_oobmark);
-
- db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
- db_print_sockbuf(&so->so_snd, "so_snd", indent);
+ db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
+ db_print_sockbuf(&so->so_snd, "so_snd", indent);
+ }
}
DB_SHOW_COMMAND(socket, db_show_socket)
Index: head/sys/kern/uipc_sockbuf.c
===================================================================
--- head/sys/kern/uipc_sockbuf.c
+++ head/sys/kern/uipc_sockbuf.c
@@ -314,14 +314,14 @@
SOCKBUF_LOCK_ASSERT(sb);
- selwakeuppri(&sb->sb_sel, PSOCK);
- if (!SEL_WAITING(&sb->sb_sel))
+ selwakeuppri(sb->sb_sel, PSOCK);
+ if (!SEL_WAITING(sb->sb_sel))
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_acc);
}
- KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
+ KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
if (sb->sb_upcall != NULL && !(so->so_state & SS_ISDISCONNECTED)) {
ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
if (ret == SU_ISCONNECTED) {
Index: head/sys/kern/uipc_socket.c
===================================================================
--- head/sys/kern/uipc_socket.c
+++ head/sys/kern/uipc_socket.c
@@ -106,6 +106,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_compat.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -154,13 +155,21 @@
static int soreceive_rcvoob(struct socket *so, struct uio *uio,
int flags);
+static void so_rdknl_lock(void *);
+static void so_rdknl_unlock(void *);
+static void so_rdknl_assert_locked(void *);
+static void so_rdknl_assert_unlocked(void *);
+static void so_wrknl_lock(void *);
+static void so_wrknl_unlock(void *);
+static void so_wrknl_assert_locked(void *);
+static void so_wrknl_assert_unlocked(void *);
static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
-static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
static int filt_soempty(struct knote *kn, long hint);
+static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
fo_kqfilter_t soo_kqfilter;
static struct filterops soread_filtops = {
@@ -393,8 +402,16 @@
return (NULL);
}
+ /*
+ * The socket locking protocol allows to lock 2 sockets at a time,
+ * however, the first one must be a listening socket. WITNESS lacks
+ * a feature to change class of an existing lock, so we use DUPOK.
+ */
+ mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
+ so->so_rcv.sb_sel = &so->so_rdsel;
+ so->so_snd.sb_sel = &so->so_wrsel;
sx_init(&so->so_snd.sb_sx, "so_snd_sx");
sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
TAILQ_INIT(&so->so_snd.sb_aiojobq);
@@ -450,9 +467,6 @@
if (so->so_snd.sb_hiwat)
(void)chgsbsize(so->so_cred->cr_uidinfo,
&so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
- /* remove accept filter if one is present. */
- if (so->so_accf != NULL)
- accept_filt_setopt(so, NULL);
#ifdef MAC
mac_socket_destroy(so);
#endif
@@ -460,10 +474,16 @@
crfree(so->so_cred);
khelp_destroy_osd(&so->osd);
- sx_destroy(&so->so_snd.sb_sx);
- sx_destroy(&so->so_rcv.sb_sx);
- SOCKBUF_LOCK_DESTROY(&so->so_snd);
- SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+ if (SOLISTENING(so)) {
+ if (so->sol_accept_filter != NULL)
+ accept_filt_setopt(so, NULL);
+ } else {
+ sx_destroy(&so->so_snd.sb_sx);
+ sx_destroy(&so->so_rcv.sb_sx);
+ SOCKBUF_LOCK_DESTROY(&so->so_snd);
+ SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+ }
+ mtx_destroy(&so->so_lock);
uma_zfree(socket_zone, so);
}
@@ -506,8 +526,6 @@
if (so == NULL)
return (ENOBUFS);
- TAILQ_INIT(&so->so_incomp);
- TAILQ_INIT(&so->so_comp);
so->so_type = type;
so->so_cred = crhold(cred);
if ((prp->pr_domain->dom_family == PF_INET) ||
@@ -520,9 +538,10 @@
#ifdef MAC
mac_socket_create(cred, so);
#endif
- knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
- knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
- so->so_count = 1;
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
/*
* Auto-sizing of socket buffers is managed by the protocols and
* the appropriate flags must be set in the pru_attach function.
@@ -531,12 +550,10 @@
error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
CURVNET_RESTORE();
if (error) {
- KASSERT(so->so_count == 1, ("socreate: so_count %d",
- so->so_count));
- so->so_count = 0;
sodealloc(so);
return (error);
}
+ soref(so);
*aso = so;
return (0);
}
@@ -564,11 +581,11 @@
static int overcount;
struct socket *so;
- int over;
+ u_int over;
- ACCEPT_LOCK();
- over = (head->so_qlen > 3 * head->so_qlimit / 2);
- ACCEPT_UNLOCK();
+ SOLISTEN_LOCK(head);
+ over = (head->sol_qlen > 3 * head->sol_qlimit / 2);
+ SOLISTEN_UNLOCK(head);
#ifdef REGRESSION
if (regression_sonewconn_earlytest && over) {
#else
@@ -580,15 +597,15 @@
log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: "
"%i already in queue awaiting acceptance "
"(%d occurrences)\n",
- __func__, head->so_pcb, head->so_qlen, overcount);
+ __func__, head->so_pcb, head->sol_qlen, overcount);
overcount = 0;
}
return (NULL);
}
- VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
- __func__, __LINE__, head));
+ VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL",
+ __func__, head));
so = soalloc(head->so_vnet);
if (so == NULL) {
log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
@@ -596,11 +613,8 @@
__func__, head->so_pcb);
return (NULL);
}
- if ((head->so_options & SO_ACCEPTFILTER) != 0)
- connstatus = 0;
- so->so_head = head;
+ so->so_listen = head;
so->so_type = head->so_type;
- so->so_options = head->so_options &~ SO_ACCEPTCONN;
so->so_linger = head->so_linger;
so->so_state = head->so_state | SS_NOFDREF;
so->so_fibnum = head->so_fibnum;
@@ -609,10 +623,12 @@
#ifdef MAC
mac_socket_newconn(head, so);
#endif
- knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
- knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
VNET_SO_ASSERT(head);
- if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) {
sodealloc(so);
log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
__func__, head->so_pcb);
@@ -624,32 +640,24 @@
__func__, head->so_pcb);
return (NULL);
}
- so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
- so->so_snd.sb_lowat = head->so_snd.sb_lowat;
- so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
- so->so_snd.sb_timeo = head->so_snd.sb_timeo;
- so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
- so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+ so->so_rcv.sb_lowat = head->sol_sbrcv_lowat;
+ so->so_snd.sb_lowat = head->sol_sbsnd_lowat;
+ so->so_rcv.sb_timeo = head->sol_sbrcv_timeo;
+ so->so_snd.sb_timeo = head->sol_sbsnd_timeo;
+ so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE;
+ so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE;
+
+ SOLISTEN_LOCK(head);
+ if (head->sol_accept_filter != NULL)
+ connstatus = 0;
so->so_state |= connstatus;
- ACCEPT_LOCK();
- /*
- * The accept socket may be tearing down but we just
- * won a race on the ACCEPT_LOCK.
- * However, if sctp_peeloff() is called on a 1-to-many
- * style socket, the SO_ACCEPTCONN doesn't need to be set.
- */
- if (!(head->so_options & SO_ACCEPTCONN) &&
- ((head->so_proto->pr_protocol != IPPROTO_SCTP) ||
- (head->so_type != SOCK_SEQPACKET))) {
- SOCK_LOCK(so);
- so->so_head = NULL;
- sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */
- return (NULL);
- }
+ so->so_options = head->so_options & ~SO_ACCEPTCONN;
+ soref(head); /* A socket on (in)complete queue refs head. */
if (connstatus) {
- TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
- so->so_qstate |= SQ_COMP;
- head->so_qlen++;
+ TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+ so->so_qstate = SQ_COMP;
+ head->sol_qlen++;
+ solisten_wakeup(head); /* unlocks */
} else {
/*
* Keep removing sockets from the head until there's room for
@@ -658,28 +666,86 @@
* threads and soabort() requires dropping locks, we must
* loop waiting for the condition to be true.
*/
- while (head->so_incqlen > head->so_qlimit) {
+ while (head->sol_incqlen > head->sol_qlimit) {
struct socket *sp;
- sp = TAILQ_FIRST(&head->so_incomp);
- TAILQ_REMOVE(&head->so_incomp, sp, so_list);
- head->so_incqlen--;
- sp->so_qstate &= ~SQ_INCOMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
+
+ sp = TAILQ_FIRST(&head->sol_incomp);
+ TAILQ_REMOVE(&head->sol_incomp, sp, so_list);
+ head->sol_incqlen--;
+ SOCK_LOCK(sp);
+ sp->so_qstate = SQ_NONE;
+ sp->so_listen = NULL;
+ SOCK_UNLOCK(sp);
+ sorele(head); /* does SOLISTEN_UNLOCK, head stays */
soabort(sp);
- ACCEPT_LOCK();
+ SOLISTEN_LOCK(head);
}
- TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
- so->so_qstate |= SQ_INCOMP;
- head->so_incqlen++;
+ TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list);
+ so->so_qstate = SQ_INCOMP;
+ head->sol_incqlen++;
+ SOLISTEN_UNLOCK(head);
}
- ACCEPT_UNLOCK();
- if (connstatus) {
- sorwakeup(head);
- wakeup_one(&head->so_timeo);
+ return (so);
+}
+
+#ifdef SCTP
+/*
+ * Socket part of sctp_peeloff(). Detach a new socket from an
+ * association. The new socket is returned with a reference.
+ */
+struct socket *
+sopeeloff(struct socket *head)
+{
+ struct socket *so;
+
+ VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
+ __func__, __LINE__, head));
+ so = soalloc(head->so_vnet);
+ if (so == NULL) {
+ log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
+ "limit reached or out of memory\n",
+ __func__, head->so_pcb);
+ return (NULL);
}
+ so->so_type = head->so_type;
+ so->so_options = head->so_options;
+ so->so_linger = head->so_linger;
+ so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED;
+ so->so_fibnum = head->so_fibnum;
+ so->so_proto = head->so_proto;
+ so->so_cred = crhold(head->so_cred);
+#ifdef MAC
+ mac_socket_newconn(head, so);
+#endif
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
+ VNET_SO_ASSERT(head);
+ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
+ so->so_snd.sb_lowat = head->so_snd.sb_lowat;
+ so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
+ so->so_snd.sb_timeo = head->so_snd.sb_timeo;
+ so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
+ so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+
+ soref(so);
+
return (so);
}
+#endif /* SCTP */
int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
@@ -741,16 +807,140 @@
void
solisten_proto(struct socket *so, int backlog)
{
+ int sbrcv_lowat, sbsnd_lowat;
+ u_int sbrcv_hiwat, sbsnd_hiwat;
+ short sbrcv_flags, sbsnd_flags;
+ sbintime_t sbrcv_timeo, sbsnd_timeo;
SOCK_LOCK_ASSERT(so);
+ if (SOLISTENING(so))
+ goto listening;
+
+ /*
+ * Change this socket to listening state.
+ */
+ sbrcv_lowat = so->so_rcv.sb_lowat;
+ sbsnd_lowat = so->so_snd.sb_lowat;
+ sbrcv_hiwat = so->so_rcv.sb_hiwat;
+ sbsnd_hiwat = so->so_snd.sb_hiwat;
+ sbrcv_flags = so->so_rcv.sb_flags;
+ sbsnd_flags = so->so_snd.sb_flags;
+ sbrcv_timeo = so->so_rcv.sb_timeo;
+ sbsnd_timeo = so->so_snd.sb_timeo;
+
+ sbdestroy(&so->so_snd, so);
+ sbdestroy(&so->so_rcv, so);
+ sx_destroy(&so->so_snd.sb_sx);
+ sx_destroy(&so->so_rcv.sb_sx);
+ SOCKBUF_LOCK_DESTROY(&so->so_snd);
+ SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+
+#ifdef INVARIANTS
+ bzero(&so->so_rcv,
+ sizeof(struct socket) - offsetof(struct socket, so_rcv));
+#endif
+
+ so->sol_sbrcv_lowat = sbrcv_lowat;
+ so->sol_sbsnd_lowat = sbsnd_lowat;
+ so->sol_sbrcv_hiwat = sbrcv_hiwat;
+ so->sol_sbsnd_hiwat = sbsnd_hiwat;
+ so->sol_sbrcv_flags = sbrcv_flags;
+ so->sol_sbsnd_flags = sbsnd_flags;
+ so->sol_sbrcv_timeo = sbrcv_timeo;
+ so->sol_sbsnd_timeo = sbsnd_timeo;
+
+ so->sol_qlen = so->sol_incqlen = 0;
+ TAILQ_INIT(&so->sol_incomp);
+ TAILQ_INIT(&so->sol_comp);
+
+ so->sol_accept_filter = NULL;
+ so->sol_accept_filter_arg = NULL;
+ so->sol_accept_filter_str = NULL;
+
+ so->so_options |= SO_ACCEPTCONN;
+
+listening:
if (backlog < 0 || backlog > somaxconn)
backlog = somaxconn;
- so->so_qlimit = backlog;
- so->so_options |= SO_ACCEPTCONN;
+ so->sol_qlimit = backlog;
}
/*
+ * Wakeup listeners/subsystems once we have a complete connection.
+ * Enters with lock, returns unlocked.
+ */
+void
+solisten_wakeup(struct socket *sol)
+{
+
+ if (sol->sol_upcall != NULL)
+ (void )sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT);
+ else {
+ selwakeuppri(&sol->so_rdsel, PSOCK);
+ KNOTE_LOCKED(&sol->so_rdsel.si_note, 0);
+ }
+ SOLISTEN_UNLOCK(sol);
+ wakeup_one(&sol->sol_comp);
+}
+
+/*
+ * Return single connection off a listening socket queue. Main consumer of
+ * the function is kern_accept4(). Some modules, that do their own accept
+ * management also use the function.
+ *
+ * Listening socket must be locked on entry and is returned unlocked on
+ * return.
+ * The flags argument is set of accept4(2) flags and ACCEPT4_INHERIT.
+ */
+int
+solisten_dequeue(struct socket *head, struct socket **ret, int flags)
+{
+ struct socket *so;
+ int error;
+
+ SOLISTEN_LOCK_ASSERT(head);
+
+ while (!(head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp) &&
+ head->so_error == 0) {
+ error = msleep(&head->sol_comp, &head->so_lock, PSOCK | PCATCH,
+ "accept", 0);
+ if (error != 0) {
+ SOLISTEN_UNLOCK(head);
+ return (error);
+ }
+ }
+ if (head->so_error) {
+ error = head->so_error;
+ head->so_error = 0;
+ SOLISTEN_UNLOCK(head);
+ return (error);
+ }
+ if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) {
+ SOLISTEN_UNLOCK(head);
+ return (EWOULDBLOCK);
+ }
+ so = TAILQ_FIRST(&head->sol_comp);
+ SOCK_LOCK(so);
+ KASSERT(so->so_qstate == SQ_COMP,
+ ("%s: so %p not SQ_COMP", __func__, so));
+ soref(so);
+ head->sol_qlen--;
+ so->so_qstate = SQ_NONE;
+ so->so_listen = NULL;
+ TAILQ_REMOVE(&head->sol_comp, so, so_list);
+ if (flags & ACCEPT4_INHERIT)
+ so->so_state |= (head->so_state & SS_NBIO);
+ else
+ so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
+ SOCK_UNLOCK(so);
+ sorele(head);
+
+ *ret = so;
+ return (0);
+}
+
+/*
* Evaluate the reference count and named references on a socket; if no
* references remain, free it. This should be called whenever a reference is
* released, such as in sorele(), but also when named reference flags are
@@ -774,44 +964,62 @@
sofree(struct socket *so)
{
struct protosw *pr = so->so_proto;
- struct socket *head;
- ACCEPT_LOCK_ASSERT();
SOCK_LOCK_ASSERT(so);
if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
- (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) {
+ (so->so_state & SS_PROTOREF) || (so->so_qstate == SQ_COMP)) {
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
return;
}
- head = so->so_head;
- if (head != NULL) {
- KASSERT((so->so_qstate & SQ_COMP) != 0 ||
- (so->so_qstate & SQ_INCOMP) != 0,
- ("sofree: so_head != NULL, but neither SQ_COMP nor "
- "SQ_INCOMP"));
- KASSERT((so->so_qstate & SQ_COMP) == 0 ||
- (so->so_qstate & SQ_INCOMP) == 0,
- ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP"));
- TAILQ_REMOVE(&head->so_incomp, so, so_list);
- head->so_incqlen--;
- so->so_qstate &= ~SQ_INCOMP;
- so->so_head = NULL;
+ if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) {
+ struct socket *sol;
+
+ sol = so->so_listen;
+ KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so));
+
+ /*
+ * To solve race between close of a listening socket and
+ * a socket on its incomplete queue, we need to lock both.
+ * The order is first listening socket, then regular.
+ * Since we don't have SS_NOFDREF neither SS_PROTOREF, this
+ * function and the listening socket are the only pointers
+ * to so. To preserve so and sol, we reference both and then
+ * relock.
+ * After relock the socket may not move to so_comp since it
+ * doesn't have PCB already, but it may be removed from
+ * so_incomp. If that happens, we share responsibility on
+ * freeing the socket, but soclose() has already removed
+ * it from queue.
+ */
+ soref(sol);
+ soref(so);
+ SOCK_UNLOCK(so);
+ SOLISTEN_LOCK(sol);
+ SOCK_LOCK(so);
+ if (so->so_qstate == SQ_INCOMP) {
+ KASSERT(so->so_listen == sol,
+ ("%s: so %p migrated out of sol %p",
+ __func__, so, sol));
+ TAILQ_REMOVE(&sol->sol_incomp, so, so_list);
+ sol->sol_incqlen--;
+ /* This is guaranteed not to be the last. */
+ refcount_release(&sol->so_count);
+ so->so_qstate = SQ_NONE;
+ so->so_listen = NULL;
+ } else
+ KASSERT(so->so_listen == NULL,
+ ("%s: so %p not on (in)comp with so_listen",
+ __func__, so));
+ sorele(sol);
+ KASSERT(so->so_count == 1,
+ ("%s: so %p count %u", __func__, so, so->so_count));
+ so->so_count = 0;
}
- KASSERT((so->so_qstate & SQ_COMP) == 0 &&
- (so->so_qstate & SQ_INCOMP) == 0,
- ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
- so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
- if (so->so_options & SO_ACCEPTCONN) {
- KASSERT((TAILQ_EMPTY(&so->so_comp)),
- ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)),
- ("sofree: so_incomp populated"));
- }
+ if (SOLISTENING(so))
+ so->so_error = ECONNABORTED;
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
VNET_SO_ASSERT(so);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
@@ -833,12 +1041,14 @@
* before calling pru_detach. This means that protocols shold not
* assume they can perform socket wakeups, etc, in their detach code.
*/
- sbdestroy(&so->so_snd, so);
- sbdestroy(&so->so_rcv, so);
- seldrain(&so->so_snd.sb_sel);
- seldrain(&so->so_rcv.sb_sel);
- knlist_destroy(&so->so_rcv.sb_sel.si_note);
- knlist_destroy(&so->so_snd.sb_sel.si_note);
+ if (!SOLISTENING(so)) {
+ sbdestroy(&so->so_snd, so);
+ sbdestroy(&so->so_rcv, so);
+ }
+ seldrain(&so->so_rdsel);
+ seldrain(&so->so_wrsel);
+ knlist_destroy(&so->so_rdsel.si_note);
+ knlist_destroy(&so->so_wrsel.si_note);
sodealloc(so);
}
@@ -853,6 +1063,8 @@
int
soclose(struct socket *so)
{
+ struct accept_queue lqueue;
+ bool listening;
int error = 0;
KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
@@ -885,41 +1097,42 @@
drop:
if (so->so_proto->pr_usrreqs->pru_close != NULL)
(*so->so_proto->pr_usrreqs->pru_close)(so);
- ACCEPT_LOCK();
- if (so->so_options & SO_ACCEPTCONN) {
+
+ SOCK_LOCK(so);
+ if ((listening = (so->so_options & SO_ACCEPTCONN))) {
struct socket *sp;
- /*
- * Prevent new additions to the accept queues due
- * to ACCEPT_LOCK races while we are draining them.
- */
- so->so_options &= ~SO_ACCEPTCONN;
- while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
- TAILQ_REMOVE(&so->so_incomp, sp, so_list);
- so->so_incqlen--;
- sp->so_qstate &= ~SQ_INCOMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
- soabort(sp);
- ACCEPT_LOCK();
+
+ TAILQ_INIT(&lqueue);
+ TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list);
+ TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list);
+
+ so->sol_qlen = so->sol_incqlen = 0;
+
+ TAILQ_FOREACH(sp, &lqueue, so_list) {
+ SOCK_LOCK(sp);
+ sp->so_qstate = SQ_NONE;
+ sp->so_listen = NULL;
+ SOCK_UNLOCK(sp);
+ /* Guaranteed not to be the last. */
+ refcount_release(&so->so_count);
}
- while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
- TAILQ_REMOVE(&so->so_comp, sp, so_list);
- so->so_qlen--;
- sp->so_qstate &= ~SQ_COMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
- soabort(sp);
- ACCEPT_LOCK();
- }
- KASSERT((TAILQ_EMPTY(&so->so_comp)),
- ("%s: so_comp populated", __func__));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)),
- ("%s: so_incomp populated", __func__));
}
- SOCK_LOCK(so);
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
- sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */
+ sorele(so);
+ if (listening) {
+ struct socket *sp;
+
+ TAILQ_FOREACH(sp, &lqueue, so_list) {
+ SOCK_LOCK(sp);
+ if (sp->so_count == 0) {
+ SOCK_UNLOCK(sp);
+ soabort(sp);
+ } else
+ /* sp is now in sofree() */
+ SOCK_UNLOCK(sp);
+ }
+ }
CURVNET_RESTORE();
return (error);
}
@@ -951,13 +1164,11 @@
KASSERT(so->so_count == 0, ("soabort: so_count"));
KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
- KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP"));
- KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP"));
+ KASSERT(so->so_qstate == SQ_NONE, ("soabort: !SQ_NONE"));
VNET_SO_ASSERT(so);
if (so->so_proto->pr_usrreqs->pru_abort != NULL)
(*so->so_proto->pr_usrreqs->pru_abort)(so);
- ACCEPT_LOCK();
SOCK_LOCK(so);
sofree(so);
}
@@ -2892,15 +3103,15 @@
break;
case SO_LISTENQLIMIT:
- optval = so->so_qlimit;
+ optval = SOLISTENING(so) ? so->sol_qlimit : 0;
goto integer;
case SO_LISTENQLEN:
- optval = so->so_qlen;
+ optval = SOLISTENING(so) ? so->sol_qlen : 0;
goto integer;
case SO_LISTENINCQLEN:
- optval = so->so_incqlen;
+ optval = SOLISTENING(so) ? so->sol_incqlen : 0;
goto integer;
case SO_TS_CLOCK:
@@ -3047,7 +3258,7 @@
if (so->so_sigio != NULL)
pgsigio(&so->so_sigio, SIGURG, 0);
- selwakeuppri(&so->so_rcv.sb_sel, PSOCK);
+ selwakeuppri(&so->so_rdsel, PSOCK);
}
int
@@ -3067,44 +3278,54 @@
sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
struct thread *td)
{
- int revents = 0;
+ int revents;
- SOCKBUF_LOCK(&so->so_snd);
- SOCKBUF_LOCK(&so->so_rcv);
- if (events & (POLLIN | POLLRDNORM))
- if (soreadabledata(so))
- revents |= events & (POLLIN | POLLRDNORM);
-
- if (events & (POLLOUT | POLLWRNORM))
- if (sowriteable(so))
- revents |= events & (POLLOUT | POLLWRNORM);
-
- if (events & (POLLPRI | POLLRDBAND))
- if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
- revents |= events & (POLLPRI | POLLRDBAND);
-
- if ((events & POLLINIGNEOF) == 0) {
- if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
- revents |= events & (POLLIN | POLLRDNORM);
- if (so->so_snd.sb_state & SBS_CANTSENDMORE)
- revents |= POLLHUP;
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ if (!(events & (POLLIN | POLLRDNORM)))
+ revents = 0;
+ else if (!TAILQ_EMPTY(&so->sol_comp))
+ revents = events & (POLLIN | POLLRDNORM);
+ else {
+ selrecord(td, &so->so_rdsel);
+ revents = 0;
}
- }
-
- if (revents == 0) {
- if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
- selrecord(td, &so->so_rcv.sb_sel);
- so->so_rcv.sb_flags |= SB_SEL;
+ } else {
+ revents = 0;
+ SOCKBUF_LOCK(&so->so_snd);
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (events & (POLLIN | POLLRDNORM))
+ if (soreadabledata(so))
+ revents |= events & (POLLIN | POLLRDNORM);
+ if (events & (POLLOUT | POLLWRNORM))
+ if (sowriteable(so))
+ revents |= events & (POLLOUT | POLLWRNORM);
+ if (events & (POLLPRI | POLLRDBAND))
+ if (so->so_oobmark ||
+ (so->so_rcv.sb_state & SBS_RCVATMARK))
+ revents |= events & (POLLPRI | POLLRDBAND);
+ if ((events & POLLINIGNEOF) == 0) {
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ revents |= events & (POLLIN | POLLRDNORM);
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+ revents |= POLLHUP;
+ }
}
-
- if (events & (POLLOUT | POLLWRNORM)) {
- selrecord(td, &so->so_snd.sb_sel);
- so->so_snd.sb_flags |= SB_SEL;
+ if (revents == 0) {
+ if (events &
+ (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
+ selrecord(td, &so->so_rdsel);
+ so->so_rcv.sb_flags |= SB_SEL;
+ }
+ if (events & (POLLOUT | POLLWRNORM)) {
+ selrecord(td, &so->so_wrsel);
+ so->so_snd.sb_flags |= SB_SEL;
+ }
}
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_UNLOCK(&so->so_snd);
}
-
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_UNLOCK(&so->so_snd);
+ SOCK_UNLOCK(so);
return (revents);
}
@@ -3113,28 +3334,38 @@
{
struct socket *so = kn->kn_fp->f_data;
struct sockbuf *sb;
+ struct knlist *knl;
switch (kn->kn_filter) {
case EVFILT_READ:
kn->kn_fop = &soread_filtops;
+ knl = &so->so_rdsel.si_note;
sb = &so->so_rcv;
break;
case EVFILT_WRITE:
kn->kn_fop = &sowrite_filtops;
+ knl = &so->so_wrsel.si_note;
sb = &so->so_snd;
break;
case EVFILT_EMPTY:
kn->kn_fop = &soempty_filtops;
+ knl = &so->so_wrsel.si_note;
sb = &so->so_snd;
break;
default:
return (EINVAL);
}
- SOCKBUF_LOCK(sb);
- knlist_add(&sb->sb_sel.si_note, kn, 1);
- sb->sb_flags |= SB_KNOTE;
- SOCKBUF_UNLOCK(sb);
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ knlist_add(knl, kn, 1);
+ } else {
+ SOCKBUF_LOCK(sb);
+ knlist_add(knl, kn, 1);
+ sb->sb_flags |= SB_KNOTE;
+ SOCKBUF_UNLOCK(sb);
+ }
+ SOCK_UNLOCK(so);
return (0);
}
@@ -3313,11 +3544,11 @@
{
struct socket *so = kn->kn_fp->f_data;
- SOCKBUF_LOCK(&so->so_rcv);
- knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1);
- if (knlist_empty(&so->so_rcv.sb_sel.si_note))
+ so_rdknl_lock(so);
+ knlist_remove(&so->so_rdsel.si_note, kn, 1);
+ if (!SOLISTENING(so) && knlist_empty(&so->so_rdsel.si_note))
so->so_rcv.sb_flags &= ~SB_KNOTE;
- SOCKBUF_UNLOCK(&so->so_rcv);
+ so_rdknl_unlock(so);
}
/*ARGSUSED*/
@@ -3327,11 +3558,13 @@
struct socket *so;
so = kn->kn_fp->f_data;
- if (so->so_options & SO_ACCEPTCONN) {
- kn->kn_data = so->so_qlen;
- return (!TAILQ_EMPTY(&so->so_comp));
+ if (SOLISTENING(so)) {
+ SOCK_LOCK_ASSERT(so);
+ kn->kn_data = so->sol_qlen;
+ return (!TAILQ_EMPTY(&so->sol_comp));
}
+
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
@@ -3357,11 +3590,11 @@
{
struct socket *so = kn->kn_fp->f_data;
- SOCKBUF_LOCK(&so->so_snd);
- knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1);
- if (knlist_empty(&so->so_snd.sb_sel.si_note))
+ so_wrknl_lock(so);
+ knlist_remove(&so->so_wrsel.si_note, kn, 1);
+ if (!SOLISTENING(so) && knlist_empty(&so->so_wrsel.si_note))
so->so_snd.sb_flags &= ~SB_KNOTE;
- SOCKBUF_UNLOCK(&so->so_snd);
+ so_wrknl_unlock(so);
}
/*ARGSUSED*/
@@ -3371,6 +3604,10 @@
struct socket *so;
so = kn->kn_fp->f_data;
+
+ if (SOLISTENING(so))
+ return (0);
+
SOCKBUF_LOCK_ASSERT(&so->so_snd);
kn->kn_data = sbspace(&so->so_snd);
@@ -3397,6 +3634,10 @@
struct socket *so;
so = kn->kn_fp->f_data;
+
+ if (SOLISTENING(so))
+ return (1);
+
SOCKBUF_LOCK_ASSERT(&so->so_snd);
kn->kn_data = sbused(&so->so_snd);
@@ -3465,42 +3706,52 @@
struct socket *head;
int ret;
+ /*
+ * XXXGL: this is the only place where we acquire socket locks
+ * in reverse order: first child, then listening socket. To
+ * avoid possible LOR, use try semantics.
+ */
restart:
- ACCEPT_LOCK();
SOCK_LOCK(so);
+ if ((head = so->so_listen) != NULL &&
+ __predict_false(SOLISTEN_TRYLOCK(head) == 0)) {
+ SOCK_UNLOCK(so);
+ goto restart;
+ }
so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
so->so_state |= SS_ISCONNECTED;
- head = so->so_head;
- if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
+ if (head != NULL && (so->so_qstate == SQ_INCOMP)) {
+again:
if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+ TAILQ_REMOVE(&head->sol_incomp, so, so_list);
+ head->sol_incqlen--;
+ TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+ head->sol_qlen++;
+ so->so_qstate = SQ_COMP;
SOCK_UNLOCK(so);
- TAILQ_REMOVE(&head->so_incomp, so, so_list);
- head->so_incqlen--;
- so->so_qstate &= ~SQ_INCOMP;
- TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
- head->so_qlen++;
- so->so_qstate |= SQ_COMP;
- ACCEPT_UNLOCK();
- sorwakeup(head);
- wakeup_one(&head->so_timeo);
+ solisten_wakeup(head); /* unlocks */
} else {
- ACCEPT_UNLOCK();
+ SOCKBUF_LOCK(&so->so_rcv);
soupcall_set(so, SO_RCV,
- head->so_accf->so_accept_filter->accf_callback,
- head->so_accf->so_accept_filter_arg);
+ head->sol_accept_filter->accf_callback,
+ head->sol_accept_filter_arg);
so->so_options &= ~SO_ACCEPTFILTER;
- ret = head->so_accf->so_accept_filter->accf_callback(so,
- head->so_accf->so_accept_filter_arg, M_NOWAIT);
- if (ret == SU_ISCONNECTED)
+ ret = head->sol_accept_filter->accf_callback(so,
+ head->sol_accept_filter_arg, M_NOWAIT);
+ if (ret == SU_ISCONNECTED) {
soupcall_clear(so, SO_RCV);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ goto again;
+ }
+ SOCKBUF_UNLOCK(&so->so_rcv);
SOCK_UNLOCK(so);
- if (ret == SU_ISCONNECTED)
- goto restart;
+ SOLISTEN_UNLOCK(head);
}
return;
}
+ if (head != NULL)
+ SOLISTEN_UNLOCK(head);
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
wakeup(&so->so_timeo);
sorwakeup(so);
sowwakeup(so);
@@ -3510,16 +3761,17 @@
soisdisconnecting(struct socket *so)
{
- /*
- * Note: This code assumes that SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) are the same.
- */
- SOCKBUF_LOCK(&so->so_rcv);
+ SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTING;
so->so_state |= SS_ISDISCONNECTING;
- socantrcvmore_locked(so);
- SOCKBUF_LOCK(&so->so_snd);
- socantsendmore_locked(so);
+
+ if (!SOLISTENING(so)) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ socantsendmore_locked(so);
+ }
+ SOCK_UNLOCK(so);
wakeup(&so->so_timeo);
}
@@ -3527,17 +3779,18 @@
soisdisconnected(struct socket *so)
{
- /*
- * Note: This code assumes that SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) are the same.
- */
- SOCKBUF_LOCK(&so->so_rcv);
+ SOCK_LOCK(so);
so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
so->so_state |= SS_ISDISCONNECTED;
- socantrcvmore_locked(so);
- SOCKBUF_LOCK(&so->so_snd);
- sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
- socantsendmore_locked(so);
+
+ if (!SOLISTENING(so)) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
+ socantsendmore_locked(so);
+ }
+ SOCK_UNLOCK(so);
wakeup(&so->so_timeo);
}
@@ -3563,6 +3816,8 @@
{
struct sockbuf *sb;
+ KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
+
switch (which) {
case SO_RCV:
sb = &so->so_rcv;
@@ -3584,6 +3839,8 @@
{
struct sockbuf *sb;
+ KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
+
switch (which) {
case SO_RCV:
sb = &so->so_rcv;
@@ -3595,12 +3852,110 @@
panic("soupcall_clear: bad which");
}
SOCKBUF_LOCK_ASSERT(sb);
- KASSERT(sb->sb_upcall != NULL, ("soupcall_clear: no upcall to clear"));
+ KASSERT(sb->sb_upcall != NULL,
+ ("%s: so %p no upcall to clear", __func__, so));
sb->sb_upcall = NULL;
sb->sb_upcallarg = NULL;
sb->sb_flags &= ~SB_UPCALL;
}
+void
+solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg)
+{
+
+ SOLISTEN_LOCK_ASSERT(so);
+ so->sol_upcall = func;
+ so->sol_upcallarg = arg;
+}
+
+static void
+so_rdknl_lock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK(so);
+ else
+ SOCKBUF_LOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_unlock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_locked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK_ASSERT(so);
+ else
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_unlocked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK_ASSERT(so);
+ else
+ SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_wrknl_lock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK(so);
+ else
+ SOCKBUF_LOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_unlock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_locked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK_ASSERT(so);
+ else
+ SOCKBUF_LOCK_ASSERT(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_unlocked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK_ASSERT(so);
+ else
+ SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+}
+
/*
* Create an external-format (``xsocket'') structure using the information in
* the kernel-format socket structure pointed to by so. This is done to
@@ -3622,32 +3977,24 @@
xso->so_pcb = so->so_pcb;
xso->xso_protocol = so->so_proto->pr_protocol;
xso->xso_family = so->so_proto->pr_domain->dom_family;
- xso->so_qlen = so->so_qlen;
- xso->so_incqlen = so->so_incqlen;
- xso->so_qlimit = so->so_qlimit;
xso->so_timeo = so->so_timeo;
xso->so_error = so->so_error;
- xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
- xso->so_oobmark = so->so_oobmark;
- sbtoxsockbuf(&so->so_snd, &xso->so_snd);
- sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
xso->so_uid = so->so_cred->cr_uid;
-}
-
-
-/*
- * Socket accessor functions to provide external consumers with
- * a safe interface to socket state
- *
- */
-
-void
-so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *),
- void *arg)
-{
-
- TAILQ_FOREACH(so, &so->so_comp, so_list)
- func(so, arg);
+ xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
+ if (SOLISTENING(so)) {
+ xso->so_qlen = so->sol_qlen;
+ xso->so_incqlen = so->sol_incqlen;
+ xso->so_qlimit = so->sol_qlimit;
+ xso->so_oobmark = 0;
+ bzero(&xso->so_snd, sizeof(xso->so_snd));
+ bzero(&xso->so_rcv, sizeof(xso->so_rcv));
+ } else {
+ xso->so_state |= so->so_qstate;
+ xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0;
+ xso->so_oobmark = so->so_oobmark;
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ }
}
struct sockbuf *
Index: head/sys/kern/uipc_syscalls.c
===================================================================
--- head/sys/kern/uipc_syscalls.c
+++ head/sys/kern/uipc_syscalls.c
@@ -68,13 +68,6 @@
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
-/*
- * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
- * and SOCK_NONBLOCK.
- */
-#define ACCEPT4_INHERIT 0x1
-#define ACCEPT4_COMPAT 0x2
-
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
@@ -350,59 +343,22 @@
(flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps);
if (error != 0)
goto done;
- ACCEPT_LOCK();
- if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
- ACCEPT_UNLOCK();
- error = EWOULDBLOCK;
+ SOCK_LOCK(head);
+ if (!SOLISTENING(head)) {
+ SOCK_UNLOCK(head);
+ error = EINVAL;
goto noconnection;
}
- while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
- head->so_error = ECONNABORTED;
- break;
- }
- error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
- "accept", 0);
- if (error != 0) {
- ACCEPT_UNLOCK();
- goto noconnection;
- }
- }
- if (head->so_error) {
- error = head->so_error;
- head->so_error = 0;
- ACCEPT_UNLOCK();
+
+ error = solisten_dequeue(head, &so, flags);
+ if (error != 0)
goto noconnection;
- }
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- if (flags & ACCEPT4_INHERIT)
- so->so_state |= (head->so_state & SS_NBIO);
- else
- so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
-
/* An extra reference on `nfp' has been held for us by falloc(). */
td->td_retval[0] = fd;
- /* connection has been removed from the listen queue */
- KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
+ /* Connection has been removed from the listen queue. */
+ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
if (flags & ACCEPT4_INHERIT) {
pgid = fgetown(&head->so_sigio);
@@ -420,7 +376,6 @@
(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
tmp = fflag & FASYNC;
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
- sa = NULL;
error = soaccept(so, &sa);
if (error != 0)
goto noconnection;
@@ -558,7 +513,7 @@
}
SOCK_LOCK(so);
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
- error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
+ error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH,
"connec", 0);
if (error != 0) {
if (error == EINTR || error == ERESTART)
Index: head/sys/kern/uipc_usrreq.c
===================================================================
--- head/sys/kern/uipc_usrreq.c
+++ head/sys/kern/uipc_usrreq.c
@@ -189,10 +189,9 @@
/*
* Locking and synchronization:
*
- * Three types of locks exit in the local domain socket implementation: a
- * global list mutex, a global linkage rwlock, and per-unpcb mutexes. Of the
- * global locks, the list lock protects the socket count, global generation
- * number, and stream/datagram global lists. The linkage lock protects the
+ * Two types of locks exist in the local domain socket implementation: a
+ * global linkage rwlock and per-unpcb mutexes. The linkage lock protects
+ * the socket count, global generation number, stream/datagram global lists and
* interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be
* held exclusively over the acquisition of multiple unpcb locks to prevent
* deadlock.
@@ -233,7 +232,6 @@
* to perform namei() and other file system operations.
*/
static struct rwlock unp_link_rwlock;
-static struct mtx unp_list_lock;
static struct mtx unp_defers_lock;
#define UNP_LINK_LOCK_INIT() rw_init(&unp_link_rwlock, \
@@ -250,12 +248,8 @@
#define UNP_LINK_WUNLOCK() rw_wunlock(&unp_link_rwlock)
#define UNP_LINK_WLOCK_ASSERT() rw_assert(&unp_link_rwlock, \
RA_WLOCKED)
+#define UNP_LINK_WOWNED() rw_wowned(&unp_link_rwlock)
-#define UNP_LIST_LOCK_INIT() mtx_init(&unp_list_lock, \
- "unp_list_lock", NULL, MTX_DEF)
-#define UNP_LIST_LOCK() mtx_lock(&unp_list_lock)
-#define UNP_LIST_UNLOCK() mtx_unlock(&unp_list_lock)
-
#define UNP_DEFERRED_LOCK_INIT() mtx_init(&unp_defers_lock, \
"unp_defer", NULL, MTX_DEF)
#define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock)
@@ -396,6 +390,7 @@
u_long sendspace, recvspace;
struct unpcb *unp;
int error;
+ bool locked;
KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
@@ -430,10 +425,12 @@
unp->unp_socket = so;
so->so_pcb = unp;
unp->unp_refcount = 1;
- if (so->so_head != NULL)
+ if (so->so_listen != NULL)
unp->unp_flags |= UNP_NASCENT;
- UNP_LIST_LOCK();
+ if ((locked = UNP_LINK_WOWNED()) == false)
+ UNP_LINK_WLOCK();
+
unp->unp_gencnt = ++unp_gencnt;
unp_count++;
switch (so->so_type) {
@@ -452,8 +449,10 @@
default:
panic("uipc_attach");
}
- UNP_LIST_UNLOCK();
+ if (locked == false)
+ UNP_LINK_WUNLOCK();
+
return (0);
}
@@ -607,6 +606,7 @@
uipc_close(struct socket *so)
{
struct unpcb *unp, *unp2;
+ struct vnode *vp = NULL;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
@@ -619,8 +619,14 @@
unp_disconnect(unp, unp2);
UNP_PCB_UNLOCK(unp2);
}
+ if (SOLISTENING(so) && ((vp = unp->unp_vnode) != NULL)) {
+ VOP_UNP_DETACH(vp);
+ unp->unp_vnode = NULL;
+ }
UNP_PCB_UNLOCK(unp);
UNP_LINK_WUNLOCK();
+ if (vp)
+ vrele(vp);
}
static int
@@ -657,18 +663,13 @@
vp = NULL;
local_unp_rights = 0;
- UNP_LIST_LOCK();
+ UNP_LINK_WLOCK();
LIST_REMOVE(unp, unp_link);
unp->unp_gencnt = ++unp_gencnt;
--unp_count;
- UNP_LIST_UNLOCK();
-
- if ((unp->unp_flags & UNP_NASCENT) != 0) {
- UNP_PCB_LOCK(unp);
- goto teardown;
- }
- UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
+ if ((unp->unp_flags & UNP_NASCENT) != 0)
+ goto teardown;
if ((vp = unp->unp_vnode) != NULL) {
VOP_UNP_DETACH(vp);
@@ -693,8 +694,8 @@
UNP_PCB_UNLOCK(ref);
}
local_unp_rights = unp_rights;
- UNP_LINK_WUNLOCK();
teardown:
+ UNP_LINK_WUNLOCK();
unp->unp_socket->so_pcb = NULL;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
@@ -1315,7 +1316,7 @@
{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
struct vnode *vp;
- struct socket *so2, *so3;
+ struct socket *so2;
struct unpcb *unp, *unp2, *unp3;
struct nameidata nd;
char buf[SOCK_MAXADDRLEN];
@@ -1392,22 +1393,20 @@
error = EPROTOTYPE;
goto bad2;
}
+ UNP_PCB_LOCK(unp);
+ UNP_PCB_LOCK(unp2);
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
if (so2->so_options & SO_ACCEPTCONN) {
CURVNET_SET(so2->so_vnet);
- so3 = sonewconn(so2, 0);
+ so2 = sonewconn(so2, 0);
CURVNET_RESTORE();
} else
- so3 = NULL;
- if (so3 == NULL) {
+ so2 = NULL;
+ if (so2 == NULL) {
error = ECONNREFUSED;
- goto bad2;
+ goto bad3;
}
- unp = sotounpcb(so);
- unp2 = sotounpcb(so2);
- unp3 = sotounpcb(so3);
- UNP_PCB_LOCK(unp);
- UNP_PCB_LOCK(unp2);
+ unp3 = sotounpcb(so2);
UNP_PCB_LOCK(unp3);
if (unp2->unp_addr != NULL) {
bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
@@ -1433,23 +1432,19 @@
unp->unp_flags |= UNP_HAVEPC;
if (unp2->unp_flags & UNP_WANTCRED)
unp3->unp_flags |= UNP_WANTCRED;
- UNP_PCB_UNLOCK(unp3);
UNP_PCB_UNLOCK(unp2);
- UNP_PCB_UNLOCK(unp);
+ unp2 = unp3;
#ifdef MAC
- mac_socketpeer_set_from_socket(so, so3);
- mac_socketpeer_set_from_socket(so3, so);
+ mac_socketpeer_set_from_socket(so, so2);
+ mac_socketpeer_set_from_socket(so2, so);
#endif
-
- so2 = so3;
}
- unp = sotounpcb(so);
- KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
- unp2 = sotounpcb(so2);
- KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL"));
- UNP_PCB_LOCK(unp);
- UNP_PCB_LOCK(unp2);
+
+ KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
+ sotounpcb(so2) == unp2,
+ ("%s: unp2 %p so2 %p", __func__, unp2, so2));
error = unp_connect2(so, so2, PRU_CONNECT);
+bad3:
UNP_PCB_UNLOCK(unp2);
UNP_PCB_UNLOCK(unp);
bad2:
@@ -1591,10 +1586,10 @@
* OK, now we're committed to doing something.
*/
xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
- UNP_LIST_LOCK();
+ UNP_LINK_RLOCK();
gencnt = unp_gencnt;
n = unp_count;
- UNP_LIST_UNLOCK();
+ UNP_LINK_RUNLOCK();
xug->xug_len = sizeof *xug;
xug->xug_count = n;
@@ -1608,7 +1603,7 @@
unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
- UNP_LIST_LOCK();
+ UNP_LINK_RLOCK();
for (unp = LIST_FIRST(head), i = 0; unp && i < n;
unp = LIST_NEXT(unp, unp_link)) {
UNP_PCB_LOCK(unp);
@@ -1623,7 +1618,7 @@
}
UNP_PCB_UNLOCK(unp);
}
- UNP_LIST_UNLOCK();
+ UNP_LINK_RUNLOCK();
n = i; /* In case we lost some during malloc. */
error = 0;
@@ -1881,7 +1876,6 @@
TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
UNP_LINK_LOCK_INIT();
- UNP_LIST_LOCK_INIT();
UNP_DEFERRED_LOCK_INIT();
}
@@ -2232,8 +2226,7 @@
static void
unp_gc_process(struct unpcb *unp)
{
- struct socket *soa;
- struct socket *so;
+ struct socket *so, *soa;
struct file *fp;
/* Already processed. */
@@ -2253,28 +2246,30 @@
return;
}
- /*
- * Mark all sockets we reference with RIGHTS.
- */
so = unp->unp_socket;
- if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
- SOCKBUF_LOCK(&so->so_rcv);
- unp_scan(so->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ /*
+ * Mark all sockets in our accept queue.
+ */
+ TAILQ_FOREACH(soa, &so->sol_comp, so_list) {
+ if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
+ continue;
+ SOCKBUF_LOCK(&soa->so_rcv);
+ unp_scan(soa->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&soa->so_rcv);
+ }
+ } else {
+ /*
+ * Mark all sockets we reference with RIGHTS.
+ */
+ if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ unp_scan(so->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
}
-
- /*
- * Mark all sockets in our accept queue.
- */
- ACCEPT_LOCK();
- TAILQ_FOREACH(soa, &so->so_comp, so_list) {
- if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
- continue;
- SOCKBUF_LOCK(&soa->so_rcv);
- unp_scan(soa->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&soa->so_rcv);
- }
- ACCEPT_UNLOCK();
+ SOCK_UNLOCK(so);
unp->unp_gcflag |= UNPGC_SCANNED;
}
@@ -2297,7 +2292,7 @@
int i, total;
unp_taskcount++;
- UNP_LIST_LOCK();
+ UNP_LINK_RLOCK();
/*
* First clear all gc flags from previous runs, apart from
* UNPGC_IGNORE_RIGHTS.
@@ -2320,7 +2315,7 @@
LIST_FOREACH(unp, *head, unp_link)
unp_gc_process(unp);
} while (unp_marked);
- UNP_LIST_UNLOCK();
+ UNP_LINK_RUNLOCK();
if (unp_unreachable == 0)
return;
@@ -2335,7 +2330,6 @@
* as as unreachable and store them locally.
*/
UNP_LINK_RLOCK();
- UNP_LIST_LOCK();
for (total = 0, head = heads; *head != NULL; head++)
LIST_FOREACH(unp, *head, unp_link)
if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
@@ -2348,7 +2342,6 @@
KASSERT(total <= unp_unreachable,
("unp_gc: incorrect unreachable count."));
}
- UNP_LIST_UNLOCK();
UNP_LINK_RUNLOCK();
/*
@@ -2391,10 +2384,11 @@
struct unpcb *unp;
unp = sotounpcb(so);
- UNP_LIST_LOCK();
+ UNP_LINK_WLOCK();
unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
- UNP_LIST_UNLOCK();
- unp_dispose_mbuf(so->so_rcv.sb_mb);
+ UNP_LINK_WUNLOCK();
+ if (!SOLISTENING(so))
+ unp_dispose_mbuf(so->so_rcv.sb_mb);
}
static void
Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
===================================================================
--- head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
+++ head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
@@ -614,21 +614,13 @@
pcb = ng_btsocket_l2cap_pcb_by_addr(&rt->src, ip->psm);
if (pcb != NULL) {
- struct socket *so1 = NULL;
+ struct socket *so1;
mtx_lock(&pcb->pcb_mtx);
- /*
- * First check the pending connections queue and if we have
- * space then create new socket and set proper source address.
- */
-
- if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
- CURVNET_SET(pcb->so->so_vnet);
- so1 = sonewconn(pcb->so, 0);
- CURVNET_RESTORE();
- }
-
+ CURVNET_SET(pcb->so->so_vnet);
+ so1 = sonewconn(pcb->so, 0);
+ CURVNET_RESTORE();
if (so1 == NULL) {
result = NG_L2CAP_NO_RESOURCES;
goto respond;
Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
===================================================================
--- head/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
+++ head/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
@@ -1149,7 +1149,7 @@
{
ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb1 = NULL;
ng_btsocket_l2cap_pcb_p l2pcb = NULL;
- struct socket *so1 = NULL;
+ struct socket *so1;
mtx_assert(&s->session_mtx, MA_OWNED);
@@ -1171,11 +1171,9 @@
mtx_lock(&pcb->pcb_mtx);
- if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
- CURVNET_SET(pcb->so->so_vnet);
- so1 = sonewconn(pcb->so, 0);
- CURVNET_RESTORE();
- }
+ CURVNET_SET(pcb->so->so_vnet);
+ so1 = sonewconn(pcb->so, 0);
+ CURVNET_RESTORE();
mtx_unlock(&pcb->pcb_mtx);
@@ -1405,46 +1403,24 @@
static int
ng_btsocket_rfcomm_session_accept(ng_btsocket_rfcomm_session_p s0)
{
- struct socket *l2so = NULL;
+ struct socket *l2so;
struct sockaddr_l2cap *l2sa = NULL;
ng_btsocket_l2cap_pcb_t *l2pcb = NULL;
ng_btsocket_rfcomm_session_p s = NULL;
- int error = 0;
+ int error;
mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
mtx_assert(&s0->session_mtx, MA_OWNED);
- /* Check if there is a complete L2CAP connection in the queue */
- if ((error = s0->l2so->so_error) != 0) {
+ SOLISTEN_LOCK(s0->l2so);
+ error = solisten_dequeue(s0->l2so, &l2so, 0);
+ if (error == EWOULDBLOCK)
+ return (error);
+ if (error) {
NG_BTSOCKET_RFCOMM_ERR(
"%s: Could not accept connection on L2CAP socket, error=%d\n", __func__, error);
- s0->l2so->so_error = 0;
-
return (error);
}
-
- ACCEPT_LOCK();
- if (TAILQ_EMPTY(&s0->l2so->so_comp)) {
- ACCEPT_UNLOCK();
- if (s0->l2so->so_rcv.sb_state & SBS_CANTRCVMORE)
- return (ECONNABORTED);
- return (EWOULDBLOCK);
- }
-
- /* Accept incoming L2CAP connection */
- l2so = TAILQ_FIRST(&s0->l2so->so_comp);
- if (l2so == NULL)
- panic("%s: l2so == NULL\n", __func__);
-
- TAILQ_REMOVE(&s0->l2so->so_comp, l2so, so_list);
- s0->l2so->so_qlen --;
- l2so->so_qstate &= ~SQ_COMP;
- l2so->so_head = NULL;
- SOCK_LOCK(l2so);
- soref(l2so);
- l2so->so_state |= SS_NBIO;
- SOCK_UNLOCK(l2so);
- ACCEPT_UNLOCK();
error = soaccept(l2so, (struct sockaddr **) &l2sa);
if (error != 0) {
Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
===================================================================
--- head/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
+++ head/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
@@ -471,20 +471,13 @@
pcb = ng_btsocket_sco_pcb_by_addr(&rt->src);
if (pcb != NULL) {
- struct socket *so1 = NULL;
+ struct socket *so1;
/* pcb is locked */
- /*
- * First check the pending connections queue and if we have
- * space then create new socket and set proper source address.
- */
-
- if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
- CURVNET_SET(pcb->so->so_vnet);
- so1 = sonewconn(pcb->so, 0);
- CURVNET_RESTORE();
- }
+ CURVNET_SET(pcb->so->so_vnet);
+ so1 = sonewconn(pcb->so, 0);
+ CURVNET_RESTORE();
if (so1 == NULL) {
status = 0x0d; /* Rejected due to limited resources */
Index: head/sys/netgraph/ng_ksocket.c
===================================================================
--- head/sys/netgraph/ng_ksocket.c
+++ head/sys/netgraph/ng_ksocket.c
@@ -153,8 +153,7 @@
};
/* Helper functions */
-static int ng_ksocket_check_accept(priv_p);
-static void ng_ksocket_finish_accept(priv_p);
+static int ng_ksocket_accept(priv_p);
static int ng_ksocket_incoming(struct socket *so, void *arg, int waitflag);
static int ng_ksocket_parse(const struct ng_ksocket_alias *aliases,
const char *s, int family);
@@ -698,6 +697,7 @@
ERROUT(ENXIO);
/* Listen */
+ so->so_state |= SS_NBIO;
error = solisten(so, *((int32_t *)msg->data), td);
break;
}
@@ -716,21 +716,16 @@
if (priv->flags & KSF_ACCEPTING)
ERROUT(EALREADY);
- error = ng_ksocket_check_accept(priv);
- if (error != 0 && error != EWOULDBLOCK)
- ERROUT(error);
-
/*
* If a connection is already complete, take it.
* Otherwise let the upcall function deal with
* the connection when it comes in.
*/
+ error = ng_ksocket_accept(priv);
+ if (error != 0 && error != EWOULDBLOCK)
+ ERROUT(error);
priv->response_token = msg->header.token;
raddr = priv->response_addr = NGI_RETADDR(item);
- if (error == 0) {
- ng_ksocket_finish_accept(priv);
- } else
- priv->flags |= KSF_ACCEPTING;
break;
}
@@ -1068,13 +1063,8 @@
}
/* Check whether a pending accept operation has completed */
- if (priv->flags & KSF_ACCEPTING) {
- error = ng_ksocket_check_accept(priv);
- if (error != EWOULDBLOCK)
- priv->flags &= ~KSF_ACCEPTING;
- if (error == 0)
- ng_ksocket_finish_accept(priv);
- }
+ if (priv->flags & KSF_ACCEPTING)
+ (void )ng_ksocket_accept(priv);
/*
* If we don't have a hook, we must handle data events later. When
@@ -1171,37 +1161,10 @@
}
}
-/*
- * Check for a completed incoming connection and return 0 if one is found.
- * Otherwise return the appropriate error code.
- */
static int
-ng_ksocket_check_accept(priv_p priv)
+ng_ksocket_accept(priv_p priv)
{
struct socket *const head = priv->so;
- int error;
-
- if ((error = head->so_error) != 0) {
- head->so_error = 0;
- return error;
- }
- /* Unlocked read. */
- if (TAILQ_EMPTY(&head->so_comp)) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE)
- return ECONNABORTED;
- return EWOULDBLOCK;
- }
- return 0;
-}
-
-/*
- * Handle the first completed incoming connection, assumed to be already
- * on the socket's so_comp queue.
- */
-static void
-ng_ksocket_finish_accept(priv_p priv)
-{
- struct socket *const head = priv->so;
struct socket *so;
struct sockaddr *sa = NULL;
struct ng_mesg *resp;
@@ -1211,24 +1174,16 @@
int len;
int error;
- ACCEPT_LOCK();
- so = TAILQ_FIRST(&head->so_comp);
- if (so == NULL) { /* Should never happen */
- ACCEPT_UNLOCK();
- return;
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
+ if (error == EWOULDBLOCK) {
+ priv->flags |= KSF_ACCEPTING;
+ return (error);
}
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- SOCK_LOCK(so);
- soref(so);
- so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
+ priv->flags &= ~KSF_ACCEPTING;
+ if (error)
+ return (error);
- /* XXX KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); */
-
soaccept(so, &sa);
len = OFFSETOF(struct ng_ksocket_accept, addr);
@@ -1288,6 +1243,8 @@
out:
if (sa != NULL)
free(sa, M_SONAME);
+
+ return (0);
}
/*
Index: head/sys/netinet/sctp_input.c
===================================================================
--- head/sys/netinet/sctp_input.c
+++ head/sys/netinet/sctp_input.c
@@ -5200,11 +5200,21 @@
* listening responded to a INIT-ACK and then
* closed. We opened and bound.. and are now no
* longer listening.
+ *
+ * XXXGL: notes on checking listen queue length.
+ * 1) SCTP_IS_LISTENING() doesn't necessarily mean
+ * SOLISTENING(), because a listening "UDP type"
+ * socket isn't listening in terms of the socket
+ * layer. It is a normal data flow socket, that
+ * can fork off new connections. Thus, we should
+ * look into sol_qlen only in case we are !UDP.
+ * 2) Checking sol_qlen in general requires locking
+ * the socket, and this code lacks that.
*/
-
if ((stcb == NULL) &&
(!SCTP_IS_LISTENING(inp) ||
- inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
+ (!(inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) &&
+ inp->sctp_socket->sol_qlen >= inp->sctp_socket->sol_qlimit))) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) {
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
Index: head/sys/netinet/sctp_syscalls.c
===================================================================
--- head/sys/netinet/sctp_syscalls.c
+++ head/sys/netinet/sctp_syscalls.c
@@ -152,29 +152,11 @@
td->td_retval[0] = fd;
CURVNET_SET(head->so_vnet);
- so = sonewconn(head, SS_ISCONNECTED);
+ so = sopeeloff(head);
if (so == NULL) {
error = ENOMEM;
goto noconnection;
}
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so);
- soref(so); /* file descriptor reference */
- SOCK_UNLOCK(so);
-
- ACCEPT_LOCK();
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_state &= ~SS_NOFDREF;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- ACCEPT_UNLOCK();
finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
if (error != 0)
Index: head/sys/netinet/sctp_sysctl.c
===================================================================
--- head/sys/netinet/sctp_sysctl.c
+++ head/sys/netinet/sctp_sysctl.c
@@ -415,12 +415,12 @@
xinpcb.qlen = 0;
xinpcb.maxqlen = 0;
} else {
- xinpcb.qlen = so->so_qlen;
- xinpcb.qlen_old = so->so_qlen > USHRT_MAX ?
- USHRT_MAX : (uint16_t)so->so_qlen;
- xinpcb.maxqlen = so->so_qlimit;
- xinpcb.maxqlen_old = so->so_qlimit > USHRT_MAX ?
- USHRT_MAX : (uint16_t)so->so_qlimit;
+ xinpcb.qlen = so->sol_qlen;
+ xinpcb.qlen_old = so->sol_qlen > USHRT_MAX ?
+ USHRT_MAX : (uint16_t)so->sol_qlen;
+ xinpcb.maxqlen = so->sol_qlimit;
+ xinpcb.maxqlen_old = so->sol_qlimit > USHRT_MAX ?
+ USHRT_MAX : (uint16_t)so->sol_qlimit;
}
SCTP_INP_INCR_REF(inp);
SCTP_INP_RUNLOCK(inp);
Index: head/sys/netinet/sctp_usrreq.c
===================================================================
--- head/sys/netinet/sctp_usrreq.c
+++ head/sys/netinet/sctp_usrreq.c
@@ -7138,19 +7138,12 @@
}
}
SCTP_INP_WLOCK(inp);
- SOCK_LOCK(so);
- /* It appears for 7.0 and on, we must always call this. */
- solisten_proto(so, backlog);
- if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
- /* remove the ACCEPTCONN flag for one-to-many sockets */
- so->so_options &= ~SO_ACCEPTCONN;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) {
+ SOCK_LOCK(so);
+ solisten_proto(so, backlog);
+ SOCK_UNLOCK(so);
}
- if (backlog > 0) {
- inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
- } else {
- inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING;
- }
- SOCK_UNLOCK(so);
+ inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
SCTP_INP_WUNLOCK(inp);
return (error);
}
Index: head/sys/netinet/tcp_subr.c
===================================================================
--- head/sys/netinet/tcp_subr.c
+++ head/sys/netinet/tcp_subr.c
@@ -1664,7 +1664,6 @@
("tcp_close: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
Index: head/sys/netinet/tcp_syncache.c
===================================================================
--- head/sys/netinet/tcp_syncache.c
+++ head/sys/netinet/tcp_syncache.c
@@ -1264,6 +1264,7 @@
* soon as possible.
*/
so = *lsop;
+ KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));
tp = sototcpcb(so);
cred = crhold(so->so_cred);
@@ -1274,7 +1275,7 @@
#endif
ip_ttl = inp->inp_ip_ttl;
ip_tos = inp->inp_ip_tos;
- win = sbspace(&so->so_rcv);
+ win = so->sol_sbrcv_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
#ifdef TCP_RFC7413
@@ -1287,7 +1288,7 @@
* listen queue with bogus TFO connections.
*/
if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
- (so->so_qlimit / 2)) {
+ (so->sol_qlimit / 2)) {
int result;
result = tcp_fastopen_check_cookie(inc,
@@ -2115,7 +2116,7 @@
sc->sc_flags |= SCF_WINSCALE;
}
- wnd = sbspace(&lso->so_rcv);
+ wnd = lso->sol_sbrcv_hiwat;
wnd = imax(wnd, 0);
wnd = imin(wnd, TCP_MAXWIN);
sc->sc_wnd = wnd;
Index: head/sys/netinet/tcp_timewait.c
===================================================================
--- head/sys/netinet/tcp_timewait.c
+++ head/sys/netinet/tcp_timewait.c
@@ -352,7 +352,6 @@
("tcp_twstart: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
@@ -491,7 +490,6 @@
if (inp->inp_flags & INP_SOCKREF) {
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT(so->so_state & SS_PROTOREF,
("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
Index: head/sys/ofed/drivers/infiniband/core/iwcm.c
===================================================================
--- head/sys/ofed/drivers/infiniband/core/iwcm.c
+++ head/sys/ofed/drivers/infiniband/core/iwcm.c
@@ -416,34 +416,19 @@
{
struct socket *so;
struct sockaddr_in *remote;
+ int error;
- ACCEPT_LOCK();
- so = TAILQ_FIRST(&head->so_comp);
- if (!so) {
- ACCEPT_UNLOCK();
- return NULL;
- }
-
- SOCK_LOCK(so);
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- soref(so);
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
+ if (error == EWOULDBLOCK)
+ return (NULL);
remote = NULL;
soaccept(so, (struct sockaddr **)&remote);
free(remote, M_SONAME);
return so;
}
+
static void
iw_so_event_handler(struct work_struct *_work)
{
@@ -485,18 +470,17 @@
#endif
return;
}
+
static int
iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
{
struct iwcm_listen_work *work;
- struct socket *so;
struct iw_cm_id *cm_id = arg;
/* check whether iw_so_event_handler() already dequeued this 'so' */
- so = TAILQ_FIRST(&parent_so->so_comp);
- if (!so)
+ if (TAILQ_EMPTY(&parent_so->sol_comp))
return SU_OK;
- work = kzalloc(sizeof(*work), M_NOWAIT);
+ work = kzalloc(sizeof(*work), waitflag);
if (!work)
return -ENOMEM;
work->cm_id = cm_id;
@@ -507,17 +491,21 @@
return SU_OK;
}
-static void
-iw_init_sock(struct iw_cm_id *cm_id)
+static int
+iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
struct sockopt sopt;
struct socket *so = cm_id->so;
int on = 1;
+ int rc;
- SOCK_LOCK(so);
- soupcall_set(so, SO_RCV, iw_so_upcall, cm_id);
+ rc = -solisten(cm_id->so, backlog, curthread);
+ if (rc != 0)
+ return (rc);
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, iw_so_upcall, cm_id);
so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
+ SOLISTEN_UNLOCK(so);
sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
@@ -525,37 +513,18 @@
sopt.sopt_valsize = sizeof(on);
sopt.sopt_td = NULL;
sosetopt(so, &sopt);
-}
-
-static int
-iw_uninit_socket(struct iw_cm_id *cm_id)
-{
- struct socket *so = cm_id->so;
-
- SOCK_LOCK(so);
- soupcall_clear(so, SO_RCV);
- SOCK_UNLOCK(so);
-
return (0);
}
static int
-iw_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- int rc;
-
- iw_init_sock(cm_id);
- rc = -solisten(cm_id->so, backlog, curthread);
- if (rc != 0)
- iw_uninit_socket(cm_id);
- return (rc);
-}
-
-static int
iw_destroy_listen(struct iw_cm_id *cm_id)
{
+ struct socket *so = cm_id->so;
- return (iw_uninit_socket(cm_id));
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, NULL, NULL);
+ SOLISTEN_UNLOCK(so);
+ return (0);
}
Index: head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
===================================================================
--- head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -310,7 +310,6 @@
("sdp_closed: !SS_PROTOREF"));
ssk->flags &= ~SDP_SOCKREF;
SDP_WUNLOCK(ssk);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
Index: head/sys/rpc/svc_vc.c
===================================================================
--- head/sys/rpc/svc_vc.c
+++ head/sys/rpc/svc_vc.c
@@ -96,6 +96,7 @@
struct sockaddr *raddr);
static int svc_vc_accept(struct socket *head, struct socket **sop);
static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
+static int svc_vc_rendezvous_soupcall(struct socket *, void *, int);
static struct xp_ops svc_vc_rendezvous_ops = {
.xp_recv = svc_vc_rendezvous_recv,
@@ -183,10 +184,10 @@
solisten(so, -1, curthread);
- SOCKBUF_LOCK(&so->so_rcv);
+ SOLISTEN_LOCK(so);
xprt->xp_upcallset = 1;
- soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt);
+ SOLISTEN_UNLOCK(so);
return (xprt);
@@ -316,9 +317,11 @@
int
svc_vc_accept(struct socket *head, struct socket **sop)
{
- int error = 0;
struct socket *so;
+ int error = 0;
+ short nbio;
+ /* XXXGL: shouldn't that be an assertion? */
if ((head->so_options & SO_ACCEPTCONN) == 0) {
error = EINVAL;
goto done;
@@ -328,38 +331,26 @@
if (error != 0)
goto done;
#endif
- ACCEPT_LOCK();
- if (TAILQ_EMPTY(&head->so_comp)) {
- ACCEPT_UNLOCK();
- error = EWOULDBLOCK;
- goto done;
- }
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("svc_vc_accept: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("svc_vc_accept: so not SQ_COMP"));
-
/*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- * XXX might not need soref() since this is simpler than kern_accept.
+ * XXXGL: we want non-blocking semantics. The socket could be a
+ * socket created by kernel as well as socket shared with userland,
+	 * so we can't be sure about presence of SS_NBIO. We also shall not
+ * toggle it on the socket, since that may surprise userland. So we
+ * set SS_NBIO only temporarily.
*/
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
+ SOLISTEN_LOCK(head);
+ nbio = head->so_state & SS_NBIO;
+ head->so_state |= SS_NBIO;
+ error = solisten_dequeue(head, &so, 0);
+ head->so_state &= (nbio & ~SS_NBIO);
+ if (error)
+ goto done;
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
-
+ so->so_state |= nbio;
*sop = so;
/* connection has been removed from the listen queue */
- KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
+ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
done:
return (error);
}
@@ -392,21 +383,21 @@
* connection arrives after our call to accept fails
* with EWOULDBLOCK.
*/
- ACCEPT_LOCK();
- if (TAILQ_EMPTY(&xprt->xp_socket->so_comp))
+ SOLISTEN_LOCK(xprt->xp_socket);
+ if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp))
xprt_inactive_self(xprt);
- ACCEPT_UNLOCK();
+ SOLISTEN_UNLOCK(xprt->xp_socket);
sx_xunlock(&xprt->xp_lock);
return (FALSE);
}
if (error) {
- SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
+ SOLISTEN_LOCK(xprt->xp_socket);
if (xprt->xp_upcallset) {
xprt->xp_upcallset = 0;
soupcall_clear(xprt->xp_socket, SO_RCV);
}
- SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
+ SOLISTEN_UNLOCK(xprt->xp_socket);
xprt_inactive_self(xprt);
sx_xunlock(&xprt->xp_lock);
return (FALSE);
@@ -453,12 +444,6 @@
static void
svc_vc_destroy_common(SVCXPRT *xprt)
{
- SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
- if (xprt->xp_upcallset) {
- xprt->xp_upcallset = 0;
- soupcall_clear(xprt->xp_socket, SO_RCV);
- }
- SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
if (xprt->xp_socket)
(void)soclose(xprt->xp_socket);
@@ -472,6 +457,13 @@
svc_vc_rendezvous_destroy(SVCXPRT *xprt)
{
+ SOLISTEN_LOCK(xprt->xp_socket);
+ if (xprt->xp_upcallset) {
+ xprt->xp_upcallset = 0;
+ solisten_upcall_set(xprt->xp_socket, NULL, NULL);
+ }
+ SOLISTEN_UNLOCK(xprt->xp_socket);
+
svc_vc_destroy_common(xprt);
}
@@ -480,6 +472,13 @@
{
struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
+ SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
+ if (xprt->xp_upcallset) {
+ xprt->xp_upcallset = 0;
+ soupcall_clear(xprt->xp_socket, SO_RCV);
+ }
+ SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
+
svc_vc_destroy_common(xprt);
if (cd->mreq)
@@ -954,6 +953,16 @@
SVCXPRT *xprt = (SVCXPRT *) arg;
if (soreadable(xprt->xp_socket))
+ xprt_active(xprt);
+ return (SU_OK);
+}
+
+static int
+svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag)
+{
+ SVCXPRT *xprt = (SVCXPRT *) arg;
+
+ if (!TAILQ_EMPTY(&head->sol_comp))
xprt_active(xprt);
return (SU_OK);
}
Index: head/sys/sys/sockbuf.h
===================================================================
--- head/sys/sys/sockbuf.h
+++ head/sys/sys/sockbuf.h
@@ -32,7 +32,6 @@
*/
#ifndef _SYS_SOCKBUF_H_
#define _SYS_SOCKBUF_H_
-#include <sys/selinfo.h> /* for struct selinfo */
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
@@ -64,6 +63,7 @@
struct sockaddr;
struct socket;
struct thread;
+struct selinfo;
struct xsockbuf {
u_int sb_cc;
@@ -84,9 +84,9 @@
* (a) locked by SOCKBUF_LOCK().
*/
struct sockbuf {
- struct selinfo sb_sel; /* process selecting read/write */
- struct mtx sb_mtx; /* sockbuf lock */
- struct sx sb_sx; /* prevent I/O interlacing */
+ struct mtx sb_mtx; /* sockbuf lock */
+ struct sx sb_sx; /* prevent I/O interlacing */
+ struct selinfo *sb_sel; /* process selecting read/write */
short sb_state; /* (a) socket state on sockbuf */
#define sb_startzero sb_mb
struct mbuf *sb_mb; /* (a) the mbuf chain */
Index: head/sys/sys/socket.h
===================================================================
--- head/sys/sys/socket.h
+++ head/sys/sys/socket.h
@@ -111,7 +111,15 @@
*/
#define SOCK_CLOEXEC 0x10000000
#define SOCK_NONBLOCK 0x20000000
-#endif
+#ifdef _KERNEL
+/*
+ * Flags for accept1(), kern_accept4() and solisten_dequeue, in addition
+ * to SOCK_CLOEXEC and SOCK_NONBLOCK.
+ */
+#define ACCEPT4_INHERIT 0x1
+#define ACCEPT4_COMPAT 0x2
+#endif /* _KERNEL */
+#endif /* __BSD_VISIBLE */
/*
* Option flags per-socket.
@@ -704,9 +712,5 @@
void so_lock(struct socket *so);
void so_unlock(struct socket *so);
-void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg);
-
-#endif
-
-
+#endif /* _KERNEL */
#endif /* !_SYS_SOCKET_H_ */
Index: head/sys/sys/socketvar.h
===================================================================
--- head/sys/sys/socketvar.h
+++ head/sys/sys/socketvar.h
@@ -64,60 +64,35 @@
* Locking key to struct socket:
* (a) constant after allocation, no locking required.
* (b) locked by SOCK_LOCK(so).
- * (c) locked by SOCKBUF_LOCK(&so->so_rcv).
- * (e) locked by ACCEPT_LOCK().
+ * (cr) locked by SOCKBUF_LOCK(&so->so_rcv).
+ * (cs) locked by SOCKBUF_LOCK(&so->so_snd).
+ * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
* (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value.
* (h) locked by global mutex so_global_mtx.
*/
+TAILQ_HEAD(accept_queue, socket);
struct socket {
- int so_count; /* (b) reference count */
+ struct mtx so_lock;
+ volatile u_int so_count; /* (b / refcount) */
+ struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */
+ struct selinfo so_wrsel; /* (b/cs) for so_snd */
short so_type; /* (a) generic type, see socket.h */
- short so_options; /* from socket call, see socket.h */
- short so_linger; /* time to linger while closing */
+ short so_options; /* (b) from socket call, see socket.h */
+ short so_linger; /* time to linger close(2) */
short so_state; /* (b) internal state flags SS_* */
- int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
-/*
- * Variables for connection queuing.
- * Socket where accepts occur is so_head in all subsidiary sockets.
- * If so_head is 0, socket is not related to an accept.
- * For head socket so_incomp queues partially completed connections,
- * while so_comp is a queue of connections ready to be accepted.
- * If a connection is aborted and it has so_head set, then
- * it has to be pulled out of either so_incomp or so_comp.
- * We allow connections to queue up based on current queue lengths
- * and limit on number of queued connections for this socket.
- */
- struct socket *so_head; /* (e) back pointer to listen socket */
- TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */
- TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */
- TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */
- u_int so_qlen; /* (e) number of unaccepted connections */
- u_int so_incqlen; /* (e) number of unaccepted incomplete
- connections */
- u_int so_qlimit; /* (e) max number queued connections */
short so_timeo; /* (g) connection timeout */
u_short so_error; /* (f) error affecting connection */
struct sigio *so_sigio; /* [sg] information for async I/O or
out of band data (SIGURG) */
- u_long so_oobmark; /* (c) chars to oob mark */
-
- struct sockbuf so_rcv, so_snd;
-
struct ucred *so_cred; /* (a) user credentials */
struct label *so_label; /* (b) MAC label for socket */
- struct label *so_peerlabel; /* (b) cached MAC label for peer */
/* NB: generation count must not be first. */
so_gen_t so_gencnt; /* (h) generation count */
void *so_emuldata; /* (b) private data for emulators */
- struct so_accf {
- struct accept_filter *so_accept_filter;
- void *so_accept_filter_arg; /* saved filter args */
- char *so_accept_filter_str; /* saved user args */
- } *so_accf;
struct osd osd; /* Object Specific extensions */
/*
* so_fibnum, so_user_cookie and friends can be used to attach
@@ -130,41 +105,95 @@
int so_ts_clock; /* type of the clock used for timestamps */
uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */
+ union {
+ /* Regular (data flow) socket. */
+ struct {
+ /* (cr, cs) Receive and send buffers. */
+ struct sockbuf so_rcv, so_snd;
- void *so_pspare[2]; /* general use */
- int so_ispare[2]; /* general use */
+ /* (e) Our place on accept queue. */
+ TAILQ_ENTRY(socket) so_list;
+ struct socket *so_listen; /* (b) */
+ enum {
+ SQ_NONE = 0,
+ SQ_INCOMP = 0x0800, /* on sol_incomp */
+ SQ_COMP = 0x1000, /* on sol_comp */
+ } so_qstate; /* (b) */
+
+ /* (b) cached MAC label for peer */
+ struct label *so_peerlabel;
+ u_long so_oobmark; /* chars to oob mark */
+ };
+ /*
+ * Listening socket, where accepts occur, is so_listen in all
+ * subsidiary sockets. If so_listen is NULL, socket is not
+ * related to an accept. For a listening socket itself
+ * sol_incomp queues partially completed connections, while
+ * sol_comp is a queue of connections ready to be accepted.
+ * If a connection is aborted and it has so_listen set, then
+ * it has to be pulled out of either sol_incomp or sol_comp.
+ * We allow connections to queue up based on current queue
+ * lengths and limit on number of queued connections for this
+ * socket.
+ */
+ struct {
+ /* (e) queue of partial unaccepted connections */
+ struct accept_queue sol_incomp;
+ /* (e) queue of complete unaccepted connections */
+ struct accept_queue sol_comp;
+ u_int sol_qlen; /* (e) sol_comp length */
+ u_int sol_incqlen; /* (e) sol_incomp length */
+ u_int sol_qlimit; /* (e) queue limit */
+
+ /* accept_filter(9) optional data */
+ struct accept_filter *sol_accept_filter;
+ void *sol_accept_filter_arg; /* saved filter args */
+ char *sol_accept_filter_str; /* saved user args */
+
+ /* Optional upcall, for kernel socket. */
+ so_upcall_t *sol_upcall; /* (e) */
+ void *sol_upcallarg; /* (e) */
+
+ /* Socket buffer parameters, to be copied to
+ * dataflow sockets, accepted from this one. */
+ int sol_sbrcv_lowat;
+ int sol_sbsnd_lowat;
+ u_int sol_sbrcv_hiwat;
+ u_int sol_sbsnd_hiwat;
+ short sol_sbrcv_flags;
+ short sol_sbsnd_flags;
+ sbintime_t sol_sbrcv_timeo;
+ sbintime_t sol_sbsnd_timeo;
+ };
+ };
};
-/*
- * Global accept mutex to serialize access to accept queues and
- * fields associated with multiple sockets. This allows us to
- * avoid defining a lock order between listen and accept sockets
- * until such time as it proves to be a good idea.
- */
-extern struct mtx accept_mtx;
-#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED)
-#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED)
-#define ACCEPT_LOCK() mtx_lock(&accept_mtx)
-#define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx)
+#define SOCK_MTX(so) &(so)->so_lock
+#define SOCK_LOCK(so) mtx_lock(&(so)->so_lock)
+#define SOCK_OWNED(so) mtx_owned(&(so)->so_lock)
+#define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock)
+#define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED)
+#define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED)
-/*
- * Per-socket mutex: we reuse the receive socket buffer mutex for space
- * efficiency. This decision should probably be revisited as we optimize
- * locking for the socket code.
- */
-#define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv)
-#define SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv)
-#define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv)
-#define SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv)
-#define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
+#define SOLISTENING(sol) (((sol)->so_options & SO_ACCEPTCONN) != 0)
+#define SOLISTEN_LOCK(sol) do { \
+ mtx_lock(&(sol)->so_lock); \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+} while (0)
+#define SOLISTEN_TRYLOCK(sol) mtx_trylock(&(sol)->so_lock)
+#define SOLISTEN_UNLOCK(sol) do { \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+ mtx_unlock(&(sol)->so_lock); \
+} while (0)
+#define SOLISTEN_LOCK_ASSERT(sol) do { \
+ mtx_assert(&(sol)->so_lock, MA_OWNED); \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+} while (0)
/*
- * Socket state bits stored in so_qstate.
- */
-#define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */
-#define SQ_COMP 0x1000 /* unaccepted, complete connection */
-
-/*
* Externalized form of struct socket used by the sysctl(3) interface.
*/
struct xsocket {
@@ -213,8 +242,7 @@
/* can we read something from so? */
#define soreadabledata(so) \
- (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
- !TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
+ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || (so)->so_error)
#define soreadable(so) \
(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
@@ -227,26 +255,19 @@
(so)->so_error)
/*
- * soref()/sorele() ref-count the socket structure. Note that you must
- * still explicitly close the socket, but the last ref count will free
- * the structure.
+ * soref()/sorele() ref-count the socket structure.
+ * soref() may be called without owning socket lock, but in that case a
+ * caller must own something that holds socket, and so_count must be not 0.
+ * Note that you must still explicitly close the socket, but the last ref
+ * count will free the structure.
*/
-#define soref(so) do { \
- SOCK_LOCK_ASSERT(so); \
- ++(so)->so_count; \
-} while (0)
-
+#define soref(so) refcount_acquire(&(so)->so_count)
#define sorele(so) do { \
- ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
- if ((so)->so_count <= 0) \
- panic("sorele"); \
- if (--(so)->so_count == 0) \
+ if (refcount_release(&(so)->so_count)) \
sofree(so); \
- else { \
+ else \
SOCK_UNLOCK(so); \
- ACCEPT_UNLOCK(); \
- } \
} while (0)
/*
@@ -369,10 +390,11 @@
int solisten(struct socket *so, int backlog, struct thread *td);
void solisten_proto(struct socket *so, int backlog);
int solisten_proto_check(struct socket *so);
+int solisten_dequeue(struct socket *, struct socket **, int);
struct socket *
sonewconn(struct socket *head, int connstatus);
-
-
+struct socket *
+ sopeeloff(struct socket *);
int sopoll(struct socket *so, int events, struct ucred *active_cred,
struct thread *td);
int sopoll_generic(struct socket *so, int events,
@@ -403,8 +425,10 @@
void sotoxsocket(struct socket *so, struct xsocket *xso);
void soupcall_clear(struct socket *, int);
void soupcall_set(struct socket *, int, so_upcall_t, void *);
+void solisten_upcall_set(struct socket *, so_upcall_t, void *);
void sowakeup(struct socket *so, struct sockbuf *sb);
void sowakeup_aio(struct socket *so, struct sockbuf *sb);
+void solisten_wakeup(struct socket *);
int selsocket(struct socket *so, int events, struct timeval *tv,
struct thread *td);
Index: head/usr.bin/netstat/inet.c
===================================================================
--- head/usr.bin/netstat/inet.c
+++ head/usr.bin/netstat/inet.c
@@ -170,14 +170,17 @@
if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0)
return (-1);
xso->xso_family = domain.dom_family;
- xso->so_qlen = so->so_qlen;
- xso->so_incqlen = so->so_incqlen;
- xso->so_qlimit = so->so_qlimit;
xso->so_timeo = so->so_timeo;
xso->so_error = so->so_error;
- xso->so_oobmark = so->so_oobmark;
- sbtoxsockbuf(&so->so_snd, &xso->so_snd);
- sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ if (SOLISTENING(so)) {
+ xso->so_qlen = so->sol_qlen;
+ xso->so_incqlen = so->sol_incqlen;
+ xso->so_qlimit = so->sol_qlimit;
+ } else {
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ xso->so_oobmark = so->so_oobmark;
+ }
return (0);
}

File Metadata

Mime Type
text/plain
Expires
Mon, Mar 10, 8:41 AM (9 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17081941
Default Alt Text
D9770.id.diff (93 KB)

Event Timeline