Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F111854299
D9770.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
93 KB
Referenced Files
None
Subscribers
None
D9770.id.diff
View Options
Index: head/sys/cam/ctl/ctl_ha.c
===================================================================
--- head/sys/cam/ctl/ctl_ha.c
+++ head/sys/cam/ctl/ctl_ha.c
@@ -458,45 +458,20 @@
static int
ctl_ha_accept(struct ha_softc *softc)
{
- struct socket *so;
+ struct socket *lso, *so;
struct sockaddr *sap;
int error;
- ACCEPT_LOCK();
- if (softc->ha_lso->so_rcv.sb_state & SBS_CANTRCVMORE)
- softc->ha_lso->so_error = ECONNABORTED;
- if (softc->ha_lso->so_error) {
- error = softc->ha_lso->so_error;
- softc->ha_lso->so_error = 0;
- ACCEPT_UNLOCK();
+ lso = softc->ha_lso;
+ SOLISTEN_LOCK(lso);
+ error = solisten_dequeue(lso, &so, 0);
+ if (error == EWOULDBLOCK)
+ return (error);
+ if (error) {
printf("%s: socket error %d\n", __func__, error);
goto out;
}
- so = TAILQ_FIRST(&softc->ha_lso->so_comp);
- if (so == NULL) {
- ACCEPT_UNLOCK();
- return (EWOULDBLOCK);
- }
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&softc->ha_lso->so_comp, so, so_list);
- softc->ha_lso->so_qlen--;
- so->so_state |= SS_NBIO;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
-
sap = NULL;
error = soaccept(so, &sap);
if (error != 0) {
@@ -556,9 +531,6 @@
printf("%s: REUSEPORT setting failed %d\n",
__func__, error);
}
- SOCKBUF_LOCK(&softc->ha_lso->so_rcv);
- soupcall_set(softc->ha_lso, SO_RCV, ctl_ha_lupcall, softc);
- SOCKBUF_UNLOCK(&softc->ha_lso->so_rcv);
}
memcpy(&sa, &softc->ha_peer_in, sizeof(sa));
@@ -572,6 +544,10 @@
printf("%s: solisten() error %d\n", __func__, error);
goto out;
}
+ SOLISTEN_LOCK(softc->ha_lso);
+ softc->ha_lso->so_state |= SS_NBIO;
+ solisten_upcall_set(softc->ha_lso, ctl_ha_lupcall, softc);
+ SOLISTEN_UNLOCK(softc->ha_lso);
return (0);
out:
Index: head/sys/dev/iscsi/icl_soft_proxy.c
===================================================================
--- head/sys/dev/iscsi/icl_soft_proxy.c
+++ head/sys/dev/iscsi/icl_soft_proxy.c
@@ -92,7 +92,6 @@
struct icl_listen *ils_listen;
struct socket *ils_socket;
bool ils_running;
- bool ils_disconnecting;
int ils_id;
};
@@ -184,7 +183,9 @@
while (ils->ils_running) {
ICL_DEBUG("waiting for accept thread to terminate");
sx_xunlock(&il->il_lock);
- ils->ils_disconnecting = true;
+ SOLISTEN_LOCK(ils->ils_socket);
+ ils->ils_socket->so_error = ENOTCONN;
+ SOLISTEN_UNLOCK(ils->ils_socket);
wakeup(&ils->ils_socket->so_timeo);
pause("icl_unlisten", 1 * hz);
sx_xlock(&il->il_lock);
@@ -200,9 +201,9 @@
}
/*
- * XXX: Doing accept in a separate thread in each socket might not be the best way
- * to do stuff, but it's pretty clean and debuggable - and you probably won't
- * have hundreds of listening sockets anyway.
+ * XXX: Doing accept in a separate thread in each socket might not be the
+ * best way to do stuff, but it's pretty clean and debuggable - and you
+ * probably won't have hundreds of listening sockets anyway.
*/
static void
icl_accept_thread(void *arg)
@@ -218,55 +219,22 @@
ils->ils_running = true;
for (;;) {
- ACCEPT_LOCK();
- while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 && ils->ils_disconnecting == false) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
- head->so_error = ECONNABORTED;
- break;
- }
- error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
- "accept", 0);
- if (error) {
- ACCEPT_UNLOCK();
- ICL_WARN("msleep failed with error %d", error);
- continue;
- }
- if (ils->ils_disconnecting) {
- ACCEPT_UNLOCK();
- ICL_DEBUG("terminating");
- ils->ils_running = false;
- kthread_exit();
- return;
- }
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, 0);
+ if (error == ENOTCONN) {
+ /*
+ * XXXGL: ENOTCONN is our mark from icl_listen_free().
+ * Neither the socket code nor msleep(9) may return it.
+ */
+ ICL_DEBUG("terminating");
+ ils->ils_running = false;
+ kthread_exit();
+ return;
}
- if (head->so_error) {
- error = head->so_error;
- head->so_error = 0;
- ACCEPT_UNLOCK();
- ICL_WARN("socket error %d", error);
+ if (error) {
+ ICL_WARN("solisten_dequeue error %d", error);
continue;
}
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(so != NULL, ("NULL so"));
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
-
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
sa = NULL;
error = soaccept(so, &sa);
Index: head/sys/kern/sys_socket.c
===================================================================
--- head/sys/kern/sys_socket.c
+++ head/sys/kern/sys_socket.c
@@ -170,32 +170,36 @@
break;
case FIOASYNC:
- /*
- * XXXRW: This code separately acquires SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) even though they are the same
- * mutex to avoid introducing the assumption that they are
- * the same.
- */
if (*(int *)data) {
SOCK_LOCK(so);
so->so_state |= SS_ASYNC;
+ if (SOLISTENING(so)) {
+ so->sol_sbrcv_flags |= SB_ASYNC;
+ so->sol_sbsnd_flags |= SB_ASYNC;
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_flags |= SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_flags |= SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
SOCK_UNLOCK(so);
- SOCKBUF_LOCK(&so->so_rcv);
- so->so_rcv.sb_flags |= SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_flags |= SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_snd);
} else {
SOCK_LOCK(so);
so->so_state &= ~SS_ASYNC;
+ if (SOLISTENING(so)) {
+ so->sol_sbrcv_flags &= ~SB_ASYNC;
+ so->sol_sbsnd_flags &= ~SB_ASYNC;
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_flags &= ~SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_flags &= ~SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
SOCK_UNLOCK(so);
- SOCKBUF_LOCK(&so->so_rcv);
- so->so_rcv.sb_flags &= ~SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_flags &= ~SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_snd);
}
break;
@@ -706,7 +710,6 @@
sb->sb_flags &= ~SB_AIO_RUNNING;
SOCKBUF_UNLOCK(sb);
- ACCEPT_LOCK();
SOCK_LOCK(so);
sorele(so);
}
Index: head/sys/kern/uipc_accf.c
===================================================================
--- head/sys/kern/uipc_accf.c
+++ head/sys/kern/uipc_accf.c
@@ -173,13 +173,13 @@
error = EINVAL;
goto out;
}
- if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+ if (so->sol_accept_filter == NULL) {
error = EINVAL;
goto out;
}
- strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
- if (so->so_accf->so_accept_filter_str != NULL)
- strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
+ strcpy(afap->af_name, so->sol_accept_filter->accf_name);
+ if (so->sol_accept_filter_str != NULL)
+ strcpy(afap->af_arg, so->sol_accept_filter_str);
out:
SOCK_UNLOCK(so);
if (error == 0)
@@ -193,31 +193,57 @@
{
struct accept_filter_arg *afap;
struct accept_filter *afp;
- struct so_accf *newaf;
- int error = 0;
+ char *accept_filter_str = NULL;
+ void *accept_filter_arg = NULL;
+ int error;
/*
* Handle the simple delete case first.
*/
if (sopt == NULL || sopt->sopt_val == NULL) {
+ struct socket *sp, *sp1;
+ int wakeup;
+
SOCK_LOCK(so);
if ((so->so_options & SO_ACCEPTCONN) == 0) {
SOCK_UNLOCK(so);
return (EINVAL);
}
- if (so->so_accf != NULL) {
- struct so_accf *af = so->so_accf;
- if (af->so_accept_filter != NULL &&
- af->so_accept_filter->accf_destroy != NULL) {
- af->so_accept_filter->accf_destroy(so);
- }
- if (af->so_accept_filter_str != NULL)
- free(af->so_accept_filter_str, M_ACCF);
- free(af, M_ACCF);
- so->so_accf = NULL;
+ if (so->sol_accept_filter == NULL) {
+ SOCK_UNLOCK(so);
+ return (0);
}
+ if (so->sol_accept_filter->accf_destroy != NULL)
+ so->sol_accept_filter->accf_destroy(so);
+ if (so->sol_accept_filter_str != NULL)
+ free(so->sol_accept_filter_str, M_ACCF);
+ so->sol_accept_filter = NULL;
+ so->sol_accept_filter_arg = NULL;
+ so->sol_accept_filter_str = NULL;
so->so_options &= ~SO_ACCEPTFILTER;
- SOCK_UNLOCK(so);
+
+ /*
+ * Move from the incomplete queue to the complete queue only
+ * those connections that are blocked by us.
+ */
+ wakeup = 0;
+ TAILQ_FOREACH_SAFE(sp, &so->sol_incomp, so_list, sp1) {
+ SOCK_LOCK(sp);
+ if (sp->so_options & SO_ACCEPTFILTER) {
+ TAILQ_REMOVE(&so->sol_incomp, sp, so_list);
+ TAILQ_INSERT_TAIL(&so->sol_comp, sp, so_list);
+ sp->so_qstate = SQ_COMP;
+ sp->so_options &= ~SO_ACCEPTFILTER;
+ so->sol_incqlen--;
+ so->sol_qlen++;
+ wakeup = 1;
+ }
+ SOCK_UNLOCK(sp);
+ }
+ if (wakeup)
+ solisten_wakeup(so); /* unlocks */
+ else
+ SOLISTEN_UNLOCK(so);
return (0);
}
@@ -238,17 +264,10 @@
free(afap, M_TEMP);
return (ENOENT);
}
- /*
- * Allocate the new accept filter instance storage. We may
- * have to free it again later if we fail to attach it. If
- * attached properly, 'newaf' is NULLed to avoid a free()
- * while in use.
- */
- newaf = malloc(sizeof(*newaf), M_ACCF, M_WAITOK | M_ZERO);
if (afp->accf_create != NULL && afap->af_name[0] != '\0') {
size_t len = strlen(afap->af_name) + 1;
- newaf->so_accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
- strcpy(newaf->so_accept_filter_str, afap->af_name);
+ accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
+ strcpy(accept_filter_str, afap->af_name);
}
/*
@@ -256,8 +275,8 @@
* without first removing it.
*/
SOCK_LOCK(so);
- if (((so->so_options & SO_ACCEPTCONN) == 0) ||
- (so->so_accf != NULL)) {
+ if ((so->so_options & SO_ACCEPTCONN) == 0 ||
+ so->sol_accept_filter != NULL) {
error = EINVAL;
goto out;
}
@@ -268,25 +287,20 @@
* can't block.
*/
if (afp->accf_create != NULL) {
- newaf->so_accept_filter_arg =
- afp->accf_create(so, afap->af_arg);
- if (newaf->so_accept_filter_arg == NULL) {
+ accept_filter_arg = afp->accf_create(so, afap->af_arg);
+ if (accept_filter_arg == NULL) {
error = EINVAL;
goto out;
}
}
- newaf->so_accept_filter = afp;
- so->so_accf = newaf;
+ so->sol_accept_filter = afp;
+ so->sol_accept_filter_arg = accept_filter_arg;
+ so->sol_accept_filter_str = accept_filter_str;
so->so_options |= SO_ACCEPTFILTER;
- newaf = NULL;
out:
SOCK_UNLOCK(so);
- if (newaf != NULL) {
- if (newaf->so_accept_filter_str != NULL)
- free(newaf->so_accept_filter_str, M_ACCF);
- free(newaf, M_ACCF);
- }
- if (afap != NULL)
- free(afap, M_TEMP);
+ if (accept_filter_str != NULL)
+ free(accept_filter_str, M_ACCF);
+ free(afap, M_TEMP);
return (error);
}
Index: head/sys/kern/uipc_debug.c
===================================================================
--- head/sys/kern/uipc_debug.c
+++ head/sys/kern/uipc_debug.c
@@ -448,8 +448,6 @@
db_printf(")\n");
db_print_indent(indent);
- db_printf("so_qstate: 0x%x (", so->so_qstate);
- db_print_soqstate(so->so_qstate);
db_printf(") ");
db_printf("so_pcb: %p ", so->so_pcb);
db_printf("so_proto: %p\n", so->so_proto);
@@ -458,24 +456,28 @@
db_print_protosw(so->so_proto, "so_proto", indent);
db_print_indent(indent);
- db_printf("so_head: %p ", so->so_head);
- db_printf("so_incomp first: %p ", TAILQ_FIRST(&so->so_incomp));
- db_printf("so_comp first: %p\n", TAILQ_FIRST(&so->so_comp));
+ if (so->so_options & SO_ACCEPTCONN) {
+ db_printf("sol_incomp first: %p ",
+ TAILQ_FIRST(&so->sol_incomp));
+ db_printf("sol_comp first: %p\n", TAILQ_FIRST(&so->sol_comp));
+ db_printf("sol_qlen: %d ", so->sol_qlen);
+ db_printf("sol_incqlen: %d ", so->sol_incqlen);
+ db_printf("sol_qlimit: %d ", so->sol_qlimit);
+ } else {
+ db_printf("so_qstate: 0x%x (", so->so_qstate);
+ db_print_soqstate(so->so_qstate);
+ db_printf("so_listen: %p ", so->so_listen);
+ /* so_list skipped */
+ db_printf("so_timeo: %d ", so->so_timeo);
+ db_printf("so_error: %d\n", so->so_error);
- db_print_indent(indent);
- /* so_list skipped */
- db_printf("so_qlen: %u ", so->so_qlen);
- db_printf("so_incqlen: %u ", so->so_incqlen);
- db_printf("so_qlimit: %u ", so->so_qlimit);
- db_printf("so_timeo: %d ", so->so_timeo);
- db_printf("so_error: %d\n", so->so_error);
+ db_print_indent(indent);
+ db_printf("so_sigio: %p ", so->so_sigio);
+ db_printf("so_oobmark: %lu ", so->so_oobmark);
- db_print_indent(indent);
- db_printf("so_sigio: %p ", so->so_sigio);
- db_printf("so_oobmark: %lu ", so->so_oobmark);
-
- db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
- db_print_sockbuf(&so->so_snd, "so_snd", indent);
+ db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
+ db_print_sockbuf(&so->so_snd, "so_snd", indent);
+ }
}
DB_SHOW_COMMAND(socket, db_show_socket)
Index: head/sys/kern/uipc_sockbuf.c
===================================================================
--- head/sys/kern/uipc_sockbuf.c
+++ head/sys/kern/uipc_sockbuf.c
@@ -314,14 +314,14 @@
SOCKBUF_LOCK_ASSERT(sb);
- selwakeuppri(&sb->sb_sel, PSOCK);
- if (!SEL_WAITING(&sb->sb_sel))
+ selwakeuppri(sb->sb_sel, PSOCK);
+ if (!SEL_WAITING(sb->sb_sel))
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_acc);
}
- KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
+ KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
if (sb->sb_upcall != NULL && !(so->so_state & SS_ISDISCONNECTED)) {
ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
if (ret == SU_ISCONNECTED) {
Index: head/sys/kern/uipc_socket.c
===================================================================
--- head/sys/kern/uipc_socket.c
+++ head/sys/kern/uipc_socket.c
@@ -106,6 +106,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_compat.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -154,13 +155,21 @@
static int soreceive_rcvoob(struct socket *so, struct uio *uio,
int flags);
+static void so_rdknl_lock(void *);
+static void so_rdknl_unlock(void *);
+static void so_rdknl_assert_locked(void *);
+static void so_rdknl_assert_unlocked(void *);
+static void so_wrknl_lock(void *);
+static void so_wrknl_unlock(void *);
+static void so_wrknl_assert_locked(void *);
+static void so_wrknl_assert_unlocked(void *);
static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
-static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
static int filt_soempty(struct knote *kn, long hint);
+static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
fo_kqfilter_t soo_kqfilter;
static struct filterops soread_filtops = {
@@ -393,8 +402,16 @@
return (NULL);
}
+ /*
+ * The socket locking protocol allows locking 2 sockets at a time;
+ * however, the first one must be a listening socket. WITNESS lacks
+ * a feature to change the class of an existing lock, so we use DUPOK.
+ */
+ mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
+ so->so_rcv.sb_sel = &so->so_rdsel;
+ so->so_snd.sb_sel = &so->so_wrsel;
sx_init(&so->so_snd.sb_sx, "so_snd_sx");
sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
TAILQ_INIT(&so->so_snd.sb_aiojobq);
@@ -450,9 +467,6 @@
if (so->so_snd.sb_hiwat)
(void)chgsbsize(so->so_cred->cr_uidinfo,
&so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
- /* remove accept filter if one is present. */
- if (so->so_accf != NULL)
- accept_filt_setopt(so, NULL);
#ifdef MAC
mac_socket_destroy(so);
#endif
@@ -460,10 +474,16 @@
crfree(so->so_cred);
khelp_destroy_osd(&so->osd);
- sx_destroy(&so->so_snd.sb_sx);
- sx_destroy(&so->so_rcv.sb_sx);
- SOCKBUF_LOCK_DESTROY(&so->so_snd);
- SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+ if (SOLISTENING(so)) {
+ if (so->sol_accept_filter != NULL)
+ accept_filt_setopt(so, NULL);
+ } else {
+ sx_destroy(&so->so_snd.sb_sx);
+ sx_destroy(&so->so_rcv.sb_sx);
+ SOCKBUF_LOCK_DESTROY(&so->so_snd);
+ SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+ }
+ mtx_destroy(&so->so_lock);
uma_zfree(socket_zone, so);
}
@@ -506,8 +526,6 @@
if (so == NULL)
return (ENOBUFS);
- TAILQ_INIT(&so->so_incomp);
- TAILQ_INIT(&so->so_comp);
so->so_type = type;
so->so_cred = crhold(cred);
if ((prp->pr_domain->dom_family == PF_INET) ||
@@ -520,9 +538,10 @@
#ifdef MAC
mac_socket_create(cred, so);
#endif
- knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
- knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
- so->so_count = 1;
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
/*
* Auto-sizing of socket buffers is managed by the protocols and
* the appropriate flags must be set in the pru_attach function.
@@ -531,12 +550,10 @@
error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
CURVNET_RESTORE();
if (error) {
- KASSERT(so->so_count == 1, ("socreate: so_count %d",
- so->so_count));
- so->so_count = 0;
sodealloc(so);
return (error);
}
+ soref(so);
*aso = so;
return (0);
}
@@ -564,11 +581,11 @@
static int overcount;
struct socket *so;
- int over;
+ u_int over;
- ACCEPT_LOCK();
- over = (head->so_qlen > 3 * head->so_qlimit / 2);
- ACCEPT_UNLOCK();
+ SOLISTEN_LOCK(head);
+ over = (head->sol_qlen > 3 * head->sol_qlimit / 2);
+ SOLISTEN_UNLOCK(head);
#ifdef REGRESSION
if (regression_sonewconn_earlytest && over) {
#else
@@ -580,15 +597,15 @@
log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: "
"%i already in queue awaiting acceptance "
"(%d occurrences)\n",
- __func__, head->so_pcb, head->so_qlen, overcount);
+ __func__, head->so_pcb, head->sol_qlen, overcount);
overcount = 0;
}
return (NULL);
}
- VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
- __func__, __LINE__, head));
+ VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL",
+ __func__, head));
so = soalloc(head->so_vnet);
if (so == NULL) {
log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
@@ -596,11 +613,8 @@
__func__, head->so_pcb);
return (NULL);
}
- if ((head->so_options & SO_ACCEPTFILTER) != 0)
- connstatus = 0;
- so->so_head = head;
+ so->so_listen = head;
so->so_type = head->so_type;
- so->so_options = head->so_options &~ SO_ACCEPTCONN;
so->so_linger = head->so_linger;
so->so_state = head->so_state | SS_NOFDREF;
so->so_fibnum = head->so_fibnum;
@@ -609,10 +623,12 @@
#ifdef MAC
mac_socket_newconn(head, so);
#endif
- knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
- knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
VNET_SO_ASSERT(head);
- if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) {
sodealloc(so);
log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
__func__, head->so_pcb);
@@ -624,32 +640,24 @@
__func__, head->so_pcb);
return (NULL);
}
- so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
- so->so_snd.sb_lowat = head->so_snd.sb_lowat;
- so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
- so->so_snd.sb_timeo = head->so_snd.sb_timeo;
- so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
- so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+ so->so_rcv.sb_lowat = head->sol_sbrcv_lowat;
+ so->so_snd.sb_lowat = head->sol_sbsnd_lowat;
+ so->so_rcv.sb_timeo = head->sol_sbrcv_timeo;
+ so->so_snd.sb_timeo = head->sol_sbsnd_timeo;
+ so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE;
+ so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE;
+
+ SOLISTEN_LOCK(head);
+ if (head->sol_accept_filter != NULL)
+ connstatus = 0;
so->so_state |= connstatus;
- ACCEPT_LOCK();
- /*
- * The accept socket may be tearing down but we just
- * won a race on the ACCEPT_LOCK.
- * However, if sctp_peeloff() is called on a 1-to-many
- * style socket, the SO_ACCEPTCONN doesn't need to be set.
- */
- if (!(head->so_options & SO_ACCEPTCONN) &&
- ((head->so_proto->pr_protocol != IPPROTO_SCTP) ||
- (head->so_type != SOCK_SEQPACKET))) {
- SOCK_LOCK(so);
- so->so_head = NULL;
- sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */
- return (NULL);
- }
+ so->so_options = head->so_options & ~SO_ACCEPTCONN;
+ soref(head); /* A socket on (in)complete queue refs head. */
if (connstatus) {
- TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
- so->so_qstate |= SQ_COMP;
- head->so_qlen++;
+ TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+ so->so_qstate = SQ_COMP;
+ head->sol_qlen++;
+ solisten_wakeup(head); /* unlocks */
} else {
/*
* Keep removing sockets from the head until there's room for
@@ -658,28 +666,86 @@
* threads and soabort() requires dropping locks, we must
* loop waiting for the condition to be true.
*/
- while (head->so_incqlen > head->so_qlimit) {
+ while (head->sol_incqlen > head->sol_qlimit) {
struct socket *sp;
- sp = TAILQ_FIRST(&head->so_incomp);
- TAILQ_REMOVE(&head->so_incomp, sp, so_list);
- head->so_incqlen--;
- sp->so_qstate &= ~SQ_INCOMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
+
+ sp = TAILQ_FIRST(&head->sol_incomp);
+ TAILQ_REMOVE(&head->sol_incomp, sp, so_list);
+ head->sol_incqlen--;
+ SOCK_LOCK(sp);
+ sp->so_qstate = SQ_NONE;
+ sp->so_listen = NULL;
+ SOCK_UNLOCK(sp);
+ sorele(head); /* does SOLISTEN_UNLOCK, head stays */
soabort(sp);
- ACCEPT_LOCK();
+ SOLISTEN_LOCK(head);
}
- TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
- so->so_qstate |= SQ_INCOMP;
- head->so_incqlen++;
+ TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list);
+ so->so_qstate = SQ_INCOMP;
+ head->sol_incqlen++;
+ SOLISTEN_UNLOCK(head);
}
- ACCEPT_UNLOCK();
- if (connstatus) {
- sorwakeup(head);
- wakeup_one(&head->so_timeo);
+ return (so);
+}
+
+#ifdef SCTP
+/*
+ * Socket part of sctp_peeloff(). Detach a new socket from an
+ * association. The new socket is returned with a reference.
+ */
+struct socket *
+sopeeloff(struct socket *head)
+{
+ struct socket *so;
+
+ VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
+ __func__, __LINE__, head));
+ so = soalloc(head->so_vnet);
+ if (so == NULL) {
+ log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
+ "limit reached or out of memory\n",
+ __func__, head->so_pcb);
+ return (NULL);
}
+ so->so_type = head->so_type;
+ so->so_options = head->so_options;
+ so->so_linger = head->so_linger;
+ so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED;
+ so->so_fibnum = head->so_fibnum;
+ so->so_proto = head->so_proto;
+ so->so_cred = crhold(head->so_cred);
+#ifdef MAC
+ mac_socket_newconn(head, so);
+#endif
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
+ VNET_SO_ASSERT(head);
+ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
+ so->so_snd.sb_lowat = head->so_snd.sb_lowat;
+ so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
+ so->so_snd.sb_timeo = head->so_snd.sb_timeo;
+ so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
+ so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+
+ soref(so);
+
return (so);
}
+#endif /* SCTP */
int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
@@ -741,16 +807,140 @@
void
solisten_proto(struct socket *so, int backlog)
{
+ int sbrcv_lowat, sbsnd_lowat;
+ u_int sbrcv_hiwat, sbsnd_hiwat;
+ short sbrcv_flags, sbsnd_flags;
+ sbintime_t sbrcv_timeo, sbsnd_timeo;
SOCK_LOCK_ASSERT(so);
+ if (SOLISTENING(so))
+ goto listening;
+
+ /*
+ * Change this socket to listening state.
+ */
+ sbrcv_lowat = so->so_rcv.sb_lowat;
+ sbsnd_lowat = so->so_snd.sb_lowat;
+ sbrcv_hiwat = so->so_rcv.sb_hiwat;
+ sbsnd_hiwat = so->so_snd.sb_hiwat;
+ sbrcv_flags = so->so_rcv.sb_flags;
+ sbsnd_flags = so->so_snd.sb_flags;
+ sbrcv_timeo = so->so_rcv.sb_timeo;
+ sbsnd_timeo = so->so_snd.sb_timeo;
+
+ sbdestroy(&so->so_snd, so);
+ sbdestroy(&so->so_rcv, so);
+ sx_destroy(&so->so_snd.sb_sx);
+ sx_destroy(&so->so_rcv.sb_sx);
+ SOCKBUF_LOCK_DESTROY(&so->so_snd);
+ SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+
+#ifdef INVARIANTS
+ bzero(&so->so_rcv,
+ sizeof(struct socket) - offsetof(struct socket, so_rcv));
+#endif
+
+ so->sol_sbrcv_lowat = sbrcv_lowat;
+ so->sol_sbsnd_lowat = sbsnd_lowat;
+ so->sol_sbrcv_hiwat = sbrcv_hiwat;
+ so->sol_sbsnd_hiwat = sbsnd_hiwat;
+ so->sol_sbrcv_flags = sbrcv_flags;
+ so->sol_sbsnd_flags = sbsnd_flags;
+ so->sol_sbrcv_timeo = sbrcv_timeo;
+ so->sol_sbsnd_timeo = sbsnd_timeo;
+
+ so->sol_qlen = so->sol_incqlen = 0;
+ TAILQ_INIT(&so->sol_incomp);
+ TAILQ_INIT(&so->sol_comp);
+
+ so->sol_accept_filter = NULL;
+ so->sol_accept_filter_arg = NULL;
+ so->sol_accept_filter_str = NULL;
+
+ so->so_options |= SO_ACCEPTCONN;
+
+listening:
if (backlog < 0 || backlog > somaxconn)
backlog = somaxconn;
- so->so_qlimit = backlog;
- so->so_options |= SO_ACCEPTCONN;
+ so->sol_qlimit = backlog;
}
/*
+ * Wake up listeners/subsystems once we have a complete connection.
+ * Enters with the listening socket locked, returns with it unlocked.
+ */
+void
+solisten_wakeup(struct socket *sol)
+{
+
+ if (sol->sol_upcall != NULL)
+ (void )sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT);
+ else {
+ selwakeuppri(&sol->so_rdsel, PSOCK);
+ KNOTE_LOCKED(&sol->so_rdsel.si_note, 0);
+ }
+ SOLISTEN_UNLOCK(sol);
+ wakeup_one(&sol->sol_comp);
+}
+
+/*
+ * Return a single connection off a listening socket queue. The main consumer
+ * of the function is kern_accept4(). Some modules that do their own accept
+ * management also use the function.
+ *
+ * The listening socket must be locked on entry and is unlocked on
+ * return.
+ * The flags argument is a set of accept4(2) flags and ACCEPT4_INHERIT.
+ */
+int
+solisten_dequeue(struct socket *head, struct socket **ret, int flags)
+{
+ struct socket *so;
+ int error;
+
+ SOLISTEN_LOCK_ASSERT(head);
+
+ while (!(head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp) &&
+ head->so_error == 0) {
+ error = msleep(&head->sol_comp, &head->so_lock, PSOCK | PCATCH,
+ "accept", 0);
+ if (error != 0) {
+ SOLISTEN_UNLOCK(head);
+ return (error);
+ }
+ }
+ if (head->so_error) {
+ error = head->so_error;
+ head->so_error = 0;
+ SOLISTEN_UNLOCK(head);
+ return (error);
+ }
+ if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) {
+ SOLISTEN_UNLOCK(head);
+ return (EWOULDBLOCK);
+ }
+ so = TAILQ_FIRST(&head->sol_comp);
+ SOCK_LOCK(so);
+ KASSERT(so->so_qstate == SQ_COMP,
+ ("%s: so %p not SQ_COMP", __func__, so));
+ soref(so);
+ head->sol_qlen--;
+ so->so_qstate = SQ_NONE;
+ so->so_listen = NULL;
+ TAILQ_REMOVE(&head->sol_comp, so, so_list);
+ if (flags & ACCEPT4_INHERIT)
+ so->so_state |= (head->so_state & SS_NBIO);
+ else
+ so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
+ SOCK_UNLOCK(so);
+ sorele(head);
+
+ *ret = so;
+ return (0);
+}
+
+/*
* Evaluate the reference count and named references on a socket; if no
* references remain, free it. This should be called whenever a reference is
* released, such as in sorele(), but also when named reference flags are
@@ -774,44 +964,62 @@
sofree(struct socket *so)
{
struct protosw *pr = so->so_proto;
- struct socket *head;
- ACCEPT_LOCK_ASSERT();
SOCK_LOCK_ASSERT(so);
if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
- (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) {
+ (so->so_state & SS_PROTOREF) || (so->so_qstate == SQ_COMP)) {
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
return;
}
- head = so->so_head;
- if (head != NULL) {
- KASSERT((so->so_qstate & SQ_COMP) != 0 ||
- (so->so_qstate & SQ_INCOMP) != 0,
- ("sofree: so_head != NULL, but neither SQ_COMP nor "
- "SQ_INCOMP"));
- KASSERT((so->so_qstate & SQ_COMP) == 0 ||
- (so->so_qstate & SQ_INCOMP) == 0,
- ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP"));
- TAILQ_REMOVE(&head->so_incomp, so, so_list);
- head->so_incqlen--;
- so->so_qstate &= ~SQ_INCOMP;
- so->so_head = NULL;
+ if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) {
+ struct socket *sol;
+
+ sol = so->so_listen;
+ KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so));
+
+ /*
+ * To solve race between close of a listening socket and
+ * a socket on its incomplete queue, we need to lock both.
+ * The order is first listening socket, then regular.
+ * Since we don't have SS_NOFDREF neither SS_PROTOREF, this
+ * function and the listening socket are the only pointers
+ * to so. To preserve so and sol, we reference both and then
+ * relock.
+ * After relock the socket may not move to so_comp since it
+ * doesn't have PCB already, but it may be removed from
+ * so_incomp. If that happens, we share responsibility for
+ * freeing the socket, but soclose() has already removed
+ * it from queue.
+ */
+ soref(sol);
+ soref(so);
+ SOCK_UNLOCK(so);
+ SOLISTEN_LOCK(sol);
+ SOCK_LOCK(so);
+ if (so->so_qstate == SQ_INCOMP) {
+ KASSERT(so->so_listen == sol,
+ ("%s: so %p migrated out of sol %p",
+ __func__, so, sol));
+ TAILQ_REMOVE(&sol->sol_incomp, so, so_list);
+ sol->sol_incqlen--;
+ /* This is guaranteed not to be the last. */
+ refcount_release(&sol->so_count);
+ so->so_qstate = SQ_NONE;
+ so->so_listen = NULL;
+ } else
+ KASSERT(so->so_listen == NULL,
+ ("%s: so %p not on (in)comp with so_listen",
+ __func__, so));
+ sorele(sol);
+ KASSERT(so->so_count == 1,
+ ("%s: so %p count %u", __func__, so, so->so_count));
+ so->so_count = 0;
}
- KASSERT((so->so_qstate & SQ_COMP) == 0 &&
- (so->so_qstate & SQ_INCOMP) == 0,
- ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
- so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
- if (so->so_options & SO_ACCEPTCONN) {
- KASSERT((TAILQ_EMPTY(&so->so_comp)),
- ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)),
- ("sofree: so_incomp populated"));
- }
+ if (SOLISTENING(so))
+ so->so_error = ECONNABORTED;
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
VNET_SO_ASSERT(so);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
@@ -833,12 +1041,14 @@
* before calling pru_detach. This means that protocols shold not
* assume they can perform socket wakeups, etc, in their detach code.
*/
- sbdestroy(&so->so_snd, so);
- sbdestroy(&so->so_rcv, so);
- seldrain(&so->so_snd.sb_sel);
- seldrain(&so->so_rcv.sb_sel);
- knlist_destroy(&so->so_rcv.sb_sel.si_note);
- knlist_destroy(&so->so_snd.sb_sel.si_note);
+ if (!SOLISTENING(so)) {
+ sbdestroy(&so->so_snd, so);
+ sbdestroy(&so->so_rcv, so);
+ }
+ seldrain(&so->so_rdsel);
+ seldrain(&so->so_wrsel);
+ knlist_destroy(&so->so_rdsel.si_note);
+ knlist_destroy(&so->so_wrsel.si_note);
sodealloc(so);
}
@@ -853,6 +1063,8 @@
int
soclose(struct socket *so)
{
+ struct accept_queue lqueue;
+ bool listening;
int error = 0;
KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
@@ -885,41 +1097,42 @@
drop:
if (so->so_proto->pr_usrreqs->pru_close != NULL)
(*so->so_proto->pr_usrreqs->pru_close)(so);
- ACCEPT_LOCK();
- if (so->so_options & SO_ACCEPTCONN) {
+
+ SOCK_LOCK(so);
+ if ((listening = (so->so_options & SO_ACCEPTCONN))) {
struct socket *sp;
- /*
- * Prevent new additions to the accept queues due
- * to ACCEPT_LOCK races while we are draining them.
- */
- so->so_options &= ~SO_ACCEPTCONN;
- while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
- TAILQ_REMOVE(&so->so_incomp, sp, so_list);
- so->so_incqlen--;
- sp->so_qstate &= ~SQ_INCOMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
- soabort(sp);
- ACCEPT_LOCK();
+
+ TAILQ_INIT(&lqueue);
+ TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list);
+ TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list);
+
+ so->sol_qlen = so->sol_incqlen = 0;
+
+ TAILQ_FOREACH(sp, &lqueue, so_list) {
+ SOCK_LOCK(sp);
+ sp->so_qstate = SQ_NONE;
+ sp->so_listen = NULL;
+ SOCK_UNLOCK(sp);
+ /* Guaranteed not to be the last. */
+ refcount_release(&so->so_count);
}
- while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
- TAILQ_REMOVE(&so->so_comp, sp, so_list);
- so->so_qlen--;
- sp->so_qstate &= ~SQ_COMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
- soabort(sp);
- ACCEPT_LOCK();
- }
- KASSERT((TAILQ_EMPTY(&so->so_comp)),
- ("%s: so_comp populated", __func__));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)),
- ("%s: so_incomp populated", __func__));
}
- SOCK_LOCK(so);
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
- sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */
+ sorele(so);
+ if (listening) {
+ struct socket *sp;
+
+ TAILQ_FOREACH(sp, &lqueue, so_list) {
+ SOCK_LOCK(sp);
+ if (sp->so_count == 0) {
+ SOCK_UNLOCK(sp);
+ soabort(sp);
+ } else
+ /* sp is now in sofree() */
+ SOCK_UNLOCK(sp);
+ }
+ }
CURVNET_RESTORE();
return (error);
}
@@ -951,13 +1164,11 @@
KASSERT(so->so_count == 0, ("soabort: so_count"));
KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
- KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP"));
- KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP"));
+ KASSERT(so->so_qstate == SQ_NONE, ("soabort: !SQ_NONE"));
VNET_SO_ASSERT(so);
if (so->so_proto->pr_usrreqs->pru_abort != NULL)
(*so->so_proto->pr_usrreqs->pru_abort)(so);
- ACCEPT_LOCK();
SOCK_LOCK(so);
sofree(so);
}
@@ -2892,15 +3103,15 @@
break;
case SO_LISTENQLIMIT:
- optval = so->so_qlimit;
+ optval = SOLISTENING(so) ? so->sol_qlimit : 0;
goto integer;
case SO_LISTENQLEN:
- optval = so->so_qlen;
+ optval = SOLISTENING(so) ? so->sol_qlen : 0;
goto integer;
case SO_LISTENINCQLEN:
- optval = so->so_incqlen;
+ optval = SOLISTENING(so) ? so->sol_incqlen : 0;
goto integer;
case SO_TS_CLOCK:
@@ -3047,7 +3258,7 @@
if (so->so_sigio != NULL)
pgsigio(&so->so_sigio, SIGURG, 0);
- selwakeuppri(&so->so_rcv.sb_sel, PSOCK);
+ selwakeuppri(&so->so_rdsel, PSOCK);
}
int
@@ -3067,44 +3278,54 @@
sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
struct thread *td)
{
- int revents = 0;
+ int revents;
- SOCKBUF_LOCK(&so->so_snd);
- SOCKBUF_LOCK(&so->so_rcv);
- if (events & (POLLIN | POLLRDNORM))
- if (soreadabledata(so))
- revents |= events & (POLLIN | POLLRDNORM);
-
- if (events & (POLLOUT | POLLWRNORM))
- if (sowriteable(so))
- revents |= events & (POLLOUT | POLLWRNORM);
-
- if (events & (POLLPRI | POLLRDBAND))
- if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
- revents |= events & (POLLPRI | POLLRDBAND);
-
- if ((events & POLLINIGNEOF) == 0) {
- if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
- revents |= events & (POLLIN | POLLRDNORM);
- if (so->so_snd.sb_state & SBS_CANTSENDMORE)
- revents |= POLLHUP;
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ if (!(events & (POLLIN | POLLRDNORM)))
+ revents = 0;
+ else if (!TAILQ_EMPTY(&so->sol_comp))
+ revents = events & (POLLIN | POLLRDNORM);
+ else {
+ selrecord(td, &so->so_rdsel);
+ revents = 0;
}
- }
-
- if (revents == 0) {
- if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
- selrecord(td, &so->so_rcv.sb_sel);
- so->so_rcv.sb_flags |= SB_SEL;
+ } else {
+ revents = 0;
+ SOCKBUF_LOCK(&so->so_snd);
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (events & (POLLIN | POLLRDNORM))
+ if (soreadabledata(so))
+ revents |= events & (POLLIN | POLLRDNORM);
+ if (events & (POLLOUT | POLLWRNORM))
+ if (sowriteable(so))
+ revents |= events & (POLLOUT | POLLWRNORM);
+ if (events & (POLLPRI | POLLRDBAND))
+ if (so->so_oobmark ||
+ (so->so_rcv.sb_state & SBS_RCVATMARK))
+ revents |= events & (POLLPRI | POLLRDBAND);
+ if ((events & POLLINIGNEOF) == 0) {
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ revents |= events & (POLLIN | POLLRDNORM);
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+ revents |= POLLHUP;
+ }
}
-
- if (events & (POLLOUT | POLLWRNORM)) {
- selrecord(td, &so->so_snd.sb_sel);
- so->so_snd.sb_flags |= SB_SEL;
+ if (revents == 0) {
+ if (events &
+ (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
+ selrecord(td, &so->so_rdsel);
+ so->so_rcv.sb_flags |= SB_SEL;
+ }
+ if (events & (POLLOUT | POLLWRNORM)) {
+ selrecord(td, &so->so_wrsel);
+ so->so_snd.sb_flags |= SB_SEL;
+ }
}
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_UNLOCK(&so->so_snd);
}
-
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_UNLOCK(&so->so_snd);
+ SOCK_UNLOCK(so);
return (revents);
}
@@ -3113,28 +3334,38 @@
{
struct socket *so = kn->kn_fp->f_data;
struct sockbuf *sb;
+ struct knlist *knl;
switch (kn->kn_filter) {
case EVFILT_READ:
kn->kn_fop = &soread_filtops;
+ knl = &so->so_rdsel.si_note;
sb = &so->so_rcv;
break;
case EVFILT_WRITE:
kn->kn_fop = &sowrite_filtops;
+ knl = &so->so_wrsel.si_note;
sb = &so->so_snd;
break;
case EVFILT_EMPTY:
kn->kn_fop = &soempty_filtops;
+ knl = &so->so_wrsel.si_note;
sb = &so->so_snd;
break;
default:
return (EINVAL);
}
- SOCKBUF_LOCK(sb);
- knlist_add(&sb->sb_sel.si_note, kn, 1);
- sb->sb_flags |= SB_KNOTE;
- SOCKBUF_UNLOCK(sb);
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ knlist_add(knl, kn, 1);
+ } else {
+ SOCKBUF_LOCK(sb);
+ knlist_add(knl, kn, 1);
+ sb->sb_flags |= SB_KNOTE;
+ SOCKBUF_UNLOCK(sb);
+ }
+ SOCK_UNLOCK(so);
return (0);
}
@@ -3313,11 +3544,11 @@
{
struct socket *so = kn->kn_fp->f_data;
- SOCKBUF_LOCK(&so->so_rcv);
- knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1);
- if (knlist_empty(&so->so_rcv.sb_sel.si_note))
+ so_rdknl_lock(so);
+ knlist_remove(&so->so_rdsel.si_note, kn, 1);
+ if (!SOLISTENING(so) && knlist_empty(&so->so_rdsel.si_note))
so->so_rcv.sb_flags &= ~SB_KNOTE;
- SOCKBUF_UNLOCK(&so->so_rcv);
+ so_rdknl_unlock(so);
}
/*ARGSUSED*/
@@ -3327,11 +3558,13 @@
struct socket *so;
so = kn->kn_fp->f_data;
- if (so->so_options & SO_ACCEPTCONN) {
- kn->kn_data = so->so_qlen;
- return (!TAILQ_EMPTY(&so->so_comp));
+ if (SOLISTENING(so)) {
+ SOCK_LOCK_ASSERT(so);
+ kn->kn_data = so->sol_qlen;
+ return (!TAILQ_EMPTY(&so->sol_comp));
}
+
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
@@ -3357,11 +3590,11 @@
{
struct socket *so = kn->kn_fp->f_data;
- SOCKBUF_LOCK(&so->so_snd);
- knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1);
- if (knlist_empty(&so->so_snd.sb_sel.si_note))
+ so_wrknl_lock(so);
+ knlist_remove(&so->so_wrsel.si_note, kn, 1);
+ if (!SOLISTENING(so) && knlist_empty(&so->so_wrsel.si_note))
so->so_snd.sb_flags &= ~SB_KNOTE;
- SOCKBUF_UNLOCK(&so->so_snd);
+ so_wrknl_unlock(so);
}
/*ARGSUSED*/
@@ -3371,6 +3604,10 @@
struct socket *so;
so = kn->kn_fp->f_data;
+
+ if (SOLISTENING(so))
+ return (0);
+
SOCKBUF_LOCK_ASSERT(&so->so_snd);
kn->kn_data = sbspace(&so->so_snd);
@@ -3397,6 +3634,10 @@
struct socket *so;
so = kn->kn_fp->f_data;
+
+ if (SOLISTENING(so))
+ return (1);
+
SOCKBUF_LOCK_ASSERT(&so->so_snd);
kn->kn_data = sbused(&so->so_snd);
@@ -3465,42 +3706,52 @@
struct socket *head;
int ret;
+ /*
+ * XXXGL: this is the only place where we acquire socket locks
+ * in reverse order: first child, then listening socket. To
+ * avoid possible LOR, use try semantics.
+ */
restart:
- ACCEPT_LOCK();
SOCK_LOCK(so);
+ if ((head = so->so_listen) != NULL &&
+ __predict_false(SOLISTEN_TRYLOCK(head) == 0)) {
+ SOCK_UNLOCK(so);
+ goto restart;
+ }
so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
so->so_state |= SS_ISCONNECTED;
- head = so->so_head;
- if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
+ if (head != NULL && (so->so_qstate == SQ_INCOMP)) {
+again:
if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+ TAILQ_REMOVE(&head->sol_incomp, so, so_list);
+ head->sol_incqlen--;
+ TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+ head->sol_qlen++;
+ so->so_qstate = SQ_COMP;
SOCK_UNLOCK(so);
- TAILQ_REMOVE(&head->so_incomp, so, so_list);
- head->so_incqlen--;
- so->so_qstate &= ~SQ_INCOMP;
- TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
- head->so_qlen++;
- so->so_qstate |= SQ_COMP;
- ACCEPT_UNLOCK();
- sorwakeup(head);
- wakeup_one(&head->so_timeo);
+ solisten_wakeup(head); /* unlocks */
} else {
- ACCEPT_UNLOCK();
+ SOCKBUF_LOCK(&so->so_rcv);
soupcall_set(so, SO_RCV,
- head->so_accf->so_accept_filter->accf_callback,
- head->so_accf->so_accept_filter_arg);
+ head->sol_accept_filter->accf_callback,
+ head->sol_accept_filter_arg);
so->so_options &= ~SO_ACCEPTFILTER;
- ret = head->so_accf->so_accept_filter->accf_callback(so,
- head->so_accf->so_accept_filter_arg, M_NOWAIT);
- if (ret == SU_ISCONNECTED)
+ ret = head->sol_accept_filter->accf_callback(so,
+ head->sol_accept_filter_arg, M_NOWAIT);
+ if (ret == SU_ISCONNECTED) {
soupcall_clear(so, SO_RCV);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ goto again;
+ }
+ SOCKBUF_UNLOCK(&so->so_rcv);
SOCK_UNLOCK(so);
- if (ret == SU_ISCONNECTED)
- goto restart;
+ SOLISTEN_UNLOCK(head);
}
return;
}
+ if (head != NULL)
+ SOLISTEN_UNLOCK(head);
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
wakeup(&so->so_timeo);
sorwakeup(so);
sowwakeup(so);
@@ -3510,16 +3761,17 @@
soisdisconnecting(struct socket *so)
{
- /*
- * Note: This code assumes that SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) are the same.
- */
- SOCKBUF_LOCK(&so->so_rcv);
+ SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTING;
so->so_state |= SS_ISDISCONNECTING;
- socantrcvmore_locked(so);
- SOCKBUF_LOCK(&so->so_snd);
- socantsendmore_locked(so);
+
+ if (!SOLISTENING(so)) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ socantsendmore_locked(so);
+ }
+ SOCK_UNLOCK(so);
wakeup(&so->so_timeo);
}
@@ -3527,17 +3779,18 @@
soisdisconnected(struct socket *so)
{
- /*
- * Note: This code assumes that SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) are the same.
- */
- SOCKBUF_LOCK(&so->so_rcv);
+ SOCK_LOCK(so);
so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
so->so_state |= SS_ISDISCONNECTED;
- socantrcvmore_locked(so);
- SOCKBUF_LOCK(&so->so_snd);
- sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
- socantsendmore_locked(so);
+
+ if (!SOLISTENING(so)) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
+ socantsendmore_locked(so);
+ }
+ SOCK_UNLOCK(so);
wakeup(&so->so_timeo);
}
@@ -3563,6 +3816,8 @@
{
struct sockbuf *sb;
+ KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
+
switch (which) {
case SO_RCV:
sb = &so->so_rcv;
@@ -3584,6 +3839,8 @@
{
struct sockbuf *sb;
+ KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
+
switch (which) {
case SO_RCV:
sb = &so->so_rcv;
@@ -3595,12 +3852,110 @@
panic("soupcall_clear: bad which");
}
SOCKBUF_LOCK_ASSERT(sb);
- KASSERT(sb->sb_upcall != NULL, ("soupcall_clear: no upcall to clear"));
+ KASSERT(sb->sb_upcall != NULL,
+ ("%s: so %p no upcall to clear", __func__, so));
sb->sb_upcall = NULL;
sb->sb_upcallarg = NULL;
sb->sb_flags &= ~SB_UPCALL;
}
+void
+solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg)
+{
+
+ SOLISTEN_LOCK_ASSERT(so);
+ so->sol_upcall = func;
+ so->sol_upcallarg = arg;
+}
+
+static void
+so_rdknl_lock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK(so);
+ else
+ SOCKBUF_LOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_unlock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_locked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK_ASSERT(so);
+ else
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_unlocked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK_ASSERT(so);
+ else
+ SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_wrknl_lock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK(so);
+ else
+ SOCKBUF_LOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_unlock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_locked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK_ASSERT(so);
+ else
+ SOCKBUF_LOCK_ASSERT(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_unlocked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK_ASSERT(so);
+ else
+ SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+}
+
/*
* Create an external-format (``xsocket'') structure using the information in
* the kernel-format socket structure pointed to by so. This is done to
@@ -3622,32 +3977,24 @@
xso->so_pcb = so->so_pcb;
xso->xso_protocol = so->so_proto->pr_protocol;
xso->xso_family = so->so_proto->pr_domain->dom_family;
- xso->so_qlen = so->so_qlen;
- xso->so_incqlen = so->so_incqlen;
- xso->so_qlimit = so->so_qlimit;
xso->so_timeo = so->so_timeo;
xso->so_error = so->so_error;
- xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
- xso->so_oobmark = so->so_oobmark;
- sbtoxsockbuf(&so->so_snd, &xso->so_snd);
- sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
xso->so_uid = so->so_cred->cr_uid;
-}
-
-
-/*
- * Socket accessor functions to provide external consumers with
- * a safe interface to socket state
- *
- */
-
-void
-so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *),
- void *arg)
-{
-
- TAILQ_FOREACH(so, &so->so_comp, so_list)
- func(so, arg);
+ xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
+ if (SOLISTENING(so)) {
+ xso->so_qlen = so->sol_qlen;
+ xso->so_incqlen = so->sol_incqlen;
+ xso->so_qlimit = so->sol_qlimit;
+ xso->so_oobmark = 0;
+ bzero(&xso->so_snd, sizeof(xso->so_snd));
+ bzero(&xso->so_rcv, sizeof(xso->so_rcv));
+ } else {
+ xso->so_state |= so->so_qstate;
+ xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0;
+ xso->so_oobmark = so->so_oobmark;
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ }
}
struct sockbuf *
Index: head/sys/kern/uipc_syscalls.c
===================================================================
--- head/sys/kern/uipc_syscalls.c
+++ head/sys/kern/uipc_syscalls.c
@@ -68,13 +68,6 @@
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
-/*
- * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
- * and SOCK_NONBLOCK.
- */
-#define ACCEPT4_INHERIT 0x1
-#define ACCEPT4_COMPAT 0x2
-
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
@@ -350,59 +343,22 @@
(flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps);
if (error != 0)
goto done;
- ACCEPT_LOCK();
- if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
- ACCEPT_UNLOCK();
- error = EWOULDBLOCK;
+ SOCK_LOCK(head);
+ if (!SOLISTENING(head)) {
+ SOCK_UNLOCK(head);
+ error = EINVAL;
goto noconnection;
}
- while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
- head->so_error = ECONNABORTED;
- break;
- }
- error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
- "accept", 0);
- if (error != 0) {
- ACCEPT_UNLOCK();
- goto noconnection;
- }
- }
- if (head->so_error) {
- error = head->so_error;
- head->so_error = 0;
- ACCEPT_UNLOCK();
+
+ error = solisten_dequeue(head, &so, flags);
+ if (error != 0)
goto noconnection;
- }
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- if (flags & ACCEPT4_INHERIT)
- so->so_state |= (head->so_state & SS_NBIO);
- else
- so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
-
/* An extra reference on `nfp' has been held for us by falloc(). */
td->td_retval[0] = fd;
- /* connection has been removed from the listen queue */
- KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
+ /* Connection has been removed from the listen queue. */
+ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
if (flags & ACCEPT4_INHERIT) {
pgid = fgetown(&head->so_sigio);
@@ -420,7 +376,6 @@
(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
tmp = fflag & FASYNC;
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
- sa = NULL;
error = soaccept(so, &sa);
if (error != 0)
goto noconnection;
@@ -558,7 +513,7 @@
}
SOCK_LOCK(so);
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
- error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
+ error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH,
"connec", 0);
if (error != 0) {
if (error == EINTR || error == ERESTART)
Index: head/sys/kern/uipc_usrreq.c
===================================================================
--- head/sys/kern/uipc_usrreq.c
+++ head/sys/kern/uipc_usrreq.c
@@ -189,10 +189,9 @@
/*
* Locking and synchronization:
*
- * Three types of locks exit in the local domain socket implementation: a
- * global list mutex, a global linkage rwlock, and per-unpcb mutexes. Of the
- * global locks, the list lock protects the socket count, global generation
- * number, and stream/datagram global lists. The linkage lock protects the
+ * Two types of locks exist in the local domain socket implementation: a
+ * global linkage rwlock and per-unpcb mutexes. The linkage lock protects
+ * the socket count, global generation number, stream/datagram global lists and
* interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be
* held exclusively over the acquisition of multiple unpcb locks to prevent
* deadlock.
@@ -233,7 +232,6 @@
* to perform namei() and other file system operations.
*/
static struct rwlock unp_link_rwlock;
-static struct mtx unp_list_lock;
static struct mtx unp_defers_lock;
#define UNP_LINK_LOCK_INIT() rw_init(&unp_link_rwlock, \
@@ -250,12 +248,8 @@
#define UNP_LINK_WUNLOCK() rw_wunlock(&unp_link_rwlock)
#define UNP_LINK_WLOCK_ASSERT() rw_assert(&unp_link_rwlock, \
RA_WLOCKED)
+#define UNP_LINK_WOWNED() rw_wowned(&unp_link_rwlock)
-#define UNP_LIST_LOCK_INIT() mtx_init(&unp_list_lock, \
- "unp_list_lock", NULL, MTX_DEF)
-#define UNP_LIST_LOCK() mtx_lock(&unp_list_lock)
-#define UNP_LIST_UNLOCK() mtx_unlock(&unp_list_lock)
-
#define UNP_DEFERRED_LOCK_INIT() mtx_init(&unp_defers_lock, \
"unp_defer", NULL, MTX_DEF)
#define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock)
@@ -396,6 +390,7 @@
u_long sendspace, recvspace;
struct unpcb *unp;
int error;
+ bool locked;
KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
@@ -430,10 +425,12 @@
unp->unp_socket = so;
so->so_pcb = unp;
unp->unp_refcount = 1;
- if (so->so_head != NULL)
+ if (so->so_listen != NULL)
unp->unp_flags |= UNP_NASCENT;
- UNP_LIST_LOCK();
+ if ((locked = UNP_LINK_WOWNED()) == false)
+ UNP_LINK_WLOCK();
+
unp->unp_gencnt = ++unp_gencnt;
unp_count++;
switch (so->so_type) {
@@ -452,8 +449,10 @@
default:
panic("uipc_attach");
}
- UNP_LIST_UNLOCK();
+ if (locked == false)
+ UNP_LINK_WUNLOCK();
+
return (0);
}
@@ -607,6 +606,7 @@
uipc_close(struct socket *so)
{
struct unpcb *unp, *unp2;
+ struct vnode *vp = NULL;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
@@ -619,8 +619,14 @@
unp_disconnect(unp, unp2);
UNP_PCB_UNLOCK(unp2);
}
+ if (SOLISTENING(so) && ((vp = unp->unp_vnode) != NULL)) {
+ VOP_UNP_DETACH(vp);
+ unp->unp_vnode = NULL;
+ }
UNP_PCB_UNLOCK(unp);
UNP_LINK_WUNLOCK();
+ if (vp)
+ vrele(vp);
}
static int
@@ -657,18 +663,13 @@
vp = NULL;
local_unp_rights = 0;
- UNP_LIST_LOCK();
+ UNP_LINK_WLOCK();
LIST_REMOVE(unp, unp_link);
unp->unp_gencnt = ++unp_gencnt;
--unp_count;
- UNP_LIST_UNLOCK();
-
- if ((unp->unp_flags & UNP_NASCENT) != 0) {
- UNP_PCB_LOCK(unp);
- goto teardown;
- }
- UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
+ if ((unp->unp_flags & UNP_NASCENT) != 0)
+ goto teardown;
if ((vp = unp->unp_vnode) != NULL) {
VOP_UNP_DETACH(vp);
@@ -693,8 +694,8 @@
UNP_PCB_UNLOCK(ref);
}
local_unp_rights = unp_rights;
- UNP_LINK_WUNLOCK();
teardown:
+ UNP_LINK_WUNLOCK();
unp->unp_socket->so_pcb = NULL;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
@@ -1315,7 +1316,7 @@
{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
struct vnode *vp;
- struct socket *so2, *so3;
+ struct socket *so2;
struct unpcb *unp, *unp2, *unp3;
struct nameidata nd;
char buf[SOCK_MAXADDRLEN];
@@ -1392,22 +1393,20 @@
error = EPROTOTYPE;
goto bad2;
}
+ UNP_PCB_LOCK(unp);
+ UNP_PCB_LOCK(unp2);
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
if (so2->so_options & SO_ACCEPTCONN) {
CURVNET_SET(so2->so_vnet);
- so3 = sonewconn(so2, 0);
+ so2 = sonewconn(so2, 0);
CURVNET_RESTORE();
} else
- so3 = NULL;
- if (so3 == NULL) {
+ so2 = NULL;
+ if (so2 == NULL) {
error = ECONNREFUSED;
- goto bad2;
+ goto bad3;
}
- unp = sotounpcb(so);
- unp2 = sotounpcb(so2);
- unp3 = sotounpcb(so3);
- UNP_PCB_LOCK(unp);
- UNP_PCB_LOCK(unp2);
+ unp3 = sotounpcb(so2);
UNP_PCB_LOCK(unp3);
if (unp2->unp_addr != NULL) {
bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
@@ -1433,23 +1432,19 @@
unp->unp_flags |= UNP_HAVEPC;
if (unp2->unp_flags & UNP_WANTCRED)
unp3->unp_flags |= UNP_WANTCRED;
- UNP_PCB_UNLOCK(unp3);
UNP_PCB_UNLOCK(unp2);
- UNP_PCB_UNLOCK(unp);
+ unp2 = unp3;
#ifdef MAC
- mac_socketpeer_set_from_socket(so, so3);
- mac_socketpeer_set_from_socket(so3, so);
+ mac_socketpeer_set_from_socket(so, so2);
+ mac_socketpeer_set_from_socket(so2, so);
#endif
-
- so2 = so3;
}
- unp = sotounpcb(so);
- KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
- unp2 = sotounpcb(so2);
- KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL"));
- UNP_PCB_LOCK(unp);
- UNP_PCB_LOCK(unp2);
+
+ KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
+ sotounpcb(so2) == unp2,
+ ("%s: unp2 %p so2 %p", __func__, unp2, so2));
error = unp_connect2(so, so2, PRU_CONNECT);
+bad3:
UNP_PCB_UNLOCK(unp2);
UNP_PCB_UNLOCK(unp);
bad2:
@@ -1591,10 +1586,10 @@
* OK, now we're committed to doing something.
*/
xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
- UNP_LIST_LOCK();
+ UNP_LINK_RLOCK();
gencnt = unp_gencnt;
n = unp_count;
- UNP_LIST_UNLOCK();
+ UNP_LINK_RUNLOCK();
xug->xug_len = sizeof *xug;
xug->xug_count = n;
@@ -1608,7 +1603,7 @@
unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
- UNP_LIST_LOCK();
+ UNP_LINK_RLOCK();
for (unp = LIST_FIRST(head), i = 0; unp && i < n;
unp = LIST_NEXT(unp, unp_link)) {
UNP_PCB_LOCK(unp);
@@ -1623,7 +1618,7 @@
}
UNP_PCB_UNLOCK(unp);
}
- UNP_LIST_UNLOCK();
+ UNP_LINK_RUNLOCK();
n = i; /* In case we lost some during malloc. */
error = 0;
@@ -1881,7 +1876,6 @@
TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
UNP_LINK_LOCK_INIT();
- UNP_LIST_LOCK_INIT();
UNP_DEFERRED_LOCK_INIT();
}
@@ -2232,8 +2226,7 @@
static void
unp_gc_process(struct unpcb *unp)
{
- struct socket *soa;
- struct socket *so;
+ struct socket *so, *soa;
struct file *fp;
/* Already processed. */
@@ -2253,28 +2246,30 @@
return;
}
- /*
- * Mark all sockets we reference with RIGHTS.
- */
so = unp->unp_socket;
- if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
- SOCKBUF_LOCK(&so->so_rcv);
- unp_scan(so->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ /*
+ * Mark all sockets in our accept queue.
+ */
+ TAILQ_FOREACH(soa, &so->sol_comp, so_list) {
+ if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
+ continue;
+ SOCKBUF_LOCK(&soa->so_rcv);
+ unp_scan(soa->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&soa->so_rcv);
+ }
+ } else {
+ /*
+ * Mark all sockets we reference with RIGHTS.
+ */
+ if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ unp_scan(so->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
}
-
- /*
- * Mark all sockets in our accept queue.
- */
- ACCEPT_LOCK();
- TAILQ_FOREACH(soa, &so->so_comp, so_list) {
- if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
- continue;
- SOCKBUF_LOCK(&soa->so_rcv);
- unp_scan(soa->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&soa->so_rcv);
- }
- ACCEPT_UNLOCK();
+ SOCK_UNLOCK(so);
unp->unp_gcflag |= UNPGC_SCANNED;
}
@@ -2297,7 +2292,7 @@
int i, total;
unp_taskcount++;
- UNP_LIST_LOCK();
+ UNP_LINK_RLOCK();
/*
* First clear all gc flags from previous runs, apart from
* UNPGC_IGNORE_RIGHTS.
@@ -2320,7 +2315,7 @@
LIST_FOREACH(unp, *head, unp_link)
unp_gc_process(unp);
} while (unp_marked);
- UNP_LIST_UNLOCK();
+ UNP_LINK_RUNLOCK();
if (unp_unreachable == 0)
return;
@@ -2335,7 +2330,6 @@
* as as unreachable and store them locally.
*/
UNP_LINK_RLOCK();
- UNP_LIST_LOCK();
for (total = 0, head = heads; *head != NULL; head++)
LIST_FOREACH(unp, *head, unp_link)
if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
@@ -2348,7 +2342,6 @@
KASSERT(total <= unp_unreachable,
("unp_gc: incorrect unreachable count."));
}
- UNP_LIST_UNLOCK();
UNP_LINK_RUNLOCK();
/*
@@ -2391,10 +2384,11 @@
struct unpcb *unp;
unp = sotounpcb(so);
- UNP_LIST_LOCK();
+ UNP_LINK_WLOCK();
unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
- UNP_LIST_UNLOCK();
- unp_dispose_mbuf(so->so_rcv.sb_mb);
+ UNP_LINK_WUNLOCK();
+ if (!SOLISTENING(so))
+ unp_dispose_mbuf(so->so_rcv.sb_mb);
}
static void
Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
===================================================================
--- head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
+++ head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
@@ -614,21 +614,13 @@
pcb = ng_btsocket_l2cap_pcb_by_addr(&rt->src, ip->psm);
if (pcb != NULL) {
- struct socket *so1 = NULL;
+ struct socket *so1;
mtx_lock(&pcb->pcb_mtx);
- /*
- * First check the pending connections queue and if we have
- * space then create new socket and set proper source address.
- */
-
- if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
- CURVNET_SET(pcb->so->so_vnet);
- so1 = sonewconn(pcb->so, 0);
- CURVNET_RESTORE();
- }
-
+ CURVNET_SET(pcb->so->so_vnet);
+ so1 = sonewconn(pcb->so, 0);
+ CURVNET_RESTORE();
if (so1 == NULL) {
result = NG_L2CAP_NO_RESOURCES;
goto respond;
Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
===================================================================
--- head/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
+++ head/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
@@ -1149,7 +1149,7 @@
{
ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb1 = NULL;
ng_btsocket_l2cap_pcb_p l2pcb = NULL;
- struct socket *so1 = NULL;
+ struct socket *so1;
mtx_assert(&s->session_mtx, MA_OWNED);
@@ -1171,11 +1171,9 @@
mtx_lock(&pcb->pcb_mtx);
- if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
- CURVNET_SET(pcb->so->so_vnet);
- so1 = sonewconn(pcb->so, 0);
- CURVNET_RESTORE();
- }
+ CURVNET_SET(pcb->so->so_vnet);
+ so1 = sonewconn(pcb->so, 0);
+ CURVNET_RESTORE();
mtx_unlock(&pcb->pcb_mtx);
@@ -1405,46 +1403,24 @@
static int
ng_btsocket_rfcomm_session_accept(ng_btsocket_rfcomm_session_p s0)
{
- struct socket *l2so = NULL;
+ struct socket *l2so;
struct sockaddr_l2cap *l2sa = NULL;
ng_btsocket_l2cap_pcb_t *l2pcb = NULL;
ng_btsocket_rfcomm_session_p s = NULL;
- int error = 0;
+ int error;
mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
mtx_assert(&s0->session_mtx, MA_OWNED);
- /* Check if there is a complete L2CAP connection in the queue */
- if ((error = s0->l2so->so_error) != 0) {
+ SOLISTEN_LOCK(s0->l2so);
+ error = solisten_dequeue(s0->l2so, &l2so, 0);
+ if (error == EWOULDBLOCK)
+ return (error);
+ if (error) {
NG_BTSOCKET_RFCOMM_ERR(
"%s: Could not accept connection on L2CAP socket, error=%d\n", __func__, error);
- s0->l2so->so_error = 0;
-
return (error);
}
-
- ACCEPT_LOCK();
- if (TAILQ_EMPTY(&s0->l2so->so_comp)) {
- ACCEPT_UNLOCK();
- if (s0->l2so->so_rcv.sb_state & SBS_CANTRCVMORE)
- return (ECONNABORTED);
- return (EWOULDBLOCK);
- }
-
- /* Accept incoming L2CAP connection */
- l2so = TAILQ_FIRST(&s0->l2so->so_comp);
- if (l2so == NULL)
- panic("%s: l2so == NULL\n", __func__);
-
- TAILQ_REMOVE(&s0->l2so->so_comp, l2so, so_list);
- s0->l2so->so_qlen --;
- l2so->so_qstate &= ~SQ_COMP;
- l2so->so_head = NULL;
- SOCK_LOCK(l2so);
- soref(l2so);
- l2so->so_state |= SS_NBIO;
- SOCK_UNLOCK(l2so);
- ACCEPT_UNLOCK();
error = soaccept(l2so, (struct sockaddr **) &l2sa);
if (error != 0) {
Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
===================================================================
--- head/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
+++ head/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
@@ -471,20 +471,13 @@
pcb = ng_btsocket_sco_pcb_by_addr(&rt->src);
if (pcb != NULL) {
- struct socket *so1 = NULL;
+ struct socket *so1;
/* pcb is locked */
- /*
- * First check the pending connections queue and if we have
- * space then create new socket and set proper source address.
- */
-
- if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
- CURVNET_SET(pcb->so->so_vnet);
- so1 = sonewconn(pcb->so, 0);
- CURVNET_RESTORE();
- }
+ CURVNET_SET(pcb->so->so_vnet);
+ so1 = sonewconn(pcb->so, 0);
+ CURVNET_RESTORE();
if (so1 == NULL) {
status = 0x0d; /* Rejected due to limited resources */
Index: head/sys/netgraph/ng_ksocket.c
===================================================================
--- head/sys/netgraph/ng_ksocket.c
+++ head/sys/netgraph/ng_ksocket.c
@@ -153,8 +153,7 @@
};
/* Helper functions */
-static int ng_ksocket_check_accept(priv_p);
-static void ng_ksocket_finish_accept(priv_p);
+static int ng_ksocket_accept(priv_p);
static int ng_ksocket_incoming(struct socket *so, void *arg, int waitflag);
static int ng_ksocket_parse(const struct ng_ksocket_alias *aliases,
const char *s, int family);
@@ -698,6 +697,7 @@
ERROUT(ENXIO);
/* Listen */
+ so->so_state |= SS_NBIO;
error = solisten(so, *((int32_t *)msg->data), td);
break;
}
@@ -716,21 +716,16 @@
if (priv->flags & KSF_ACCEPTING)
ERROUT(EALREADY);
- error = ng_ksocket_check_accept(priv);
- if (error != 0 && error != EWOULDBLOCK)
- ERROUT(error);
-
/*
* If a connection is already complete, take it.
* Otherwise let the upcall function deal with
* the connection when it comes in.
*/
+ error = ng_ksocket_accept(priv);
+ if (error != 0 && error != EWOULDBLOCK)
+ ERROUT(error);
priv->response_token = msg->header.token;
raddr = priv->response_addr = NGI_RETADDR(item);
- if (error == 0) {
- ng_ksocket_finish_accept(priv);
- } else
- priv->flags |= KSF_ACCEPTING;
break;
}
@@ -1068,13 +1063,8 @@
}
/* Check whether a pending accept operation has completed */
- if (priv->flags & KSF_ACCEPTING) {
- error = ng_ksocket_check_accept(priv);
- if (error != EWOULDBLOCK)
- priv->flags &= ~KSF_ACCEPTING;
- if (error == 0)
- ng_ksocket_finish_accept(priv);
- }
+ if (priv->flags & KSF_ACCEPTING)
+ (void )ng_ksocket_accept(priv);
/*
* If we don't have a hook, we must handle data events later. When
@@ -1171,37 +1161,10 @@
}
}
-/*
- * Check for a completed incoming connection and return 0 if one is found.
- * Otherwise return the appropriate error code.
- */
static int
-ng_ksocket_check_accept(priv_p priv)
+ng_ksocket_accept(priv_p priv)
{
struct socket *const head = priv->so;
- int error;
-
- if ((error = head->so_error) != 0) {
- head->so_error = 0;
- return error;
- }
- /* Unlocked read. */
- if (TAILQ_EMPTY(&head->so_comp)) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE)
- return ECONNABORTED;
- return EWOULDBLOCK;
- }
- return 0;
-}
-
-/*
- * Handle the first completed incoming connection, assumed to be already
- * on the socket's so_comp queue.
- */
-static void
-ng_ksocket_finish_accept(priv_p priv)
-{
- struct socket *const head = priv->so;
struct socket *so;
struct sockaddr *sa = NULL;
struct ng_mesg *resp;
@@ -1211,24 +1174,16 @@
int len;
int error;
- ACCEPT_LOCK();
- so = TAILQ_FIRST(&head->so_comp);
- if (so == NULL) { /* Should never happen */
- ACCEPT_UNLOCK();
- return;
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
+ if (error == EWOULDBLOCK) {
+ priv->flags |= KSF_ACCEPTING;
+ return (error);
}
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- SOCK_LOCK(so);
- soref(so);
- so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
+ priv->flags &= ~KSF_ACCEPTING;
+ if (error)
+ return (error);
- /* XXX KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); */
-
soaccept(so, &sa);
len = OFFSETOF(struct ng_ksocket_accept, addr);
@@ -1288,6 +1243,8 @@
out:
if (sa != NULL)
free(sa, M_SONAME);
+
+ return (0);
}
/*
Index: head/sys/netinet/sctp_input.c
===================================================================
--- head/sys/netinet/sctp_input.c
+++ head/sys/netinet/sctp_input.c
@@ -5200,11 +5200,21 @@
* listening responded to a INIT-ACK and then
* closed. We opened and bound.. and are now no
* longer listening.
+ *
+ * XXXGL: notes on checking listen queue length.
+ * 1) SCTP_IS_LISTENING() doesn't necessarily mean
+ * SOLISTENING(), because a listening "UDP type"
+ * socket isn't listening in terms of the socket
+ * layer. It is a normal data flow socket, that
+ * can fork off new connections. Thus, we should
+ * look into sol_qlen only in case we are !UDP.
+ * 2) Checking sol_qlen in general requires locking
+ * the socket, and this code lacks that.
*/
-
if ((stcb == NULL) &&
(!SCTP_IS_LISTENING(inp) ||
- inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
+ (!(inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) &&
+ inp->sctp_socket->sol_qlen >= inp->sctp_socket->sol_qlimit))) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) {
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
Index: head/sys/netinet/sctp_syscalls.c
===================================================================
--- head/sys/netinet/sctp_syscalls.c
+++ head/sys/netinet/sctp_syscalls.c
@@ -152,29 +152,11 @@
td->td_retval[0] = fd;
CURVNET_SET(head->so_vnet);
- so = sonewconn(head, SS_ISCONNECTED);
+ so = sopeeloff(head);
if (so == NULL) {
error = ENOMEM;
goto noconnection;
}
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so);
- soref(so); /* file descriptor reference */
- SOCK_UNLOCK(so);
-
- ACCEPT_LOCK();
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_state &= ~SS_NOFDREF;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- ACCEPT_UNLOCK();
finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
if (error != 0)
Index: head/sys/netinet/sctp_sysctl.c
===================================================================
--- head/sys/netinet/sctp_sysctl.c
+++ head/sys/netinet/sctp_sysctl.c
@@ -415,12 +415,12 @@
xinpcb.qlen = 0;
xinpcb.maxqlen = 0;
} else {
- xinpcb.qlen = so->so_qlen;
- xinpcb.qlen_old = so->so_qlen > USHRT_MAX ?
- USHRT_MAX : (uint16_t)so->so_qlen;
- xinpcb.maxqlen = so->so_qlimit;
- xinpcb.maxqlen_old = so->so_qlimit > USHRT_MAX ?
- USHRT_MAX : (uint16_t)so->so_qlimit;
+ xinpcb.qlen = so->sol_qlen;
+ xinpcb.qlen_old = so->sol_qlen > USHRT_MAX ?
+ USHRT_MAX : (uint16_t)so->sol_qlen;
+ xinpcb.maxqlen = so->sol_qlimit;
+ xinpcb.maxqlen_old = so->sol_qlimit > USHRT_MAX ?
+ USHRT_MAX : (uint16_t)so->sol_qlimit;
}
SCTP_INP_INCR_REF(inp);
SCTP_INP_RUNLOCK(inp);
Index: head/sys/netinet/sctp_usrreq.c
===================================================================
--- head/sys/netinet/sctp_usrreq.c
+++ head/sys/netinet/sctp_usrreq.c
@@ -7138,19 +7138,12 @@
}
}
SCTP_INP_WLOCK(inp);
- SOCK_LOCK(so);
- /* It appears for 7.0 and on, we must always call this. */
- solisten_proto(so, backlog);
- if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
- /* remove the ACCEPTCONN flag for one-to-many sockets */
- so->so_options &= ~SO_ACCEPTCONN;
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) {
+ SOCK_LOCK(so);
+ solisten_proto(so, backlog);
+ SOCK_UNLOCK(so);
}
- if (backlog > 0) {
- inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
- } else {
- inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING;
- }
- SOCK_UNLOCK(so);
+ inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
SCTP_INP_WUNLOCK(inp);
return (error);
}
Index: head/sys/netinet/tcp_subr.c
===================================================================
--- head/sys/netinet/tcp_subr.c
+++ head/sys/netinet/tcp_subr.c
@@ -1664,7 +1664,6 @@
("tcp_close: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
Index: head/sys/netinet/tcp_syncache.c
===================================================================
--- head/sys/netinet/tcp_syncache.c
+++ head/sys/netinet/tcp_syncache.c
@@ -1264,6 +1264,7 @@
* soon as possible.
*/
so = *lsop;
+ KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));
tp = sototcpcb(so);
cred = crhold(so->so_cred);
@@ -1274,7 +1275,7 @@
#endif
ip_ttl = inp->inp_ip_ttl;
ip_tos = inp->inp_ip_tos;
- win = sbspace(&so->so_rcv);
+ win = so->sol_sbrcv_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
#ifdef TCP_RFC7413
@@ -1287,7 +1288,7 @@
* listen queue with bogus TFO connections.
*/
if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
- (so->so_qlimit / 2)) {
+ (so->sol_qlimit / 2)) {
int result;
result = tcp_fastopen_check_cookie(inc,
@@ -2115,7 +2116,7 @@
sc->sc_flags |= SCF_WINSCALE;
}
- wnd = sbspace(&lso->so_rcv);
+ wnd = lso->sol_sbrcv_hiwat;
wnd = imax(wnd, 0);
wnd = imin(wnd, TCP_MAXWIN);
sc->sc_wnd = wnd;
Index: head/sys/netinet/tcp_timewait.c
===================================================================
--- head/sys/netinet/tcp_timewait.c
+++ head/sys/netinet/tcp_timewait.c
@@ -352,7 +352,6 @@
("tcp_twstart: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
@@ -491,7 +490,6 @@
if (inp->inp_flags & INP_SOCKREF) {
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT(so->so_state & SS_PROTOREF,
("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
Index: head/sys/ofed/drivers/infiniband/core/iwcm.c
===================================================================
--- head/sys/ofed/drivers/infiniband/core/iwcm.c
+++ head/sys/ofed/drivers/infiniband/core/iwcm.c
@@ -416,34 +416,19 @@
{
struct socket *so;
struct sockaddr_in *remote;
+ int error;
- ACCEPT_LOCK();
- so = TAILQ_FIRST(&head->so_comp);
- if (!so) {
- ACCEPT_UNLOCK();
- return NULL;
- }
-
- SOCK_LOCK(so);
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- soref(so);
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
+ if (error == EWOULDBLOCK)
+ return (NULL);
remote = NULL;
soaccept(so, (struct sockaddr **)&remote);
free(remote, M_SONAME);
return so;
}
+
static void
iw_so_event_handler(struct work_struct *_work)
{
@@ -485,18 +470,17 @@
#endif
return;
}
+
static int
iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
{
struct iwcm_listen_work *work;
- struct socket *so;
struct iw_cm_id *cm_id = arg;
/* check whether iw_so_event_handler() already dequeued this 'so' */
- so = TAILQ_FIRST(&parent_so->so_comp);
- if (!so)
+ if (TAILQ_EMPTY(&parent_so->sol_comp))
return SU_OK;
- work = kzalloc(sizeof(*work), M_NOWAIT);
+ work = kzalloc(sizeof(*work), waitflag);
if (!work)
return -ENOMEM;
work->cm_id = cm_id;
@@ -507,17 +491,21 @@
return SU_OK;
}
-static void
-iw_init_sock(struct iw_cm_id *cm_id)
+static int
+iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
struct sockopt sopt;
struct socket *so = cm_id->so;
int on = 1;
+ int rc;
- SOCK_LOCK(so);
- soupcall_set(so, SO_RCV, iw_so_upcall, cm_id);
+ rc = -solisten(cm_id->so, backlog, curthread);
+ if (rc != 0)
+ return (rc);
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, iw_so_upcall, cm_id);
so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
+ SOLISTEN_UNLOCK(so);
sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
@@ -525,37 +513,18 @@
sopt.sopt_valsize = sizeof(on);
sopt.sopt_td = NULL;
sosetopt(so, &sopt);
-}
-
-static int
-iw_uninit_socket(struct iw_cm_id *cm_id)
-{
- struct socket *so = cm_id->so;
-
- SOCK_LOCK(so);
- soupcall_clear(so, SO_RCV);
- SOCK_UNLOCK(so);
-
return (0);
}
static int
-iw_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- int rc;
-
- iw_init_sock(cm_id);
- rc = -solisten(cm_id->so, backlog, curthread);
- if (rc != 0)
- iw_uninit_socket(cm_id);
- return (rc);
-}
-
-static int
iw_destroy_listen(struct iw_cm_id *cm_id)
{
+ struct socket *so = cm_id->so;
- return (iw_uninit_socket(cm_id));
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, NULL, NULL);
+ SOLISTEN_UNLOCK(so);
+ return (0);
}
Index: head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
===================================================================
--- head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -310,7 +310,6 @@
("sdp_closed: !SS_PROTOREF"));
ssk->flags &= ~SDP_SOCKREF;
SDP_WUNLOCK(ssk);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
Index: head/sys/rpc/svc_vc.c
===================================================================
--- head/sys/rpc/svc_vc.c
+++ head/sys/rpc/svc_vc.c
@@ -96,6 +96,7 @@
struct sockaddr *raddr);
static int svc_vc_accept(struct socket *head, struct socket **sop);
static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
+static int svc_vc_rendezvous_soupcall(struct socket *, void *, int);
static struct xp_ops svc_vc_rendezvous_ops = {
.xp_recv = svc_vc_rendezvous_recv,
@@ -183,10 +184,10 @@
solisten(so, -1, curthread);
- SOCKBUF_LOCK(&so->so_rcv);
+ SOLISTEN_LOCK(so);
xprt->xp_upcallset = 1;
- soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt);
+ SOLISTEN_UNLOCK(so);
return (xprt);
@@ -316,9 +317,11 @@
int
svc_vc_accept(struct socket *head, struct socket **sop)
{
- int error = 0;
struct socket *so;
+ int error = 0;
+ short nbio;
+ /* XXXGL: shouldn't that be an assertion? */
if ((head->so_options & SO_ACCEPTCONN) == 0) {
error = EINVAL;
goto done;
@@ -328,38 +331,26 @@
if (error != 0)
goto done;
#endif
- ACCEPT_LOCK();
- if (TAILQ_EMPTY(&head->so_comp)) {
- ACCEPT_UNLOCK();
- error = EWOULDBLOCK;
- goto done;
- }
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("svc_vc_accept: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("svc_vc_accept: so not SQ_COMP"));
-
/*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- * XXX might not need soref() since this is simpler than kern_accept.
+ * XXXGL: we want non-blocking semantics. The socket could be a
+ * socket created by kernel as well as socket shared with userland,
+ * so we can't be sure about presence of SS_NBIO. We also shall not
+ * toggle it on the socket, since that may surprise userland. So we
+ * set SS_NBIO only temporarily.
*/
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
+ SOLISTEN_LOCK(head);
+ nbio = head->so_state & SS_NBIO;
+ head->so_state |= SS_NBIO;
+ error = solisten_dequeue(head, &so, 0);
+ head->so_state &= (nbio & ~SS_NBIO);
+ if (error)
+ goto done;
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
-
+ so->so_state |= nbio;
*sop = so;
/* connection has been removed from the listen queue */
- KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
+ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
done:
return (error);
}
@@ -392,21 +383,21 @@
* connection arrives after our call to accept fails
* with EWOULDBLOCK.
*/
- ACCEPT_LOCK();
- if (TAILQ_EMPTY(&xprt->xp_socket->so_comp))
+ SOLISTEN_LOCK(xprt->xp_socket);
+ if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp))
xprt_inactive_self(xprt);
- ACCEPT_UNLOCK();
+ SOLISTEN_UNLOCK(xprt->xp_socket);
sx_xunlock(&xprt->xp_lock);
return (FALSE);
}
if (error) {
- SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
+ SOLISTEN_LOCK(xprt->xp_socket);
if (xprt->xp_upcallset) {
xprt->xp_upcallset = 0;
soupcall_clear(xprt->xp_socket, SO_RCV);
}
- SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
+ SOLISTEN_UNLOCK(xprt->xp_socket);
xprt_inactive_self(xprt);
sx_xunlock(&xprt->xp_lock);
return (FALSE);
@@ -453,12 +444,6 @@
static void
svc_vc_destroy_common(SVCXPRT *xprt)
{
- SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
- if (xprt->xp_upcallset) {
- xprt->xp_upcallset = 0;
- soupcall_clear(xprt->xp_socket, SO_RCV);
- }
- SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
if (xprt->xp_socket)
(void)soclose(xprt->xp_socket);
@@ -472,6 +457,13 @@
svc_vc_rendezvous_destroy(SVCXPRT *xprt)
{
+ SOLISTEN_LOCK(xprt->xp_socket);
+ if (xprt->xp_upcallset) {
+ xprt->xp_upcallset = 0;
+ solisten_upcall_set(xprt->xp_socket, NULL, NULL);
+ }
+ SOLISTEN_UNLOCK(xprt->xp_socket);
+
svc_vc_destroy_common(xprt);
}
@@ -480,6 +472,13 @@
{
struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
+ SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
+ if (xprt->xp_upcallset) {
+ xprt->xp_upcallset = 0;
+ soupcall_clear(xprt->xp_socket, SO_RCV);
+ }
+ SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
+
svc_vc_destroy_common(xprt);
if (cd->mreq)
@@ -954,6 +953,16 @@
SVCXPRT *xprt = (SVCXPRT *) arg;
if (soreadable(xprt->xp_socket))
+ xprt_active(xprt);
+ return (SU_OK);
+}
+
+static int
+svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag)
+{
+ SVCXPRT *xprt = (SVCXPRT *) arg;
+
+ if (!TAILQ_EMPTY(&head->sol_comp))
xprt_active(xprt);
return (SU_OK);
}
Index: head/sys/sys/sockbuf.h
===================================================================
--- head/sys/sys/sockbuf.h
+++ head/sys/sys/sockbuf.h
@@ -32,7 +32,6 @@
*/
#ifndef _SYS_SOCKBUF_H_
#define _SYS_SOCKBUF_H_
-#include <sys/selinfo.h> /* for struct selinfo */
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
@@ -64,6 +63,7 @@
struct sockaddr;
struct socket;
struct thread;
+struct selinfo;
struct xsockbuf {
u_int sb_cc;
@@ -84,9 +84,9 @@
* (a) locked by SOCKBUF_LOCK().
*/
struct sockbuf {
- struct selinfo sb_sel; /* process selecting read/write */
- struct mtx sb_mtx; /* sockbuf lock */
- struct sx sb_sx; /* prevent I/O interlacing */
+ struct mtx sb_mtx; /* sockbuf lock */
+ struct sx sb_sx; /* prevent I/O interlacing */
+ struct selinfo *sb_sel; /* process selecting read/write */
short sb_state; /* (a) socket state on sockbuf */
#define sb_startzero sb_mb
struct mbuf *sb_mb; /* (a) the mbuf chain */
Index: head/sys/sys/socket.h
===================================================================
--- head/sys/sys/socket.h
+++ head/sys/sys/socket.h
@@ -111,7 +111,15 @@
*/
#define SOCK_CLOEXEC 0x10000000
#define SOCK_NONBLOCK 0x20000000
-#endif
+#ifdef _KERNEL
+/*
+ * Flags for accept1(), kern_accept4() and solisten_dequeue(), in addition
+ * to SOCK_CLOEXEC and SOCK_NONBLOCK.
+ */
+#define ACCEPT4_INHERIT 0x1
+#define ACCEPT4_COMPAT 0x2
+#endif /* _KERNEL */
+#endif /* __BSD_VISIBLE */
/*
* Option flags per-socket.
@@ -704,9 +712,5 @@
void so_lock(struct socket *so);
void so_unlock(struct socket *so);
-void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg);
-
-#endif
-
-
+#endif /* _KERNEL */
#endif /* !_SYS_SOCKET_H_ */
Index: head/sys/sys/socketvar.h
===================================================================
--- head/sys/sys/socketvar.h
+++ head/sys/sys/socketvar.h
@@ -64,60 +64,35 @@
* Locking key to struct socket:
* (a) constant after allocation, no locking required.
* (b) locked by SOCK_LOCK(so).
- * (c) locked by SOCKBUF_LOCK(&so->so_rcv).
- * (e) locked by ACCEPT_LOCK().
+ * (cr) locked by SOCKBUF_LOCK(&so->so_rcv).
+ * (cs) locked by SOCKBUF_LOCK(&so->so_snd).
+ * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
* (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value.
* (h) locked by global mutex so_global_mtx.
*/
+TAILQ_HEAD(accept_queue, socket);
struct socket {
- int so_count; /* (b) reference count */
+ struct mtx so_lock;
+ volatile u_int so_count; /* (b / refcount) */
+ struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */
+ struct selinfo so_wrsel; /* (b/cs) for so_snd */
short so_type; /* (a) generic type, see socket.h */
- short so_options; /* from socket call, see socket.h */
- short so_linger; /* time to linger while closing */
+ short so_options; /* (b) from socket call, see socket.h */
+ short so_linger; /* time to linger close(2) */
short so_state; /* (b) internal state flags SS_* */
- int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
-/*
- * Variables for connection queuing.
- * Socket where accepts occur is so_head in all subsidiary sockets.
- * If so_head is 0, socket is not related to an accept.
- * For head socket so_incomp queues partially completed connections,
- * while so_comp is a queue of connections ready to be accepted.
- * If a connection is aborted and it has so_head set, then
- * it has to be pulled out of either so_incomp or so_comp.
- * We allow connections to queue up based on current queue lengths
- * and limit on number of queued connections for this socket.
- */
- struct socket *so_head; /* (e) back pointer to listen socket */
- TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */
- TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */
- TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */
- u_int so_qlen; /* (e) number of unaccepted connections */
- u_int so_incqlen; /* (e) number of unaccepted incomplete
- connections */
- u_int so_qlimit; /* (e) max number queued connections */
short so_timeo; /* (g) connection timeout */
u_short so_error; /* (f) error affecting connection */
struct sigio *so_sigio; /* [sg] information for async I/O or
out of band data (SIGURG) */
- u_long so_oobmark; /* (c) chars to oob mark */
-
- struct sockbuf so_rcv, so_snd;
-
struct ucred *so_cred; /* (a) user credentials */
struct label *so_label; /* (b) MAC label for socket */
- struct label *so_peerlabel; /* (b) cached MAC label for peer */
/* NB: generation count must not be first. */
so_gen_t so_gencnt; /* (h) generation count */
void *so_emuldata; /* (b) private data for emulators */
- struct so_accf {
- struct accept_filter *so_accept_filter;
- void *so_accept_filter_arg; /* saved filter args */
- char *so_accept_filter_str; /* saved user args */
- } *so_accf;
struct osd osd; /* Object Specific extensions */
/*
* so_fibnum, so_user_cookie and friends can be used to attach
@@ -130,41 +105,95 @@
int so_ts_clock; /* type of the clock used for timestamps */
uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */
+ union {
+ /* Regular (data flow) socket. */
+ struct {
+ /* (cr, cs) Receive and send buffers. */
+ struct sockbuf so_rcv, so_snd;
- void *so_pspare[2]; /* general use */
- int so_ispare[2]; /* general use */
+ /* (e) Our place on accept queue. */
+ TAILQ_ENTRY(socket) so_list;
+ struct socket *so_listen; /* (b) */
+ enum {
+ SQ_NONE = 0,
+ SQ_INCOMP = 0x0800, /* on sol_incomp */
+ SQ_COMP = 0x1000, /* on sol_comp */
+ } so_qstate; /* (b) */
+
+ /* (b) cached MAC label for peer */
+ struct label *so_peerlabel;
+ u_long so_oobmark; /* chars to oob mark */
+ };
+ /*
+ * Listening socket, where accepts occur, is so_listen in all
+ * subsidiary sockets. If so_listen is NULL, socket is not
+ * related to an accept. For a listening socket itself
+ * sol_incomp queues partially completed connections, while
+ * sol_comp is a queue of connections ready to be accepted.
+ * If a connection is aborted and it has so_listen set, then
+ * it has to be pulled out of either sol_incomp or sol_comp.
+ * We allow connections to queue up based on current queue
+ * lengths and limit on number of queued connections for this
+ * socket.
+ */
+ struct {
+ /* (e) queue of partial unaccepted connections */
+ struct accept_queue sol_incomp;
+ /* (e) queue of complete unaccepted connections */
+ struct accept_queue sol_comp;
+ u_int sol_qlen; /* (e) sol_comp length */
+ u_int sol_incqlen; /* (e) sol_incomp length */
+ u_int sol_qlimit; /* (e) queue limit */
+
+ /* accept_filter(9) optional data */
+ struct accept_filter *sol_accept_filter;
+ void *sol_accept_filter_arg; /* saved filter args */
+ char *sol_accept_filter_str; /* saved user args */
+
+ /* Optional upcall, for kernel socket. */
+ so_upcall_t *sol_upcall; /* (e) */
+ void *sol_upcallarg; /* (e) */
+
+ /* Socket buffer parameters, to be copied to
+ * dataflow sockets, accepted from this one. */
+ int sol_sbrcv_lowat;
+ int sol_sbsnd_lowat;
+ u_int sol_sbrcv_hiwat;
+ u_int sol_sbsnd_hiwat;
+ short sol_sbrcv_flags;
+ short sol_sbsnd_flags;
+ sbintime_t sol_sbrcv_timeo;
+ sbintime_t sol_sbsnd_timeo;
+ };
+ };
};
-/*
- * Global accept mutex to serialize access to accept queues and
- * fields associated with multiple sockets. This allows us to
- * avoid defining a lock order between listen and accept sockets
- * until such time as it proves to be a good idea.
- */
-extern struct mtx accept_mtx;
-#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED)
-#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED)
-#define ACCEPT_LOCK() mtx_lock(&accept_mtx)
-#define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx)
+#define SOCK_MTX(so) &(so)->so_lock
+#define SOCK_LOCK(so) mtx_lock(&(so)->so_lock)
+#define SOCK_OWNED(so) mtx_owned(&(so)->so_lock)
+#define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock)
+#define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED)
+#define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED)
-/*
- * Per-socket mutex: we reuse the receive socket buffer mutex for space
- * efficiency. This decision should probably be revisited as we optimize
- * locking for the socket code.
- */
-#define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv)
-#define SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv)
-#define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv)
-#define SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv)
-#define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
+#define SOLISTENING(sol) (((sol)->so_options & SO_ACCEPTCONN) != 0)
+#define SOLISTEN_LOCK(sol) do { \
+ mtx_lock(&(sol)->so_lock); \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+} while (0)
+#define SOLISTEN_TRYLOCK(sol) mtx_trylock(&(sol)->so_lock)
+#define SOLISTEN_UNLOCK(sol) do { \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+ mtx_unlock(&(sol)->so_lock); \
+} while (0)
+#define SOLISTEN_LOCK_ASSERT(sol) do { \
+ mtx_assert(&(sol)->so_lock, MA_OWNED); \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+} while (0)
/*
- * Socket state bits stored in so_qstate.
- */
-#define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */
-#define SQ_COMP 0x1000 /* unaccepted, complete connection */
-
-/*
* Externalized form of struct socket used by the sysctl(3) interface.
*/
struct xsocket {
@@ -213,8 +242,7 @@
/* can we read something from so? */
#define soreadabledata(so) \
- (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
- !TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
+ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || (so)->so_error)
#define soreadable(so) \
(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
@@ -227,26 +255,19 @@
(so)->so_error)
/*
- * soref()/sorele() ref-count the socket structure. Note that you must
- * still explicitly close the socket, but the last ref count will free
- * the structure.
+ * soref()/sorele() ref-count the socket structure.
+ * soref() may be called without owning the socket lock, but in that case the
+ * caller must own something that holds the socket, and so_count must not be 0.
+ * Note that you must still explicitly close the socket, but the last ref
+ * count will free the structure.
*/
-#define soref(so) do { \
- SOCK_LOCK_ASSERT(so); \
- ++(so)->so_count; \
-} while (0)
-
+#define soref(so) refcount_acquire(&(so)->so_count)
#define sorele(so) do { \
- ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
- if ((so)->so_count <= 0) \
- panic("sorele"); \
- if (--(so)->so_count == 0) \
+ if (refcount_release(&(so)->so_count)) \
sofree(so); \
- else { \
+ else \
SOCK_UNLOCK(so); \
- ACCEPT_UNLOCK(); \
- } \
} while (0)
/*
@@ -369,10 +390,11 @@
int solisten(struct socket *so, int backlog, struct thread *td);
void solisten_proto(struct socket *so, int backlog);
int solisten_proto_check(struct socket *so);
+int solisten_dequeue(struct socket *, struct socket **, int);
struct socket *
sonewconn(struct socket *head, int connstatus);
-
-
+struct socket *
+ sopeeloff(struct socket *);
int sopoll(struct socket *so, int events, struct ucred *active_cred,
struct thread *td);
int sopoll_generic(struct socket *so, int events,
@@ -403,8 +425,10 @@
void sotoxsocket(struct socket *so, struct xsocket *xso);
void soupcall_clear(struct socket *, int);
void soupcall_set(struct socket *, int, so_upcall_t, void *);
+void solisten_upcall_set(struct socket *, so_upcall_t, void *);
void sowakeup(struct socket *so, struct sockbuf *sb);
void sowakeup_aio(struct socket *so, struct sockbuf *sb);
+void solisten_wakeup(struct socket *);
int selsocket(struct socket *so, int events, struct timeval *tv,
struct thread *td);
Index: head/usr.bin/netstat/inet.c
===================================================================
--- head/usr.bin/netstat/inet.c
+++ head/usr.bin/netstat/inet.c
@@ -170,14 +170,17 @@
if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0)
return (-1);
xso->xso_family = domain.dom_family;
- xso->so_qlen = so->so_qlen;
- xso->so_incqlen = so->so_incqlen;
- xso->so_qlimit = so->so_qlimit;
xso->so_timeo = so->so_timeo;
xso->so_error = so->so_error;
- xso->so_oobmark = so->so_oobmark;
- sbtoxsockbuf(&so->so_snd, &xso->so_snd);
- sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ if (SOLISTENING(so)) {
+ xso->so_qlen = so->sol_qlen;
+ xso->so_incqlen = so->sol_incqlen;
+ xso->so_qlimit = so->sol_qlimit;
+ } else {
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ xso->so_oobmark = so->so_oobmark;
+ }
return (0);
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Mar 10, 8:41 AM (9 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17081941
Default Alt Text
D9770.id.diff (93 KB)
Attached To
Mode
D9770: Listening sockets revamp try #2.
Attached
Detach File
Event Timeline
Log In to Comment