Page MenuHomeFreeBSD

D9770.id29118.diff
No OneTemporary

D9770.id29118.diff

Index: sys/cam/ctl/ctl_ha.c
===================================================================
--- sys/cam/ctl/ctl_ha.c
+++ sys/cam/ctl/ctl_ha.c
@@ -458,44 +458,19 @@
static int
ctl_ha_accept(struct ha_softc *softc)
{
- struct socket *so;
+ struct socket *lso, *so;
struct sockaddr *sap;
int error;
- ACCEPT_LOCK();
- if (softc->ha_lso->so_rcv.sb_state & SBS_CANTRCVMORE)
- softc->ha_lso->so_error = ECONNABORTED;
- if (softc->ha_lso->so_error) {
- error = softc->ha_lso->so_error;
- softc->ha_lso->so_error = 0;
- ACCEPT_UNLOCK();
+ lso = softc->ha_lso;
+ SOLISTEN_LOCK(lso);
+ error = solisten_dequeue(lso, &so, 0);
+ if (error == EWOULDBLOCK)
+ return (error);
+ if (error) {
printf("%s: socket error %d\n", __func__, error);
goto out;
}
- so = TAILQ_FIRST(&softc->ha_lso->so_comp);
- if (so == NULL) {
- ACCEPT_UNLOCK();
- return (EWOULDBLOCK);
- }
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
-
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&softc->ha_lso->so_comp, so, so_list);
- softc->ha_lso->so_qlen--;
- so->so_state |= SS_NBIO;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
sap = NULL;
error = soaccept(so, &sap);
@@ -556,9 +531,6 @@
printf("%s: REUSEPORT setting failed %d\n",
__func__, error);
}
- SOCKBUF_LOCK(&softc->ha_lso->so_rcv);
- soupcall_set(softc->ha_lso, SO_RCV, ctl_ha_lupcall, softc);
- SOCKBUF_UNLOCK(&softc->ha_lso->so_rcv);
}
memcpy(&sa, &softc->ha_peer_in, sizeof(sa));
@@ -572,6 +544,10 @@
printf("%s: solisten() error %d\n", __func__, error);
goto out;
}
+ SOLISTEN_LOCK(softc->ha_lso);
+ softc->ha_lso->so_state |= SS_NBIO;
+ solisten_upcall_set(softc->ha_lso, ctl_ha_lupcall, softc);
+ SOLISTEN_UNLOCK(softc->ha_lso);
return (0);
out:
Index: sys/dev/iscsi/icl_soft_proxy.c
===================================================================
--- sys/dev/iscsi/icl_soft_proxy.c
+++ sys/dev/iscsi/icl_soft_proxy.c
@@ -92,7 +92,6 @@
struct icl_listen *ils_listen;
struct socket *ils_socket;
bool ils_running;
- bool ils_disconnecting;
int ils_id;
};
@@ -184,7 +183,9 @@
while (ils->ils_running) {
ICL_DEBUG("waiting for accept thread to terminate");
sx_xunlock(&il->il_lock);
- ils->ils_disconnecting = true;
+ SOLISTEN_LOCK(ils->ils_socket);
+ ils->ils_socket->so_error = ENOTCONN;
+ SOLISTEN_UNLOCK(ils->ils_socket);
wakeup(&ils->ils_socket->so_timeo);
pause("icl_unlisten", 1 * hz);
sx_xlock(&il->il_lock);
@@ -200,9 +201,9 @@
}
/*
- * XXX: Doing accept in a separate thread in each socket might not be the best way
- * to do stuff, but it's pretty clean and debuggable - and you probably won't
- * have hundreds of listening sockets anyway.
+ * XXX: Doing accept in a separate thread in each socket might not be the
+ * best way to do stuff, but it's pretty clean and debuggable - and you
+ * probably won't have hundreds of listening sockets anyway.
*/
static void
icl_accept_thread(void *arg)
@@ -218,55 +219,22 @@
ils->ils_running = true;
for (;;) {
- ACCEPT_LOCK();
- while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 && ils->ils_disconnecting == false) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
- head->so_error = ECONNABORTED;
- break;
- }
- error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
- "accept", 0);
- if (error) {
- ACCEPT_UNLOCK();
- ICL_WARN("msleep failed with error %d", error);
- continue;
- }
- if (ils->ils_disconnecting) {
- ACCEPT_UNLOCK();
- ICL_DEBUG("terminating");
- ils->ils_running = false;
- kthread_exit();
- return;
- }
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, 0);
+ if (error == ENOTCONN) {
+ /*
+ * XXXGL: ENOTCONN is our mark from icl_listen_free().
+ * Neither socket code, nor msleep(9) may return it.
+ */
+ ICL_DEBUG("terminating");
+ ils->ils_running = false;
+ kthread_exit();
+ return;
}
- if (head->so_error) {
- error = head->so_error;
- head->so_error = 0;
- ACCEPT_UNLOCK();
- ICL_WARN("socket error %d", error);
+ if (error) {
+ ICL_WARN("solisten_dequeue error %d", error);
continue;
}
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(so != NULL, ("NULL so"));
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
-
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
sa = NULL;
error = soaccept(so, &sa);
Index: sys/kern/sys_socket.c
===================================================================
--- sys/kern/sys_socket.c
+++ sys/kern/sys_socket.c
@@ -170,32 +170,36 @@
break;
case FIOASYNC:
- /*
- * XXXRW: This code separately acquires SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) even though they are the same
- * mutex to avoid introducing the assumption that they are
- * the same.
- */
if (*(int *)data) {
SOCK_LOCK(so);
so->so_state |= SS_ASYNC;
+ if (SOLISTENING(so)) {
+ so->sol_sbrcv_flags |= SB_ASYNC;
+ so->sol_sbsnd_flags |= SB_ASYNC;
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_flags |= SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_flags |= SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
SOCK_UNLOCK(so);
- SOCKBUF_LOCK(&so->so_rcv);
- so->so_rcv.sb_flags |= SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_flags |= SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_snd);
} else {
SOCK_LOCK(so);
so->so_state &= ~SS_ASYNC;
+ if (SOLISTENING(so)) {
+ so->sol_sbrcv_flags &= ~SB_ASYNC;
+ so->sol_sbsnd_flags &= ~SB_ASYNC;
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_flags &= ~SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_flags &= ~SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
SOCK_UNLOCK(so);
- SOCKBUF_LOCK(&so->so_rcv);
- so->so_rcv.sb_flags &= ~SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_flags &= ~SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_snd);
}
break;
@@ -698,7 +702,6 @@
sb->sb_flags &= ~SB_AIO_RUNNING;
SOCKBUF_UNLOCK(sb);
- ACCEPT_LOCK();
SOCK_LOCK(so);
sorele(so);
}
Index: sys/kern/uipc_accf.c
===================================================================
--- sys/kern/uipc_accf.c
+++ sys/kern/uipc_accf.c
@@ -162,26 +162,25 @@
}
int
-do_getopt_accept_filter(struct socket *so, struct sockopt *sopt)
+accept_filt_getopt(struct socket *so, struct sockopt *sopt)
{
struct accept_filter_arg *afap;
int error;
error = 0;
- afap = malloc(sizeof(*afap), M_TEMP,
- M_WAITOK | M_ZERO);
+ afap = malloc(sizeof(*afap), M_TEMP, M_WAITOK | M_ZERO);
SOCK_LOCK(so);
if ((so->so_options & SO_ACCEPTCONN) == 0) {
error = EINVAL;
goto out;
}
- if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+ if (so->sol_accept_filter == NULL) {
error = EINVAL;
goto out;
}
- strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
- if (so->so_accf->so_accept_filter_str != NULL)
- strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
+ strcpy(afap->af_name, so->sol_accept_filter->accf_name);
+ if (so->sol_accept_filter_str != NULL)
+ strcpy(afap->af_arg, so->sol_accept_filter_str);
out:
SOCK_UNLOCK(so);
if (error == 0)
@@ -191,12 +190,13 @@
}
int
-do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
+accept_filt_setopt(struct socket *so, struct sockopt *sopt)
{
struct accept_filter_arg *afap;
struct accept_filter *afp;
- struct so_accf *newaf;
- int error = 0;
+ char *accept_filter_str = NULL;
+ void *accept_filter_arg = NULL;
+ int error;
/*
* Handle the simple delete case first.
@@ -207,18 +207,15 @@
SOCK_UNLOCK(so);
return (EINVAL);
}
- if (so->so_accf != NULL) {
- struct so_accf *af = so->so_accf;
- if (af->so_accept_filter != NULL &&
- af->so_accept_filter->accf_destroy != NULL) {
- af->so_accept_filter->accf_destroy(so);
- }
- if (af->so_accept_filter_str != NULL)
- free(af->so_accept_filter_str, M_ACCF);
- free(af, M_ACCF);
- so->so_accf = NULL;
+ if (so->sol_accept_filter != NULL) {
+ if (so->sol_accept_filter->accf_destroy != NULL)
+ so->sol_accept_filter->accf_destroy(so);
+ if (so->sol_accept_filter_str != NULL)
+ free(so->sol_accept_filter_str, M_ACCF);
+ so->sol_accept_filter = NULL;
+ so->sol_accept_filter_arg = NULL;
+ so->sol_accept_filter_str = NULL;
}
- so->so_options &= ~SO_ACCEPTFILTER;
SOCK_UNLOCK(so);
return (0);
}
@@ -227,8 +224,7 @@
* Pre-allocate any memory we may need later to avoid blocking at
* untimely moments. This does not optimize for invalid arguments.
*/
- afap = malloc(sizeof(*afap), M_TEMP,
- M_WAITOK);
+ afap = malloc(sizeof(*afap), M_TEMP, M_WAITOK);
error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
afap->af_name[sizeof(afap->af_name)-1] = '\0';
afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
@@ -241,19 +237,10 @@
free(afap, M_TEMP);
return (ENOENT);
}
- /*
- * Allocate the new accept filter instance storage. We may
- * have to free it again later if we fail to attach it. If
- * attached properly, 'newaf' is NULLed to avoid a free()
- * while in use.
- */
- newaf = malloc(sizeof(*newaf), M_ACCF, M_WAITOK |
- M_ZERO);
if (afp->accf_create != NULL && afap->af_name[0] != '\0') {
size_t len = strlen(afap->af_name) + 1;
- newaf->so_accept_filter_str = malloc(len, M_ACCF,
- M_WAITOK);
- strcpy(newaf->so_accept_filter_str, afap->af_name);
+ accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
+ strcpy(accept_filter_str, afap->af_name);
}
/*
@@ -261,8 +248,8 @@
* without first removing it.
*/
SOCK_LOCK(so);
- if (((so->so_options & SO_ACCEPTCONN) == 0) ||
- (so->so_accf != NULL)) {
+ if ((so->so_options & SO_ACCEPTCONN) == 0 ||
+ so->sol_accept_filter != NULL) {
error = EINVAL;
goto out;
}
@@ -273,25 +260,19 @@
* can't block.
*/
if (afp->accf_create != NULL) {
- newaf->so_accept_filter_arg =
- afp->accf_create(so, afap->af_arg);
- if (newaf->so_accept_filter_arg == NULL) {
+ accept_filter_arg = afp->accf_create(so, afap->af_arg);
+ if (accept_filter_arg == NULL) {
error = EINVAL;
goto out;
}
}
- newaf->so_accept_filter = afp;
- so->so_accf = newaf;
- so->so_options |= SO_ACCEPTFILTER;
- newaf = NULL;
+ so->sol_accept_filter = afp;
+ so->sol_accept_filter_arg = accept_filter_arg;
+ so->sol_accept_filter_str = accept_filter_str;
out:
SOCK_UNLOCK(so);
- if (newaf != NULL) {
- if (newaf->so_accept_filter_str != NULL)
- free(newaf->so_accept_filter_str, M_ACCF);
- free(newaf, M_ACCF);
- }
- if (afap != NULL)
- free(afap, M_TEMP);
+ if (accept_filter_str != NULL)
+ free(accept_filter_str, M_ACCF);
+ free(afap, M_TEMP);
return (error);
}
Index: sys/kern/uipc_debug.c
===================================================================
--- sys/kern/uipc_debug.c
+++ sys/kern/uipc_debug.c
@@ -448,8 +448,6 @@
db_printf(")\n");
db_print_indent(indent);
- db_printf("so_qstate: 0x%x (", so->so_qstate);
- db_print_soqstate(so->so_qstate);
db_printf(") ");
db_printf("so_pcb: %p ", so->so_pcb);
db_printf("so_proto: %p\n", so->so_proto);
@@ -458,24 +456,28 @@
db_print_protosw(so->so_proto, "so_proto", indent);
db_print_indent(indent);
- db_printf("so_head: %p ", so->so_head);
- db_printf("so_incomp first: %p ", TAILQ_FIRST(&so->so_incomp));
- db_printf("so_comp first: %p\n", TAILQ_FIRST(&so->so_comp));
-
- db_print_indent(indent);
- /* so_list skipped */
- db_printf("so_qlen: %u ", so->so_qlen);
- db_printf("so_incqlen: %u ", so->so_incqlen);
- db_printf("so_qlimit: %u ", so->so_qlimit);
- db_printf("so_timeo: %d ", so->so_timeo);
- db_printf("so_error: %d\n", so->so_error);
-
- db_print_indent(indent);
- db_printf("so_sigio: %p ", so->so_sigio);
- db_printf("so_oobmark: %lu ", so->so_oobmark);
-
- db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
- db_print_sockbuf(&so->so_snd, "so_snd", indent);
+ if (so->so_options & SO_ACCEPTCONN) {
+ db_printf("sol_incomp first: %p ",
+ TAILQ_FIRST(&so->sol_incomp));
+ db_printf("sol_comp first: %p\n", TAILQ_FIRST(&so->sol_comp));
+ db_printf("sol_qlen: %d ", so->sol_qlen);
+ db_printf("sol_incqlen: %d ", so->sol_incqlen);
+ db_printf("sol_qlimit: %d ", so->sol_qlimit);
+ } else {
+ db_printf("so_qstate: 0x%x (", so->so_qstate);
+ db_print_soqstate(so->so_qstate);
+ db_printf("so_listen: %p ", so->so_listen);
+ /* so_list skipped */
+ db_printf("so_timeo: %d ", so->so_timeo);
+ db_printf("so_error: %d\n", so->so_error);
+
+ db_print_indent(indent);
+ db_printf("so_sigio: %p ", so->so_sigio);
+ db_printf("so_oobmark: %lu ", so->so_oobmark);
+
+ db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
+ db_print_sockbuf(&so->so_snd, "so_snd", indent);
+ }
}
DB_SHOW_COMMAND(socket, db_show_socket)
Index: sys/kern/uipc_sockbuf.c
===================================================================
--- sys/kern/uipc_sockbuf.c
+++ sys/kern/uipc_sockbuf.c
@@ -314,14 +314,14 @@
SOCKBUF_LOCK_ASSERT(sb);
- selwakeuppri(&sb->sb_sel, PSOCK);
- if (!SEL_WAITING(&sb->sb_sel))
+ selwakeuppri(sb->sb_sel, PSOCK);
+ if (!SEL_WAITING(sb->sb_sel))
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_acc);
}
- KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
+ KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
if (sb->sb_upcall != NULL) {
ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
if (ret == SU_ISCONNECTED) {
Index: sys/kern/uipc_socket.c
===================================================================
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -106,6 +106,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_compat.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -154,13 +155,22 @@
static int soreceive_rcvoob(struct socket *so, struct uio *uio,
int flags);
+static void so_rdknl_lock(void *);
+static void so_rdknl_unlock(void *);
+static void so_rdknl_assert_locked(void *);
+static void so_rdknl_assert_unlocked(void *);
+static void so_wrknl_lock(void *);
+static void so_wrknl_unlock(void *);
+static void so_wrknl_assert_locked(void *);
+static void so_wrknl_assert_unlocked(void *);
static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
-static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
static int filt_soempty(struct knote *kn, long hint);
+static void solisten_wakeup(struct socket *);
+static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
fo_kqfilter_t soo_kqfilter;
static struct filterops soread_filtops = {
@@ -393,8 +403,16 @@
return (NULL);
}
+ /*
+ * The socket locking protocol allows locking 2 sockets at a time,
+ * however, the first one must be a listening socket. WITNESS lacks
+ * a feature to change class of an existing lock, so we use DUPOK.
+ */
+ mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
+ so->so_rcv.sb_sel = &so->so_rdsel;
+ so->so_snd.sb_sel = &so->so_wrsel;
sx_init(&so->so_snd.sb_sx, "so_snd_sx");
sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
TAILQ_INIT(&so->so_snd.sb_aiojobq);
@@ -450,9 +468,6 @@
if (so->so_snd.sb_hiwat)
(void)chgsbsize(so->so_cred->cr_uidinfo,
&so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
- /* remove accept filter if one is present. */
- if (so->so_accf != NULL)
- do_setopt_accept_filter(so, NULL);
#ifdef MAC
mac_socket_destroy(so);
#endif
@@ -460,10 +475,16 @@
crfree(so->so_cred);
khelp_destroy_osd(&so->osd);
- sx_destroy(&so->so_snd.sb_sx);
- sx_destroy(&so->so_rcv.sb_sx);
- SOCKBUF_LOCK_DESTROY(&so->so_snd);
- SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+ if (SOLISTENING(so)) {
+ if (so->sol_accept_filter != NULL)
+ accept_filt_setopt(so, NULL);
+ } else {
+ sx_destroy(&so->so_snd.sb_sx);
+ sx_destroy(&so->so_rcv.sb_sx);
+ SOCKBUF_LOCK_DESTROY(&so->so_snd);
+ SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+ }
+ mtx_destroy(&so->so_lock);
uma_zfree(socket_zone, so);
}
@@ -506,8 +527,6 @@
if (so == NULL)
return (ENOBUFS);
- TAILQ_INIT(&so->so_incomp);
- TAILQ_INIT(&so->so_comp);
so->so_type = type;
so->so_cred = crhold(cred);
if ((prp->pr_domain->dom_family == PF_INET) ||
@@ -520,9 +539,10 @@
#ifdef MAC
mac_socket_create(cred, so);
#endif
- knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
- knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
- so->so_count = 1;
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
/*
* Auto-sizing of socket buffers is managed by the protocols and
* the appropriate flags must be set in the pru_attach function.
@@ -531,12 +551,10 @@
error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
CURVNET_RESTORE();
if (error) {
- KASSERT(so->so_count == 1, ("socreate: so_count %d",
- so->so_count));
- so->so_count = 0;
sodealloc(so);
return (error);
}
+ soref(so);
*aso = so;
return (0);
}
@@ -564,11 +582,11 @@
static int overcount;
struct socket *so;
- int over;
+ u_int over;
- ACCEPT_LOCK();
- over = (head->so_qlen > 3 * head->so_qlimit / 2);
- ACCEPT_UNLOCK();
+ SOLISTEN_LOCK(head);
+ over = (head->sol_qlen > 3 * head->sol_qlimit / 2);
+ SOLISTEN_UNLOCK(head);
#ifdef REGRESSION
if (regression_sonewconn_earlytest && over) {
#else
@@ -580,15 +598,15 @@
log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: "
"%i already in queue awaiting acceptance "
"(%d occurrences)\n",
- __func__, head->so_pcb, head->so_qlen, overcount);
+ __func__, head->so_pcb, head->sol_qlen, overcount);
overcount = 0;
}
return (NULL);
}
- VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
- __func__, __LINE__, head));
+ VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL",
+ __func__, head));
so = soalloc(head->so_vnet);
if (so == NULL) {
log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
@@ -596,9 +614,9 @@
__func__, head->so_pcb);
return (NULL);
}
- if ((head->so_options & SO_ACCEPTFILTER) != 0)
+ if (head->sol_accept_filter != NULL)
connstatus = 0;
- so->so_head = head;
+ so->so_listen = head;
so->so_type = head->so_type;
so->so_options = head->so_options &~ SO_ACCEPTCONN;
so->so_linger = head->so_linger;
@@ -609,10 +627,12 @@
#ifdef MAC
mac_socket_newconn(head, so);
#endif
- knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
- knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
VNET_SO_ASSERT(head);
- if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) {
sodealloc(so);
log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
__func__, head->so_pcb);
@@ -624,32 +644,21 @@
__func__, head->so_pcb);
return (NULL);
}
- so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
- so->so_snd.sb_lowat = head->so_snd.sb_lowat;
- so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
- so->so_snd.sb_timeo = head->so_snd.sb_timeo;
- so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
- so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+ so->so_rcv.sb_lowat = head->sol_sbrcv_lowat;
+ so->so_snd.sb_lowat = head->sol_sbsnd_lowat;
+ so->so_rcv.sb_timeo = head->sol_sbrcv_timeo;
+ so->so_snd.sb_timeo = head->sol_sbsnd_timeo;
+ so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE;
+ so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE;
so->so_state |= connstatus;
- ACCEPT_LOCK();
- /*
- * The accept socket may be tearing down but we just
- * won a race on the ACCEPT_LOCK.
- * However, if sctp_peeloff() is called on a 1-to-many
- * style socket, the SO_ACCEPTCONN doesn't need to be set.
- */
- if (!(head->so_options & SO_ACCEPTCONN) &&
- ((head->so_proto->pr_protocol != IPPROTO_SCTP) ||
- (head->so_type != SOCK_SEQPACKET))) {
- SOCK_LOCK(so);
- so->so_head = NULL;
- sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */
- return (NULL);
- }
+
+ SOLISTEN_LOCK(head);
+ soref(head); /* A socket on (in)complete queue refs head. */
if (connstatus) {
- TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
- so->so_qstate |= SQ_COMP;
- head->so_qlen++;
+ TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+ so->so_qstate = SQ_COMP;
+ head->sol_qlen++;
+ solisten_wakeup(head); /* unlocks */
} else {
/*
* Keep removing sockets from the head until there's room for
@@ -658,28 +667,86 @@
* threads and soabort() requires dropping locks, we must
* loop waiting for the condition to be true.
*/
- while (head->so_incqlen > head->so_qlimit) {
+ while (head->sol_incqlen > head->sol_qlimit) {
struct socket *sp;
- sp = TAILQ_FIRST(&head->so_incomp);
- TAILQ_REMOVE(&head->so_incomp, sp, so_list);
- head->so_incqlen--;
- sp->so_qstate &= ~SQ_INCOMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
+
+ sp = TAILQ_FIRST(&head->sol_incomp);
+ TAILQ_REMOVE(&head->sol_incomp, sp, so_list);
+ head->sol_incqlen--;
+ SOCK_LOCK(sp);
+ sp->so_qstate = SQ_NONE;
+ sp->so_listen = NULL;
+ SOCK_UNLOCK(sp);
+ sorele(head); /* does SOLISTEN_UNLOCK, head stays */
soabort(sp);
- ACCEPT_LOCK();
+ SOLISTEN_LOCK(head);
}
- TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
- so->so_qstate |= SQ_INCOMP;
- head->so_incqlen++;
+ TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list);
+ so->so_qstate = SQ_INCOMP;
+ head->sol_incqlen++;
+ SOLISTEN_UNLOCK(head);
}
- ACCEPT_UNLOCK();
- if (connstatus) {
- sorwakeup(head);
- wakeup_one(&head->so_timeo);
+ return (so);
+}
+
+#ifdef SCTP
+/*
+ * Socket part of sctp_peeloff(). Detach a new socket from an
+ * association. The new socket is returned with a reference.
+ */
+struct socket *
+sopeeloff(struct socket *head)
+{
+ struct socket *so;
+
+ VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
+ __func__, __LINE__, head));
+ so = soalloc(head->so_vnet);
+ if (so == NULL) {
+ log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
+ "limit reached or out of memory\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ so->so_type = head->so_type;
+ so->so_options = head->so_options;
+ so->so_linger = head->so_linger;
+ so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED;
+ so->so_fibnum = head->so_fibnum;
+ so->so_proto = head->so_proto;
+ so->so_cred = crhold(head->so_cred);
+#ifdef MAC
+ mac_socket_newconn(head, so);
+#endif
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
+ VNET_SO_ASSERT(head);
+ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
}
+ so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
+ so->so_snd.sb_lowat = head->so_snd.sb_lowat;
+ so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
+ so->so_snd.sb_timeo = head->so_snd.sb_timeo;
+ so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
+ so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+
+ soref(so);
+
return (so);
}
+#endif /* SCTP */
int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
@@ -741,13 +808,137 @@
void
solisten_proto(struct socket *so, int backlog)
{
+ int sbrcv_lowat, sbsnd_lowat;
+ u_int sbrcv_hiwat, sbsnd_hiwat;
+ short sbrcv_flags, sbsnd_flags;
+ sbintime_t sbrcv_timeo, sbsnd_timeo;
SOCK_LOCK_ASSERT(so);
+ if (SOLISTENING(so))
+ goto listening;
+
+ /*
+ * Change this socket to listening state.
+ */
+ sbrcv_lowat = so->so_rcv.sb_lowat;
+ sbsnd_lowat = so->so_snd.sb_lowat;
+ sbrcv_hiwat = so->so_rcv.sb_hiwat;
+ sbsnd_hiwat = so->so_snd.sb_hiwat;
+ sbrcv_flags = so->so_rcv.sb_flags;
+ sbsnd_flags = so->so_snd.sb_flags;
+ sbrcv_timeo = so->so_rcv.sb_timeo;
+ sbsnd_timeo = so->so_snd.sb_timeo;
+
+ sbdestroy(&so->so_snd, so);
+ sbdestroy(&so->so_rcv, so);
+ sx_destroy(&so->so_snd.sb_sx);
+ sx_destroy(&so->so_rcv.sb_sx);
+ SOCKBUF_LOCK_DESTROY(&so->so_snd);
+ SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+
+#ifdef INVARIANTS
+ bzero(&so->so_rcv,
+ sizeof(struct socket) - offsetof(struct socket, so_rcv));
+#endif
+
+ so->sol_sbrcv_lowat = sbrcv_lowat;
+ so->sol_sbsnd_lowat = sbsnd_lowat;
+ so->sol_sbrcv_hiwat = sbrcv_hiwat;
+ so->sol_sbsnd_hiwat = sbsnd_hiwat;
+ so->sol_sbrcv_flags = sbrcv_flags;
+ so->sol_sbsnd_flags = sbsnd_flags;
+ so->sol_sbrcv_timeo = sbrcv_timeo;
+ so->sol_sbsnd_timeo = sbsnd_timeo;
+
+ so->sol_qlen = so->sol_incqlen = 0;
+ TAILQ_INIT(&so->sol_incomp);
+ TAILQ_INIT(&so->sol_comp);
+
+ so->sol_accept_filter = NULL;
+ so->sol_accept_filter_arg = NULL;
+ so->sol_accept_filter_str = NULL;
+
+ so->so_options |= SO_ACCEPTCONN;
+
+listening:
if (backlog < 0 || backlog > somaxconn)
backlog = somaxconn;
- so->so_qlimit = backlog;
- so->so_options |= SO_ACCEPTCONN;
+ so->sol_qlimit = backlog;
+}
+
+/*
+ * Wakeup listeners/subsystems once we have a complete connection.
+ * Enters with lock, returns unlocked.
+ */
+static void
+solisten_wakeup(struct socket *sol)
+{
+
+ if (sol->sol_upcall != NULL)
+ (void )sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT);
+ else {
+ selwakeuppri(&sol->so_rdsel, PSOCK);
+ KNOTE_LOCKED(&sol->so_rdsel.si_note, 0);
+ }
+ SOLISTEN_UNLOCK(sol);
+ wakeup_one(&sol->sol_comp);
+}
+
+/*
+ * Return single connection off a listening socket queue. Main consumer of
+ * the function is kern_accept4(). Some modules, that do their own accept
+ * management also use the function.
+ *
+ * The listening socket must be locked on entry and is unlocked on
+ * return.
+ * The flags argument is a set of accept4(2) flags and ACCEPT4_INHERIT.
+ */
+int
+solisten_dequeue(struct socket *head, struct socket **ret, int flags)
+{
+ struct socket *so;
+ int error;
+
+ SOLISTEN_LOCK_ASSERT(head);
+
+ while (!(head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp) &&
+ head->so_error == 0) {
+ error = msleep(&head->sol_comp, &head->so_lock, PSOCK | PCATCH,
+ "accept", 0);
+ if (error != 0) {
+ SOLISTEN_UNLOCK(head);
+ return (error);
+ }
+ }
+ if (head->so_error) {
+ error = head->so_error;
+ head->so_error = 0;
+ SOLISTEN_UNLOCK(head);
+ return (error);
+ }
+ if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) {
+ SOLISTEN_UNLOCK(head);
+ return (EWOULDBLOCK);
+ }
+ so = TAILQ_FIRST(&head->sol_comp);
+ SOCK_LOCK(so);
+ KASSERT(so->so_qstate == SQ_COMP,
+ ("%s: so %p not SQ_COMP", __func__, so));
+ soref(so);
+ head->sol_qlen--;
+ so->so_qstate = SQ_NONE;
+ so->so_listen = NULL;
+ TAILQ_REMOVE(&head->sol_comp, so, so_list);
+ if (flags & ACCEPT4_INHERIT)
+ so->so_state |= (head->so_state & SS_NBIO);
+ else
+ so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
+ SOCK_UNLOCK(so);
+ sorele(head);
+
+ *ret = so;
+ return (0);
}
/*
@@ -774,44 +965,62 @@
sofree(struct socket *so)
{
struct protosw *pr = so->so_proto;
- struct socket *head;
- ACCEPT_LOCK_ASSERT();
SOCK_LOCK_ASSERT(so);
if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
- (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) {
+ (so->so_state & SS_PROTOREF) || (so->so_qstate == SQ_COMP)) {
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
return;
}
- head = so->so_head;
- if (head != NULL) {
- KASSERT((so->so_qstate & SQ_COMP) != 0 ||
- (so->so_qstate & SQ_INCOMP) != 0,
- ("sofree: so_head != NULL, but neither SQ_COMP nor "
- "SQ_INCOMP"));
- KASSERT((so->so_qstate & SQ_COMP) == 0 ||
- (so->so_qstate & SQ_INCOMP) == 0,
- ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP"));
- TAILQ_REMOVE(&head->so_incomp, so, so_list);
- head->so_incqlen--;
- so->so_qstate &= ~SQ_INCOMP;
- so->so_head = NULL;
- }
- KASSERT((so->so_qstate & SQ_COMP) == 0 &&
- (so->so_qstate & SQ_INCOMP) == 0,
- ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
- so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
- if (so->so_options & SO_ACCEPTCONN) {
- KASSERT((TAILQ_EMPTY(&so->so_comp)),
- ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)),
- ("sofree: so_incomp populated"));
+ if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) {
+ struct socket *sol;
+
+ sol = so->so_listen;
+ KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so));
+
+ /*
+ * To solve race between close of a listening socket and
+ * a socket on its incomplete queue, we need to lock both.
+ * The order is first listening socket, then regular.
+ * Since we don't have SS_NOFDREF neither SS_PROTOREF, this
+ * function and the listening socket are the only pointers
+ * to so. To preserve so and sol, we reference both and then
+ * relock.
+ * After relock the socket may not move to so_comp since it
+ * doesn't have PCB already, but it may be removed from
+ * so_incomp. If that happens, we share responsibility on
+ * freeing the socket, but soclose() has already removed
+ * it from queue.
+ */
+ soref(sol);
+ soref(so);
+ SOCK_UNLOCK(so);
+ SOLISTEN_LOCK(sol);
+ SOCK_LOCK(so);
+ if (so->so_qstate == SQ_INCOMP) {
+ KASSERT(so->so_listen == sol,
+ ("%s: so %p migrated out of sol %p",
+ __func__, so, sol));
+ TAILQ_REMOVE(&sol->sol_incomp, so, so_list);
+ sol->sol_incqlen--;
+ /* This is guaranteed not to be the last. */
+ refcount_release(&sol->so_count);
+ so->so_qstate = SQ_NONE;
+ so->so_listen = NULL;
+ } else
+ KASSERT(so->so_listen == NULL,
+ ("%s: so %p not on (in)comp with so_listen",
+ __func__, so));
+ sorele(sol);
+ KASSERT(so->so_count == 1,
+ ("%s: so %p count %u", __func__, so, so->so_count));
+ so->so_count = 0;
}
+ if (SOLISTENING(so))
+ so->so_error = ECONNABORTED;
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
VNET_SO_ASSERT(so);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
@@ -833,12 +1042,14 @@
* before calling pru_detach. This means that protocols shold not
* assume they can perform socket wakeups, etc, in their detach code.
*/
- sbdestroy(&so->so_snd, so);
- sbdestroy(&so->so_rcv, so);
- seldrain(&so->so_snd.sb_sel);
- seldrain(&so->so_rcv.sb_sel);
- knlist_destroy(&so->so_rcv.sb_sel.si_note);
- knlist_destroy(&so->so_snd.sb_sel.si_note);
+ if (!SOLISTENING(so)) {
+ sbdestroy(&so->so_snd, so);
+ sbdestroy(&so->so_rcv, so);
+ }
+ seldrain(&so->so_rdsel);
+ seldrain(&so->so_wrsel);
+ knlist_destroy(&so->so_rdsel.si_note);
+ knlist_destroy(&so->so_wrsel.si_note);
sodealloc(so);
}
@@ -853,6 +1064,8 @@
int
soclose(struct socket *so)
{
+ struct accept_queue lqueue;
+ bool listening;
int error = 0;
KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
@@ -885,41 +1098,42 @@
drop:
if (so->so_proto->pr_usrreqs->pru_close != NULL)
(*so->so_proto->pr_usrreqs->pru_close)(so);
- ACCEPT_LOCK();
- if (so->so_options & SO_ACCEPTCONN) {
+
+ SOCK_LOCK(so);
+ if ((listening = (so->so_options & SO_ACCEPTCONN))) {
struct socket *sp;
- /*
- * Prevent new additions to the accept queues due
- * to ACCEPT_LOCK races while we are draining them.
- */
- so->so_options &= ~SO_ACCEPTCONN;
- while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
- TAILQ_REMOVE(&so->so_incomp, sp, so_list);
- so->so_incqlen--;
- sp->so_qstate &= ~SQ_INCOMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
- soabort(sp);
- ACCEPT_LOCK();
- }
- while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
- TAILQ_REMOVE(&so->so_comp, sp, so_list);
- so->so_qlen--;
- sp->so_qstate &= ~SQ_COMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
- soabort(sp);
- ACCEPT_LOCK();
+
+ TAILQ_INIT(&lqueue);
+ TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list);
+ TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list);
+
+ so->sol_qlen = so->sol_incqlen = 0;
+
+ TAILQ_FOREACH(sp, &lqueue, so_list) {
+ SOCK_LOCK(sp);
+ sp->so_qstate = SQ_NONE;
+ sp->so_listen = NULL;
+ SOCK_UNLOCK(sp);
+ /* Guaranteed not to be the last. */
+ refcount_release(&so->so_count);
}
- KASSERT((TAILQ_EMPTY(&so->so_comp)),
- ("%s: so_comp populated", __func__));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)),
- ("%s: so_incomp populated", __func__));
}
- SOCK_LOCK(so);
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
- sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */
+ sorele(so);
+ if (listening) {
+ struct socket *sp;
+
+ TAILQ_FOREACH(sp, &lqueue, so_list) {
+ SOCK_LOCK(sp);
+ if (sp->so_count == 0) {
+ SOCK_UNLOCK(sp);
+ soabort(sp);
+ } else
+ /* sp is now in sofree() */
+ SOCK_UNLOCK(sp);
+ }
+ }
CURVNET_RESTORE();
return (error);
}
@@ -951,13 +1165,11 @@
KASSERT(so->so_count == 0, ("soabort: so_count"));
KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
- KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP"));
- KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP"));
+ KASSERT(so->so_qstate == SQ_NONE, ("soabort: !SQ_NONE"));
VNET_SO_ASSERT(so);
if (so->so_proto->pr_usrreqs->pru_abort != NULL)
(*so->so_proto->pr_usrreqs->pru_abort)(so);
- ACCEPT_LOCK();
SOCK_LOCK(so);
sofree(so);
}
@@ -2526,7 +2738,7 @@
} else {
switch (sopt->sopt_name) {
case SO_ACCEPTFILTER:
- error = do_setopt_accept_filter(so, sopt);
+ error = accept_filt_setopt(so, sopt);
if (error)
goto bad;
break;
@@ -2784,7 +2996,7 @@
} else {
switch (sopt->sopt_name) {
case SO_ACCEPTFILTER:
- error = do_getopt_accept_filter(so, sopt);
+ error = accept_filt_getopt(so, sopt);
break;
case SO_LINGER:
@@ -2892,15 +3104,15 @@
break;
case SO_LISTENQLIMIT:
- optval = so->so_qlimit;
+ optval = SOLISTENING(so) ? so->sol_qlimit : 0;
goto integer;
case SO_LISTENQLEN:
- optval = so->so_qlen;
+ optval = SOLISTENING(so) ? so->sol_qlen : 0;
goto integer;
case SO_LISTENINCQLEN:
- optval = so->so_incqlen;
+ optval = SOLISTENING(so) ? so->sol_incqlen : 0;
goto integer;
case SO_TS_CLOCK:
@@ -3047,7 +3259,7 @@
if (so->so_sigio != NULL)
pgsigio(&so->so_sigio, SIGURG, 0);
- selwakeuppri(&so->so_rcv.sb_sel, PSOCK);
+ selwakeuppri(&so->so_rdsel, PSOCK);
}
int
@@ -3067,44 +3279,54 @@
sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
struct thread *td)
{
- int revents = 0;
-
- SOCKBUF_LOCK(&so->so_snd);
- SOCKBUF_LOCK(&so->so_rcv);
- if (events & (POLLIN | POLLRDNORM))
- if (soreadabledata(so))
- revents |= events & (POLLIN | POLLRDNORM);
-
- if (events & (POLLOUT | POLLWRNORM))
- if (sowriteable(so))
- revents |= events & (POLLOUT | POLLWRNORM);
+ int revents;
- if (events & (POLLPRI | POLLRDBAND))
- if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
- revents |= events & (POLLPRI | POLLRDBAND);
-
- if ((events & POLLINIGNEOF) == 0) {
- if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
- revents |= events & (POLLIN | POLLRDNORM);
- if (so->so_snd.sb_state & SBS_CANTSENDMORE)
- revents |= POLLHUP;
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ if (!(events & (POLLIN | POLLRDNORM)))
+ revents = 0;
+ else if (!TAILQ_EMPTY(&so->sol_comp))
+ revents = events & (POLLIN | POLLRDNORM);
+ else {
+ selrecord(td, &so->so_rdsel);
+ revents = 0;
}
- }
-
- if (revents == 0) {
- if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
- selrecord(td, &so->so_rcv.sb_sel);
- so->so_rcv.sb_flags |= SB_SEL;
+ } else {
+ revents = 0;
+ SOCKBUF_LOCK(&so->so_snd);
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (events & (POLLIN | POLLRDNORM))
+ if (soreadabledata(so))
+ revents |= events & (POLLIN | POLLRDNORM);
+ if (events & (POLLOUT | POLLWRNORM))
+ if (sowriteable(so))
+ revents |= events & (POLLOUT | POLLWRNORM);
+ if (events & (POLLPRI | POLLRDBAND))
+ if (so->so_oobmark ||
+ (so->so_rcv.sb_state & SBS_RCVATMARK))
+ revents |= events & (POLLPRI | POLLRDBAND);
+ if ((events & POLLINIGNEOF) == 0) {
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ revents |= events & (POLLIN | POLLRDNORM);
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+ revents |= POLLHUP;
+ }
}
-
- if (events & (POLLOUT | POLLWRNORM)) {
- selrecord(td, &so->so_snd.sb_sel);
- so->so_snd.sb_flags |= SB_SEL;
+ if (revents == 0) {
+ if (events &
+ (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
+ selrecord(td, &so->so_rdsel);
+ so->so_rcv.sb_flags |= SB_SEL;
+ }
+ if (events & (POLLOUT | POLLWRNORM)) {
+ selrecord(td, &so->so_wrsel);
+ so->so_snd.sb_flags |= SB_SEL;
+ }
}
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_UNLOCK(&so->so_snd);
}
-
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_UNLOCK(&so->so_snd);
+ SOCK_UNLOCK(so);
return (revents);
}
@@ -3113,28 +3335,38 @@
{
struct socket *so = kn->kn_fp->f_data;
struct sockbuf *sb;
+ struct knlist *knl;
switch (kn->kn_filter) {
case EVFILT_READ:
kn->kn_fop = &soread_filtops;
+ knl = &so->so_rdsel.si_note;
sb = &so->so_rcv;
break;
case EVFILT_WRITE:
kn->kn_fop = &sowrite_filtops;
+ knl = &so->so_wrsel.si_note;
sb = &so->so_snd;
break;
case EVFILT_EMPTY:
kn->kn_fop = &soempty_filtops;
+ knl = &so->so_wrsel.si_note;
sb = &so->so_snd;
break;
default:
return (EINVAL);
}
- SOCKBUF_LOCK(sb);
- knlist_add(&sb->sb_sel.si_note, kn, 1);
- sb->sb_flags |= SB_KNOTE;
- SOCKBUF_UNLOCK(sb);
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ knlist_add(knl, kn, 1);
+ } else {
+ SOCKBUF_LOCK(sb);
+ knlist_add(knl, kn, 1);
+ sb->sb_flags |= SB_KNOTE;
+ SOCKBUF_UNLOCK(sb);
+ }
+ SOCK_UNLOCK(so);
return (0);
}
@@ -3313,11 +3545,11 @@
{
struct socket *so = kn->kn_fp->f_data;
- SOCKBUF_LOCK(&so->so_rcv);
- knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1);
- if (knlist_empty(&so->so_rcv.sb_sel.si_note))
+ so_rdknl_lock(so);
+ knlist_remove(&so->so_rdsel.si_note, kn, 1);
+ if (!SOLISTENING(so) && knlist_empty(&so->so_rdsel.si_note))
so->so_rcv.sb_flags &= ~SB_KNOTE;
- SOCKBUF_UNLOCK(&so->so_rcv);
+ so_rdknl_unlock(so);
}
/*ARGSUSED*/
@@ -3327,11 +3559,13 @@
struct socket *so;
so = kn->kn_fp->f_data;
- if (so->so_options & SO_ACCEPTCONN) {
- kn->kn_data = so->so_qlen;
- return (!TAILQ_EMPTY(&so->so_comp));
+ if (SOLISTENING(so)) {
+ SOCK_LOCK_ASSERT(so);
+ kn->kn_data = so->sol_qlen;
+ return (!TAILQ_EMPTY(&so->sol_comp));
}
+
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
@@ -3357,11 +3591,11 @@
{
struct socket *so = kn->kn_fp->f_data;
- SOCKBUF_LOCK(&so->so_snd);
- knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1);
- if (knlist_empty(&so->so_snd.sb_sel.si_note))
+ so_wrknl_lock(so);
+ knlist_remove(&so->so_wrsel.si_note, kn, 1);
+ if (!SOLISTENING(so) && knlist_empty(&so->so_wrsel.si_note))
so->so_snd.sb_flags &= ~SB_KNOTE;
- SOCKBUF_UNLOCK(&so->so_snd);
+ so_wrknl_unlock(so);
}
/*ARGSUSED*/
@@ -3371,6 +3605,10 @@
struct socket *so;
so = kn->kn_fp->f_data;
+
+ if (SOLISTENING(so))
+ return (0);
+
SOCKBUF_LOCK_ASSERT(&so->so_snd);
kn->kn_data = sbspace(&so->so_snd);
@@ -3397,6 +3635,10 @@
struct socket *so;
so = kn->kn_fp->f_data;
+
+ if (SOLISTENING(so))
+ return (1);
+
SOCKBUF_LOCK_ASSERT(&so->so_snd);
kn->kn_data = sbused(&so->so_snd);
@@ -3466,31 +3708,31 @@
int ret;
restart:
- ACCEPT_LOCK();
+ if ((head = so->so_listen) != NULL)
+ SOLISTEN_LOCK(head);
SOCK_LOCK(so);
+ /*
+ * XXXGL: should we re-check so->so_listen?
+ */
so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
so->so_state |= SS_ISCONNECTED;
- head = so->so_head;
- if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
+ if (head != NULL && (so->so_qstate == SQ_INCOMP)) {
if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+ TAILQ_REMOVE(&head->sol_incomp, so, so_list);
+ head->sol_incqlen--;
+ TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+ head->sol_qlen++;
+ so->so_qstate = SQ_COMP;
SOCK_UNLOCK(so);
- TAILQ_REMOVE(&head->so_incomp, so, so_list);
- head->so_incqlen--;
- so->so_qstate &= ~SQ_INCOMP;
- TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
- head->so_qlen++;
- so->so_qstate |= SQ_COMP;
- ACCEPT_UNLOCK();
- sorwakeup(head);
- wakeup_one(&head->so_timeo);
+ solisten_wakeup(head); /* unlocks */
} else {
- ACCEPT_UNLOCK();
+ SOLISTEN_UNLOCK(head);
soupcall_set(so, SO_RCV,
- head->so_accf->so_accept_filter->accf_callback,
- head->so_accf->so_accept_filter_arg);
+ head->sol_accept_filter->accf_callback,
+ head->sol_accept_filter_arg);
so->so_options &= ~SO_ACCEPTFILTER;
- ret = head->so_accf->so_accept_filter->accf_callback(so,
- head->so_accf->so_accept_filter_arg, M_NOWAIT);
+ ret = head->sol_accept_filter->accf_callback(so,
+ head->sol_accept_filter_arg, M_NOWAIT);
if (ret == SU_ISCONNECTED)
soupcall_clear(so, SO_RCV);
SOCK_UNLOCK(so);
@@ -3499,8 +3741,9 @@
}
return;
}
+ if (head != NULL)
+ SOLISTEN_UNLOCK(head);
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
wakeup(&so->so_timeo);
sorwakeup(so);
sowwakeup(so);
@@ -3510,16 +3753,17 @@
soisdisconnecting(struct socket *so)
{
- /*
- * Note: This code assumes that SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) are the same.
- */
- SOCKBUF_LOCK(&so->so_rcv);
+ SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTING;
so->so_state |= SS_ISDISCONNECTING;
- socantrcvmore_locked(so);
- SOCKBUF_LOCK(&so->so_snd);
- socantsendmore_locked(so);
+
+ if (!SOLISTENING(so)) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ socantsendmore_locked(so);
+ }
+ SOCK_UNLOCK(so);
wakeup(&so->so_timeo);
}
@@ -3527,17 +3771,18 @@
soisdisconnected(struct socket *so)
{
- /*
- * Note: This code assumes that SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) are the same.
- */
- SOCKBUF_LOCK(&so->so_rcv);
+ SOCK_LOCK(so);
so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
so->so_state |= SS_ISDISCONNECTED;
- socantrcvmore_locked(so);
- SOCKBUF_LOCK(&so->so_snd);
- sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
- socantsendmore_locked(so);
+
+ if (!SOLISTENING(so)) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
+ socantsendmore_locked(so);
+ }
+ SOCK_UNLOCK(so);
wakeup(&so->so_timeo);
}
@@ -3559,11 +3804,12 @@
* Register per-socket buffer upcalls.
*/
void
-soupcall_set(struct socket *so, int which,
- int (*func)(struct socket *, void *, int), void *arg)
+soupcall_set(struct socket *so, int which, so_upcall_t func, void *arg)
{
struct sockbuf *sb;
+ KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
+
switch (which) {
case SO_RCV:
sb = &so->so_rcv;
@@ -3575,10 +3821,6 @@
panic("soupcall_set: bad which");
}
SOCKBUF_LOCK_ASSERT(sb);
-#if 0
- /* XXX: accf_http actually wants to do this on purpose. */
- KASSERT(sb->sb_upcall == NULL, ("soupcall_set: overwriting upcall"));
-#endif
sb->sb_upcall = func;
sb->sb_upcallarg = arg;
sb->sb_flags |= SB_UPCALL;
@@ -3589,6 +3831,8 @@
{
struct sockbuf *sb;
+ KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
+
switch (which) {
case SO_RCV:
sb = &so->so_rcv;
@@ -3600,12 +3844,110 @@
panic("soupcall_clear: bad which");
}
SOCKBUF_LOCK_ASSERT(sb);
- KASSERT(sb->sb_upcall != NULL, ("soupcall_clear: no upcall to clear"));
+ KASSERT(sb->sb_upcall != NULL,
+ ("%s: so %p no upcall to clear", __func__, so));
sb->sb_upcall = NULL;
sb->sb_upcallarg = NULL;
sb->sb_flags &= ~SB_UPCALL;
}
+void
+solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg)
+{
+
+ SOLISTEN_LOCK_ASSERT(so);
+ so->sol_upcall = func;
+ so->sol_upcallarg = arg;
+}
+
+static void
+so_rdknl_lock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK(so);
+ else
+ SOCKBUF_LOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_unlock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_locked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK_ASSERT(so);
+ else
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_unlocked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK_ASSERT(so);
+ else
+ SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_wrknl_lock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK(so);
+ else
+ SOCKBUF_LOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_unlock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_locked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK_ASSERT(so);
+ else
+ SOCKBUF_LOCK_ASSERT(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_unlocked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK_ASSERT(so);
+ else
+ SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+}
+
/*
* Create an external-format (``xsocket'') structure using the information in
* the kernel-format socket structure pointed to by so. This is done to
@@ -3627,32 +3969,24 @@
xso->so_pcb = so->so_pcb;
xso->xso_protocol = so->so_proto->pr_protocol;
xso->xso_family = so->so_proto->pr_domain->dom_family;
- xso->so_qlen = so->so_qlen;
- xso->so_incqlen = so->so_incqlen;
- xso->so_qlimit = so->so_qlimit;
xso->so_timeo = so->so_timeo;
xso->so_error = so->so_error;
- xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
- xso->so_oobmark = so->so_oobmark;
- sbtoxsockbuf(&so->so_snd, &xso->so_snd);
- sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
xso->so_uid = so->so_cred->cr_uid;
-}
-
-
-/*
- * Socket accessor functions to provide external consumers with
- * a safe interface to socket state
- *
- */
-
-void
-so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *),
- void *arg)
-{
-
- TAILQ_FOREACH(so, &so->so_comp, so_list)
- func(so, arg);
+ xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
+ if (SOLISTENING(so)) {
+ xso->so_qlen = so->sol_qlen;
+ xso->so_incqlen = so->sol_incqlen;
+ xso->so_qlimit = so->sol_qlimit;
+ xso->so_oobmark = 0;
+ bzero(&xso->so_snd, sizeof(xso->so_snd));
+ bzero(&xso->so_rcv, sizeof(xso->so_rcv));
+ } else {
+ xso->so_state |= so->so_qstate;
+ xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0;
+ xso->so_oobmark = so->so_oobmark;
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ }
}
struct sockbuf *
Index: sys/kern/uipc_syscalls.c
===================================================================
--- sys/kern/uipc_syscalls.c
+++ sys/kern/uipc_syscalls.c
@@ -68,13 +68,6 @@
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
-/*
- * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
- * and SOCK_NONBLOCK.
- */
-#define ACCEPT4_INHERIT 0x1
-#define ACCEPT4_COMPAT 0x2
-
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
@@ -350,59 +343,22 @@
(flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps);
if (error != 0)
goto done;
- ACCEPT_LOCK();
- if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
- ACCEPT_UNLOCK();
- error = EWOULDBLOCK;
- goto noconnection;
- }
- while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
- head->so_error = ECONNABORTED;
- break;
- }
- error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
- "accept", 0);
- if (error != 0) {
- ACCEPT_UNLOCK();
- goto noconnection;
- }
- }
- if (head->so_error) {
- error = head->so_error;
- head->so_error = 0;
- ACCEPT_UNLOCK();
+ SOCK_LOCK(head);
+ if (!SOLISTENING(head)) {
+ SOCK_UNLOCK(head);
+ error = EINVAL;
goto noconnection;
}
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- if (flags & ACCEPT4_INHERIT)
- so->so_state |= (head->so_state & SS_NBIO);
- else
- so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
+ error = solisten_dequeue(head, &so, flags);
+ if (error != 0)
+ goto noconnection;
/* An extra reference on `nfp' has been held for us by falloc(). */
td->td_retval[0] = fd;
- /* connection has been removed from the listen queue */
- KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
+ /* Connection has been removed from the listen queue. */
+ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
if (flags & ACCEPT4_INHERIT) {
pgid = fgetown(&head->so_sigio);
@@ -420,7 +376,6 @@
(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
tmp = fflag & FASYNC;
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
- sa = NULL;
error = soaccept(so, &sa);
if (error != 0)
goto noconnection;
@@ -558,7 +513,7 @@
}
SOCK_LOCK(so);
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
- error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
+ error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH,
"connec", 0);
if (error != 0) {
if (error == EINTR || error == ERESTART)
Index: sys/kern/uipc_usrreq.c
===================================================================
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -430,7 +430,7 @@
unp->unp_socket = so;
so->so_pcb = unp;
unp->unp_refcount = 1;
- if (so->so_head != NULL)
+ if (so->so_listen != NULL)
unp->unp_flags |= UNP_NASCENT;
UNP_LIST_LOCK();
@@ -552,7 +552,7 @@
UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
- VOP_UNP_BIND(vp, unp->unp_socket);
+ VOP_UNP_BIND(vp, unp);
unp->unp_vnode = vp;
unp->unp_addr = soun;
unp->unp_flags &= ~UNP_BINDING;
@@ -607,6 +607,7 @@
uipc_close(struct socket *so)
{
struct unpcb *unp, *unp2;
+ struct vnode *vp = NULL;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
@@ -619,8 +620,14 @@
unp_disconnect(unp, unp2);
UNP_PCB_UNLOCK(unp2);
}
+ if (SOLISTENING(so) && ((vp = unp->unp_vnode) != NULL)) {
+ VOP_UNP_DETACH(vp);
+ unp->unp_vnode = NULL;
+ }
UNP_PCB_UNLOCK(unp);
UNP_LINK_WUNLOCK();
+ if (vp)
+ vrele(vp);
}
static int
@@ -663,16 +670,11 @@
--unp_count;
UNP_LIST_UNLOCK();
- if ((unp->unp_flags & UNP_NASCENT) != 0) {
- UNP_PCB_LOCK(unp);
- goto teardown;
- }
UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
+ if ((unp->unp_flags & UNP_NASCENT) != 0)
+ goto teardown;
- /*
- * XXXRW: Should assert vp->v_socket == so.
- */
if ((vp = unp->unp_vnode) != NULL) {
VOP_UNP_DETACH(vp);
unp->unp_vnode = NULL;
@@ -696,8 +698,8 @@
UNP_PCB_UNLOCK(ref);
}
local_unp_rights = unp_rights;
- UNP_LINK_WUNLOCK();
teardown:
+ UNP_LINK_WUNLOCK();
unp->unp_socket->so_pcb = NULL;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
@@ -761,7 +763,6 @@
error = solisten_proto_check(so);
if (error == 0) {
cru2x(td->td_ucred, &unp->unp_peercred);
- unp->unp_flags |= UNP_HAVEPCCACHED;
solisten_proto(so, backlog);
}
SOCK_UNLOCK(so);
@@ -1319,7 +1320,7 @@
{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
struct vnode *vp;
- struct socket *so2, *so3;
+ struct socket *so2;
struct unpcb *unp, *unp2, *unp3;
struct nameidata nd;
char buf[SOCK_MAXADDRLEN];
@@ -1386,31 +1387,30 @@
* and to protect simultaneous locking of multiple pcbs.
*/
UNP_LINK_WLOCK();
- VOP_UNP_CONNECT(vp, &so2);
- if (so2 == NULL) {
+ VOP_UNP_CONNECT(vp, &unp2);
+ if (unp2 == NULL) {
error = ECONNREFUSED;
goto bad2;
}
+ so2 = unp2->unp_socket;
if (so->so_type != so2->so_type) {
error = EPROTOTYPE;
goto bad2;
}
+ UNP_PCB_LOCK(unp);
+ UNP_PCB_LOCK(unp2);
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
if (so2->so_options & SO_ACCEPTCONN) {
CURVNET_SET(so2->so_vnet);
- so3 = sonewconn(so2, 0);
+ so2 = sonewconn(so2, 0);
CURVNET_RESTORE();
} else
- so3 = NULL;
- if (so3 == NULL) {
+ so2 = NULL;
+ if (so2 == NULL) {
error = ECONNREFUSED;
- goto bad2;
+ goto bad3;
}
- unp = sotounpcb(so);
- unp2 = sotounpcb(so2);
- unp3 = sotounpcb(so3);
- UNP_PCB_LOCK(unp);
- UNP_PCB_LOCK(unp2);
+ unp3 = sotounpcb(so2);
UNP_PCB_LOCK(unp3);
if (unp2->unp_addr != NULL) {
bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
@@ -1431,30 +1431,24 @@
* listen(); uipc_listen() cached that process's credentials
* at that time so we can use them now.
*/
- KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
- ("unp_connect: listener without cached peercred"));
memcpy(&unp->unp_peercred, &unp2->unp_peercred,
sizeof(unp->unp_peercred));
unp->unp_flags |= UNP_HAVEPC;
if (unp2->unp_flags & UNP_WANTCRED)
unp3->unp_flags |= UNP_WANTCRED;
- UNP_PCB_UNLOCK(unp3);
UNP_PCB_UNLOCK(unp2);
- UNP_PCB_UNLOCK(unp);
+ unp2 = unp3;
#ifdef MAC
- mac_socketpeer_set_from_socket(so, so3);
- mac_socketpeer_set_from_socket(so3, so);
+ mac_socketpeer_set_from_socket(so, so2);
+ mac_socketpeer_set_from_socket(so2, so);
#endif
-
- so2 = so3;
}
- unp = sotounpcb(so);
- KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
- unp2 = sotounpcb(so2);
- KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL"));
- UNP_PCB_LOCK(unp);
- UNP_PCB_LOCK(unp2);
+
+ KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
+ sotounpcb(so2) == unp2,
+ ("%s: unp2 %p so2 %p", __func__, unp2, so2));
error = unp_connect2(so, so2, PRU_CONNECT);
+bad3:
UNP_PCB_UNLOCK(unp2);
UNP_PCB_UNLOCK(unp);
bad2:
@@ -2237,8 +2231,7 @@
static void
unp_gc_process(struct unpcb *unp)
{
- struct socket *soa;
- struct socket *so;
+ struct socket *so, *soa;
struct file *fp;
/* Already processed. */
@@ -2258,28 +2251,30 @@
return;
}
- /*
- * Mark all sockets we reference with RIGHTS.
- */
so = unp->unp_socket;
- if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
- SOCKBUF_LOCK(&so->so_rcv);
- unp_scan(so->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&so->so_rcv);
- }
-
- /*
- * Mark all sockets in our accept queue.
- */
- ACCEPT_LOCK();
- TAILQ_FOREACH(soa, &so->so_comp, so_list) {
- if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
- continue;
- SOCKBUF_LOCK(&soa->so_rcv);
- unp_scan(soa->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&soa->so_rcv);
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ /*
+ * Mark all sockets in our accept queue.
+ */
+ TAILQ_FOREACH(soa, &so->sol_comp, so_list) {
+ if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
+ continue;
+ SOCKBUF_LOCK(&soa->so_rcv);
+ unp_scan(soa->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&soa->so_rcv);
+ }
+ } else {
+ /*
+ * Mark all sockets we reference with RIGHTS.
+ */
+ if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ unp_scan(so->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
}
- ACCEPT_UNLOCK();
+ SOCK_UNLOCK(so);
unp->unp_gcflag |= UNPGC_SCANNED;
}
@@ -2399,7 +2394,8 @@
UNP_LIST_LOCK();
unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
UNP_LIST_UNLOCK();
- unp_dispose_mbuf(so->so_rcv.sb_mb);
+ if (!SOLISTENING(so))
+ unp_dispose_mbuf(so->so_rcv.sb_mb);
}
static void
@@ -2454,7 +2450,6 @@
void
vfs_unp_reclaim(struct vnode *vp)
{
- struct socket *so;
struct unpcb *unp;
int active;
@@ -2464,10 +2459,7 @@
active = 0;
UNP_LINK_WLOCK();
- VOP_UNP_CONNECT(vp, &so);
- if (so == NULL)
- goto done;
- unp = sotounpcb(so);
+ VOP_UNP_CONNECT(vp, &unp);
if (unp == NULL)
goto done;
UNP_PCB_LOCK(unp);
@@ -2503,10 +2495,6 @@
db_printf("%sUNP_HAVEPC", comma ? ", " : "");
comma = 1;
}
- if (unp_flags & UNP_HAVEPCCACHED) {
- db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : "");
- comma = 1;
- }
if (unp_flags & UNP_WANTCRED) {
db_printf("%sUNP_WANTCRED", comma ? ", " : "");
comma = 1;
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -1128,7 +1128,7 @@
vop_stdunp_bind(struct vop_unp_bind_args *ap)
{
- ap->a_vp->v_socket = ap->a_socket;
+ ap->a_vp->v_unpcb = ap->a_unpcb;
return (0);
}
@@ -1136,7 +1136,7 @@
vop_stdunp_connect(struct vop_unp_connect_args *ap)
{
- *ap->a_socket = ap->a_vp->v_socket;
+ *ap->a_unpcb = ap->a_vp->v_unpcb;
return (0);
}
@@ -1144,7 +1144,7 @@
vop_stdunp_detach(struct vop_unp_detach_args *ap)
{
- ap->a_vp->v_socket = NULL;
+ ap->a_vp->v_unpcb = NULL;
return (0);
}
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -2994,7 +2994,10 @@
/* XXX Elsewhere we detect an already freed vnode via NULL v_op. */
vp->v_op = NULL;
#endif
- bzero(&vp->v_un, sizeof(vp->v_un));
+ vp->v_mountedhere = NULL;
+ vp->v_unpcb = NULL;
+ vp->v_rdev = NULL;
+ vp->v_fifoinfo = NULL;
vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
vp->v_iflag = 0;
vp->v_vflag = 0;
Index: sys/kern/vnode_if.src
===================================================================
--- sys/kern/vnode_if.src
+++ sys/kern/vnode_if.src
@@ -662,7 +662,7 @@
vop_unp_bind {
IN struct vnode *vp;
- IN struct socket *socket;
+ IN struct unpcb *unpcb;
};
@@ -670,7 +670,7 @@
vop_unp_connect {
IN struct vnode *vp;
- OUT struct socket **socket;
+ OUT struct unpcb **unpcb;
};
Index: sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
===================================================================
--- sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
+++ sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
@@ -614,21 +614,13 @@
pcb = ng_btsocket_l2cap_pcb_by_addr(&rt->src, ip->psm);
if (pcb != NULL) {
- struct socket *so1 = NULL;
+ struct socket *so1;
mtx_lock(&pcb->pcb_mtx);
- /*
- * First check the pending connections queue and if we have
- * space then create new socket and set proper source address.
- */
-
- if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
- CURVNET_SET(pcb->so->so_vnet);
- so1 = sonewconn(pcb->so, 0);
- CURVNET_RESTORE();
- }
-
+ CURVNET_SET(pcb->so->so_vnet);
+ so1 = sonewconn(pcb->so, 0);
+ CURVNET_RESTORE();
if (so1 == NULL) {
result = NG_L2CAP_NO_RESOURCES;
goto respond;
Index: sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
===================================================================
--- sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
+++ sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
@@ -1149,7 +1149,7 @@
{
ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb1 = NULL;
ng_btsocket_l2cap_pcb_p l2pcb = NULL;
- struct socket *so1 = NULL;
+ struct socket *so1;
mtx_assert(&s->session_mtx, MA_OWNED);
@@ -1171,11 +1171,9 @@
mtx_lock(&pcb->pcb_mtx);
- if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
- CURVNET_SET(pcb->so->so_vnet);
- so1 = sonewconn(pcb->so, 0);
- CURVNET_RESTORE();
- }
+ CURVNET_SET(pcb->so->so_vnet);
+ so1 = sonewconn(pcb->so, 0);
+ CURVNET_RESTORE();
mtx_unlock(&pcb->pcb_mtx);
@@ -1405,47 +1403,25 @@
static int
ng_btsocket_rfcomm_session_accept(ng_btsocket_rfcomm_session_p s0)
{
- struct socket *l2so = NULL;
+ struct socket *l2so;
struct sockaddr_l2cap *l2sa = NULL;
ng_btsocket_l2cap_pcb_t *l2pcb = NULL;
ng_btsocket_rfcomm_session_p s = NULL;
- int error = 0;
+ int error;
mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
mtx_assert(&s0->session_mtx, MA_OWNED);
- /* Check if there is a complete L2CAP connection in the queue */
- if ((error = s0->l2so->so_error) != 0) {
+ SOLISTEN_LOCK(s0->l2so);
+ error = solisten_dequeue(s0->l2so, &l2so, 0);
+ if (error == EWOULDBLOCK)
+ return (error);
+ if (error) {
NG_BTSOCKET_RFCOMM_ERR(
"%s: Could not accept connection on L2CAP socket, error=%d\n", __func__, error);
- s0->l2so->so_error = 0;
-
return (error);
}
- ACCEPT_LOCK();
- if (TAILQ_EMPTY(&s0->l2so->so_comp)) {
- ACCEPT_UNLOCK();
- if (s0->l2so->so_rcv.sb_state & SBS_CANTRCVMORE)
- return (ECONNABORTED);
- return (EWOULDBLOCK);
- }
-
- /* Accept incoming L2CAP connection */
- l2so = TAILQ_FIRST(&s0->l2so->so_comp);
- if (l2so == NULL)
- panic("%s: l2so == NULL\n", __func__);
-
- TAILQ_REMOVE(&s0->l2so->so_comp, l2so, so_list);
- s0->l2so->so_qlen --;
- l2so->so_qstate &= ~SQ_COMP;
- l2so->so_head = NULL;
- SOCK_LOCK(l2so);
- soref(l2so);
- l2so->so_state |= SS_NBIO;
- SOCK_UNLOCK(l2so);
- ACCEPT_UNLOCK();
-
error = soaccept(l2so, (struct sockaddr **) &l2sa);
if (error != 0) {
NG_BTSOCKET_RFCOMM_ERR(
Index: sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
===================================================================
--- sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
+++ sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
@@ -471,20 +471,13 @@
pcb = ng_btsocket_sco_pcb_by_addr(&rt->src);
if (pcb != NULL) {
- struct socket *so1 = NULL;
+ struct socket *so1;
/* pcb is locked */
- /*
- * First check the pending connections queue and if we have
- * space then create new socket and set proper source address.
- */
-
- if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
- CURVNET_SET(pcb->so->so_vnet);
- so1 = sonewconn(pcb->so, 0);
- CURVNET_RESTORE();
- }
+ CURVNET_SET(pcb->so->so_vnet);
+ so1 = sonewconn(pcb->so, 0);
+ CURVNET_RESTORE();
if (so1 == NULL) {
status = 0x0d; /* Rejected due to limited resources */
Index: sys/netgraph/ng_ksocket.c
===================================================================
--- sys/netgraph/ng_ksocket.c
+++ sys/netgraph/ng_ksocket.c
@@ -153,8 +153,7 @@
};
/* Helper functions */
-static int ng_ksocket_check_accept(priv_p);
-static void ng_ksocket_finish_accept(priv_p);
+static int ng_ksocket_accept(priv_p);
static int ng_ksocket_incoming(struct socket *so, void *arg, int waitflag);
static int ng_ksocket_parse(const struct ng_ksocket_alias *aliases,
const char *s, int family);
@@ -698,6 +697,7 @@
ERROUT(ENXIO);
/* Listen */
+ so->so_state |= SS_NBIO;
error = solisten(so, *((int32_t *)msg->data), td);
break;
}
@@ -716,21 +716,16 @@
if (priv->flags & KSF_ACCEPTING)
ERROUT(EALREADY);
- error = ng_ksocket_check_accept(priv);
- if (error != 0 && error != EWOULDBLOCK)
- ERROUT(error);
-
/*
* If a connection is already complete, take it.
* Otherwise let the upcall function deal with
* the connection when it comes in.
*/
+ error = ng_ksocket_accept(priv);
+ if (error != 0 && error != EWOULDBLOCK)
+ ERROUT(error);
priv->response_token = msg->header.token;
raddr = priv->response_addr = NGI_RETADDR(item);
- if (error == 0) {
- ng_ksocket_finish_accept(priv);
- } else
- priv->flags |= KSF_ACCEPTING;
break;
}
@@ -1068,13 +1063,8 @@
}
/* Check whether a pending accept operation has completed */
- if (priv->flags & KSF_ACCEPTING) {
- error = ng_ksocket_check_accept(priv);
- if (error != EWOULDBLOCK)
- priv->flags &= ~KSF_ACCEPTING;
- if (error == 0)
- ng_ksocket_finish_accept(priv);
- }
+ if (priv->flags & KSF_ACCEPTING)
+ (void )ng_ksocket_accept(priv);
/*
* If we don't have a hook, we must handle data events later. When
@@ -1171,35 +1161,8 @@
}
}
-/*
- * Check for a completed incoming connection and return 0 if one is found.
- * Otherwise return the appropriate error code.
- */
static int
-ng_ksocket_check_accept(priv_p priv)
-{
- struct socket *const head = priv->so;
- int error;
-
- if ((error = head->so_error) != 0) {
- head->so_error = 0;
- return error;
- }
- /* Unlocked read. */
- if (TAILQ_EMPTY(&head->so_comp)) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE)
- return ECONNABORTED;
- return EWOULDBLOCK;
- }
- return 0;
-}
-
-/*
- * Handle the first completed incoming connection, assumed to be already
- * on the socket's so_comp queue.
- */
-static void
-ng_ksocket_finish_accept(priv_p priv)
+ng_ksocket_accept(priv_p priv)
{
struct socket *const head = priv->so;
struct socket *so;
@@ -1211,23 +1174,15 @@
int len;
int error;
- ACCEPT_LOCK();
- so = TAILQ_FIRST(&head->so_comp);
- if (so == NULL) { /* Should never happen */
- ACCEPT_UNLOCK();
- return;
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
+ if (error == EWOULDBLOCK) {
+ priv->flags |= KSF_ACCEPTING;
+ return (error);
}
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- SOCK_LOCK(so);
- soref(so);
- so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
-
- /* XXX KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); */
+ priv->flags &= ~KSF_ACCEPTING;
+ if (error)
+ return (error);
soaccept(so, &sa);
@@ -1288,6 +1243,8 @@
out:
if (sa != NULL)
free(sa, M_SONAME);
+
+ return (0);
}
/*
Index: sys/netinet/sctp_input.c
===================================================================
--- sys/netinet/sctp_input.c
+++ sys/netinet/sctp_input.c
@@ -161,13 +161,11 @@
*abort_no_unlock = 1;
goto outnow;
}
- /* We are only accepting if we have a socket with positive
- * so_qlimit. */
+ /* We are only accepting if we have a listening socket. */
if ((stcb == NULL) &&
((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
- (inp->sctp_socket == NULL) ||
- (inp->sctp_socket->so_qlimit == 0))) {
+ (!SCTP_IS_LISTENING(inp)))) {
/*
* FIX ME ?? What about TCP model and we have a
* match/restart case? Actually no fix is needed. the lookup
@@ -1605,8 +1603,7 @@
sctp_stop_all_cookie_timers(stcb);
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
- (inp->sctp_socket->so_qlimit == 0)
- ) {
+ (!SCTP_IS_LISTENING(inp))) {
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -1806,7 +1803,7 @@
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
- (inp->sctp_socket->so_qlimit == 0)) {
+ (!SCTP_IS_LISTENING(inp))) {
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
struct socket *so;
#endif
@@ -2317,7 +2314,7 @@
*notification = SCTP_NOTIFY_ASSOC_UP;
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
- (inp->sctp_socket->so_qlimit == 0)) {
+ (!SCTP_IS_LISTENING(inp))) {
/*
* This is an endpoint that called connect() how it got a
* cookie that is NEW is a bit of a mystery. It must be that
@@ -2343,7 +2340,7 @@
SCTP_SOCKET_UNLOCK(so, 1);
#endif
} else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
- (inp->sctp_socket->so_qlimit)) {
+ (SCTP_IS_LISTENING(inp))) {
/*
* We don't want to do anything with this one. Since it is
* the listening guy. The timer will get started for
@@ -5204,8 +5201,9 @@
* closed. We opened and bound.. and are now no
* longer listening.
*/
-
- if ((stcb == NULL) && (inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
+ if ((stcb == NULL) &&
+ (!SCTP_IS_LISTENING(inp) ||
+ inp->sctp_socket->sol_qlen >= inp->sctp_socket->sol_qlimit)) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) {
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
Index: sys/netinet/sctp_os_bsd.h
===================================================================
--- sys/netinet/sctp_os_bsd.h
+++ sys/netinet/sctp_os_bsd.h
@@ -462,8 +462,6 @@
#define SCTP_SHA256_UPDATE SHA256_Update
#define SCTP_SHA256_FINAL(x,y) SHA256_Final((caddr_t)x, y)
-#endif
-
#define SCTP_DECREMENT_AND_CHECK_REFCOUNT(addr) (atomic_fetchadd_int(addr, -1) == 1)
#if defined(INVARIANTS)
#define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \
@@ -484,3 +482,7 @@
} \
}
#endif
+
+#define SCTP_IS_LISTENING(inp) (((inp)->sctp_flags & SCTP_PCB_FLAGS_ACCEPTING) != 0)
+
+#endif
Index: sys/netinet/sctp_output.c
===================================================================
--- sys/netinet/sctp_output.c
+++ sys/netinet/sctp_output.c
@@ -12595,7 +12595,7 @@
(void *)addr,
sndlen);
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
- (inp->sctp_socket->so_qlimit)) {
+ SCTP_IS_LISTENING(inp)) {
/* The listener can NOT send */
SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOTCONN);
error = ENOTCONN;
Index: sys/netinet/sctp_pcb.c
===================================================================
--- sys/netinet/sctp_pcb.c
+++ sys/netinet/sctp_pcb.c
@@ -1311,7 +1311,7 @@
* it is the acceptor, then do the special_lookup to hash
* and find the real inp.
*/
- if ((inp->sctp_socket) && (inp->sctp_socket->so_qlimit)) {
+ if ((inp->sctp_socket) && SCTP_IS_LISTENING(inp)) {
/* to is peer addr, from is my addr */
stcb = sctp_tcb_special_locate(inp_p, remote, local,
netp, inp->def_vrf_id);
@@ -1884,7 +1884,7 @@
if (tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
continue;
}
- if (tinp->sctp_socket->so_qlimit) {
+ if (SCTP_IS_LISTENING(tinp)) {
continue;
}
SCTP_INP_WLOCK(tinp);
@@ -4856,7 +4856,7 @@
inp->sctp_flags &= ~SCTP_PCB_FLAGS_CONNECTED;
inp->sctp_flags |= SCTP_PCB_FLAGS_WAS_CONNECTED;
if (so) {
- SOCK_LOCK(so);
+ SOCKBUF_LOCK(&so->so_rcv);
if (so->so_rcv.sb_cc == 0) {
so->so_state &= ~(SS_ISCONNECTING |
SS_ISDISCONNECTING |
Index: sys/netinet/sctp_syscalls.c
===================================================================
--- sys/netinet/sctp_syscalls.c
+++ sys/netinet/sctp_syscalls.c
@@ -152,29 +152,11 @@
td->td_retval[0] = fd;
CURVNET_SET(head->so_vnet);
- so = sonewconn(head, SS_ISCONNECTED);
+ so = sopeeloff(head);
if (so == NULL) {
error = ENOMEM;
goto noconnection;
}
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- SOCK_LOCK(so);
- soref(so); /* file descriptor reference */
- SOCK_UNLOCK(so);
-
- ACCEPT_LOCK();
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_state &= ~SS_NOFDREF;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- ACCEPT_UNLOCK();
finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
if (error != 0)
Index: sys/netinet/sctp_sysctl.c
===================================================================
--- sys/netinet/sctp_sysctl.c
+++ sys/netinet/sctp_sysctl.c
@@ -410,16 +410,17 @@
xinpcb.socket = inp->sctp_socket;
so = inp->sctp_socket;
if ((so == NULL) ||
+ (!SCTP_IS_LISTENING(inp)) ||
(inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
xinpcb.qlen = 0;
xinpcb.maxqlen = 0;
} else {
- xinpcb.qlen = so->so_qlen;
- xinpcb.qlen_old = so->so_qlen > USHRT_MAX ?
- USHRT_MAX : (uint16_t)so->so_qlen;
- xinpcb.maxqlen = so->so_qlimit;
- xinpcb.maxqlen_old = so->so_qlimit > USHRT_MAX ?
- USHRT_MAX : (uint16_t)so->so_qlimit;
+ xinpcb.qlen = so->sol_qlen;
+ xinpcb.qlen_old = so->sol_qlen > USHRT_MAX ?
+ USHRT_MAX : (uint16_t)so->sol_qlen;
+ xinpcb.maxqlen = so->sol_qlimit;
+ xinpcb.maxqlen_old = so->sol_qlimit > USHRT_MAX ?
+ USHRT_MAX : (uint16_t)so->sol_qlimit;
}
SCTP_INP_INCR_REF(inp);
SCTP_INP_RUNLOCK(inp);
Index: sys/netinet/sctp_usrreq.c
===================================================================
--- sys/netinet/sctp_usrreq.c
+++ sys/netinet/sctp_usrreq.c
@@ -7039,7 +7039,7 @@
if (tinp && (tinp != inp) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
- (tinp->sctp_socket->so_qlimit)) {
+ (SCTP_IS_LISTENING(tinp))) {
/*
* we have a listener already and
* its not this inp.
@@ -7083,7 +7083,7 @@
if (tinp && (tinp != inp) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
- (tinp->sctp_socket->so_qlimit)) {
+ (SCTP_IS_LISTENING(tinp))) {
/*
* we have a listener already and its not
* this inp.
@@ -7137,18 +7137,18 @@
return (error);
}
}
- SOCK_LOCK(so);
- /* It appears for 7.0 and on, we must always call this. */
- solisten_proto(so, backlog);
- if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
- /* remove the ACCEPTCONN flag for one-to-many sockets */
- so->so_options &= ~SO_ACCEPTCONN;
- }
- if (backlog == 0) {
- /* turning off listen */
- so->so_options &= ~SO_ACCEPTCONN;
+ SCTP_INP_WLOCK(inp);
+ if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) {
+ SOCK_LOCK(so);
+ solisten_proto(so, backlog);
+ SOCK_UNLOCK(so);
}
- SOCK_UNLOCK(so);
+ /* A zero backlog turns listening off, as the old code did. */
+ if (backlog > 0)
+ inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
+ else
+ inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING;
+ SCTP_INP_WUNLOCK(inp);
return (error);
}
Index: sys/netinet/sctputil.c
===================================================================
--- sys/netinet/sctputil.c
+++ sys/netinet/sctputil.c
@@ -2792,6 +2792,7 @@
stcb->sctp_socket->so_error = ECONNABORTED;
}
}
+ SOCK_UNLOCK(stcb->sctp_socket);
}
/* Wake ANY sleepers */
#if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
@@ -2811,7 +2812,7 @@
if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
(stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
((state == SCTP_COMM_LOST) || (state == SCTP_CANT_STR_ASSOC))) {
- socantrcvmore_locked(stcb->sctp_socket);
+ socantrcvmore(stcb->sctp_socket);
}
sorwakeup(stcb->sctp_socket);
sowwakeup(stcb->sctp_socket);
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -1387,10 +1387,10 @@
TCP_PROBE3(debug__input, tp, th, m);
tcp_dooptions(&to, optp, optlen, TO_SYN);
#ifdef TCP_RFC7413
- if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+ if (syncache_add(&inc, &to, th, inp, so, m, NULL, NULL))
goto tfo_socket_result;
#else
- syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+ syncache_add(&inc, &to, th, inp, so, m, NULL, NULL);
#endif
/*
* Entry added to syncache and mbuf consumed.
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -1576,7 +1576,6 @@
("tcp_close: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
Index: sys/netinet/tcp_syncache.h
===================================================================
--- sys/netinet/tcp_syncache.h
+++ sys/netinet/tcp_syncache.h
@@ -42,7 +42,7 @@
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
int syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
+ struct tcphdr *, struct inpcb *, struct socket *, struct mbuf *,
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -146,9 +146,9 @@
struct socket *);
static void syncookie_reseed(void *);
#ifdef INVARIANTS
-static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
- struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
- struct socket *lso);
+static int syncookie_cmp(struct in_conninfo *, struct syncache_head *,
+ struct syncache *, struct tcphdr *, struct tcpopt *,
+ struct socket *);
#endif
/*
@@ -1175,10 +1175,11 @@
}
#ifdef TCP_RFC7413
-static void
-syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+static struct socket *
+syncache_tfo_expand(struct syncache *sc, struct socket *lso, struct mbuf *m,
uint64_t response_cookie)
{
+ struct socket *so;
struct inpcb *inp;
struct tcpcb *tp;
unsigned int *pending_counter;
@@ -1190,12 +1191,12 @@
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
- pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
- *lsop = syncache_socket(sc, *lsop, m);
- if (*lsop == NULL) {
+ pending_counter = intotcpcb(sotoinpcb(lso))->t_tfo_pending;
+ so = syncache_socket(sc, lso, m);
+ if (so == NULL) {
TCPSTAT_INC(tcps_sc_aborted);
atomic_subtract_int(pending_counter, 1);
} else {
- inp = sotoinpcb(*lsop);
+ inp = sotoinpcb(so);
tp = intotcpcb(inp);
tp->t_flags |= TF_FASTOPEN;
tp->t_tfo_cookie = response_cookie;
@@ -1204,6 +1205,7 @@
tp->t_tfo_pending = pending_counter;
TCPSTAT_INC(tcps_sc_completed);
}
+ return (so);
}
#endif /* TCP_RFC7413 */
@@ -1228,11 +1230,10 @@
*/
int
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
+ struct inpcb *inp, struct socket *so, struct mbuf *m, void *tod,
void *todctx)
{
struct tcpcb *tp;
- struct socket *so;
struct syncache *sc = NULL;
struct syncache_head *sch;
struct mbuf *ipopts = NULL;
@@ -1263,7 +1264,7 @@
* Combine all so/tp operations very early to drop the INP lock as
* soon as possible.
*/
- so = *lsop;
+ KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));
tp = sototcpcb(so);
cred = crhold(so->so_cred);
@@ -1274,7 +1275,7 @@
#endif
ip_ttl = inp->inp_ip_ttl;
ip_tos = inp->inp_ip_tos;
- win = sbspace(&so->so_rcv);
+ win = so->sol_sbrcv_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
#ifdef TCP_RFC7413
@@ -1287,7 +1288,7 @@
* listen queue with bogus TFO connections.
*/
if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
- (so->so_qlimit / 2)) {
+ (so->sol_qlimit / 2)) {
int result;
result = tcp_fastopen_check_cookie(inc,
@@ -1583,10 +1584,7 @@
}
done:
- if (m) {
- *lsop = NULL;
- m_freem(m);
- }
+ m_freem(m);
#ifdef TCP_RFC7413
/*
* If tfo_pending is not NULL here, then a TFO SYN that did not
@@ -2115,7 +2113,7 @@
sc->sc_flags |= SCF_WINSCALE;
}
- wnd = sbspace(&lso->so_rcv);
+ wnd = lso->sol_sbrcv_hiwat;
wnd = imax(wnd, 0);
wnd = imin(wnd, TCP_MAXWIN);
sc->sc_wnd = wnd;
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -352,7 +352,6 @@
("tcp_twstart: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
@@ -491,7 +490,6 @@
if (inp->inp_flags & INP_SOCKREF) {
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT(so->so_state & SS_PROTOREF,
("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -331,7 +331,7 @@
INP_WLOCK_ASSERT(inp);
- syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
+ syncache_add(inc, to, th, inp, lso, NULL, tod, todctx);
}
int
Index: sys/ofed/drivers/infiniband/core/iwcm.c
===================================================================
--- sys/ofed/drivers/infiniband/core/iwcm.c
+++ sys/ofed/drivers/infiniband/core/iwcm.c
@@ -416,34 +416,19 @@
{
struct socket *so;
struct sockaddr_in *remote;
+ int error;
- ACCEPT_LOCK();
- so = TAILQ_FIRST(&head->so_comp);
- if (!so) {
- ACCEPT_UNLOCK();
- return NULL;
- }
-
- SOCK_LOCK(so);
- /*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- */
- soref(so);
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
+ SOLISTEN_LOCK(head);
+ error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
+ if (error != 0)
+ return (NULL);
remote = NULL;
soaccept(so, (struct sockaddr **)&remote);
free(remote, M_SONAME);
return so;
}
+
static void
iw_so_event_handler(struct work_struct *_work)
{
@@ -485,18 +470,17 @@
#endif
return;
}
+
static int
iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
{
struct iwcm_listen_work *work;
- struct socket *so;
struct iw_cm_id *cm_id = arg;
/* check whether iw_so_event_handler() already dequeued this 'so' */
- so = TAILQ_FIRST(&parent_so->so_comp);
- if (!so)
+ if (TAILQ_EMPTY(&parent_so->sol_comp))
return SU_OK;
- work = kzalloc(sizeof(*work), M_NOWAIT);
+ work = kzalloc(sizeof(*work), waitflag);
if (!work)
return -ENOMEM;
work->cm_id = cm_id;
@@ -507,17 +491,21 @@
return SU_OK;
}
-static void
-iw_init_sock(struct iw_cm_id *cm_id)
+static int
+iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
struct sockopt sopt;
struct socket *so = cm_id->so;
int on = 1;
+ int rc;
- SOCK_LOCK(so);
- soupcall_set(so, SO_RCV, iw_so_upcall, cm_id);
+ rc = -solisten(cm_id->so, backlog, curthread);
+ if (rc != 0)
+ return (rc);
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, iw_so_upcall, cm_id);
so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
+ SOLISTEN_UNLOCK(so);
sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
@@ -525,37 +513,18 @@
sopt.sopt_valsize = sizeof(on);
sopt.sopt_td = NULL;
sosetopt(so, &sopt);
-}
-
-static int
-iw_uninit_socket(struct iw_cm_id *cm_id)
-{
- struct socket *so = cm_id->so;
-
- SOCK_LOCK(so);
- soupcall_clear(so, SO_RCV);
- SOCK_UNLOCK(so);
-
return (0);
}
static int
-iw_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
- int rc;
-
- iw_init_sock(cm_id);
- rc = -solisten(cm_id->so, backlog, curthread);
- if (rc != 0)
- iw_uninit_socket(cm_id);
- return (rc);
-}
-
-static int
iw_destroy_listen(struct iw_cm_id *cm_id)
{
+ struct socket *so = cm_id->so;
- return (iw_uninit_socket(cm_id));
+ SOLISTEN_LOCK(so);
+ solisten_upcall_set(so, NULL, NULL);
+ SOLISTEN_UNLOCK(so);
+ return (0);
}
Index: sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
===================================================================
--- sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
+++ sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
@@ -310,7 +310,6 @@
("sdp_closed: !SS_PROTOREF"));
ssk->flags &= ~SDP_SOCKREF;
SDP_WUNLOCK(ssk);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
Index: sys/rpc/svc_vc.c
===================================================================
--- sys/rpc/svc_vc.c
+++ sys/rpc/svc_vc.c
@@ -96,6 +96,7 @@
struct sockaddr *raddr);
static int svc_vc_accept(struct socket *head, struct socket **sop);
static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
+static int svc_vc_rendezvous_soupcall(struct socket *, void *, int);
static struct xp_ops svc_vc_rendezvous_ops = {
.xp_recv = svc_vc_rendezvous_recv,
@@ -183,10 +184,10 @@
solisten(so, -1, curthread);
- SOCKBUF_LOCK(&so->so_rcv);
+ SOLISTEN_LOCK(so);
xprt->xp_upcallset = 1;
- soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
- SOCKBUF_UNLOCK(&so->so_rcv);
+ solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt);
+ SOLISTEN_UNLOCK(so);
return (xprt);
@@ -316,9 +317,11 @@
int
svc_vc_accept(struct socket *head, struct socket **sop)
{
- int error = 0;
struct socket *so;
+ int error = 0;
+ short nbio;
+ /* XXXGL: shouldn't that be an assertion? */
if ((head->so_options & SO_ACCEPTCONN) == 0) {
error = EINVAL;
goto done;
@@ -328,38 +331,26 @@
if (error != 0)
goto done;
#endif
- ACCEPT_LOCK();
- if (TAILQ_EMPTY(&head->so_comp)) {
- ACCEPT_UNLOCK();
- error = EWOULDBLOCK;
- goto done;
- }
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("svc_vc_accept: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("svc_vc_accept: so not SQ_COMP"));
-
/*
- * Before changing the flags on the socket, we have to bump the
- * reference count. Otherwise, if the protocol calls sofree(),
- * the socket will be released due to a zero refcount.
- * XXX might not need soref() since this is simpler than kern_accept.
+ * XXXGL: we want non-blocking semantics. The socket could be a
+ * socket created by kernel as well as socket shared with userland,
+ * so we can't be sure about presence of SS_NBIO. We also shall not
+ * toggle it on the socket, since that may surprise userland. So we
+ * set SS_NBIO only temporarily and restore the previous setting below.
*/
- SOCK_LOCK(so); /* soref() and so_state update */
- soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- so->so_state |= (head->so_state & SS_NBIO);
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
+ SOLISTEN_LOCK(head);
+ nbio = head->so_state & SS_NBIO;
+ head->so_state |= SS_NBIO;
+ error = solisten_dequeue(head, &so, 0);
+ head->so_state &= (nbio | ~SS_NBIO);
+ if (error)
+ goto done;
+ so->so_state |= nbio;
*sop = so;
/* connection has been removed from the listen queue */
- KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
+ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
done:
return (error);
}
@@ -392,21 +383,21 @@
* connection arrives after our call to accept fails
* with EWOULDBLOCK.
*/
- ACCEPT_LOCK();
- if (TAILQ_EMPTY(&xprt->xp_socket->so_comp))
+ SOLISTEN_LOCK(xprt->xp_socket);
+ if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp))
xprt_inactive_self(xprt);
- ACCEPT_UNLOCK();
+ SOLISTEN_UNLOCK(xprt->xp_socket);
sx_xunlock(&xprt->xp_lock);
return (FALSE);
}
if (error) {
- SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
+ SOLISTEN_LOCK(xprt->xp_socket);
if (xprt->xp_upcallset) {
xprt->xp_upcallset = 0;
soupcall_clear(xprt->xp_socket, SO_RCV);
}
- SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
+ SOLISTEN_UNLOCK(xprt->xp_socket);
xprt_inactive_self(xprt);
sx_xunlock(&xprt->xp_lock);
return (FALSE);
@@ -453,12 +444,6 @@
static void
svc_vc_destroy_common(SVCXPRT *xprt)
{
- SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
- if (xprt->xp_upcallset) {
- xprt->xp_upcallset = 0;
- soupcall_clear(xprt->xp_socket, SO_RCV);
- }
- SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
if (xprt->xp_socket)
(void)soclose(xprt->xp_socket);
@@ -472,6 +457,13 @@
svc_vc_rendezvous_destroy(SVCXPRT *xprt)
{
+ SOLISTEN_LOCK(xprt->xp_socket);
+ if (xprt->xp_upcallset) {
+ xprt->xp_upcallset = 0;
+ solisten_upcall_set(xprt->xp_socket, NULL, NULL);
+ }
+ SOLISTEN_UNLOCK(xprt->xp_socket);
+
svc_vc_destroy_common(xprt);
}
@@ -480,6 +472,13 @@
{
struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
+ SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
+ if (xprt->xp_upcallset) {
+ xprt->xp_upcallset = 0;
+ soupcall_clear(xprt->xp_socket, SO_RCV);
+ }
+ SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
+
svc_vc_destroy_common(xprt);
if (cd->mreq)
@@ -958,6 +957,16 @@
return (SU_OK);
}
+static int
+svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag)
+{
+ SVCXPRT *xprt = (SVCXPRT *) arg;
+
+ if (!TAILQ_EMPTY(&head->sol_comp))
+ xprt_active(xprt);
+ return (SU_OK);
+}
+
#if 0
/*
* Get the effective UID of the sending process. Used by rpcbind, keyserv
Index: sys/sys/sockbuf.h
===================================================================
--- sys/sys/sockbuf.h
+++ sys/sys/sockbuf.h
@@ -32,7 +32,6 @@
*/
#ifndef _SYS_SOCKBUF_H_
#define _SYS_SOCKBUF_H_
-#include <sys/selinfo.h> /* for struct selinfo */
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
@@ -64,6 +63,7 @@
struct sockaddr;
struct socket;
struct thread;
+struct selinfo;
struct xsockbuf {
u_int sb_cc;
@@ -84,9 +84,9 @@
* (a) locked by SOCKBUF_LOCK().
*/
struct sockbuf {
- struct selinfo sb_sel; /* process selecting read/write */
- struct mtx sb_mtx; /* sockbuf lock */
- struct sx sb_sx; /* prevent I/O interlacing */
+ struct mtx sb_mtx; /* sockbuf lock */
+ struct sx sb_sx; /* prevent I/O interlacing */
+ struct selinfo *sb_sel; /* process selecting read/write */
short sb_state; /* (a) socket state on sockbuf */
#define sb_startzero sb_mb
struct mbuf *sb_mb; /* (a) the mbuf chain */
Index: sys/sys/socket.h
===================================================================
--- sys/sys/socket.h
+++ sys/sys/socket.h
@@ -111,7 +111,15 @@
*/
#define SOCK_CLOEXEC 0x10000000
#define SOCK_NONBLOCK 0x20000000
-#endif
+#ifdef _KERNEL
+/*
+ * Flags for accept1(), kern_accept4() and solisten_dequeue, in addition
+ * to SOCK_CLOEXEC and SOCK_NONBLOCK.
+ */
+#define ACCEPT4_INHERIT 0x1
+#define ACCEPT4_COMPAT 0x2
+#endif /* _KERNEL */
+#endif /* __BSD_VISIBLE */
/*
* Option flags per-socket.
@@ -704,9 +712,5 @@
void so_lock(struct socket *so);
void so_unlock(struct socket *so);
-void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg);
-
-#endif
-
-
+#endif /* _KERNEL */
#endif /* !_SYS_SOCKET_H_ */
Index: sys/sys/socketvar.h
===================================================================
--- sys/sys/socketvar.h
+++ sys/sys/socketvar.h
@@ -55,7 +55,9 @@
* handle on protocol and pointer to protocol
* private data and error information.
*/
-typedef u_quad_t so_gen_t;
+typedef uint64_t so_gen_t;
+typedef int so_upcall_t(struct socket *, void *, int);
+
struct socket;
@@ -63,60 +65,35 @@
* Locking key to struct socket:
* (a) constant after allocation, no locking required.
* (b) locked by SOCK_LOCK(so).
- * (c) locked by SOCKBUF_LOCK(&so->so_rcv).
- * (e) locked by ACCEPT_LOCK().
+ * (cr) locked by SOCKBUF_LOCK(&so->so_rcv).
+ * (cs) locked by SOCKBUF_LOCK(&so->so_snd).
+ * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
* (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value.
* (h) locked by global mutex so_global_mtx.
*/
+TAILQ_HEAD(accept_queue, socket);
struct socket {
- int so_count; /* (b) reference count */
+ struct mtx so_lock;
+ volatile u_int so_count; /* (b / refcount) */
+ struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */
+ struct selinfo so_wrsel; /* (b/cs) for so_snd */
short so_type; /* (a) generic type, see socket.h */
- short so_options; /* from socket call, see socket.h */
- short so_linger; /* time to linger while closing */
+ short so_options; /* (b) from socket call, see socket.h */
+ short so_linger; /* time to linger close(2) */
short so_state; /* (b) internal state flags SS_* */
- int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
-/*
- * Variables for connection queuing.
- * Socket where accepts occur is so_head in all subsidiary sockets.
- * If so_head is 0, socket is not related to an accept.
- * For head socket so_incomp queues partially completed connections,
- * while so_comp is a queue of connections ready to be accepted.
- * If a connection is aborted and it has so_head set, then
- * it has to be pulled out of either so_incomp or so_comp.
- * We allow connections to queue up based on current queue lengths
- * and limit on number of queued connections for this socket.
- */
- struct socket *so_head; /* (e) back pointer to listen socket */
- TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */
- TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */
- TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */
- u_int so_qlen; /* (e) number of unaccepted connections */
- u_int so_incqlen; /* (e) number of unaccepted incomplete
- connections */
- u_int so_qlimit; /* (e) max number queued connections */
short so_timeo; /* (g) connection timeout */
u_short so_error; /* (f) error affecting connection */
struct sigio *so_sigio; /* [sg] information for async I/O or
out of band data (SIGURG) */
- u_long so_oobmark; /* (c) chars to oob mark */
-
- struct sockbuf so_rcv, so_snd;
-
struct ucred *so_cred; /* (a) user credentials */
struct label *so_label; /* (b) MAC label for socket */
- struct label *so_peerlabel; /* (b) cached MAC label for peer */
/* NB: generation count must not be first. */
so_gen_t so_gencnt; /* (h) generation count */
void *so_emuldata; /* (b) private data for emulators */
- struct so_accf {
- struct accept_filter *so_accept_filter;
- void *so_accept_filter_arg; /* saved filter args */
- char *so_accept_filter_str; /* saved user args */
- } *so_accf;
struct osd osd; /* Object Specific extensions */
/*
* so_fibnum, so_user_cookie and friends can be used to attach
@@ -129,39 +106,92 @@
int so_ts_clock; /* type of the clock used for timestamps */
uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */
-
- void *so_pspare[2]; /* general use */
- int so_ispare[2]; /* general use */
+ union {
+ /* Regular (data flow) socket. */
+ struct {
+ /* (cr, cs) Receive and send buffers. */
+ struct sockbuf so_rcv, so_snd;
+
+ /* (e) Our place on accept queue. */
+ TAILQ_ENTRY(socket) so_list;
+ struct socket *so_listen; /* (b) */
+ enum {
+ SQ_NONE = 0,
+ SQ_INCOMP = 0x0800, /* on sol_incomp */
+ SQ_COMP = 0x1000, /* on sol_comp */
+ } so_qstate; /* (b) */
+
+ /* (b) cached MAC label for peer */
+ struct label *so_peerlabel;
+ u_long so_oobmark; /* chars to oob mark */
+ };
+ /*
+ * Listening socket, where accepts occur, is so_listen in all
+ * subsidiary sockets. If so_listen is NULL, socket is not
+ * related to an accept. For a listening socket itself
+ * sol_incomp queues partially completed connections, while
+ * sol_comp is a queue of connections ready to be accepted.
+ * If a connection is aborted and it has so_listen set, then
+ * it has to be pulled out of either sol_incomp or sol_comp.
+ * We allow connections to queue up based on current queue
+ * lengths and limit on number of queued connections for this
+ * socket.
+ */
+ struct {
+ /* (e) queue of partial unaccepted connections */
+ struct accept_queue sol_incomp;
+ /* (e) queue of complete unaccepted connections */
+ struct accept_queue sol_comp;
+ u_int sol_qlen; /* (e) sol_comp length */
+ u_int sol_incqlen; /* (e) sol_incomp length */
+ u_int sol_qlimit; /* (e) queue limit */
+
+ /* accept_filter(9) optional data */
+ struct accept_filter *sol_accept_filter;
+ void *sol_accept_filter_arg; /* saved filter args */
+ char *sol_accept_filter_str; /* saved user args */
+
+ /* Optional upcall, for kernel socket. */
+ so_upcall_t *sol_upcall; /* (e) */
+ void *sol_upcallarg; /* (e) */
+
+ /* Socket buffer parameters, to be copied to
+ * dataflow sockets, accepted from this one. */
+ int sol_sbrcv_lowat;
+ int sol_sbsnd_lowat;
+ u_int sol_sbrcv_hiwat;
+ u_int sol_sbsnd_hiwat;
+ short sol_sbrcv_flags;
+ short sol_sbsnd_flags;
+ sbintime_t sol_sbrcv_timeo;
+ sbintime_t sol_sbsnd_timeo;
+ };
+ };
};
-/*
- * Global accept mutex to serialize access to accept queues and
- * fields associated with multiple sockets. This allows us to
- * avoid defining a lock order between listen and accept sockets
- * until such time as it proves to be a good idea.
- */
-extern struct mtx accept_mtx;
-#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED)
-#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED)
-#define ACCEPT_LOCK() mtx_lock(&accept_mtx)
-#define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx)
-
-/*
- * Per-socket mutex: we reuse the receive socket buffer mutex for space
- * efficiency. This decision should probably be revisited as we optimize
- * locking for the socket code.
- */
-#define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv)
-#define SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv)
-#define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv)
-#define SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv)
-#define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
-
-/*
- * Socket state bits stored in so_qstate.
- */
-#define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */
-#define SQ_COMP 0x1000 /* unaccepted, complete connection */
+#define SOCK_MTX(so) (&(so)->so_lock)
+#define SOCK_LOCK(so) mtx_lock(&(so)->so_lock)
+#define SOCK_OWNED(so) mtx_owned(&(so)->so_lock)
+#define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock)
+#define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED)
+#define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED)
+
+#define SOLISTENING(sol) (((sol)->so_options & SO_ACCEPTCONN) != 0)
+#define SOLISTEN_LOCK(sol) do { \
+ mtx_lock(&(sol)->so_lock); \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+} while (0)
+#define SOLISTEN_UNLOCK(sol) do { \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+ mtx_unlock(&(sol)->so_lock); \
+} while (0)
+#define SOLISTEN_LOCK_ASSERT(sol) do { \
+ mtx_assert(&(sol)->so_lock, MA_OWNED); \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+} while (0)
/*
* Externalized form of struct socket used by the sysctl(3) interface.
@@ -212,8 +242,7 @@
/* can we read something from so? */
#define soreadabledata(so) \
- (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
- !TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
+ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || (so)->so_error)
#define soreadable(so) \
(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
@@ -226,26 +255,19 @@
(so)->so_error)
/*
- * soref()/sorele() ref-count the socket structure. Note that you must
- * still explicitly close the socket, but the last ref count will free
- * the structure.
+ * soref()/sorele() ref-count the socket structure.
+ * soref() may be called without owning the socket lock, but in that case the
+ * caller must own something that holds the socket, and so_count must not be 0.
+ * Note that you must still explicitly close the socket, but the last ref
+ * count will free the structure.
*/
-#define soref(so) do { \
- SOCK_LOCK_ASSERT(so); \
- ++(so)->so_count; \
-} while (0)
-
+#define soref(so) refcount_acquire(&(so)->so_count)
#define sorele(so) do { \
- ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
- if ((so)->so_count <= 0) \
- panic("sorele"); \
- if (--(so)->so_count == 0) \
+ if (refcount_release(&(so)->so_count)) \
sofree(so); \
- else { \
+ else \
SOCK_UNLOCK(so); \
- ACCEPT_UNLOCK(); \
- } \
} while (0)
/*
@@ -368,10 +390,11 @@
int solisten(struct socket *so, int backlog, struct thread *td);
void solisten_proto(struct socket *so, int backlog);
int solisten_proto_check(struct socket *so);
+int solisten_dequeue(struct socket *, struct socket **, int);
struct socket *
sonewconn(struct socket *head, int connstatus);
-
-
+struct socket *
+ sopeeloff(struct socket *);
int sopoll(struct socket *so, int events, struct ucred *active_cred,
struct thread *td);
int sopoll_generic(struct socket *so, int events,
@@ -403,6 +426,7 @@
void soupcall_clear(struct socket *so, int which);
void soupcall_set(struct socket *so, int which,
int (*func)(struct socket *, void *, int), void *arg);
+void solisten_upcall_set(struct socket *, so_upcall_t, void *);
void sowakeup(struct socket *so, struct sockbuf *sb);
void sowakeup_aio(struct socket *so, struct sockbuf *sb);
int selsocket(struct socket *so, int events, struct timeval *tv,
Index: sys/sys/sockopt.h
===================================================================
--- sys/sys/sockopt.h
+++ sys/sys/sockopt.h
@@ -64,8 +64,8 @@
int soopt_getm(struct sockopt *sopt, struct mbuf **mp);
int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m);
int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m);
-int do_getopt_accept_filter(struct socket *so, struct sockopt *sopt);
-int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
+int accept_filt_getopt(struct socket *, struct sockopt *);
+int accept_filt_setopt(struct socket *, struct sockopt *);
int so_setsockopt(struct socket *so, int level, int optname,
void *optval, size_t optlen);
Index: sys/sys/unpcb.h
===================================================================
--- sys/sys/unpcb.h
+++ sys/sys/unpcb.h
@@ -92,14 +92,8 @@
* and is really the credentials of the connected peer. This is used
* to determine whether the contents should be sent to the user or
* not.
- *
- * UNP_HAVEPCCACHED - indicates that the unp_peercred member is filled
- * in, but does *not* contain the credentials of the connected peer
- * (there may not even be a peer). This is set in unp_listen() when
- * it fills in unp_peercred for later consumption by unp_connect().
*/
#define UNP_HAVEPC 0x001
-#define UNP_HAVEPCCACHED 0x002
#define UNP_WANTCRED 0x004 /* credentials wanted */
#define UNP_CONNWAIT 0x008 /* connect blocks until accepted */
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -112,14 +112,13 @@
/*
* Type specific fields, only one applies to any given vnode.
- * See #defines below for renaming to v_* namespace.
*/
union {
- struct mount *vu_mount; /* v ptr to mountpoint (VDIR) */
- struct socket *vu_socket; /* v unix domain net (VSOCK) */
- struct cdev *vu_cdev; /* v device (VCHR, VBLK) */
- struct fifoinfo *vu_fifoinfo; /* v fifo (VFIFO) */
- } v_un;
+ struct mount *v_mountedhere; /* v ptr to mountpoint (VDIR) */
+ struct unpcb *v_unpcb; /* v unix domain net (VSOCK) */
+ struct cdev *v_rdev; /* v device (VCHR, VBLK) */
+ struct fifoinfo *v_fifoinfo; /* v fifo (VFIFO) */
+ };
/*
* vfs_hash: (mount + inode) -> vnode hash. The hash value
@@ -175,11 +174,6 @@
#endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
-#define v_mountedhere v_un.vu_mount
-#define v_socket v_un.vu_socket
-#define v_rdev v_un.vu_cdev
-#define v_fifoinfo v_un.vu_fifoinfo
-
#define bo2vnode(bo) __containerof((bo), struct vnode, v_bufobj)
/* XXX: These are temporary to avoid a source sweep at this time */
@@ -200,7 +194,7 @@
long xv_numoutput; /* num of writes in progress */
enum vtype xv_type; /* vnode type */
union {
- void *xvu_socket; /* socket, if VSOCK */
+ void *xvu_socket; /* unpcb, if VSOCK */
void *xvu_fifo; /* fifo, if VFIFO */
dev_t xvu_rdev; /* maj/min, if VBLK/VCHR */
struct {
Index: usr.bin/netstat/inet.c
===================================================================
--- usr.bin/netstat/inet.c
+++ usr.bin/netstat/inet.c
@@ -170,14 +170,17 @@
if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0)
return (-1);
xso->xso_family = domain.dom_family;
- xso->so_qlen = so->so_qlen;
- xso->so_incqlen = so->so_incqlen;
- xso->so_qlimit = so->so_qlimit;
xso->so_timeo = so->so_timeo;
xso->so_error = so->so_error;
- xso->so_oobmark = so->so_oobmark;
- sbtoxsockbuf(&so->so_snd, &xso->so_snd);
- sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ if (SOLISTENING(so)) {
+ xso->so_qlen = so->sol_qlen;
+ xso->so_incqlen = so->sol_incqlen;
+ xso->so_qlimit = so->sol_qlimit;
+ } else {
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ xso->so_oobmark = so->so_oobmark;
+ }
return (0);
}

File Metadata

Mime Type
text/plain
Expires
Sat, May 16, 9:54 AM (22 h, 41 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33125785
Default Alt Text
D9770.id29118.diff (104 KB)

Event Timeline