D9770.id25629.diff
Index: sys/kern/sys_socket.c
===================================================================
--- sys/kern/sys_socket.c
+++ sys/kern/sys_socket.c
@@ -170,32 +170,36 @@
break;
case FIOASYNC:
- /*
- * XXXRW: This code separately acquires SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) even though they are the same
- * mutex to avoid introducing the assumption that they are
- * the same.
- */
if (*(int *)data) {
SOCK_LOCK(so);
so->so_state |= SS_ASYNC;
+ if (SOLISTENING(so)) {
+ so->sol_sbrcv_flags |= SB_ASYNC;
+ so->sol_sbsnd_flags |= SB_ASYNC;
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_flags |= SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_flags |= SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
SOCK_UNLOCK(so);
- SOCKBUF_LOCK(&so->so_rcv);
- so->so_rcv.sb_flags |= SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_flags |= SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_snd);
} else {
SOCK_LOCK(so);
so->so_state &= ~SS_ASYNC;
+ if (SOLISTENING(so)) {
+ so->sol_sbrcv_flags &= ~SB_ASYNC;
+ so->sol_sbsnd_flags &= ~SB_ASYNC;
+ } else {
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_flags &= ~SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_LOCK(&so->so_snd);
+ so->so_snd.sb_flags &= ~SB_ASYNC;
+ SOCKBUF_UNLOCK(&so->so_snd);
+ }
SOCK_UNLOCK(so);
- SOCKBUF_LOCK(&so->so_rcv);
- so->so_rcv.sb_flags &= ~SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_LOCK(&so->so_snd);
- so->so_snd.sb_flags &= ~SB_ASYNC;
- SOCKBUF_UNLOCK(&so->so_snd);
}
break;
@@ -695,7 +699,6 @@
sb->sb_flags &= ~SB_AIO_RUNNING;
SOCKBUF_UNLOCK(sb);
- ACCEPT_LOCK();
SOCK_LOCK(so);
sorele(so);
}
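
The FIOASYNC hunk above is the kernel side of async-I/O signalling on sockets. As a rough userland sketch of what reaches this path (the helper name enable_sigio is illustrative; error handling omitted), a process takes ownership of the descriptor and then turns the flag on:

	#include <sys/ioctl.h>
	#include <fcntl.h>
	#include <unistd.h>

	static void
	enable_sigio(int s)
	{
		int on = 1;

		/* Deliver SIGIO for this descriptor to the current process. */
		fcntl(s, F_SETOWN, getpid());
		/* Request async notification; lands in the FIOASYNC case above. */
		ioctl(s, FIOASYNC, &on);
	}
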
Index: sys/kern/uipc_accf.c
===================================================================
--- sys/kern/uipc_accf.c
+++ sys/kern/uipc_accf.c
@@ -162,26 +162,25 @@
}
int
-do_getopt_accept_filter(struct socket *so, struct sockopt *sopt)
+accept_filt_getopt(struct socket *so, struct sockopt *sopt)
{
struct accept_filter_arg *afap;
int error;
error = 0;
- afap = malloc(sizeof(*afap), M_TEMP,
- M_WAITOK | M_ZERO);
+ afap = malloc(sizeof(*afap), M_TEMP, M_WAITOK | M_ZERO);
SOCK_LOCK(so);
if ((so->so_options & SO_ACCEPTCONN) == 0) {
error = EINVAL;
goto out;
}
- if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+ if (so->sol_accept_filter == NULL) {
error = EINVAL;
goto out;
}
- strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
- if (so->so_accf->so_accept_filter_str != NULL)
- strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
+ strcpy(afap->af_name, so->sol_accept_filter->accf_name);
+ if (so->sol_accept_filter_str != NULL)
+ strcpy(afap->af_arg, so->sol_accept_filter_str);
out:
SOCK_UNLOCK(so);
if (error == 0)
@@ -191,12 +190,13 @@
}
int
-do_setopt_accept_filter(struct socket *so, struct sockopt *sopt)
+accept_filt_setopt(struct socket *so, struct sockopt *sopt)
{
struct accept_filter_arg *afap;
struct accept_filter *afp;
- struct so_accf *newaf;
- int error = 0;
+ char *accept_filter_str = NULL;
+ void *accept_filter_arg = NULL;
+ int error;
/*
* Handle the simple delete case first.
@@ -207,18 +207,15 @@
SOCK_UNLOCK(so);
return (EINVAL);
}
- if (so->so_accf != NULL) {
- struct so_accf *af = so->so_accf;
- if (af->so_accept_filter != NULL &&
- af->so_accept_filter->accf_destroy != NULL) {
- af->so_accept_filter->accf_destroy(so);
- }
- if (af->so_accept_filter_str != NULL)
- free(af->so_accept_filter_str, M_ACCF);
- free(af, M_ACCF);
- so->so_accf = NULL;
+ if (so->sol_accept_filter != NULL) {
+ if (so->sol_accept_filter->accf_destroy != NULL)
+ so->sol_accept_filter->accf_destroy(so);
+ if (so->sol_accept_filter_str != NULL)
+ free(so->sol_accept_filter_str, M_ACCF);
+ so->sol_accept_filter = NULL;
+ so->sol_accept_filter_arg = NULL;
+ so->sol_accept_filter_str = NULL;
}
- so->so_options &= ~SO_ACCEPTFILTER;
SOCK_UNLOCK(so);
return (0);
}
@@ -227,8 +224,7 @@
* Pre-allocate any memory we may need later to avoid blocking at
* untimely moments. This does not optimize for invalid arguments.
*/
- afap = malloc(sizeof(*afap), M_TEMP,
- M_WAITOK);
+ afap = malloc(sizeof(*afap), M_TEMP, M_WAITOK);
error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
afap->af_name[sizeof(afap->af_name)-1] = '\0';
afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
@@ -241,19 +237,10 @@
free(afap, M_TEMP);
return (ENOENT);
}
- /*
- * Allocate the new accept filter instance storage. We may
- * have to free it again later if we fail to attach it. If
- * attached properly, 'newaf' is NULLed to avoid a free()
- * while in use.
- */
- newaf = malloc(sizeof(*newaf), M_ACCF, M_WAITOK |
- M_ZERO);
if (afp->accf_create != NULL && afap->af_name[0] != '\0') {
size_t len = strlen(afap->af_name) + 1;
- newaf->so_accept_filter_str = malloc(len, M_ACCF,
- M_WAITOK);
- strcpy(newaf->so_accept_filter_str, afap->af_name);
+ accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
+ strcpy(accept_filter_str, afap->af_name);
}
/*
@@ -261,8 +248,8 @@
* without first removing it.
*/
SOCK_LOCK(so);
- if (((so->so_options & SO_ACCEPTCONN) == 0) ||
- (so->so_accf != NULL)) {
+ if ((so->so_options & SO_ACCEPTCONN) == 0 ||
+ so->sol_accept_filter != NULL) {
error = EINVAL;
goto out;
}
@@ -273,25 +260,19 @@
* can't block.
*/
if (afp->accf_create != NULL) {
- newaf->so_accept_filter_arg =
- afp->accf_create(so, afap->af_arg);
- if (newaf->so_accept_filter_arg == NULL) {
+ accept_filter_arg = afp->accf_create(so, afap->af_arg);
+ if (accept_filter_arg == NULL) {
error = EINVAL;
goto out;
}
}
- newaf->so_accept_filter = afp;
- so->so_accf = newaf;
- so->so_options |= SO_ACCEPTFILTER;
- newaf = NULL;
+ so->sol_accept_filter = afp;
+ so->sol_accept_filter_arg = accept_filter_arg;
+ so->sol_accept_filter_str = accept_filter_str;
out:
SOCK_UNLOCK(so);
- if (newaf != NULL) {
- if (newaf->so_accept_filter_str != NULL)
- free(newaf->so_accept_filter_str, M_ACCF);
- free(newaf, M_ACCF);
- }
- if (afap != NULL)
- free(afap, M_TEMP);
+ if (accept_filter_str != NULL)
+ free(accept_filter_str, M_ACCF);
+ free(afap, M_TEMP);
return (error);
}
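
accept_filt_setopt() and accept_filt_getopt() above are reached through setsockopt(2)/getsockopt(2) with SO_ACCEPTFILTER on a listening socket. A minimal userland sketch (the helper name attach_httpready is illustrative; it assumes the accf_http(9) filter is loaded or compiled into the kernel):

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <string.h>

	static int
	attach_httpready(int lsock)
	{
		struct accept_filter_arg afa;

		memset(&afa, 0, sizeof(afa));
		strcpy(afa.af_name, "httpready");	/* provided by accf_http(9) */
		/* Takes the accept_filt_setopt() path on the listening socket. */
		return (setsockopt(lsock, SOL_SOCKET, SO_ACCEPTFILTER,
		    &afa, sizeof(afa)));
	}
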
Index: sys/kern/uipc_debug.c
===================================================================
--- sys/kern/uipc_debug.c
+++ sys/kern/uipc_debug.c
@@ -448,8 +448,6 @@
db_printf(")\n");
db_print_indent(indent);
- db_printf("so_qstate: 0x%x (", so->so_qstate);
- db_print_soqstate(so->so_qstate);
db_printf(") ");
db_printf("so_pcb: %p ", so->so_pcb);
db_printf("so_proto: %p\n", so->so_proto);
@@ -458,24 +456,28 @@
db_print_protosw(so->so_proto, "so_proto", indent);
db_print_indent(indent);
- db_printf("so_head: %p ", so->so_head);
- db_printf("so_incomp first: %p ", TAILQ_FIRST(&so->so_incomp));
- db_printf("so_comp first: %p\n", TAILQ_FIRST(&so->so_comp));
-
- db_print_indent(indent);
- /* so_list skipped */
- db_printf("so_qlen: %u ", so->so_qlen);
- db_printf("so_incqlen: %u ", so->so_incqlen);
- db_printf("so_qlimit: %u ", so->so_qlimit);
- db_printf("so_timeo: %d ", so->so_timeo);
- db_printf("so_error: %d\n", so->so_error);
-
- db_print_indent(indent);
- db_printf("so_sigio: %p ", so->so_sigio);
- db_printf("so_oobmark: %lu ", so->so_oobmark);
-
- db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
- db_print_sockbuf(&so->so_snd, "so_snd", indent);
+ if (so->so_options & SO_ACCEPTCONN) {
+ db_printf("sol_incomp first: %p ",
+ TAILQ_FIRST(&so->sol_incomp));
+ db_printf("sol_comp first: %p\n", TAILQ_FIRST(&so->sol_comp));
+ db_printf("sol_qlen: %d ", so->sol_qlen);
+ db_printf("sol_incqlen: %d ", so->sol_incqlen);
+ db_printf("sol_qlimit: %d ", so->sol_qlimit);
+ } else {
+ db_printf("so_qstate: 0x%x (", so->so_qstate);
+ db_print_soqstate(so->so_qstate);
+ db_printf("so_listen: %p ", so->so_listen);
+ /* so_list skipped */
+ db_printf("so_timeo: %d ", so->so_timeo);
+ db_printf("so_error: %d\n", so->so_error);
+
+ db_print_indent(indent);
+ db_printf("so_sigio: %p ", so->so_sigio);
+ db_printf("so_oobmark: %lu ", so->so_oobmark);
+
+ db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
+ db_print_sockbuf(&so->so_snd, "so_snd", indent);
+ }
}
DB_SHOW_COMMAND(socket, db_show_socket)
Index: sys/kern/uipc_sockbuf.c
===================================================================
--- sys/kern/uipc_sockbuf.c
+++ sys/kern/uipc_sockbuf.c
@@ -314,14 +314,14 @@
SOCKBUF_LOCK_ASSERT(sb);
- selwakeuppri(&sb->sb_sel, PSOCK);
- if (!SEL_WAITING(&sb->sb_sel))
+ selwakeuppri(sb->sb_sel, PSOCK);
+ if (!SEL_WAITING(sb->sb_sel))
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_acc);
}
- KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
+ KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
if (sb->sb_upcall != NULL) {
ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
if (ret == SU_ISCONNECTED) {
Index: sys/kern/uipc_socket.c
===================================================================
--- sys/kern/uipc_socket.c
+++ sys/kern/uipc_socket.c
@@ -106,6 +106,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_compat.h"
+#include "opt_sctp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -154,13 +155,21 @@
static int soreceive_rcvoob(struct socket *so, struct uio *uio,
int flags);
+static void so_rdknl_lock(void *);
+static void so_rdknl_unlock(void *);
+static void so_rdknl_assert_locked(void *);
+static void so_rdknl_assert_unlocked(void *);
+static void so_wrknl_lock(void *);
+static void so_wrknl_unlock(void *);
+static void so_wrknl_assert_locked(void *);
+static void so_wrknl_assert_unlocked(void *);
static void filt_sordetach(struct knote *kn);
static int filt_soread(struct knote *kn, long hint);
static void filt_sowdetach(struct knote *kn);
static int filt_sowrite(struct knote *kn, long hint);
-static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
static int filt_soempty(struct knote *kn, long hint);
+static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
fo_kqfilter_t soo_kqfilter;
static struct filterops soread_filtops = {
@@ -393,8 +402,16 @@
return (NULL);
}
+ /*
+ * The socket locking protocol allows to lock 2 sockets at a time,
+ * however, the first one must be a listening socket. WITNESS lacks
+ * a feature to change class of an existing lock, so we use DUPOK.
+ */
+ mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
+ so->so_rcv.sb_sel = &so->so_rdsel;
+ so->so_snd.sb_sel = &so->so_wrsel;
sx_init(&so->so_snd.sb_sx, "so_snd_sx");
sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
TAILQ_INIT(&so->so_snd.sb_aiojobq);
@@ -450,9 +467,6 @@
if (so->so_snd.sb_hiwat)
(void)chgsbsize(so->so_cred->cr_uidinfo,
&so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
- /* remove accept filter if one is present. */
- if (so->so_accf != NULL)
- do_setopt_accept_filter(so, NULL);
#ifdef MAC
mac_socket_destroy(so);
#endif
@@ -460,10 +474,16 @@
crfree(so->so_cred);
khelp_destroy_osd(&so->osd);
- sx_destroy(&so->so_snd.sb_sx);
- sx_destroy(&so->so_rcv.sb_sx);
- SOCKBUF_LOCK_DESTROY(&so->so_snd);
- SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+ if (SOLISTENING(so)) {
+ if (so->sol_accept_filter != NULL)
+ accept_filt_setopt(so, NULL);
+ } else {
+ sx_destroy(&so->so_snd.sb_sx);
+ sx_destroy(&so->so_rcv.sb_sx);
+ SOCKBUF_LOCK_DESTROY(&so->so_snd);
+ SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+ }
+ mtx_destroy(&so->so_lock);
uma_zfree(socket_zone, so);
}
@@ -506,8 +526,6 @@
if (so == NULL)
return (ENOBUFS);
- TAILQ_INIT(&so->so_incomp);
- TAILQ_INIT(&so->so_comp);
so->so_type = type;
so->so_cred = crhold(cred);
if ((prp->pr_domain->dom_family == PF_INET) ||
@@ -520,9 +538,10 @@
#ifdef MAC
mac_socket_create(cred, so);
#endif
- knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
- knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
- so->so_count = 1;
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
/*
* Auto-sizing of socket buffers is managed by the protocols and
* the appropriate flags must be set in the pru_attach function.
@@ -531,12 +550,10 @@
error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
CURVNET_RESTORE();
if (error) {
- KASSERT(so->so_count == 1, ("socreate: so_count %d",
- so->so_count));
- so->so_count = 0;
sodealloc(so);
return (error);
}
+ soref(so);
*aso = so;
return (0);
}
@@ -564,11 +581,11 @@
static int overcount;
struct socket *so;
- int over;
+ u_int over;
- ACCEPT_LOCK();
- over = (head->so_qlen > 3 * head->so_qlimit / 2);
- ACCEPT_UNLOCK();
+ SOLISTEN_LOCK(head);
+ over = (head->sol_qlen > 3 * head->sol_qlimit / 2);
+ SOLISTEN_UNLOCK(head);
#ifdef REGRESSION
if (regression_sonewconn_earlytest && over) {
#else
@@ -580,15 +597,15 @@
log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: "
"%i already in queue awaiting acceptance "
"(%d occurrences)\n",
- __func__, head->so_pcb, head->so_qlen, overcount);
+ __func__, head->so_pcb, head->sol_qlen, overcount);
overcount = 0;
}
return (NULL);
}
- VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
- __func__, __LINE__, head));
+ VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL",
+ __func__, head));
so = soalloc(head->so_vnet);
if (so == NULL) {
log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
@@ -596,9 +613,9 @@
__func__, head->so_pcb);
return (NULL);
}
- if ((head->so_options & SO_ACCEPTFILTER) != 0)
+ if (head->sol_accept_filter != NULL)
connstatus = 0;
- so->so_head = head;
+ so->so_listen = head;
so->so_type = head->so_type;
so->so_options = head->so_options &~ SO_ACCEPTCONN;
so->so_linger = head->so_linger;
@@ -609,10 +626,12 @@
#ifdef MAC
mac_socket_newconn(head, so);
#endif
- knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
- knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
VNET_SO_ASSERT(head);
- if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) {
sodealloc(so);
log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
__func__, head->so_pcb);
@@ -624,32 +643,19 @@
__func__, head->so_pcb);
return (NULL);
}
- so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
- so->so_snd.sb_lowat = head->so_snd.sb_lowat;
- so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
- so->so_snd.sb_timeo = head->so_snd.sb_timeo;
- so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
- so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+ so->so_rcv.sb_lowat = head->sol_sbrcv_lowat;
+ so->so_snd.sb_lowat = head->sol_sbsnd_lowat;
+ so->so_rcv.sb_timeo = head->sol_sbrcv_timeo;
+ so->so_snd.sb_timeo = head->sol_sbsnd_timeo;
+ so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE;
+ so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE;
so->so_state |= connstatus;
- ACCEPT_LOCK();
- /*
- * The accept socket may be tearing down but we just
- * won a race on the ACCEPT_LOCK.
- * However, if sctp_peeloff() is called on a 1-to-many
- * style socket, the SO_ACCEPTCONN doesn't need to be set.
- */
- if (!(head->so_options & SO_ACCEPTCONN) &&
- ((head->so_proto->pr_protocol != IPPROTO_SCTP) ||
- (head->so_type != SOCK_SEQPACKET))) {
- SOCK_LOCK(so);
- so->so_head = NULL;
- sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */
- return (NULL);
- }
+
+ SOLISTEN_LOCK(head);
if (connstatus) {
- TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
- so->so_qstate |= SQ_COMP;
- head->so_qlen++;
+ TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+ so->so_qstate = SQ_COMP;
+ head->sol_qlen++;
} else {
/*
* Keep removing sockets from the head until there's room for
@@ -658,28 +664,92 @@
* threads and soabort() requires dropping locks, we must
* loop waiting for the condition to be true.
*/
- while (head->so_incqlen > head->so_qlimit) {
+ while (head->sol_incqlen > head->sol_qlimit) {
struct socket *sp;
- sp = TAILQ_FIRST(&head->so_incomp);
- TAILQ_REMOVE(&head->so_incomp, sp, so_list);
- head->so_incqlen--;
- sp->so_qstate &= ~SQ_INCOMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
+
+ sp = TAILQ_FIRST(&head->sol_incomp);
+ TAILQ_REMOVE(&head->sol_incomp, sp, so_list);
+ head->sol_incqlen--;
+ SOCK_LOCK(sp);
+ sp->so_qstate = SQ_NONE;
+ sp->so_listen = NULL;
+ SOCK_UNLOCK(sp);
+ sorele(head); /* does SOLISTEN_UNLOCK, head stays */
soabort(sp);
- ACCEPT_LOCK();
+ SOLISTEN_LOCK(head);
}
- TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
- so->so_qstate |= SQ_INCOMP;
- head->so_incqlen++;
+ TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list);
+ so->so_qstate = SQ_INCOMP;
+ head->sol_incqlen++;
}
- ACCEPT_UNLOCK();
+ soref(head); /* A socket on (in)complete queue refs head. */
+ SOLISTEN_UNLOCK(head);
if (connstatus) {
- sorwakeup(head);
- wakeup_one(&head->so_timeo);
+ selwakeuppri(&head->so_rdsel, PSOCK);
+ KNOTE_LOCKED(&head->so_rdsel.si_note, 0);
+ wakeup_one(&head->sol_comp);
+ }
+ return (so);
+}
+
+#ifdef SCTP
+/*
+ * Socket part of sctp_peeloff(). Detach a new socket from an
+ * association. The new socket is returned with a reference.
+ */
+struct socket *
+sopeeloff(struct socket *head)
+{
+ struct socket *so;
+
+ VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
+ __func__, __LINE__, head));
+ so = soalloc(head->so_vnet);
+ if (so == NULL) {
+ log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
+ "limit reached or out of memory\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ so->so_type = head->so_type;
+ so->so_options = head->so_options;
+ so->so_linger = head->so_linger;
+ so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED;
+ so->so_fibnum = head->so_fibnum;
+ so->so_proto = head->so_proto;
+ so->so_cred = crhold(head->so_cred);
+#ifdef MAC
+ mac_socket_newconn(head, so);
+#endif
+ knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+ so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+ knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+ so_wrknl_assert_locked, so_wrknl_assert_unlocked);
+ VNET_SO_ASSERT(head);
+ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
+ }
+ if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+ sodealloc(so);
+ log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
+ __func__, head->so_pcb);
+ return (NULL);
}
+ so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
+ so->so_snd.sb_lowat = head->so_snd.sb_lowat;
+ so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
+ so->so_snd.sb_timeo = head->so_snd.sb_timeo;
+ so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
+ so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+
+ soref(so);
+
return (so);
}
+#endif /* SCTP */
int
sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
@@ -741,13 +811,63 @@
void
solisten_proto(struct socket *so, int backlog)
{
+ int sbrcv_lowat, sbsnd_lowat;
+ u_int sbrcv_hiwat, sbsnd_hiwat;
+ short sbrcv_flags, sbsnd_flags;
+ sbintime_t sbrcv_timeo, sbsnd_timeo;
SOCK_LOCK_ASSERT(so);
+ if (SOLISTENING(so))
+ goto listening;
+
+ /*
+ * Change this socket to listening state.
+ */
+ sbrcv_lowat = so->so_rcv.sb_lowat;
+ sbsnd_lowat = so->so_snd.sb_lowat;
+ sbrcv_hiwat = so->so_rcv.sb_hiwat;
+ sbsnd_hiwat = so->so_snd.sb_hiwat;
+ sbrcv_flags = so->so_rcv.sb_flags;
+ sbsnd_flags = so->so_snd.sb_flags;
+ sbrcv_timeo = so->so_rcv.sb_timeo;
+ sbsnd_timeo = so->so_snd.sb_timeo;
+
+ sbdestroy(&so->so_snd, so);
+ sbdestroy(&so->so_rcv, so);
+ sx_destroy(&so->so_snd.sb_sx);
+ sx_destroy(&so->so_rcv.sb_sx);
+ SOCKBUF_LOCK_DESTROY(&so->so_snd);
+ SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+
+#ifdef INVARIANTS
+ bzero(&so->so_rcv,
+ sizeof(struct socket) - offsetof(struct socket, so_rcv));
+#endif
+
+ so->sol_sbrcv_lowat = sbrcv_lowat;
+ so->sol_sbsnd_lowat = sbsnd_lowat;
+ so->sol_sbrcv_hiwat = sbrcv_hiwat;
+ so->sol_sbsnd_hiwat = sbsnd_hiwat;
+ so->sol_sbrcv_flags = sbrcv_flags;
+ so->sol_sbsnd_flags = sbsnd_flags;
+ so->sol_sbrcv_timeo = sbrcv_timeo;
+ so->sol_sbsnd_timeo = sbsnd_timeo;
+
+ so->sol_qlen = so->sol_incqlen = 0;
+ TAILQ_INIT(&so->sol_incomp);
+ TAILQ_INIT(&so->sol_comp);
+
+ so->sol_accept_filter = NULL;
+ so->sol_accept_filter_arg = NULL;
+ so->sol_accept_filter_str = NULL;
+
+ so->so_options |= SO_ACCEPTCONN;
+
+listening:
if (backlog < 0 || backlog > somaxconn)
backlog = somaxconn;
- so->so_qlimit = backlog;
- so->so_options |= SO_ACCEPTCONN;
+ so->sol_qlimit = backlog;
}
/*
@@ -774,44 +894,62 @@
sofree(struct socket *so)
{
struct protosw *pr = so->so_proto;
- struct socket *head;
- ACCEPT_LOCK_ASSERT();
SOCK_LOCK_ASSERT(so);
if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
- (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) {
+ (so->so_state & SS_PROTOREF) || (so->so_qstate == SQ_COMP)) {
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
return;
}
- head = so->so_head;
- if (head != NULL) {
- KASSERT((so->so_qstate & SQ_COMP) != 0 ||
- (so->so_qstate & SQ_INCOMP) != 0,
- ("sofree: so_head != NULL, but neither SQ_COMP nor "
- "SQ_INCOMP"));
- KASSERT((so->so_qstate & SQ_COMP) == 0 ||
- (so->so_qstate & SQ_INCOMP) == 0,
- ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP"));
- TAILQ_REMOVE(&head->so_incomp, so, so_list);
- head->so_incqlen--;
- so->so_qstate &= ~SQ_INCOMP;
- so->so_head = NULL;
- }
- KASSERT((so->so_qstate & SQ_COMP) == 0 &&
- (so->so_qstate & SQ_INCOMP) == 0,
- ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
- so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
- if (so->so_options & SO_ACCEPTCONN) {
- KASSERT((TAILQ_EMPTY(&so->so_comp)),
- ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)),
- ("sofree: so_incomp populated"));
+ if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) {
+ struct socket *sol;
+
+ sol = so->so_listen;
+ KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so));
+
+ /*
+ * To solve race between close of a listening socket and
+ * a socket on its incomplete queue, we need to lock both.
+ * The order is first listening socket, then regular.
+ * Since we have neither SS_NOFDREF nor SS_PROTOREF, this
+ * function and the listening socket are the only pointers
+ * to so. To preserve so and sol, we reference both and then
+ * relock.
+ * After the relock the socket may not move to so_comp since it
+ * doesn't have a PCB already, but it may be removed from
+ * so_incomp. If that happens, we share responsibility for
+ * freeing the socket, but soclose() has already removed
+ * it from the queue.
+ */
+ soref(sol);
+ soref(so);
+ SOCK_UNLOCK(so);
+ SOLISTEN_LOCK(sol);
+ SOCK_LOCK(so);
+ if (so->so_qstate == SQ_INCOMP) {
+ KASSERT(so->so_listen == sol,
+ ("%s: so %p migrated out of sol %p",
+ __func__, so, sol));
+ TAILQ_REMOVE(&sol->sol_incomp, so, so_list);
+ sol->sol_incqlen--;
+ /* This is guaranteed not to be the last. */
+ refcount_release(&sol->so_count);
+ so->so_qstate = SQ_NONE;
+ so->so_listen = NULL;
+ } else
+ KASSERT(so->so_listen == NULL,
+ ("%s: so %p not on (in)comp with so_listen",
+ __func__, so));
+ sorele(sol);
+ KASSERT(so->so_count == 1,
+ ("%s: so %p count %u", __func__, so, so->so_count));
+ so->so_count = 0;
}
+ if (SOLISTENING(so))
+ so->so_error = ECONNABORTED;
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
VNET_SO_ASSERT(so);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
@@ -833,12 +971,14 @@
* before calling pru_detach. This means that protocols should not
* assume they can perform socket wakeups, etc, in their detach code.
*/
- sbdestroy(&so->so_snd, so);
- sbdestroy(&so->so_rcv, so);
- seldrain(&so->so_snd.sb_sel);
- seldrain(&so->so_rcv.sb_sel);
- knlist_destroy(&so->so_rcv.sb_sel.si_note);
- knlist_destroy(&so->so_snd.sb_sel.si_note);
+ if (!SOLISTENING(so)) {
+ sbdestroy(&so->so_snd, so);
+ sbdestroy(&so->so_rcv, so);
+ }
+ seldrain(&so->so_rdsel);
+ seldrain(&so->so_wrsel);
+ knlist_destroy(&so->so_rdsel.si_note);
+ knlist_destroy(&so->so_wrsel.si_note);
sodealloc(so);
}
@@ -853,6 +993,8 @@
int
soclose(struct socket *so)
{
+ struct accept_queue lqueue;
+ bool listening;
int error = 0;
KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
@@ -885,41 +1027,42 @@
drop:
if (so->so_proto->pr_usrreqs->pru_close != NULL)
(*so->so_proto->pr_usrreqs->pru_close)(so);
- ACCEPT_LOCK();
- if (so->so_options & SO_ACCEPTCONN) {
+
+ SOCK_LOCK(so);
+ if ((listening = (so->so_options & SO_ACCEPTCONN))) {
struct socket *sp;
- /*
- * Prevent new additions to the accept queues due
- * to ACCEPT_LOCK races while we are draining them.
- */
- so->so_options &= ~SO_ACCEPTCONN;
- while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
- TAILQ_REMOVE(&so->so_incomp, sp, so_list);
- so->so_incqlen--;
- sp->so_qstate &= ~SQ_INCOMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
- soabort(sp);
- ACCEPT_LOCK();
- }
- while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
- TAILQ_REMOVE(&so->so_comp, sp, so_list);
- so->so_qlen--;
- sp->so_qstate &= ~SQ_COMP;
- sp->so_head = NULL;
- ACCEPT_UNLOCK();
- soabort(sp);
- ACCEPT_LOCK();
+
+ TAILQ_INIT(&lqueue);
+ TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list);
+ TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list);
+
+ so->sol_qlen = so->sol_incqlen = 0;
+
+ TAILQ_FOREACH(sp, &lqueue, so_list) {
+ SOCK_LOCK(sp);
+ sp->so_qstate = SQ_NONE;
+ sp->so_listen = NULL;
+ SOCK_UNLOCK(sp);
+ /* Guaranteed not to be the last. */
+ refcount_release(&so->so_count);
}
- KASSERT((TAILQ_EMPTY(&so->so_comp)),
- ("%s: so_comp populated", __func__));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)),
- ("%s: so_incomp populated", __func__));
}
- SOCK_LOCK(so);
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;
- sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */
+ sorele(so);
+ if (listening) {
+ struct socket *sp;
+
+ TAILQ_FOREACH(sp, &lqueue, so_list) {
+ SOCK_LOCK(sp);
+ if (sp->so_count == 0) {
+ SOCK_UNLOCK(sp);
+ soabort(sp);
+ } else
+ /* sp is now in sofree() */
+ SOCK_UNLOCK(sp);
+ }
+ }
CURVNET_RESTORE();
return (error);
}
@@ -951,13 +1094,11 @@
KASSERT(so->so_count == 0, ("soabort: so_count"));
KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
- KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP"));
- KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP"));
+ KASSERT(so->so_qstate == SQ_NONE, ("soabort: !SQ_NONE"));
VNET_SO_ASSERT(so);
if (so->so_proto->pr_usrreqs->pru_abort != NULL)
(*so->so_proto->pr_usrreqs->pru_abort)(so);
- ACCEPT_LOCK();
SOCK_LOCK(so);
sofree(so);
}
@@ -2511,7 +2652,7 @@
} else {
switch (sopt->sopt_name) {
case SO_ACCEPTFILTER:
- error = do_setopt_accept_filter(so, sopt);
+ error = accept_filt_setopt(so, sopt);
if (error)
goto bad;
break;
@@ -2769,7 +2910,7 @@
} else {
switch (sopt->sopt_name) {
case SO_ACCEPTFILTER:
- error = do_getopt_accept_filter(so, sopt);
+ error = accept_filt_getopt(so, sopt);
break;
case SO_LINGER:
@@ -2877,15 +3018,15 @@
break;
case SO_LISTENQLIMIT:
- optval = so->so_qlimit;
+ optval = SOLISTENING(so) ? so->sol_qlimit : 0;
goto integer;
case SO_LISTENQLEN:
- optval = so->so_qlen;
+ optval = SOLISTENING(so) ? so->sol_qlen : 0;
goto integer;
case SO_LISTENINCQLEN:
- optval = so->so_incqlen;
+ optval = SOLISTENING(so) ? so->sol_incqlen : 0;
goto integer;
case SO_TS_CLOCK:
@@ -3032,7 +3173,7 @@
if (so->so_sigio != NULL)
pgsigio(&so->so_sigio, SIGURG, 0);
- selwakeuppri(&so->so_rcv.sb_sel, PSOCK);
+ selwakeuppri(&so->so_rdsel, PSOCK);
}
int
@@ -3052,44 +3193,54 @@
sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
struct thread *td)
{
- int revents = 0;
-
- SOCKBUF_LOCK(&so->so_snd);
- SOCKBUF_LOCK(&so->so_rcv);
- if (events & (POLLIN | POLLRDNORM))
- if (soreadabledata(so))
- revents |= events & (POLLIN | POLLRDNORM);
+ int revents;
- if (events & (POLLOUT | POLLWRNORM))
- if (sowriteable(so))
- revents |= events & (POLLOUT | POLLWRNORM);
-
- if (events & (POLLPRI | POLLRDBAND))
- if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
- revents |= events & (POLLPRI | POLLRDBAND);
-
- if ((events & POLLINIGNEOF) == 0) {
- if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
- revents |= events & (POLLIN | POLLRDNORM);
- if (so->so_snd.sb_state & SBS_CANTSENDMORE)
- revents |= POLLHUP;
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ if (!(events & (POLLIN | POLLRDNORM)))
+ revents = 0;
+ else if (!TAILQ_EMPTY(&so->sol_comp))
+ revents = events & (POLLIN | POLLRDNORM);
+ else {
+ selrecord(td, &so->so_rdsel);
+ revents = 0;
}
- }
-
- if (revents == 0) {
- if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
- selrecord(td, &so->so_rcv.sb_sel);
- so->so_rcv.sb_flags |= SB_SEL;
+ } else {
+ revents = 0;
+ SOCKBUF_LOCK(&so->so_snd);
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (events & (POLLIN | POLLRDNORM))
+ if (soreadabledata(so))
+ revents |= events & (POLLIN | POLLRDNORM);
+ if (events & (POLLOUT | POLLWRNORM))
+ if (sowriteable(so))
+ revents |= events & (POLLOUT | POLLWRNORM);
+ if (events & (POLLPRI | POLLRDBAND))
+ if (so->so_oobmark ||
+ (so->so_rcv.sb_state & SBS_RCVATMARK))
+ revents |= events & (POLLPRI | POLLRDBAND);
+ if ((events & POLLINIGNEOF) == 0) {
+ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+ revents |= events & (POLLIN | POLLRDNORM);
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+ revents |= POLLHUP;
+ }
}
-
- if (events & (POLLOUT | POLLWRNORM)) {
- selrecord(td, &so->so_snd.sb_sel);
- so->so_snd.sb_flags |= SB_SEL;
+ if (revents == 0) {
+ if (events &
+ (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
+ selrecord(td, &so->so_rdsel);
+ so->so_rcv.sb_flags |= SB_SEL;
+ }
+ if (events & (POLLOUT | POLLWRNORM)) {
+ selrecord(td, &so->so_wrsel);
+ so->so_snd.sb_flags |= SB_SEL;
+ }
}
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ SOCKBUF_UNLOCK(&so->so_snd);
}
-
- SOCKBUF_UNLOCK(&so->so_rcv);
- SOCKBUF_UNLOCK(&so->so_snd);
+ SOCK_UNLOCK(so);
return (revents);
}
@@ -3098,28 +3249,38 @@
{
struct socket *so = kn->kn_fp->f_data;
struct sockbuf *sb;
+ struct knlist *knl;
switch (kn->kn_filter) {
case EVFILT_READ:
kn->kn_fop = &soread_filtops;
+ knl = &so->so_rdsel.si_note;
sb = &so->so_rcv;
break;
case EVFILT_WRITE:
kn->kn_fop = &sowrite_filtops;
+ knl = &so->so_wrsel.si_note;
sb = &so->so_snd;
break;
case EVFILT_EMPTY:
kn->kn_fop = &soempty_filtops;
+ knl = &so->so_wrsel.si_note;
sb = &so->so_snd;
break;
default:
return (EINVAL);
}
- SOCKBUF_LOCK(sb);
- knlist_add(&sb->sb_sel.si_note, kn, 1);
- sb->sb_flags |= SB_KNOTE;
- SOCKBUF_UNLOCK(sb);
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ knlist_add(knl, kn, 1);
+ } else {
+ SOCKBUF_LOCK(sb);
+ knlist_add(knl, kn, 1);
+ sb->sb_flags |= SB_KNOTE;
+ SOCKBUF_UNLOCK(sb);
+ }
+ SOCK_UNLOCK(so);
return (0);
}
@@ -3298,11 +3459,11 @@
{
struct socket *so = kn->kn_fp->f_data;
- SOCKBUF_LOCK(&so->so_rcv);
- knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1);
- if (knlist_empty(&so->so_rcv.sb_sel.si_note))
+ so_rdknl_lock(so);
+ knlist_remove(&so->so_rdsel.si_note, kn, 1);
+ if (!SOLISTENING(so) && knlist_empty(&so->so_rdsel.si_note))
so->so_rcv.sb_flags &= ~SB_KNOTE;
- SOCKBUF_UNLOCK(&so->so_rcv);
+ so_rdknl_unlock(so);
}
/*ARGSUSED*/
@@ -3312,11 +3473,13 @@
struct socket *so;
so = kn->kn_fp->f_data;
- if (so->so_options & SO_ACCEPTCONN) {
- kn->kn_data = so->so_qlen;
- return (!TAILQ_EMPTY(&so->so_comp));
+ if (SOLISTENING(so)) {
+ SOCK_LOCK_ASSERT(so);
+ kn->kn_data = so->sol_qlen;
+ return (!TAILQ_EMPTY(&so->sol_comp));
}
+
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
@@ -3342,11 +3505,11 @@
{
struct socket *so = kn->kn_fp->f_data;
- SOCKBUF_LOCK(&so->so_snd);
- knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1);
- if (knlist_empty(&so->so_snd.sb_sel.si_note))
+ so_wrknl_lock(so);
+ knlist_remove(&so->so_wrsel.si_note, kn, 1);
+ if (!SOLISTENING(so) && knlist_empty(&so->so_wrsel.si_note))
so->so_snd.sb_flags &= ~SB_KNOTE;
- SOCKBUF_UNLOCK(&so->so_snd);
+ so_wrknl_unlock(so);
}
/*ARGSUSED*/
@@ -3356,6 +3519,10 @@
struct socket *so;
so = kn->kn_fp->f_data;
+
+ if (SOLISTENING(so))
+ return (0);
+
SOCKBUF_LOCK_ASSERT(&so->so_snd);
kn->kn_data = sbspace(&so->so_snd);
@@ -3382,6 +3549,10 @@
struct socket *so;
so = kn->kn_fp->f_data;
+
+ if (SOLISTENING(so))
+ return (1);
+
SOCKBUF_LOCK_ASSERT(&so->so_snd);
kn->kn_data = sbused(&so->so_snd);
@@ -3451,31 +3622,34 @@
int ret;
restart:
- ACCEPT_LOCK();
+ if ((head = so->so_listen) != NULL)
+ SOLISTEN_LOCK(head);
SOCK_LOCK(so);
+ /*
+ * XXXGL: should we re-check so->so_listen?
+ */
so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
so->so_state |= SS_ISCONNECTED;
- head = so->so_head;
- if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
+ if (head != NULL && (so->so_qstate == SQ_INCOMP)) {
if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+ TAILQ_REMOVE(&head->sol_incomp, so, so_list);
+ head->sol_incqlen--;
+ TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+ head->sol_qlen++;
+ so->so_qstate = SQ_COMP;
SOCK_UNLOCK(so);
- TAILQ_REMOVE(&head->so_incomp, so, so_list);
- head->so_incqlen--;
- so->so_qstate &= ~SQ_INCOMP;
- TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
- head->so_qlen++;
- so->so_qstate |= SQ_COMP;
- ACCEPT_UNLOCK();
- sorwakeup(head);
- wakeup_one(&head->so_timeo);
+ selwakeuppri(&head->so_rdsel, PSOCK);
+ KNOTE_LOCKED(&head->so_rdsel.si_note, 0);
+ SOLISTEN_UNLOCK(head);
+ wakeup_one(&head->sol_comp);
} else {
- ACCEPT_UNLOCK();
+ SOLISTEN_UNLOCK(head);
soupcall_set(so, SO_RCV,
- head->so_accf->so_accept_filter->accf_callback,
- head->so_accf->so_accept_filter_arg);
+ head->sol_accept_filter->accf_callback,
+ head->sol_accept_filter_arg);
so->so_options &= ~SO_ACCEPTFILTER;
- ret = head->so_accf->so_accept_filter->accf_callback(so,
- head->so_accf->so_accept_filter_arg, M_NOWAIT);
+ ret = head->sol_accept_filter->accf_callback(so,
+ head->sol_accept_filter_arg, M_NOWAIT);
if (ret == SU_ISCONNECTED)
soupcall_clear(so, SO_RCV);
SOCK_UNLOCK(so);
@@ -3484,8 +3658,9 @@
}
return;
}
+ if (head != NULL)
+ SOLISTEN_UNLOCK(head);
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
wakeup(&so->so_timeo);
sorwakeup(so);
sowwakeup(so);
@@ -3495,16 +3670,17 @@
soisdisconnecting(struct socket *so)
{
- /*
- * Note: This code assumes that SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) are the same.
- */
- SOCKBUF_LOCK(&so->so_rcv);
+ SOCK_LOCK(so);
so->so_state &= ~SS_ISCONNECTING;
so->so_state |= SS_ISDISCONNECTING;
- socantrcvmore_locked(so);
- SOCKBUF_LOCK(&so->so_snd);
- socantsendmore_locked(so);
+
+ if (!SOLISTENING(so)) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ socantsendmore_locked(so);
+ }
+ SOCK_UNLOCK(so);
wakeup(&so->so_timeo);
}
@@ -3512,17 +3688,18 @@
soisdisconnected(struct socket *so)
{
- /*
- * Note: This code assumes that SOCK_LOCK(so) and
- * SOCKBUF_LOCK(&so->so_rcv) are the same.
- */
- SOCKBUF_LOCK(&so->so_rcv);
+ SOCK_LOCK(so);
so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
so->so_state |= SS_ISDISCONNECTED;
- socantrcvmore_locked(so);
- SOCKBUF_LOCK(&so->so_snd);
- sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
- socantsendmore_locked(so);
+
+ if (!SOLISTENING(so)) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ socantrcvmore_locked(so);
+ SOCKBUF_LOCK(&so->so_snd);
+ sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
+ socantsendmore_locked(so);
+ }
+ SOCK_UNLOCK(so);
wakeup(&so->so_timeo);
}
@@ -3591,6 +3768,94 @@
sb->sb_flags &= ~SB_UPCALL;
}
+static void
+so_rdknl_lock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK(so);
+ else
+ SOCKBUF_LOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_unlock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_locked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK_ASSERT(so);
+ else
+ SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_unlocked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK_ASSERT(so);
+ else
+ SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_wrknl_lock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK(so);
+ else
+ SOCKBUF_LOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_unlock(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_locked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_LOCK_ASSERT(so);
+ else
+ SOCKBUF_LOCK_ASSERT(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_unlocked(void *arg)
+{
+ struct socket *so = arg;
+
+ if (SOLISTENING(so))
+ SOCK_UNLOCK_ASSERT(so);
+ else
+ SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+}
+
/*
* Create an external-format (``xsocket'') structure using the information in
* the kernel-format socket structure pointed to by so. This is done to
@@ -3612,32 +3877,24 @@
xso->so_pcb = so->so_pcb;
xso->xso_protocol = so->so_proto->pr_protocol;
xso->xso_family = so->so_proto->pr_domain->dom_family;
- xso->so_qlen = so->so_qlen;
- xso->so_incqlen = so->so_incqlen;
- xso->so_qlimit = so->so_qlimit;
xso->so_timeo = so->so_timeo;
xso->so_error = so->so_error;
- xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
- xso->so_oobmark = so->so_oobmark;
- sbtoxsockbuf(&so->so_snd, &xso->so_snd);
- sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
xso->so_uid = so->so_cred->cr_uid;
-}
-
-
-/*
- * Socket accessor functions to provide external consumers with
- * a safe interface to socket state
- *
- */
-
-void
-so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *),
- void *arg)
-{
-
- TAILQ_FOREACH(so, &so->so_comp, so_list)
- func(so, arg);
+ xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
+ if (SOLISTENING(so)) {
+ xso->so_qlen = so->sol_qlen;
+ xso->so_incqlen = so->sol_incqlen;
+ xso->so_qlimit = so->sol_qlimit;
+ xso->so_oobmark = 0;
+ bzero(&xso->so_snd, sizeof(xso->so_snd));
+ bzero(&xso->so_rcv, sizeof(xso->so_rcv));
+ } else {
+ xso->so_state |= so->so_qstate;
+ xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0;
+ xso->so_oobmark = so->so_oobmark;
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ }
}
struct sockbuf *
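
With this change filt_soread() reports the completed-queue length (sol_qlen) for a listening socket, so EVFILT_READ can be used to wait for pending connections. A hedged userland sketch (wait_for_conn is an illustrative name; error handling omitted):

	#include <sys/types.h>
	#include <sys/event.h>

	static int
	wait_for_conn(int kq, int lsock)
	{
		struct kevent kev;

		EV_SET(&kev, lsock, EVFILT_READ, EV_ADD, 0, 0, NULL);
		kevent(kq, &kev, 1, NULL, 0, NULL);		/* register */
		/* Wait for one event; kev.data reflects sol_qlen on return. */
		return (kevent(kq, NULL, 0, &kev, 1, NULL));
	}
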
Index: sys/kern/uipc_syscalls.c
===================================================================
--- sys/kern/uipc_syscalls.c
+++ sys/kern/uipc_syscalls.c
@@ -350,59 +350,57 @@
(flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps);
if (error != 0)
goto done;
- ACCEPT_LOCK();
- if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
- ACCEPT_UNLOCK();
+ SOCK_LOCK(head);
+ if (!SOLISTENING(head)) {
+ SOCK_UNLOCK(head);
+ error = EINVAL;
+ goto noconnection;
+ }
+ if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) {
+ SOLISTEN_UNLOCK(head);
error = EWOULDBLOCK;
goto noconnection;
}
- while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
- if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
- head->so_error = ECONNABORTED;
- break;
- }
- error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
+ while (TAILQ_EMPTY(&head->sol_comp) && head->so_error == 0) {
+ error = msleep(&head->sol_comp, &head->so_lock, PSOCK | PCATCH,
"accept", 0);
if (error != 0) {
- ACCEPT_UNLOCK();
+ SOLISTEN_UNLOCK(head);
goto noconnection;
}
}
if (head->so_error) {
error = head->so_error;
head->so_error = 0;
- ACCEPT_UNLOCK();
+ SOLISTEN_UNLOCK(head);
goto noconnection;
}
- so = TAILQ_FIRST(&head->so_comp);
- KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
- KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
-
+ so = TAILQ_FIRST(&head->sol_comp);
/*
* Before changing the flags on the socket, we have to bump the
* reference count. Otherwise, if the protocol calls sofree(),
* the socket will be released due to a zero refcount.
*/
- SOCK_LOCK(so); /* soref() and so_state update */
+ SOCK_LOCK(so);
+ KASSERT(so->so_qstate == SQ_COMP,
+ ("%s: so %p not SQ_COMP", __func__, so));
soref(so); /* file descriptor reference */
-
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
+ TAILQ_REMOVE(&head->sol_comp, so, so_list);
+ head->sol_qlen--;
if (flags & ACCEPT4_INHERIT)
so->so_state |= (head->so_state & SS_NBIO);
else
so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
-
+ so->so_qstate = SQ_NONE;
+ so->so_listen = NULL;
SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
+ sorele(head);
/* An extra reference on `nfp' has been held for us by falloc(). */
td->td_retval[0] = fd;
- /* connection has been removed from the listen queue */
- KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
+ /* Connection has been removed from the listen queue. */
+ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
if (flags & ACCEPT4_INHERIT) {
pgid = fgetown(&head->so_sigio);
@@ -420,7 +418,6 @@
(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
tmp = fflag & FASYNC;
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
- sa = NULL;
error = soaccept(so, &sa);
if (error != 0)
goto noconnection;
@@ -558,7 +555,7 @@
}
SOCK_LOCK(so);
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
- error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
+ error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH,
"connec", 0);
if (error != 0) {
if (error == EINTR || error == ERESTART)
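
The accept path above now sleeps on head->sol_comp and dequeues from the completed queue under SOLISTEN_LOCK(). A userland counterpart that drains that queue might look like the sketch below (drain_accept_queue and handle_conn are illustrative names; the listening socket is assumed to already be non-blocking):

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <errno.h>

	static void
	drain_accept_queue(int lsock, void (*handle_conn)(int))
	{
		int fd;

		for (;;) {
			/* New descriptors start out non-blocking. */
			fd = accept4(lsock, NULL, NULL, SOCK_NONBLOCK);
			if (fd < 0) {
				if (errno == EINTR || errno == ECONNABORTED)
					continue;	/* transient, retry */
				break;			/* EWOULDBLOCK: queue empty */
			}
			handle_conn(fd);
		}
	}
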
Index: sys/kern/uipc_usrreq.c
===================================================================
--- sys/kern/uipc_usrreq.c
+++ sys/kern/uipc_usrreq.c
@@ -430,7 +430,7 @@
unp->unp_socket = so;
so->so_pcb = unp;
unp->unp_refcount = 1;
- if (so->so_head != NULL)
+ if (so->so_listen != NULL)
unp->unp_flags |= UNP_NASCENT;
UNP_LIST_LOCK();
@@ -552,7 +552,7 @@
UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
- VOP_UNP_BIND(vp, unp->unp_socket);
+ VOP_UNP_BIND(vp, unp);
unp->unp_vnode = vp;
unp->unp_addr = soun;
unp->unp_flags &= ~UNP_BINDING;
@@ -607,6 +607,7 @@
uipc_close(struct socket *so)
{
struct unpcb *unp, *unp2;
+ struct vnode *vp = NULL;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
@@ -619,8 +620,14 @@
unp_disconnect(unp, unp2);
UNP_PCB_UNLOCK(unp2);
}
+ if (SOLISTENING(so) && ((vp = unp->unp_vnode) != NULL)) {
+ VOP_UNP_DETACH(vp);
+ unp->unp_vnode = NULL;
+ }
UNP_PCB_UNLOCK(unp);
UNP_LINK_WUNLOCK();
+ if (vp)
+ vrele(vp);
}
static int
@@ -663,16 +670,11 @@
--unp_count;
UNP_LIST_UNLOCK();
- if ((unp->unp_flags & UNP_NASCENT) != 0) {
- UNP_PCB_LOCK(unp);
- goto teardown;
- }
UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
+ if ((unp->unp_flags & UNP_NASCENT) != 0)
+ goto teardown;
- /*
- * XXXRW: Should assert vp->v_socket == so.
- */
if ((vp = unp->unp_vnode) != NULL) {
VOP_UNP_DETACH(vp);
unp->unp_vnode = NULL;
@@ -696,8 +698,8 @@
UNP_PCB_UNLOCK(ref);
}
local_unp_rights = unp_rights;
- UNP_LINK_WUNLOCK();
teardown:
+ UNP_LINK_WUNLOCK();
unp->unp_socket->so_pcb = NULL;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
@@ -761,7 +763,6 @@
error = solisten_proto_check(so);
if (error == 0) {
cru2x(td->td_ucred, &unp->unp_peercred);
- unp->unp_flags |= UNP_HAVEPCCACHED;
solisten_proto(so, backlog);
}
SOCK_UNLOCK(so);
@@ -1319,7 +1320,7 @@
{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
struct vnode *vp;
- struct socket *so2, *so3;
+ struct socket *so2;
struct unpcb *unp, *unp2, *unp3;
struct nameidata nd;
char buf[SOCK_MAXADDRLEN];
@@ -1386,31 +1387,30 @@
* and to protect simultaneous locking of multiple pcbs.
*/
UNP_LINK_WLOCK();
- VOP_UNP_CONNECT(vp, &so2);
- if (so2 == NULL) {
+ VOP_UNP_CONNECT(vp, &unp2);
+ if (unp2 == NULL) {
error = ECONNREFUSED;
goto bad2;
}
+ so2 = unp2->unp_socket;
if (so->so_type != so2->so_type) {
error = EPROTOTYPE;
goto bad2;
}
+ UNP_PCB_LOCK(unp);
+ UNP_PCB_LOCK(unp2);
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
if (so2->so_options & SO_ACCEPTCONN) {
CURVNET_SET(so2->so_vnet);
- so3 = sonewconn(so2, 0);
+ so2 = sonewconn(so2, 0);
CURVNET_RESTORE();
} else
- so3 = NULL;
- if (so3 == NULL) {
+ so2 = NULL;
+ if (so2 == NULL) {
error = ECONNREFUSED;
- goto bad2;
+ goto bad3;
}
- unp = sotounpcb(so);
- unp2 = sotounpcb(so2);
- unp3 = sotounpcb(so3);
- UNP_PCB_LOCK(unp);
- UNP_PCB_LOCK(unp2);
+ unp3 = sotounpcb(so2);
UNP_PCB_LOCK(unp3);
if (unp2->unp_addr != NULL) {
bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
@@ -1431,30 +1431,24 @@
* listen(); uipc_listen() cached that process's credentials
* at that time so we can use them now.
*/
- KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
- ("unp_connect: listener without cached peercred"));
memcpy(&unp->unp_peercred, &unp2->unp_peercred,
sizeof(unp->unp_peercred));
unp->unp_flags |= UNP_HAVEPC;
if (unp2->unp_flags & UNP_WANTCRED)
unp3->unp_flags |= UNP_WANTCRED;
- UNP_PCB_UNLOCK(unp3);
UNP_PCB_UNLOCK(unp2);
- UNP_PCB_UNLOCK(unp);
+ unp2 = unp3;
#ifdef MAC
- mac_socketpeer_set_from_socket(so, so3);
- mac_socketpeer_set_from_socket(so3, so);
+ mac_socketpeer_set_from_socket(so, so2);
+ mac_socketpeer_set_from_socket(so2, so);
#endif
-
- so2 = so3;
}
- unp = sotounpcb(so);
- KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
- unp2 = sotounpcb(so2);
- KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL"));
- UNP_PCB_LOCK(unp);
- UNP_PCB_LOCK(unp2);
+
+ KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
+ sotounpcb(so2) == unp2,
+ ("%s: unp2 %p so2 %p", __func__, unp2, so2));
error = unp_connect2(so, so2, PRU_CONNECT);
+bad3:
UNP_PCB_UNLOCK(unp2);
UNP_PCB_UNLOCK(unp);
bad2:
@@ -2237,8 +2231,7 @@
static void
unp_gc_process(struct unpcb *unp)
{
- struct socket *soa;
- struct socket *so;
+ struct socket *so, *soa;
struct file *fp;
/* Already processed. */
@@ -2258,28 +2251,30 @@
return;
}
- /*
- * Mark all sockets we reference with RIGHTS.
- */
so = unp->unp_socket;
- if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
- SOCKBUF_LOCK(&so->so_rcv);
- unp_scan(so->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&so->so_rcv);
- }
-
- /*
- * Mark all sockets in our accept queue.
- */
- ACCEPT_LOCK();
- TAILQ_FOREACH(soa, &so->so_comp, so_list) {
- if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
- continue;
- SOCKBUF_LOCK(&soa->so_rcv);
- unp_scan(soa->so_rcv.sb_mb, unp_accessable);
- SOCKBUF_UNLOCK(&soa->so_rcv);
+ SOCK_LOCK(so);
+ if (SOLISTENING(so)) {
+ /*
+ * Mark all sockets in our accept queue.
+ */
+ TAILQ_FOREACH(soa, &so->sol_comp, so_list) {
+ if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
+ continue;
+ SOCKBUF_LOCK(&soa->so_rcv);
+ unp_scan(soa->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&soa->so_rcv);
+ }
+ } else {
+ /*
+ * Mark all sockets we reference with RIGHTS.
+ */
+ if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
+ SOCKBUF_LOCK(&so->so_rcv);
+ unp_scan(so->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ }
}
- ACCEPT_UNLOCK();
+ SOCK_UNLOCK(so);
unp->unp_gcflag |= UNPGC_SCANNED;
}
@@ -2399,7 +2394,8 @@
UNP_LIST_LOCK();
unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
UNP_LIST_UNLOCK();
- unp_dispose_mbuf(so->so_rcv.sb_mb);
+ if (!SOLISTENING(so))
+ unp_dispose_mbuf(so->so_rcv.sb_mb);
}
static void
@@ -2454,7 +2450,6 @@
void
vfs_unp_reclaim(struct vnode *vp)
{
- struct socket *so;
struct unpcb *unp;
int active;
@@ -2464,10 +2459,7 @@
active = 0;
UNP_LINK_WLOCK();
- VOP_UNP_CONNECT(vp, &so);
- if (so == NULL)
- goto done;
- unp = sotounpcb(so);
+ VOP_UNP_CONNECT(vp, &unp);
if (unp == NULL)
goto done;
UNP_PCB_LOCK(unp);
@@ -2503,10 +2495,6 @@
db_printf("%sUNP_HAVEPC", comma ? ", " : "");
comma = 1;
}
- if (unp_flags & UNP_HAVEPCCACHED) {
- db_printf("%sUNP_HAVEPCCACHED", comma ? ", " : "");
- comma = 1;
- }
if (unp_flags & UNP_WANTCRED) {
db_printf("%sUNP_WANTCRED", comma ? ", " : "");
comma = 1;
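
The uipc_usrreq.c changes above make the vnode point at the unpcb rather than the socket and teach uipc_close() about listening sockets. The userland setup that drives this code is an ordinary PF_LOCAL listener; a sketch (unix_listen is an illustrative name, backlog arbitrary, error handling omitted):

	#include <sys/types.h>
	#include <sys/socket.h>
	#include <sys/un.h>
	#include <string.h>
	#include <unistd.h>

	static int
	unix_listen(const char *path)
	{
		struct sockaddr_un sun;
		int s;

		s = socket(PF_LOCAL, SOCK_STREAM, 0);
		memset(&sun, 0, sizeof(sun));
		sun.sun_family = AF_LOCAL;
		strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
		sun.sun_len = SUN_LEN(&sun);
		unlink(path);			/* drop a stale socket file, if any */
		bind(s, (struct sockaddr *)&sun, sizeof(sun));	/* unp_bind() */
		listen(s, 8);			/* uipc_listen() -> solisten_proto() */
		return (s);
	}
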
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -1128,7 +1128,7 @@
vop_stdunp_bind(struct vop_unp_bind_args *ap)
{
- ap->a_vp->v_socket = ap->a_socket;
+ ap->a_vp->v_unpcb = ap->a_unpcb;
return (0);
}
@@ -1136,7 +1136,7 @@
vop_stdunp_connect(struct vop_unp_connect_args *ap)
{
- *ap->a_socket = ap->a_vp->v_socket;
+ *ap->a_unpcb = ap->a_vp->v_unpcb;
return (0);
}
@@ -1144,7 +1144,7 @@
vop_stdunp_detach(struct vop_unp_detach_args *ap)
{
- ap->a_vp->v_socket = NULL;
+ ap->a_vp->v_unpcb = NULL;
return (0);
}
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -2992,7 +2992,10 @@
/* XXX Elsewhere we detect an already freed vnode via NULL v_op. */
vp->v_op = NULL;
#endif
- bzero(&vp->v_un, sizeof(vp->v_un));
+ vp->v_mountedhere = NULL;
+ vp->v_unpcb = NULL;
+ vp->v_rdev = NULL;
+ vp->v_fifoinfo = NULL;
vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
vp->v_iflag = 0;
vp->v_vflag = 0;
Index: sys/kern/vnode_if.src
===================================================================
--- sys/kern/vnode_if.src
+++ sys/kern/vnode_if.src
@@ -662,7 +662,7 @@
vop_unp_bind {
IN struct vnode *vp;
- IN struct socket *socket;
+ IN struct unpcb *unpcb;
};
@@ -670,7 +670,7 @@
vop_unp_connect {
IN struct vnode *vp;
- OUT struct socket **socket;
+ OUT struct unpcb **unpcb;
};
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -1387,10 +1387,10 @@
TCP_PROBE3(debug__input, tp, th, m);
tcp_dooptions(&to, optp, optlen, TO_SYN);
#ifdef TCP_RFC7413
- if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+ if (syncache_add(&inc, &to, th, inp, so, m, NULL, NULL))
goto tfo_socket_result;
#else
- syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
+ syncache_add(&inc, &to, th, inp, so, m, NULL, NULL);
#endif
/*
* Entry added to syncache and mbuf consumed.
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -1576,7 +1576,6 @@
("tcp_close: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
Index: sys/netinet/tcp_syncache.h
===================================================================
--- sys/netinet/tcp_syncache.h
+++ sys/netinet/tcp_syncache.h
@@ -42,7 +42,7 @@
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
int syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
+ struct tcphdr *, struct inpcb *, struct socket *, struct mbuf *,
void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -146,9 +146,9 @@
struct socket *);
static void syncookie_reseed(void *);
#ifdef INVARIANTS
-static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
- struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
- struct socket *lso);
+static int syncookie_cmp(struct in_conninfo *, struct syncache_head *,
+ struct syncache *, struct tcphdr *, struct tcpopt *,
+ struct socket *);
#endif
/*
@@ -1160,10 +1160,11 @@
}
#ifdef TCP_RFC7413
-static void
-syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
+static struct socket *
+syncache_tfo_expand(struct syncache *sc, struct socket *lso, struct mbuf *m,
uint64_t response_cookie)
{
+ struct socket *so;
struct inpcb *inp;
struct tcpcb *tp;
unsigned int *pending_counter;
@@ -1175,12 +1176,12 @@
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
- *lsop = syncache_socket(sc, *lsop, m);
- if (*lsop == NULL) {
+ so = syncache_socket(sc, lso, m);
+ if (so == NULL) {
TCPSTAT_INC(tcps_sc_aborted);
atomic_subtract_int(pending_counter, 1);
} else {
- inp = sotoinpcb(*lsop);
+ inp = sotoinpcb(so);
tp = intotcpcb(inp);
tp->t_flags |= TF_FASTOPEN;
tp->t_tfo_cookie = response_cookie;
@@ -1189,6 +1190,7 @@
tp->t_tfo_pending = pending_counter;
TCPSTAT_INC(tcps_sc_completed);
}
+ return (so);
}
#endif /* TCP_RFC7413 */
@@ -1213,11 +1215,10 @@
*/
int
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
+ struct inpcb *inp, struct socket *so, struct mbuf *m, void *tod,
void *todctx)
{
struct tcpcb *tp;
- struct socket *so;
struct syncache *sc = NULL;
struct syncache_head *sch;
struct mbuf *ipopts = NULL;
@@ -1248,7 +1249,7 @@
* Combine all so/tp operations very early to drop the INP lock as
* soon as possible.
*/
- so = *lsop;
+ KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));
tp = sototcpcb(so);
cred = crhold(so->so_cred);
@@ -1259,7 +1260,7 @@
#endif
ip_ttl = inp->inp_ip_ttl;
ip_tos = inp->inp_ip_tos;
- win = sbspace(&so->so_rcv);
+ win = so->sol_sbrcv_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
#ifdef TCP_RFC7413
@@ -1272,7 +1273,7 @@
* listen queue with bogus TFO connections.
*/
if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
- (so->so_qlimit / 2)) {
+ (so->sol_qlimit / 2)) {
int result;
result = tcp_fastopen_check_cookie(inc,
@@ -1566,10 +1567,7 @@
}
done:
- if (m) {
- *lsop = NULL;
- m_freem(m);
- }
+ m_freem(m);
#ifdef TCP_RFC7413
/*
* If tfo_pending is not NULL here, then a TFO SYN that did not
@@ -2098,7 +2096,7 @@
sc->sc_flags |= SCF_WINSCALE;
}
- wnd = sbspace(&lso->so_rcv);
+ wnd = lso->sol_sbrcv_hiwat;
wnd = imax(wnd, 0);
wnd = imin(wnd, TCP_MAXWIN);
sc->sc_wnd = wnd;
Index: sys/netinet/tcp_timewait.c
===================================================================
--- sys/netinet/tcp_timewait.c
+++ sys/netinet/tcp_timewait.c
@@ -352,7 +352,6 @@
("tcp_twstart: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
@@ -491,7 +490,6 @@
if (inp->inp_flags & INP_SOCKREF) {
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
- ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT(so->so_state & SS_PROTOREF,
("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
Index: sys/sys/sockbuf.h
===================================================================
--- sys/sys/sockbuf.h
+++ sys/sys/sockbuf.h
@@ -32,7 +32,6 @@
*/
#ifndef _SYS_SOCKBUF_H_
#define _SYS_SOCKBUF_H_
-#include <sys/selinfo.h> /* for struct selinfo */
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
@@ -64,6 +63,7 @@
struct sockaddr;
struct socket;
struct thread;
+struct selinfo;
struct xsockbuf {
u_int sb_cc;
@@ -84,9 +84,9 @@
* (a) locked by SOCKBUF_LOCK().
*/
struct sockbuf {
- struct selinfo sb_sel; /* process selecting read/write */
- struct mtx sb_mtx; /* sockbuf lock */
- struct sx sb_sx; /* prevent I/O interlacing */
+ struct mtx sb_mtx; /* sockbuf lock */
+ struct sx sb_sx; /* prevent I/O interlacing */
+ struct selinfo *sb_sel; /* process selecting read/write */
short sb_state; /* (a) socket state on sockbuf */
#define sb_startzero sb_mb
struct mbuf *sb_mb; /* (a) the mbuf chain */
Index: sys/sys/socket.h
===================================================================
--- sys/sys/socket.h
+++ sys/sys/socket.h
@@ -704,9 +704,5 @@
void so_lock(struct socket *so);
void so_unlock(struct socket *so);
-void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg);
-
-#endif
-
-
+#endif /* _KERNEL */
#endif /* !_SYS_SOCKET_H_ */
Index: sys/sys/socketvar.h
===================================================================
--- sys/sys/socketvar.h
+++ sys/sys/socketvar.h
@@ -63,60 +63,35 @@
* Locking key to struct socket:
* (a) constant after allocation, no locking required.
* (b) locked by SOCK_LOCK(so).
- * (c) locked by SOCKBUF_LOCK(&so->so_rcv).
- * (e) locked by ACCEPT_LOCK().
+ * (cr) locked by SOCKBUF_LOCK(&so->so_rcv).
+ * (cs) locked by SOCKBUF_LOCK(&so->so_snd).
+ * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
* (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value.
* (h) locked by global mutex so_global_mtx.
*/
+TAILQ_HEAD(accept_queue, socket);
struct socket {
- int so_count; /* (b) reference count */
+ struct mtx so_lock;
+ volatile u_int so_count; /* (b / refcount) */
+ struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */
+ struct selinfo so_wrsel; /* (b/cs) for so_snd */
short so_type; /* (a) generic type, see socket.h */
- short so_options; /* from socket call, see socket.h */
- short so_linger; /* time to linger while closing */
+ short so_options; /* (b) from socket call, see socket.h */
+ short so_linger; /* time to linger close(2) */
short so_state; /* (b) internal state flags SS_* */
- int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
-/*
- * Variables for connection queuing.
- * Socket where accepts occur is so_head in all subsidiary sockets.
- * If so_head is 0, socket is not related to an accept.
- * For head socket so_incomp queues partially completed connections,
- * while so_comp is a queue of connections ready to be accepted.
- * If a connection is aborted and it has so_head set, then
- * it has to be pulled out of either so_incomp or so_comp.
- * We allow connections to queue up based on current queue lengths
- * and limit on number of queued connections for this socket.
- */
- struct socket *so_head; /* (e) back pointer to listen socket */
- TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */
- TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */
- TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */
- u_int so_qlen; /* (e) number of unaccepted connections */
- u_int so_incqlen; /* (e) number of unaccepted incomplete
- connections */
- u_int so_qlimit; /* (e) max number queued connections */
short so_timeo; /* (g) connection timeout */
u_short so_error; /* (f) error affecting connection */
struct sigio *so_sigio; /* [sg] information for async I/O or
out of band data (SIGURG) */
- u_long so_oobmark; /* (c) chars to oob mark */
-
- struct sockbuf so_rcv, so_snd;
-
struct ucred *so_cred; /* (a) user credentials */
struct label *so_label; /* (b) MAC label for socket */
- struct label *so_peerlabel; /* (b) cached MAC label for peer */
/* NB: generation count must not be first. */
so_gen_t so_gencnt; /* (h) generation count */
void *so_emuldata; /* (b) private data for emulators */
- struct so_accf {
- struct accept_filter *so_accept_filter;
- void *so_accept_filter_arg; /* saved filter args */
- char *so_accept_filter_str; /* saved user args */
- } *so_accf;
struct osd osd; /* Object Specific extensions */
/*
* so_fibnum, so_user_cookie and friends can be used to attach
@@ -129,39 +104,82 @@
int so_ts_clock; /* type of the clock used for timestamps */
uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */
-
- void *so_pspare[2]; /* general use */
- int so_ispare[2]; /* general use */
+ union {
+ /* Regular (data flow) socket. */
+ struct {
+ /* (cr, cs) Receive and send buffers. */
+ struct sockbuf so_rcv, so_snd;
+
+ /* (e) Our place on accept queue. */
+ TAILQ_ENTRY(socket) so_list;
+ struct socket *so_listen; /* (b) */
+ enum {
+ SQ_NONE = 0,
+ SQ_INCOMP = 0x0800, /* on sol_incomp */
+ SQ_COMP = 0x1000, /* on sol_comp */
+ } so_qstate; /* (b) */
+
+ /* (b) cached MAC label for peer */
+ struct label *so_peerlabel;
+ u_long so_oobmark; /* chars to oob mark */
+ };
+ /*
+ * Listening socket, where accepts occur, is so_listen in all
+ * subsidiary sockets. If so_listen is NULL, socket is not
+ * related to an accept. For a listening socket itself
+ * sol_incomp queues partially completed connections, while
+ * sol_comp is a queue of connections ready to be accepted.
+ * If a connection is aborted and it has so_listen set, then
+ * it has to be pulled out of either sol_incomp or sol_comp.
+ * We allow connections to queue up based on current queue
+ * lengths and limit on number of queued connections for this
+ * socket.
+ */
+ struct {
+ /* (e) queue of partial unaccepted connections */
+ struct accept_queue sol_incomp;
+ /* (e) queue of complete unaccepted connections */
+ struct accept_queue sol_comp;
+ u_int sol_qlen; /* (e) sol_comp length */
+ u_int sol_incqlen; /* (e) sol_incomp length */
+ u_int sol_qlimit; /* (e) queue limit */
+
+ /* accept_filter(9) optional data */
+ struct accept_filter *sol_accept_filter;
+ void *sol_accept_filter_arg; /* saved filter args */
+ char *sol_accept_filter_str; /* saved user args */
+
+ /* Socket buffer parameters, to be copied to
+ * dataflow sockets, accepted from this one. */
+ int sol_sbrcv_lowat;
+ int sol_sbsnd_lowat;
+ u_int sol_sbrcv_hiwat;
+ u_int sol_sbsnd_hiwat;
+ short sol_sbrcv_flags;
+ short sol_sbsnd_flags;
+ sbintime_t sol_sbrcv_timeo;
+ sbintime_t sol_sbsnd_timeo;
+ };
+ };
};
-/*
- * Global accept mutex to serialize access to accept queues and
- * fields associated with multiple sockets. This allows us to
- * avoid defining a lock order between listen and accept sockets
- * until such time as it proves to be a good idea.
- */
-extern struct mtx accept_mtx;
-#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED)
-#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED)
-#define ACCEPT_LOCK() mtx_lock(&accept_mtx)
-#define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx)
-
-/*
- * Per-socket mutex: we reuse the receive socket buffer mutex for space
- * efficiency. This decision should probably be revisited as we optimize
- * locking for the socket code.
- */
-#define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv)
-#define SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv)
-#define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv)
-#define SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv)
-#define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
-
-/*
- * Socket state bits stored in so_qstate.
- */
-#define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */
-#define SQ_COMP 0x1000 /* unaccepted, complete connection */
+#define SOCK_LOCK(so) mtx_lock(&(so)->so_lock)
+#define SOCK_OWNED(so) mtx_owned(&(so)->so_lock)
+#define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock)
+#define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED)
+#define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED)
+
+#define SOLISTENING(so) (((so)->so_options & SO_ACCEPTCONN) != 0)
+#define SOLISTEN_LOCK(sol) do { \
+ mtx_lock(&(sol)->so_lock); \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+} while (0)
+#define SOLISTEN_UNLOCK(sol) do { \
+ KASSERT(SOLISTENING(sol), \
+ ("%s: %p not listening", __func__, (sol))); \
+ mtx_unlock(&(sol)->so_lock); \
+} while (0)
/*
* Externalized form of struct socket used by the sysctl(3) interface.
@@ -212,8 +230,7 @@
/* can we read something from so? */
#define soreadabledata(so) \
- (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
- !TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
+ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || (so)->so_error)
#define soreadable(so) \
(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
@@ -226,26 +243,19 @@
(so)->so_error)
/*
- * soref()/sorele() ref-count the socket structure. Note that you must
- * still explicitly close the socket, but the last ref count will free
- * the structure.
+ * soref()/sorele() ref-count the socket structure.
+ * soref() may be called without owning the socket lock, but in that case the
+ * caller must own something that holds the socket, and so_count must not be 0.
+ * Note that you must still explicitly close the socket, but the last ref
+ * count will free the structure.
*/
-#define soref(so) do { \
- SOCK_LOCK_ASSERT(so); \
- ++(so)->so_count; \
-} while (0)
-
+#define soref(so) refcount_acquire(&(so)->so_count)
#define sorele(so) do { \
- ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
- if ((so)->so_count <= 0) \
- panic("sorele"); \
- if (--(so)->so_count == 0) \
+ if (refcount_release(&(so)->so_count)) \
sofree(so); \
- else { \
+ else \
SOCK_UNLOCK(so); \
- ACCEPT_UNLOCK(); \
- } \
} while (0)
/*
@@ -370,8 +380,8 @@
int solisten_proto_check(struct socket *so);
struct socket *
sonewconn(struct socket *head, int connstatus);
-
-
+struct socket *
+ sopeeloff(struct socket *);
int sopoll(struct socket *so, int events, struct ucred *active_cred,
struct thread *td);
int sopoll_generic(struct socket *so, int events,
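
The global accept mutex disappears from the reference-count path: soref() becomes a plain refcount_acquire(9) that may be used without the socket lock whenever the caller already owns something that keeps the socket alive, while sorele() still expects SOCK_LOCK() so that the final release can run sofree(). A small usage sketch; the wrapper names hold_socket()/drop_socket() are hypothetical, the other names are taken from this diff:

static void
hold_socket(struct socket *so)
{
	/*
	 * No lock needed: the caller already owns something (e.g. a file
	 * descriptor or a PCB reference) that keeps so_count above zero.
	 */
	soref(so);
}

static void
drop_socket(struct socket *so)
{
	/*
	 * sorele() expects the socket lock; it either frees the socket
	 * through sofree() on the last reference or just unlocks it.
	 */
	SOCK_LOCK(so);
	sorele(so);
}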
Index: sys/sys/sockopt.h
===================================================================
--- sys/sys/sockopt.h
+++ sys/sys/sockopt.h
@@ -64,8 +64,8 @@
int soopt_getm(struct sockopt *sopt, struct mbuf **mp);
int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m);
int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m);
-int do_getopt_accept_filter(struct socket *so, struct sockopt *sopt);
-int do_setopt_accept_filter(struct socket *so, struct sockopt *sopt);
+int accept_filt_getopt(struct socket *, struct sockopt *);
+int accept_filt_setopt(struct socket *, struct sockopt *);
int so_setsockopt(struct socket *so, int level, int optname,
void *optval, size_t optlen);
Index: sys/sys/unpcb.h
===================================================================
--- sys/sys/unpcb.h
+++ sys/sys/unpcb.h
@@ -92,14 +92,8 @@
* and is really the credentials of the connected peer. This is used
* to determine whether the contents should be sent to the user or
* not.
- *
- * UNP_HAVEPCCACHED - indicates that the unp_peercred member is filled
- * in, but does *not* contain the credentials of the connected peer
- * (there may not even be a peer). This is set in unp_listen() when
- * it fills in unp_peercred for later consumption by unp_connect().
*/
#define UNP_HAVEPC 0x001
-#define UNP_HAVEPCCACHED 0x002
#define UNP_WANTCRED 0x004 /* credentials wanted */
#define UNP_CONNWAIT 0x008 /* connect blocks until accepted */
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -112,14 +112,13 @@
/*
* Type specific fields, only one applies to any given vnode.
- * See #defines below for renaming to v_* namespace.
*/
union {
- struct mount *vu_mount; /* v ptr to mountpoint (VDIR) */
- struct socket *vu_socket; /* v unix domain net (VSOCK) */
- struct cdev *vu_cdev; /* v device (VCHR, VBLK) */
- struct fifoinfo *vu_fifoinfo; /* v fifo (VFIFO) */
- } v_un;
+ struct mount *v_mountedhere; /* v ptr to mountpoint (VDIR) */
+ struct unpcb *v_unpcb; /* v unix domain net (VSOCK) */
+ struct cdev *v_rdev; /* v device (VCHR, VBLK) */
+ struct fifoinfo *v_fifoinfo; /* v fifo (VFIFO) */
+ };
/*
* vfs_hash: (mount + inode) -> vnode hash. The hash value
@@ -175,11 +174,6 @@
#endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
-#define v_mountedhere v_un.vu_mount
-#define v_socket v_un.vu_socket
-#define v_rdev v_un.vu_cdev
-#define v_fifoinfo v_un.vu_fifoinfo
-
#define bo2vnode(bo) __containerof((bo), struct vnode, v_bufobj)
/* XXX: These are temporary to avoid a source sweep at this time */
@@ -200,7 +194,7 @@
long xv_numoutput; /* num of writes in progress */
enum vtype xv_type; /* vnode type */
union {
- void *xvu_socket; /* socket, if VSOCK */
+ void *xvu_socket; /* unpcb, if VSOCK */
void *xvu_fifo; /* fifo, if VFIFO */
dev_t xvu_rdev; /* maj/min, if VBLK/VCHR */
struct {
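
A VSOCK vnode now refers to the unix(4) protocol control block rather than the socket, and the old v_un compatibility defines are gone. A hedged sketch of how a consumer could still reach the socket, assuming it follows the unpcb's unp_socket back pointer; the helper name is hypothetical and locking is elided:

static struct socket *
vnode_to_socket(struct vnode *vp)
{
	struct unpcb *unp;

	if (vp->v_type != VSOCK || (unp = vp->v_unpcb) == NULL)
		return (NULL);
	/* unp_socket is the unpcb's back pointer to its socket. */
	return (unp->unp_socket);
}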
Index: usr.bin/netstat/inet.c
===================================================================
--- usr.bin/netstat/inet.c
+++ usr.bin/netstat/inet.c
@@ -170,14 +170,17 @@
if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0)
return (-1);
xso->xso_family = domain.dom_family;
- xso->so_qlen = so->so_qlen;
- xso->so_incqlen = so->so_incqlen;
- xso->so_qlimit = so->so_qlimit;
xso->so_timeo = so->so_timeo;
xso->so_error = so->so_error;
- xso->so_oobmark = so->so_oobmark;
- sbtoxsockbuf(&so->so_snd, &xso->so_snd);
- sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ if (SOLISTENING(so)) {
+ xso->so_qlen = so->sol_qlen;
+ xso->so_incqlen = so->sol_incqlen;
+ xso->so_qlimit = so->sol_qlimit;
+ } else {
+ sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+ sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+ xso->so_oobmark = so->so_oobmark;
+ }
return (0);
}