Changeset View
Standalone View
sys/netinet/in_pcb.c
Show First 20 Lines • Show All 102 Lines • ▼ Show 20 Lines | |||||
#include <netinet6/in6_var.h> | #include <netinet6/in6_var.h> | ||||
#include <netinet6/ip6_var.h> | #include <netinet6/ip6_var.h> | ||||
#endif /* INET6 */ | #endif /* INET6 */ | ||||
#include <netipsec/ipsec_support.h> | #include <netipsec/ipsec_support.h> | ||||
#include <security/mac/mac_framework.h> | #include <security/mac/mac_framework.h> | ||||
#define INPCBLBGROUP_SIZMIN 8 | |||||
#define INPCBLBGROUP_SIZMAX 256 | |||||
static struct callout ipport_tick_callout; | static struct callout ipport_tick_callout; | ||||
/* | /* | ||||
* These configure the range of local port addresses assigned to | * These configure the range of local port addresses assigned to | ||||
* "unspecified" outgoing connections/packets/whatever. | * "unspecified" outgoing connections/packets/whatever. | ||||
*/ | */ | ||||
VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */ | VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */ | ||||
VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */ | VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */ | ||||
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* in_pcb.c: manage the Protocol Control Blocks. | * in_pcb.c: manage the Protocol Control Blocks. | ||||
* | * | ||||
* NOTE: It is assumed that most of these functions will be called with | * NOTE: It is assumed that most of these functions will be called with | ||||
* the pcbinfo lock held, and often, the inpcb lock held, as these utility | * the pcbinfo lock held, and often, the inpcb lock held, as these utility | ||||
* functions often modify hash chains or addresses in pcbs. | * functions often modify hash chains or addresses in pcbs. | ||||
*/ | */ | ||||
static struct inpcblbgroup * | |||||
in_pcblbgroup_alloc(struct inpcblbgrouphead *hdr, u_char vflag, | |||||
uint16_t port, const union in_dependaddr *addr, int size) | |||||
{ | |||||
struct inpcblbgroup *grp; | |||||
size_t bytes; | |||||
rwatson: Should there be a lock assertion here? | |||||
Not Done Inline Actionsin_pcblbgroup_{alloc,free,resize,reorder} are only called from in_pcb{ins,rem}lbgrouphash where locks are asserted. To assert locks again in these functions we'd need to pass the inpcb as argument. What do you recommend? johalun0_gmail.com: in_pcblbgroup_{alloc,free,resize,reorder} are only called from in_pcb{ins,rem}lbgrouphash where… | |||||
bytes = __offsetof(struct inpcblbgroup, il_inp[size]); | |||||
grp = malloc(bytes, M_PCB, M_ZERO | M_NOWAIT); | |||||
if (!grp) | |||||
return (NULL); | |||||
grp->il_vflag = vflag; | |||||
grp->il_lport = port; | |||||
grp->il_dependladdr = *addr; | |||||
grp->il_inpsiz = size; | |||||
LIST_INSERT_HEAD(hdr, grp, il_list); | |||||
return (grp); | |||||
Done Inline ActionsThis vertical whitespace is probably not needed. rwatson: This vertical whitespace is probably not needed. | |||||
} | |||||
static void | |||||
in_pcblbgroup_free(struct inpcblbgroup *grp) | |||||
{ | |||||
LIST_REMOVE(grp, il_list); | |||||
Not Done Inline ActionsShould there be a lock assertion here? rwatson: Should there be a lock assertion here? | |||||
free(grp, M_TEMP); | |||||
} | |||||
static struct inpcblbgroup * | |||||
in_pcblbgroup_resize(struct inpcblbgrouphead *hdr, | |||||
struct inpcblbgroup *old_grp, int size) | |||||
{ | |||||
struct inpcblbgroup *grp; | |||||
int i; | |||||
grp = in_pcblbgroup_alloc(hdr, old_grp->il_vflag, | |||||
Not Done Inline ActionsShould there be a lock assertion here? rwatson: Should there be a lock assertion here? | |||||
old_grp->il_lport, &old_grp->il_dependladdr, size); | |||||
if (!grp) | |||||
return (NULL); | |||||
KASSERT(old_grp->il_inpcnt < grp->il_inpsiz, | |||||
("invalid new local group size %d and old local group count %d", | |||||
grp->il_inpsiz, old_grp->il_inpcnt)); | |||||
for (i = 0; i < old_grp->il_inpcnt; ++i) | |||||
grp->il_inp[i] = old_grp->il_inp[i]; | |||||
grp->il_inpcnt = old_grp->il_inpcnt; | |||||
in_pcblbgroup_free(old_grp); | |||||
Done Inline ActionsCan probably drop this blank line here. rwatson: Can probably drop this blank line here. | |||||
return (grp); | |||||
} | |||||
Done Inline ActionsCan probably drop this blank line here. rwatson: Can probably drop this blank line here. | |||||
/* | /* | ||||
* PCB at index 'i' is removed from the group. Pull up the ones below il_inp[i] | |||||
* and shrink group if possible. | |||||
*/ | |||||
Done Inline ActionsAdd a "." at the end -- style(9) would like comments to be complete sentences. rwatson: Add a "." at the end -- style(9) would like comments to be complete sentences. | |||||
static void | |||||
in_pcblbgroup_reorder(struct inpcblbgrouphead *hdr, struct inpcblbgroup **grpp, | |||||
int i) | |||||
Done Inline ActionsIn other in_..._inshash() functions, the pcbinfo argument is implied by the 'inp' argument. Do we need it to be explicit here? rwatson: In other in_..._inshash() functions, the pcbinfo argument is implied by the 'inp' argument. Do… | |||||
{ | |||||
struct inpcblbgroup *grp = *grpp; | |||||
for (; i + 1 < grp->il_inpcnt; ++i) | |||||
grp->il_inp[i] = grp->il_inp[i + 1]; | |||||
grp->il_inpcnt--; | |||||
if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN && | |||||
grp->il_inpcnt <= (grp->il_inpsiz / 4)) { | |||||
/* Shrink this group. */ | |||||
struct inpcblbgroup *new_grp = | |||||
in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2); | |||||
if (new_grp) | |||||
*grpp = new_grp; | |||||
} | |||||
return; | |||||
} | |||||
/* | |||||
* Add PCB to load balance group for SO_REUSEPORT_LB option. | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
*/ | |||||
static int | |||||
in_pcbinslbgrouphash(struct inpcb *inp) | |||||
{ | |||||
struct inpcbinfo *pcbinfo; | |||||
struct inpcblbgrouphead *hdr; | |||||
struct inpcblbgroup *grp; | |||||
uint16_t hashmask, lport; | |||||
uint32_t group_index; | |||||
struct ucred *cred; | |||||
static int limit_logged = 0; | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
pcbinfo = inp->inp_pcbinfo; | |||||
INP_WLOCK_ASSERT(inp); | |||||
INP_HASH_WLOCK_ASSERT(pcbinfo); | |||||
if (pcbinfo->ipi_lbgrouphashbase == NULL) | |||||
return (0); | |||||
hashmask = pcbinfo->ipi_lbgrouphashmask; | |||||
lport = inp->inp_lport; | |||||
group_index = INP_PCBLBGROUP_PORTHASH(lport, hashmask); | |||||
Done Inline ActionsThis vertical whitespace is probably not needed. rwatson: This vertical whitespace is probably not needed. | |||||
hdr = &pcbinfo->ipi_lbgrouphashbase[group_index]; | |||||
/* | |||||
* Don't allow jailed socket to join local group. | |||||
*/ | |||||
if (inp->inp_socket != NULL) | |||||
cred = inp->inp_socket->so_cred; | |||||
else | |||||
cred = NULL; | |||||
if (cred != NULL && jailed(cred)) | |||||
return (0); | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
#ifdef INET6 | |||||
/* | |||||
* Don't allow IPv4 mapped INET6 wild socket. | |||||
*/ | |||||
if ((inp->inp_vflag & INP_IPV4) && | |||||
inp->inp_laddr.s_addr == INADDR_ANY && | |||||
INP_CHECK_SOCKAF(inp->inp_socket, AF_INET6)) { | |||||
Done Inline ActionsThis variable would ideally be declared at the top of the function, in style(9). rwatson: This variable would ideally be declared at the top of the function, in style(9). | |||||
return (0); | |||||
} | |||||
#endif | |||||
hdr = &pcbinfo->ipi_lbgrouphashbase[ | |||||
INP_PCBLBGROUP_PORTHASH(inp->inp_lport, | |||||
pcbinfo->ipi_lbgrouphashmask)]; | |||||
LIST_FOREACH(grp, hdr, il_list) { | |||||
if (grp->il_vflag == inp->inp_vflag && | |||||
grp->il_lport == inp->inp_lport && | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
memcmp(&grp->il_dependladdr, | |||||
&inp->inp_inc.inc_ie.ie_dependladdr, | |||||
sizeof(grp->il_dependladdr)) == 0) { | |||||
break; | |||||
} | |||||
} | |||||
if (grp == NULL) { | |||||
/* Create new load balance group. */ | |||||
grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag, | |||||
inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr, | |||||
INPCBLBGROUP_SIZMIN); | |||||
if (!grp) | |||||
return (ENOBUFS); | |||||
} else if (grp->il_inpcnt == grp->il_inpsiz) { | |||||
if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) { | |||||
if (!limit_logged) { | |||||
Done Inline ActionsHere, as above, can pcbinfo be implied by inp? rwatson: Here, as above, can pcbinfo be implied by inp? | |||||
limit_logged = 1; | |||||
printf("lb group port %d, limit reached\n", | |||||
ntohs(grp->il_lport)); | |||||
} | |||||
return (0); | |||||
} | |||||
/* Expand this local group. */ | |||||
grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2); | |||||
if (!grp) | |||||
return (ENOBUFS); | |||||
} | |||||
KASSERT(grp->il_inpcnt < grp->il_inpsiz, | |||||
("invalid local group size %d and count %d", | |||||
grp->il_inpsiz, grp->il_inpcnt)); | |||||
grp->il_inp[grp->il_inpcnt] = inp; | |||||
grp->il_inpcnt++; | |||||
return (0); | |||||
} | |||||
/* | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
* Remove PCB from load balance group. | |||||
*/ | |||||
static void | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
in_pcbremlbgrouphash(struct inpcb *inp) | |||||
{ | |||||
struct inpcbinfo *pcbinfo; | |||||
struct inpcblbgrouphead *hdr; | |||||
struct inpcblbgroup *grp; | |||||
int i; | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
pcbinfo = inp->inp_pcbinfo; | |||||
INP_WLOCK_ASSERT(inp); | |||||
INP_HASH_WLOCK_ASSERT(pcbinfo); | |||||
if (pcbinfo->ipi_lbgrouphashbase == NULL) | |||||
return; | |||||
Done Inline ActionsThis structure is visually confusing. Could we break some of this out into another function, or use another control-flow structure? Currently it's hard to see how the return fits. rwatson: This structure is visually confusing. Could we break some of this out into another function, or… | |||||
hdr = &pcbinfo->ipi_lbgrouphashbase[ | |||||
INP_PCBLBGROUP_PORTHASH(inp->inp_lport, | |||||
pcbinfo->ipi_lbgrouphashmask)]; | |||||
LIST_FOREACH(grp, hdr, il_list) { | |||||
for (i = 0; i < grp->il_inpcnt; ++i) { | |||||
if (grp->il_inp[i] != inp) | |||||
continue; | |||||
if (grp->il_inpcnt == 1) { | |||||
/* We are the last, free this local group. */ | |||||
in_pcblbgroup_free(grp); | |||||
} else { | |||||
/* Pull up inpcbs, shrink group if possible. */ | |||||
in_pcblbgroup_reorder(hdr, &grp, i); | |||||
} | |||||
return; | |||||
} | |||||
} | |||||
} | |||||
/* | |||||
* Different protocols initialize their inpcbs differently - giving | * Different protocols initialize their inpcbs differently - giving | ||||
* different name to the lock. But they all are disposed the same. | * different name to the lock. But they all are disposed the same. | ||||
*/ | */ | ||||
static void | static void | ||||
inpcb_fini(void *mem, int size) | inpcb_fini(void *mem, int size) | ||||
{ | { | ||||
struct inpcb *inp = mem; | struct inpcb *inp = mem; | ||||
Show All 18 Lines | |||||
#endif | #endif | ||||
pcbinfo->ipi_listhead = listhead; | pcbinfo->ipi_listhead = listhead; | ||||
LIST_INIT(pcbinfo->ipi_listhead); | LIST_INIT(pcbinfo->ipi_listhead); | ||||
pcbinfo->ipi_count = 0; | pcbinfo->ipi_count = 0; | ||||
pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB, | pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB, | ||||
&pcbinfo->ipi_hashmask); | &pcbinfo->ipi_hashmask); | ||||
pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB, | pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB, | ||||
&pcbinfo->ipi_porthashmask); | &pcbinfo->ipi_porthashmask); | ||||
pcbinfo->ipi_lbgrouphashbase = hashinit(hash_nelements, M_PCB, | |||||
&pcbinfo->ipi_lbgrouphashmask); | |||||
#ifdef PCBGROUP | #ifdef PCBGROUP | ||||
in_pcbgroup_init(pcbinfo, hashfields, hash_nelements); | in_pcbgroup_init(pcbinfo, hashfields, hash_nelements); | ||||
#endif | #endif | ||||
pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), | pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), | ||||
NULL, NULL, inpcbzone_init, inpcb_fini, UMA_ALIGN_PTR, 0); | NULL, NULL, inpcbzone_init, inpcb_fini, UMA_ALIGN_PTR, 0); | ||||
uma_zone_set_max(pcbinfo->ipi_zone, maxsockets); | uma_zone_set_max(pcbinfo->ipi_zone, maxsockets); | ||||
uma_zone_set_warning(pcbinfo->ipi_zone, | uma_zone_set_warning(pcbinfo->ipi_zone, | ||||
"kern.ipc.maxsockets limit reached"); | "kern.ipc.maxsockets limit reached"); | ||||
} | } | ||||
/* | /* | ||||
* Destroy an inpcbinfo. | * Destroy an inpcbinfo. | ||||
*/ | */ | ||||
void | void | ||||
in_pcbinfo_destroy(struct inpcbinfo *pcbinfo) | in_pcbinfo_destroy(struct inpcbinfo *pcbinfo) | ||||
{ | { | ||||
KASSERT(pcbinfo->ipi_count == 0, | KASSERT(pcbinfo->ipi_count == 0, | ||||
("%s: ipi_count = %u", __func__, pcbinfo->ipi_count)); | ("%s: ipi_count = %u", __func__, pcbinfo->ipi_count)); | ||||
hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask); | hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask); | ||||
hashdestroy(pcbinfo->ipi_porthashbase, M_PCB, | hashdestroy(pcbinfo->ipi_porthashbase, M_PCB, | ||||
pcbinfo->ipi_porthashmask); | pcbinfo->ipi_porthashmask); | ||||
hashdestroy(pcbinfo->ipi_lbgrouphashbase, M_PCB, | |||||
pcbinfo->ipi_lbgrouphashmask); | |||||
#ifdef PCBGROUP | #ifdef PCBGROUP | ||||
in_pcbgroup_destroy(pcbinfo); | in_pcbgroup_destroy(pcbinfo); | ||||
#endif | #endif | ||||
uma_zdestroy(pcbinfo->ipi_zone); | uma_zdestroy(pcbinfo->ipi_zone); | ||||
INP_LIST_LOCK_DESTROY(pcbinfo); | INP_LIST_LOCK_DESTROY(pcbinfo); | ||||
INP_HASH_LOCK_DESTROY(pcbinfo); | INP_HASH_LOCK_DESTROY(pcbinfo); | ||||
INP_INFO_LOCK_DESTROY(pcbinfo); | INP_INFO_LOCK_DESTROY(pcbinfo); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 222 Lines • ▼ Show 20 Lines | #endif | ||||
*lportp = lport; | *lportp = lport; | ||||
return (0); | return (0); | ||||
} | } | ||||
/* | /* | ||||
* Return cached socket options. | * Return cached socket options. | ||||
*/ | */ | ||||
short | int | ||||
inp_so_options(const struct inpcb *inp) | inp_so_options(const struct inpcb *inp) | ||||
{ | { | ||||
short so_options; | int so_options; | ||||
so_options = 0; | so_options = 0; | ||||
if ((inp->inp_flags2 & INP_REUSEPORT_LB) != 0) | |||||
so_options |= SO_REUSEPORT_LB; | |||||
if ((inp->inp_flags2 & INP_REUSEPORT) != 0) | if ((inp->inp_flags2 & INP_REUSEPORT) != 0) | ||||
so_options |= SO_REUSEPORT; | so_options |= SO_REUSEPORT; | ||||
if ((inp->inp_flags2 & INP_REUSEADDR) != 0) | if ((inp->inp_flags2 & INP_REUSEADDR) != 0) | ||||
so_options |= SO_REUSEADDR; | so_options |= SO_REUSEADDR; | ||||
return (so_options); | return (so_options); | ||||
} | } | ||||
#endif /* INET || INET6 */ | #endif /* INET || INET6 */ | ||||
/* | /* | ||||
* Check if a new BINDMULTI socket is allowed to be created. | * Check if a new BINDMULTI socket is allowed to be created. | ||||
* | * | ||||
* ni points to the new inp. | * ni points to the new inp. | ||||
* oi points to the existing inp. | * oi points to the existing inp. | ||||
Show All 40 Lines | in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, | ||||
struct sockaddr_in *sin; | struct sockaddr_in *sin; | ||||
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; | ||||
struct in_addr laddr; | struct in_addr laddr; | ||||
u_short lport = 0; | u_short lport = 0; | ||||
int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT); | int lookupflags = 0, reuseport = (so->so_options & SO_REUSEPORT); | ||||
int error; | int error; | ||||
/* | /* | ||||
* XXX: Maybe we could let SO_REUSEPORT_LB set SO_REUSEPORT bit here | |||||
* so that we don't have to add to the (already messy) code below. | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
*/ | |||||
int reuseport_lb = (so->so_options & SO_REUSEPORT_LB); | |||||
/* | |||||
* No state changes, so read locks are sufficient here. | * No state changes, so read locks are sufficient here. | ||||
*/ | */ | ||||
INP_LOCK_ASSERT(inp); | INP_LOCK_ASSERT(inp); | ||||
INP_HASH_LOCK_ASSERT(pcbinfo); | INP_HASH_LOCK_ASSERT(pcbinfo); | ||||
if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */ | if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */ | ||||
return (EADDRNOTAVAIL); | return (EADDRNOTAVAIL); | ||||
laddr.s_addr = *laddrp; | laddr.s_addr = *laddrp; | ||||
if (nam != NULL && laddr.s_addr != INADDR_ANY) | if (nam != NULL && laddr.s_addr != INADDR_ANY) | ||||
return (EINVAL); | return (EINVAL); | ||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) | if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT|SO_REUSEPORT_LB)) == 0) | ||||
lookupflags = INPLOOKUP_WILDCARD; | lookupflags = INPLOOKUP_WILDCARD; | ||||
if (nam == NULL) { | if (nam == NULL) { | ||||
if ((error = prison_local_ip4(cred, &laddr)) != 0) | if ((error = prison_local_ip4(cred, &laddr)) != 0) | ||||
return (error); | return (error); | ||||
} else { | } else { | ||||
sin = (struct sockaddr_in *)nam; | sin = (struct sockaddr_in *)nam; | ||||
if (nam->sa_len != sizeof (*sin)) | if (nam->sa_len != sizeof (*sin)) | ||||
return (EINVAL); | return (EINVAL); | ||||
Show All 20 Lines | if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { | ||||
* Treat SO_REUSEADDR as SO_REUSEPORT for multicast; | * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; | ||||
* allow complete duplication of binding if | * allow complete duplication of binding if | ||||
* SO_REUSEPORT is set, or if SO_REUSEADDR is set | * SO_REUSEPORT is set, or if SO_REUSEADDR is set | ||||
* and a multicast address is bound on both | * and a multicast address is bound on both | ||||
* new and duplicated sockets. | * new and duplicated sockets. | ||||
*/ | */ | ||||
if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) | if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) | ||||
reuseport = SO_REUSEADDR|SO_REUSEPORT; | reuseport = SO_REUSEADDR|SO_REUSEPORT; | ||||
/* | |||||
* XXX: How to deal with SO_REUSEPORT_LB here? | |||||
* Treat same as SO_REUSEPORT for now. | |||||
*/ | |||||
if ((so->so_options & | |||||
(SO_REUSEADDR|SO_REUSEPORT_LB)) != 0) | |||||
reuseport_lb = SO_REUSEADDR|SO_REUSEPORT_LB; | |||||
} else if (sin->sin_addr.s_addr != INADDR_ANY) { | } else if (sin->sin_addr.s_addr != INADDR_ANY) { | ||||
sin->sin_port = 0; /* yech... */ | sin->sin_port = 0; /* yech... */ | ||||
bzero(&sin->sin_zero, sizeof(sin->sin_zero)); | bzero(&sin->sin_zero, sizeof(sin->sin_zero)); | ||||
/* | /* | ||||
* Is the address a local IP address? | * Is the address a local IP address? | ||||
* If INP_BINDANY is set, then the socket may be bound | * If INP_BINDANY is set, then the socket may be bound | ||||
* to any endpoint address, local or not. | * to any endpoint address, local or not. | ||||
*/ | */ | ||||
if ((inp->inp_flags & INP_BINDANY) == 0 && | if ((inp->inp_flags & INP_BINDANY) == 0 && | ||||
ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) | ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) | ||||
return (EADDRNOTAVAIL); | return (EADDRNOTAVAIL); | ||||
} | } | ||||
laddr = sin->sin_addr; | laddr = sin->sin_addr; | ||||
if (lport) { | if (lport) { | ||||
struct inpcb *t; | struct inpcb *t; | ||||
struct tcptw *tw; | struct tcptw *tw; | ||||
/* GROSS */ | /* GROSS */ | ||||
Show All 13 Lines | #endif | ||||
*/ | */ | ||||
if (t && | if (t && | ||||
((inp->inp_flags2 & INP_BINDMULTI) == 0) && | ((inp->inp_flags2 & INP_BINDMULTI) == 0) && | ||||
((t->inp_flags & INP_TIMEWAIT) == 0) && | ((t->inp_flags & INP_TIMEWAIT) == 0) && | ||||
(so->so_type != SOCK_STREAM || | (so->so_type != SOCK_STREAM || | ||||
ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && | ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && | ||||
(ntohl(sin->sin_addr.s_addr) != INADDR_ANY || | (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || | ||||
ntohl(t->inp_laddr.s_addr) != INADDR_ANY || | ntohl(t->inp_laddr.s_addr) != INADDR_ANY || | ||||
(t->inp_flags2 & INP_REUSEPORT) == 0) && | (t->inp_flags2 & INP_REUSEPORT) || | ||||
(t->inp_flags2 & INP_REUSEPORT_LB) == 0) && | |||||
(inp->inp_cred->cr_uid != | (inp->inp_cred->cr_uid != | ||||
t->inp_cred->cr_uid)) | t->inp_cred->cr_uid)) | ||||
return (EADDRINUSE); | return (EADDRINUSE); | ||||
/* | /* | ||||
* If the socket is a BINDMULTI socket, then | * If the socket is a BINDMULTI socket, then | ||||
* the credentials need to match and the | * the credentials need to match and the | ||||
* original socket also has to have been bound | * original socket also has to have been bound | ||||
* with BINDMULTI. | * with BINDMULTI. | ||||
*/ | */ | ||||
if (t && (! in_pcbbind_check_bindmulti(inp, t))) | if (t && (! in_pcbbind_check_bindmulti(inp, t))) | ||||
return (EADDRINUSE); | return (EADDRINUSE); | ||||
} | } | ||||
t = in_pcblookup_local(pcbinfo, sin->sin_addr, | t = in_pcblookup_local(pcbinfo, sin->sin_addr, | ||||
lport, lookupflags, cred); | lport, lookupflags, cred); | ||||
if (t && (t->inp_flags & INP_TIMEWAIT)) { | if (t && (t->inp_flags & INP_TIMEWAIT)) { | ||||
/* | /* | ||||
* XXXRW: If an inpcb has had its timewait | * XXXRW: If an inpcb has had its timewait | ||||
* state recycled, we treat the address as | * state recycled, we treat the address as | ||||
* being in use (for now). This is better | * being in use (for now). This is better | ||||
* than a panic, but not desirable. | * than a panic, but not desirable. | ||||
*/ | */ | ||||
tw = intotw(t); | tw = intotw(t); | ||||
if (tw == NULL || | if (tw == NULL || | ||||
(reuseport & tw->tw_so_options) == 0) | ((reuseport & tw->tw_so_options) == 0 && | ||||
(reuseport_lb & | |||||
tw->tw_so_options) == 0)) { | |||||
return (EADDRINUSE); | return (EADDRINUSE); | ||||
} | |||||
} else if (t && | } else if (t && | ||||
((inp->inp_flags2 & INP_BINDMULTI) == 0) && | ((inp->inp_flags2 & INP_BINDMULTI) == 0) && | ||||
(reuseport & inp_so_options(t)) == 0) { | (reuseport & inp_so_options(t)) == 0 && | ||||
(reuseport_lb & inp_so_options(t)) == 0) { | |||||
#ifdef INET6 | #ifdef INET6 | ||||
if (ntohl(sin->sin_addr.s_addr) != | if (ntohl(sin->sin_addr.s_addr) != | ||||
INADDR_ANY || | INADDR_ANY || | ||||
ntohl(t->inp_laddr.s_addr) != | ntohl(t->inp_laddr.s_addr) != | ||||
INADDR_ANY || | INADDR_ANY || | ||||
(inp->inp_vflag & INP_IPV6PROTO) == 0 || | (inp->inp_vflag & INP_IPV6PROTO) == 0 || | ||||
(t->inp_vflag & INP_IPV6PROTO) == 0) | (t->inp_vflag & INP_IPV6PROTO) == 0) | ||||
#endif | #endif | ||||
return (EADDRINUSE); | return (EADDRINUSE); | ||||
if (t && (! in_pcbbind_check_bindmulti(inp, t))) | if (t && (! in_pcbbind_check_bindmulti(inp, t))) | ||||
return (EADDRINUSE); | return (EADDRINUSE); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if (*lportp != 0) | if (*lportp != 0) | ||||
lport = *lportp; | lport = *lportp; | ||||
if (lport == 0) { | if (lport == 0) { | ||||
▲ Show 20 Lines • Show All 703 Lines • ▼ Show 20 Lines | in_pcbdrop(struct inpcb *inp) | ||||
* XXXRW: Possibly we should protect the setting of INP_DROPPED with | * XXXRW: Possibly we should protect the setting of INP_DROPPED with | ||||
* the hash lock...? | * the hash lock...? | ||||
*/ | */ | ||||
inp->inp_flags |= INP_DROPPED; | inp->inp_flags |= INP_DROPPED; | ||||
if (inp->inp_flags & INP_INHASHLIST) { | if (inp->inp_flags & INP_INHASHLIST) { | ||||
struct inpcbport *phd = inp->inp_phd; | struct inpcbport *phd = inp->inp_phd; | ||||
INP_HASH_WLOCK(inp->inp_pcbinfo); | INP_HASH_WLOCK(inp->inp_pcbinfo); | ||||
in_pcbremlbgrouphash(inp); | |||||
LIST_REMOVE(inp, inp_hash); | LIST_REMOVE(inp, inp_hash); | ||||
LIST_REMOVE(inp, inp_portlist); | LIST_REMOVE(inp, inp_portlist); | ||||
if (LIST_FIRST(&phd->phd_pcblist) == NULL) { | if (LIST_FIRST(&phd->phd_pcblist) == NULL) { | ||||
LIST_REMOVE(phd, phd_hash); | LIST_REMOVE(phd, phd_hash); | ||||
free(phd, M_PCB); | free(phd, M_PCB); | ||||
} | } | ||||
INP_HASH_WUNLOCK(inp->inp_pcbinfo); | INP_HASH_WUNLOCK(inp->inp_pcbinfo); | ||||
inp->inp_flags &= ~INP_INHASHLIST; | inp->inp_flags &= ~INP_INHASHLIST; | ||||
▲ Show 20 Lines • Show All 245 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
} | } | ||||
} | } | ||||
return (match); | return (match); | ||||
} | } | ||||
} | } | ||||
#undef INP_LOOKUP_MAPPED_PCB_COST | #undef INP_LOOKUP_MAPPED_PCB_COST | ||||
static struct inpcb * | |||||
in_pcblookup_lbgroup(const struct inpcbinfo *pcbinfo, | |||||
const struct in_addr *laddr, uint16_t lport, const struct in_addr *faddr, | |||||
uint16_t fport, int lookupflags) | |||||
{ | |||||
struct inpcb *local_wild = NULL; | |||||
const struct inpcblbgrouphead *hdr; | |||||
struct inpcblbgroup *grp; | |||||
struct inpcblbgroup *grp_local_wild; | |||||
INP_HASH_LOCK_ASSERT(pcbinfo); | |||||
hdr = &pcbinfo->ipi_lbgrouphashbase[ | |||||
INP_PCBLBGROUP_PORTHASH(lport, pcbinfo->ipi_lbgrouphashmask)]; | |||||
Done Inline ActionsShould these assertions be before the if() clause above? rwatson: Should these assertions be before the if() clause above? | |||||
/* | |||||
* Order of socket selection: | |||||
* 1. non-wild. | |||||
* 2. wild (if lookupflags contains INPLOOKUP_WILDCARD). | |||||
* | |||||
* NOTE: | |||||
* - Load balanced group does not contain jailed sockets | |||||
* - Load balanced group does not contain IPv4 mapped INET6 wild sockets | |||||
*/ | |||||
LIST_FOREACH(grp, hdr, il_list) { | |||||
#ifdef INET6 | |||||
if (!(grp->il_vflag & INP_IPV4)) | |||||
continue; | |||||
#endif | |||||
if (grp->il_lport == lport) { | |||||
uint32_t idx = 0; | |||||
int pkt_hash = INP_PCBLBGROUP_PKTHASH(faddr->s_addr, | |||||
lport, fport); | |||||
idx = pkt_hash % grp->il_inpcnt; | |||||
Done Inline ActionsThis variable declaration would ideally be above. rwatson: This variable declaration would ideally be above. | |||||
if (grp->il_laddr.s_addr == laddr->s_addr) { | |||||
return (grp->il_inp[idx]); | |||||
} else { | |||||
if (grp->il_laddr.s_addr == INADDR_ANY && | |||||
(lookupflags & INPLOOKUP_WILDCARD)) { | |||||
local_wild = grp->il_inp[idx]; | |||||
grp_local_wild = grp; | |||||
} | |||||
} | |||||
} | |||||
} | |||||
if (local_wild != NULL) { | |||||
return (local_wild); | |||||
} | |||||
return (NULL); | |||||
} | |||||
#ifdef PCBGROUP | #ifdef PCBGROUP | ||||
/* | /* | ||||
* Lookup PCB in hash list, using pcbgroup tables. | * Lookup PCB in hash list, using pcbgroup tables. | ||||
*/ | */ | ||||
static struct inpcb * | static struct inpcb * | ||||
in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, | in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, | ||||
struct in_addr faddr, u_int fport_arg, struct in_addr laddr, | struct in_addr faddr, u_int fport_arg, struct in_addr laddr, | ||||
u_int lport_arg, int lookupflags, struct ifnet *ifp) | u_int lport_arg, int lookupflags, struct ifnet *ifp) | ||||
▲ Show 20 Lines • Show All 263 Lines • ▼ Show 20 Lines | if (inp->inp_faddr.s_addr == faddr.s_addr && | ||||
if (tmpinp == NULL) | if (tmpinp == NULL) | ||||
tmpinp = inp; | tmpinp = inp; | ||||
} | } | ||||
} | } | ||||
if (tmpinp != NULL) | if (tmpinp != NULL) | ||||
return (tmpinp); | return (tmpinp); | ||||
/* | /* | ||||
* Then look in lb group (for wildcard match). | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
*/ | |||||
if (pcbinfo->ipi_lbgrouphashbase != NULL && | |||||
(lookupflags & INPLOOKUP_WILDCARD)) { | |||||
inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr, | |||||
fport, lookupflags); | |||||
if (inp != NULL) { | |||||
return (inp); | |||||
} | |||||
} | |||||
/* | |||||
* Then look for a wildcard match, if requested. | * Then look for a wildcard match, if requested. | ||||
*/ | */ | ||||
if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { | if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { | ||||
struct inpcb *local_wild = NULL, *local_exact = NULL; | struct inpcb *local_wild = NULL, *local_exact = NULL; | ||||
#ifdef INET6 | #ifdef INET6 | ||||
struct inpcb *local_wild_mapped = NULL; | struct inpcb *local_wild_mapped = NULL; | ||||
#endif | #endif | ||||
struct inpcb *jail_wild = NULL; | struct inpcb *jail_wild = NULL; | ||||
▲ Show 20 Lines • Show All 200 Lines • ▼ Show 20 Lines | |||||
static int | static int | ||||
in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update) | in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update) | ||||
{ | { | ||||
struct inpcbhead *pcbhash; | struct inpcbhead *pcbhash; | ||||
struct inpcbporthead *pcbporthash; | struct inpcbporthead *pcbporthash; | ||||
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; | struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; | ||||
struct inpcbport *phd; | struct inpcbport *phd; | ||||
u_int32_t hashkey_faddr; | u_int32_t hashkey_faddr; | ||||
int so_options; | |||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
INP_HASH_WLOCK_ASSERT(pcbinfo); | INP_HASH_WLOCK_ASSERT(pcbinfo); | ||||
KASSERT((inp->inp_flags & INP_INHASHLIST) == 0, | KASSERT((inp->inp_flags & INP_INHASHLIST) == 0, | ||||
("in_pcbinshash: INP_INHASHLIST")); | ("in_pcbinshash: INP_INHASHLIST")); | ||||
#ifdef INET6 | #ifdef INET6 | ||||
if (inp->inp_vflag & INP_IPV6) | if (inp->inp_vflag & INP_IPV6) | ||||
hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr); | hashkey_faddr = INP6_PCBHASHKEY(&inp->in6p_faddr); | ||||
else | else | ||||
#endif | #endif | ||||
hashkey_faddr = inp->inp_faddr.s_addr; | hashkey_faddr = inp->inp_faddr.s_addr; | ||||
pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, | pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, | ||||
inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; | inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; | ||||
pcbporthash = &pcbinfo->ipi_porthashbase[ | pcbporthash = &pcbinfo->ipi_porthashbase[ | ||||
INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)]; | INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)]; | ||||
/* | /* | ||||
* Add entry to load balance group. | |||||
* Only do this if SO_REUSEPORT_LB is set. | |||||
*/ | |||||
Done Inline ActionsComment should be a full sentence. rwatson: Comment should be a full sentence. | |||||
so_options = inp_so_options(inp); | |||||
if (so_options & SO_REUSEPORT_LB) { | |||||
int ret = in_pcbinslbgrouphash(inp); | |||||
if (ret) { | |||||
/* pcb lb group malloc fail (ret=ENOBUFS). */ | |||||
return (ret); | |||||
Done Inline ActionsUse a C-style comment, not C++, for a block comment. Comment should be a full sentence. rwatson: Use a C-style comment, not C++, for a block comment. Comment should be a full sentence. | |||||
} | |||||
Done Inline Actionsstyle(9) requires () around return arguments. rwatson: style(9) requires () around return arguments. | |||||
} | |||||
/* | |||||
* Go through port list and look for a head for this lport. | * Go through port list and look for a head for this lport. | ||||
*/ | */ | ||||
LIST_FOREACH(phd, pcbporthash, phd_hash) { | LIST_FOREACH(phd, pcbporthash, phd_hash) { | ||||
if (phd->phd_port == inp->inp_lport) | if (phd->phd_port == inp->inp_lport) | ||||
break; | break; | ||||
} | } | ||||
/* | /* | ||||
* If none exists, malloc one and tack it on. | * If none exists, malloc one and tack it on. | ||||
▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | #endif | ||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
INP_LIST_WLOCK_ASSERT(pcbinfo); | INP_LIST_WLOCK_ASSERT(pcbinfo); | ||||
inp->inp_gencnt = ++pcbinfo->ipi_gencnt; | inp->inp_gencnt = ++pcbinfo->ipi_gencnt; | ||||
if (inp->inp_flags & INP_INHASHLIST) { | if (inp->inp_flags & INP_INHASHLIST) { | ||||
struct inpcbport *phd = inp->inp_phd; | struct inpcbport *phd = inp->inp_phd; | ||||
INP_HASH_WLOCK(pcbinfo); | INP_HASH_WLOCK(pcbinfo); | ||||
/* XXX: Only do if SO_REUSEPORT_LB set? */ | |||||
Done Inline ActionsUse a C-style comment. rwatson: Use a C-style comment. | |||||
in_pcbremlbgrouphash(inp); | |||||
LIST_REMOVE(inp, inp_hash); | LIST_REMOVE(inp, inp_hash); | ||||
LIST_REMOVE(inp, inp_portlist); | LIST_REMOVE(inp, inp_portlist); | ||||
if (LIST_FIRST(&phd->phd_pcblist) == NULL) { | if (LIST_FIRST(&phd->phd_pcblist) == NULL) { | ||||
LIST_REMOVE(phd, phd_hash); | LIST_REMOVE(phd, phd_hash); | ||||
free(phd, M_PCB); | free(phd, M_PCB); | ||||
} | } | ||||
INP_HASH_WUNLOCK(pcbinfo); | INP_HASH_WUNLOCK(pcbinfo); | ||||
inp->inp_flags &= ~INP_INHASHLIST; | inp->inp_flags &= ~INP_INHASHLIST; | ||||
▲ Show 20 Lines • Show All 773 Lines • Show Last 20 Lines |
Should there be a lock assertion here?