Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F132669095
D3721.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
14 KB
Referenced Files
None
Subscribers
None
D3721.diff
View Options
Index: head/sys/netinet6/udp6_usrreq.c
===================================================================
--- head/sys/netinet6/udp6_usrreq.c
+++ head/sys/netinet6/udp6_usrreq.c
@@ -679,35 +679,38 @@
SYSCTL_PROC(_net_inet6_udp6, OID_AUTO, getcred, CTLTYPE_OPAQUE|CTLFLAG_RW, 0,
0, udp6_getcred, "S,xucred", "Get the xucred of a UDP6 connection");
+#define UH_WLOCKED 2
+#define UH_RLOCKED 1
+#define UH_UNLOCKED 0
static int
-udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
- struct mbuf *control, struct thread *td)
+udp6_output(struct socket *so, int flags_arg, struct mbuf *m,
+ struct sockaddr *addr6, struct mbuf *control, struct thread *td)
{
- u_int32_t ulen = m->m_pkthdr.len;
- u_int32_t plen = sizeof(struct udphdr) + ulen;
+ struct inpcbinfo *pcbinfo;
+ struct inpcb *inp;
struct ip6_hdr *ip6;
struct udphdr *udp6;
struct in6_addr *laddr, *faddr, in6a;
- struct sockaddr_in6 *sin6 = NULL;
- int cscov_partial = 0;
- int scope_ambiguous = 0;
- u_short fport;
- int error = 0;
- uint8_t nxt;
- uint16_t cscov = 0;
struct ip6_pktopts *optp, opt;
- int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
- int flags;
- struct sockaddr_in6 tmp;
+ struct sockaddr_in6 *sin6, tmp;
+ struct epoch_tracker et;
+ int cscov_partial, error, flags, hlen, scope_ambiguous;
+ u_int32_t ulen, plen;
+ uint16_t cscov;
+ u_short fport;
+ uint8_t nxt, unlock_udbinfo;
- INP_WLOCK_ASSERT(inp);
- INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);
+ /* addr6 has been validated in udp6_send(). */
+ sin6 = (struct sockaddr_in6 *)addr6;
- if (addr6) {
- /* addr6 has been validated in udp6_send(). */
- sin6 = (struct sockaddr_in6 *)addr6;
+ /*
+ * In contrast to to IPv4 we do not validate the max. packet length
+ * here due to IPv6 Jumbograms (RFC2675).
+ */
- /* protect *sin6 from overwrites */
+ scope_ambiguous = 0;
+ if (sin6) {
+ /* Protect *addr6 from overwrites. */
tmp = *sin6;
sin6 = &tmp;
@@ -721,22 +724,86 @@
*/
if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
scope_ambiguous = 1;
- if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
+ if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) {
+ if (control)
+ m_freem(control);
+ m_freem(m);
return (error);
+ }
}
+ inp = sotoinpcb(so);
+ KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
+ INP_RLOCK(inp);
nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
IPPROTO_UDP : IPPROTO_UDPLITE;
+
+#ifdef INET
+ if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
+ int hasv4addr;
+
+ if (sin6 == NULL)
+ hasv4addr = (inp->inp_vflag & INP_IPV4);
+ else
+ hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
+ ? 1 : 0;
+ if (hasv4addr) {
+ struct pr_usrreqs *pru;
+
+ /*
+ * XXXRW: We release UDP-layer locks before calling
+ * udp_send() in order to avoid recursion. However,
+ * this does mean there is a short window where inp's
+ * fields are unstable. Could this lead to a
+ * potential race in which the factors causing us to
+ * select the UDPv4 output routine are invalidated?
+ */
+ INP_RUNLOCK(inp);
+ if (sin6)
+ in6_sin6_2_sin_in_sock((struct sockaddr *)sin6);
+ pru = inetsw[ip_protox[nxt]].pr_usrreqs;
+ /* addr will just be freed in sendit(). */
+ return ((*pru->pru_send)(so, flags_arg, m,
+ (struct sockaddr *)sin6, control, td));
+ }
+ }
+#endif
+
if (control) {
if ((error = ip6_setpktopts(control, &opt,
- inp->in6p_outputopts, td->td_ucred, nxt)) != 0)
- goto release;
+ inp->in6p_outputopts, td->td_ucred, nxt)) != 0) {
+ INP_RUNLOCK(inp);
+ ip6_clearpktopts(&opt, -1);
+ if (control)
+ m_freem(control);
+ m_freem(m);
+ return (error);
+ }
optp = &opt;
} else
optp = inp->in6p_outputopts;
+ pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
+ if (sin6 != NULL &&
+ IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && inp->inp_lport == 0) {
+ INP_RUNLOCK(inp);
+ /*
+ * XXX there is a short window here which could lead to a race;
+ * should we re-check that what got us here is still valid?
+ */
+ INP_WLOCK(inp);
+ INP_HASH_WLOCK(pcbinfo);
+ unlock_udbinfo = UH_WLOCKED;
+ } else if (sin6 != NULL &&
+ (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
+ IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ||
+ inp->inp_lport == 0)) {
+ INP_HASH_RLOCK_ET(pcbinfo, et);
+ unlock_udbinfo = UH_RLOCKED;
+ } else
+ unlock_udbinfo = UH_UNLOCKED;
+
if (sin6) {
- faddr = &sin6->sin6_addr;
/*
* Since we saw no essential reason for calling in_pcbconnect,
@@ -755,85 +822,47 @@
goto release;
}
- fport = sin6->sin6_port; /* allow 0 port */
+ /*
+ * Given we handle the v4mapped case in the INET block above
+ * assert here that it must not happen anymore.
+ */
+ KASSERT(!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr),
+ ("%s: sin6(%p)->sin6_addr is v4mapped which we "
+ "should have handled.", __func__, sin6));
- if (IN6_IS_ADDR_V4MAPPED(faddr)) {
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
- /*
- * I believe we should explicitly discard the
- * packet when mapped addresses are disabled,
- * rather than send the packet as an IPv6 one.
- * If we chose the latter approach, the packet
- * might be sent out on the wire based on the
- * default route, the situation which we'd
- * probably want to avoid.
- * (20010421 jinmei@kame.net)
- */
- error = EINVAL;
- goto release;
- }
- if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
- !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
- /*
- * when remote addr is an IPv4-mapped address,
- * local addr should not be an IPv6 address,
- * since you cannot determine how to map IPv6
- * source address to IPv4.
- */
- error = EINVAL;
- goto release;
- }
+ /* This only requires read-locking. */
+ error = in6_selectsrc_socket(sin6, optp, inp,
+ td->td_ucred, scope_ambiguous, &in6a, NULL);
+ if (error)
+ goto release;
+ laddr = &in6a;
- af = AF_INET;
- }
+ if (inp->inp_lport == 0) {
- if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
- error = in6_selectsrc_socket(sin6, optp, inp,
- td->td_ucred, scope_ambiguous, &in6a, NULL);
- if (error)
+ INP_WLOCK_ASSERT(inp);
+ error = in6_pcbsetport(laddr, inp, td->td_ucred);
+ if (error != 0) {
+ /* Undo an address bind that may have occurred. */
+ inp->in6p_laddr = in6addr_any;
goto release;
- laddr = &in6a;
- } else
- laddr = &inp->in6p_laddr; /* XXX */
- if (laddr == NULL) {
- if (error == 0)
- error = EADDRNOTAVAIL;
- goto release;
+ }
}
- if (inp->inp_lport == 0 &&
- (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) {
- /* Undo an address bind that may have occurred. */
- inp->in6p_laddr = in6addr_any;
- goto release;
- }
+ faddr = &sin6->sin6_addr;
+ fport = sin6->sin6_port; /* allow 0 port */
+
} else {
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
error = ENOTCONN;
goto release;
}
- if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
- /*
- * XXX: this case would happen when the
- * application sets the V6ONLY flag after
- * connecting the foreign address.
- * Such applications should be fixed,
- * so we bark here.
- */
- log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
- "option was set for a connected socket\n");
- error = EINVAL;
- goto release;
- } else
- af = AF_INET;
- }
laddr = &inp->in6p_laddr;
faddr = &inp->in6p_faddr;
fport = inp->inp_fport;
}
- if (af == AF_INET)
- hlen = sizeof(struct ip);
+ ulen = m->m_pkthdr.len;
+ plen = sizeof(struct udphdr) + ulen;
+ hlen = sizeof(struct ip6_hdr);
/*
* Calculate data length and get a mbuf
@@ -848,6 +877,7 @@
/*
* Stuff checksum and output datagram.
*/
+ cscov = cscov_partial = 0;
udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
udp6->uh_dport = fport;
@@ -870,59 +900,59 @@
udp6->uh_ulen = 0;
udp6->uh_sum = 0;
- switch (af) {
- case AF_INET6:
- ip6 = mtod(m, struct ip6_hdr *);
- ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
- ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
- ip6->ip6_vfc |= IPV6_VERSION;
- ip6->ip6_plen = htons((u_short)plen);
- ip6->ip6_nxt = nxt;
- ip6->ip6_hlim = in6_selecthlim(inp, NULL);
- ip6->ip6_src = *laddr;
- ip6->ip6_dst = *faddr;
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK;
+ ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+ ip6->ip6_vfc |= IPV6_VERSION;
+ ip6->ip6_plen = htons((u_short)plen);
+ ip6->ip6_nxt = nxt;
+ ip6->ip6_hlim = in6_selecthlim(inp, NULL);
+ ip6->ip6_src = *laddr;
+ ip6->ip6_dst = *faddr;
- if (cscov_partial) {
- if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
- sizeof(struct ip6_hdr), plen, cscov)) == 0)
- udp6->uh_sum = 0xffff;
- } else {
- udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
- m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
- m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
- }
+#ifdef MAC
+ mac_inpcb_create_mbuf(inp, m);
+#endif
+ if (cscov_partial) {
+ if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
+ sizeof(struct ip6_hdr), plen, cscov)) == 0)
+ udp6->uh_sum = 0xffff;
+ } else {
+ udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
+ m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+ m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+ }
+
+ flags = 0;
#ifdef RSS
- {
- uint32_t hash_val, hash_type;
- uint8_t pr;
+ {
+ uint32_t hash_val, hash_type;
+ uint8_t pr;
- pr = inp->inp_socket->so_proto->pr_protocol;
- /*
- * Calculate an appropriate RSS hash for UDP and
- * UDP Lite.
- *
- * The called function will take care of figuring out
- * whether a 2-tuple or 4-tuple hash is required based
- * on the currently configured scheme.
- *
- * Later later on connected socket values should be
- * cached in the inpcb and reused, rather than constantly
- * re-calculating it.
- *
- * UDP Lite is a different protocol number and will
- * likely end up being hashed as a 2-tuple until
- * RSS / NICs grow UDP Lite protocol awareness.
- */
- if (rss_proto_software_hash_v6(faddr, laddr, fport,
- inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
- m->m_pkthdr.flowid = hash_val;
- M_HASHTYPE_SET(m, hash_type);
- }
+ pr = inp->inp_socket->so_proto->pr_protocol;
+ /*
+ * Calculate an appropriate RSS hash for UDP and
+ * UDP Lite.
+ *
+ * The called function will take care of figuring out
+ * whether a 2-tuple or 4-tuple hash is required based
+ * on the currently configured scheme.
+ *
+ * Later later on connected socket values should be
+ * cached in the inpcb and reused, rather than constantly
+ * re-calculating it.
+ *
+ * UDP Lite is a different protocol number and will
+ * likely end up being hashed as a 2-tuple until
+ * RSS / NICs grow UDP Lite protocol awareness.
+ */
+ if (rss_proto_software_hash_v6(faddr, laddr, fport,
+ inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
+ m->m_pkthdr.flowid = hash_val;
+ M_HASHTYPE_SET(m, hash_type);
}
-#endif
- flags = 0;
-#ifdef RSS
+
/*
* Don't override with the inp cached flowid.
*
@@ -932,28 +962,43 @@
flags |= IP_NODEFAULTFLOWID;
#endif
- if (nxt == IPPROTO_UDPLITE)
- UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
- else
- UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
- UDPSTAT_INC(udps_opackets);
- error = ip6_output(m, optp, &inp->inp_route6, flags,
- inp->in6p_moptions, NULL, inp);
- break;
- case AF_INET:
- error = EAFNOSUPPORT;
- goto release;
+ UDPSTAT_INC(udps_opackets);
+ if (unlock_udbinfo == UH_WLOCKED)
+ INP_HASH_WUNLOCK(pcbinfo);
+ else if (unlock_udbinfo == UH_RLOCKED)
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
+ if (nxt == IPPROTO_UDPLITE)
+ UDPLITE_PROBE(send, NULL, inp, ip6, inp, udp6);
+ else
+ UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
+ error = ip6_output(m, optp, &inp->inp_route6, flags,
+ inp->in6p_moptions, NULL, inp);
+ if (unlock_udbinfo == UH_WLOCKED)
+ INP_WUNLOCK(inp);
+ else
+ INP_RUNLOCK(inp);
+
+ if (control) {
+ ip6_clearpktopts(&opt, -1);
+ m_freem(control);
}
- goto releaseopt;
+ return (error);
release:
- m_freem(m);
-
-releaseopt:
+ if (unlock_udbinfo == UH_WLOCKED) {
+ INP_HASH_WUNLOCK(pcbinfo);
+ INP_WUNLOCK(inp);
+ } else if (unlock_udbinfo == UH_RLOCKED) {
+ INP_HASH_RUNLOCK_ET(pcbinfo, et);
+ INP_RUNLOCK(inp);
+ } else
+ INP_RUNLOCK(inp);
if (control) {
ip6_clearpktopts(&opt, -1);
m_freem(control);
}
+ m_freem(m);
+
return (error);
}
@@ -1257,15 +1302,8 @@
udp6_send(struct socket *so, int flags, struct mbuf *m,
struct sockaddr *addr, struct mbuf *control, struct thread *td)
{
- struct inpcb *inp;
- struct inpcbinfo *pcbinfo;
- int error = 0;
+ int error;
- pcbinfo = udp_get_inpcbinfo(so->so_proto->pr_protocol);
- inp = sotoinpcb(so);
- KASSERT(inp != NULL, ("udp6_send: inp == NULL"));
-
- INP_WLOCK(inp);
if (addr) {
if (addr->sa_len != sizeof(struct sockaddr_in6)) {
error = EINVAL;
@@ -1277,53 +1315,11 @@
}
}
-#ifdef INET
- if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
- int hasv4addr;
- struct sockaddr_in6 *sin6 = NULL;
+ return (udp6_output(so, flags, m, addr, control, td));
- if (addr == NULL)
- hasv4addr = (inp->inp_vflag & INP_IPV4);
- else {
- sin6 = (struct sockaddr_in6 *)addr;
- hasv4addr = IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)
- ? 1 : 0;
- }
- if (hasv4addr) {
- struct pr_usrreqs *pru;
- uint8_t nxt;
-
- nxt = (inp->inp_socket->so_proto->pr_protocol ==
- IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE;
- /*
- * XXXRW: We release UDP-layer locks before calling
- * udp_send() in order to avoid recursion. However,
- * this does mean there is a short window where inp's
- * fields are unstable. Could this lead to a
- * potential race in which the factors causing us to
- * select the UDPv4 output routine are invalidated?
- */
- INP_WUNLOCK(inp);
- if (sin6)
- in6_sin6_2_sin_in_sock(addr);
- pru = inetsw[ip_protox[nxt]].pr_usrreqs;
- /* addr will just be freed in sendit(). */
- return ((*pru->pru_send)(so, flags, m, addr, control,
- td));
- }
- }
-#endif
-#ifdef MAC
- mac_inpcb_create_mbuf(inp, m);
-#endif
- INP_HASH_WLOCK(pcbinfo);
- error = udp6_output(inp, m, addr, control, td);
- INP_HASH_WUNLOCK(pcbinfo);
- INP_WUNLOCK(inp);
- return (error);
-
bad:
- INP_WUNLOCK(inp);
+ if (control)
+ m_freem(control);
m_freem(m);
return (error);
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Oct 19, 10:11 PM (4 h, 4 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23944295
Default Alt Text
D3721.diff (14 KB)
Attached To
Mode
D3721: Improve UDP/IPv6 send performance
Attached
Detach File
Event Timeline
Log In to Comment