Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/ip_output.c
Show First 20 Lines • Show All 206 Lines • ▼ Show 20 Lines | if ((m->m_flags & M_IP_NEXTHOP) && | ||||
return -1; /* Reloop for CHANGE of dst */ | return -1; /* Reloop for CHANGE of dst */ | ||||
} | } | ||||
return 0; | return 0; | ||||
} | } | ||||
static int | static int | ||||
ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m, | ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m, | ||||
const struct sockaddr_in *gw, struct route *ro, bool stamp_tag) | const struct sockaddr *gw, struct route *ro, bool stamp_tag) | ||||
{ | { | ||||
#ifdef KERN_TLS | #ifdef KERN_TLS | ||||
struct ktls_session *tls = NULL; | struct ktls_session *tls = NULL; | ||||
#endif | #endif | ||||
struct m_snd_tag *mst; | struct m_snd_tag *mst; | ||||
int error; | int error; | ||||
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); | MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); | ||||
▲ Show 20 Lines • Show All 44 Lines • ▼ Show 20 Lines | if (mst->ifp != ifp) { | ||||
goto done; | goto done; | ||||
} | } | ||||
/* stamp send tag on mbuf */ | /* stamp send tag on mbuf */ | ||||
m->m_pkthdr.snd_tag = m_snd_tag_ref(mst); | m->m_pkthdr.snd_tag = m_snd_tag_ref(mst); | ||||
m->m_pkthdr.csum_flags |= CSUM_SND_TAG; | m->m_pkthdr.csum_flags |= CSUM_SND_TAG; | ||||
} | } | ||||
error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro); | error = (*ifp->if_output)(ifp, m, gw, ro); | ||||
done: | done: | ||||
/* Check for route change invalidating send tags. */ | /* Check for route change invalidating send tags. */ | ||||
#ifdef KERN_TLS | #ifdef KERN_TLS | ||||
if (tls != NULL) { | if (tls != NULL) { | ||||
if (error == EAGAIN) | if (error == EAGAIN) | ||||
error = ktls_output_eagain(inp, tls); | error = ktls_output_eagain(inp, tls); | ||||
ktls_free(tls); | ktls_free(tls); | ||||
Show All 39 Lines | ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, | ||||
struct rm_priotracker in_ifa_tracker; | struct rm_priotracker in_ifa_tracker; | ||||
struct ip *ip; | struct ip *ip; | ||||
struct ifnet *ifp = NULL; /* keep compiler happy */ | struct ifnet *ifp = NULL; /* keep compiler happy */ | ||||
struct mbuf *m0; | struct mbuf *m0; | ||||
int hlen = sizeof (struct ip); | int hlen = sizeof (struct ip); | ||||
int mtu = 0; | int mtu = 0; | ||||
int error = 0; | int error = 0; | ||||
int vlan_pcp = -1; | int vlan_pcp = -1; | ||||
struct sockaddr_in *dst, sin; | struct sockaddr_in *dst; | ||||
const struct sockaddr_in *gw; | const struct sockaddr *gw; | ||||
struct in_ifaddr *ia = NULL; | struct in_ifaddr *ia = NULL; | ||||
struct in_addr src; | struct in_addr src; | ||||
int isbroadcast; | int isbroadcast; | ||||
uint16_t ip_len, ip_off; | uint16_t ip_len, ip_off; | ||||
struct route iproute; | |||||
uint32_t fibnum; | uint32_t fibnum; | ||||
#if defined(IPSEC) || defined(IPSEC_SUPPORT) | #if defined(IPSEC) || defined(IPSEC_SUPPORT) | ||||
int no_route_but_check_spd = 0; | int no_route_but_check_spd = 0; | ||||
#endif | #endif | ||||
M_ASSERTPKTHDR(m); | M_ASSERTPKTHDR(m); | ||||
NET_EPOCH_ASSERT(); | NET_EPOCH_ASSERT(); | ||||
Show All 35 Lines | #endif | ||||
/* | /* | ||||
* dst/gw handling: | * dst/gw handling: | ||||
* | * | ||||
* gw is readonly but can point either to dst OR rt_gateway, | * gw is readonly but can point either to dst OR rt_gateway, | ||||
* therefore we need restore gw if we're redoing lookup. | * therefore we need restore gw if we're redoing lookup. | ||||
*/ | */ | ||||
fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); | fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); | ||||
if (ro != NULL) | if (ro == NULL) { | ||||
ro = &iproute; | |||||
bzero(ro, sizeof (*ro)); | |||||
} | |||||
dst = (struct sockaddr_in *)&ro->ro_dst; | dst = (struct sockaddr_in *)&ro->ro_dst; | ||||
else | if (ro->ro_nh == NULL) { | ||||
dst = &sin; | |||||
if (ro == NULL || ro->ro_nh == NULL) { | |||||
bzero(dst, sizeof(*dst)); | |||||
dst->sin_family = AF_INET; | dst->sin_family = AF_INET; | ||||
dst->sin_len = sizeof(*dst); | dst->sin_len = sizeof(*dst); | ||||
dst->sin_addr = ip->ip_dst; | dst->sin_addr = ip->ip_dst; | ||||
} | } | ||||
gw = dst; | gw = (const struct sockaddr *)dst; | ||||
again: | again: | ||||
/* | /* | ||||
* Validate route against routing table additions; | * Validate route against routing table additions; | ||||
* a better/more specific route might have been added. | * a better/more specific route might have been added. | ||||
*/ | */ | ||||
if (inp != NULL && ro != NULL && ro->ro_nh != NULL) | if (inp != NULL && ro->ro_nh != NULL) | ||||
NH_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); | NH_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); | ||||
/* | /* | ||||
* If there is a cached route, | * If there is a cached route, | ||||
* check that it is to the same destination | * check that it is to the same destination | ||||
* and is still up. If not, free it and try again. | * and is still up. If not, free it and try again. | ||||
* The address family should also be checked in case of sharing the | * The address family should also be checked in case of sharing the | ||||
* cache with IPv6. | * cache with IPv6. | ||||
* Also check whether routing cache needs invalidation. | * Also check whether routing cache needs invalidation. | ||||
*/ | */ | ||||
if (ro != NULL && ro->ro_nh != NULL && | if (ro->ro_nh != NULL && | ||||
((!NH_IS_VALID(ro->ro_nh)) || dst->sin_family != AF_INET || | ((!NH_IS_VALID(ro->ro_nh)) || dst->sin_family != AF_INET || | ||||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) | dst->sin_addr.s_addr != ip->ip_dst.s_addr)) | ||||
RO_INVALIDATE_CACHE(ro); | RO_INVALIDATE_CACHE(ro); | ||||
ia = NULL; | ia = NULL; | ||||
/* | /* | ||||
* If routing to interface only, short circuit routing lookup. | * If routing to interface only, short circuit routing lookup. | ||||
* The use of an all-ones broadcast address implies this; an | * The use of an all-ones broadcast address implies this; an | ||||
* interface is specified by the broadcast address of an interface, | * interface is specified by the broadcast address of an interface, | ||||
Show All 40 Lines | if (flags & IP_SENDONES) { | ||||
mtu = ifp->if_mtu; | mtu = ifp->if_mtu; | ||||
IFP_TO_IA(ifp, ia, &in_ifa_tracker); | IFP_TO_IA(ifp, ia, &in_ifa_tracker); | ||||
isbroadcast = 0; /* fool gcc */ | isbroadcast = 0; /* fool gcc */ | ||||
/* Interface may have no addresses. */ | /* Interface may have no addresses. */ | ||||
if (ia != NULL) | if (ia != NULL) | ||||
src = IA_SIN(ia)->sin_addr; | src = IA_SIN(ia)->sin_addr; | ||||
else | else | ||||
src.s_addr = INADDR_ANY; | src.s_addr = INADDR_ANY; | ||||
} else if (ro != NULL) { | } else if (ro != &iproute) { | ||||
if (ro->ro_nh == NULL) { | if (ro->ro_nh == NULL) { | ||||
/* | /* | ||||
* We want to do any cloning requested by the link | * We want to do any cloning requested by the link | ||||
* layer, as this is probably required in all cases | * layer, as this is probably required in all cases | ||||
* for correct operation (as it is for ARP). | * for correct operation (as it is for ARP). | ||||
*/ | */ | ||||
uint32_t flowid; | uint32_t flowid; | ||||
flowid = m->m_pkthdr.flowid; | flowid = m->m_pkthdr.flowid; | ||||
Show All 16 Lines | #endif | ||||
} | } | ||||
struct nhop_object *nh = ro->ro_nh; | struct nhop_object *nh = ro->ro_nh; | ||||
ia = ifatoia(nh->nh_ifa); | ia = ifatoia(nh->nh_ifa); | ||||
ifp = nh->nh_ifp; | ifp = nh->nh_ifp; | ||||
counter_u64_add(nh->nh_pksent, 1); | counter_u64_add(nh->nh_pksent, 1); | ||||
rt_update_ro_flags(ro, nh); | rt_update_ro_flags(ro, nh); | ||||
if (nh->nh_flags & NHF_GATEWAY) | if (nh->nh_flags & NHF_GATEWAY) | ||||
gw = &nh->gw4_sa; | gw = &nh->gw_sa; | ||||
if (nh->nh_flags & NHF_HOST) | if (nh->nh_flags & NHF_HOST) | ||||
isbroadcast = (nh->nh_flags & NHF_BROADCAST); | isbroadcast = (nh->nh_flags & NHF_BROADCAST); | ||||
else if (ifp->if_flags & IFF_BROADCAST) | else if ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET)) | ||||
isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); | isbroadcast = in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia); | ||||
else | else | ||||
isbroadcast = 0; | isbroadcast = 0; | ||||
mtu = nh->nh_mtu; | mtu = nh->nh_mtu; | ||||
src = IA_SIN(ia)->sin_addr; | src = IA_SIN(ia)->sin_addr; | ||||
} else { | } else { | ||||
struct nhop_object *nh; | struct nhop_object *nh; | ||||
nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, | nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, | ||||
m->m_pkthdr.flowid); | m->m_pkthdr.flowid); | ||||
if (nh == NULL) { | if (nh == NULL) { | ||||
#if defined(IPSEC) || defined(IPSEC_SUPPORT) | #if defined(IPSEC) || defined(IPSEC_SUPPORT) | ||||
/* | /* | ||||
* There is no route for this packet, but it is | * There is no route for this packet, but it is | ||||
* possible that a matching SPD entry exists. | * possible that a matching SPD entry exists. | ||||
*/ | */ | ||||
no_route_but_check_spd = 1; | no_route_but_check_spd = 1; | ||||
goto sendit; | goto sendit; | ||||
#endif | #endif | ||||
IPSTAT_INC(ips_noroute); | IPSTAT_INC(ips_noroute); | ||||
error = EHOSTUNREACH; | error = EHOSTUNREACH; | ||||
goto bad; | goto bad; | ||||
} | } | ||||
ifp = nh->nh_ifp; | ifp = nh->nh_ifp; | ||||
mtu = nh->nh_mtu; | mtu = nh->nh_mtu; | ||||
/* | rt_update_ro_flags(ro, nh); | ||||
* We are rewriting here dst to be gw actually, contradicting | |||||
* comment at the beginning of the function. However, in this | |||||
* case we are always dealing with on stack dst. | |||||
* In case if pfil(9) sends us back to beginning of the | |||||
* function, the dst would be rewritten by ip_output_pfil(). | |||||
*/ | |||||
MPASS(dst == &sin); | |||||
if (nh->nh_flags & NHF_GATEWAY) | if (nh->nh_flags & NHF_GATEWAY) | ||||
melifaro: Given that `rt_update_ro_flags()` is static, it's probably worth just updating its signature to include an `nh` parameter. | |||||
dst->sin_addr = nh->gw4_sa.sin_addr; | gw = &nh->gw_sa; | ||||
ia = ifatoia(nh->nh_ifa); | ia = ifatoia(nh->nh_ifa); | ||||
src = IA_SIN(ia)->sin_addr; | src = IA_SIN(ia)->sin_addr; | ||||
Done Inline Actions — melifaro: Why not have a pointer to `nh->gw_sa` instead of copying? | |||||
isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) == | isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) == | ||||
(NHF_HOST | NHF_BROADCAST)) || | (NHF_HOST | NHF_BROADCAST)) || | ||||
Not Done Inline Actions — melifaro: nh_ifa gets selected during route addition. I don't remember the code there, but it should reject a route if no ifa is found (and we should be able to find the loopback address). It should also represent an IPv4 ifa. We don't need these checks in the datapath; it is probably worth just enforcing them in the control-plane code. | |||||
Done Inline Actions — zlei: Currently, source address selection in the IPv4 stack is simple, but it does not work well in some cases, e.g. an unnumbered interface, or an interface that has only IPv6 addresses or IPv4 link-local addresses. ip_output and icmp_reflect are known to be affected. As the issue existed before this feature, I'm planning to fix it in a separate diff. | |||||
((ifp->if_flags & IFF_BROADCAST) && | ((ifp->if_flags & IFF_BROADCAST) && | ||||
in_ifaddr_broadcast(dst->sin_addr, ia))); | (gw->sa_family == AF_INET) && | ||||
in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia))); | |||||
} | } | ||||
/* Catch a possible divide by zero later. */ | /* Catch a possible divide by zero later. */ | ||||
KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (nh_flags=0x%08x) ifp=%p", | KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (nh_flags=0x%08x) ifp=%p", | ||||
__func__, mtu, ro, | __func__, mtu, ro, | ||||
(ro != NULL && ro->ro_nh != NULL) ? ro->ro_nh->nh_flags : 0, ifp)); | (ro != NULL && ro->ro_nh != NULL) ? ro->ro_nh->nh_flags : 0, ifp)); | ||||
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { | if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { | ||||
m->m_flags |= M_MCAST; | m->m_flags |= M_MCAST; | ||||
/* | /* | ||||
* IP destination address is multicast. Make sure "gw" | * IP destination address is multicast. Make sure "gw" | ||||
* still points to the address in "ro". (It may have been | * still points to the address in "ro". (It may have been | ||||
* changed to point to a gateway address, above.) | * changed to point to a gateway address, above.) | ||||
*/ | */ | ||||
gw = dst; | gw = (const struct sockaddr *)dst; | ||||
/* | /* | ||||
* See if the caller provided any multicast options | * See if the caller provided any multicast options | ||||
*/ | */ | ||||
if (imo != NULL) { | if (imo != NULL) { | ||||
ip->ip_ttl = imo->imo_multicast_ttl; | ip->ip_ttl = imo->imo_multicast_ttl; | ||||
if (imo->imo_multicast_vif != -1) | if (imo->imo_multicast_vif != -1) | ||||
ip->ip_src.s_addr = | ip->ip_src.s_addr = | ||||
ip_mcast_src ? | ip_mcast_src ? | ||||
▲ Show 20 Lines • Show All 143 Lines • ▼ Show 20 Lines | case 0: /* Continue normally */ | ||||
break; | break; | ||||
case -1: /* Need to try again */ | case -1: /* Need to try again */ | ||||
/* Reset everything for a new round */ | /* Reset everything for a new round */ | ||||
if (ro != NULL) { | if (ro != NULL) { | ||||
RO_NHFREE(ro); | RO_NHFREE(ro); | ||||
ro->ro_prepend = NULL; | ro->ro_prepend = NULL; | ||||
} | } | ||||
gw = dst; | gw = (const struct sockaddr *)dst; | ||||
ip = mtod(m, struct ip *); | ip = mtod(m, struct ip *); | ||||
goto again; | goto again; | ||||
} | } | ||||
} | } | ||||
if (vlan_pcp > -1) | if (vlan_pcp > -1) | ||||
EVL_APPLY_PRI(m, vlan_pcp); | EVL_APPLY_PRI(m, vlan_pcp); | ||||
▲ Show 20 Lines • Show All 900 Lines • Show Last 20 Lines |
Given that rt_update_ro_flags() is static, it's probably worth just updating its signature to include an nh parameter.