Changeset View
Standalone View
sys/netinet/ip_fastfwd.c
Show First 20 Lines • Show All 54 Lines • ▼ Show 20 Lines | |||||||||||||||||||||||
* IPSEC is not supported if this host is a tunnel broker. IPSEC is | * IPSEC is not supported if this host is a tunnel broker. IPSEC is | ||||||||||||||||||||||
* supported for connections to/from local host. | * supported for connections to/from local host. | ||||||||||||||||||||||
* | * | ||||||||||||||||||||||
* We try to do the least expensive (in CPU ops) checks and operations | * We try to do the least expensive (in CPU ops) checks and operations | ||||||||||||||||||||||
* first to catch junk with as little overhead as possible. | * first to catch junk with as little overhead as possible. | ||||||||||||||||||||||
* | * | ||||||||||||||||||||||
* We take full advantage of hardware support for IP checksum and | * We take full advantage of hardware support for IP checksum and | ||||||||||||||||||||||
* fragmentation offloading. | * fragmentation offloading. | ||||||||||||||||||||||
* | |||||||||||||||||||||||
* We don't do ICMP redirect in the fast forwarding path. I have had my own | |||||||||||||||||||||||
* cases where two core routers with Zebra routing suite would send millions | |||||||||||||||||||||||
* ICMP redirects to connected hosts if the destination router was not the | |||||||||||||||||||||||
* default gateway. In one case it was filling the routing table of a host | |||||||||||||||||||||||
* with approximately 300.000 cloned redirect entries until it ran out of | |||||||||||||||||||||||
* kernel memory. However the networking code proved very robust and it didn't | |||||||||||||||||||||||
* crash or fail in other ways. | |||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which | * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which | ||||||||||||||||||||||
* is being followed here. | * is being followed here. | ||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||||||||||||||||||||
Show All 30 Lines | |||||||||||||||||||||||
#include <netinet/ip_icmp.h> | #include <netinet/ip_icmp.h> | ||||||||||||||||||||||
#include <netinet/ip_options.h> | #include <netinet/ip_options.h> | ||||||||||||||||||||||
#include <machine/in_cksum.h> | #include <machine/in_cksum.h> | ||||||||||||||||||||||
#define V_ipsendredirects VNET(ipsendredirects) | #define V_ipsendredirects VNET(ipsendredirects) | ||||||||||||||||||||||
static struct mbuf * | static struct mbuf * | ||||||||||||||||||||||
ip_redir_alloc(struct mbuf *m, struct nhop_object *nh, | ip_redir_alloc(struct mbuf *m, struct nhop_object *nh, u_short ip_len, | ||||||||||||||||||||||
struct ip *ip, in_addr_t *addr) | struct in_addr *osrc, struct in_addr *newgw) | ||||||||||||||||||||||
{ | { | ||||||||||||||||||||||
struct mbuf *mcopy = m_gethdr(M_NOWAIT, m->m_type); | struct in_ifaddr *nh_ia; | ||||||||||||||||||||||
struct mbuf *mcopy; | |||||||||||||||||||||||
KASSERT(nh != NULL, ("%s: m %p nh is NULL\n", __func__, m)); | |||||||||||||||||||||||
/* | |||||||||||||||||||||||
* Only send a redirect if: | |||||||||||||||||||||||
* - Redirects are not disabled (must be checked by caller), | |||||||||||||||||||||||
* - We have not applied NAT (must be checked by caller as possible), | |||||||||||||||||||||||
* - Neither a MCAST or BCAST packet (must be checked by caller) | |||||||||||||||||||||||
* [RFC1009 Appendix A.2]. | |||||||||||||||||||||||
* - The packet does not do IP source routing or having any other | |||||||||||||||||||||||
* IP options (this case was handled already by ip_input() calling | |||||||||||||||||||||||
* ip_dooptions() [RFC792, p13], | |||||||||||||||||||||||
* - The packet is being forwarded out the same physical interface | |||||||||||||||||||||||
* that it was received from [RFC1812, 5.2.7.2]. | |||||||||||||||||||||||
*/ | |||||||||||||||||||||||
/* | |||||||||||||||||||||||
* - The forwarding route was not created by a redirect | |||||||||||||||||||||||
* [RFC1812, 5.2.7.2], or | |||||||||||||||||||||||
* if it was to follow a default route (see below). | |||||||||||||||||||||||
* - The next-hop is reachable by us [RFC1009 Appendix A.2]. | |||||||||||||||||||||||
*/ | |||||||||||||||||||||||
melifaro: Could you elaborate on why this is flawed? I'm not sure what is the problem with ROUTE_MPATH. | |||||||||||||||||||||||
Done Inline ActionsWell at the moment there is no full first-hop mpath so that's the end of this there yet. epair2a: flags=8863<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> metric 0 mtu 1500 options=8<VLAN_MTU> inet 198.51.100.1 netmask 0xffffff00 broadcast 198.51.100.255 epair4a: flags=8863<UP,BROADCAST,RUNNING,SIMPLEX,MULTICAST> metric 0 mtu 1500 options=8<VLAN_MTU> inet 198.51.100.10 netmask 0xffffff00 broadcast 198.51.100.255 Historically, while this was possible, the "alias" on the second interface would have been a /32 (the routing code apparently still does, see below) but with MPATH that is not actually needed anymore as we can have routes with multiple next-hop (interfaces/links). Your routing looking like this: # netstat -rn4 Routing tables Internet: Destination Gateway Flags Netif Expire default 203.0.113.254 UGS epair5a 127.0.0.1 link#1 UH lo0 192.0.2.0/24 198.51.100.2 UGS epair2a 198.18.0.0/15 198.51.100.5 UGS epair2a 198.51.100.0/24 link#2 U epair2a 198.51.100.1 link#2 UHS lo0 198.51.100.10 link#3 UHS lo0 203.0.113.0/24 link#4 U epair5a 203.0.113.1 link#4 UHS lo0 Host S sends a packet from 198.51.100.2 to 198.18.7.42 via 198.51.100.10 (either because of a static route or an MPATH route weighted between .1 and .10 as next-hops). Now the the routing table currently does not reflect that either. And we are still talking about the same subnet on multiple interfaces here (compared to multiple logical subnets on a single interface which is mentioned in the NB further down and currently not considered). bz: Well at the moment there is no full first-hop mpath so that's the end of this there yet. | |||||||||||||||||||||||
Done Inline ActionsThank you for the detailed explanation! Let me try to describe my perception of the multipath for the interface routes and redirects. Firstly, the setup is, well, weird. You should not have a case when the same L2 domain is reachable via multiple L3 routed interfaces. Typically you solve such cases by using lagg (increasing capacity, doing load balancing/failover) or bridge (figuring which devices are behind which port and doing L2 stiching). Without those it is harder for the system to decide which interface to use for outgoing traffic in the general case, w/o some aid from the firewall. Though, the support for such setup has been added to Linux a while ago (before 2.6.17 IIRC). The feature requires tweaking ARP stack behaviour (arp_filter=1). I've spent some time searching for the usecases. My impression is that the vast majority of the usecases revolves around splitting the L2 domains without actualy splitting the prefix. The classic example is attaching multiple VMs to the same /24 network, without using a bridge. Similarly, from what I see in Linux code, is that they always require src==dst to trigger redirect message: net/ipv4/route.c With all that in mind, I'd vote to make a simpler implementation and not handle such use case in any specific way. melifaro: Thank you for the detailed explanation!
Let me try to describe my perception of the multipath… | |||||||||||||||||||||||
Done Inline ActionsIn that case (modulo the comments and #if 0 block) the current implementation should be what you want? bz: In that case (modulo the comments and #if 0 block) the current implementation should be what… | |||||||||||||||||||||||
if ((nh->nh_flags & (NHF_DEFAULT | NHF_REDIRECT | | |||||||||||||||||||||||
NHF_BLACKHOLE | NHF_REJECT)) != 0) | |||||||||||||||||||||||
return (NULL); | |||||||||||||||||||||||
/* Get the new gateway. */ | |||||||||||||||||||||||
if ((nh->nh_flags & NHF_GATEWAY) == 0 || nh->gw_sa.sa_family != AF_INET) | |||||||||||||||||||||||
return (NULL); | |||||||||||||||||||||||
newgw->s_addr = nh->gw4_sa.sin_addr.s_addr; | |||||||||||||||||||||||
/* | |||||||||||||||||||||||
* - The resulting forwarding destination is not "This host on this | |||||||||||||||||||||||
* network" [RFC1122, Section 3.2.1.3] (default route check above). | |||||||||||||||||||||||
*/ | |||||||||||||||||||||||
if (newgw->s_addr == 0) | |||||||||||||||||||||||
return (NULL); | |||||||||||||||||||||||
/* | |||||||||||||||||||||||
* - We know how to reach the sender and the source address is | |||||||||||||||||||||||
Done Inline ActionsNit: shouldn't this be AF_INET rather than PF_INET? karels: Nit: shouldn't this be AF_INET rather than PF_INET? | |||||||||||||||||||||||
* directly connected to us [RFC792, p13]. | |||||||||||||||||||||||
* + The new gateway address and the source address are on the same | |||||||||||||||||||||||
* subnet [RFC1009 Appendix A.2, RFC1122 3.2.2.2, RFC1812, 5.2.7.2]. | |||||||||||||||||||||||
* NB: if you think multiple logical subnets on the same wire should | |||||||||||||||||||||||
* receive redirects read [RFC1812, APPENDIX C (14->15)]. | |||||||||||||||||||||||
*/ | |||||||||||||||||||||||
nh_ia = (struct in_ifaddr *)nh->nh_ifa; | |||||||||||||||||||||||
if ((ntohl(osrc->s_addr) & nh_ia->ia_subnetmask) != nh_ia->ia_subnet) | |||||||||||||||||||||||
return (NULL); | |||||||||||||||||||||||
/* Prepare for sending the redirect. */ | |||||||||||||||||||||||
/* | |||||||||||||||||||||||
* Make a copy of as much as we need of the packet as the original | |||||||||||||||||||||||
* one will be forwarded but we need (a portion) for icmp_error(). | |||||||||||||||||||||||
*/ | |||||||||||||||||||||||
mcopy = m_gethdr(M_NOWAIT, m->m_type); | |||||||||||||||||||||||
Done Inline ActionsActually (14) but mis-numbered in the RFC. bz: Actually (14) but mis-numbered in the RFC. | |||||||||||||||||||||||
Done Inline Actions
Nit: worth cleaning up this part as well. melifaro: Nit: worth cleaning up this part as well. | |||||||||||||||||||||||
if (mcopy == NULL) | if (mcopy == NULL) | ||||||||||||||||||||||
return (NULL); | return (NULL); | ||||||||||||||||||||||
Done Inline ActionsI'm not sure if this is correct. It is possible (but not straight-forwarded ATM) to specify a preferred source address that (1) does not belong to the transmit interface OR (2) belongs to the same interface but is not on the same subnet as the gateway. melifaro: I'm not sure if this is correct. It is possible (but not straight-forwarded ATM) to specify a… | |||||||||||||||||||||||
Done Inline ActionsYou still assume there is only 1 interface? bz: You still assume there is only 1 interface? | |||||||||||||||||||||||
if (m_dup_pkthdr(mcopy, m, M_NOWAIT) == 0) { | if (m_dup_pkthdr(mcopy, m, M_NOWAIT) == 0) { | ||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* It's probably ok if the pkthdr dup fails (because | * It's probably ok if the pkthdr dup fails (because | ||||||||||||||||||||||
* the deep copy of the tag chain failed), but for now | * the deep copy of the tag chain failed), but for now | ||||||||||||||||||||||
* be conservative and just discard the copy since | * be conservative and just discard the copy since | ||||||||||||||||||||||
* code below may some day want the tags. | * code below may some day want the tags. | ||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
m_free(mcopy); | m_free(mcopy); | ||||||||||||||||||||||
return (NULL); | return (NULL); | ||||||||||||||||||||||
} | } | ||||||||||||||||||||||
mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); | mcopy->m_len = min(ip_len, M_TRAILINGSPACE(mcopy)); | ||||||||||||||||||||||
mcopy->m_pkthdr.len = mcopy->m_len; | mcopy->m_pkthdr.len = mcopy->m_len; | ||||||||||||||||||||||
m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); | m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); | ||||||||||||||||||||||
if (nh != NULL && | |||||||||||||||||||||||
((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) { | |||||||||||||||||||||||
struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa); | |||||||||||||||||||||||
u_long src = ntohl(ip->ip_src.s_addr); | |||||||||||||||||||||||
if (nh_ia != NULL && | |||||||||||||||||||||||
(src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { | |||||||||||||||||||||||
if (nh->nh_flags & NHF_GATEWAY) | |||||||||||||||||||||||
*addr = nh->gw4_sa.sin_addr.s_addr; | |||||||||||||||||||||||
else | |||||||||||||||||||||||
Done Inline ActionsThis "else" case probably was never needed/fully correct from day one of the code entering CSRG and we should not send a redirect with that as new gateway as it would only happen as a result of a bug IP stack of the source hosts to my understanding; discussed with @karels in private email 2021-12-06. Should still update ip_forward() with that change or rather call ip_redir_alloc() there; will do that separately as the code there will at least not send a redirect if it should not. bz: This "else" case probably was never needed/fully correct from day one of the code entering CSRG… | |||||||||||||||||||||||
*addr = ip->ip_dst.s_addr; | |||||||||||||||||||||||
} | |||||||||||||||||||||||
} | |||||||||||||||||||||||
return (mcopy); | return (mcopy); | ||||||||||||||||||||||
} | } | ||||||||||||||||||||||
static int | static int | ||||||||||||||||||||||
ip_findroute(struct nhop_object **pnh, struct in_addr dest, struct mbuf *m) | ip_findroute(struct nhop_object **pnh, struct in_addr dest, struct mbuf *m) | ||||||||||||||||||||||
{ | { | ||||||||||||||||||||||
struct nhop_object *nh; | struct nhop_object *nh; | ||||||||||||||||||||||
Show All 14 Lines | if ((nh->nh_flags & (NHF_BLACKHOLE | NHF_BROADCAST)) != 0) { | ||||||||||||||||||||||
m_freem(m); | m_freem(m); | ||||||||||||||||||||||
return (EHOSTUNREACH); | return (EHOSTUNREACH); | ||||||||||||||||||||||
} | } | ||||||||||||||||||||||
if (nh->nh_flags & NHF_REJECT) { | if (nh->nh_flags & NHF_REJECT) { | ||||||||||||||||||||||
IPSTAT_INC(ips_cantforward); | IPSTAT_INC(ips_cantforward); | ||||||||||||||||||||||
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); | icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); | ||||||||||||||||||||||
return (EHOSTUNREACH); | return (EHOSTUNREACH); | ||||||||||||||||||||||
} | } | ||||||||||||||||||||||
Done Inline Actions
And also a secondary route table lookup can be eliminated. zlei: > If SNAT is applied by pfil(9) and the source address of the packet is changed, then ICMP… | |||||||||||||||||||||||
*pnh = nh; | *pnh = nh; | ||||||||||||||||||||||
return (0); | return (0); | ||||||||||||||||||||||
} | } | ||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* Try to forward a packet based on the destination address. | * Try to forward a packet based on the destination address. | ||||||||||||||||||||||
* This is a fast path optimized for the plain forwarding case. | * This is a fast path optimized for the plain forwarding case. | ||||||||||||||||||||||
* If the packet is handled (and consumed) here then we return NULL; | * If the packet is handled (and consumed) here then we return NULL; | ||||||||||||||||||||||
* otherwise mbuf is returned and the packet should be delivered | * otherwise mbuf is returned and the packet should be delivered | ||||||||||||||||||||||
* to ip_input for full processing. | * to ip_input for full processing. | ||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
struct mbuf * | struct mbuf * | ||||||||||||||||||||||
ip_tryforward(struct mbuf *m) | ip_tryforward(struct mbuf *m) | ||||||||||||||||||||||
{ | { | ||||||||||||||||||||||
struct ip *ip; | struct ip *ip; | ||||||||||||||||||||||
struct mbuf *m0 = NULL; | struct mbuf *m0 = NULL; | ||||||||||||||||||||||
struct nhop_object *nh = NULL; | struct nhop_object *nh = NULL; | ||||||||||||||||||||||
struct route ro; | struct route ro; | ||||||||||||||||||||||
struct sockaddr_in *dst; | struct sockaddr_in *dst; | ||||||||||||||||||||||
const struct sockaddr *gw; | const struct sockaddr *gw; | ||||||||||||||||||||||
Done Inline Actionsextra space to remove bz: extra space to remove | |||||||||||||||||||||||
struct in_addr dest, odest, rtdest; | struct in_addr dest, odest, rtdest, osrc; | ||||||||||||||||||||||
uint16_t ip_len, ip_off; | uint16_t ip_len, ip_off; | ||||||||||||||||||||||
int error = 0; | int error = 0; | ||||||||||||||||||||||
struct m_tag *fwd_tag = NULL; | struct m_tag *fwd_tag = NULL; | ||||||||||||||||||||||
struct mbuf *mcopy = NULL; | struct mbuf *mcopy = NULL; | ||||||||||||||||||||||
struct in_addr redest; | struct in_addr redest; | ||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* Are we active and forwarding packets? | * Are we active and forwarding packets? | ||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | #endif | ||||||||||||||||||||||
IPSTAT_INC(ips_total); | IPSTAT_INC(ips_total); | ||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* Step 3: incoming packet firewall processing | * Step 3: incoming packet firewall processing | ||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
odest.s_addr = dest.s_addr = ip->ip_dst.s_addr; | odest.s_addr = dest.s_addr = ip->ip_dst.s_addr; | ||||||||||||||||||||||
osrc.s_addr = ip->ip_src.s_addr; | |||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* Run through list of ipfilter hooks for input packets | * Run through list of ipfilter hooks for input packets | ||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
if (!PFIL_HOOKED_IN(V_inet_pfil_head)) | if (!PFIL_HOOKED_IN(V_inet_pfil_head)) | ||||||||||||||||||||||
goto passin; | goto passin; | ||||||||||||||||||||||
if (pfil_run_hooks(V_inet_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN, | if (pfil_run_hooks(V_inet_pfil_head, &m, m->m_pkthdr.rcvif, PFIL_IN, | ||||||||||||||||||||||
▲ Show 20 Lines • Show All 144 Lines • ▼ Show 20 Lines | passout: | ||||||||||||||||||||||
dst->sin_len = sizeof(*dst); | dst->sin_len = sizeof(*dst); | ||||||||||||||||||||||
dst->sin_addr = dest; | dst->sin_addr = dest; | ||||||||||||||||||||||
if (nh->nh_flags & NHF_GATEWAY) { | if (nh->nh_flags & NHF_GATEWAY) { | ||||||||||||||||||||||
gw = &nh->gw_sa; | gw = &nh->gw_sa; | ||||||||||||||||||||||
ro.ro_flags |= RT_HAS_GW; | ro.ro_flags |= RT_HAS_GW; | ||||||||||||||||||||||
} else | } else | ||||||||||||||||||||||
gw = (const struct sockaddr *)dst; | gw = (const struct sockaddr *)dst; | ||||||||||||||||||||||
/* | /* Handle redirect case. */ | ||||||||||||||||||||||
* Handle redirect case. | |||||||||||||||||||||||
*/ | |||||||||||||||||||||||
redest.s_addr = 0; | redest.s_addr = 0; | ||||||||||||||||||||||
if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif) && | if (V_ipsendredirects && osrc.s_addr == ip->ip_src.s_addr && | ||||||||||||||||||||||
gw->sa_family == AF_INET) | nh->nh_ifp == m->m_pkthdr.rcvif) | ||||||||||||||||||||||
Done Inline ActionsIf SNAT is applied by pfil(9) and the source address of the packet is changed, then ICMP redirect to original source seems causing side effects. I'd prefer do NOT send ICMP redirect in this case. zlei: If SNAT is applied by pfil(9) and the source address of the packet is changed, then ICMP… | |||||||||||||||||||||||
Done Inline ActionsProbably a good check to add; we hadn't dealt with that in the past and I only made the original address (as much as we can still tell) the reference point. I'll update the patch in a few minutes. bz: Probably a good check to add; we hadn't dealt with that in the past and I only made the… | |||||||||||||||||||||||
Done Inline Actions
I did not realize the SNAT issue before I see the new local variable osrc . NAT and related scenes:
As the original source and destination hosts do not know the transparent translation( NAT / PAT , etc. ), an ICMP redirect would prevent the source host sending following packets through this firewall and thus the connection flow is disturbed. Maybe we leave a TODO here to emphasis the side effects. CC @kp zlei: > Probably a good check to add; we hadn't dealt with that in the past and I only made the… | |||||||||||||||||||||||
Done Inline Actions
Should be nh->nh_ifp == m->m_pkthdr.rcvif IIUC. zlei: Should be `nh->nh_ifp == m->m_pkthdr.rcvif` IIUC. | |||||||||||||||||||||||
mcopy = ip_redir_alloc(m, nh, ip, &redest.s_addr); | mcopy = ip_redir_alloc(m, nh, ip_len, &osrc, &redest); | ||||||||||||||||||||||
Done Inline ActionsI'd really love to see the interface check being the second one instead of src check, so we don't call redir alloc on every packet. melifaro: I'd really love to see the interface check being the second one instead of src check, so we… | |||||||||||||||||||||||
Done Inline ActionsSuggestion: I'll move that change here for the moment and we stay strictly RFC compliant and rather handle some more packets in/out than sending the redirect for now; if you fix the mpath code, then you can re-consider this? bz: Suggestion: I'll move that change here for the moment and we stay strictly RFC compliant and… | |||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* Check if packet fits MTU or if hardware will fragment for us | * Check if packet fits MTU or if hardware will fragment for us | ||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
if (ip_len <= nh->nh_mtu) { | if (ip_len <= nh->nh_mtu) { | ||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* Avoid confusing lower layers. | * Avoid confusing lower layers. | ||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
m_clrprotoflags(m); | m_clrprotoflags(m); | ||||||||||||||||||||||
Done Inline Actions
This creates extra lookup on every packet even if redirects are disabled. Many disable them in production environments. Is it possible to code like this? glebius: This creates extra lookup on every packet even if redirects are disabled. Many disable them in… | |||||||||||||||||||||||
Done Inline ActionsAgreed on the per-packet which was there before the code was rewritten, broken, and a broken fix added I believe. I really hate embedded lookups like this as they are just messy lines. So put it all behind if (V_ipsendredirects) { ... } as the ifp comparison should go away in the long way (in both cases here and ip_forward) as indicated in the comment above and then probably also change the order to do all non-lookup code first before doing the actual lookup? It's just all details which went wrong over multiple iterations since at least stable/11. Ideally I'd also not have a splattered V_ipsendredirects check in multiple files but that would mean incurring a function call and that is worse. bz: Agreed on the per-packet which was there before the code was rewritten, broken, and a broken… | |||||||||||||||||||||||
Done Inline ActionsThe idea behind ip_redir_alloc() is simple - the situation when we need to allocate mbuf / send redirect is a corner case, hence we move it away to not bloat the main function /icache. I'd rather suggest keeping the current condition logic inside ip_forward() as is and implement the tweaked business logic inside ip_redir_alloc(), simply returning NULL when sending redirect is not required. melifaro: The idea behind `ip_redir_alloc()` is simple - the situation when we need to allocate mbuf /… | |||||||||||||||||||||||
Done Inline ActionsI have that separate ip_redir_alloc() logic but not tested yet; and that won't happen anymore tonight; so I'll send it tomorrow; What I meant with ip_redir_alloc() is not the duplication of the mbuf parts, which is understaood; if is the 2nd bit to decide which "new Gateway Internet Address" to report back; we don't set it in many cases and still report back the mcopy as to trigger the redirect; also both ip_redir_alloc() and ip_forward() set the new Gateway Internet Address to either the gateway from the lookup or the destination address of the packet which makes not much sense as it can be at the other end of the world 15 hops away; it further isn't always the original address anymore anyway in the current code, so we may send a redirect to the address of www.freebsd.org for what it could matter; hence my comments about historically grown misbehaviour (unless I totally misunderstand what the code is supposed to do). bz: I have that separate ip_redir_alloc() logic but not tested yet; and that won't happen anymore… | |||||||||||||||||||||||
Done Inline Actions> if (V_ipsendredirects) { > ... > } That will also work! glebius: ```
> if (V_ipsendredirects) {
> ...
> }
```
That will also work! | |||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* Send off the packet via outgoing interface | * Send off the packet via outgoing interface | ||||||||||||||||||||||
*/ | */ | ||||||||||||||||||||||
IP_PROBE(send, NULL, NULL, ip, nh->nh_ifp, ip, NULL); | IP_PROBE(send, NULL, NULL, ip, nh->nh_ifp, ip, NULL); | ||||||||||||||||||||||
error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, gw, &ro); | error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, gw, &ro); | ||||||||||||||||||||||
} else { | } else { | ||||||||||||||||||||||
/* | /* | ||||||||||||||||||||||
* Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery | * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery | ||||||||||||||||||||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | passout: | ||||||||||||||||||||||
else { | else { | ||||||||||||||||||||||
IPSTAT_INC(ips_forward); | IPSTAT_INC(ips_forward); | ||||||||||||||||||||||
IPSTAT_INC(ips_fastforward); | IPSTAT_INC(ips_fastforward); | ||||||||||||||||||||||
} | } | ||||||||||||||||||||||
/* Send required redirect */ | /* Send required redirect */ | ||||||||||||||||||||||
if (mcopy != NULL) { | if (mcopy != NULL) { | ||||||||||||||||||||||
icmp_error(mcopy, ICMP_REDIRECT, ICMP_REDIRECT_HOST, redest.s_addr, 0); | icmp_error(mcopy, ICMP_REDIRECT, ICMP_REDIRECT_HOST, redest.s_addr, 0); | ||||||||||||||||||||||
mcopy = NULL; /* Freed by caller */ | mcopy = NULL; /* Was consumed by callee. */ | ||||||||||||||||||||||
} | } | ||||||||||||||||||||||
consumed: | consumed: | ||||||||||||||||||||||
if (mcopy != NULL) | if (mcopy != NULL) | ||||||||||||||||||||||
m_freem(mcopy); | m_freem(mcopy); | ||||||||||||||||||||||
return NULL; | return NULL; | ||||||||||||||||||||||
drop: | drop: | ||||||||||||||||||||||
if (m) | if (m) | ||||||||||||||||||||||
m_freem(m); | m_freem(m); | ||||||||||||||||||||||
return NULL; | return NULL; | ||||||||||||||||||||||
} | } |
Could you elaborate on why this is flawed? I'm not sure what is the problem with ROUTE_MPATH.