Index: head/sys/net/debugnet.c =================================================================== --- head/sys/net/debugnet.c (revision 360348) +++ head/sys/net/debugnet.c (revision 360349) @@ -1,1071 +1,1072 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2019 Isilon Systems, LLC. * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. * Copyright (c) 2000 Darrell Anderson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #include #include #include #include #include #include +#include +#include #include +#include #include #include #include #include #include #include #include #include #include #include #define DEBUGNET_INTERNAL #include FEATURE(debugnet, "Debugnet support"); SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "debugnet parameters"); unsigned debugnet_debug; SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN, &debugnet_debug, 0, "Debug message verbosity (0: off; 1: on; 2: verbose)"); int debugnet_npolls = 2000; SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN, &debugnet_npolls, 0, "Number of times to poll before assuming packet loss (0.5ms per poll)"); int debugnet_nretries = 10; SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN, &debugnet_nretries, 0, "Number of retransmit attempts before giving up"); static bool g_debugnet_pcb_inuse; static struct debugnet_pcb g_dnet_pcb; /* * Simple accessors for opaque PCB. */ const unsigned char * debugnet_get_gw_mac(const struct debugnet_pcb *pcb) { MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && pcb->dp_state >= DN_STATE_HAVE_GW_MAC); return (pcb->dp_gw_mac.octet); } /* * Start of network primitives, beginning with output primitives. 
*/ /* * Handles creation of the ethernet header, then places outgoing packets into * the tx buffer for the NIC * * Parameters: * m The mbuf containing the packet to be sent (will be freed by * this function or the NIC driver) * ifp The interface to send on * dst The destination ethernet address (source address will be looked * up using ifp) * etype The ETHERTYPE_* value for the protocol that is being sent * * Returns: * int see errno.h, 0 for success */ int debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, u_short etype) { struct ether_header *eh; if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { if_printf(ifp, "%s: interface isn't up\n", __func__); m_freem(m); return (ENETDOWN); } /* Fill in the ethernet header. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); if (m == NULL) { printf("%s: out of mbufs\n", __func__); return (ENOBUFS); } eh = mtod(m, struct ether_header *); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); eh->ether_type = htons(etype); return (ifp->if_debugnet_methods->dn_transmit(ifp, m)); } /* * Unreliable transmission of an mbuf chain to the debugnet server * Note: can't handle fragmentation; fails if the packet is larger than * ifp->if_mtu after adding the UDP/IP headers * * Parameters: * pcb The debugnet context block * m mbuf chain * * Returns: * int see errno.h, 0 for success */ static int debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m) { struct udphdr *udp; MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC); M_PREPEND(m, sizeof(*udp), M_NOWAIT); if (m == NULL) { printf("%s: out of mbufs\n", __func__); return (ENOBUFS); } udp = mtod(m, void *); udp->uh_ulen = htons(m->m_pkthdr.len); /* Use this src port so that the server can connect() the socket */ udp->uh_sport = htons(pcb->dp_client_port); udp->uh_dport = htons(pcb->dp_server_port); /* Computed later (protocol-dependent). 
*/ udp->uh_sum = 0; return (debugnet_ip_output(pcb, m)); } int debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */) { struct debugnet_ack *dn_ack; struct mbuf *m; DNETDEBUG("Acking with seqno %u\n", ntohl(seqno)); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("%s: Out of mbufs\n", __func__); return (ENOBUFS); } m->m_len = sizeof(*dn_ack); m->m_pkthdr.len = sizeof(*dn_ack); MH_ALIGN(m, sizeof(*dn_ack)); dn_ack = mtod(m, void *); dn_ack->da_seqno = seqno; return (debugnet_udp_output(pcb, m)); } /* * Dummy free function for debugnet clusters. */ static void debugnet_mbuf_free(struct mbuf *m __unused) { } /* * Construct and reliably send a debugnet packet. May fail from a resource * shortage or extreme number of unacknowledged retransmissions. Wait for * an acknowledgement before returning. Splits packets into chunks small * enough to be sent without fragmentation (looks up the interface MTU) * * Parameters: * type debugnet packet type (HERALD, FINISHED, ...) * data data * datalen data size (bytes) * auxdata optional auxiliary information * * Returns: * int see errno.h, 0 for success */ int debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data, uint32_t datalen, const struct debugnet_proto_aux *auxdata) { struct debugnet_msg_hdr *dn_msg_hdr; struct mbuf *m, *m2; uint64_t want_acks; uint32_t i, pktlen, sent_so_far; int retries, polls, error; if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) return (ECONNRESET); want_acks = 0; pcb->dp_rcvd_acks = 0; retries = 0; retransmit: /* Chunks can be too big to fit in packets. */ for (i = sent_so_far = 0; sent_so_far < datalen || (i == 0 && datalen == 0); i++) { pktlen = datalen - sent_so_far; /* Bound: the interface MTU (assume no IP options). */ pktlen = min(pktlen, pcb->dp_ifp->if_mtu - sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr)); /* * Check if it is retransmitting and this has been ACKed * already. 
*/ if ((pcb->dp_rcvd_acks & (1 << i)) != 0) { sent_so_far += pktlen; continue; } /* * Get and fill a header mbuf, then chain data as an extended * mbuf. */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("%s: Out of mbufs\n", __func__); return (ENOBUFS); } m->m_len = sizeof(struct debugnet_msg_hdr); m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr); MH_ALIGN(m, sizeof(struct debugnet_msg_hdr)); dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *); dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i); dn_msg_hdr->mh_type = htonl(type); dn_msg_hdr->mh_len = htonl(pktlen); if (auxdata != NULL) { dn_msg_hdr->mh_offset = htobe64(auxdata->dp_offset_start + sent_so_far); dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2); } else { dn_msg_hdr->mh_offset = htobe64(sent_so_far); dn_msg_hdr->mh_aux2 = 0; } if (pktlen != 0) { m2 = m_get(M_NOWAIT, MT_DATA); if (m2 == NULL) { m_freem(m); printf("%s: Out of mbufs\n", __func__); return (ENOBUFS); } MEXTADD(m2, __DECONST(char *, data) + sent_so_far, pktlen, debugnet_mbuf_free, NULL, NULL, 0, EXT_DISPOSABLE); m2->m_len = pktlen; m_cat(m, m2); m->m_pkthdr.len += pktlen; } error = debugnet_udp_output(pcb, m); if (error != 0) return (error); /* Note that we're waiting for this packet in the bitfield. */ want_acks |= (1 << i); sent_so_far += pktlen; } if (i >= DEBUGNET_MAX_IN_FLIGHT) printf("Warning: Sent more than %d packets (%d). " "Acknowledgements will fail unless the size of " "rcvd_acks/want_acks is increased.\n", DEBUGNET_MAX_IN_FLIGHT, i); /* * Wait for acks. A *real* window would speed things up considerably. */ polls = 0; while (pcb->dp_rcvd_acks != want_acks) { if (polls++ > debugnet_npolls) { if (retries++ > debugnet_nretries) return (ETIMEDOUT); printf(". "); goto retransmit; } debugnet_network_poll(pcb); DELAY(500); if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) return (ECONNRESET); } pcb->dp_seqno += i; return (0); } /* * Network input primitives. 
*/ /* * Just introspect the header enough to fire off a seqno ack and validate * length fits. */ static void debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb) { const struct debugnet_msg_hdr *dnh; struct mbuf *m; int error; m = *mb; if (m->m_pkthdr.len < sizeof(*dnh)) { DNETDEBUG("ignoring small debugnet_msg packet\n"); return; } /* Get ND header. */ if (m->m_len < sizeof(*dnh)) { m = m_pullup(m, sizeof(*dnh)); *mb = m; if (m == NULL) { DNETDEBUG("m_pullup failed\n"); return; } } dnh = mtod(m, const void *); if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) { DNETDEBUG("Dropping short packet.\n"); return; } /* * If the issue is transient (ENOBUFS), sender should resend. If * non-transient (like driver objecting to rx -> tx from the same * thread), not much else we can do. */ error = debugnet_ack_output(pcb, dnh->mh_seqno); if (error != 0) return; if (ntohl(dnh->mh_type) == DEBUGNET_FINISHED) { printf("Remote shut down the connection on us!\n"); pcb->dp_state = DN_STATE_REMOTE_CLOSED; /* * Continue through to the user handler so they are signalled * not to wait for further rx. */ } pcb->dp_rx_handler(pcb, mb); } static void debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport) { const struct debugnet_ack *dn_ack; struct mbuf *m; uint32_t rcv_ackno; m = *mb; /* Get Ack. */ if (m->m_len < sizeof(*dn_ack)) { m = m_pullup(m, sizeof(*dn_ack)); *mb = m; if (m == NULL) { DNETDEBUG("m_pullup failed\n"); return; } } dn_ack = mtod(m, const void *); /* Debugnet processing. */ /* * Packet is meant for us. Extract the ack sequence number and the * port number if necessary. */ rcv_ackno = ntohl(dn_ack->da_seqno); if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) { pcb->dp_server_port = sport; pcb->dp_state = DN_STATE_GOT_HERALD_PORT; } if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT) printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno); else if (rcv_ackno >= pcb->dp_seqno) { /* We're interested in this ack. 
Record it. */ pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno); } } void debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb) { const struct udphdr *udp; struct mbuf *m; uint16_t sport, ulen; /* UDP processing. */ m = *mb; if (m->m_pkthdr.len < sizeof(*udp)) { DNETDEBUG("ignoring small UDP packet\n"); return; } /* Get UDP headers. */ if (m->m_len < sizeof(*udp)) { m = m_pullup(m, sizeof(*udp)); *mb = m; if (m == NULL) { DNETDEBUG("m_pullup failed\n"); return; } } udp = mtod(m, const void *); /* We expect to receive UDP packets on the configured client port. */ if (ntohs(udp->uh_dport) != pcb->dp_client_port) { DNETDEBUG("not on the expected port.\n"); return; } /* Check that ulen does not exceed actual size of data. */ ulen = ntohs(udp->uh_ulen); if (m->m_pkthdr.len < ulen) { DNETDEBUG("ignoring runt UDP packet\n"); return; } sport = ntohs(udp->uh_sport); m_adj(m, sizeof(*udp)); ulen -= sizeof(*udp); if (ulen == sizeof(struct debugnet_ack)) { debugnet_handle_ack(pcb, mb, sport); return; } if (pcb->dp_rx_handler == NULL) { if (ulen < sizeof(struct debugnet_ack)) DNETDEBUG("ignoring small ACK packet\n"); else DNETDEBUG("ignoring unexpected non-ACK packet on " "half-duplex connection.\n"); return; } debugnet_handle_rx_msg(pcb, mb); } /* * Handler for incoming packets directly from the network adapter * Identifies the packet type (IP or ARP) and passes it along to one of the * helper functions debugnet_handle_ip or debugnet_handle_arp. * * It needs to partially replicate the behaviour of ether_input() and * ether_demux(). * * Parameters: * ifp the interface the packet came from * m an mbuf containing the packet received */ static void debugnet_pkt_in(struct ifnet *ifp, struct mbuf *m) { struct ifreq ifr; struct ether_header *eh; u_short etype; /* Ethernet processing. 
*/ if ((m->m_flags & M_PKTHDR) == 0) { DNETDEBUG_IF(ifp, "discard frame without packet header\n"); goto done; } if (m->m_len < ETHER_HDR_LEN) { DNETDEBUG_IF(ifp, "discard frame without leading eth header (len %u pktlen %u)\n", m->m_len, m->m_pkthdr.len); goto done; } if ((m->m_flags & M_HASFCS) != 0) { m_adj(m, -ETHER_CRC_LEN); m->m_flags &= ~M_HASFCS; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { DNETDEBUG_IF(ifp, "ignoring vlan packets\n"); goto done; } if (if_gethwaddr(ifp, &ifr) != 0) { DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n"); goto done; } if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost, ETHER_ADDR_LEN) != 0 && (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) { DNETDEBUG_IF(ifp, "discard frame with incorrect destination addr\n"); goto done; } MPASS(g_debugnet_pcb_inuse); /* Done ethernet processing. Strip off the ethernet header. */ m_adj(m, ETHER_HDR_LEN); switch (etype) { case ETHERTYPE_ARP: debugnet_handle_arp(&g_dnet_pcb, &m); break; case ETHERTYPE_IP: debugnet_handle_ip(&g_dnet_pcb, &m); break; default: DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); break; } done: if (m != NULL) m_freem(m); } /* * Network polling primitive. * * Instead of assuming that most of the network stack is sane, we just poll the * driver directly for packets. */ void debugnet_network_poll(struct debugnet_pcb *pcb) { struct ifnet *ifp; ifp = pcb->dp_ifp; ifp->if_debugnet_methods->dn_poll(ifp, 1000); } /* * Start of consumer API surface. 
*/ void debugnet_free(struct debugnet_pcb *pcb) { struct ifnet *ifp; MPASS(g_debugnet_pcb_inuse); MPASS(pcb == &g_dnet_pcb); ifp = pcb->dp_ifp; if (ifp != NULL) { if (pcb->dp_drv_input != NULL) ifp->if_input = pcb->dp_drv_input; if (pcb->dp_event_started) ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END); } debugnet_mbuf_finish(); g_debugnet_pcb_inuse = false; memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb)); } int debugnet_connect(const struct debugnet_conn_params *dcp, struct debugnet_pcb **pcb_out) { struct debugnet_proto_aux herald_auxdata; struct debugnet_pcb *pcb; struct ifnet *ifp; int error; if (g_debugnet_pcb_inuse) { printf("%s: Only one connection at a time.\n", __func__); return (EBUSY); } pcb = &g_dnet_pcb; *pcb = (struct debugnet_pcb) { .dp_state = DN_STATE_INIT, .dp_client = dcp->dc_client, .dp_server = dcp->dc_server, .dp_gateway = dcp->dc_gateway, .dp_server_port = dcp->dc_herald_port, /* Initially */ .dp_client_port = dcp->dc_client_port, .dp_seqno = 1, .dp_ifp = dcp->dc_ifp, .dp_rx_handler = dcp->dc_rx_handler, }; /* Switch to the debugnet mbuf zones. */ debugnet_mbuf_start(); /* At least one needed parameter is missing; infer it. 
*/ if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY || pcb->dp_ifp == NULL) { struct sockaddr_in dest_sin, *gw_sin, *local_sin; - struct rtentry *dest_rt; struct ifnet *rt_ifp; + struct nhop_object *nh; memset(&dest_sin, 0, sizeof(dest_sin)); dest_sin = (struct sockaddr_in) { .sin_len = sizeof(dest_sin), .sin_family = AF_INET, .sin_addr.s_addr = pcb->dp_server, }; CURVNET_SET(vnet0); - dest_rt = rtalloc1((struct sockaddr *)&dest_sin, 0, - RTF_RNH_LOCKED); + nh = fib4_lookup_debugnet(RT_DEFAULT_FIB, dest_sin.sin_addr, 0, + NHR_NONE); CURVNET_RESTORE(); - if (dest_rt == NULL) { + if (nh == NULL) { printf("%s: Could not get route for that server.\n", __func__); error = ENOENT; goto cleanup; } - if (dest_rt->rt_gateway->sa_family == AF_INET) - gw_sin = (struct sockaddr_in *)dest_rt->rt_gateway; + if (nh->gw_sa.sa_family == AF_INET) + gw_sin = &nh->gw4_sa; else { - if (dest_rt->rt_gateway->sa_family == AF_LINK) + if (nh->gw_sa.sa_family == AF_LINK) DNETDEBUG("Destination address is on link.\n"); gw_sin = NULL; } - MPASS(dest_rt->rt_ifa->ifa_addr->sa_family == AF_INET); - local_sin = (struct sockaddr_in *)dest_rt->rt_ifa->ifa_addr; + MPASS(nh->nh_ifa->ifa_addr->sa_family == AF_INET); + local_sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr; - rt_ifp = dest_rt->rt_ifp; + rt_ifp = nh->nh_ifp; if (pcb->dp_client == INADDR_ANY) pcb->dp_client = local_sin->sin_addr.s_addr; if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL) pcb->dp_gateway = gw_sin->sin_addr.s_addr; if (pcb->dp_ifp == NULL) pcb->dp_ifp = rt_ifp; - - RTFREE_LOCKED(dest_rt); } ifp = pcb->dp_ifp; if (debugnet_debug > 0) { char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN], gwbuf[INET_ADDRSTRLEN]; inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf)); inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf)); if (pcb->dp_gateway != INADDR_ANY) inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf)); DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n", serbuf, 
pcb->dp_server_port, (pcb->dp_gateway == INADDR_ANY) ? "" : " via ", (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf, clibuf, pcb->dp_client_port, if_name(ifp)); } /* Validate iface is online and supported. */ if (!DEBUGNET_SUPPORTED_NIC(ifp)) { printf("%s: interface '%s' does not support debugnet\n", __func__, if_name(ifp)); error = ENODEV; goto cleanup; } if ((if_getflags(ifp) & IFF_UP) == 0) { printf("%s: interface '%s' link is down\n", __func__, if_name(ifp)); error = ENXIO; goto cleanup; } ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START); pcb->dp_event_started = true; /* * We maintain the invariant that g_debugnet_pcb_inuse is always true * while the debugnet ifp's if_input is overridden with * debugnet_pkt_in. */ g_debugnet_pcb_inuse = true; /* Make the card use *our* receive callback. */ pcb->dp_drv_input = ifp->if_input; ifp->if_input = debugnet_pkt_in; printf("%s: searching for %s MAC...\n", __func__, (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway"); error = debugnet_arp_gw(pcb); if (error != 0) { printf("%s: failed to locate MAC address\n", __func__); goto cleanup; } MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC); herald_auxdata = (struct debugnet_proto_aux) { .dp_offset_start = dcp->dc_herald_offset, .dp_aux2 = dcp->dc_herald_aux2, }; error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data, dcp->dc_herald_datalen, &herald_auxdata); if (error != 0) { printf("%s: failed to herald debugnet server\n", __func__); goto cleanup; } *pcb_out = pcb; return (0); cleanup: debugnet_free(pcb); return (error); } /* * Pre-allocated dump-time mbuf tracking. * * We just track the high water mark we've ever seen and allocate appropriately * for that iface/mtu combo. 
*/ static struct { int nmbuf; int ncl; int clsize; } dn_hwm; static struct mtx dn_hwm_lk; MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF); static void dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize) { bool any; any = false; mtx_lock(&dn_hwm_lk); if (nmbuf > dn_hwm.nmbuf) { any = true; dn_hwm.nmbuf = nmbuf; } else nmbuf = dn_hwm.nmbuf; if (ncl > dn_hwm.ncl) { any = true; dn_hwm.ncl = ncl; } else ncl = dn_hwm.ncl; if (clsize > dn_hwm.clsize) { any = true; dn_hwm.clsize = clsize; } else clsize = dn_hwm.clsize; mtx_unlock(&dn_hwm_lk); if (any) debugnet_mbuf_reinit(nmbuf, ncl, clsize); } void debugnet_any_ifnet_update(struct ifnet *ifp) { int clsize, nmbuf, ncl, nrxr; if (!DEBUGNET_SUPPORTED_NIC(ifp)) return; ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize); KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr)); /* * We need two headers per message on the transmit side. Multiply by * four to give us some breathing room. */ nmbuf = ncl * (4 + nrxr); ncl *= nrxr; /* * Bandaid for drivers that (incorrectly) advertise LinkUp before their * dn_init method is available. */ if (nmbuf == 0 || ncl == 0 || clsize == 0) { printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n", __func__, if_name(ifp), ifp); return; } dn_maybe_reinit_mbufs(nmbuf, ncl, clsize); } /* * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless * for us because drivers tend to if_attach before invoking DEBUGNET_SET(). * * On the other hand, hooking DEBUGNET_SET() itself may still be too early, * because the driver is still in attach. Since we cannot use down interfaces, * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient? ... Nope, at least * with vtnet and dhcpclient that event just never occurs. * * So that's how I've landed on the lower level ifnet_link_event. 
*/ static void dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state) { if (link_state == LINK_STATE_UP) debugnet_any_ifnet_update(ifp); } static eventhandler_tag dn_attach_cookie; static void dn_evh_init(void *ctx __unused) { dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event, dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); } SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL); /* * DDB parsing helpers for debugnet(4) consumers. */ #ifdef DDB struct my_inet_opt { bool has_opt; const char *printname; in_addr_t *result; }; static int dn_parse_optarg_ipv4(struct my_inet_opt *opt) { in_addr_t tmp; unsigned octet; int t; tmp = 0; for (octet = 0; octet < 4; octet++) { t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL); if (t != tNUMBER) { db_printf("%s:%s: octet %u expected number; found %d\n", __func__, opt->printname, octet, t); return (EINVAL); } /* * db_lex lexes '-' distinctly from the number itself, but * let's document that invariant. */ MPASS(db_tok_number >= 0); if (db_tok_number > UINT8_MAX) { db_printf("%s:%s: octet %u out of range: %jd\n", __func__, opt->printname, octet, (intmax_t)db_tok_number); return (EDOM); } /* Constructed host-endian and converted to network later. 
*/ tmp = (tmp << 8) | db_tok_number; if (octet < 3) { t = db_read_token_flags(DRT_WSPACE); if (t != tDOT) { db_printf("%s:%s: octet %u expected '.'; found" " %d\n", __func__, opt->printname, octet, t); return (EINVAL); } } } *opt->result = htonl(tmp); opt->has_opt = true; return (0); } int debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result) { struct ifnet *ifp; int t, error; bool want_ifp; char ch; struct my_inet_opt opt_client = { .printname = "client", .result = &result->dd_client, }, opt_server = { .printname = "server", .result = &result->dd_server, }, opt_gateway = { .printname = "gateway", .result = &result->dd_gateway, }, *cur_inet_opt; ifp = NULL; memset(result, 0, sizeof(*result)); /* * command [space] [-] [opt] [[space] [optarg]] ... * * db_command has already lexed 'command' for us. */ t = db_read_token_flags(DRT_WSPACE); if (t == tWSPACE) t = db_read_token_flags(DRT_WSPACE); while (t != tEOL) { if (t != tMINUS) { db_printf("%s: Bad syntax; expected '-', got %d\n", cmd, t); goto usage; } t = db_read_token_flags(DRT_WSPACE); if (t != tIDENT) { db_printf("%s: Bad syntax; expected tIDENT, got %d\n", cmd, t); goto usage; } if (strlen(db_tok_string) > 1) { db_printf("%s: Bad syntax; expected single option " "flag, got '%s'\n", cmd, db_tok_string); goto usage; } want_ifp = false; cur_inet_opt = NULL; switch ((ch = db_tok_string[0])) { default: DNETDEBUG("Unexpected: '%c'\n", ch); /* FALLTHROUGH */ case 'h': goto usage; case 'c': cur_inet_opt = &opt_client; break; case 'g': cur_inet_opt = &opt_gateway; break; case 's': cur_inet_opt = &opt_server; break; case 'i': want_ifp = true; break; } t = db_read_token_flags(DRT_WSPACE); if (t != tWSPACE) { db_printf("%s: Bad syntax; expected space after " "flag %c, got %d\n", cmd, ch, t); goto usage; } if (want_ifp) { t = db_read_token_flags(DRT_WSPACE); if (t != tIDENT) { db_printf("%s: Expected interface but got %d\n", cmd, t); goto usage; } CURVNET_SET(vnet0); /* * We *don't* take a ref here 
because the only current * consumer, db_netdump_cmd, does not need it. It * (somewhat redundantly) extracts the if_name(), * re-lookups the ifp, and takes its own reference. */ ifp = ifunit(db_tok_string); CURVNET_RESTORE(); if (ifp == NULL) { db_printf("Could not locate interface %s\n", db_tok_string); goto cleanup; } } else { MPASS(cur_inet_opt != NULL); /* Assume IPv4 for now. */ error = dn_parse_optarg_ipv4(cur_inet_opt); if (error != 0) goto cleanup; } /* Skip (mandatory) whitespace after option, if not EOL. */ t = db_read_token_flags(DRT_WSPACE); if (t == tEOL) break; if (t != tWSPACE) { db_printf("%s: Bad syntax; expected space after " "flag %c option; got %d\n", cmd, ch, t); goto usage; } t = db_read_token_flags(DRT_WSPACE); } if (!opt_server.has_opt) { db_printf("%s: need a destination server address\n", cmd); goto usage; } result->dd_has_client = opt_client.has_opt; result->dd_has_gateway = opt_gateway.has_opt; result->dd_ifp = ifp; /* We parsed the full line to tEOL already, or bailed with an error. */ return (0); usage: db_printf("Usage: %s -s [-g -c " "-i ]\n", cmd); error = EINVAL; /* FALLTHROUGH */ cleanup: db_skip_to_eol(); return (error); } #endif /* DDB */ Index: head/sys/netinet/in_fib.c =================================================================== --- head/sys/netinet/in_fib.c (revision 360348) +++ head/sys/netinet/in_fib.c (revision 360349) @@ -1,364 +1,406 @@ /*- * Copyright (c) 2015 * Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_route.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #ifdef INET /* Verify struct route compatiblity */ /* Assert 'struct route_in' is compatible with 'struct route' */ CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4); static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_basic *pnh4); static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_extended *pnh4); #define RNTORT(p) ((struct rtentry *)(p)) static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_basic *pnh4) { if ((flags & NHR_IFAIF) != 0) pnh4->nh_ifp = nh->nh_ifa->ifa_ifp; else pnh4->nh_ifp = nh->nh_ifp; pnh4->nh_mtu = nh->nh_mtu; if (nh->nh_flags & NHF_GATEWAY) 
pnh4->nh_addr = nh->gw4_sa.sin_addr; else pnh4->nh_addr = dst; /* Set flags */ pnh4->nh_flags = nh->nh_flags; /* TODO: Handle RTF_BROADCAST here */ } static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_extended *pnh4) { if ((flags & NHR_IFAIF) != 0) pnh4->nh_ifp = nh->nh_ifa->ifa_ifp; else pnh4->nh_ifp = nh->nh_ifp; pnh4->nh_mtu = nh->nh_mtu; if (nh->nh_flags & NHF_GATEWAY) pnh4->nh_addr = nh->gw4_sa.sin_addr; else pnh4->nh_addr = dst; /* Set flags */ pnh4->nh_flags = nh->nh_flags; pnh4->nh_ia = ifatoia(nh->nh_ifa); pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr; } /* * Performs IPv4 route table lookup on @dst. Returns 0 on success. * Stores nexthop info provided @pnh4 structure. * Note that * - nh_ifp cannot be safely dereferenced * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if * looking up address on interface "ix0" pointer to "lo0" interface * will be returned instead of "ix0") * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - howewer mtu from "transmit" interface will be returned. */ int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_basic *pnh4) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in sin; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_basic: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = dst; RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { nh = RNTORT(rn)->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { fib4_rte_to_nh_basic(nh, dst, flags, pnh4); RIB_RUNLOCK(rh); return (0); } } RIB_RUNLOCK(rh); return (ENOENT); } /* * Performs IPv4 route table lookup on @dst. Returns 0 on success. 
* Stores extende nexthop info provided @pnh4 structure. * Note that * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified. * - in that case you need to call fib4_free_nh_ext() * - nh_ifp represents logical transmit interface (rt_ifp) (e.g. if * looking up address of interface "ix0" pointer to "lo0" interface * will be returned instead of "ix0") * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - howewer mtu from "transmit" interface will be returned. */ int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_extended *pnh4) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in sin; struct rtentry *rte; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_addr = dst; RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); #ifdef RADIX_MPATH rte = rt_mpath_select(rte, flowid); if (rte == NULL) { RIB_RUNLOCK(rh); return (ENOENT); } #endif nh = rte->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { fib4_rte_to_nh_extended(nh, dst, flags, pnh4); if ((flags & NHR_REF) != 0) { /* TODO: lwref on egress ifp's ? */ } RIB_RUNLOCK(rh); return (0); } } RIB_RUNLOCK(rh); return (ENOENT); } void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4) { } /* * Looks up path in fib @fibnum specified by @dst. * Returns path nexthop on success. Nexthop is safe to use * within the current network epoch. If longer lifetime is required, * one needs to pass NHR_REF as a flag. This will return referenced * nexthop. 
*/ struct nhop_object * fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, uint32_t flowid) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib4_lookup: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (NULL); /* Prepare lookup key */ struct sockaddr_in sin4; memset(&sin4, 0, sizeof(sin4)); sin4.sin_family = AF_INET; sin4.sin_len = sizeof(struct sockaddr_in); sin4.sin_addr = dst; nh = NULL; RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin4, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); #ifdef RADIX_MPATH if (rt_mpath_next(rt) != NULL) rt = rt_mpath_selectrte(rt, flowid); #endif nh = rt->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { if (flags & NHR_REF) nhop_ref_object(nh); RIB_RUNLOCK(rh); return (nh); } } RIB_RUNLOCK(rh); RTSTAT_INC(rts_unreach); return (NULL); } inline static int check_urpf(const struct nhop_object *nh, uint32_t flags, const struct ifnet *src_if) { if (src_if != NULL && nh->nh_aifp == src_if) { return (1); } if (src_if == NULL) { if ((flags & NHR_NODEFAULT) == 0) return (1); else if ((nh->nh_flags & NHF_DEFAULT) == 0) return (1); } return (0); } #ifdef RADIX_MPATH inline static int check_urpf_mpath(struct rtentry *rt, uint32_t flags, const struct ifnet *src_if) { while (rt != NULL) { if (check_urpf(rt->rt_nhop, flags, src_if) != 0) return (1); rt = rt_mpath_next(rt); } return (0); } #endif /* * Performs reverse path forwarding lookup. * If @src_if is non-NULL, verifies that at least 1 path goes via * this interface. * If @src_if is NULL, verifies that a route exists. * If @flags contains NHR_NODEFAULT, do not consider the default route. * * Returns 1 if a route matching the conditions is found, 0 otherwise.
*/ int fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, const struct ifnet *src_if) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; int ret; KASSERT((fibnum < rt_numfibs), ("fib4_check_urpf: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET); if (rh == NULL) return (0); /* Prepare lookup key */ struct sockaddr_in sin4; memset(&sin4, 0, sizeof(sin4)); sin4.sin_len = sizeof(struct sockaddr_in); sin4.sin_addr = dst; RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin4, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); #ifdef RADIX_MPATH ret = check_urpf_mpath(rt, flags, src_if); #else ret = check_urpf(rt->rt_nhop, flags, src_if); #endif RIB_RUNLOCK(rh); return (ret); } RIB_RUNLOCK(rh); return (0); } +struct nhop_object * +fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, + uint32_t flags) +{ + struct rib_head *rh; + struct radix_node *rn; + struct rtentry *rt; + struct nhop_object *nh; + + KASSERT((fibnum < rt_numfibs), ("fib4_lookup_debugnet: bad fibnum")); + rh = rt_tables_get_rnh(fibnum, AF_INET); + if (rh == NULL) + return (NULL); + + /* Prepare lookup key */ + struct sockaddr_in sin4; + memset(&sin4, 0, sizeof(sin4)); + sin4.sin_family = AF_INET; + sin4.sin_len = sizeof(struct sockaddr_in); + sin4.sin_addr = dst; + + nh = NULL; + /* Unlocked lookup: unlike fib4_lookup(), no RIB_RLOCK is taken around rnh_matchaddr() and multipath selection always uses flow id 0. */ + rn = rh->rnh_matchaddr((void *)&sin4, &rh->head); + if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { + rt = RNTORT(rn); +#ifdef RADIX_MPATH + if (rt_mpath_next(rt) != NULL) + rt = rt_mpath_selectrte(rt, 0); +#endif + nh = rt->rt_nhop; + /* Ensure route & ifp is UP */ + if (RT_LINK_IS_UP(nh->nh_ifp)) { + if (flags & NHR_REF) + nhop_ref_object(nh); + return (nh); + } + } + + return (NULL); +} + #endif Index: head/sys/netinet/in_fib.h =================================================================== --- head/sys/netinet/in_fib.h (revision 360348) +++ head/sys/netinet/in_fib.h
(revision 360349) @@ -1,79 +1,81 @@ /*- * Copyright (c) 2015 * Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _NETINET_IN_FIB_H_ #define _NETINET_IN_FIB_H_ struct route_in { /* common fields shared among all 'struct route' */ struct nhop_object *ro_nh; struct llentry *ro_lle; char *ro_prepend; uint16_t ro_plen; uint16_t ro_flags; uint16_t ro_mtu; /* saved ro_rt mtu */ uint16_t spare; /* custom sockaddr */ struct sockaddr_in ro_dst4; }; /* Basic nexthop info used for uRPF/mtu checks */ struct nhop4_basic { struct ifnet *nh_ifp; /* Logical egress interface */ uint16_t nh_mtu; /* nexthop mtu */ uint16_t nh_flags; /* nhop flags */ struct in_addr nh_addr; /* GW/DST IPv4 address */ }; /* Extended nexthop info used for control protocols */ struct nhop4_extended { struct ifnet *nh_ifp; /* Logical egress interface */ struct in_ifaddr *nh_ia; /* Associated address */ uint16_t nh_mtu; /* nexthop mtu */ uint16_t nh_flags; /* nhop flags */ uint8_t spare[4]; struct in_addr nh_addr; /* GW/DST IPv4 address */ struct in_addr nh_src; /* default source IPv4 address */ uint64_t spare2; }; int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_basic *pnh4); int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_extended *pnh4); void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4); struct nhop_object *fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, uint32_t flowid); int fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid, uint32_t flags, const struct ifnet *src_if); +struct nhop_object *fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, + uint32_t scopeid, uint32_t flags); #endif Index: head/sys/netinet6/in6_fib.c =================================================================== --- head/sys/netinet6/in6_fib.c (revision 360348) +++ head/sys/netinet6/in6_fib.c (revision 360349) @@ -1,401 +1,442 @@ /*- * Copyright (c) 2015 * Alexander V. 
Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #include #include #include #include #include #include #ifdef INET6 static void fib6_rte_to_nh_extended(const struct nhop_object *nh, const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6); static void fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst, uint32_t flags, struct nhop6_basic *pnh6); #define RNTORT(p) ((struct rtentry *)(p)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst); static void fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst, uint32_t flags, struct nhop6_basic *pnh6) { /* Do explicit nexthop zero unless we're copying it */ memset(pnh6, 0, sizeof(*pnh6)); if ((flags & NHR_IFAIF) != 0) pnh6->nh_ifp = nh->nh_aifp; else pnh6->nh_ifp = nh->nh_ifp; pnh6->nh_mtu = nh->nh_mtu; if (nh->nh_flags & NHF_GATEWAY) { /* Return address with embedded scope. */ pnh6->nh_addr = nh->gw6_sa.sin6_addr; } else pnh6->nh_addr = *dst; /* Set flags */ pnh6->nh_flags = nh->nh_flags; } static void fib6_rte_to_nh_extended(const struct nhop_object *nh, const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6) { /* Do explicit nexthop zero unless we're copying it */ memset(pnh6, 0, sizeof(*pnh6)); if ((flags & NHR_IFAIF) != 0) pnh6->nh_ifp = nh->nh_aifp; else pnh6->nh_ifp = nh->nh_ifp; pnh6->nh_mtu = nh->nh_mtu; if (nh->nh_flags & NHF_GATEWAY) { /* Return address with embedded scope. */ pnh6->nh_addr = nh->gw6_sa.sin6_addr; } else pnh6->nh_addr = *dst; /* Set flags */ pnh6->nh_flags = nh->nh_flags; pnh6->nh_ia = ifatoia6(nh->nh_ifa); } /* * Performs IPv6 route table lookup on @dst. Returns 0 on success. 
* Stores basic nexthop info into the provided @pnh6 structure. * Note that * - nh_ifp represents logical transmit interface (rt_ifp) by default * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - mtu from logical transmit interface will be returned. * - nh_ifp cannot be safely dereferenced * - nh_ifp represents rt_ifp (e.g. if looking up address on * interface "ix0" pointer to "ix0" interface will be returned instead * of "lo0") * - however, mtu from "transmit" interface will be returned. * - scope will be embedded in nh_addr */ int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_addr = *dst; sin6.sin6_len = sizeof(struct sockaddr_in6); /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { nh = RNTORT(rn)->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { fib6_rte_to_nh_basic(nh, &sin6.sin6_addr, flags, pnh6); RIB_RUNLOCK(rh); return (0); } } RIB_RUNLOCK(rh); return (ENOENT); } /* * Performs IPv6 route table lookup on @dst. Returns 0 on success. * Stores extended nexthop info into provided @pnh6 structure. * Note that * - nh_ifp cannot be safely dereferenced unless NHR_REF is specified.
* - in that case you need to call fib6_free_nh_ext() * - nh_ifp represents logical transmit interface (rt_ifp) by default * - nh_ifp represents "address" interface if NHR_IFAIF flag is passed * - mtu from logical transmit interface will be returned. * - scope will be embedded in nh_addr */ int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; struct rtentry *rte; struct nhop_object *nh; KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (ENOENT); /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst; /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rte = RNTORT(rn); #ifdef RADIX_MPATH rte = rt_mpath_select(rte, flowid); if (rte == NULL) { RIB_RUNLOCK(rh); return (ENOENT); } #endif nh = rte->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { fib6_rte_to_nh_extended(nh, &sin6.sin6_addr, flags, pnh6); if ((flags & NHR_REF) != 0) { /* TODO: Do lwref on egress ifp's */ } RIB_RUNLOCK(rh); return (0); } } RIB_RUNLOCK(rh); return (ENOENT); } void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6) { } /* * Looks up path in fib @fibnum specified by @dst. * Assumes scope is deembedded and provided in @scopeid. * * Returns path nexthop on success. Nexthop is safe to use * within the current network epoch. If longer lifetime is required, * one needs to pass NHR_REF as a flag. This will return referenced * nexthop. 
*/ struct nhop_object * fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags, uint32_t flowid) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; struct nhop_object *nh; struct sockaddr_in6 sin6; KASSERT((fibnum < rt_numfibs), ("fib6_lookup: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (NULL); /* TODO: radix changes */ //addr = *dst6; /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst6; /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst6)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); #ifdef RADIX_MPATH if (rt_mpath_next(rt) != NULL) rt = rt_mpath_selectrte(rt, flowid); #endif nh = rt->rt_nhop; /* Ensure route & ifp is UP */ if (RT_LINK_IS_UP(nh->nh_ifp)) { if (flags & NHR_REF) nhop_ref_object(nh); RIB_RUNLOCK(rh); return (nh); } } RIB_RUNLOCK(rh); RTSTAT_INC(rts_unreach); return (NULL); } inline static int check_urpf(const struct nhop_object *nh, uint32_t flags, const struct ifnet *src_if) { if (src_if != NULL && nh->nh_aifp == src_if) { return (1); } if (src_if == NULL) { if ((flags & NHR_NODEFAULT) == 0) return (1); else if ((nh->nh_flags & NHF_DEFAULT) == 0) return (1); } return (0); } #ifdef RADIX_MPATH inline static int check_urpf_mpath(struct rtentry *rt, uint32_t flags, const struct ifnet *src_if) { while (rt != NULL) { if (check_urpf(rt->rt_nhop, flags, src_if) != 0) return (1); rt = rt_mpath_next(rt); } return (0); } #endif /* * Performs reverse path forwarding lookup. * If @src_if is non-NULL, verifies that at least 1 path goes via * this interface. * If @src_if is NULL, verifies that a route exists. * If @flags contains NHR_NODEFAULT, do not consider the default route.
* * Returns 1 if route matching conditions is found, 0 otherwise. */ int fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags, const struct ifnet *src_if) { RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; struct sockaddr_in6 sin6; int ret; KASSERT((fibnum < rt_numfibs), ("fib6_check_urpf: bad fibnum")); rh = rt_tables_get_rnh(fibnum, AF_INET6); if (rh == NULL) return (0); /* TODO: radix changes */ /* Prepare lookup key */ memset(&sin6, 0, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst6; /* Assume scopeid is valid and embed it directly */ if (IN6_IS_SCOPE_LINKLOCAL(dst6)) sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); RIB_RLOCK(rh); rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { rt = RNTORT(rn); #ifdef RADIX_MPATH ret = check_urpf_mpath(rt, flags, src_if); #else ret = check_urpf(rt->rt_nhop, flags, src_if); #endif RIB_RUNLOCK(rh); return (ret); } RIB_RUNLOCK(rh); return (0); } +/* + * Lockless variant of fib6_lookup(): builds the same lookup key but calls + * rnh_matchaddr() without taking RIB_RLOCK and does no flowid-based path + * selection. + */ +struct nhop_object * +fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6, + uint32_t scopeid, uint32_t flags) +{ + struct rib_head *rh; + struct radix_node *rn; + struct rtentry *rt; + struct nhop_object *nh; + struct sockaddr_in6 sin6; + + KASSERT((fibnum < rt_numfibs), ("fib6_lookup_debugnet: bad fibnum")); + rh = rt_tables_get_rnh(fibnum, AF_INET6); + if (rh == NULL) + return (NULL); + + /* TODO: radix changes */ + /* Prepare lookup key */ + memset(&sin6, 0, sizeof(sin6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_addr = *dst6; + + /* Assume scopeid is valid and embed it directly */ + if (IN6_IS_SCOPE_LINKLOCAL(dst6)) + sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); + + /* unlocked lookup */ + rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); + if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) { + rt = RNTORT(rn); + nh = rt->rt_nhop; + /* Ensure route & ifp is UP */ + if
(RT_LINK_IS_UP(nh->nh_ifp)) { + if (flags & NHR_REF) + nhop_ref_object(nh); + return (nh); + } + } + + return (NULL); +} + #endif Index: head/sys/netinet6/in6_fib.h =================================================================== --- head/sys/netinet6/in6_fib.h (revision 360348) +++ head/sys/netinet6/in6_fib.h (revision 360349) @@ -1,68 +1,70 @@ /*- * Copyright (c) 2015 * Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _NETINET6_IN6_FIB_H_ #define _NETINET6_IN6_FIB_H_ /* Basic nexthop info used for uRPF/mtu checks */ struct nhop6_basic { struct ifnet *nh_ifp; /* Logical egress interface */ uint16_t nh_mtu; /* nexthop mtu */ uint16_t nh_flags; /* nhop flags */ uint8_t spare[4]; struct in6_addr nh_addr; /* GW/DST IPv6 address */ }; /* Extended nexthop info used for control protocols. */ struct nhop6_extended { struct ifnet *nh_ifp; /* Logical egress interface */ struct in6_ifaddr *nh_ia; /* Associated address. */ uint16_t nh_mtu; /* nexthop mtu */ uint16_t nh_flags; /* nhop flags */ uint8_t spare[4]; struct in6_addr nh_addr; /* GW/DST IPv6 address */ uint64_t spare2[1]; }; int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid,struct nhop6_basic *pnh6); int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6); void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6); struct nhop_object *fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags, uint32_t flowid); int fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags, const struct ifnet *src_if); +struct nhop_object *fib6_lookup_debugnet(uint32_t fibnum, + const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags); #endif