diff --git a/sys/net/debugnet.c b/sys/net/debugnet.c index c6f57ec84618..8b1419bcaa28 100644 --- a/sys/net/debugnet.c +++ b/sys/net/debugnet.c @@ -1,1129 +1,1125 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2019 Isilon Systems, LLC. * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved. * Copyright (c) 2000 Darrell Anderson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ddb.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUGNET_INTERNAL #include FEATURE(debugnet, "Debugnet support"); SYSCTL_NODE(_net, OID_AUTO, debugnet, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "debugnet parameters"); unsigned debugnet_debug; SYSCTL_UINT(_net_debugnet, OID_AUTO, debug, CTLFLAG_RWTUN, &debugnet_debug, 0, "Debug message verbosity (0: off; 1: on; 2: verbose)"); int debugnet_npolls = 2000; SYSCTL_INT(_net_debugnet, OID_AUTO, npolls, CTLFLAG_RWTUN, &debugnet_npolls, 0, "Number of times to poll before assuming packet loss (0.5ms per poll)"); int debugnet_nretries = 10; SYSCTL_INT(_net_debugnet, OID_AUTO, nretries, CTLFLAG_RWTUN, &debugnet_nretries, 0, "Number of retransmit attempts before giving up"); int debugnet_fib = RT_DEFAULT_FIB; SYSCTL_INT(_net_debugnet, OID_AUTO, fib, CTLFLAG_RWTUN, &debugnet_fib, 0, "Fib to use when sending dump"); static bool g_debugnet_pcb_inuse; static struct debugnet_pcb g_dnet_pcb; /* * Simple accessors for opaque PCB. */ const unsigned char * debugnet_get_gw_mac(const struct debugnet_pcb *pcb) { MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && pcb->dp_state >= DN_STATE_HAVE_GW_MAC); return (pcb->dp_gw_mac.octet); } const in_addr_t * debugnet_get_server_addr(const struct debugnet_pcb *pcb) { MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && pcb->dp_state >= DN_STATE_GOT_HERALD_PORT); return (&pcb->dp_server); } const uint16_t debugnet_get_server_port(const struct debugnet_pcb *pcb) { MPASS(g_debugnet_pcb_inuse && pcb == &g_dnet_pcb && pcb->dp_state >= DN_STATE_GOT_HERALD_PORT); return (pcb->dp_server_port); } /* * Start of network primitives, beginning with output primitives. */ /* * Handles creation of the ethernet header, then places outgoing packets into * the tx buffer for the NIC * * Parameters: * m The mbuf containing the packet to be sent (will be freed by * this function or the NIC driver) * ifp The interface to send on * dst The destination ethernet address (source address will be looked * up using ifp) * etype The ETHERTYPE_* value for the protocol that is being sent * * Returns: * int see errno.h, 0 for success */ int debugnet_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst, u_short etype) { struct ether_header *eh; if (((ifp->if_flags & (IFF_MONITOR | IFF_UP)) != IFF_UP) || (ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) { if_printf(ifp, "%s: interface isn't up\n", __func__); m_freem(m); return (ENETDOWN); } /* Fill in the ethernet header. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); if (m == NULL) { printf("%s: out of mbufs\n", __func__); return (ENOBUFS); } eh = mtod(m, struct ether_header *); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); memcpy(eh->ether_dhost, dst.octet, ETHER_ADDR_LEN); eh->ether_type = htons(etype); return (ifp->if_debugnet_methods->dn_transmit(ifp, m)); } /* * Unreliable transmission of an mbuf chain to the debugnet server * Note: can't handle fragmentation; fails if the packet is larger than * ifp->if_mtu after adding the UDP/IP headers * * Parameters: * pcb The debugnet context block * m mbuf chain * * Returns: * int see errno.h, 0 for success */ static int debugnet_udp_output(struct debugnet_pcb *pcb, struct mbuf *m) { struct udphdr *udp; MPASS(pcb->dp_state >= DN_STATE_HAVE_GW_MAC); M_PREPEND(m, sizeof(*udp), M_NOWAIT); if (m == NULL) { printf("%s: out of mbufs\n", __func__); return (ENOBUFS); } udp = mtod(m, void *); udp->uh_ulen = htons(m->m_pkthdr.len); /* Use this src port so that the server can connect() the socket */ udp->uh_sport = htons(pcb->dp_client_port); udp->uh_dport = htons(pcb->dp_server_port); /* Computed later (protocol-dependent). */ udp->uh_sum = 0; return (debugnet_ip_output(pcb, m)); } int debugnet_ack_output(struct debugnet_pcb *pcb, uint32_t seqno /* net endian */) { struct debugnet_ack *dn_ack; struct mbuf *m; DNETDEBUG("Acking with seqno %u\n", ntohl(seqno)); m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("%s: Out of mbufs\n", __func__); return (ENOBUFS); } m->m_len = sizeof(*dn_ack); m->m_pkthdr.len = sizeof(*dn_ack); MH_ALIGN(m, sizeof(*dn_ack)); dn_ack = mtod(m, void *); dn_ack->da_seqno = seqno; return (debugnet_udp_output(pcb, m)); } /* * Dummy free function for debugnet clusters. */ static void debugnet_mbuf_free(struct mbuf *m __unused) { } /* * Construct and reliably send a debugnet packet. May fail from a resource * shortage or extreme number of unacknowledged retransmissions. Wait for * an acknowledgement before returning. Splits packets into chunks small * enough to be sent without fragmentation (looks up the interface MTU) * * Parameters: * type debugnet packet type (HERALD, FINISHED, ...) * data data * datalen data size (bytes) * auxdata optional auxiliary information * * Returns: * int see errno.h, 0 for success */ int debugnet_send(struct debugnet_pcb *pcb, uint32_t type, const void *data, uint32_t datalen, const struct debugnet_proto_aux *auxdata) { struct debugnet_msg_hdr *dn_msg_hdr; struct mbuf *m, *m2; uint64_t want_acks; uint32_t i, pktlen, sent_so_far; int retries, polls, error; if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) return (ECONNRESET); want_acks = 0; pcb->dp_rcvd_acks = 0; retries = 0; retransmit: /* Chunks can be too big to fit in packets. */ for (i = sent_so_far = 0; sent_so_far < datalen || (i == 0 && datalen == 0); i++) { pktlen = datalen - sent_so_far; /* Bound: the interface MTU (assume no IP options). */ pktlen = min(pktlen, pcb->dp_ifp->if_mtu - sizeof(struct udpiphdr) - sizeof(struct debugnet_msg_hdr)); /* * Check if it is retransmitting and this has been ACKed * already. */ if ((pcb->dp_rcvd_acks & (1 << i)) != 0) { sent_so_far += pktlen; continue; } /* * Get and fill a header mbuf, then chain data as an extended * mbuf. */ m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { printf("%s: Out of mbufs\n", __func__); return (ENOBUFS); } m->m_len = sizeof(struct debugnet_msg_hdr); m->m_pkthdr.len = sizeof(struct debugnet_msg_hdr); MH_ALIGN(m, sizeof(struct debugnet_msg_hdr)); dn_msg_hdr = mtod(m, struct debugnet_msg_hdr *); dn_msg_hdr->mh_seqno = htonl(pcb->dp_seqno + i); dn_msg_hdr->mh_type = htonl(type); dn_msg_hdr->mh_len = htonl(pktlen); if (auxdata != NULL) { dn_msg_hdr->mh_offset = htobe64(auxdata->dp_offset_start + sent_so_far); dn_msg_hdr->mh_aux2 = htobe32(auxdata->dp_aux2); } else { dn_msg_hdr->mh_offset = htobe64(sent_so_far); dn_msg_hdr->mh_aux2 = 0; } if (pktlen != 0) { m2 = m_get(M_NOWAIT, MT_DATA); if (m2 == NULL) { m_freem(m); printf("%s: Out of mbufs\n", __func__); return (ENOBUFS); } MEXTADD(m2, __DECONST(char *, data) + sent_so_far, pktlen, debugnet_mbuf_free, NULL, NULL, 0, EXT_DISPOSABLE); m2->m_len = pktlen; m_cat(m, m2); m->m_pkthdr.len += pktlen; } error = debugnet_udp_output(pcb, m); if (error != 0) return (error); /* Note that we're waiting for this packet in the bitfield. */ want_acks |= (1 << i); sent_so_far += pktlen; } if (i >= DEBUGNET_MAX_IN_FLIGHT) printf("Warning: Sent more than %d packets (%d). " "Acknowledgements will fail unless the size of " "rcvd_acks/want_acks is increased.\n", DEBUGNET_MAX_IN_FLIGHT, i); /* * Wait for acks. A *real* window would speed things up considerably. */ polls = 0; while (pcb->dp_rcvd_acks != want_acks) { if (polls++ > debugnet_npolls) { if (retries++ > debugnet_nretries) return (ETIMEDOUT); printf(". "); goto retransmit; } debugnet_network_poll(pcb); DELAY(500); if (pcb->dp_state == DN_STATE_REMOTE_CLOSED) return (ECONNRESET); } pcb->dp_seqno += i; return (0); } /* * Network input primitives. */ /* * Just introspect the header enough to fire off a seqno ack and validate * length fits. */ static void debugnet_handle_rx_msg(struct debugnet_pcb *pcb, struct mbuf **mb) { const struct debugnet_msg_hdr *dnh; struct mbuf *m; uint32_t hdr_type; uint32_t seqno; int error; m = *mb; if (m->m_pkthdr.len < sizeof(*dnh)) { DNETDEBUG("ignoring small debugnet_msg packet\n"); return; } /* Get ND header. */ if (m->m_len < sizeof(*dnh)) { m = m_pullup(m, sizeof(*dnh)); *mb = m; if (m == NULL) { DNETDEBUG("m_pullup failed\n"); return; } } dnh = mtod(m, const void *); if (ntohl(dnh->mh_len) + sizeof(*dnh) > m->m_pkthdr.len) { DNETDEBUG("Dropping short packet.\n"); return; } hdr_type = ntohl(dnh->mh_type); if (hdr_type != DEBUGNET_DATA) { if (hdr_type == DEBUGNET_FINISHED) { printf("Remote shut down the connection on us!\n"); pcb->dp_state = DN_STATE_REMOTE_CLOSED; if (pcb->dp_finish_handler != NULL) { pcb->dp_finish_handler(); } } else { DNETDEBUG("Got unexpected debugnet message %u\n", hdr_type); } return; } /* * If the issue is transient (ENOBUFS), sender should resend. If * non-transient (like driver objecting to rx -> tx from the same * thread), not much else we can do. */ seqno = dnh->mh_seqno; /* net endian */ m_adj(m, sizeof(*dnh)); dnh = NULL; error = pcb->dp_rx_handler(m); if (error != 0) { DNETDEBUG("RX handler was not able to accept message, error %d. " "Skipping ack.\n", error); return; } error = debugnet_ack_output(pcb, seqno); if (error != 0) { DNETDEBUG("Couldn't ACK rx packet %u; %d\n", ntohl(seqno), error); } } static void debugnet_handle_ack(struct debugnet_pcb *pcb, struct mbuf **mb, uint16_t sport) { const struct debugnet_ack *dn_ack; struct mbuf *m; uint32_t rcv_ackno; m = *mb; /* Get Ack. */ if (m->m_len < sizeof(*dn_ack)) { m = m_pullup(m, sizeof(*dn_ack)); *mb = m; if (m == NULL) { DNETDEBUG("m_pullup failed\n"); return; } } dn_ack = mtod(m, const void *); /* Debugnet processing. */ /* * Packet is meant for us. Extract the ack sequence number and the * port number if necessary. */ rcv_ackno = ntohl(dn_ack->da_seqno); if (pcb->dp_state < DN_STATE_GOT_HERALD_PORT) { pcb->dp_server_port = sport; pcb->dp_state = DN_STATE_GOT_HERALD_PORT; } if (rcv_ackno >= pcb->dp_seqno + DEBUGNET_MAX_IN_FLIGHT) printf("%s: ACK %u too far in future!\n", __func__, rcv_ackno); else if (rcv_ackno >= pcb->dp_seqno) { /* We're interested in this ack. Record it. */ pcb->dp_rcvd_acks |= 1 << (rcv_ackno - pcb->dp_seqno); } } void debugnet_handle_udp(struct debugnet_pcb *pcb, struct mbuf **mb) { const struct udphdr *udp; struct mbuf *m; uint16_t sport, ulen; /* UDP processing. */ m = *mb; if (m->m_pkthdr.len < sizeof(*udp)) { DNETDEBUG("ignoring small UDP packet\n"); return; } /* Get UDP headers. */ if (m->m_len < sizeof(*udp)) { m = m_pullup(m, sizeof(*udp)); *mb = m; if (m == NULL) { DNETDEBUG("m_pullup failed\n"); return; } } udp = mtod(m, const void *); /* We expect to receive UDP packets on the configured client port. */ if (ntohs(udp->uh_dport) != pcb->dp_client_port) { DNETDEBUG("not on the expected port.\n"); return; } /* Check that ulen does not exceed actual size of data. */ ulen = ntohs(udp->uh_ulen); if (m->m_pkthdr.len < ulen) { DNETDEBUG("ignoring runt UDP packet\n"); return; } sport = ntohs(udp->uh_sport); m_adj(m, sizeof(*udp)); ulen -= sizeof(*udp); if (ulen == sizeof(struct debugnet_ack)) { debugnet_handle_ack(pcb, mb, sport); return; } if (pcb->dp_rx_handler == NULL) { if (ulen < sizeof(struct debugnet_ack)) DNETDEBUG("ignoring small ACK packet\n"); else DNETDEBUG("ignoring unexpected non-ACK packet on " "half-duplex connection.\n"); return; } debugnet_handle_rx_msg(pcb, mb); } /* * Handler for incoming packets directly from the network adapter * Identifies the packet type (IP or ARP) and passes it along to one of the * helper functions debugnet_handle_ip or debugnet_handle_arp. * * It needs to partially replicate the behaviour of ether_input() and * ether_demux(). * * Parameters: * ifp the interface the packet came from * m an mbuf containing the packet received */ static void debugnet_input_one(struct ifnet *ifp, struct mbuf *m) { struct ifreq ifr; struct ether_header *eh; u_short etype; /* Ethernet processing. */ if ((m->m_flags & M_PKTHDR) == 0) { DNETDEBUG_IF(ifp, "discard frame without packet header\n"); goto done; } if (m->m_len < ETHER_HDR_LEN) { DNETDEBUG_IF(ifp, "discard frame without leading eth header (len %d pktlen %d)\n", m->m_len, m->m_pkthdr.len); goto done; } - if ((m->m_flags & M_HASFCS) != 0) { - m_adj(m, -ETHER_CRC_LEN); - m->m_flags &= ~M_HASFCS; - } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) { DNETDEBUG_IF(ifp, "ignoring vlan packets\n"); goto done; } if (if_gethwaddr(ifp, &ifr) != 0) { DNETDEBUG_IF(ifp, "failed to get hw addr for interface\n"); goto done; } if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost, ETHER_ADDR_LEN) != 0 && (etype != ETHERTYPE_ARP || !ETHER_IS_BROADCAST(eh->ether_dhost))) { DNETDEBUG_IF(ifp, "discard frame with incorrect destination addr\n"); goto done; } MPASS(g_debugnet_pcb_inuse); /* Done ethernet processing. Strip off the ethernet header. */ m_adj(m, ETHER_HDR_LEN); switch (etype) { case ETHERTYPE_ARP: debugnet_handle_arp(&g_dnet_pcb, &m); break; case ETHERTYPE_IP: debugnet_handle_ip(&g_dnet_pcb, &m); break; default: DNETDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype); break; } done: if (m != NULL) m_freem(m); } static void debugnet_input(struct ifnet *ifp, struct mbuf *m) { struct mbuf *n; do { n = m->m_nextpkt; m->m_nextpkt = NULL; debugnet_input_one(ifp, m); m = n; } while (m != NULL); } /* * Network polling primitive. * * Instead of assuming that most of the network stack is sane, we just poll the * driver directly for packets. */ void debugnet_network_poll(struct debugnet_pcb *pcb) { struct ifnet *ifp; ifp = pcb->dp_ifp; ifp->if_debugnet_methods->dn_poll(ifp, 1000); } /* * Start of consumer API surface. */ void debugnet_free(struct debugnet_pcb *pcb) { struct ifnet *ifp; MPASS(pcb == &g_dnet_pcb); MPASS(pcb->dp_drv_input == NULL || g_debugnet_pcb_inuse); ifp = pcb->dp_ifp; if (ifp != NULL) { if (pcb->dp_drv_input != NULL) ifp->if_input = pcb->dp_drv_input; if (pcb->dp_event_started) ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_END); } debugnet_mbuf_finish(); g_debugnet_pcb_inuse = false; memset(&g_dnet_pcb, 0xfd, sizeof(g_dnet_pcb)); } int debugnet_connect(const struct debugnet_conn_params *dcp, struct debugnet_pcb **pcb_out) { struct debugnet_proto_aux herald_auxdata; struct debugnet_pcb *pcb; struct ifnet *ifp; int error; if (g_debugnet_pcb_inuse) { printf("%s: Only one connection at a time.\n", __func__); return (EBUSY); } pcb = &g_dnet_pcb; *pcb = (struct debugnet_pcb) { .dp_state = DN_STATE_INIT, .dp_client = dcp->dc_client, .dp_server = dcp->dc_server, .dp_gateway = dcp->dc_gateway, .dp_server_port = dcp->dc_herald_port, /* Initially */ .dp_client_port = dcp->dc_client_port, .dp_seqno = 1, .dp_ifp = dcp->dc_ifp, .dp_rx_handler = dcp->dc_rx_handler, .dp_drv_input = NULL, }; /* Switch to the debugnet mbuf zones. */ debugnet_mbuf_start(); /* At least one needed parameter is missing; infer it. */ if (pcb->dp_client == INADDR_ANY || pcb->dp_gateway == INADDR_ANY || pcb->dp_ifp == NULL) { struct sockaddr_in dest_sin, *gw_sin, *local_sin; struct ifnet *rt_ifp; struct nhop_object *nh; memset(&dest_sin, 0, sizeof(dest_sin)); dest_sin = (struct sockaddr_in) { .sin_len = sizeof(dest_sin), .sin_family = AF_INET, .sin_addr.s_addr = pcb->dp_server, }; CURVNET_SET(vnet0); nh = fib4_lookup_debugnet(debugnet_fib, dest_sin.sin_addr, 0, NHR_NONE); CURVNET_RESTORE(); if (nh == NULL) { printf("%s: Could not get route for that server.\n", __func__); error = ENOENT; goto cleanup; } /* TODO support AF_INET6 */ if (nh->gw_sa.sa_family == AF_INET) gw_sin = &nh->gw4_sa; else { if (nh->gw_sa.sa_family == AF_LINK) DNETDEBUG("Destination address is on link.\n"); gw_sin = NULL; } MPASS(nh->nh_ifa->ifa_addr->sa_family == AF_INET); local_sin = (struct sockaddr_in *)nh->nh_ifa->ifa_addr; rt_ifp = nh->nh_ifp; if (pcb->dp_client == INADDR_ANY) pcb->dp_client = local_sin->sin_addr.s_addr; if (pcb->dp_gateway == INADDR_ANY && gw_sin != NULL) pcb->dp_gateway = gw_sin->sin_addr.s_addr; if (pcb->dp_ifp == NULL) pcb->dp_ifp = rt_ifp; } ifp = pcb->dp_ifp; if (debugnet_debug > 0) { char serbuf[INET_ADDRSTRLEN], clibuf[INET_ADDRSTRLEN], gwbuf[INET_ADDRSTRLEN]; inet_ntop(AF_INET, &pcb->dp_server, serbuf, sizeof(serbuf)); inet_ntop(AF_INET, &pcb->dp_client, clibuf, sizeof(clibuf)); if (pcb->dp_gateway != INADDR_ANY) inet_ntop(AF_INET, &pcb->dp_gateway, gwbuf, sizeof(gwbuf)); DNETDEBUG("Connecting to %s:%d%s%s from %s:%d on %s\n", serbuf, pcb->dp_server_port, (pcb->dp_gateway == INADDR_ANY) ? "" : " via ", (pcb->dp_gateway == INADDR_ANY) ? "" : gwbuf, clibuf, pcb->dp_client_port, if_name(ifp)); } /* Validate iface is online and supported. */ if (!DEBUGNET_SUPPORTED_NIC(ifp)) { printf("%s: interface '%s' does not support debugnet\n", __func__, if_name(ifp)); error = ENODEV; goto cleanup; } if ((if_getflags(ifp) & IFF_UP) == 0) { printf("%s: interface '%s' link is down\n", __func__, if_name(ifp)); error = ENXIO; goto cleanup; } ifp->if_debugnet_methods->dn_event(ifp, DEBUGNET_START); pcb->dp_event_started = true; /* * We maintain the invariant that g_debugnet_pcb_inuse is always true * while the debugnet ifp's if_input is overridden with * debugnet_input(). */ g_debugnet_pcb_inuse = true; /* Make the card use *our* receive callback. */ pcb->dp_drv_input = ifp->if_input; ifp->if_input = debugnet_input; printf("%s: searching for %s MAC...\n", __func__, (dcp->dc_gateway == INADDR_ANY) ? "server" : "gateway"); error = debugnet_arp_gw(pcb); if (error != 0) { printf("%s: failed to locate MAC address\n", __func__); goto cleanup; } MPASS(pcb->dp_state == DN_STATE_HAVE_GW_MAC); herald_auxdata = (struct debugnet_proto_aux) { .dp_offset_start = dcp->dc_herald_offset, .dp_aux2 = dcp->dc_herald_aux2, }; error = debugnet_send(pcb, DEBUGNET_HERALD, dcp->dc_herald_data, dcp->dc_herald_datalen, &herald_auxdata); if (error != 0) { printf("%s: failed to herald debugnet server\n", __func__); goto cleanup; } *pcb_out = pcb; return (0); cleanup: debugnet_free(pcb); return (error); } /* * Pre-allocated dump-time mbuf tracking. * * We just track the high water mark we've ever seen and allocate appropriately * for that iface/mtu combo. */ static struct { int nmbuf; int ncl; int clsize; } dn_hwm; static struct mtx dn_hwm_lk; MTX_SYSINIT(debugnet_hwm_lock, &dn_hwm_lk, "Debugnet HWM lock", MTX_DEF); static void dn_maybe_reinit_mbufs(int nmbuf, int ncl, int clsize) { bool any; any = false; mtx_lock(&dn_hwm_lk); if (nmbuf > dn_hwm.nmbuf) { any = true; dn_hwm.nmbuf = nmbuf; } else nmbuf = dn_hwm.nmbuf; if (ncl > dn_hwm.ncl) { any = true; dn_hwm.ncl = ncl; } else ncl = dn_hwm.ncl; if (clsize > dn_hwm.clsize) { any = true; dn_hwm.clsize = clsize; } else clsize = dn_hwm.clsize; mtx_unlock(&dn_hwm_lk); if (any) debugnet_mbuf_reinit(nmbuf, ncl, clsize); } void debugnet_any_ifnet_update(struct ifnet *ifp) { int clsize, nmbuf, ncl, nrxr; if (!DEBUGNET_SUPPORTED_NIC(ifp)) return; ifp->if_debugnet_methods->dn_init(ifp, &nrxr, &ncl, &clsize); KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr)); /* * We need two headers per message on the transmit side. Multiply by * four to give us some breathing room. */ nmbuf = ncl * (4 + nrxr); ncl *= nrxr; /* * Bandaid for drivers that (incorrectly) advertise LinkUp before their * dn_init method is available. */ if (nmbuf == 0 || ncl == 0 || clsize == 0) { #ifndef INVARIANTS if (bootverbose) #endif printf("%s: Bad dn_init result from %s (ifp %p), ignoring.\n", __func__, if_name(ifp), ifp); return; } dn_maybe_reinit_mbufs(nmbuf, ncl, clsize); } /* * Unfortunately, the ifnet_arrival_event eventhandler hook is mostly useless * for us because drivers tend to if_attach before invoking DEBUGNET_SET(). * * On the other hand, hooking DEBUGNET_SET() itself may still be too early, * because the driver is still in attach. Since we cannot use down interfaces, * maybe hooking ifnet_event:IFNET_EVENT_UP is sufficient? ... Nope, at least * with vtnet and dhcpclient that event just never occurs. * * So that's how I've landed on the lower level ifnet_link_event. */ static void dn_ifnet_event(void *arg __unused, struct ifnet *ifp, int link_state) { if (link_state == LINK_STATE_UP) debugnet_any_ifnet_update(ifp); } static eventhandler_tag dn_attach_cookie; static void dn_evh_init(void *ctx __unused) { dn_attach_cookie = EVENTHANDLER_REGISTER(ifnet_link_event, dn_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); } SYSINIT(dn_evh_init, SI_SUB_EVENTHANDLER + 1, SI_ORDER_ANY, dn_evh_init, NULL); /* * DDB parsing helpers for debugnet(4) consumers. */ #ifdef DDB struct my_inet_opt { bool has_opt; const char *printname; in_addr_t *result; }; static int dn_parse_optarg_ipv4(struct my_inet_opt *opt) { in_addr_t tmp; unsigned octet; int t; tmp = 0; for (octet = 0; octet < 4; octet++) { t = db_read_token_flags(DRT_WSPACE | DRT_DECIMAL); if (t != tNUMBER) { db_printf("%s:%s: octet %u expected number; found %d\n", __func__, opt->printname, octet, t); return (EINVAL); } /* * db_lex lexes '-' distinctly from the number itself, but * let's document that invariant. */ MPASS(db_tok_number >= 0); if (db_tok_number > UINT8_MAX) { db_printf("%s:%s: octet %u out of range: %jd\n", __func__, opt->printname, octet, (intmax_t)db_tok_number); return (EDOM); } /* Constructed host-endian and converted to network later. */ tmp = (tmp << 8) | db_tok_number; if (octet < 3) { t = db_read_token_flags(DRT_WSPACE); if (t != tDOT) { db_printf("%s:%s: octet %u expected '.'; found" " %d\n", __func__, opt->printname, octet, t); return (EINVAL); } } } *opt->result = htonl(tmp); opt->has_opt = true; return (0); } int debugnet_parse_ddb_cmd(const char *cmd, struct debugnet_ddb_config *result) { struct ifnet *ifp; int t, error; bool want_ifp; char ch; struct my_inet_opt opt_client = { .printname = "client", .result = &result->dd_client, }, opt_server = { .printname = "server", .result = &result->dd_server, }, opt_gateway = { .printname = "gateway", .result = &result->dd_gateway, }, *cur_inet_opt; ifp = NULL; memset(result, 0, sizeof(*result)); /* * command [space] [-] [opt] [[space] [optarg]] ... * * db_command has already lexed 'command' for us. */ t = db_read_token_flags(DRT_WSPACE); if (t == tWSPACE) t = db_read_token_flags(DRT_WSPACE); while (t != tEOL) { if (t != tMINUS) { db_printf("%s: Bad syntax; expected '-', got %d\n", cmd, t); goto usage; } t = db_read_token_flags(DRT_WSPACE); if (t != tIDENT) { db_printf("%s: Bad syntax; expected tIDENT, got %d\n", cmd, t); goto usage; } if (strlen(db_tok_string) > 1) { db_printf("%s: Bad syntax; expected single option " "flag, got '%s'\n", cmd, db_tok_string); goto usage; } want_ifp = false; cur_inet_opt = NULL; switch ((ch = db_tok_string[0])) { default: DNETDEBUG("Unexpected: '%c'\n", ch); /* FALLTHROUGH */ case 'h': goto usage; case 'c': cur_inet_opt = &opt_client; break; case 'g': cur_inet_opt = &opt_gateway; break; case 's': cur_inet_opt = &opt_server; break; case 'i': want_ifp = true; break; } t = db_read_token_flags(DRT_WSPACE); if (t != tWSPACE) { db_printf("%s: Bad syntax; expected space after " "flag %c, got %d\n", cmd, ch, t); goto usage; } if (want_ifp) { t = db_read_token_flags(DRT_WSPACE); if (t != tIDENT) { db_printf("%s: Expected interface but got %d\n", cmd, t); goto usage; } CURVNET_SET(vnet0); /* * We *don't* take a ref here because the only current * consumer, db_netdump_cmd, does not need it. It * (somewhat redundantly) extracts the if_name(), * re-lookups the ifp, and takes its own reference. */ ifp = ifunit(db_tok_string); CURVNET_RESTORE(); if (ifp == NULL) { db_printf("Could not locate interface %s\n", db_tok_string); error = ENOENT; goto cleanup; } } else { MPASS(cur_inet_opt != NULL); /* Assume IPv4 for now. */ error = dn_parse_optarg_ipv4(cur_inet_opt); if (error != 0) goto cleanup; } /* Skip (mandatory) whitespace after option, if not EOL. */ t = db_read_token_flags(DRT_WSPACE); if (t == tEOL) break; if (t != tWSPACE) { db_printf("%s: Bad syntax; expected space after " "flag %c option; got %d\n", cmd, ch, t); goto usage; } t = db_read_token_flags(DRT_WSPACE); } if (!opt_server.has_opt) { db_printf("%s: need a destination server address\n", cmd); goto usage; } result->dd_has_client = opt_client.has_opt; result->dd_has_gateway = opt_gateway.has_opt; result->dd_ifp = ifp; /* We parsed the full line to tEOL already, or bailed with an error. */ return (0); usage: db_printf("Usage: %s -s [-g -c " "-i ]\n", cmd); error = EINVAL; /* FALLTHROUGH */ cleanup: db_skip_to_eol(); return (error); } #endif /* DDB */ diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h index e7313e78c5bb..cf92145eea8f 100644 --- a/sys/net/ethernet.h +++ b/sys/net/ethernet.h @@ -1,482 +1,481 @@ /* * Fundamental constants relating to ethernet. * */ #ifndef _NET_ETHERNET_H_ #define _NET_ETHERNET_H_ #include /* * Some basic Ethernet constants. */ #define ETHER_ADDR_LEN 6 /* length of an Ethernet address */ #define ETHER_TYPE_LEN 2 /* length of the Ethernet type field */ #define ETHER_CRC_LEN 4 /* length of the Ethernet CRC */ #define ETHER_HDR_LEN (ETHER_ADDR_LEN*2+ETHER_TYPE_LEN) #define ETHER_MIN_LEN 64 /* minimum frame len, including CRC */ #define ETHER_MAX_LEN 1518 /* maximum frame len, including CRC */ #define ETHER_MAX_LEN_JUMBO 9018 /* max jumbo frame len, including CRC */ #define ETHER_VLAN_ENCAP_LEN 4 /* len of 802.1Q VLAN encapsulation */ /* * Mbuf adjust factor to force 32-bit alignment of IP header. * Drivers should do m_adj(m, ETHER_ALIGN) when setting up a * receive so the upper layers get the IP header properly aligned * past the 14-byte Ethernet header. */ #define ETHER_ALIGN 2 /* driver adjust for IP hdr alignment */ /* * Compute the maximum frame size based on ethertype (i.e. possible * encapsulation) and whether or not an FCS is present. */ #define ETHER_MAX_FRAME(ifp, etype, hasfcs) \ (if_getmtu(ifp) + ETHER_HDR_LEN + \ ((hasfcs) ? ETHER_CRC_LEN : 0) + \ (((etype) == ETHERTYPE_VLAN) ? ETHER_VLAN_ENCAP_LEN : 0)) /* * Ethernet-specific mbuf flags. */ -#define M_HASFCS M_PROTO5 /* FCS included at end of frame */ #define M_BRIDGE_INJECT M_PROTO6 /* if_bridge-injected frame */ /* * Ethernet CRC32 polynomials (big- and little-endian versions). */ #define ETHER_CRC_POLY_LE 0xedb88320 #define ETHER_CRC_POLY_BE 0x04c11db6 /* * A macro to validate a length with */ #define ETHER_IS_VALID_LEN(foo) \ ((foo) >= ETHER_MIN_LEN && (foo) <= ETHER_MAX_LEN) /* * Structure of a 10Mb/s Ethernet header. */ struct ether_header { u_char ether_dhost[ETHER_ADDR_LEN]; u_char ether_shost[ETHER_ADDR_LEN]; u_short ether_type; } __packed; /* * Structure of a 48-bit Ethernet address. */ struct ether_addr { u_char octet[ETHER_ADDR_LEN]; } __packed; #define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */ #define ETHER_IS_IPV6_MULTICAST(addr) \ (((addr)[0] == 0x33) && ((addr)[1] == 0x33)) #define ETHER_IS_BROADCAST(addr) \ (((addr)[0] & (addr)[1] & (addr)[2] & \ (addr)[3] & (addr)[4] & (addr)[5]) == 0xff) #define ETHER_IS_ZERO(addr) \ (((addr)[0] | (addr)[1] | (addr)[2] | \ (addr)[3] | (addr)[4] | (addr)[5]) == 0x00) /* * 802.1q Virtual LAN header. */ struct ether_vlan_header { uint8_t evl_dhost[ETHER_ADDR_LEN]; uint8_t evl_shost[ETHER_ADDR_LEN]; uint16_t evl_encap_proto; uint16_t evl_tag; uint16_t evl_proto; } __packed; #define EVL_VLID_MASK 0x0FFF #define EVL_PRI_MASK 0xE000 #define EVL_VLANOFTAG(tag) ((tag) & EVL_VLID_MASK) #define EVL_PRIOFTAG(tag) (((tag) >> 13) & 7) #define EVL_CFIOFTAG(tag) (((tag) >> 12) & 1) #define EVL_MAKETAG(vlid, pri, cfi) \ ((((((pri) & 7) << 1) | ((cfi) & 1)) << 12) | ((vlid) & EVL_VLID_MASK)) /* * Ethernet protocol types. * * A public list is available from the IEEE Registration Authority: * https://standards.ieee.org/products-services/regauth/ * * NOTE: 0x0000-0x05DC (0..1500) are generally IEEE 802.3 length fields. * However, there are some conflicts. */ #define ETHERTYPE_8023 0x0004 /* IEEE 802.3 packet */ /* 0x0101 .. 0x1FF Experimental */ #define ETHERTYPE_PUP 0x0200 /* Xerox PUP protocol - see 0A00 */ #define ETHERTYPE_PUPAT 0x0200 /* PUP Address Translation - see 0A01 */ #define ETHERTYPE_SPRITE 0x0500 /* ??? */ /* 0x0400 Nixdorf */ #define ETHERTYPE_NS 0x0600 /* XNS */ #define ETHERTYPE_NSAT 0x0601 /* XNS Address Translation (3Mb only) */ #define ETHERTYPE_DLOG1 0x0660 /* DLOG (?) */ #define ETHERTYPE_DLOG2 0x0661 /* DLOG (?) */ #define ETHERTYPE_IP 0x0800 /* IP protocol */ #define ETHERTYPE_X75 0x0801 /* X.75 Internet */ #define ETHERTYPE_NBS 0x0802 /* NBS Internet */ #define ETHERTYPE_ECMA 0x0803 /* ECMA Internet */ #define ETHERTYPE_CHAOS 0x0804 /* CHAOSnet */ #define ETHERTYPE_X25 0x0805 /* X.25 Level 3 */ #define ETHERTYPE_ARP 0x0806 /* Address resolution protocol */ #define ETHERTYPE_NSCOMPAT 0x0807 /* XNS Compatibility */ #define ETHERTYPE_FRARP 0x0808 /* Frame Relay ARP (RFC1701) */ /* 0x081C Symbolics Private */ /* 0x0888 - 0x088A Xyplex */ #define ETHERTYPE_UBDEBUG 0x0900 /* Ungermann-Bass network debugger */ #define ETHERTYPE_IEEEPUP 0x0A00 /* Xerox IEEE802.3 PUP */ #define ETHERTYPE_IEEEPUPAT 0x0A01 /* Xerox IEEE802.3 PUP Address Translation */ #define ETHERTYPE_VINES 0x0BAD /* Banyan VINES */ #define ETHERTYPE_VINESLOOP 0x0BAE /* Banyan VINES Loopback */ #define ETHERTYPE_VINESECHO 0x0BAF /* Banyan VINES Echo */ /* 0x1000 - 0x100F Berkeley Trailer */ /* * The ETHERTYPE_NTRAILER packet types starting at ETHERTYPE_TRAIL have * (type-ETHERTYPE_TRAIL)*512 bytes of data followed * by an ETHER type (as given above) and then the (variable-length) header. */ #define ETHERTYPE_TRAIL 0x1000 /* Trailer packet */ #define ETHERTYPE_NTRAILER 16 #define ETHERTYPE_DCA 0x1234 /* DCA - Multicast */ #define ETHERTYPE_VALID 0x1600 /* VALID system protocol */ #define ETHERTYPE_DOGFIGHT 0x1989 /* Artificial Horizons ("Aviator" dogfight simulator [on Sun]) */ #define ETHERTYPE_RCL 0x1995 /* Datapoint Corporation (RCL lan protocol) */ /* The following 3C0x types are unregistered: */ #define ETHERTYPE_NBPVCD 0x3C00 /* 3Com NBP virtual circuit datagram (like XNS SPP) not registered */ #define ETHERTYPE_NBPSCD 0x3C01 /* 3Com NBP System control datagram not registered */ #define ETHERTYPE_NBPCREQ 0x3C02 /* 3Com NBP Connect request (virtual cct) not registered */ #define ETHERTYPE_NBPCRSP 0x3C03 /* 3Com NBP Connect response not registered */ #define ETHERTYPE_NBPCC 0x3C04 /* 3Com NBP Connect complete not registered */ #define ETHERTYPE_NBPCLREQ 0x3C05 /* 3Com NBP Close request (virtual cct) not registered */ #define ETHERTYPE_NBPCLRSP 0x3C06 /* 3Com NBP Close response not registered */ #define ETHERTYPE_NBPDG 0x3C07 /* 3Com NBP Datagram (like XNS IDP) not registered */ #define ETHERTYPE_NBPDGB 0x3C08 /* 3Com NBP Datagram broadcast not registered */ #define ETHERTYPE_NBPCLAIM 0x3C09 /* 3Com NBP Claim NetBIOS name not registered */ #define ETHERTYPE_NBPDLTE 0x3C0A /* 3Com NBP Delete NetBIOS name not registered */ #define ETHERTYPE_NBPRAS 0x3C0B /* 3Com NBP Remote adaptor status request not registered */ #define ETHERTYPE_NBPRAR 0x3C0C /* 3Com NBP Remote adaptor response not registered */ #define ETHERTYPE_NBPRST 0x3C0D /* 3Com NBP Reset not registered */ #define ETHERTYPE_PCS 0x4242 /* PCS Basic Block Protocol */ #define ETHERTYPE_IMLBLDIAG 0x424C /* Information Modes Little Big LAN diagnostic */ #define ETHERTYPE_DIDDLE 0x4321 /* THD - Diddle */ #define ETHERTYPE_IMLBL 0x4C42 /* Information Modes Little Big LAN */ #define ETHERTYPE_SIMNET 0x5208 /* BBN Simnet Private */ #define ETHERTYPE_DECEXPER 0x6000 /* DEC Unassigned, experimental */ #define ETHERTYPE_MOPDL 0x6001 /* DEC MOP dump/load */ #define ETHERTYPE_MOPRC 0x6002 /* DEC MOP remote console */ #define ETHERTYPE_DECnet 0x6003 /* DEC DECNET Phase IV route */ #define ETHERTYPE_DN ETHERTYPE_DECnet /* libpcap, tcpdump */ #define ETHERTYPE_LAT 0x6004 /* DEC LAT */ #define ETHERTYPE_DECDIAG 0x6005 /* DEC diagnostic protocol (at interface initialization?) */ #define ETHERTYPE_DECCUST 0x6006 /* DEC customer protocol */ #define ETHERTYPE_SCA 0x6007 /* DEC LAVC, SCA */ #define ETHERTYPE_AMBER 0x6008 /* DEC AMBER */ #define ETHERTYPE_DECMUMPS 0x6009 /* DEC MUMPS */ /* 0x6010 - 0x6014 3Com Corporation */ #define ETHERTYPE_TRANSETHER 0x6558 /* Trans Ether Bridging (RFC1701)*/ #define ETHERTYPE_RAWFR 0x6559 /* Raw Frame Relay (RFC1701) */ #define ETHERTYPE_UBDL 0x7000 /* Ungermann-Bass download */ #define ETHERTYPE_UBNIU 0x7001 /* Ungermann-Bass NIUs */ #define ETHERTYPE_UBDIAGLOOP 0x7002 /* Ungermann-Bass diagnostic/loopback */ #define ETHERTYPE_UBNMC 0x7003 /* Ungermann-Bass ??? (NMC to/from UB Bridge) */ #define ETHERTYPE_UBBST 0x7005 /* Ungermann-Bass Bridge Spanning Tree */ #define ETHERTYPE_OS9 0x7007 /* OS/9 Microware */ #define ETHERTYPE_OS9NET 0x7009 /* OS/9 Net? */ /* 0x7020 - 0x7029 LRT (England) (now Sintrom) */ #define ETHERTYPE_RACAL 0x7030 /* Racal-Interlan */ #define ETHERTYPE_PRIMENTS 0x7031 /* Prime NTS (Network Terminal Service) */ #define ETHERTYPE_CABLETRON 0x7034 /* Cabletron */ #define ETHERTYPE_CRONUSVLN 0x8003 /* Cronus VLN */ #define ETHERTYPE_CRONUS 0x8004 /* Cronus Direct */ #define ETHERTYPE_HP 0x8005 /* HP Probe */ #define ETHERTYPE_NESTAR 0x8006 /* Nestar */ #define ETHERTYPE_ATTSTANFORD 0x8008 /* AT&T/Stanford (local use) */ #define ETHERTYPE_EXCELAN 0x8010 /* Excelan */ #define ETHERTYPE_SG_DIAG 0x8013 /* SGI diagnostic type */ #define ETHERTYPE_SG_NETGAMES 0x8014 /* SGI network games */ #define ETHERTYPE_SG_RESV 0x8015 /* SGI reserved type */ #define ETHERTYPE_SG_BOUNCE 0x8016 /* SGI bounce server */ #define ETHERTYPE_APOLLODOMAIN 0x8019 /* Apollo DOMAIN */ #define ETHERTYPE_TYMSHARE 0x802E /* Tymeshare */ #define ETHERTYPE_TIGAN 0x802F /* Tigan, Inc. */ #define ETHERTYPE_REVARP 0x8035 /* Reverse addr resolution protocol */ #define ETHERTYPE_AEONIC 0x8036 /* Aeonic Systems */ #define ETHERTYPE_IPXNEW 0x8037 /* IPX (Novell Netware?) */ #define ETHERTYPE_LANBRIDGE 0x8038 /* DEC LANBridge */ #define ETHERTYPE_DSMD 0x8039 /* DEC DSM/DDP */ #define ETHERTYPE_ARGONAUT 0x803A /* DEC Argonaut Console */ #define ETHERTYPE_VAXELN 0x803B /* DEC VAXELN */ #define ETHERTYPE_DECDNS 0x803C /* DEC DNS Naming Service */ #define ETHERTYPE_ENCRYPT 0x803D /* DEC Ethernet Encryption */ #define ETHERTYPE_DECDTS 0x803E /* DEC Distributed Time Service */ #define ETHERTYPE_DECLTM 0x803F /* DEC LAN Traffic Monitor */ #define ETHERTYPE_DECNETBIOS 0x8040 /* DEC PATHWORKS DECnet NETBIOS Emulation */ #define ETHERTYPE_DECLAST 0x8041 /* DEC Local Area System Transport */ /* 0x8042 DEC Unassigned */ #define ETHERTYPE_PLANNING 0x8044 /* Planning Research Corp. */ /* 0x8046 - 0x8047 AT&T */ #define ETHERTYPE_DECAM 0x8048 /* DEC Availability Manager for Distributed Systems DECamds (but someone at DEC says not) */ #define ETHERTYPE_EXPERDATA 0x8049 /* ExperData */ #define ETHERTYPE_VEXP 0x805B /* Stanford V Kernel exp. */ #define ETHERTYPE_VPROD 0x805C /* Stanford V Kernel prod. */ #define ETHERTYPE_ES 0x805D /* Evans & Sutherland */ #define ETHERTYPE_LITTLE 0x8060 /* Little Machines */ #define ETHERTYPE_COUNTERPOINT 0x8062 /* Counterpoint Computers */ /* 0x8065 - 0x8066 Univ. of Mass @ Amherst */ #define ETHERTYPE_VEECO 0x8067 /* Veeco Integrated Auto. */ #define ETHERTYPE_GENDYN 0x8068 /* General Dynamics */ #define ETHERTYPE_ATT 0x8069 /* AT&T */ #define ETHERTYPE_AUTOPHON 0x806A /* Autophon */ #define ETHERTYPE_COMDESIGN 0x806C /* ComDesign */ #define ETHERTYPE_COMPUGRAPHIC 0x806D /* Compugraphic Corporation */ /* 0x806E - 0x8077 Landmark Graphics Corp. */ #define ETHERTYPE_MATRA 0x807A /* Matra */ #define ETHERTYPE_DDE 0x807B /* Dansk Data Elektronik */ #define ETHERTYPE_MERIT 0x807C /* Merit Internodal (or Univ of Michigan?) */ /* 0x807D - 0x807F Vitalink Communications */ #define ETHERTYPE_VLTLMAN 0x8080 /* Vitalink TransLAN III Management */ /* 0x8081 - 0x8083 Counterpoint Computers */ /* 0x8088 - 0x808A Xyplex */ #define ETHERTYPE_ATALK 0x809B /* AppleTalk */ #define ETHERTYPE_AT ETHERTYPE_ATALK /* old NetBSD */ #define ETHERTYPE_APPLETALK ETHERTYPE_ATALK /* HP-UX */ /* 0x809C - 0x809E Datability */ #define ETHERTYPE_SPIDER 0x809F /* Spider Systems Ltd. */ /* 0x80A3 Nixdorf */ /* 0x80A4 - 0x80B3 Siemens Gammasonics Inc. */ /* 0x80C0 - 0x80C3 DCA (Digital Comm. Assoc.) Data Exchange Cluster */ /* 0x80C4 - 0x80C5 Banyan Systems */ #define ETHERTYPE_PACER 0x80C6 /* Pacer Software */ #define ETHERTYPE_APPLITEK 0x80C7 /* Applitek Corporation */ /* 0x80C8 - 0x80CC Intergraph Corporation */ /* 0x80CD - 0x80CE Harris Corporation */ /* 0x80CF - 0x80D2 Taylor Instrument */ /* 0x80D3 - 0x80D4 Rosemount Corporation */ #define ETHERTYPE_SNA 0x80D5 /* IBM SNA Services over Ethernet */ #define ETHERTYPE_VARIAN 0x80DD /* Varian Associates */ /* 0x80DE - 0x80DF TRFS (Integrated Solutions Transparent Remote File System) */ /* 0x80E0 - 0x80E3 Allen-Bradley */ /* 0x80E4 - 0x80F0 Datability */ #define ETHERTYPE_RETIX 0x80F2 /* Retix */ #define ETHERTYPE_AARP 0x80F3 /* AppleTalk AARP */ /* 0x80F4 - 0x80F5 Kinetics */ #define ETHERTYPE_APOLLO 0x80F7 /* Apollo Computer */ #define ETHERTYPE_VLAN 0x8100 /* IEEE 802.1Q VLAN tagging (XXX conflicts) */ /* 0x80FF - 0x8101 Wellfleet Communications (XXX conflicts) */ #define ETHERTYPE_BOFL 0x8102 /* Wellfleet; BOFL (Breath OF Life) pkts [every 5-10 secs.] */ #define ETHERTYPE_WELLFLEET 0x8103 /* Wellfleet Communications */ /* 0x8107 - 0x8109 Symbolics Private */ #define ETHERTYPE_TALARIS 0x812B /* Talaris */ #define ETHERTYPE_WATERLOO 0x8130 /* Waterloo Microsystems Inc. (XXX which?) */ #define ETHERTYPE_HAYES 0x8130 /* Hayes Microcomputers (XXX which?) */ #define ETHERTYPE_VGLAB 0x8131 /* VG Laboratory Systems */ /* 0x8132 - 0x8137 Bridge Communications */ #define ETHERTYPE_IPX 0x8137 /* Novell (old) NetWare IPX (ECONFIG E option) */ #define ETHERTYPE_NOVELL 0x8138 /* Novell, Inc. */ /* 0x8139 - 0x813D KTI */ #define ETHERTYPE_MUMPS 0x813F /* M/MUMPS data sharing */ #define ETHERTYPE_AMOEBA 0x8145 /* Vrije Universiteit (NL) Amoeba 4 RPC (obsolete) */ #define ETHERTYPE_FLIP 0x8146 /* Vrije Universiteit (NL) FLIP (Fast Local Internet Protocol) */ #define ETHERTYPE_VURESERVED 0x8147 /* Vrije Universiteit (NL) [reserved] */ #define ETHERTYPE_LOGICRAFT 0x8148 /* Logicraft */ #define ETHERTYPE_NCD 0x8149 /* Network Computing Devices */ #define ETHERTYPE_ALPHA 0x814A /* Alpha Micro */ #define ETHERTYPE_SNMP 0x814C /* SNMP over Ethernet (see RFC1089) */ /* 0x814D - 0x814E BIIN */ #define ETHERTYPE_TEC 0x814F /* Technically Elite Concepts */ #define ETHERTYPE_RATIONAL 0x8150 /* Rational Corp */ /* 0x8151 - 0x8153 Qualcomm */ /* 0x815C - 0x815E Computer Protocol Pty Ltd */ /* 0x8164 - 0x8166 Charles River Data Systems */ #define ETHERTYPE_XTP 0x817D /* Protocol Engines XTP */ #define ETHERTYPE_SGITW 0x817E /* SGI/Time Warner prop. */ #define ETHERTYPE_HIPPI_FP 0x8180 /* HIPPI-FP encapsulation */ #define ETHERTYPE_STP 0x8181 /* Scheduled Transfer STP, HIPPI-ST */ /* 0x8182 - 0x8183 Reserved for HIPPI-6400 */ /* 0x8184 - 0x818C SGI prop. */ #define ETHERTYPE_MOTOROLA 0x818D /* Motorola */ #define ETHERTYPE_NETBEUI 0x8191 /* PowerLAN NetBIOS/NetBEUI (PC) */ /* 0x819A - 0x81A3 RAD Network Devices */ /* 0x81B7 - 0x81B9 Xyplex */ /* 0x81CC - 0x81D5 Apricot Computers */ /* 0x81D6 - 0x81DD Artisoft Lantastic */ /* 0x81E6 - 0x81EF Polygon */ /* 0x81F0 - 0x81F2 Comsat Labs */ /* 0x81F3 - 0x81F5 SAIC */ /* 0x81F6 - 0x81F8 VG Analytical */ /* 0x8203 - 0x8205 QNX Software Systems Ltd. */ /* 0x8221 - 0x8222 Ascom Banking Systems */ /* 0x823E - 0x8240 Advanced Encryption Systems */ /* 0x8263 - 0x826A Charles River Data Systems */ /* 0x827F - 0x8282 Athena Programming */ /* 0x829A - 0x829B Inst Ind Info Tech */ /* 0x829C - 0x82AB Taurus Controls */ /* 0x82AC - 0x8693 Walker Richer & Quinn */ #define ETHERTYPE_ACCTON 0x8390 /* Accton Technologies (unregistered) */ #define ETHERTYPE_TALARISMC 0x852B /* Talaris multicast */ #define ETHERTYPE_KALPANA 0x8582 /* Kalpana */ /* 0x8694 - 0x869D Idea Courier */ /* 0x869E - 0x86A1 Computer Network Tech */ /* 0x86A3 - 0x86AC Gateway Communications */ #define ETHERTYPE_SECTRA 0x86DB /* SECTRA */ #define ETHERTYPE_IPV6 0x86DD /* IP protocol version 6 */ #define ETHERTYPE_DELTACON 0x86DE /* Delta Controls */ #define ETHERTYPE_ATOMIC 0x86DF /* ATOMIC */ /* 0x86E0 - 0x86EF Landis & Gyr Powers */ /* 0x8700 - 0x8710 Motorola */ #define ETHERTYPE_RDP 0x8739 /* Control Technology Inc. RDP Without IP */ #define ETHERTYPE_MICP 0x873A /* Control Technology Inc. Mcast Industrial Ctrl Proto. */ /* 0x873B - 0x873C Control Technology Inc. Proprietary */ #define ETHERTYPE_TCPCOMP 0x876B /* TCP/IP Compression (RFC1701) */ #define ETHERTYPE_IPAS 0x876C /* IP Autonomous Systems (RFC1701) */ #define ETHERTYPE_SECUREDATA 0x876D /* Secure Data (RFC1701) */ #define ETHERTYPE_FLOWCONTROL 0x8808 /* 802.3x flow control packet */ #define ETHERTYPE_SLOW 0x8809 /* 802.3ad link aggregation (LACP) */ #define ETHERTYPE_PPP 0x880B /* PPP (obsolete by PPPoE) */ #define ETHERTYPE_HITACHI 0x8820 /* Hitachi Cable (Optoelectronic Systems Laboratory) */ #define ETHERTYPE_TEST 0x8822 /* Network Conformance Testing */ #define ETHERTYPE_MPLS 0x8847 /* MPLS Unicast */ #define ETHERTYPE_MPLS_MCAST 0x8848 /* MPLS Multicast */ #define ETHERTYPE_AXIS 0x8856 /* Axis Communications AB proprietary bootstrap/config */ #define ETHERTYPE_PPPOEDISC 0x8863 /* PPP Over Ethernet Discovery Stage */ #define ETHERTYPE_PPPOE 0x8864 /* PPP Over Ethernet Session Stage */ #define ETHERTYPE_LANPROBE 0x8888 /* HP LanProbe test? */ #define ETHERTYPE_PAE 0x888E /* EAPOL PAE/802.1x */ #define ETHERTYPE_PROFINET 0x8892 /* PROFINET RT Protocol */ #define ETHERTYPE_AOE 0x88A2 /* ATA Protocol */ #define ETHERTYPE_ETHERCAT 0x88A4 /* EtherCat Protocol */ #define ETHERTYPE_QINQ 0x88A8 /* 802.1ad VLAN stacking */ #define ETHERTYPE_POWERLINK 0x88AB /* Ethernet Powerlink (EPL) */ #define ETHERTYPE_LLDP 0x88CC /* Link Layer Discovery Protocol */ #define ETHERTYPE_SERCOS 0x88CD /* SERCOS III Protocol */ #define ETHERTYPE_MACSEC 0x88E5 /* 802.1AE MAC Security */ #define ETHERTYPE_PBB 0x88E7 /* 802.1Q Provider Backbone Bridges */ #define ETHERTYPE_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETHERTYPE_LOOPBACK 0x9000 /* Loopback: used to test interfaces */ #define ETHERTYPE_8021Q9100 0x9100 /* IEEE 802.1Q stacking (proprietary) */ #define ETHERTYPE_LBACK ETHERTYPE_LOOPBACK /* DEC MOP loopback */ #define ETHERTYPE_XNSSM 0x9001 /* 3Com (Formerly Bridge Communications), XNS Systems Management */ #define ETHERTYPE_TCPSM 0x9002 /* 3Com (Formerly Bridge Communications), TCP/IP Systems Management */ #define ETHERTYPE_BCLOOP 0x9003 /* 3Com (Formerly Bridge Communications), loopback detection */ #define ETHERTYPE_8021Q9200 0x9200 /* IEEE 802.1Q stacking (proprietary) */ #define ETHERTYPE_8021Q9300 0x9300 /* IEEE 802.1Q stacking (proprietary) */ #define ETHERTYPE_DEBNI 0xAAAA /* DECNET? Used by VAX 6220 DEBNI */ #define ETHERTYPE_SONIX 0xFAF5 /* Sonix Arpeggio */ #define ETHERTYPE_VITAL 0xFF00 /* BBN VITAL-LanBridge cache wakeups */ /* 0xFF00 - 0xFFOF ISC Bunker Ramo */ #define ETHERTYPE_MAX 0xFFFF /* Maximum valid ethernet type, reserved */ /* * The ETHERTYPE_NTRAILER packet types starting at ETHERTYPE_TRAIL have * (type-ETHERTYPE_TRAIL)*512 bytes of data followed * by an ETHER type (as given above) and then the (variable-length) header. */ #define ETHERTYPE_TRAIL 0x1000 /* Trailer packet */ #define ETHERTYPE_NTRAILER 16 #define ETHERMTU (ETHER_MAX_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN) #define ETHERMIN (ETHER_MIN_LEN-ETHER_HDR_LEN-ETHER_CRC_LEN) #define ETHERMTU_JUMBO (ETHER_MAX_LEN_JUMBO - ETHER_HDR_LEN - ETHER_CRC_LEN) /* * The ETHER_BPF_MTAP macro should be used by drivers which support hardware * offload for VLAN tag processing. It will check the mbuf to see if it has * M_VLANTAG set, and if it does, will pass the packet along to * ether_vlan_mtap. This function will re-insert VLAN tags for the duration * of the tap, so they show up properly for network analyzers. */ struct ifnet; struct mbuf; void ether_bpf_mtap_if(struct ifnet *ifp, struct mbuf *m); #define ETHER_BPF_MTAP(_ifp, _m) ether_bpf_mtap_if((_ifp), (_m)) /* * Names for 802.1q priorities ("802.1p"). Notice that in this scheme, * (0 < 1), allowing default 0-tagged traffic to take priority over background * tagged traffic. */ #define IEEE8021Q_PCP_BK 1 /* Background (lowest) */ #define IEEE8021Q_PCP_BE 0 /* Best effort (default) */ #define IEEE8021Q_PCP_EE 2 /* Excellent effort */ #define IEEE8021Q_PCP_CA 3 /* Critical applications */ #define IEEE8021Q_PCP_VI 4 /* Video, < 100ms latency and jitter */ #define IEEE8021Q_PCP_VO 5 /* Voice, < 10ms latency and jitter */ #define IEEE8021Q_PCP_IC 6 /* Internetwork control */ #define IEEE8021Q_PCP_NC 7 /* Network control (highest) */ #ifdef _KERNEL #include struct ifnet; struct mbuf; struct route; struct sockaddr; struct bpf_if; struct ether_8021q_tag; extern uint32_t ether_crc32_le(const uint8_t *, size_t); extern uint32_t ether_crc32_be(const uint8_t *, size_t); extern void ether_demux(struct ifnet *, struct mbuf *); extern void ether_ifattach(struct ifnet *, const u_int8_t *); extern void ether_ifdetach(struct ifnet *); #ifdef VIMAGE struct vnet; extern void ether_reassign(struct ifnet *, struct vnet *, char *); #endif extern int ether_ioctl(struct ifnet *, u_long, caddr_t); extern int ether_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); extern int ether_output_frame(struct ifnet *, struct mbuf *); extern char *ether_sprintf(const u_int8_t *); void ether_vlan_mtap(struct bpf_if *, struct mbuf *, void *, u_int); struct mbuf *ether_vlanencap_proto(struct mbuf *, uint16_t, uint16_t); bool ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p, const struct ether_8021q_tag *); void ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr); void ether_gen_addr_byname(const char *nameunit, struct ether_addr *hwaddr); static __inline struct mbuf *ether_vlanencap(struct mbuf *m, uint16_t tag) { return ether_vlanencap_proto(m, tag, ETHERTYPE_VLAN); } /* new ethernet interface attached event */ typedef void (*ether_ifattach_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ether_ifattach_event, ether_ifattach_event_handler_t); #else /* _KERNEL */ #include /* * Ethernet address conversion/parsing routines. */ __BEGIN_DECLS struct ether_addr *ether_aton(const char *); struct ether_addr *ether_aton_r(const char *, struct ether_addr *); int ether_hostton(const char *, struct ether_addr *); int ether_line(const char *, struct ether_addr *, char *); char *ether_ntoa(const struct ether_addr *); char *ether_ntoa_r(const struct ether_addr *, char *); int ether_ntohost(char *, const struct ether_addr *); __END_DECLS #endif /* !_KERNEL */ #endif /* !_NET_ETHERNET_H_ */ diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index eeb2c1ea4ef3..6cd5cefa9609 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -1,1545 +1,1535 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_netgraph.h" #include "opt_mbuf_profiling.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #endif #ifdef INET6 #include #endif #include #include #ifdef CTASSERT CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif VNET_DEFINE(pfil_head_t, link_pfil_head); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); int (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_attach_p)(struct ifnet *ifp); void (*ng_ether_detach_p)(struct ifnet *ifp); void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* if_bridge(4) support */ void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /* if_lagg(4) support */ struct mbuf *(*lagg_input_ethernet_p)(struct ifnet *, struct mbuf *); static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int ether_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); static int ether_requestencap(struct ifnet *, struct if_encap_req *); static inline bool ether_do_pcp(struct ifnet *, struct mbuf *); #define senderr(e) do { error = (e); goto bad;} while (0) static void update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst) { int csum_flags = 0; if (src->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); if (src->m_pkthdr.csum_flags & CSUM_SCTP) csum_flags |= CSUM_SCTP_VALID; dst->m_pkthdr.csum_flags |= csum_flags; if (csum_flags & CSUM_DATA_VALID) dst->m_pkthdr.csum_data = 0xffff; } /* * Handle link-layer encapsulation requests. */ static int ether_requestencap(struct ifnet *ifp, struct if_encap_req *req) { struct ether_header *eh; struct arphdr *ah; uint16_t etype; const u_char *lladdr; if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < ETHER_HDR_LEN) return (ENOMEM); eh = (struct ether_header *)req->buf; lladdr = req->lladdr; req->lladdr_off = 0; switch (req->family) { case AF_INET: etype = htons(ETHERTYPE_IP); break; case AF_INET6: etype = htons(ETHERTYPE_IPV6); break; case AF_ARP: ah = (struct arphdr *)req->hdata; ah->ar_hrd = htons(ARPHRD_ETHER); switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: etype = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: etype = htons(ETHERTYPE_ARP); break; } if (req->flags & IFENCAP_FLAG_BROADCAST) lladdr = ifp->if_broadcastaddr; break; default: return (EAFNOSUPPORT); } memcpy(&eh->ether_type, &etype, sizeof(eh->ether_type)); memcpy(eh->ether_dhost, lladdr, ETHER_ADDR_LEN); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); req->bufsize = sizeof(struct ether_header); return (0); } static int ether_resolve_addr(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro, u_char *phdr, uint32_t *pflags, struct llentry **plle) { uint32_t lleflags = 0; int error = 0; #if defined(INET) || defined(INET6) struct ether_header *eh = (struct ether_header *)phdr; uint16_t etype; #endif if (plle) *plle = NULL; switch (dst->sa_family) { #ifdef INET case AF_INET: if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) error = arpresolve(ifp, 0, m, dst, phdr, &lleflags, plle); else { if (m->m_flags & M_BCAST) memcpy(eh->ether_dhost, ifp->if_broadcastaddr, ETHER_ADDR_LEN); else { const struct in_addr *a; a = &(((const struct sockaddr_in *)dst)->sin_addr); ETHER_MAP_IP_MULTICAST(a, eh->ether_dhost); } etype = htons(ETHERTYPE_IP); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) == 0) { int af = RO_GET_FAMILY(ro, dst); error = nd6_resolve(ifp, LLE_SF(af, 0), m, dst, phdr, &lleflags, plle); } else { const struct in6_addr *a6; a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr); ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost); etype = htons(ETHERTYPE_IPV6); memcpy(&eh->ether_type, &etype, sizeof(etype)); memcpy(eh->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN); } break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); if (m != NULL) m_freem(m); return (EAFNOSUPPORT); } if (error == EHOSTDOWN) { if (ro != NULL && (ro->ro_flags & RT_HAS_GW) != 0) error = EHOSTUNREACH; } if (error != 0) return (error); *pflags = RT_MAY_LOOP; if (lleflags & LLE_IFADDR) *pflags |= RT_L2_ME; return (0); } /* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. */ int ether_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { int error = 0; char linkhdr[ETHER_HDR_LEN], *phdr; struct ether_header *eh; struct pf_mtag *t; bool loop_copy; int hlen; /* link layer header length */ uint32_t pflags; struct llentry *lle = NULL; int addref = 0; phdr = NULL; pflags = 0; if (ro != NULL) { /* XXX BPF uses ro_prepend */ if (ro->ro_prepend != NULL) { phdr = ro->ro_prepend; hlen = ro->ro_plen; } else if (!(m->m_flags & (M_BCAST | M_MCAST))) { if ((ro->ro_flags & RT_LLE_CACHE) != 0) { lle = ro->ro_lle; if (lle != NULL && (lle->la_flags & LLE_VALID) == 0) { LLE_FREE(lle); lle = NULL; /* redundant */ ro->ro_lle = NULL; } if (lle == NULL) { /* if we lookup, keep cache */ addref = 1; } else /* * Notify LLE code that * the entry was used * by datapath. */ llentry_provide_feedback(lle); } if (lle != NULL) { phdr = lle->r_linkdata; hlen = lle->r_hdrlen; pflags = lle->r_flags; } } } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); if (phdr == NULL) { /* No prepend data supplied. Try to calculate ourselves. */ phdr = linkhdr; hlen = ETHER_HDR_LEN; error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags, addref ? &lle : NULL); if (addref && lle != NULL) ro->ro_lle = lle; if (error != 0) return (error == EWOULDBLOCK ? 0 : error); } if ((pflags & RT_L2_ME) != 0) { update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, RO_GET_FAMILY(ro, dst), 0)); } loop_copy = (pflags & RT_MAY_LOOP) != 0; /* * Add local net header. If no space in first mbuf, * allocate another. * * Note that we do prepend regardless of RT_HAS_HEADER flag. * This is done because BPF code shifts m_data pointer * to the end of ethernet header prior to calling if_output(). */ M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); if ((pflags & RT_HAS_HEADER) == 0) { eh = mtod(m, struct ether_header *); memcpy(eh, phdr, hlen); } /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { struct mbuf *n; /* * Because if_simloop() modifies the packet, we need a * writable copy through m_dup() instead of a readonly * one as m_copy[m] would give us. The alternative would * be to modify if_simloop() to handle the readonly mbuf, * but performancewise it is mostly equivalent (trading * extra data copying vs. extra locking). * * XXX This is a local workaround. A number of less * often used kernel parts suffer from the same bug. * See PR kern/105943 for a proposed general solution. */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); (void)if_simloop(ifp, n, RO_GET_FAMILY(ro, dst), hlen); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } /* * Bridges require special output handling. */ if (ifp->if_bridge) { BRIDGE_OUTPUT(ifp, m, error); return (error); } #if defined(INET) || defined(INET6) if (ifp->if_carp && (error = (*carp_output_p)(ifp, m, dst))) goto bad; #endif /* Handle ng_ether(4) processing, if any */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_output_p != NULL, ("ng_ether_output_p is NULL")); if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { bad: if (m != NULL) m_freem(m); return (error); } if (m == NULL) return (0); } /* Continue with link-layer output */ return ether_output_frame(ifp, m); } static bool ether_set_pcp(struct mbuf **mp, struct ifnet *ifp, uint8_t pcp) { struct ether_8021q_tag qtag; struct ether_header *eh; eh = mtod(*mp, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN) || eh->ether_type == htons(ETHERTYPE_QINQ)) { (*mp)->m_flags &= ~M_VLANTAG; return (true); } qtag.vid = 0; qtag.pcp = pcp; qtag.proto = ETHERTYPE_VLAN; if (ether_8021q_frame(mp, ifp, ifp, &qtag)) return (true); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (false); } /* * Ethernet link layer output routine to send a raw frame to the device. * * This assumes that the 14 byte Ethernet header is present and contiguous * in the first mbuf (if BRIDGE'ing). */ int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { if (ether_do_pcp(ifp, m) && !ether_set_pcp(&m, ifp, ifp->if_pcp)) return (0); if (PFIL_HOOKED_OUT(V_link_pfil_head)) switch (pfil_mbuf_out(V_link_pfil_head, &m, ifp, NULL)) { case PFIL_DROPPED: return (EACCES); case PFIL_CONSUMED: return (0); } #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) /* draft-ietf-6man-ipv6only-flag */ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) { struct ether_header *eh; eh = mtod(m, struct ether_header *); switch (ntohs(eh->ether_type)) { case ETHERTYPE_IP: case ETHERTYPE_ARP: case ETHERTYPE_REVARP: m_freem(m); return (EAFNOSUPPORT); /* NOTREACHED */ break; }; } #endif #endif /* * Queue message on interface, update output statistics if successful, * and start output if interface not yet active. * * If KMSAN is enabled, use it to verify that the data does not contain * any uninitialized bytes. */ kmsan_check_mbuf(m, "ether_output"); return ((ifp->if_transmit)(ifp, m)); } /* * Process a received Ethernet packet; the packet is in the * mbuf chain m with the ethernet header at the front. */ static void ether_input_internal(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } #ifdef DIAGNOSTIC if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n"); m_freem(m); return; } #endif if (__predict_false(m->m_len < ETHER_HDR_LEN)) { /* Drivers should pullup and ensure the mbuf is valid */ if_printf(ifp, "discard frame w/o leading ethernet " "header (len %d pkt len %d)\n", m->m_len, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); random_harvest_queue_ether(m, sizeof(*m)); #ifdef EXPERIMENTAL #if defined(INET6) && defined(INET) /* draft-ietf-6man-ipv6only-flag */ /* Catch ETHERTYPE_IP, and ETHERTYPE_[REV]ARP if we are v6-only. */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY_MASK) != 0) { switch (etype) { case ETHERTYPE_IP: case ETHERTYPE_ARP: case ETHERTYPE_REVARP: m_freem(m); return; /* NOTREACHED */ break; }; } #endif #endif CURVNET_SET_QUIET(ifp->if_vnet); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* * Give bpf a chance at the packet. */ ETHER_BPF_MTAP(ifp, m); - /* - * If the CRC is still on the packet, trim it off. We do this once - * and once only in case we are re-entered. Nothing else on the - * Ethernet receive path expects to see the FCS. - */ - if (m->m_flags & M_HASFCS) { - m_adj(m, -ETHER_CRC_LEN); - m->m_flags &= ~M_HASFCS; - } - if (!(ifp->if_capenable & IFCAP_HWSTATS)) if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); CURVNET_RESTORE(); return; } /* Handle input from a lagg(4) port */ if (ifp->if_type == IFT_IEEE8023ADLAG) { KASSERT(lagg_input_ethernet_p != NULL, ("%s: if_lagg not loaded!", __func__)); m = (*lagg_input_ethernet_p)(ifp, m); if (m != NULL) ifp = m->m_pkthdr.rcvif; else { CURVNET_RESTORE(); return; } } /* * If the hardware did not process an 802.1Q tag, do this now, * to allow 802.1P priority frames to be passed to the main input * path correctly. */ if ((m->m_flags & M_VLANTAG) == 0 && ((etype == ETHERTYPE_VLAN) || (etype == ETHERTYPE_QINQ))) { struct ether_vlan_header *evl; if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { #ifdef DIAGNOSTIC if_printf(ifp, "cannot pullup VLAN header\n"); #endif if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); CURVNET_RESTORE(); return; } evl = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); m->m_flags |= M_VLANTAG; bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); eh = mtod(m, struct ether_header *); } M_SETFIB(m, ifp->if_fib); /* Allow ng_ether(4) to claim this frame. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_p != NULL, ("%s: ng_ether_input_p is NULL", __func__)); m->m_flags &= ~M_PROMISC; (*ng_ether_input_p)(ifp, &m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } /* * Allow if_bridge(4) to claim this frame. * * The BRIDGE_INPUT() macro will update ifp if the bridge changed it * and the frame should be delivered locally. * * If M_BRIDGE_INJECT is set, the packet was received directly by the * bridge via netmap, so "ifp" is the bridge itself and the packet * should be re-examined. */ if (ifp->if_bridge != NULL || (m->m_flags & M_BRIDGE_INJECT) != 0) { m->m_flags &= ~M_PROMISC; BRIDGE_INPUT(ifp, m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } #if defined(INET) || defined(INET6) /* * Clear M_PROMISC on frame so that carp(4) will see it when the * mbuf flows up to Layer 3. * FreeBSD's implementation of carp(4) uses the inprotosw * to dispatch IPPROTO_CARP. carp(4) also allocates its own * Ethernet addresses of the form 00:00:5e:00:01:xx, which * is outside the scope of the M_PROMISC test below. * TODO: Maintain a hash table of ethernet addresses other than * ether_dhost which may be active on this ifp. */ if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) { m->m_flags &= ~M_PROMISC; } else #endif { /* * If the frame received was not for our MAC address, set the * M_PROMISC flag on the mbuf chain. The frame may need to * be seen by the rest of the Ethernet input path in case of * re-entry (e.g. bridge, vlan, netgraph) but should not be * seen by upper protocol layers. */ if (!ETHER_IS_MULTICAST(eh->ether_dhost) && bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) m->m_flags |= M_PROMISC; } ether_demux(ifp, m); CURVNET_RESTORE(); } /* * Ethernet input dispatch; by default, direct dispatch here regardless of * global configuration. However, if RSS is enabled, hook up RSS affinity * so that when deferred or hybrid dispatch is enabled, we can redistribute * load based on RSS. * * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or * not it had already done work distribution via multi-queue. Then we could * direct dispatch in the event load balancing was already complete and * handle the case of interfaces with different capabilities better. * * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions * at multiple layers? * * XXXRW: For now, enable all this only if RSS is compiled in, although it * works fine without RSS. Need to characterise the performance overhead * of the detour through the netisr code in the event the result is always * direct dispatch. */ static void ether_nh_input(struct mbuf *m) { M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: NULL interface pointer", __func__)); ether_input_internal(m->m_pkthdr.rcvif, m); } static struct netisr_handler ether_nh = { .nh_name = "ether", .nh_handler = ether_nh_input, .nh_proto = NETISR_ETHER, #ifdef RSS .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_DIRECT, .nh_m2cpuid = rss_m2cpuid, #else .nh_policy = NETISR_POLICY_SOURCE, .nh_dispatch = NETISR_DISPATCH_DIRECT, #endif }; static void ether_init(__unused void *arg) { netisr_register(ðer_nh); } SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); static void vnet_ether_init(__unused void *arg) { struct pfil_head_args args; args.pa_version = PFIL_VERSION; args.pa_flags = PFIL_IN | PFIL_OUT; args.pa_type = PFIL_TYPE_ETHERNET; args.pa_headname = PFIL_ETHER_NAME; V_link_pfil_head = pfil_head_register(&args); #ifdef VIMAGE netisr_register_vnet(ðer_nh); #endif } VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_init, NULL); #ifdef VIMAGE static void vnet_ether_pfil_destroy(__unused void *arg) { pfil_head_unregister(V_link_pfil_head); } VNET_SYSUNINIT(vnet_ether_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_ANY, vnet_ether_pfil_destroy, NULL); static void vnet_ether_destroy(__unused void *arg) { netisr_unregister_vnet(ðer_nh); } VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_destroy, NULL); #endif static void ether_input(struct ifnet *ifp, struct mbuf *m) { struct epoch_tracker et; struct mbuf *mn; bool needs_epoch; needs_epoch = (ifp->if_flags & IFF_NEEDSEPOCH); #ifdef INVARIANTS /* * This temporary code is here to prevent epoch unaware and unmarked * drivers to panic the system. Once all drivers are taken care of, * the whole INVARIANTS block should go away. */ if (!needs_epoch && !in_epoch(net_epoch_preempt)) { static bool printedonce; needs_epoch = true; if (!printedonce) { printedonce = true; if_printf(ifp, "called %s w/o net epoch! " "PLEASE file a bug report.", __func__); #ifdef KDB kdb_backtrace(); #endif } } #endif /* * The drivers are allowed to pass in a chain of packets linked with * m_nextpkt. We split them up into separate packets here and pass * them up. This allows the drivers to amortize the receive lock. */ CURVNET_SET_QUIET(ifp->if_vnet); if (__predict_false(needs_epoch)) NET_EPOCH_ENTER(et); while (m) { mn = m->m_nextpkt; m->m_nextpkt = NULL; /* * We will rely on rcvif being set properly in the deferred * context, so assert it is correct here. */ MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch m %p " "rcvif %p ifp %p", __func__, m, m->m_pkthdr.rcvif, ifp)); netisr_dispatch(NETISR_ETHER, m); m = mn; } if (__predict_false(needs_epoch)) NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } /* * Upper layer processing for a received Ethernet packet. */ void ether_demux(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; int i, isr; u_short ether_type; NET_EPOCH_ASSERT(); KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); /* Do not grab PROMISC frames in case we are re-entered. */ if (PFIL_HOOKED_IN(V_link_pfil_head) && !(m->m_flags & M_PROMISC)) { i = pfil_mbuf_in(V_link_pfil_head, &m, ifp, NULL); if (i != PFIL_PASS) return; } eh = mtod(m, struct ether_header *); ether_type = ntohs(eh->ether_type); /* * If this frame has a VLAN tag other than 0, call vlan_input() * if its module is loaded. Otherwise, drop. */ if ((m->m_flags & M_VLANTAG) && EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) { if (ifp->if_vlantrunk == NULL) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!", __func__)); /* Clear before possibly re-entering ether_input(). */ m->m_flags &= ~M_PROMISC; (*vlan_input_p)(ifp, m); return; } /* * Pass promiscuously received frames to the upper layer if the user * requested this by setting IFF_PPROMISC. Otherwise, drop them. */ if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) { m_freem(m); return; } /* * Reset layer specific mbuf flags to avoid confusing upper layers. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); /* * Dispatch frame to upper layer. */ switch (ether_type) { #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } /* Strip off Ethernet header. */ m_adj(m, ETHER_HDR_LEN); netisr_dispatch(isr, m); return; discard: /* * Packet is to be discarded. If netgraph is present, * hand the packet to it for last chance processing; * otherwise dispose of it. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_orphan_p != NULL, ("ng_ether_input_orphan_p is NULL")); (*ng_ether_input_orphan_p)(ifp, m); return; } m_freem(m); } /* * Convert Ethernet address to printable (loggable) representation. * This routine is for compatibility; it's better to just use * * printf("%6D", , ":"); * * since there's no static buffer involved. */ char * ether_sprintf(const u_char *ap) { static char etherbuf[18]; snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":"); return (etherbuf); } /* * Perform common duties while attaching to interface list */ void ether_ifattach(struct ifnet *ifp, const u_int8_t *lla) { int i; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_addrlen = ETHER_ADDR_LEN; ifp->if_hdrlen = ETHER_HDR_LEN; ifp->if_mtu = ETHERMTU; if_attach(ifp); ifp->if_output = ether_output; ifp->if_input = ether_input; ifp->if_resolvemulti = ether_resolvemulti; ifp->if_requestencap = ether_requestencap; #ifdef VIMAGE ifp->if_reassign = ether_reassign; #endif if (ifp->if_baudrate == 0) ifp->if_baudrate = IF_Mbps(10); /* just a default */ ifp->if_broadcastaddr = etherbroadcastaddr; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ETHER; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (ifp->if_hw_addr != NULL) bcopy(lla, ifp->if_hw_addr, ifp->if_addrlen); bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); if (ng_ether_attach_p != NULL) (*ng_ether_attach_p)(ifp); /* Announce Ethernet MAC address if non-zero. */ for (i = 0; i < ifp->if_addrlen; i++) if (lla[i] != 0) break; if (i != ifp->if_addrlen) if_printf(ifp, "Ethernet address: %6D\n", lla, ":"); uuid_ether_add(LLADDR(sdl)); /* Add necessary bits are setup; announce it now. */ EVENTHANDLER_INVOKE(ether_ifattach_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("ETHERNET", ifp->if_xname, "IFATTACH", NULL); } /* * Perform common duties while detaching an Ethernet interface */ void ether_ifdetach(struct ifnet *ifp) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr); uuid_ether_del(LLADDR(sdl)); if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } bpfdetach(ifp); if_detach(ifp); } #ifdef VIMAGE void ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) { if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } if (ng_ether_attach_p != NULL) { CURVNET_SET_QUIET(new_vnet); (*ng_ether_attach_p)(ifp); CURVNET_RESTORE(); } } #endif SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Ethernet"); #if 0 /* * This is for reference. We have a table-driven version * of the little-endian crc32 generator, which is faster * than the double-loop. */ uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { size_t i; uint32_t crc; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = (crc ^ data) & 1; crc >>= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_LE); } } return (crc); } #else uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { static const uint32_t crctab[] = { 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; size_t i; uint32_t crc; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { crc ^= buf[i]; crc = (crc >> 4) ^ crctab[crc & 0xf]; crc = (crc >> 4) ^ crctab[crc & 0xf]; } return (crc); } #endif uint32_t ether_crc32_be(const uint8_t *buf, size_t len) { size_t i; uint32_t crc, carry; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01); crc <<= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_BE) | carry; } } return (crc); } int ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], ETHER_ADDR_LEN); break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ETHERMTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; case SIOCSLANPCP: error = priv_check(curthread, PRIV_NET_SETLANPCP); if (error != 0) break; if (ifr->ifr_lan_pcp > 7 && ifr->ifr_lan_pcp != IFNET_PCP_NONE) { error = EINVAL; } else { ifp->if_pcp = ifr->ifr_lan_pcp; /* broadcast event about PCP change */ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP); } break; case SIOCGLANPCP: ifr->ifr_lan_pcp = ifp->if_pcp; break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } static int ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!ETHER_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = NULL; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = NULL; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } static moduledata_t ether_mod = { .name = "ether", }; void ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen) { struct ether_vlan_header vlan; struct mbuf mv, mb; KASSERT((m->m_flags & M_VLANTAG) != 0, ("%s: vlan information not present", __func__)); KASSERT(m->m_len >= sizeof(struct ether_header), ("%s: mbuf not large enough for header", __func__)); bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header)); vlan.evl_proto = vlan.evl_encap_proto; vlan.evl_encap_proto = htons(ETHERTYPE_VLAN); vlan.evl_tag = htons(m->m_pkthdr.ether_vtag); m->m_len -= sizeof(struct ether_header); m->m_data += sizeof(struct ether_header); /* * If a data link has been supplied by the caller, then we will need to * re-create a stack allocated mbuf chain with the following structure: * * (1) mbuf #1 will contain the supplied data link * (2) mbuf #2 will contain the vlan header * (3) mbuf #3 will contain the original mbuf's packet data * * Otherwise, submit the packet and vlan header via bpf_mtap2(). */ if (data != NULL) { mv.m_next = m; mv.m_data = (caddr_t)&vlan; mv.m_len = sizeof(vlan); mb.m_next = &mv; mb.m_data = data; mb.m_len = dlen; bpf_mtap(bp, &mb); } else bpf_mtap2(bp, &vlan, sizeof(vlan), m); m->m_len += sizeof(struct ether_header); m->m_data -= sizeof(struct ether_header); } struct mbuf * ether_vlanencap_proto(struct mbuf *m, uint16_t tag, uint16_t proto) { struct ether_vlan_header *evl; M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); if (m == NULL) return (NULL); /* M_PREPEND takes care of m_len, m_pkthdr.len for us */ if (m->m_len < sizeof(*evl)) { m = m_pullup(m, sizeof(*evl)); if (m == NULL) return (NULL); } /* * Transform the Ethernet header into an Ethernet header * with 802.1Q encapsulation. */ evl = mtod(m, struct ether_vlan_header *); bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); evl->evl_encap_proto = htons(proto); evl->evl_tag = htons(tag); return (m); } void ether_bpf_mtap_if(struct ifnet *ifp, struct mbuf *m) { if (bpf_peers_present(ifp->if_bpf)) { M_ASSERTVALID(m); if ((m->m_flags & M_VLANTAG) != 0) ether_vlan_mtap(ifp->if_bpf, m, NULL, 0); else bpf_mtap(ifp->if_bpf, m); } } static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "IEEE 802.1Q VLAN"); static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "for consistency"); VNET_DEFINE_STATIC(int, soft_pad); #define V_soft_pad VNET(soft_pad) SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(soft_pad), 0, "pad short frames before tagging"); /* * For now, make preserving PCP via an mbuf tag optional, as it increases * per-packet memory allocations and frees. In the future, it would be * preferable to reuse ether_vtag for this, or similar. */ VNET_DEFINE(int, vlan_mtag_pcp) = 0; #define V_vlan_mtag_pcp VNET(vlan_mtag_pcp) SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(vlan_mtag_pcp), 0, "Retain VLAN PCP information as packets are passed up the stack"); static inline bool ether_do_pcp(struct ifnet *ifp, struct mbuf *m) { if (ifp->if_type == IFT_L2VLAN) return (false); if (ifp->if_pcp != IFNET_PCP_NONE || (m->m_flags & M_VLANTAG) != 0) return (true); if (V_vlan_mtag_pcp && m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL) != NULL) return (true); return (false); } bool ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, struct ifnet *p, const struct ether_8021q_tag *qtag) { struct m_tag *mtag; int n; uint16_t tag; uint8_t pcp = qtag->pcp; static const char pad[8]; /* just zeros */ /* * Pad the frame to the minimum size allowed if told to. * This option is in accord with IEEE Std 802.1Q, 2003 Ed., * paragraph C.4.4.3.b. It can help to work around buggy * bridges that violate paragraph C.4.4.3.a from the same * document, i.e., fail to pad short frames after untagging. * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but * untagging it will produce a 62-byte frame, which is a runt * and requires padding. There are VLAN-enabled network * devices that just discard such runts instead or mishandle * them somehow. */ if (V_soft_pad && p->if_type == IFT_ETHER) { for (n = ETHERMIN + ETHER_HDR_LEN - (*mp)->m_pkthdr.len; n > 0; n -= sizeof(pad)) { if (!m_append(*mp, min(n, sizeof(pad)), pad)) break; } if (n > 0) { m_freem(*mp); *mp = NULL; if_printf(ife, "cannot pad short frame"); return (false); } } /* * If PCP is set in mbuf, use it */ if ((*mp)->m_flags & M_VLANTAG) { pcp = EVL_PRIOFTAG((*mp)->m_pkthdr.ether_vtag); } /* * If underlying interface can do VLAN tag insertion itself, * just pass the packet along. However, we need some way to * tell the interface where the packet came from so that it * knows how to find the VLAN tag to use, so we attach a * packet tag that holds it. */ if (V_vlan_mtag_pcp && (mtag = m_tag_locate(*mp, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL)) != NULL) tag = EVL_MAKETAG(qtag->vid, *(uint8_t *)(mtag + 1), 0); else tag = EVL_MAKETAG(qtag->vid, pcp, 0); if ((p->if_capenable & IFCAP_VLAN_HWTAGGING) && (qtag->proto == ETHERTYPE_VLAN)) { (*mp)->m_pkthdr.ether_vtag = tag; (*mp)->m_flags |= M_VLANTAG; } else { *mp = ether_vlanencap_proto(*mp, tag, qtag->proto); if (*mp == NULL) { if_printf(ife, "unable to prepend 802.1Q header"); return (false); } (*mp)->m_flags &= ~M_VLANTAG; } return (true); } /* * Allocate an address from the FreeBSD Foundation OUI. This uses a * cryptographic hash function on the containing jail's name, UUID and the * interface name to attempt to provide a unique but stable address. * Pseudo-interfaces which require a MAC address should use this function to * allocate non-locally-administered addresses. */ void ether_gen_addr_byname(const char *nameunit, struct ether_addr *hwaddr) { SHA1_CTX ctx; char *buf; char uuid[HOSTUUIDLEN + 1]; uint64_t addr; int i, sz; char digest[SHA1_RESULTLEN]; char jailname[MAXHOSTNAMELEN]; getcredhostuuid(curthread->td_ucred, uuid, sizeof(uuid)); if (strncmp(uuid, DEFAULT_HOSTUUID, sizeof(uuid)) == 0) { /* Fall back to a random mac address. */ goto rando; } /* If each (vnet) jail would also have a unique hostuuid this would not * be necessary. */ getjailname(curthread->td_ucred, jailname, sizeof(jailname)); sz = asprintf(&buf, M_TEMP, "%s-%s-%s", uuid, nameunit, jailname); if (sz < 0) { /* Fall back to a random mac address. */ goto rando; } SHA1Init(&ctx); SHA1Update(&ctx, buf, sz); SHA1Final(digest, &ctx); free(buf, M_TEMP); addr = ((digest[0] << 16) | (digest[1] << 8) | digest[2]) & OUI_FREEBSD_GENERATED_MASK; addr = OUI_FREEBSD(addr); for (i = 0; i < ETHER_ADDR_LEN; ++i) { hwaddr->octet[i] = addr >> ((ETHER_ADDR_LEN - i - 1) * 8) & 0xFF; } return; rando: arc4rand(hwaddr, sizeof(*hwaddr), 0); /* Unicast */ hwaddr->octet[0] &= 0xFE; /* Locally administered. */ hwaddr->octet[0] |= 0x02; } void ether_gen_addr(struct ifnet *ifp, struct ether_addr *hwaddr) { ether_gen_addr_byname(if_name(ifp), hwaddr); } DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(ether, 1);