Index: stable/10/sys/netinet/if_ether.c =================================================================== --- stable/10/sys/netinet/if_ether.c (revision 309339) +++ stable/10/sys/netinet/if_ether.c (revision 309340) @@ -1,975 +1,1104 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_ether.c 8.1 (Berkeley) 6/10/93 */ /* * Ethernet address resolution protocol. * TODO: * add "inuse/lock" bit (or ref. count) along with valid bit */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #include #include #include #define SIN(s) ((const struct sockaddr_in *)(s)) #define SDL(s) ((struct sockaddr_dl *)s) SYSCTL_DECL(_net_link_ether); static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, ""); /* timer values */ static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20 * minutes */ static VNET_DEFINE(int, arp_maxtries) = 5; VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for * local traffic */ static VNET_DEFINE(int, arp_proxyall) = 0; static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for * 20 seconds */ VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ VNET_PCPUSTAT_SYSINIT(arpstat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(arpstat); #endif /* VIMAGE */ static VNET_DEFINE(int, arp_maxhold) = 1; #define V_arpt_keep VNET(arpt_keep) #define V_arpt_down VNET(arpt_down) #define V_arp_maxtries VNET(arp_maxtries) #define V_arp_proxyall VNET(arp_proxyall) #define V_arp_maxhold VNET(arp_maxhold) SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, &VNET_NAME(arpt_keep), 0, "ARP entry lifetime in seconds"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW, &VNET_NAME(arp_maxtries), 0, "ARP resolution attempts before returning error"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW, &VNET_NAME(useloopback), 0, "Use the loopback interface for local traffic"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, &VNET_NAME(arp_proxyall), 0, "Enable proxy ARP for all suitable requests"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW, &VNET_NAME(arpt_down), 0, "Incomplete ARP entry lifetime in seconds"); SYSCTL_VNET_PCPUSTAT(_net_link_ether_arp, OID_AUTO, stats, struct arpstat, arpstat, "ARP statistics (struct arpstat, net/if_arp.h)"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW, &VNET_NAME(arp_maxhold), 0, "Number of packets to hold per ARP entry"); +/* + * Due to the exponential backoff algorithm used for the interval between GARP + * retransmissions, the maximum number of retransmissions is limited for + * sanity. This limit corresponds to a maximum interval between retransmissions + * of 2^16 seconds ~= 18 hours. + * + * Making this limit more dynamic is more complicated than worthwhile, + * especially since sending out GARPs spaced days apart would be of little + * use. A maximum dynamic limit would look something like: + * + * const int max = fls(INT_MAX / hz) - 1; + */ +#define MAX_GARP_RETRANSMITS 16 +static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS); +static int garp_rexmit_count = 0; /* GARP retransmission setting. */ + +SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count, + CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE, + &garp_rexmit_count, 0, sysctl_garp_rexmit, "I", + "Number of times to retransmit GARP packets;" + " 0 to disable, maximum of 16"); + static void arp_init(void); static void arpintr(struct mbuf *); static void arptimer(void *); #ifdef INET static void in_arpinput(struct mbuf *); #endif static const struct netisr_handler arp_nh = { .nh_name = "arp", .nh_handler = arpintr, .nh_proto = NETISR_ARP, .nh_policy = NETISR_POLICY_SOURCE, }; #ifdef AF_INET /* * called by in_ifscrub to remove entry from the table when * the interface goes away */ void arp_ifscrub(struct ifnet *ifp, uint32_t addr) { struct sockaddr_in addr4; bzero((void *)&addr4, sizeof(addr4)); addr4.sin_len = sizeof(addr4); addr4.sin_family = AF_INET; addr4.sin_addr.s_addr = addr; IF_AFDATA_WLOCK(ifp); lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR), (struct sockaddr *)&addr4); IF_AFDATA_WUNLOCK(ifp); } #endif /* * Timeout routine. Age arp_tab entries periodically. */ static void arptimer(void *arg) { struct llentry *lle = (struct llentry *)arg; struct ifnet *ifp; if (lle->la_flags & LLE_STATIC) { return; } LLE_WLOCK(lle); if (callout_pending(&lle->la_timer)) { /* * Here we are a bit odd here in the treatment of * active/pending. If the pending bit is set, it got * rescheduled before I ran. The active * bit we ignore, since if it was stopped * in ll_tablefree() and was currently running * it would have return 0 so the code would * not have deleted it since the callout could * not be stopped so we want to go through * with the delete here now. If the callout * was restarted, the pending bit will be back on and * we just want to bail since the callout_reset would * return 1 and our reference would have been removed * by arpresolve() below. */ LLE_WUNLOCK(lle); return; } ifp = lle->lle_tbl->llt_ifp; CURVNET_SET(ifp->if_vnet); if ((lle->la_flags & LLE_DELETED) == 0) { int evt; if (lle->la_flags & LLE_VALID) evt = LLENTRY_EXPIRED; else evt = LLENTRY_TIMEDOUT; EVENTHANDLER_INVOKE(lle_event, lle, evt); } callout_stop(&lle->la_timer); /* XXX: LOR avoidance. We still have ref on lle. */ LLE_WUNLOCK(lle); IF_AFDATA_LOCK(ifp); LLE_WLOCK(lle); /* Guard against race with other llentry_free(). */ if (lle->la_flags & LLE_LINKED) { size_t pkts_dropped; LLE_REMREF(lle); pkts_dropped = llentry_free(lle); ARPSTAT_ADD(dropped, pkts_dropped); } else LLE_FREE_LOCKED(lle); IF_AFDATA_UNLOCK(ifp); ARPSTAT_INC(timeouts); CURVNET_RESTORE(); } /* * Broadcast an ARP request. Caller specifies: * - arp header source ip address * - arp header target ip address * - arp header source ethernet address */ void arprequest(struct ifnet *ifp, const struct in_addr *sip, const struct in_addr *tip, u_char *enaddr) { struct mbuf *m; struct arphdr *ah; struct sockaddr sa; u_char *carpaddr = NULL; if (sip == NULL) { /* * The caller did not supply a source address, try to find * a compatible one among those assigned to this interface. */ struct ifaddr *ifa; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; if (ifa->ifa_carp) { if ((*carp_iamatch_p)(ifa, &carpaddr) == 0) continue; sip = &IA_SIN(ifa)->sin_addr; } else { carpaddr = NULL; sip = &IA_SIN(ifa)->sin_addr; } if (0 == ((sip->s_addr ^ tip->s_addr) & IA_MASKSIN(ifa)->sin_addr.s_addr)) break; /* found it. */ } IF_ADDR_RUNLOCK(ifp); if (sip == NULL) { printf("%s: cannot find matching address\n", __func__); return; } } if (enaddr == NULL) enaddr = carpaddr ? carpaddr : (u_char *)IF_LLADDR(ifp); if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) return; m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) + 2*ifp->if_data.ifi_addrlen; m->m_pkthdr.len = m->m_len; MH_ALIGN(m, m->m_len); ah = mtod(m, struct arphdr *); bzero((caddr_t)ah, m->m_len); #ifdef MAC mac_netinet_arp_send(ifp, m); #endif ah->ar_pro = htons(ETHERTYPE_IP); ah->ar_hln = ifp->if_addrlen; /* hardware address length */ ah->ar_pln = sizeof(struct in_addr); /* protocol address length */ ah->ar_op = htons(ARPOP_REQUEST); bcopy(enaddr, ar_sha(ah), ah->ar_hln); bcopy(sip, ar_spa(ah), ah->ar_pln); bcopy(tip, ar_tpa(ah), ah->ar_pln); sa.sa_family = AF_ARP; sa.sa_len = 2; m->m_flags |= M_BCAST; m_clrprotoflags(m); /* Avoid confusing lower layers. */ (*ifp->if_output)(ifp, m, &sa, NULL); ARPSTAT_INC(txrequests); } /* * Resolve an IP address into an ethernet address. * On input: * ifp is the interface we use * rt0 is the route to the final destination (possibly useless) * m is the mbuf. May be NULL if we don't have a packet. * dst is the next hop, * desten is where we want the address. * * On success, desten is filled in and the function returns 0; * If the packet must be held pending resolution, we return EWOULDBLOCK * On other errors, we return the corresponding error code. * Note that m_freem() handles NULL. */ int arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, const struct sockaddr *dst, u_char *desten, struct llentry **lle) { struct llentry *la = 0; u_int flags = 0; struct mbuf *curr = NULL; struct mbuf *next = NULL; int error, renew; *lle = NULL; if (m != NULL) { if (m->m_flags & M_BCAST) { /* broadcast */ (void)memcpy(desten, ifp->if_broadcastaddr, ifp->if_addrlen); return (0); } if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) { /* multicast */ ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); return (0); } } retry: IF_AFDATA_RLOCK(ifp); la = lla_lookup(LLTABLE(ifp), flags, dst); IF_AFDATA_RUNLOCK(ifp); if ((la == NULL) && ((flags & LLE_EXCLUSIVE) == 0) && ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0)) { flags |= (LLE_CREATE | LLE_EXCLUSIVE); IF_AFDATA_WLOCK(ifp); la = lla_lookup(LLTABLE(ifp), flags, dst); IF_AFDATA_WUNLOCK(ifp); } if (la == NULL) { if (flags & LLE_CREATE) log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s on %s\n", inet_ntoa(SIN(dst)->sin_addr), ifp->if_xname); m_freem(m); return (EINVAL); } if ((la->la_flags & LLE_VALID) && ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) { bcopy(&la->ll_addr, desten, ifp->if_addrlen); renew = 0; /* * If entry has an expiry time and it is approaching, * see if we need to send an ARP request within this * arpt_down interval. */ if (!(la->la_flags & LLE_STATIC) && time_uptime + la->la_preempt > la->la_expire) { renew = 1; la->la_preempt--; } *lle = la; if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(la); else LLE_RUNLOCK(la); if (renew == 1) arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL); return (0); } if (la->la_flags & LLE_STATIC) { /* should not happen! */ log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n", inet_ntoa(SIN(dst)->sin_addr)); m_freem(m); error = EINVAL; goto done; } renew = (la->la_asked == 0 || la->la_expire != time_uptime); if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) { flags |= LLE_EXCLUSIVE; LLE_RUNLOCK(la); goto retry; } /* * There is an arptab entry, but no ethernet address * response yet. Add the mbuf to the list, dropping * the oldest packet if we have exceeded the system * setting. */ if (m != NULL) { if (la->la_numheld >= V_arp_maxhold) { if (la->la_hold != NULL) { next = la->la_hold->m_nextpkt; m_freem(la->la_hold); la->la_hold = next; la->la_numheld--; ARPSTAT_INC(dropped); } } if (la->la_hold != NULL) { curr = la->la_hold; while (curr->m_nextpkt != NULL) curr = curr->m_nextpkt; curr->m_nextpkt = m; } else la->la_hold = m; la->la_numheld++; if (renew == 0 && (flags & LLE_EXCLUSIVE)) { flags &= ~LLE_EXCLUSIVE; LLE_DOWNGRADE(la); } } /* * Return EWOULDBLOCK if we have tried less than arp_maxtries. It * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH * if we have already sent arp_maxtries ARP requests. Retransmit the * ARP request, but not faster than one request per second. */ if (la->la_asked < V_arp_maxtries) error = EWOULDBLOCK; /* First request. */ else error = rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) ? EHOSTUNREACH : EHOSTDOWN; if (renew) { int canceled; LLE_ADDREF(la); la->la_expire = time_uptime; canceled = callout_reset(&la->la_timer, hz * V_arpt_down, arptimer, la); if (canceled) LLE_REMREF(la); la->la_asked++; LLE_WUNLOCK(la); arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL); return (error); } done: if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(la); else LLE_RUNLOCK(la); return (error); } /* * Common length and type checks are done here, * then the protocol-specific routine is called. */ static void arpintr(struct mbuf *m) { struct arphdr *ar; if (m->m_len < sizeof(struct arphdr) && ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) { log(LOG_NOTICE, "arp: runt packet -- m_pullup failed\n"); return; } ar = mtod(m, struct arphdr *); if (ntohs(ar->ar_hrd) != ARPHRD_ETHER && ntohs(ar->ar_hrd) != ARPHRD_IEEE802 && ntohs(ar->ar_hrd) != ARPHRD_ARCNET && ntohs(ar->ar_hrd) != ARPHRD_IEEE1394 && ntohs(ar->ar_hrd) != ARPHRD_INFINIBAND) { log(LOG_NOTICE, "arp: unknown hardware address format (0x%2D)" " (from %*D to %*D)\n", (unsigned char *)&ar->ar_hrd, "", ETHER_ADDR_LEN, (u_char *)ar_sha(ar), ":", ETHER_ADDR_LEN, (u_char *)ar_tha(ar), ":"); m_freem(m); return; } if (m->m_len < arphdr_len(ar)) { if ((m = m_pullup(m, arphdr_len(ar))) == NULL) { log(LOG_NOTICE, "arp: runt packet\n"); m_freem(m); return; } ar = mtod(m, struct arphdr *); } ARPSTAT_INC(received); switch (ntohs(ar->ar_pro)) { #ifdef INET case ETHERTYPE_IP: in_arpinput(m); return; #endif } m_freem(m); } #ifdef INET /* * ARP for Internet protocols on 10 Mb/s Ethernet. * Algorithm is that given in RFC 826. * In addition, a sanity check is performed on the sender * protocol address, to catch impersonators. * We no longer handle negotiations for use of trailer protocol: * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent * along with IP replies if we wanted trailers sent to us, * and also sent them in response to IP replies. * This allowed either end to announce the desire to receive * trailer packets. * We no longer reply to requests for ETHERTYPE_TRAIL protocol either, * but formerly didn't normally send requests. */ static int log_arp_wrong_iface = 1; static int log_arp_movements = 1; static int log_arp_permanent_modify = 1; static int allow_multicast = 0; static struct timeval arp_lastlog; static int arp_curpps; static int arp_maxpps = 1; SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW, &log_arp_wrong_iface, 0, "log arp packets arriving on the wrong interface"); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW, &log_arp_movements, 0, "log arp replies from MACs different than the one in the cache"); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW, &log_arp_permanent_modify, 0, "log arp replies from MACs different than the one in the permanent arp entry"); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW, &allow_multicast, 0, "accept multicast addresses"); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second, CTLFLAG_RW, &arp_maxpps, 0, "Maximum number of remotely triggered ARP messages that can be " "logged per second"); #define ARP_LOG(pri, ...) do { \ if (ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps)) \ log((pri), "arp: " __VA_ARGS__); \ } while (0) static void in_arpinput(struct mbuf *m) { struct arphdr *ah; struct ifnet *ifp = m->m_pkthdr.rcvif; struct llentry *la = NULL; struct rtentry *rt; struct ifaddr *ifa; struct in_ifaddr *ia; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; u_int8_t *enaddr = NULL; int op, flags; int req_len; int bridged = 0, is_bridge = 0; int carped; struct sockaddr_in sin; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr.s_addr = 0; if (ifp->if_bridge) bridged = 1; if (ifp->if_type == IFT_BRIDGE) is_bridge = 1; req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) { ARP_LOG(LOG_NOTICE, "runt packet -- m_pullup failed\n"); return; } ah = mtod(m, struct arphdr *); /* * ARP is only for IPv4 so we can reject packets with * a protocol length not equal to an IPv4 address. */ if (ah->ar_pln != sizeof(struct in_addr)) { ARP_LOG(LOG_NOTICE, "requested protocol length != %zu\n", sizeof(struct in_addr)); goto drop; } if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) { ARP_LOG(LOG_NOTICE, "%*D is multicast\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":"); goto drop; } op = ntohs(ah->ar_op); (void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr)); (void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr)); if (op == ARPOP_REPLY) ARPSTAT_INC(rxreplies); /* * For a bridge, we want to check the address irrespective * of the receive interface. (This will change slightly * when we have clusters of interfaces). */ IN_IFADDR_RLOCK(); LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) { if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) || ia->ia_ifp == ifp) && itaddr.s_addr == ia->ia_addr.sin_addr.s_addr && (ia->ia_ifa.ifa_carp == NULL || (*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); goto match; } } LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash) if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) || ia->ia_ifp == ifp) && isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); goto match; } #define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \ (ia->ia_ifp->if_bridge == ifp->if_softc && \ !bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) && \ addr == ia->ia_addr.sin_addr.s_addr) /* * Check the case when bridge shares its MAC address with * some of its children, so packets are claimed by bridge * itself (bridge_input() does it first), but they are really * meant to be destined to the bridge member. */ if (is_bridge) { LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) { if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) { ifa_ref(&ia->ia_ifa); ifp = ia->ia_ifp; IN_IFADDR_RUNLOCK(); goto match; } } } #undef BDG_MEMBER_MATCHES_ARP IN_IFADDR_RUNLOCK(); /* * No match, use the first inet address on the receive interface * as a dummy address for the rest of the function. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET && (ifa->ifa_carp == NULL || (*carp_iamatch_p)(ifa, &enaddr))) { ia = ifatoia(ifa); ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto match; } IF_ADDR_RUNLOCK(ifp); /* * If bridging, fall back to using any inet address. */ IN_IFADDR_RLOCK(); if (!bridged || (ia = TAILQ_FIRST(&V_in_ifaddrhead)) == NULL) { IN_IFADDR_RUNLOCK(); goto drop; } ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); match: if (!enaddr) enaddr = (u_int8_t *)IF_LLADDR(ifp); carped = (ia->ia_ifa.ifa_carp != NULL); myaddr = ia->ia_addr.sin_addr; ifa_free(&ia->ia_ifa); if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen)) goto drop; /* it's from me, ignore it. */ if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) { ARP_LOG(LOG_NOTICE, "link address is broadcast for IP address " "%s!\n", inet_ntoa(isaddr)); goto drop; } /* * Warn if another host is using the same IP address, but only if the * IP address isn't 0.0.0.0, which is used for DHCP only, in which * case we suppress the warning to avoid false positive complaints of * potential misconfiguration. */ if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) { ARP_LOG(LOG_ERR, "%*D is using my IP address %s on %s!\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", inet_ntoa(isaddr), ifp->if_xname); itaddr = myaddr; ARPSTAT_INC(dupips); goto reply; } if (ifp->if_flags & IFF_STATICARP) goto reply; bzero(&sin, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = isaddr; flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0; flags |= LLE_EXCLUSIVE; IF_AFDATA_LOCK(ifp); la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin); IF_AFDATA_UNLOCK(ifp); if (la != NULL) { /* the following is not an error when doing bridging */ if (!bridged && la->lle_tbl->llt_ifp != ifp) { if (log_arp_wrong_iface) ARP_LOG(LOG_WARNING, "%s is on %s " "but got reply from %*D on %s\n", inet_ntoa(isaddr), la->lle_tbl->llt_ifp->if_xname, ifp->if_addrlen, (u_char *)ar_sha(ah), ":", ifp->if_xname); LLE_WUNLOCK(la); goto reply; } if ((la->la_flags & LLE_VALID) && bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) { if (la->la_flags & LLE_STATIC) { LLE_WUNLOCK(la); if (log_arp_permanent_modify) ARP_LOG(LOG_ERR, "%*D attempts to modify " "permanent entry for %s on %s\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", inet_ntoa(isaddr), ifp->if_xname); goto reply; } if (log_arp_movements) { ARP_LOG(LOG_INFO, "%s moved from %*D " "to %*D on %s\n", inet_ntoa(isaddr), ifp->if_addrlen, (u_char *)&la->ll_addr, ":", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", ifp->if_xname); } } if (ifp->if_addrlen != ah->ar_hln) { LLE_WUNLOCK(la); ARP_LOG(LOG_WARNING, "from %*D: addr len: new %d, " "i/f %d (ignored)\n", ifp->if_addrlen, (u_char *) ar_sha(ah), ":", ah->ar_hln, ifp->if_addrlen); goto drop; } (void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen); la->la_flags |= LLE_VALID; EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED); if (!(la->la_flags & LLE_STATIC)) { int canceled; LLE_ADDREF(la); la->la_expire = time_uptime + V_arpt_keep; canceled = callout_reset(&la->la_timer, hz * V_arpt_keep, arptimer, la); if (canceled) LLE_REMREF(la); } la->la_asked = 0; la->la_preempt = V_arp_maxtries; /* * The packets are all freed within the call to the output * routine. * * NB: The lock MUST be released before the call to the * output routine. */ if (la->la_hold != NULL) { struct mbuf *m_hold, *m_hold_next; m_hold = la->la_hold; la->la_hold = NULL; la->la_numheld = 0; memcpy(&sa, L3_ADDR(la), sizeof(sa)); LLE_WUNLOCK(la); for (; m_hold != NULL; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; /* Avoid confusing lower layers. */ m_clrprotoflags(m_hold); (*ifp->if_output)(ifp, m_hold, &sa, NULL); } } else LLE_WUNLOCK(la); } reply: if (op != ARPOP_REQUEST) goto drop; ARPSTAT_INC(rxrequests); if (itaddr.s_addr == myaddr.s_addr) { /* Shortcut.. the receiving interface is the target. */ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); } else { struct llentry *lle = NULL; sin.sin_addr = itaddr; IF_AFDATA_RLOCK(ifp); lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin); IF_AFDATA_RUNLOCK(ifp); if ((lle != NULL) && (lle->la_flags & LLE_PUB)) { (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln); LLE_RUNLOCK(lle); } else { if (lle != NULL) LLE_RUNLOCK(lle); if (!V_arp_proxyall) goto drop; sin.sin_addr = itaddr; /* XXX MRT use table 0 for arp reply */ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0); if (!rt) goto drop; /* * Don't send proxies for nodes on the same interface * as this one came out of, or we'll get into a fight * over who claims what Ether address. */ if (!rt->rt_ifp || rt->rt_ifp == ifp) { RTFREE_LOCKED(rt); goto drop; } RTFREE_LOCKED(rt); (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); /* * Also check that the node which sent the ARP packet * is on the interface we expect it to be on. This * avoids ARP chaos if an interface is connected to the * wrong network. */ sin.sin_addr = isaddr; /* XXX MRT use table 0 for arp checks */ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0); if (!rt) goto drop; if (rt->rt_ifp != ifp) { ARP_LOG(LOG_INFO, "proxy: ignoring request" " from %s via %s, expecting %s\n", inet_ntoa(isaddr), ifp->if_xname, rt->rt_ifp->if_xname); RTFREE_LOCKED(rt); goto drop; } RTFREE_LOCKED(rt); #ifdef DEBUG_PROXY printf("arp: proxying for %s\n", inet_ntoa(itaddr)); #endif } } if (itaddr.s_addr == myaddr.s_addr && IN_LINKLOCAL(ntohl(itaddr.s_addr))) { /* RFC 3927 link-local IPv4; always reply by broadcast. */ #ifdef DEBUG_LINKLOCAL printf("arp: sending reply for link-local addr %s\n", inet_ntoa(itaddr)); #endif m->m_flags |= M_BCAST; m->m_flags &= ~M_MCAST; } else { /* default behaviour; never reply by broadcast. */ m->m_flags &= ~(M_BCAST|M_MCAST); } (void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln); (void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln); ah->ar_op = htons(ARPOP_REPLY); ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */ m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln); m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = NULL; sa.sa_family = AF_ARP; sa.sa_len = 2; m_clrprotoflags(m); /* Avoid confusing lower layers. */ (*ifp->if_output)(ifp, m, &sa, NULL); ARPSTAT_INC(txreplies); return; drop: m_freem(m); } #endif +/* + * Handle the garp_rexmit_count. Like sysctl_handle_int(), but limits the range + * of valid values. + */ +static int +sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS) +{ + int error; + int rexmit_count = *(int *)arg1; + + error = sysctl_handle_int(oidp, &rexmit_count, 0, req); + + /* Enforce limits on any new value that may have been set. */ + if (!error && req->newptr) { + /* A new value was set. */ + if (rexmit_count < 0) { + rexmit_count = 0; + } else if (rexmit_count > MAX_GARP_RETRANSMITS) { + rexmit_count = MAX_GARP_RETRANSMITS; + } + *(int *)arg1 = rexmit_count; + } + + return (error); +} + +/* + * Retransmit a Gratuitous ARP (GARP) and, if necessary, schedule a callout to + * retransmit it again. A pending callout owns a reference to the ifa. + */ +static void +garp_rexmit(void *arg) +{ + struct in_ifaddr *ia = arg; + + if (callout_pending(&ia->ia_garp_timer) || + !callout_active(&ia->ia_garp_timer)) { + IFA_UNLOCK(&ia->ia_ifa); + ifa_free(&ia->ia_ifa); + return; + } + + /* + * Drop ifa lock while the ARP request is generated. + */ + IFA_UNLOCK(&ia->ia_ifa); + + arprequest(ia->ia_ifa.ifa_ifp, &IA_SIN(ia)->sin_addr, + &IA_SIN(ia)->sin_addr, IF_LLADDR(ia->ia_ifa.ifa_ifp)); + + /* + * Increment the count of retransmissions. If the count has reached the + * maximum value, stop sending the GARP packets. Otherwise, schedule + * the callout to retransmit another GARP packet. + */ + ++ia->ia_garp_count; + if (ia->ia_garp_count >= garp_rexmit_count) { + ifa_free(&ia->ia_ifa); + } else { + int rescheduled; + IFA_LOCK(&ia->ia_ifa); + rescheduled = callout_reset(&ia->ia_garp_timer, + (1 << ia->ia_garp_count) * hz, + garp_rexmit, ia); + IFA_UNLOCK(&ia->ia_ifa); + if (rescheduled) { + ifa_free(&ia->ia_ifa); + } + } +} + +/* + * Start the GARP retransmit timer. + * + * A single GARP is always transmitted when an IPv4 address is added + * to an interface and that is usually sufficient. However, in some + * circumstances, such as when a shared address is passed between + * cluster nodes, this single GARP may occasionally be dropped or + * lost. This can lead to neighbors on the network link working with a + * stale ARP cache and sending packets destined for that address to + * the node that previously owned the address, which may not respond. + * + * To avoid this situation, GARP retransmits can be enabled by setting + * the net.link.ether.inet.garp_rexmit_count sysctl to a value greater + * than zero. The setting represents the maximum number of + * retransmissions. The interval between retransmissions is calculated + * using an exponential backoff algorithm, doubling each time, so the + * retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds). + */ +static void +garp_timer_start(struct ifaddr *ifa) +{ + struct in_ifaddr *ia = (struct in_ifaddr *) ifa; + + IFA_LOCK(ifa); + ia->ia_garp_count = 0; + if (callout_reset(&ia->ia_garp_timer, (1 << ia->ia_garp_count) * hz, + garp_rexmit, ia) == 0) { + ifa_ref(ifa); + } + IFA_UNLOCK(ifa); +} + void arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) { struct llentry *lle; if (ifa->ifa_carp != NULL) return; if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) { arprequest(ifp, &IA_SIN(ifa)->sin_addr, &IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp)); + if (garp_rexmit_count > 0) { + garp_timer_start(ifa); + } + /* * interface address is considered static entry * because the output of the arp utility shows * that L2 entry as permanent */ IF_AFDATA_LOCK(ifp); lle = lla_lookup(LLTABLE(ifp), (LLE_CREATE | LLE_IFADDR | LLE_STATIC), (struct sockaddr *)IA_SIN(ifa)); IF_AFDATA_UNLOCK(ifp); if (lle == NULL) log(LOG_INFO, "arp_ifinit: cannot create arp " "entry for interface address\n"); else LLE_RUNLOCK(lle); } ifa->ifa_rtrequest = NULL; } void arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr) { if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) arprequest(ifp, &IA_SIN(ifa)->sin_addr, &IA_SIN(ifa)->sin_addr, enaddr); ifa->ifa_rtrequest = NULL; } static void arp_init(void) { netisr_register(&arp_nh); } SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); Index: stable/10/sys/netinet/in.c =================================================================== --- stable/10/sys/netinet/in.c (revision 309339) +++ stable/10/sys/netinet/in.c (revision 309340) @@ -1,1569 +1,1575 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (C) 2001 WIDE Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.c 8.4 (Berkeley) 1/9/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int in_mask2len(struct in_addr *); static void in_len2mask(struct in_addr *, int); static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); static void in_socktrim(struct sockaddr_in *); static int in_ifinit(struct ifnet *, struct in_ifaddr *, struct sockaddr_in *, int, int); static void in_purgemaddrs(struct ifnet *); static VNET_DEFINE(int, nosameprefix); #define V_nosameprefix VNET(nosameprefix) SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_RW, &VNET_NAME(nosameprefix), 0, "Refuse to create same prefixes on different interfaces"); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcbinfo VNET(ripcbinfo) /* * Return 1 if an internet address is for a ``local'' host * (one to which we have a connection). */ int in_localaddr(struct in_addr in) { register u_long i = ntohl(in.s_addr); register struct in_ifaddr *ia; IN_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if ((i & ia->ia_subnetmask) == ia->ia_subnet) { IN_IFADDR_RUNLOCK(); return (1); } } IN_IFADDR_RUNLOCK(); return (0); } /* * Return 1 if an internet address is for the local host and configured * on one of its interfaces. */ int in_localip(struct in_addr in) { struct in_ifaddr *ia; IN_IFADDR_RLOCK(); LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) { if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) { IN_IFADDR_RUNLOCK(); return (1); } } IN_IFADDR_RUNLOCK(); return (0); } /* * Determine whether an IP address is in a reserved set of addresses * that may not be forwarded, or whether datagrams to that destination * may be forwarded. */ int in_canforward(struct in_addr in) { register u_long i = ntohl(in.s_addr); register u_long net; if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i)) return (0); if (IN_CLASSA(i)) { net = i & IN_CLASSA_NET; if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT)) return (0); } return (1); } /* * Trim a mask in a sockaddr */ static void in_socktrim(struct sockaddr_in *ap) { register char *cplim = (char *) &ap->sin_addr; register char *cp = (char *) (&ap->sin_addr + 1); ap->sin_len = 0; while (--cp >= cplim) if (*cp) { (ap)->sin_len = cp - (char *) (ap) + 1; break; } } static int in_mask2len(mask) struct in_addr *mask; { int x, y; u_char *p; p = (u_char *)mask; for (x = 0; x < sizeof(*mask); x++) { if (p[x] != 0xff) break; } y = 0; if (x < sizeof(*mask)) { for (y = 0; y < 8; y++) { if ((p[x] & (0x80 >> y)) == 0) break; } } return (x * 8 + y); } static void in_len2mask(struct in_addr *mask, int len) { int i; u_char *p; p = (u_char *)mask; bzero(mask, sizeof(*mask)); for (i = 0; i < len / 8; i++) p[i] = 0xff; if (len % 8) p[i] = (0xff00 >> (len % 8)) & 0xff; } /* * Generic internet control operations (ioctl's). * * ifp is NULL if not an interface-specific ioctl. */ /* ARGSUSED */ int in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { register struct ifreq *ifr = (struct ifreq *)data; register struct in_ifaddr *ia, *iap; register struct ifaddr *ifa; struct in_addr allhosts_addr; struct in_addr dst; struct in_ifinfo *ii; struct in_aliasreq *ifra = (struct in_aliasreq *)data; int error, hostIsNew, iaIsNew, maskIsNew; int iaIsFirst; u_long ocmd = cmd; /* * Pre-10.x compat: OSIOCAIFADDR passes a shorter * struct in_aliasreq, without ifra_vhid. */ if (cmd == OSIOCAIFADDR) cmd = SIOCAIFADDR; ia = NULL; iaIsFirst = 0; iaIsNew = 0; allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP); /* * Filter out ioctls we implement directly; forward the rest on to * in_lifaddr_ioctl() and ifp->if_ioctl(). */ switch (cmd) { case SIOCGIFADDR: case SIOCGIFBRDADDR: case SIOCGIFDSTADDR: case SIOCGIFNETMASK: case SIOCDIFADDR: break; case SIOCAIFADDR: /* * ifra_addr must be present and be of INET family. * ifra_broadaddr and ifra_mask are optional. */ if (ifra->ifra_addr.sin_len != sizeof(struct sockaddr_in) || ifra->ifra_addr.sin_family != AF_INET) return (EINVAL); if (ifra->ifra_broadaddr.sin_len != 0 && (ifra->ifra_broadaddr.sin_len != sizeof(struct sockaddr_in) || ifra->ifra_broadaddr.sin_family != AF_INET)) return (EINVAL); #if 0 /* * ifconfig(8) in pre-10.x doesn't set sin_family for the * mask. The code is disabled for the 10.x timeline, to * make SIOCAIFADDR compatible with 9.x ifconfig(8). * The code should be enabled in 11.x */ if (ifra->ifra_mask.sin_len != 0 && (ifra->ifra_mask.sin_len != sizeof(struct sockaddr_in) || ifra->ifra_mask.sin_family != AF_INET)) return (EINVAL); #endif break; case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFDSTADDR: case SIOCSIFNETMASK: /* We no longer support that old commands. */ return (EINVAL); case SIOCALIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_ADDIFADDR); if (error) return (error); } if (ifp == NULL) return (EINVAL); return in_lifaddr_ioctl(so, cmd, data, ifp, td); case SIOCDLIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_DELIFADDR); if (error) return (error); } if (ifp == NULL) return (EINVAL); return in_lifaddr_ioctl(so, cmd, data, ifp, td); case SIOCGLIFADDR: if (ifp == NULL) return (EINVAL); return in_lifaddr_ioctl(so, cmd, data, ifp, td); default: if (ifp == NULL || ifp->if_ioctl == NULL) return (EOPNOTSUPP); return ((*ifp->if_ioctl)(ifp, cmd, data)); } if (ifp == NULL) return (EADDRNOTAVAIL); /* * Security checks before we get involved in any work. */ switch (cmd) { case SIOCAIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_ADDIFADDR); if (error) return (error); } break; case SIOCDIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_DELIFADDR); if (error) return (error); } break; } /* * Find address for this interface, if it exists. * * If an alias address was specified, find that one instead of the * first one on the interface, if possible. */ dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr; IN_IFADDR_RLOCK(); LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) { if (iap->ia_ifp == ifp && iap->ia_addr.sin_addr.s_addr == dst.s_addr) { if (td == NULL || prison_check_ip4(td->td_ucred, &dst) == 0) ia = iap; break; } } if (ia != NULL) ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); if (ia == NULL) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { iap = ifatoia(ifa); if (iap->ia_addr.sin_family == AF_INET) { if (td != NULL && prison_check_ip4(td->td_ucred, &iap->ia_addr.sin_addr) != 0) continue; ia = iap; break; } } if (ia != NULL) ifa_ref(&ia->ia_ifa); IF_ADDR_RUNLOCK(ifp); } if (ia == NULL) iaIsFirst = 1; error = 0; switch (cmd) { case SIOCAIFADDR: case SIOCDIFADDR: if (ifra->ifra_addr.sin_family == AF_INET) { struct in_ifaddr *oia; IN_IFADDR_RLOCK(); for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) { if (ia->ia_ifp == ifp && ia->ia_addr.sin_addr.s_addr == ifra->ifra_addr.sin_addr.s_addr) break; } if (ia != NULL && ia != oia) ifa_ref(&ia->ia_ifa); if (oia != NULL && ia != oia) ifa_free(&oia->ia_ifa); IN_IFADDR_RUNLOCK(); if ((ifp->if_flags & IFF_POINTOPOINT) && (cmd == SIOCAIFADDR) && (ifra->ifra_dstaddr.sin_addr.s_addr == INADDR_ANY)) { error = EDESTADDRREQ; goto out; } } if (cmd == SIOCDIFADDR && ia == NULL) { error = EADDRNOTAVAIL; goto out; } if (ia == NULL) { ia = (struct in_ifaddr *) malloc(sizeof *ia, M_IFADDR, M_NOWAIT | M_ZERO); if (ia == NULL) { error = ENOBUFS; goto out; } ifa = &ia->ia_ifa; ifa_init(ifa); ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; + callout_init_mtx(&ia->ia_garp_timer, &ifa->ifa_mtx, + CALLOUT_RETURNUNLOCKED); ia->ia_sockmask.sin_len = 8; ia->ia_sockmask.sin_family = AF_INET; if (ifp->if_flags & IFF_BROADCAST) { ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr); ia->ia_broadaddr.sin_family = AF_INET; } ia->ia_ifp = ifp; ifa_ref(ifa); /* if_addrhead */ IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_ref(ifa); /* in_ifaddrhead */ IN_IFADDR_WLOCK(); TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link); IN_IFADDR_WUNLOCK(); iaIsNew = 1; } break; case SIOCGIFADDR: case SIOCGIFNETMASK: case SIOCGIFDSTADDR: case SIOCGIFBRDADDR: if (ia == NULL) { error = EADDRNOTAVAIL; goto out; } break; } /* * Most paths in this switch return directly or via out. Only paths * that remove the address break in order to hit common removal code. */ switch (cmd) { case SIOCGIFADDR: *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr; goto out; case SIOCGIFBRDADDR: if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EINVAL; goto out; } *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr; goto out; case SIOCGIFDSTADDR: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { error = EINVAL; goto out; } *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr; goto out; case SIOCGIFNETMASK: *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask; goto out; case SIOCAIFADDR: maskIsNew = 0; hostIsNew = 1; error = 0; if (ifra->ifra_addr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) hostIsNew = 0; if (ifra->ifra_mask.sin_len) { /* * QL: XXX * Need to scrub the prefix here in case * the issued command is SIOCAIFADDR with * the same address, but with a different * prefix length. And if the prefix length * is the same as before, then the call is * un-necessarily executed here. */ in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_sockmask = ifra->ifra_mask; ia->ia_sockmask.sin_family = AF_INET; ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr); maskIsNew = 1; } if ((ifp->if_flags & IFF_POINTOPOINT) && (ifra->ifra_dstaddr.sin_family == AF_INET)) { in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_dstaddr = ifra->ifra_dstaddr; maskIsNew = 1; /* We lie; but the effect's the same */ } if (hostIsNew || maskIsNew) error = in_ifinit(ifp, ia, &ifra->ifra_addr, maskIsNew, (ocmd == cmd ? ifra->ifra_vhid : 0)); if (error != 0 && iaIsNew) break; if ((ifp->if_flags & IFF_BROADCAST) && ifra->ifra_broadaddr.sin_len) ia->ia_broadaddr = ifra->ifra_broadaddr; if (error == 0) { ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0) { error = in_joingroup(ifp, &allhosts_addr, NULL, &ii->ii_allhosts); } EVENTHANDLER_INVOKE(ifaddr_event, ifp); } goto out; case SIOCDIFADDR: /* * in_ifscrub kills the interface route. */ in_ifscrub(ifp, ia, LLE_STATIC); /* * in_ifadown gets rid of all the rest of * the routes. This is not quite the right * thing to do, but at least if we are running * a routing process they will come back. */ in_ifadown(&ia->ia_ifa, 1); EVENTHANDLER_INVOKE(ifaddr_event, ifp); error = 0; break; default: panic("in_control: unsupported ioctl"); } if (ia->ia_ifa.ifa_carp) (*carp_detach_p)(&ia->ia_ifa); IF_ADDR_WLOCK(ifp); /* Re-check that ia is still part of the list. */ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa == &ia->ia_ifa) break; } if (ifa == NULL) { /* * If we lost the race with another thread, there is no need to * try it again for the next loop as there is no other exit * path between here and out. */ IF_ADDR_WUNLOCK(ifp); error = EADDRNOTAVAIL; goto out; } TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ IN_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link); LIST_REMOVE(ia, ia_hash); IN_IFADDR_WUNLOCK(); /* * If this is the last IPv4 address configured on this * interface, leave the all-hosts group. * No state-change report need be transmitted. */ IFP_TO_IA(ifp, iap); if (iap == NULL) { ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); IN_MULTI_LOCK(); if (ii->ii_allhosts) { (void)in_leavegroup_locked(ii->ii_allhosts, NULL); ii->ii_allhosts = NULL; } IN_MULTI_UNLOCK(); } else ifa_free(&iap->ia_ifa); + IFA_LOCK(&ia->ia_ifa); + if (callout_stop(&ia->ia_garp_timer)) + ifa_free(&ia->ia_ifa); + IFA_UNLOCK(&ia->ia_ifa); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ out: if (ia != NULL) ifa_free(&ia->ia_ifa); return (error); } /* * SIOC[GAD]LIFADDR. * SIOCGLIFADDR: get first address. (?!?) * SIOCGLIFADDR with IFLR_PREFIX: * get first address that matches the specified prefix. * SIOCALIFADDR: add the specified address. * SIOCALIFADDR with IFLR_PREFIX: * EINVAL since we can't deduce hostid part of the address. * SIOCDLIFADDR: delete the specified address. * SIOCDLIFADDR with IFLR_PREFIX: * delete the first address that matches the specified prefix. * return values: * EINVAL on invalid parameters * EADDRNOTAVAIL on prefix match failed/specified address not found * other values may be returned from in_ioctl() */ static int in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct if_laddrreq *iflr = (struct if_laddrreq *)data; struct ifaddr *ifa; /* sanity checks */ if (data == NULL || ifp == NULL) { panic("invalid argument to in_lifaddr_ioctl"); /*NOTRECHED*/ } switch (cmd) { case SIOCGLIFADDR: /* address must be specified on GET with IFLR_PREFIX */ if ((iflr->flags & IFLR_PREFIX) == 0) break; /*FALLTHROUGH*/ case SIOCALIFADDR: case SIOCDLIFADDR: /* address must be specified on ADD and DELETE */ if (iflr->addr.ss_family != AF_INET) return (EINVAL); if (iflr->addr.ss_len != sizeof(struct sockaddr_in)) return (EINVAL); /* XXX need improvement */ if (iflr->dstaddr.ss_family && iflr->dstaddr.ss_family != AF_INET) return (EINVAL); if (iflr->dstaddr.ss_family && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in)) return (EINVAL); break; default: /*shouldn't happen*/ return (EOPNOTSUPP); } if (sizeof(struct in_addr) * 8 < iflr->prefixlen) return (EINVAL); switch (cmd) { case SIOCALIFADDR: { struct in_aliasreq ifra; if (iflr->flags & IFLR_PREFIX) return (EINVAL); /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len); if (iflr->dstaddr.ss_family) { /*XXX*/ bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, iflr->dstaddr.ss_len); } ifra.ifra_mask.sin_family = AF_INET; ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in); in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen); return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td)); } case SIOCGLIFADDR: case SIOCDLIFADDR: { struct in_ifaddr *ia; struct in_addr mask, candidate, match; struct sockaddr_in *sin; bzero(&mask, sizeof(mask)); bzero(&match, sizeof(match)); if (iflr->flags & IFLR_PREFIX) { /* lookup a prefix rather than address. */ in_len2mask(&mask, iflr->prefixlen); sin = (struct sockaddr_in *)&iflr->addr; match.s_addr = sin->sin_addr.s_addr; match.s_addr &= mask.s_addr; /* if you set extra bits, that's wrong */ if (match.s_addr != sin->sin_addr.s_addr) return (EINVAL); } else { /* on getting an address, take the 1st match */ /* on deleting an address, do exact match */ if (cmd != SIOCGLIFADDR) { in_len2mask(&mask, 32); sin = (struct sockaddr_in *)&iflr->addr; match.s_addr = sin->sin_addr.s_addr; } } IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; if (match.s_addr == 0) break; sin = (struct sockaddr_in *)&ifa->ifa_addr; candidate.s_addr = sin->sin_addr.s_addr; candidate.s_addr &= mask.s_addr; if (candidate.s_addr == match.s_addr) break; } if (ifa != NULL) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); if (ifa == NULL) return (EADDRNOTAVAIL); ia = (struct in_ifaddr *)ifa; if (cmd == SIOCGLIFADDR) { /* fill in the if_laddrreq structure */ bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len); if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &iflr->dstaddr, ia->ia_dstaddr.sin_len); } else bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); iflr->prefixlen = in_mask2len(&ia->ia_sockmask.sin_addr); iflr->flags = 0; /*XXX*/ ifa_free(ifa); return (0); } else { struct in_aliasreq ifra; /* fill in_aliasreq and do ioctl(SIOCDIFADDR) */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&ia->ia_addr, &ifra.ifra_addr, ia->ia_addr.sin_len); if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, ia->ia_dstaddr.sin_len); } bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr, ia->ia_sockmask.sin_len); ifa_free(ifa); return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra, ifp, td)); } } } return (EOPNOTSUPP); /*just for safety*/ } /* * Delete any existing route for an interface. */ void in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags) { in_scrubprefix(ia, flags); } /* * Initialize an interface's internet address * and routing table entry. */ static int in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, int masksupplied, int vhid) { register u_long i = ntohl(sin->sin_addr.s_addr); int flags, error = 0; IN_IFADDR_WLOCK(); if (ia->ia_addr.sin_family == AF_INET) LIST_REMOVE(ia, ia_hash); ia->ia_addr = *sin; LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); IN_IFADDR_WUNLOCK(); if (vhid > 0) { if (carp_attach_p != NULL) error = (*carp_attach_p)(&ia->ia_ifa, vhid); else error = EPROTONOSUPPORT; } if (error) return (error); /* * Give the interface a chance to initialize * if this is its first address, * and to validate the address if necessary. */ if (ifp->if_ioctl != NULL && (error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia)) != 0) /* LIST_REMOVE(ia, ia_hash) is done in in_control */ return (error); /* * Be compatible with network classes, if netmask isn't supplied, * guess it based on classes. */ if (!masksupplied) { if (IN_CLASSA(i)) ia->ia_subnetmask = IN_CLASSA_NET; else if (IN_CLASSB(i)) ia->ia_subnetmask = IN_CLASSB_NET; else ia->ia_subnetmask = IN_CLASSC_NET; ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask); } ia->ia_subnet = i & ia->ia_subnetmask; in_socktrim(&ia->ia_sockmask); /* * Add route for the network. */ flags = RTF_UP; ia->ia_ifa.ifa_metric = ifp->if_metric; if (ifp->if_flags & IFF_BROADCAST) { if (ia->ia_subnetmask == IN_RFC3021_MASK) ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST; else ia->ia_broadaddr.sin_addr.s_addr = htonl(ia->ia_subnet | ~ia->ia_subnetmask); } else if (ifp->if_flags & IFF_LOOPBACK) { ia->ia_dstaddr = ia->ia_addr; flags |= RTF_HOST; } else if (ifp->if_flags & IFF_POINTOPOINT) { if (ia->ia_dstaddr.sin_family != AF_INET) return (0); flags |= RTF_HOST; } if (!vhid && (error = in_addprefix(ia, flags)) != 0) return (error); if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY) return (0); if (ifp->if_flags & IFF_POINTOPOINT && ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) return (0); /* * add a loopback route to self */ if (V_useloopback && !vhid && !(ifp->if_flags & IFF_LOOPBACK)) { struct route ia_ro; bzero(&ia_ro, sizeof(ia_ro)); *((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr; rtalloc_ign_fib(&ia_ro, 0, RT_DEFAULT_FIB); if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) && (ia_ro.ro_rt->rt_ifp == V_loif)) { RT_LOCK(ia_ro.ro_rt); RT_ADDREF(ia_ro.ro_rt); RTFREE_LOCKED(ia_ro.ro_rt); } else error = ifa_add_loopback_route((struct ifaddr *)ia, (struct sockaddr *)&ia->ia_addr); if (error == 0) ia->ia_flags |= IFA_RTSELF; if (ia_ro.ro_rt != NULL) RTFREE(ia_ro.ro_rt); } return (error); } #define rtinitflags(x) \ ((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \ ? RTF_HOST : 0) /* * Check if we have a route for the given prefix already or add one accordingly. */ int in_addprefix(struct in_ifaddr *target, int flags) { struct in_ifaddr *ia; struct in_addr prefix, mask, p, m; int error; if ((flags & RTF_HOST) != 0) { prefix = target->ia_dstaddr.sin_addr; mask.s_addr = 0; } else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } IN_IFADDR_RLOCK(); /* Look for an existing address with the same prefix, mask, and fib */ TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; } else { p = ia->ia_addr.sin_addr; m = ia->ia_sockmask.sin_addr; p.s_addr &= m.s_addr; if (prefix.s_addr != p.s_addr || mask.s_addr != m.s_addr) continue; } if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib) continue; /* * If we got a matching prefix route inserted by other * interface address, we are done here. */ if (ia->ia_flags & IFA_ROUTE) { #ifdef RADIX_MPATH if (ia->ia_addr.sin_addr.s_addr == target->ia_addr.sin_addr.s_addr) { IN_IFADDR_RUNLOCK(); return (EEXIST); } else break; #endif if (V_nosameprefix) { IN_IFADDR_RUNLOCK(); return (EEXIST); } else { int fibnum; fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS : target->ia_ifp->if_fib; rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum); IN_IFADDR_RUNLOCK(); return (0); } } } IN_IFADDR_RUNLOCK(); /* * No-one seem to have this prefix route, so we try to insert it. */ error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags); if (!error) target->ia_flags |= IFA_ROUTE; return (error); } /* * If there is no other address in the system that can serve a route to the * same prefix, remove the route. Hand over the route to the new address * otherwise. */ int in_scrubprefix(struct in_ifaddr *target, u_int flags) { struct in_ifaddr *ia; struct in_addr prefix, mask, p, m; int error = 0; struct sockaddr_in prefix0, mask0; /* * Remove the loopback route to the interface address. * The "useloopback" setting is not consulted because if the * user configures an interface address, turns off this * setting, and then tries to delete that interface address, * checking the current setting of "useloopback" would leave * that interface address loopback route untouched, which * would be wrong. Therefore the interface address loopback route * deletion is unconditional. */ if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) && !(target->ia_ifp->if_flags & IFF_LOOPBACK) && (target->ia_flags & IFA_RTSELF)) { struct route ia_ro; int freeit = 0; int fib; bzero(&ia_ro, sizeof(ia_ro)); *((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr; fib = target->ia_ifa.ifa_ifp->if_fib; rtalloc_ign_fib(&ia_ro, 0, fib); if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) && (ia_ro.ro_rt->rt_ifp == V_loif)) { RT_LOCK(ia_ro.ro_rt); if (ia_ro.ro_rt->rt_refcnt <= 1) freeit = 1; else if (flags & LLE_STATIC) { RT_REMREF(ia_ro.ro_rt); target->ia_flags &= ~IFA_RTSELF; } RTFREE_LOCKED(ia_ro.ro_rt); } if (freeit && (flags & LLE_STATIC)) { error = ifa_del_loopback_route((struct ifaddr *)target, (struct sockaddr *)&target->ia_addr); if (error == 0) target->ia_flags &= ~IFA_RTSELF; } if ((flags & LLE_STATIC) && !(target->ia_ifp->if_flags & IFF_NOARP)) /* remove arp cache */ arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr); } if (rtinitflags(target)) { prefix = target->ia_dstaddr.sin_addr; mask.s_addr = 0; } else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } if ((target->ia_flags & IFA_ROUTE) == 0) { int fibnum; fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS : target->ia_ifp->if_fib; rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum); return (0); } IN_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; } else { p = ia->ia_addr.sin_addr; m = ia->ia_sockmask.sin_addr; p.s_addr &= m.s_addr; if (prefix.s_addr != p.s_addr || mask.s_addr != m.s_addr) continue; } if ((ia->ia_ifp->if_flags & IFF_UP) == 0) continue; /* * If we got a matching prefix address, move IFA_ROUTE and * the route itself to it. Make sure that routing daemons * get a heads-up. */ if ((ia->ia_flags & IFA_ROUTE) == 0) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n", error); error = rtinit(&ia->ia_ifa, (int)RTM_ADD, rtinitflags(ia) | RTF_UP); if (error == 0) ia->ia_flags |= IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n", error); ifa_free(&ia->ia_ifa); return (error); } } IN_IFADDR_RUNLOCK(); /* * remove all L2 entries on the given prefix */ bzero(&prefix0, sizeof(prefix0)); prefix0.sin_len = sizeof(prefix0); prefix0.sin_family = AF_INET; prefix0.sin_addr.s_addr = target->ia_subnet; bzero(&mask0, sizeof(mask0)); mask0.sin_len = sizeof(mask0); mask0.sin_family = AF_INET; mask0.sin_addr.s_addr = target->ia_subnetmask; lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0, (struct sockaddr *)&mask0, flags); /* * As no-one seem to have this prefix, we can remove the route. */ error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error); return (error); } #undef rtinitflags /* * Return 1 if the address might be a local broadcast address. */ int in_broadcast(struct in_addr in, struct ifnet *ifp) { register struct ifaddr *ifa; u_long t; if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY) return (1); if ((ifp->if_flags & IFF_BROADCAST) == 0) return (0); t = ntohl(in.s_addr); /* * Look through the list of addresses for a match * with a broadcast address. */ #define ia ((struct in_ifaddr *)ifa) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET && (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr || /* * Check for old-style (host 0) broadcast, but * taking into account that RFC 3021 obsoletes it. */ (ia->ia_subnetmask != IN_RFC3021_MASK && t == ia->ia_subnet)) && /* * Check for an all one subnetmask. These * only exist when an interface gets a secondary * address. */ ia->ia_subnetmask != (u_long)0xffffffff) return (1); return (0); #undef ia } /* * On interface removal, clean up IPv4 data structures hung off of the ifnet. */ void in_ifdetach(struct ifnet *ifp) { in_pcbpurgeif0(&V_ripcbinfo, ifp); in_pcbpurgeif0(&V_udbinfo, ifp); in_pcbpurgeif0(&V_ulitecbinfo, ifp); in_purgemaddrs(ifp); } /* * Delete all IPv4 multicast address records, and associated link-layer * multicast address records, associated with ifp. * XXX It looks like domifdetach runs AFTER the link layer cleanup. * XXX This should not race with ifma_protospec being set during * a new allocation, if it does, we have bigger problems. */ static void in_purgemaddrs(struct ifnet *ifp) { LIST_HEAD(,in_multi) purgeinms; struct in_multi *inm, *tinm; struct ifmultiaddr *ifma; LIST_INIT(&purgeinms); IN_MULTI_LOCK(); /* * Extract list of in_multi associated with the detaching ifp * which the PF_INET layer is about to release. * We need to do this as IF_ADDR_LOCK() may be re-acquired * by code further down. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; #if 0 KASSERT(ifma->ifma_protospec != NULL, ("%s: ifma_protospec is NULL", __func__)); #endif inm = (struct in_multi *)ifma->ifma_protospec; LIST_INSERT_HEAD(&purgeinms, inm, inm_link); } IF_ADDR_RUNLOCK(ifp); LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) { LIST_REMOVE(inm, inm_link); inm_release_locked(inm); } igmp_ifdetach(ifp); IN_MULTI_UNLOCK(); } struct in_llentry { struct llentry base; struct sockaddr_in l3_addr4; }; /* * Deletes an address from the address table. * This function is called by the timer functions * such as arptimer() and nd6_llinfo_timer(), and * the caller does the locking. */ static void in_lltable_free(struct lltable *llt, struct llentry *lle) { LLE_WUNLOCK(lle); LLE_LOCK_DESTROY(lle); free(lle, M_LLTABLE); } static struct llentry * in_lltable_new(const struct sockaddr *l3addr, u_int flags) { struct in_llentry *lle; lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); if (lle == NULL) /* NB: caller generates msg */ return NULL; /* * For IPv4 this will trigger "arpresolve" to generate * an ARP request. */ lle->base.la_expire = time_uptime; /* mark expired */ lle->l3_addr4 = *(const struct sockaddr_in *)l3addr; lle->base.lle_refcnt = 1; lle->base.lle_free = in_lltable_free; LLE_LOCK_INIT(&lle->base); callout_init(&lle->base.la_timer, 1); return (&lle->base); } #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ (((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 ) static void in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, const struct sockaddr *mask, u_int flags) { const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix; const struct sockaddr_in *msk = (const struct sockaddr_in *)mask; struct llentry *lle, *next; int i; size_t pkts_dropped; IF_AFDATA_WLOCK(llt->llt_ifp); for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { /* * (flags & LLE_STATIC) means deleting all entries * including static ARP entries. */ if (IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)), pfx, msk) && ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) { LLE_WLOCK(lle); if (callout_stop(&lle->la_timer)) LLE_REMREF(lle); pkts_dropped = llentry_free(lle); ARPSTAT_ADD(dropped, pkts_dropped); } } } IF_AFDATA_WUNLOCK(llt->llt_ifp); } static int in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) { struct rtentry *rt; KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); /* XXX rtalloc1_fib should take a const param */ rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0, ifp->if_fib); if (rt == NULL) return (EINVAL); /* * If the gateway for an existing host route matches the target L3 * address, which is a special route inserted by some implementation * such as MANET, and the interface is of the correct type, then * allow for ARP to proceed. */ if (rt->rt_flags & RTF_GATEWAY) { if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp || rt->rt_ifp->if_type != IFT_ETHER || (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || memcmp(rt->rt_gateway->sa_data, l3addr->sa_data, sizeof(in_addr_t)) != 0) { RTFREE_LOCKED(rt); return (EINVAL); } } /* * Make sure that at least the destination address is covered * by the route. This is for handling the case where 2 or more * interfaces have the same prefix. An incoming packet arrives * on one interface and the corresponding outgoing packet leaves * another interface. */ if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) { const char *sa, *mask, *addr, *lim; int len; mask = (const char *)rt_mask(rt); /* * Just being extra cautious to avoid some custom * code getting into trouble. */ if (mask == NULL) { RTFREE_LOCKED(rt); return (EINVAL); } sa = (const char *)rt_key(rt); addr = (const char *)l3addr; len = ((const struct sockaddr_in *)l3addr)->sin_len; lim = addr + len; for ( ; addr < lim; sa++, mask++, addr++) { if ((*sa ^ *addr) & *mask) { #ifdef DIAGNOSTIC log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); #endif RTFREE_LOCKED(rt); return (EINVAL); } } } RTFREE_LOCKED(rt); return (0); } /* * Return NULL if not found or marked for deletion. * If found return lle read locked. */ static struct llentry * in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; struct llentries *lleh; u_int hashkey; IF_AFDATA_LOCK_ASSERT(ifp); KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); hashkey = sin->sin_addr.s_addr; lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; LIST_FOREACH(lle, lleh, lle_next) { struct sockaddr_in *sa2 = satosin(L3_ADDR(lle)); if (lle->la_flags & LLE_DELETED) continue; if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr) break; } if (lle == NULL) { #ifdef DIAGNOSTIC if (flags & LLE_DELETE) log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle); #endif if (!(flags & LLE_CREATE)) return (NULL); IF_AFDATA_WLOCK_ASSERT(ifp); /* * A route that covers the given address must have * been installed 1st because we are doing a resolution, * verify this. */ if (!(flags & LLE_IFADDR) && in_lltable_rtcheck(ifp, flags, l3addr) != 0) goto done; lle = in_lltable_new(l3addr, flags); if (lle == NULL) { log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); goto done; } lle->la_flags = flags & ~LLE_CREATE; if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen); lle->la_flags |= (LLE_VALID | LLE_STATIC); } lle->lle_tbl = llt; lle->lle_head = lleh; lle->la_flags |= LLE_LINKED; LIST_INSERT_HEAD(lleh, lle, lle_next); } else if (flags & LLE_DELETE) { if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { LLE_WLOCK(lle); lle->la_flags |= LLE_DELETED; EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); #ifdef DIAGNOSTIC log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); #endif if ((lle->la_flags & (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC) llentry_free(lle); else LLE_WUNLOCK(lle); } lle = (void *)-1; } if (LLE_IS_VALID(lle)) { if (flags & LLE_EXCLUSIVE) LLE_WLOCK(lle); else LLE_RLOCK(lle); } done: return (lle); } static int in_lltable_dump(struct lltable *llt, struct sysctl_req *wr) { #define SIN(lle) ((struct sockaddr_in *) L3_ADDR(lle)) struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; /* XXX stack use */ struct { struct rt_msghdr rtm; struct sockaddr_in sin; struct sockaddr_dl sdl; } arpc; int error, i; LLTABLE_LOCK_ASSERT(); error = 0; for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { struct sockaddr_dl *sdl; /* skip deleted entries */ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) continue; /* Skip if jailed and not a valid IP of the prison. */ if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0) continue; /* * produce a msg made of: * struct rt_msghdr; * struct sockaddr_in; (IPv4) * struct sockaddr_dl; */ bzero(&arpc, sizeof(arpc)); arpc.rtm.rtm_msglen = sizeof(arpc); arpc.rtm.rtm_version = RTM_VERSION; arpc.rtm.rtm_type = RTM_GET; arpc.rtm.rtm_flags = RTF_UP; arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; arpc.sin.sin_family = AF_INET; arpc.sin.sin_len = sizeof(arpc.sin); arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr; /* publish */ if (lle->la_flags & LLE_PUB) arpc.rtm.rtm_flags |= RTF_ANNOUNCE; sdl = &arpc.sdl; sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(*sdl); sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; if ((lle->la_flags & LLE_VALID) == LLE_VALID) { sdl->sdl_alen = ifp->if_addrlen; bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); } else { sdl->sdl_alen = 0; bzero(LLADDR(sdl), ifp->if_addrlen); } arpc.rtm.rtm_rmx.rmx_expire = lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); if (lle->la_flags & LLE_STATIC) arpc.rtm.rtm_flags |= RTF_STATIC; arpc.rtm.rtm_index = ifp->if_index; error = SYSCTL_OUT(wr, &arpc, sizeof(arpc)); if (error) break; } } return error; #undef SIN } void * in_domifattach(struct ifnet *ifp) { struct in_ifinfo *ii; struct lltable *llt; ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO); llt = lltable_init(ifp, AF_INET); if (llt != NULL) { llt->llt_prefix_free = in_lltable_prefix_free; llt->llt_lookup = in_lltable_lookup; llt->llt_dump = in_lltable_dump; } ii->ii_llt = llt; ii->ii_igmp = igmp_domifattach(ifp); return ii; } void in_domifdetach(struct ifnet *ifp, void *aux) { struct in_ifinfo *ii = (struct in_ifinfo *)aux; igmp_domifdetach(ifp); lltable_free(ii->ii_llt); free(ii, M_IFADDR); } Index: stable/10/sys/netinet/in_var.h =================================================================== --- stable/10/sys/netinet/in_var.h (revision 309339) +++ stable/10/sys/netinet/in_var.h (revision 309340) @@ -1,480 +1,483 @@ /*- * Copyright (c) 1985, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_var.h 8.2 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _NETINET_IN_VAR_H_ #define _NETINET_IN_VAR_H_ +#include #include #include #include struct igmp_ifinfo; struct in_multi; struct lltable; /* * IPv4 per-interface state. */ struct in_ifinfo { struct lltable *ii_llt; /* ARP state */ struct igmp_ifinfo *ii_igmp; /* IGMP state */ struct in_multi *ii_allhosts; /* 224.0.0.1 membership */ }; /* * Interface address, Internet version. One of these structures * is allocated for each Internet address on an interface. * The ifaddr structure contains the protocol-independent part * of the structure and is assumed to be first. */ struct in_ifaddr { struct ifaddr ia_ifa; /* protocol-independent info */ #define ia_ifp ia_ifa.ifa_ifp #define ia_flags ia_ifa.ifa_flags /* ia_subnet{,mask} in host order */ u_long ia_subnet; /* subnet address */ u_long ia_subnetmask; /* mask of subnet */ LIST_ENTRY(in_ifaddr) ia_hash; /* entry in bucket of inet addresses */ TAILQ_ENTRY(in_ifaddr) ia_link; /* list of internet addresses */ struct sockaddr_in ia_addr; /* reserve space for interface name */ struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */ #define ia_broadaddr ia_dstaddr struct sockaddr_in ia_sockmask; /* reserve space for general netmask */ + struct callout ia_garp_timer; /* timer for retransmitting GARPs */ + int ia_garp_count; /* count of retransmitted GARPs */ }; struct in_aliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr_in ifra_addr; struct sockaddr_in ifra_broadaddr; #define ifra_dstaddr ifra_broadaddr struct sockaddr_in ifra_mask; int ifra_vhid; }; /* * Given a pointer to an in_ifaddr (ifaddr), * return a pointer to the addr as a sockaddr_in. */ #define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr)) #define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr)) #define IA_MASKSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_sockmask)) #define IN_LNAOF(in, ifa) \ ((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask)) #ifdef _KERNEL extern u_char inetctlerrmap[]; #define LLTABLE(ifp) \ ((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_llt /* * Hash table for IP addresses. */ TAILQ_HEAD(in_ifaddrhead, in_ifaddr); LIST_HEAD(in_ifaddrhashhead, in_ifaddr); VNET_DECLARE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); VNET_DECLARE(struct in_ifaddrhead, in_ifaddrhead); VNET_DECLARE(u_long, in_ifaddrhmask); /* mask for hash table */ #define V_in_ifaddrhashtbl VNET(in_ifaddrhashtbl) #define V_in_ifaddrhead VNET(in_ifaddrhead) #define V_in_ifaddrhmask VNET(in_ifaddrhmask) #define INADDR_NHASH_LOG2 9 #define INADDR_NHASH (1 << INADDR_NHASH_LOG2) #define INADDR_HASHVAL(x) fnv_32_buf((&(x)), sizeof(x), FNV1_32_INIT) #define INADDR_HASH(x) \ (&V_in_ifaddrhashtbl[INADDR_HASHVAL(x) & V_in_ifaddrhmask]) extern struct rwlock in_ifaddr_lock; #define IN_IFADDR_LOCK_ASSERT() rw_assert(&in_ifaddr_lock, RA_LOCKED) #define IN_IFADDR_RLOCK() rw_rlock(&in_ifaddr_lock) #define IN_IFADDR_RLOCK_ASSERT() rw_assert(&in_ifaddr_lock, RA_RLOCKED) #define IN_IFADDR_RUNLOCK() rw_runlock(&in_ifaddr_lock) #define IN_IFADDR_WLOCK() rw_wlock(&in_ifaddr_lock) #define IN_IFADDR_WLOCK_ASSERT() rw_assert(&in_ifaddr_lock, RA_WLOCKED) #define IN_IFADDR_WUNLOCK() rw_wunlock(&in_ifaddr_lock) /* * Macro for finding the internet address structure (in_ifaddr) * corresponding to one of our IP addresses (in_addr). */ #define INADDR_TO_IFADDR(addr, ia) \ /* struct in_addr addr; */ \ /* struct in_ifaddr *ia; */ \ do { \ \ LIST_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) \ if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \ break; \ } while (0) /* * Macro for finding the interface (ifnet structure) corresponding to one * of our IP addresses. */ #define INADDR_TO_IFP(addr, ifp) \ /* struct in_addr addr; */ \ /* struct ifnet *ifp; */ \ { \ struct in_ifaddr *ia; \ \ INADDR_TO_IFADDR(addr, ia); \ (ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \ } /* * Macro for finding the internet address structure (in_ifaddr) corresponding * to a given interface (ifnet structure). */ #define IFP_TO_IA(ifp, ia) \ /* struct ifnet *ifp; */ \ /* struct in_ifaddr *ia; */ \ do { \ IN_IFADDR_RLOCK(); \ for ((ia) = TAILQ_FIRST(&V_in_ifaddrhead); \ (ia) != NULL && (ia)->ia_ifp != (ifp); \ (ia) = TAILQ_NEXT((ia), ia_link)) \ continue; \ if ((ia) != NULL) \ ifa_ref(&(ia)->ia_ifa); \ IN_IFADDR_RUNLOCK(); \ } while (0) #endif /* * IP datagram reassembly. */ #define IPREASS_NHASH_LOG2 6 #define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) #define IPREASS_HMASK (IPREASS_NHASH - 1) #define IPREASS_HASH(x,y) \ (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) /* * Legacy IPv4 IGMP per-link structure. */ struct router_info { struct ifnet *rti_ifp; int rti_type; /* type of router which is querier on this interface */ int rti_time; /* # of slow timeouts since last old query */ SLIST_ENTRY(router_info) rti_list; }; /* * Per-interface IGMP router version information. */ struct igmp_ifinfo { LIST_ENTRY(igmp_ifinfo) igi_link; struct ifnet *igi_ifp; /* interface this instance belongs to */ uint32_t igi_version; /* IGMPv3 Host Compatibility Mode */ uint32_t igi_v1_timer; /* IGMPv1 Querier Present timer (s) */ uint32_t igi_v2_timer; /* IGMPv2 Querier Present timer (s) */ uint32_t igi_v3_timer; /* IGMPv3 General Query (interface) timer (s)*/ uint32_t igi_flags; /* IGMP per-interface flags */ uint32_t igi_rv; /* IGMPv3 Robustness Variable */ uint32_t igi_qi; /* IGMPv3 Query Interval (s) */ uint32_t igi_qri; /* IGMPv3 Query Response Interval (s) */ uint32_t igi_uri; /* IGMPv3 Unsolicited Report Interval (s) */ SLIST_HEAD(,in_multi) igi_relinmhead; /* released groups */ struct ifqueue igi_gq; /* queue of general query responses */ }; #define IGIF_SILENT 0x00000001 /* Do not use IGMP on this ifp */ #define IGIF_LOOPBACK 0x00000002 /* Send IGMP reports to loopback */ /* * IPv4 multicast IGMP-layer source entry. */ struct ip_msource { RB_ENTRY(ip_msource) ims_link; /* RB tree links */ in_addr_t ims_haddr; /* host byte order */ struct ims_st { uint16_t ex; /* # of exclusive members */ uint16_t in; /* # of inclusive members */ } ims_st[2]; /* state at t0, t1 */ uint8_t ims_stp; /* pending query */ }; /* * IPv4 multicast PCB-layer source entry. */ struct in_msource { RB_ENTRY(ip_msource) ims_link; /* RB tree links */ in_addr_t ims_haddr; /* host byte order */ uint8_t imsl_st[2]; /* state before/at commit */ }; RB_HEAD(ip_msource_tree, ip_msource); /* define struct ip_msource_tree */ static __inline int ip_msource_cmp(const struct ip_msource *a, const struct ip_msource *b) { if (a->ims_haddr < b->ims_haddr) return (-1); if (a->ims_haddr == b->ims_haddr) return (0); return (1); } RB_PROTOTYPE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); /* * IPv4 multicast PCB-layer group filter descriptor. */ struct in_mfilter { struct ip_msource_tree imf_sources; /* source list for (S,G) */ u_long imf_nsrc; /* # of source entries */ uint8_t imf_st[2]; /* state before/at commit */ }; /* * IPv4 group descriptor. * * For every entry on an ifnet's if_multiaddrs list which represents * an IP multicast group, there is one of these structures. * * If any source filters are present, then a node will exist in the RB-tree * to permit fast lookup by source whenever an operation takes place. * This permits pre-order traversal when we issue reports. * Source filter trees are kept separately from the socket layer to * greatly simplify locking. * * When IGMPv3 is active, inm_timer is the response to group query timer. * The state-change timer inm_sctimer is separate; whenever state changes * for the group the state change record is generated and transmitted, * and kept if retransmissions are necessary. * * FUTURE: inm_link is now only used when groups are being purged * on a detaching ifnet. It could be demoted to a SLIST_ENTRY, but * because it is at the very start of the struct, we can't do this * w/o breaking the ABI for ifmcstat. */ struct in_multi { LIST_ENTRY(in_multi) inm_link; /* to-be-released by in_ifdetach */ struct in_addr inm_addr; /* IP multicast address, convenience */ struct ifnet *inm_ifp; /* back pointer to ifnet */ struct ifmultiaddr *inm_ifma; /* back pointer to ifmultiaddr */ u_int inm_timer; /* IGMPv1/v2 group / v3 query timer */ u_int inm_state; /* state of the membership */ void *inm_rti; /* unused, legacy field */ u_int inm_refcount; /* reference count */ /* New fields for IGMPv3 follow. */ struct igmp_ifinfo *inm_igi; /* IGMP info */ SLIST_ENTRY(in_multi) inm_nrele; /* to-be-released by IGMP */ struct ip_msource_tree inm_srcs; /* tree of sources */ u_long inm_nsrc; /* # of tree entries */ struct ifqueue inm_scq; /* queue of pending * state-change packets */ struct timeval inm_lastgsrtv; /* Time of last G-S-R query */ uint16_t inm_sctimer; /* state-change timer */ uint16_t inm_scrv; /* state-change rexmit count */ /* * SSM state counters which track state at T0 (the time the last * state-change report's RV timer went to zero) and T1 * (time of pending report, i.e. now). * Used for computing IGMPv3 state-change reports. Several refcounts * are maintained here to optimize for common use-cases. */ struct inm_st { uint16_t iss_fmode; /* IGMP filter mode */ uint16_t iss_asm; /* # of ASM listeners */ uint16_t iss_ex; /* # of exclusive members */ uint16_t iss_in; /* # of inclusive members */ uint16_t iss_rec; /* # of recorded sources */ } inm_st[2]; /* state at t0, t1 */ }; /* * Helper function to derive the filter mode on a source entry * from its internal counters. Predicates are: * A source is only excluded if all listeners exclude it. * A source is only included if no listeners exclude it, * and at least one listener includes it. * May be used by ifmcstat(8). */ static __inline uint8_t ims_get_mode(const struct in_multi *inm, const struct ip_msource *ims, uint8_t t) { t = !!t; if (inm->inm_st[t].iss_ex > 0 && inm->inm_st[t].iss_ex == ims->ims_st[t].ex) return (MCAST_EXCLUDE); else if (ims->ims_st[t].in > 0 && ims->ims_st[t].ex == 0) return (MCAST_INCLUDE); return (MCAST_UNDEFINED); } #ifdef _KERNEL #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet); SYSCTL_DECL(_net_inet_ip); SYSCTL_DECL(_net_inet_raw); #endif /* * Lock macros for IPv4 layer multicast address lists. IPv4 lock goes * before link layer multicast locks in the lock order. In most cases, * consumers of IN_*_MULTI() macros should acquire the locks before * calling them; users of the in_{add,del}multi() functions should not. */ extern struct mtx in_multi_mtx; #define IN_MULTI_LOCK() mtx_lock(&in_multi_mtx) #define IN_MULTI_UNLOCK() mtx_unlock(&in_multi_mtx) #define IN_MULTI_LOCK_ASSERT() mtx_assert(&in_multi_mtx, MA_OWNED) #define IN_MULTI_UNLOCK_ASSERT() mtx_assert(&in_multi_mtx, MA_NOTOWNED) /* * Function for looking up an in_multi record for an IPv4 multicast address * on a given interface. ifp must be valid. If no record found, return NULL. * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held. */ static __inline struct in_multi * inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) { struct ifmultiaddr *ifma; struct in_multi *inm; IN_MULTI_LOCK_ASSERT(); IF_ADDR_LOCK_ASSERT(ifp); inm = NULL; TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { if (ifma->ifma_addr->sa_family == AF_INET) { inm = (struct in_multi *)ifma->ifma_protospec; if (inm->inm_addr.s_addr == ina.s_addr) break; inm = NULL; } } return (inm); } /* * Wrapper for inm_lookup_locked(). * The IF_ADDR_LOCK will be taken on ifp and released on return. */ static __inline struct in_multi * inm_lookup(struct ifnet *ifp, const struct in_addr ina) { struct in_multi *inm; IN_MULTI_LOCK_ASSERT(); IF_ADDR_RLOCK(ifp); inm = inm_lookup_locked(ifp, ina); IF_ADDR_RUNLOCK(ifp); return (inm); } /* Acquire an in_multi record. */ static __inline void inm_acquire_locked(struct in_multi *inm) { IN_MULTI_LOCK_ASSERT(); ++inm->inm_refcount; } /* * Return values for imo_multi_filter(). */ #define MCAST_PASS 0 /* Pass */ #define MCAST_NOTGMEMBER 1 /* This host not a member of group */ #define MCAST_NOTSMEMBER 2 /* This host excluded source */ #define MCAST_MUTED 3 /* [deprecated] */ struct rtentry; struct route; struct ip_moptions; struct radix_node_head; int imo_multi_filter(const struct ip_moptions *, const struct ifnet *, const struct sockaddr *, const struct sockaddr *); void inm_commit(struct in_multi *); void inm_clear_recorded(struct in_multi *); void inm_print(const struct in_multi *); int inm_record_source(struct in_multi *inm, const in_addr_t); void inm_release(struct in_multi *); void inm_release_locked(struct in_multi *); struct in_multi * in_addmulti(struct in_addr *, struct ifnet *); void in_delmulti(struct in_multi *); int in_joingroup(struct ifnet *, const struct in_addr *, /*const*/ struct in_mfilter *, struct in_multi **); int in_joingroup_locked(struct ifnet *, const struct in_addr *, /*const*/ struct in_mfilter *, struct in_multi **); int in_leavegroup(struct in_multi *, /*const*/ struct in_mfilter *); int in_leavegroup_locked(struct in_multi *, /*const*/ struct in_mfilter *); int in_control(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); void in_rtqdrain(void); int in_addprefix(struct in_ifaddr *, int); int in_scrubprefix(struct in_ifaddr *, u_int); void ip_input(struct mbuf *); int in_ifadown(struct ifaddr *ifa, int); void in_ifscrub(struct ifnet *, struct in_ifaddr *, u_int); struct mbuf *ip_fastforward(struct mbuf *); void *in_domifattach(struct ifnet *); void in_domifdetach(struct ifnet *, void *); /* XXX */ void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum); void in_rtalloc(struct route *ro, u_int fibnum); struct rtentry *in_rtalloc1(struct sockaddr *, int, u_long, u_int); void in_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); int in_rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); void in_setmatchfunc(struct radix_node_head *, int); #if 0 int in_rt_getifa(struct rt_addrinfo *, u_int fibnum); int in_rtioctl(u_long, caddr_t, u_int); int in_rtrequest1(int, struct rt_addrinfo *, struct rtentry **, u_int); #endif #endif /* _KERNEL */ /* INET6 stuff */ #include #endif /* _NETINET_IN_VAR_H_ */ Index: stable/10/usr.sbin/arp/arp.4 =================================================================== --- stable/10/usr.sbin/arp/arp.4 (revision 309339) +++ stable/10/usr.sbin/arp/arp.4 (revision 309340) @@ -1,239 +1,255 @@ .\" Copyright (c) 1985, 1986, 1988, 1994 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)arp4.4 6.5 (Berkeley) 4/18/94 .\" $FreeBSD$ .\" -.Dd May 11, 2013 +.Dd October 7, 2016 .Dt ARP 4 .Os .Sh NAME .Nm arp .Nd Address Resolution Protocol .Sh SYNOPSIS .Cd "device ether" .Sh DESCRIPTION The Address Resolution Protocol (ARP) is used to dynamically map between Protocol Addresses (such as IP addresses) and Local Network Addresses (such as Ethernet addresses). This implementation maps IP addresses to Ethernet, ARCnet, or Token Ring addresses. It is used by all the Ethernet interface drivers. .Pp ARP caches Internet-Ethernet address mappings. When an interface requests a mapping for an address not in the cache, ARP queues the message which requires the mapping and broadcasts a message on the associated network requesting the address mapping. If a response is provided, the new mapping is cached and any pending message is transmitted. ARP will queue at most one packet while waiting for a response to a mapping request; only the most recently ``transmitted'' packet is kept. If the target host does not respond after several requests, the host is considered to be down allowing an error to be returned to transmission attempts. Further demand for this mapping causes ARP request retransmissions, that are ratelimited to one packet per second. The error is .Er EHOSTDOWN for a non-responding destination host, and .Er EHOSTUNREACH for a non-responding router. .Pp The ARP cache is stored in the system routing table as dynamically-created host routes. The route to a directly-attached Ethernet network is installed as a .Dq cloning route (one with the .Li RTF_CLONING flag set), causing routes to individual hosts on that network to be created on demand. These routes time out periodically (normally 20 minutes after validated; entries are not validated when not in use). .Pp ARP entries may be added, deleted or changed with the .Xr arp 8 utility. Manually-added entries may be temporary or permanent, and may be .Dq published , in which case the system will respond to ARP requests for that host as if it were the target of the request. .Pp In the past, ARP was used to negotiate the use of a trailer encapsulation. This is no longer supported. .Pp ARP watches passively for hosts impersonating the local host (i.e., a host which responds to an ARP mapping request for the local host's address). .Pp Proxy ARP is a feature whereby the local host will respond to requests for addresses other than itself, with its own address. Normally, proxy ARP in .Fx is set up on a host-by-host basis using the .Xr arp 8 utility, by adding an entry for each host inside a given subnet for which proxying of ARP requests is desired. However, the .Dq "proxy all" feature causes the local host to act as a proxy for .Em all hosts reachable through some other network interface, different from the one the request came in from. It may be enabled by setting the .Xr sysctl 8 MIB variable .Va net.link.ether.inet.proxyall to 1. .Sh MIB Variables The ARP protocol implements a number of configurable variables in .Va net.link.ether.inet branch of the .Xr sysctl 3 MIB. .Bl -tag -width "log_arp_permanent_modify" .It Va allow_multicast -Should the kernel install ARP entries with multicast bit set in -the hardware address. -Installing such entries is RFC 1812 violation, but some prorietary -load balancing techniques require routers on network to do so. +Install ARP entries with the multicast bit set in the hardware address. +Installing such entries is an RFC 1812 violation, but some proprietary load +balancing techniques require routers to do so. Turned off by default. +.It Va garp_rexmit_count +Retransmit gratuitous ARP (GARP) packets when an IPv4 address is added to an +interface. +A GARP is always transmitted when an IPv4 address is added to an interface. +A non-zero value causes the GARP packet to be retransmitted the stated number +of times. +The interval between retransmissions is doubled each time, so the +retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds). +The default value of zero means only the initial GARP is sent; no +additional GARP packets are retransmitted. +The maximum value is sixteen. +.Pp +The default behavior of a single GARP packet is usually sufficient. +However, a single GARP might be dropped or lost in some circumstances. +This is particularly harmful when a shared address is passed between cluster +nodes. +Neighbors on the network link might then work with a stale ARP cache and send +packets destined for that address to the node that previously owned the +address, which might not respond. .It Va log_arp_movements -Should the kernel log movements of IP addresses from one hardware -address to an other. +Log movements of IP addresses from one hardware address to another. See .Sx DIAGNOSTICS below. Turned on by default. .It Va log_arp_permanent_modify -Should the kernel log attempts of remote host on network to modify a -permanent ARP entry. +Log attempts by a remote host to modify a permanent ARP entry. See .Sx DIAGNOSTICS below. Turned on by default. .It Va log_arp_wrong_iface -Should the kernel log attempts to insert an ARP entry on an interface -when the IP network the address belongs to is connected to an other -interface. +Log attempts to insert an ARP entry on an interface when the IP network to +which the address belongs is connected to another interface. See .Sx DIAGNOSTICS below. Turned on by default. .It Va max_log_per_second -Limit number of remotely triggered logging events to a configured value -per second. +Limit the number of remotely triggered logging events to a configured value per +second. Default is 1 log message per second. .It Va max_age How long an ARP entry is held in the cache until it needs to be refreshed. Default is 1200 seconds. .It Va maxhold -How many packets hold in the per-entry output queue while the entry +How many packets to hold in the per-entry output queue while the entry is being resolved. Default is one packet. .It Va maxtries -Number of retransmits before host is considered down and error is returned. +Number of retransmits before a host is considered down and an error is +returned. Default is 5 tries. .It Va proxyall -Enables ARP proxying for all hosts on net. +Enables ARP proxying. Turned off by default. .It Va useloopback If an ARP entry is added for local address, force the traffic to go through the loopback interface. Turned on by default. .It Va wait Lifetime of an incomplete ARP entry. Default is 20 seconds. .El .Sh DIAGNOSTICS .Bl -diag .It "arp: %x:%x:%x:%x:%x:%x is using my IP address %d.%d.%d.%d on %s!" ARP has discovered another host on the local network which responds to mapping requests for its own Internet address with a different Ethernet address, generally indicating that two hosts are attempting to use the same Internet address. .It "arp: link address is broadcast for IP address %d.%d.%d.%d!" ARP requested information for a host, and received an answer indicating that the host's ethernet address is the ethernet broadcast address. This indicates a misconfigured or broken device. .It "arp: %d.%d.%d.%d moved from %x:%x:%x:%x:%x:%x to %x:%x:%x:%x:%x:%x on %s" ARP had a cached value for the ethernet address of the referenced host, but received a reply indicating that the host is at a new address. This can happen normally when host hardware addresses change, or when a mobile node arrives or leaves the local subnet. It can also indicate a problem with proxy ARP. This message can only be issued if the sysctl .Va net.link.ether.inet.log_arp_movements is set to 1, which is the system's default behaviour. .It "arpresolve: can't allocate llinfo for %d.%d.%d.%d" The route for the referenced host points to a device upon which ARP is required, but ARP was unable to allocate a routing table entry in which to store the host's MAC address. This usually points to a misconfigured routing table. It can also occur if the kernel cannot allocate memory. .It "arp: %d.%d.%d.%d is on if0 but got reply from %x:%x:%x:%x:%x:%x on if1" Physical connections exist to the same logical IP network on both if0 and if1. It can also occur if an entry already exists in the ARP cache for the IP address above, and the cable has been disconnected from if0, then reconnected to if1. This message can only be issued if the sysctl .Va net.link.ether.inet.log_arp_wrong_iface is set to 1, which is the system's default behaviour. .It "arp: %x:%x:%x:%x:%x:%x attempts to modify permanent entry for %d.%d.%d.%d on %s" ARP has received an ARP reply that attempts to overwrite a permanent entry in the local ARP table. This error will only be logged if the sysctl .Va net.link.ether.inet.log_arp_permanent_modify is set to 1, which is the system's default behaviour. .It "arp: %x:%x:%x:%x:%x:%x is multicast" Kernel refused to install an entry with multicast hardware address. If you really want such addresses being installed, set the sysctl .Va net.link.ether.inet.allow_multicast to a positive value. .El .Sh SEE ALSO .Xr inet 4 , .Xr route 4 , .Xr arp 8 , .Xr ifconfig 8 , .Xr route 8 , .Xr sysctl 8 .Rs .%A Plummer, D. .%B "An Ethernet Address Resolution Protocol" .%T RFC826 .Re .Rs .%A Leffler, S.J. .%A Karels, M.J. .%B "Trailer Encapsulations" .%T RFC893 .Re Index: stable/10 =================================================================== --- stable/10 (revision 309339) +++ stable/10 (revision 309340) Property changes on: stable/10 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r306577,306652,306830