Index: stable/9/sys/netinet/if_ether.c =================================================================== --- stable/9/sys/netinet/if_ether.c (revision 260509) +++ stable/9/sys/netinet/if_ether.c (revision 260510) @@ -1,929 +1,929 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_ether.c 8.1 (Berkeley) 6/10/93 */ /* * Ethernet address resolution protocol. * TODO: * add "inuse/lock" bit (or ref. count) along with valid bit */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) #include #endif #include #include #include #define SIN(s) ((struct sockaddr_in *)s) #define SDL(s) ((struct sockaddr_dl *)s) SYSCTL_DECL(_net_link_ether); static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, ""); static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, ""); /* timer values */ static VNET_DEFINE(int, arpt_keep) = (20*60); /* once resolved, good for 20 * minutes */ static VNET_DEFINE(int, arp_maxtries) = 5; VNET_DEFINE(int, useloopback) = 1; /* use loopback interface for * local traffic */ static VNET_DEFINE(int, arp_proxyall) = 0; static VNET_DEFINE(int, arpt_down) = 20; /* keep incomplete entries for * 20 seconds */ VNET_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ static VNET_DEFINE(int, arp_maxhold) = 1; #define V_arpt_keep VNET(arpt_keep) #define V_arpt_down VNET(arpt_down) #define V_arp_maxtries VNET(arp_maxtries) #define V_arp_proxyall VNET(arp_proxyall) #define V_arpstat VNET(arpstat) #define V_arp_maxhold VNET(arp_maxhold) SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_RW, &VNET_NAME(arpt_keep), 0, "ARP entry lifetime in seconds"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_RW, &VNET_NAME(arp_maxtries), 0, "ARP resolution attempts before returning error"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, useloopback, CTLFLAG_RW, &VNET_NAME(useloopback), 0, "Use the loopback interface for local traffic"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW, &VNET_NAME(arp_proxyall), 0, "Enable proxy ARP for all suitable requests"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_RW, &VNET_NAME(arpt_down), 0, "Incomplete ARP entry lifetime in seconds"); SYSCTL_VNET_STRUCT(_net_link_ether_arp, OID_AUTO, stats, CTLFLAG_RW, &VNET_NAME(arpstat), arpstat, "ARP statistics (struct arpstat, net/if_arp.h)"); SYSCTL_VNET_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_RW, &VNET_NAME(arp_maxhold), 0, "Number of packets to hold per ARP entry"); static void arp_init(void); void arprequest(struct ifnet *, struct in_addr *, struct in_addr *, u_char *); static void arpintr(struct mbuf *); static void arptimer(void *); #ifdef INET static void in_arpinput(struct mbuf *); #endif static const struct netisr_handler arp_nh = { .nh_name = "arp", .nh_handler = arpintr, .nh_proto = NETISR_ARP, .nh_policy = NETISR_POLICY_SOURCE, }; #ifdef AF_INET void arp_ifscrub(struct ifnet *ifp, uint32_t addr); /* * called by in_ifscrub to remove entry from the table when * the interface goes away */ void arp_ifscrub(struct ifnet *ifp, uint32_t addr) { struct sockaddr_in addr4; bzero((void *)&addr4, sizeof(addr4)); addr4.sin_len = sizeof(addr4); addr4.sin_family = AF_INET; addr4.sin_addr.s_addr = addr; - IF_AFDATA_LOCK(ifp); + IF_AFDATA_RLOCK(ifp); lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR), (struct sockaddr *)&addr4); - IF_AFDATA_UNLOCK(ifp); + IF_AFDATA_RUNLOCK(ifp); } #endif /* * Timeout routine. Age arp_tab entries periodically. */ static void arptimer(void *arg) { struct llentry *lle = (struct llentry *)arg; struct ifnet *ifp; if (lle->la_flags & LLE_STATIC) { LLE_WUNLOCK(lle); return; } ifp = lle->lle_tbl->llt_ifp; CURVNET_SET(ifp->if_vnet); if ((lle->la_flags & LLE_DELETED) == 0) { int evt; if (lle->la_flags & LLE_VALID) evt = LLENTRY_EXPIRED; else evt = LLENTRY_TIMEDOUT; EVENTHANDLER_INVOKE(lle_event, lle, evt); } callout_stop(&lle->la_timer); /* XXX: LOR avoidance. We still have ref on lle. */ LLE_WUNLOCK(lle); IF_AFDATA_LOCK(ifp); LLE_WLOCK(lle); /* Guard against race with other llentry_free(). */ if (lle->la_flags & LLE_LINKED) { size_t pkts_dropped; LLE_REMREF(lle); pkts_dropped = llentry_free(lle); ARPSTAT_ADD(dropped, pkts_dropped); } else LLE_FREE_LOCKED(lle); IF_AFDATA_UNLOCK(ifp); ARPSTAT_INC(timeouts); CURVNET_RESTORE(); } /* * Broadcast an ARP request. Caller specifies: * - arp header source ip address * - arp header target ip address * - arp header source ethernet address */ void arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip, u_char *enaddr) { struct mbuf *m; struct arphdr *ah; struct sockaddr sa; if (sip == NULL) { /* XXX don't believe this can happen (or explain why) */ /* * The caller did not supply a source address, try to find * a compatible one among those assigned to this interface. */ struct ifaddr *ifa; TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (!ifa->ifa_addr || ifa->ifa_addr->sa_family != AF_INET) continue; sip = &SIN(ifa->ifa_addr)->sin_addr; if (0 == ((sip->s_addr ^ tip->s_addr) & SIN(ifa->ifa_netmask)->sin_addr.s_addr) ) break; /* found it. */ } if (sip == NULL) { printf("%s: cannot find matching address\n", __func__); return; } } if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL) return; m->m_len = sizeof(*ah) + 2*sizeof(struct in_addr) + 2*ifp->if_data.ifi_addrlen; m->m_pkthdr.len = m->m_len; MH_ALIGN(m, m->m_len); ah = mtod(m, struct arphdr *); bzero((caddr_t)ah, m->m_len); #ifdef MAC mac_netinet_arp_send(ifp, m); #endif ah->ar_pro = htons(ETHERTYPE_IP); ah->ar_hln = ifp->if_addrlen; /* hardware address length */ ah->ar_pln = sizeof(struct in_addr); /* protocol address length */ ah->ar_op = htons(ARPOP_REQUEST); bcopy((caddr_t)enaddr, (caddr_t)ar_sha(ah), ah->ar_hln); bcopy((caddr_t)sip, (caddr_t)ar_spa(ah), ah->ar_pln); bcopy((caddr_t)tip, (caddr_t)ar_tpa(ah), ah->ar_pln); sa.sa_family = AF_ARP; sa.sa_len = 2; m->m_flags |= M_BCAST; (*ifp->if_output)(ifp, m, &sa, NULL); ARPSTAT_INC(txrequests); } /* * Resolve an IP address into an ethernet address. * On input: * ifp is the interface we use * rt0 is the route to the final destination (possibly useless) * m is the mbuf. May be NULL if we don't have a packet. * dst is the next hop, * desten is where we want the address. * * On success, desten is filled in and the function returns 0; * If the packet must be held pending resolution, we return EWOULDBLOCK * On other errors, we return the corresponding error code. * Note that m_freem() handles NULL. */ int arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m, struct sockaddr *dst, u_char *desten, struct llentry **lle) { struct llentry *la = 0; u_int flags = 0; struct mbuf *curr = NULL; struct mbuf *next = NULL; int error, renew; *lle = NULL; if (m != NULL) { if (m->m_flags & M_BCAST) { /* broadcast */ (void)memcpy(desten, ifp->if_broadcastaddr, ifp->if_addrlen); return (0); } if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) { /* multicast */ ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten); return (0); } } retry: IF_AFDATA_RLOCK(ifp); la = lla_lookup(LLTABLE(ifp), flags, dst); IF_AFDATA_RUNLOCK(ifp); if ((la == NULL) && ((flags & LLE_EXCLUSIVE) == 0) && ((ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0)) { flags |= (LLE_CREATE | LLE_EXCLUSIVE); IF_AFDATA_WLOCK(ifp); la = lla_lookup(LLTABLE(ifp), flags, dst); IF_AFDATA_WUNLOCK(ifp); } if (la == NULL) { if (flags & LLE_CREATE) log(LOG_DEBUG, "arpresolve: can't allocate llinfo for %s\n", inet_ntoa(SIN(dst)->sin_addr)); m_freem(m); return (EINVAL); } if ((la->la_flags & LLE_VALID) && ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) { bcopy(&la->ll_addr, desten, ifp->if_addrlen); /* * If entry has an expiry time and it is approaching, * see if we need to send an ARP request within this * arpt_down interval. */ if (!(la->la_flags & LLE_STATIC) && time_uptime + la->la_preempt > la->la_expire) { arprequest(ifp, NULL, &SIN(dst)->sin_addr, IF_LLADDR(ifp)); la->la_preempt--; } *lle = la; error = 0; goto done; } if (la->la_flags & LLE_STATIC) { /* should not happen! */ log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n", inet_ntoa(SIN(dst)->sin_addr)); m_freem(m); error = EINVAL; goto done; } renew = (la->la_asked == 0 || la->la_expire != time_uptime); if ((renew || m != NULL) && (flags & LLE_EXCLUSIVE) == 0) { flags |= LLE_EXCLUSIVE; LLE_RUNLOCK(la); goto retry; } /* * There is an arptab entry, but no ethernet address * response yet. Add the mbuf to the list, dropping * the oldest packet if we have exceeded the system * setting. */ if (m != NULL) { if (la->la_numheld >= V_arp_maxhold) { if (la->la_hold != NULL) { next = la->la_hold->m_nextpkt; m_freem(la->la_hold); la->la_hold = next; la->la_numheld--; ARPSTAT_INC(dropped); } } if (la->la_hold != NULL) { curr = la->la_hold; while (curr->m_nextpkt != NULL) curr = curr->m_nextpkt; curr->m_nextpkt = m; } else la->la_hold = m; la->la_numheld++; if (renew == 0 && (flags & LLE_EXCLUSIVE)) { flags &= ~LLE_EXCLUSIVE; LLE_DOWNGRADE(la); } } /* * Return EWOULDBLOCK if we have tried less than arp_maxtries. It * will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH * if we have already sent arp_maxtries ARP requests. Retransmit the * ARP request, but not faster than one request per second. */ if (la->la_asked < V_arp_maxtries) error = EWOULDBLOCK; /* First request. */ else error = rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) ? EHOSTUNREACH : EHOSTDOWN; if (renew) { int canceled; LLE_ADDREF(la); la->la_expire = time_uptime; canceled = callout_reset(&la->la_timer, hz * V_arpt_down, arptimer, la); if (canceled) LLE_REMREF(la); la->la_asked++; LLE_WUNLOCK(la); arprequest(ifp, NULL, &SIN(dst)->sin_addr, IF_LLADDR(ifp)); return (error); } done: if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(la); else LLE_RUNLOCK(la); return (error); } /* * Common length and type checks are done here, * then the protocol-specific routine is called. */ static void arpintr(struct mbuf *m) { struct arphdr *ar; if (m->m_len < sizeof(struct arphdr) && ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) { log(LOG_NOTICE, "arp: runt packet -- m_pullup failed\n"); return; } ar = mtod(m, struct arphdr *); if (ntohs(ar->ar_hrd) != ARPHRD_ETHER && ntohs(ar->ar_hrd) != ARPHRD_IEEE802 && ntohs(ar->ar_hrd) != ARPHRD_ARCNET && ntohs(ar->ar_hrd) != ARPHRD_IEEE1394 && ntohs(ar->ar_hrd) != ARPHRD_INFINIBAND) { log(LOG_NOTICE, "arp: unknown hardware address format (0x%2D)" " (from %*D to %*D)\n", (unsigned char *)&ar->ar_hrd, "", ETHER_ADDR_LEN, (u_char *)ar_sha(ar), ":", ETHER_ADDR_LEN, (u_char *)ar_tha(ar), ":"); m_freem(m); return; } if (m->m_len < arphdr_len(ar)) { if ((m = m_pullup(m, arphdr_len(ar))) == NULL) { log(LOG_NOTICE, "arp: runt packet\n"); m_freem(m); return; } ar = mtod(m, struct arphdr *); } ARPSTAT_INC(received); switch (ntohs(ar->ar_pro)) { #ifdef INET case ETHERTYPE_IP: in_arpinput(m); return; #endif } m_freem(m); } #ifdef INET /* * ARP for Internet protocols on 10 Mb/s Ethernet. * Algorithm is that given in RFC 826. * In addition, a sanity check is performed on the sender * protocol address, to catch impersonators. * We no longer handle negotiations for use of trailer protocol: * Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent * along with IP replies if we wanted trailers sent to us, * and also sent them in response to IP replies. * This allowed either end to announce the desire to receive * trailer packets. * We no longer reply to requests for ETHERTYPE_TRAIL protocol either, * but formerly didn't normally send requests. */ static int log_arp_wrong_iface = 1; static int log_arp_movements = 1; static int log_arp_permanent_modify = 1; static int allow_multicast = 0; SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW, &log_arp_wrong_iface, 0, "log arp packets arriving on the wrong interface"); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW, &log_arp_movements, 0, "log arp replies from MACs different than the one in the cache"); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW, &log_arp_permanent_modify, 0, "log arp replies from MACs different than the one in the permanent arp entry"); SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW, &allow_multicast, 0, "accept multicast addresses"); static void in_arpinput(struct mbuf *m) { struct arphdr *ah; struct ifnet *ifp = m->m_pkthdr.rcvif; struct llentry *la = NULL; struct rtentry *rt; struct ifaddr *ifa; struct in_ifaddr *ia; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; u_int8_t *enaddr = NULL; int op, flags; int req_len; int bridged = 0, is_bridge = 0; int carp_match = 0; struct sockaddr_in sin; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr.s_addr = 0; if (ifp->if_bridge) bridged = 1; if (ifp->if_type == IFT_BRIDGE) is_bridge = 1; req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr)); if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) { log(LOG_NOTICE, "in_arp: runt packet -- m_pullup failed\n"); return; } ah = mtod(m, struct arphdr *); /* * ARP is only for IPv4 so we can reject packets with * a protocol length not equal to an IPv4 address. */ if (ah->ar_pln != sizeof(struct in_addr)) { log(LOG_NOTICE, "in_arp: requested protocol length != %zu\n", sizeof(struct in_addr)); goto drop; } if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) { log(LOG_NOTICE, "arp: %*D is multicast\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":"); goto drop; } op = ntohs(ah->ar_op); (void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr)); (void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr)); if (op == ARPOP_REPLY) ARPSTAT_INC(rxreplies); /* * For a bridge, we want to check the address irrespective * of the receive interface. (This will change slightly * when we have clusters of interfaces). * If the interface does not match, but the recieving interface * is part of carp, we call carp_iamatch to see if this is a * request for the virtual host ip. * XXX: This is really ugly! */ IN_IFADDR_RLOCK(); LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) { if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) || ia->ia_ifp == ifp) && itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); goto match; } if (ifp->if_carp != NULL && (*carp_iamatch_p)(ifp, ia, &isaddr, &enaddr) && itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) { carp_match = 1; ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); goto match; } } LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash) if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) || ia->ia_ifp == ifp) && isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); goto match; } #define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \ (ia->ia_ifp->if_bridge == ifp->if_softc && \ !bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) && \ addr == ia->ia_addr.sin_addr.s_addr) /* * Check the case when bridge shares its MAC address with * some of its children, so packets are claimed by bridge * itself (bridge_input() does it first), but they are really * meant to be destined to the bridge member. */ if (is_bridge) { LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) { if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) { ifa_ref(&ia->ia_ifa); ifp = ia->ia_ifp; IN_IFADDR_RUNLOCK(); goto match; } } } #undef BDG_MEMBER_MATCHES_ARP IN_IFADDR_RUNLOCK(); /* * No match, use the first inet address on the receive interface * as a dummy address for the rest of the function. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET) { ia = ifatoia(ifa); ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto match; } IF_ADDR_RUNLOCK(ifp); /* * If bridging, fall back to using any inet address. */ IN_IFADDR_RLOCK(); if (!bridged || (ia = TAILQ_FIRST(&V_in_ifaddrhead)) == NULL) { IN_IFADDR_RUNLOCK(); goto drop; } ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); match: if (!enaddr) enaddr = (u_int8_t *)IF_LLADDR(ifp); myaddr = ia->ia_addr.sin_addr; ifa_free(&ia->ia_ifa); if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen)) goto drop; /* it's from me, ignore it. */ if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) { log(LOG_NOTICE, "arp: link address is broadcast for IP address %s!\n", inet_ntoa(isaddr)); goto drop; } /* * Warn if another host is using the same IP address, but only if the * IP address isn't 0.0.0.0, which is used for DHCP only, in which * case we suppress the warning to avoid false positive complaints of * potential misconfiguration. */ if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) { log(LOG_ERR, "arp: %*D is using my IP address %s on %s!\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", inet_ntoa(isaddr), ifp->if_xname); itaddr = myaddr; ARPSTAT_INC(dupips); goto reply; } if (ifp->if_flags & IFF_STATICARP) goto reply; bzero(&sin, sizeof(sin)); sin.sin_len = sizeof(struct sockaddr_in); sin.sin_family = AF_INET; sin.sin_addr = isaddr; flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0; flags |= LLE_EXCLUSIVE; IF_AFDATA_LOCK(ifp); la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin); IF_AFDATA_UNLOCK(ifp); if (la != NULL) { /* the following is not an error when doing bridging */ if (!bridged && la->lle_tbl->llt_ifp != ifp && !carp_match) { if (log_arp_wrong_iface) log(LOG_WARNING, "arp: %s is on %s " "but got reply from %*D on %s\n", inet_ntoa(isaddr), la->lle_tbl->llt_ifp->if_xname, ifp->if_addrlen, (u_char *)ar_sha(ah), ":", ifp->if_xname); LLE_WUNLOCK(la); goto reply; } if ((la->la_flags & LLE_VALID) && bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) { if (la->la_flags & LLE_STATIC) { LLE_WUNLOCK(la); if (log_arp_permanent_modify) log(LOG_ERR, "arp: %*D attempts to modify " "permanent entry for %s on %s\n", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", inet_ntoa(isaddr), ifp->if_xname); goto reply; } if (log_arp_movements) { log(LOG_INFO, "arp: %s moved from %*D " "to %*D on %s\n", inet_ntoa(isaddr), ifp->if_addrlen, (u_char *)&la->ll_addr, ":", ifp->if_addrlen, (u_char *)ar_sha(ah), ":", ifp->if_xname); } } if (ifp->if_addrlen != ah->ar_hln) { LLE_WUNLOCK(la); log(LOG_WARNING, "arp from %*D: addr len: new %d, " "i/f %d (ignored)\n", ifp->if_addrlen, (u_char *) ar_sha(ah), ":", ah->ar_hln, ifp->if_addrlen); goto drop; } (void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen); la->la_flags |= LLE_VALID; EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED); if (!(la->la_flags & LLE_STATIC)) { int canceled; LLE_ADDREF(la); la->la_expire = time_uptime + V_arpt_keep; canceled = callout_reset(&la->la_timer, hz * V_arpt_keep, arptimer, la); if (canceled) LLE_REMREF(la); } la->la_asked = 0; la->la_preempt = V_arp_maxtries; /* * The packets are all freed within the call to the output * routine. * * NB: The lock MUST be released before the call to the * output routine. */ if (la->la_hold != NULL) { struct mbuf *m_hold, *m_hold_next; m_hold = la->la_hold; la->la_hold = NULL; la->la_numheld = 0; memcpy(&sa, L3_ADDR(la), sizeof(sa)); LLE_WUNLOCK(la); for (; m_hold != NULL; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; (*ifp->if_output)(ifp, m_hold, &sa, NULL); } } else LLE_WUNLOCK(la); } reply: if (op != ARPOP_REQUEST) goto drop; ARPSTAT_INC(rxrequests); if (itaddr.s_addr == myaddr.s_addr) { /* Shortcut.. the receiving interface is the target. */ (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); } else { struct llentry *lle = NULL; sin.sin_addr = itaddr; - IF_AFDATA_LOCK(ifp); + IF_AFDATA_RLOCK(ifp); lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin); - IF_AFDATA_UNLOCK(ifp); + IF_AFDATA_RUNLOCK(ifp); if ((lle != NULL) && (lle->la_flags & LLE_PUB)) { (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln); LLE_RUNLOCK(lle); } else { if (lle != NULL) LLE_RUNLOCK(lle); if (!V_arp_proxyall) goto drop; sin.sin_addr = itaddr; /* XXX MRT use table 0 for arp reply */ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0); if (!rt) goto drop; /* * Don't send proxies for nodes on the same interface * as this one came out of, or we'll get into a fight * over who claims what Ether address. */ if (!rt->rt_ifp || rt->rt_ifp == ifp) { RTFREE_LOCKED(rt); goto drop; } RTFREE_LOCKED(rt); (void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln); (void)memcpy(ar_sha(ah), enaddr, ah->ar_hln); /* * Also check that the node which sent the ARP packet * is on the interface we expect it to be on. This * avoids ARP chaos if an interface is connected to the * wrong network. */ sin.sin_addr = isaddr; /* XXX MRT use table 0 for arp checks */ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0); if (!rt) goto drop; if (rt->rt_ifp != ifp) { log(LOG_INFO, "arp_proxy: ignoring request" " from %s via %s, expecting %s\n", inet_ntoa(isaddr), ifp->if_xname, rt->rt_ifp->if_xname); RTFREE_LOCKED(rt); goto drop; } RTFREE_LOCKED(rt); #ifdef DEBUG_PROXY printf("arp: proxying for %s\n", inet_ntoa(itaddr)); #endif } } if (itaddr.s_addr == myaddr.s_addr && IN_LINKLOCAL(ntohl(itaddr.s_addr))) { /* RFC 3927 link-local IPv4; always reply by broadcast. */ #ifdef DEBUG_LINKLOCAL printf("arp: sending reply for link-local addr %s\n", inet_ntoa(itaddr)); #endif m->m_flags |= M_BCAST; m->m_flags &= ~M_MCAST; } else { /* default behaviour; never reply by broadcast. */ m->m_flags &= ~(M_BCAST|M_MCAST); } (void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln); (void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln); ah->ar_op = htons(ARPOP_REPLY); ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */ m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln); m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = NULL; sa.sa_family = AF_ARP; sa.sa_len = 2; (*ifp->if_output)(ifp, m, &sa, NULL); ARPSTAT_INC(txreplies); return; drop: m_freem(m); } #endif void arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa) { struct llentry *lle; if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) { arprequest(ifp, &IA_SIN(ifa)->sin_addr, &IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp)); /* * interface address is considered static entry * because the output of the arp utility shows * that L2 entry as permanent */ IF_AFDATA_LOCK(ifp); lle = lla_lookup(LLTABLE(ifp), (LLE_CREATE | LLE_IFADDR | LLE_STATIC), (struct sockaddr *)IA_SIN(ifa)); IF_AFDATA_UNLOCK(ifp); if (lle == NULL) log(LOG_INFO, "arp_ifinit: cannot create arp " "entry for interface address\n"); else LLE_RUNLOCK(lle); } ifa->ifa_rtrequest = NULL; } void arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr) { if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) arprequest(ifp, &IA_SIN(ifa)->sin_addr, &IA_SIN(ifa)->sin_addr, enaddr); ifa->ifa_rtrequest = NULL; } static void arp_init(void) { netisr_register(&arp_nh); } SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0); Index: stable/9/sys/netinet/in.c =================================================================== --- stable/9/sys/netinet/in.c (revision 260509) +++ stable/9/sys/netinet/in.c (revision 260510) @@ -1,1661 +1,1662 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (C) 2001 WIDE Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.c 8.4 (Berkeley) 1/9/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int in_mask2len(struct in_addr *); static void in_len2mask(struct in_addr *, int); static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); static int in_addprefix(struct in_ifaddr *, int); static int in_scrubprefix(struct in_ifaddr *, u_int); static void in_socktrim(struct sockaddr_in *); static int in_ifinit(struct ifnet *, struct in_ifaddr *, struct sockaddr_in *, int); static void in_purgemaddrs(struct ifnet *); static VNET_DEFINE(int, sameprefixcarponly); #define V_sameprefixcarponly VNET(sameprefixcarponly) SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW, &VNET_NAME(sameprefixcarponly), 0, "Refuse to create same prefixes on different interfaces"); VNET_DECLARE(struct inpcbinfo, ripcbinfo); #define V_ripcbinfo VNET(ripcbinfo) VNET_DECLARE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */ #define V_arpstat VNET(arpstat) /* * Return 1 if an internet address is for a ``local'' host * (one to which we have a connection). */ int in_localaddr(struct in_addr in) { register u_long i = ntohl(in.s_addr); register struct in_ifaddr *ia; IN_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if ((i & ia->ia_subnetmask) == ia->ia_subnet) { IN_IFADDR_RUNLOCK(); return (1); } } IN_IFADDR_RUNLOCK(); return (0); } /* * Return 1 if an internet address is for the local host and configured * on one of its interfaces. */ int in_localip(struct in_addr in) { struct in_ifaddr *ia; IN_IFADDR_RLOCK(); LIST_FOREACH(ia, INADDR_HASH(in.s_addr), ia_hash) { if (IA_SIN(ia)->sin_addr.s_addr == in.s_addr) { IN_IFADDR_RUNLOCK(); return (1); } } IN_IFADDR_RUNLOCK(); return (0); } /* * Determine whether an IP address is in a reserved set of addresses * that may not be forwarded, or whether datagrams to that destination * may be forwarded. */ int in_canforward(struct in_addr in) { register u_long i = ntohl(in.s_addr); register u_long net; if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i)) return (0); if (IN_CLASSA(i)) { net = i & IN_CLASSA_NET; if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT)) return (0); } return (1); } /* * Trim a mask in a sockaddr */ static void in_socktrim(struct sockaddr_in *ap) { register char *cplim = (char *) &ap->sin_addr; register char *cp = (char *) (&ap->sin_addr + 1); ap->sin_len = 0; while (--cp >= cplim) if (*cp) { (ap)->sin_len = cp - (char *) (ap) + 1; break; } } static int in_mask2len(mask) struct in_addr *mask; { int x, y; u_char *p; p = (u_char *)mask; for (x = 0; x < sizeof(*mask); x++) { if (p[x] != 0xff) break; } y = 0; if (x < sizeof(*mask)) { for (y = 0; y < 8; y++) { if ((p[x] & (0x80 >> y)) == 0) break; } } return (x * 8 + y); } static void in_len2mask(struct in_addr *mask, int len) { int i; u_char *p; p = (u_char *)mask; bzero(mask, sizeof(*mask)); for (i = 0; i < len / 8; i++) p[i] = 0xff; if (len % 8) p[i] = (0xff00 >> (len % 8)) & 0xff; } /* * Generic internet control operations (ioctl's). * * ifp is NULL if not an interface-specific ioctl. */ /* ARGSUSED */ int in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { register struct ifreq *ifr = (struct ifreq *)data; register struct in_ifaddr *ia, *iap; register struct ifaddr *ifa; struct in_addr allhosts_addr; struct in_addr dst; struct in_ifinfo *ii; struct in_aliasreq *ifra = (struct in_aliasreq *)data; struct sockaddr_in oldaddr; int error, hostIsNew, iaIsNew, maskIsNew; int iaIsFirst; ia = NULL; iaIsFirst = 0; iaIsNew = 0; allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP); /* * Filter out ioctls we implement directly; forward the rest on to * in_lifaddr_ioctl() and ifp->if_ioctl(). */ switch (cmd) { case SIOCAIFADDR: case SIOCDIFADDR: case SIOCGIFADDR: case SIOCGIFBRDADDR: case SIOCGIFDSTADDR: case SIOCGIFNETMASK: case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFDSTADDR: case SIOCSIFNETMASK: break; case SIOCALIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_ADDIFADDR); if (error) return (error); } if (ifp == NULL) return (EINVAL); return in_lifaddr_ioctl(so, cmd, data, ifp, td); case SIOCDLIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_DELIFADDR); if (error) return (error); } if (ifp == NULL) return (EINVAL); return in_lifaddr_ioctl(so, cmd, data, ifp, td); case SIOCGLIFADDR: if (ifp == NULL) return (EINVAL); return in_lifaddr_ioctl(so, cmd, data, ifp, td); default: if (ifp == NULL || ifp->if_ioctl == NULL) return (EOPNOTSUPP); return ((*ifp->if_ioctl)(ifp, cmd, data)); } if (ifp == NULL) return (EADDRNOTAVAIL); /* * Security checks before we get involved in any work. */ switch (cmd) { case SIOCAIFADDR: case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFNETMASK: case SIOCSIFDSTADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_ADDIFADDR); if (error) return (error); } break; case SIOCDIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_DELIFADDR); if (error) return (error); } break; } /* * Find address for this interface, if it exists. * * If an alias address was specified, find that one instead of the * first one on the interface, if possible. */ dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr; IN_IFADDR_RLOCK(); LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) { if (iap->ia_ifp == ifp && iap->ia_addr.sin_addr.s_addr == dst.s_addr) { if (td == NULL || prison_check_ip4(td->td_ucred, &dst) == 0) ia = iap; break; } } if (ia != NULL) ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); if (ia == NULL) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { iap = ifatoia(ifa); if (iap->ia_addr.sin_family == AF_INET) { if (td != NULL && prison_check_ip4(td->td_ucred, &iap->ia_addr.sin_addr) != 0) continue; ia = iap; break; } } if (ia != NULL) ifa_ref(&ia->ia_ifa); IF_ADDR_RUNLOCK(ifp); } if (ia == NULL) iaIsFirst = 1; error = 0; switch (cmd) { case SIOCAIFADDR: case SIOCDIFADDR: if (ifra->ifra_addr.sin_family == AF_INET) { struct in_ifaddr *oia; IN_IFADDR_RLOCK(); for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) { if (ia->ia_ifp == ifp && ia->ia_addr.sin_addr.s_addr == ifra->ifra_addr.sin_addr.s_addr) break; } if (ia != NULL && ia != oia) ifa_ref(&ia->ia_ifa); if (oia != NULL && ia != oia) ifa_free(&oia->ia_ifa); IN_IFADDR_RUNLOCK(); if ((ifp->if_flags & IFF_POINTOPOINT) && (cmd == SIOCAIFADDR) && (ifra->ifra_dstaddr.sin_addr.s_addr == INADDR_ANY)) { error = EDESTADDRREQ; goto out; } } if (cmd == SIOCDIFADDR && ia == NULL) { error = EADDRNOTAVAIL; goto out; } /* FALLTHROUGH */ case SIOCSIFADDR: case SIOCSIFNETMASK: case SIOCSIFDSTADDR: if (ia == NULL) { ia = (struct in_ifaddr *) malloc(sizeof *ia, M_IFADDR, M_NOWAIT | M_ZERO); if (ia == NULL) { error = ENOBUFS; goto out; } ifa = &ia->ia_ifa; ifa_init(ifa); ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr; ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask; ia->ia_sockmask.sin_len = 8; ia->ia_sockmask.sin_family = AF_INET; if (ifp->if_flags & IFF_BROADCAST) { ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr); ia->ia_broadaddr.sin_family = AF_INET; } ia->ia_ifp = ifp; ifa_ref(ifa); /* if_addrhead */ IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_ref(ifa); /* in_ifaddrhead */ IN_IFADDR_WLOCK(); TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link); IN_IFADDR_WUNLOCK(); iaIsNew = 1; } break; case SIOCSIFBRDADDR: case SIOCGIFADDR: case SIOCGIFNETMASK: case SIOCGIFDSTADDR: case SIOCGIFBRDADDR: if (ia == NULL) { error = EADDRNOTAVAIL; goto out; } break; } /* * Most paths in this switch return directly or via out. Only paths * that remove the address break in order to hit common removal code. */ switch (cmd) { case SIOCGIFADDR: *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr; goto out; case SIOCGIFBRDADDR: if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EINVAL; goto out; } *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr; goto out; case SIOCGIFDSTADDR: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { error = EINVAL; goto out; } *((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr; goto out; case SIOCGIFNETMASK: *((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask; goto out; case SIOCSIFDSTADDR: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { error = EINVAL; goto out; } oldaddr = ia->ia_dstaddr; ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr; if (ifp->if_ioctl != NULL) { error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR, (caddr_t)ia); if (error) { ia->ia_dstaddr = oldaddr; goto out; } } if (ia->ia_flags & IFA_ROUTE) { ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr; rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST); ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP); } goto out; case SIOCSIFBRDADDR: if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EINVAL; goto out; } ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr; goto out; case SIOCSIFADDR: error = in_ifinit(ifp, ia, (struct sockaddr_in *) &ifr->ifr_addr, 1); if (error != 0 && iaIsNew) break; if (error == 0) { ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0) { error = in_joingroup(ifp, &allhosts_addr, NULL, &ii->ii_allhosts); } EVENTHANDLER_INVOKE(ifaddr_event, ifp); } error = 0; goto out; case SIOCSIFNETMASK: ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr; ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr); goto out; case SIOCAIFADDR: maskIsNew = 0; hostIsNew = 1; error = 0; if (ia->ia_addr.sin_family == AF_INET) { if (ifra->ifra_addr.sin_len == 0) { ifra->ifra_addr = ia->ia_addr; hostIsNew = 0; } else if (ifra->ifra_addr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) hostIsNew = 0; } if (ifra->ifra_mask.sin_len) { /* * QL: XXX * Need to scrub the prefix here in case * the issued command is SIOCAIFADDR with * the same address, but with a different * prefix length. And if the prefix length * is the same as before, then the call is * un-necessarily executed here. */ in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_sockmask = ifra->ifra_mask; ia->ia_sockmask.sin_family = AF_INET; ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr); maskIsNew = 1; } if ((ifp->if_flags & IFF_POINTOPOINT) && (ifra->ifra_dstaddr.sin_family == AF_INET)) { in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_dstaddr = ifra->ifra_dstaddr; maskIsNew = 1; /* We lie; but the effect's the same */ } if (ifra->ifra_addr.sin_family == AF_INET && (hostIsNew || maskIsNew)) error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0); if (error != 0 && iaIsNew) break; if ((ifp->if_flags & IFF_BROADCAST) && (ifra->ifra_broadaddr.sin_family == AF_INET)) ia->ia_broadaddr = ifra->ifra_broadaddr; if (error == 0) { ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0) { error = in_joingroup(ifp, &allhosts_addr, NULL, &ii->ii_allhosts); } EVENTHANDLER_INVOKE(ifaddr_event, ifp); } goto out; case SIOCDIFADDR: /* * in_ifscrub kills the interface route. */ in_ifscrub(ifp, ia, LLE_STATIC); /* * in_ifadown gets rid of all the rest of * the routes. This is not quite the right * thing to do, but at least if we are running * a routing process they will come back. */ in_ifadown(&ia->ia_ifa, 1); EVENTHANDLER_INVOKE(ifaddr_event, ifp); error = 0; break; default: panic("in_control: unsupported ioctl"); } IF_ADDR_WLOCK(ifp); /* Re-check that ia is still part of the list. */ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa == &ia->ia_ifa) break; } if (ifa == NULL) { /* * If we lost the race with another thread, there is no need to * try it again for the next loop as there is no other exit * path between here and out. */ IF_ADDR_WUNLOCK(ifp); error = EADDRNOTAVAIL; goto out; } TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ IN_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link); if (ia->ia_addr.sin_family == AF_INET) { struct in_ifaddr *if_ia; LIST_REMOVE(ia, ia_hash); IN_IFADDR_WUNLOCK(); /* * If this is the last IPv4 address configured on this * interface, leave the all-hosts group. * No state-change report need be transmitted. */ if_ia = NULL; IFP_TO_IA(ifp, if_ia); if (if_ia == NULL) { ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]); IN_MULTI_LOCK(); if (ii->ii_allhosts) { (void)in_leavegroup_locked(ii->ii_allhosts, NULL); ii->ii_allhosts = NULL; } IN_MULTI_UNLOCK(); } else ifa_free(&if_ia->ia_ifa); } else IN_IFADDR_WUNLOCK(); ifa_free(&ia->ia_ifa); /* in_ifaddrhead */ out: if (ia != NULL) ifa_free(&ia->ia_ifa); return (error); } /* * SIOC[GAD]LIFADDR. * SIOCGLIFADDR: get first address. (?!?) * SIOCGLIFADDR with IFLR_PREFIX: * get first address that matches the specified prefix. * SIOCALIFADDR: add the specified address. * SIOCALIFADDR with IFLR_PREFIX: * EINVAL since we can't deduce hostid part of the address. * SIOCDLIFADDR: delete the specified address. * SIOCDLIFADDR with IFLR_PREFIX: * delete the first address that matches the specified prefix. * return values: * EINVAL on invalid parameters * EADDRNOTAVAIL on prefix match failed/specified address not found * other values may be returned from in_ioctl() */ static int in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct if_laddrreq *iflr = (struct if_laddrreq *)data; struct ifaddr *ifa; /* sanity checks */ if (data == NULL || ifp == NULL) { panic("invalid argument to in_lifaddr_ioctl"); /*NOTRECHED*/ } switch (cmd) { case SIOCGLIFADDR: /* address must be specified on GET with IFLR_PREFIX */ if ((iflr->flags & IFLR_PREFIX) == 0) break; /*FALLTHROUGH*/ case SIOCALIFADDR: case SIOCDLIFADDR: /* address must be specified on ADD and DELETE */ if (iflr->addr.ss_family != AF_INET) return (EINVAL); if (iflr->addr.ss_len != sizeof(struct sockaddr_in)) return (EINVAL); /* XXX need improvement */ if (iflr->dstaddr.ss_family && iflr->dstaddr.ss_family != AF_INET) return (EINVAL); if (iflr->dstaddr.ss_family && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in)) return (EINVAL); break; default: /*shouldn't happen*/ return (EOPNOTSUPP); } if (sizeof(struct in_addr) * 8 < iflr->prefixlen) return (EINVAL); switch (cmd) { case SIOCALIFADDR: { struct in_aliasreq ifra; if (iflr->flags & IFLR_PREFIX) return (EINVAL); /* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len); if (iflr->dstaddr.ss_family) { /*XXX*/ bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, iflr->dstaddr.ss_len); } ifra.ifra_mask.sin_family = AF_INET; ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in); in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen); return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td)); } case SIOCGLIFADDR: case SIOCDLIFADDR: { struct in_ifaddr *ia; struct in_addr mask, candidate, match; struct sockaddr_in *sin; bzero(&mask, sizeof(mask)); bzero(&match, sizeof(match)); if (iflr->flags & IFLR_PREFIX) { /* lookup a prefix rather than address. */ in_len2mask(&mask, iflr->prefixlen); sin = (struct sockaddr_in *)&iflr->addr; match.s_addr = sin->sin_addr.s_addr; match.s_addr &= mask.s_addr; /* if you set extra bits, that's wrong */ if (match.s_addr != sin->sin_addr.s_addr) return (EINVAL); } else { /* on getting an address, take the 1st match */ /* on deleting an address, do exact match */ if (cmd != SIOCGLIFADDR) { in_len2mask(&mask, 32); sin = (struct sockaddr_in *)&iflr->addr; match.s_addr = sin->sin_addr.s_addr; } } IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; if (match.s_addr == 0) break; candidate.s_addr = ((struct sockaddr_in *)&ifa->ifa_addr)->sin_addr.s_addr; candidate.s_addr &= mask.s_addr; if (candidate.s_addr == match.s_addr) break; } if (ifa != NULL) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); if (ifa == NULL) return (EADDRNOTAVAIL); ia = (struct in_ifaddr *)ifa; if (cmd == SIOCGLIFADDR) { /* fill in the if_laddrreq structure */ bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len); if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &iflr->dstaddr, ia->ia_dstaddr.sin_len); } else bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); iflr->prefixlen = in_mask2len(&ia->ia_sockmask.sin_addr); iflr->flags = 0; /*XXX*/ ifa_free(ifa); return (0); } else { struct in_aliasreq ifra; /* fill in_aliasreq and do ioctl(SIOCDIFADDR) */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&ia->ia_addr, &ifra.ifra_addr, ia->ia_addr.sin_len); if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, ia->ia_dstaddr.sin_len); } bcopy(&ia->ia_sockmask, &ifra.ifra_dstaddr, ia->ia_sockmask.sin_len); ifa_free(ifa); return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra, ifp, td)); } } } return (EOPNOTSUPP); /*just for safety*/ } /* * Delete any existing route for an interface. */ void in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags) { in_scrubprefix(ia, flags); } /* * Initialize an interface's internet address * and routing table entry. */ static int in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin, int scrub) { register u_long i = ntohl(sin->sin_addr.s_addr); struct sockaddr_in oldaddr; int s = splimp(), flags = RTF_UP, error = 0; oldaddr = ia->ia_addr; if (oldaddr.sin_family == AF_INET) LIST_REMOVE(ia, ia_hash); ia->ia_addr = *sin; if (ia->ia_addr.sin_family == AF_INET) { IN_IFADDR_WLOCK(); LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr), ia, ia_hash); IN_IFADDR_WUNLOCK(); } /* * Give the interface a chance to initialize * if this is its first address, * and to validate the address if necessary. */ if (ifp->if_ioctl != NULL) { error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia); if (error) { splx(s); /* LIST_REMOVE(ia, ia_hash) is done in in_control */ ia->ia_addr = oldaddr; IN_IFADDR_WLOCK(); if (ia->ia_addr.sin_family == AF_INET) LIST_INSERT_HEAD(INADDR_HASH( ia->ia_addr.sin_addr.s_addr), ia, ia_hash); else /* * If oldaddr family is not AF_INET (e.g. * interface has been just created) in_control * does not call LIST_REMOVE, and we end up * with bogus ia entries in hash */ LIST_REMOVE(ia, ia_hash); IN_IFADDR_WUNLOCK(); return (error); } } splx(s); if (scrub) { ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr; in_ifscrub(ifp, ia, LLE_STATIC); ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; } /* * Be compatible with network classes, if netmask isn't supplied, * guess it based on classes. */ if (ia->ia_subnetmask == 0) { if (IN_CLASSA(i)) ia->ia_subnetmask = IN_CLASSA_NET; else if (IN_CLASSB(i)) ia->ia_subnetmask = IN_CLASSB_NET; else ia->ia_subnetmask = IN_CLASSC_NET; ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask); } ia->ia_subnet = i & ia->ia_subnetmask; in_socktrim(&ia->ia_sockmask); /* * XXX: carp(4) does not have interface route */ if (ifp->if_type == IFT_CARP) return (0); /* * Add route for the network. */ ia->ia_ifa.ifa_metric = ifp->if_metric; if (ifp->if_flags & IFF_BROADCAST) { if (ia->ia_subnetmask == IN_RFC3021_MASK) ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST; else ia->ia_broadaddr.sin_addr.s_addr = htonl(ia->ia_subnet | ~ia->ia_subnetmask); } else if (ifp->if_flags & IFF_LOOPBACK) { ia->ia_dstaddr = ia->ia_addr; flags |= RTF_HOST; } else if (ifp->if_flags & IFF_POINTOPOINT) { if (ia->ia_dstaddr.sin_family != AF_INET) return (0); flags |= RTF_HOST; } if ((error = in_addprefix(ia, flags)) != 0) return (error); if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY) return (0); if (ifp->if_flags & IFF_POINTOPOINT) { if (ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr) return (0); } /* * add a loopback route to self */ if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) { struct route ia_ro; bzero(&ia_ro, sizeof(ia_ro)); *((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr; rtalloc_ign_fib(&ia_ro, 0, RT_DEFAULT_FIB); if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) && (ia_ro.ro_rt->rt_ifp == V_loif)) { RT_LOCK(ia_ro.ro_rt); RT_ADDREF(ia_ro.ro_rt); RTFREE_LOCKED(ia_ro.ro_rt); } else error = ifa_add_loopback_route((struct ifaddr *)ia, (struct sockaddr *)&ia->ia_addr); if (error == 0) ia->ia_flags |= IFA_RTSELF; if (ia_ro.ro_rt != NULL) RTFREE(ia_ro.ro_rt); } return (error); } #define rtinitflags(x) \ ((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \ ? RTF_HOST : 0) /* * Generate a routing message when inserting or deleting * an interface address alias. */ static void in_addralias_rtmsg(int cmd, struct in_addr *prefix, struct in_ifaddr *target) { struct route pfx_ro; struct sockaddr_in *pfx_addr; struct rtentry msg_rt; /* QL: XXX * This is a bit questionable because there is no * additional route entry added/deleted for an address * alias. Therefore this route report is inaccurate. */ bzero(&pfx_ro, sizeof(pfx_ro)); pfx_addr = (struct sockaddr_in *)(&pfx_ro.ro_dst); pfx_addr->sin_len = sizeof(*pfx_addr); pfx_addr->sin_family = AF_INET; pfx_addr->sin_addr = *prefix; rtalloc_ign_fib(&pfx_ro, 0, 0); if (pfx_ro.ro_rt != NULL) { msg_rt = *pfx_ro.ro_rt; /* QL: XXX * Point the gateway to the new interface * address as if a new prefix route entry has * been added through the new address alias. * All other parts of the rtentry is accurate, * e.g., rt_key, rt_mask, rt_ifp etc. */ msg_rt.rt_gateway = (struct sockaddr *)&target->ia_addr; rt_newaddrmsg(cmd, (struct ifaddr *)target, 0, &msg_rt); RTFREE(pfx_ro.ro_rt); } return; } /* * Check if we have a route for the given prefix already or add one accordingly. */ static int in_addprefix(struct in_ifaddr *target, int flags) { struct in_ifaddr *ia; struct in_addr prefix, mask, p, m; int error; if ((flags & RTF_HOST) != 0) { prefix = target->ia_dstaddr.sin_addr; mask.s_addr = 0; } else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } IN_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) { p = ia->ia_dstaddr.sin_addr; if (prefix.s_addr != p.s_addr) continue; } else { p = ia->ia_addr.sin_addr; m = ia->ia_sockmask.sin_addr; p.s_addr &= m.s_addr; if (prefix.s_addr != p.s_addr || mask.s_addr != m.s_addr) continue; } /* * If we got a matching prefix route inserted by other * interface address, we are done here. */ if (ia->ia_flags & IFA_ROUTE) { #ifdef RADIX_MPATH if (ia->ia_addr.sin_addr.s_addr == target->ia_addr.sin_addr.s_addr) { IN_IFADDR_RUNLOCK(); return (EEXIST); } else break; #endif if (V_sameprefixcarponly && target->ia_ifp->if_type != IFT_CARP && ia->ia_ifp->if_type != IFT_CARP) { IN_IFADDR_RUNLOCK(); return (EEXIST); } else { in_addralias_rtmsg(RTM_ADD, &prefix, target); IN_IFADDR_RUNLOCK(); return (0); } } } IN_IFADDR_RUNLOCK(); /* * No-one seem to have this prefix route, so we try to insert it. */ error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags); if (!error) target->ia_flags |= IFA_ROUTE; return (error); } extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr); /* * If there is no other address in the system that can serve a route to the * same prefix, remove the route. Hand over the route to the new address * otherwise. */ static int in_scrubprefix(struct in_ifaddr *target, u_int flags) { struct in_ifaddr *ia; struct in_addr prefix, mask, p; int error = 0; struct sockaddr_in prefix0, mask0; /* * Remove the loopback route to the interface address. * The "useloopback" setting is not consulted because if the * user configures an interface address, turns off this * setting, and then tries to delete that interface address, * checking the current setting of "useloopback" would leave * that interface address loopback route untouched, which * would be wrong. Therefore the interface address loopback route * deletion is unconditional. */ if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) && !(target->ia_ifp->if_flags & IFF_LOOPBACK) && (target->ia_flags & IFA_RTSELF)) { struct route ia_ro; int freeit = 0; bzero(&ia_ro, sizeof(ia_ro)); *((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr; rtalloc_ign_fib(&ia_ro, 0, 0); if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) && (ia_ro.ro_rt->rt_ifp == V_loif)) { RT_LOCK(ia_ro.ro_rt); if (ia_ro.ro_rt->rt_refcnt <= 1) freeit = 1; else if (flags & LLE_STATIC) { RT_REMREF(ia_ro.ro_rt); target->ia_flags &= ~IFA_RTSELF; } RTFREE_LOCKED(ia_ro.ro_rt); } if (freeit && (flags & LLE_STATIC)) { error = ifa_del_loopback_route((struct ifaddr *)target, (struct sockaddr *)&target->ia_addr); if (error == 0) target->ia_flags &= ~IFA_RTSELF; } if ((flags & LLE_STATIC) && !(target->ia_ifp->if_flags & IFF_NOARP)) /* remove arp cache */ arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr); } if (rtinitflags(target)) prefix = target->ia_dstaddr.sin_addr; else { prefix = target->ia_addr.sin_addr; mask = target->ia_sockmask.sin_addr; prefix.s_addr &= mask.s_addr; } if ((target->ia_flags & IFA_ROUTE) == 0) { in_addralias_rtmsg(RTM_DELETE, &prefix, target); return (0); } IN_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (rtinitflags(ia)) p = ia->ia_dstaddr.sin_addr; else { p = ia->ia_addr.sin_addr; p.s_addr &= ia->ia_sockmask.sin_addr.s_addr; } if ((prefix.s_addr != p.s_addr) || !(ia->ia_ifp->if_flags & IFF_UP)) continue; /* * If we got a matching prefix address, move IFA_ROUTE and * the route itself to it. Make sure that routing daemons * get a heads-up. * * XXX: a special case for carp(4) interface - this should * be more generally specified as an interface that * doesn't support such action. */ if ((ia->ia_flags & IFA_ROUTE) == 0 && (ia->ia_ifp->if_type != IFT_CARP)) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(); error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n", error); error = rtinit(&ia->ia_ifa, (int)RTM_ADD, rtinitflags(ia) | RTF_UP); if (error == 0) ia->ia_flags |= IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n", error); ifa_free(&ia->ia_ifa); return (error); } } IN_IFADDR_RUNLOCK(); /* * remove all L2 entries on the given prefix */ bzero(&prefix0, sizeof(prefix0)); prefix0.sin_len = sizeof(prefix0); prefix0.sin_family = AF_INET; prefix0.sin_addr.s_addr = target->ia_subnet; bzero(&mask0, sizeof(mask0)); mask0.sin_len = sizeof(mask0); mask0.sin_family = AF_INET; mask0.sin_addr.s_addr = target->ia_subnetmask; lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0, (struct sockaddr *)&mask0, flags); /* * As no-one seem to have this prefix, we can remove the route. */ error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target)); if (error == 0) target->ia_flags &= ~IFA_ROUTE; else log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error); return (error); } #undef rtinitflags /* * Return 1 if the address might be a local broadcast address. */ int in_broadcast(struct in_addr in, struct ifnet *ifp) { register struct ifaddr *ifa; u_long t; if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY) return (1); if ((ifp->if_flags & IFF_BROADCAST) == 0) return (0); t = ntohl(in.s_addr); /* * Look through the list of addresses for a match * with a broadcast address. */ #define ia ((struct in_ifaddr *)ifa) TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET && (in.s_addr == ia->ia_broadaddr.sin_addr.s_addr || /* * Check for old-style (host 0) broadcast, but * taking into account that RFC 3021 obsoletes it. */ (ia->ia_subnetmask != IN_RFC3021_MASK && t == ia->ia_subnet)) && /* * Check for an all one subnetmask. These * only exist when an interface gets a secondary * address. */ ia->ia_subnetmask != (u_long)0xffffffff) return (1); return (0); #undef ia } /* * On interface removal, clean up IPv4 data structures hung off of the ifnet. */ void in_ifdetach(struct ifnet *ifp) { in_pcbpurgeif0(&V_ripcbinfo, ifp); in_pcbpurgeif0(&V_udbinfo, ifp); in_purgemaddrs(ifp); } /* * Delete all IPv4 multicast address records, and associated link-layer * multicast address records, associated with ifp. * XXX It looks like domifdetach runs AFTER the link layer cleanup. * XXX This should not race with ifma_protospec being set during * a new allocation, if it does, we have bigger problems. */ static void in_purgemaddrs(struct ifnet *ifp) { LIST_HEAD(,in_multi) purgeinms; struct in_multi *inm, *tinm; struct ifmultiaddr *ifma; LIST_INIT(&purgeinms); IN_MULTI_LOCK(); /* * Extract list of in_multi associated with the detaching ifp * which the PF_INET layer is about to release. * We need to do this as IF_ADDR_LOCK() may be re-acquired * by code further down. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_INET || ifma->ifma_protospec == NULL) continue; #if 0 KASSERT(ifma->ifma_protospec != NULL, ("%s: ifma_protospec is NULL", __func__)); #endif inm = (struct in_multi *)ifma->ifma_protospec; LIST_INSERT_HEAD(&purgeinms, inm, inm_link); } IF_ADDR_RUNLOCK(ifp); LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) { LIST_REMOVE(inm, inm_link); inm_release_locked(inm); } igmp_ifdetach(ifp); IN_MULTI_UNLOCK(); } #include #include struct in_llentry { struct llentry base; struct sockaddr_in l3_addr4; }; /* * Deletes an address from the address table. * This function is called by the timer functions * such as arptimer() and nd6_llinfo_timer(), and * the caller does the locking. */ static void in_lltable_free(struct lltable *llt, struct llentry *lle) { LLE_WUNLOCK(lle); LLE_LOCK_DESTROY(lle); free(lle, M_LLTABLE); } static struct llentry * in_lltable_new(const struct sockaddr *l3addr, u_int flags) { struct in_llentry *lle; lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO); if (lle == NULL) /* NB: caller generates msg */ return NULL; /* * For IPv4 this will trigger "arpresolve" to generate * an ARP request. */ lle->base.la_expire = time_uptime; /* mark expired */ lle->l3_addr4 = *(const struct sockaddr_in *)l3addr; lle->base.lle_refcnt = 1; lle->base.lle_free = in_lltable_free; LLE_LOCK_INIT(&lle->base); callout_init_rw(&lle->base.la_timer, &lle->base.lle_lock, CALLOUT_RETURNUNLOCKED); return (&lle->base); } #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ (((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 ) static void in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, const struct sockaddr *mask, u_int flags) { const struct sockaddr_in *pfx = (const struct sockaddr_in *)prefix; const struct sockaddr_in *msk = (const struct sockaddr_in *)mask; struct llentry *lle, *next; int i; size_t pkts_dropped; IF_AFDATA_WLOCK(llt->llt_ifp); for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { /* * (flags & LLE_STATIC) means deleting all entries * including static ARP entries. */ if (IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)), pfx, msk) && ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) { LLE_WLOCK(lle); if (callout_stop(&lle->la_timer)) LLE_REMREF(lle); pkts_dropped = llentry_free(lle); ARPSTAT_ADD(dropped, pkts_dropped); } } } IF_AFDATA_WUNLOCK(llt->llt_ifp); } static int in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) { struct rtentry *rt; KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); /* XXX rtalloc1 should take a const param */ rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0); if (rt == NULL) return (EINVAL); /* * If the gateway for an existing host route matches the target L3 * address, which is a special route inserted by some implementation * such as MANET, and the interface is of the correct type, then * allow for ARP to proceed. */ if (rt->rt_flags & RTF_GATEWAY) { if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp || rt->rt_ifp->if_type != IFT_ETHER || (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 || memcmp(rt->rt_gateway->sa_data, l3addr->sa_data, sizeof(in_addr_t)) != 0) { RTFREE_LOCKED(rt); return (EINVAL); } } /* * Make sure that at least the destination address is covered * by the route. This is for handling the case where 2 or more * interfaces have the same prefix. An incoming packet arrives * on one interface and the corresponding outgoing packet leaves * another interface. */ if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) { const char *sa, *mask, *addr, *lim; int len; mask = (const char *)rt_mask(rt); /* * Just being extra cautious to avoid some custom * code getting into trouble. */ if (mask == NULL) { RTFREE_LOCKED(rt); return (EINVAL); } sa = (const char *)rt_key(rt); addr = (const char *)l3addr; len = ((const struct sockaddr_in *)l3addr)->sin_len; lim = addr + len; for ( ; addr < lim; sa++, mask++, addr++) { if ((*sa ^ *addr) & *mask) { #ifdef DIAGNOSTIC log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n", inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr)); #endif RTFREE_LOCKED(rt); return (EINVAL); } } } RTFREE_LOCKED(rt); return (0); } /* * Return NULL if not found or marked for deletion. * If found return lle read locked. */ static struct llentry * in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr; struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; struct llentries *lleh; u_int hashkey; IF_AFDATA_LOCK_ASSERT(ifp); KASSERT(l3addr->sa_family == AF_INET, ("sin_family %d", l3addr->sa_family)); hashkey = sin->sin_addr.s_addr; lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; LIST_FOREACH(lle, lleh, lle_next) { struct sockaddr_in *sa2 = satosin(L3_ADDR(lle)); if (lle->la_flags & LLE_DELETED) continue; if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr) break; } if (lle == NULL) { #ifdef DIAGNOSTIC if (flags & LLE_DELETE) log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle); #endif if (!(flags & LLE_CREATE)) return (NULL); + IF_AFDATA_WLOCK_ASSERT(ifp); /* * A route that covers the given address must have * been installed 1st because we are doing a resolution, * verify this. */ if (!(flags & LLE_IFADDR) && in_lltable_rtcheck(ifp, flags, l3addr) != 0) goto done; lle = in_lltable_new(l3addr, flags); if (lle == NULL) { log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); goto done; } lle->la_flags = flags & ~LLE_CREATE; if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen); lle->la_flags |= (LLE_VALID | LLE_STATIC); } lle->lle_tbl = llt; lle->lle_head = lleh; lle->la_flags |= LLE_LINKED; LIST_INSERT_HEAD(lleh, lle, lle_next); } else if (flags & LLE_DELETE) { if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { LLE_WLOCK(lle); lle->la_flags |= LLE_DELETED; EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); #ifdef DIAGNOSTIC log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); #endif if ((lle->la_flags & (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC) llentry_free(lle); else LLE_WUNLOCK(lle); } lle = (void *)-1; } if (LLE_IS_VALID(lle)) { if (flags & LLE_EXCLUSIVE) LLE_WLOCK(lle); else LLE_RLOCK(lle); } done: return (lle); } static int in_lltable_dump(struct lltable *llt, struct sysctl_req *wr) { #define SIN(lle) ((struct sockaddr_in *) L3_ADDR(lle)) struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; /* XXX stack use */ struct { struct rt_msghdr rtm; struct sockaddr_inarp sin; struct sockaddr_dl sdl; } arpc; int error, i; LLTABLE_LOCK_ASSERT(); error = 0; for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { struct sockaddr_dl *sdl; /* skip deleted entries */ if ((lle->la_flags & LLE_DELETED) == LLE_DELETED) continue; /* Skip if jailed and not a valid IP of the prison. */ if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0) continue; /* * produce a msg made of: * struct rt_msghdr; * struct sockaddr_inarp; (IPv4) * struct sockaddr_dl; */ bzero(&arpc, sizeof(arpc)); arpc.rtm.rtm_msglen = sizeof(arpc); arpc.rtm.rtm_version = RTM_VERSION; arpc.rtm.rtm_type = RTM_GET; arpc.rtm.rtm_flags = RTF_UP; arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; arpc.sin.sin_family = AF_INET; arpc.sin.sin_len = sizeof(arpc.sin); arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr; /* publish */ if (lle->la_flags & LLE_PUB) { arpc.rtm.rtm_flags |= RTF_ANNOUNCE; /* proxy only */ if (lle->la_flags & LLE_PROXY) arpc.sin.sin_other = SIN_PROXY; } sdl = &arpc.sdl; sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(*sdl); sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; if ((lle->la_flags & LLE_VALID) == LLE_VALID) { sdl->sdl_alen = ifp->if_addrlen; bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); } else { sdl->sdl_alen = 0; bzero(LLADDR(sdl), ifp->if_addrlen); } arpc.rtm.rtm_rmx.rmx_expire = lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); if (lle->la_flags & LLE_STATIC) arpc.rtm.rtm_flags |= RTF_STATIC; arpc.rtm.rtm_index = ifp->if_index; error = SYSCTL_OUT(wr, &arpc, sizeof(arpc)); if (error) break; } } return error; #undef SIN } void * in_domifattach(struct ifnet *ifp) { struct in_ifinfo *ii; struct lltable *llt; ii = malloc(sizeof(struct in_ifinfo), M_IFADDR, M_WAITOK|M_ZERO); llt = lltable_init(ifp, AF_INET); if (llt != NULL) { llt->llt_prefix_free = in_lltable_prefix_free; llt->llt_lookup = in_lltable_lookup; llt->llt_dump = in_lltable_dump; } ii->ii_llt = llt; ii->ii_igmp = igmp_domifattach(ifp); return ii; } void in_domifdetach(struct ifnet *ifp, void *aux) { struct in_ifinfo *ii = (struct in_ifinfo *)aux; igmp_domifdetach(ifp); lltable_free(ii->ii_llt); free(ii, M_IFADDR); } Index: stable/9/sys/netinet6/in6.c =================================================================== --- stable/9/sys/netinet6/in6.c (revision 260509) +++ stable/9/sys/netinet6/in6.c (revision 260510) @@ -1,2832 +1,2833 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.c 8.2 (Berkeley) 11/15/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include VNET_DECLARE(int, icmp6_nodeinfo_oldmcprefix); #define V_icmp6_nodeinfo_oldmcprefix VNET(icmp6_nodeinfo_oldmcprefix) /* * Definitions of some costant IP6 addresses. */ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct in6_addr in6addr_nodelocal_allnodes = IN6ADDR_NODELOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; const struct in6_addr in6addr_linklocal_allv2routers = IN6ADDR_LINKLOCAL_ALLV2ROUTERS_INIT; const struct in6_addr in6mask0 = IN6MASK0; const struct in6_addr in6mask32 = IN6MASK32; const struct in6_addr in6mask64 = IN6MASK64; const struct in6_addr in6mask96 = IN6MASK96; const struct in6_addr in6mask128 = IN6MASK128; const struct sockaddr_in6 sa6_any = { sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 }; static int in6_lifaddr_ioctl(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); static int in6_ifinit(struct ifnet *, struct in6_ifaddr *, struct sockaddr_in6 *, int); static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); int (*faithprefix_p)(struct in6_addr *); #define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa)) #define ia62ifa(ia6) (&((ia6)->ia_ifa)) void in6_ifaddloop(struct ifaddr *ifa) { struct sockaddr_dl gateway; struct sockaddr_in6 mask, addr; struct rtentry rt; struct in6_ifaddr *ia; struct ifnet *ifp; struct llentry *ln; ia = ifa2ia6(ifa); ifp = ifa->ifa_ifp; IF_AFDATA_LOCK(ifp); ifa->ifa_rtrequest = nd6_rtrequest; ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr); IF_AFDATA_UNLOCK(ifp); if (ln != NULL) { ln->la_expire = 0; /* for IPv6 this means permanent */ ln->ln_state = ND6_LLINFO_REACHABLE; /* * initialize for rtmsg generation */ bzero(&gateway, sizeof(gateway)); gateway.sdl_len = sizeof(gateway); gateway.sdl_family = AF_LINK; gateway.sdl_nlen = 0; gateway.sdl_alen = 6; memcpy(gateway.sdl_data, &ln->ll_addr.mac_aligned, sizeof(ln->ll_addr)); LLE_WUNLOCK(ln); } bzero(&rt, sizeof(rt)); rt.rt_gateway = (struct sockaddr *)&gateway; memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); rt_mask(&rt) = (struct sockaddr *)&mask; rt_key(&rt) = (struct sockaddr *)&addr; rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC; /* Announce arrival of local address to all FIBs. */ rt_newaddrmsg(RTM_ADD, ifa, 0, &rt); } void in6_ifremloop(struct ifaddr *ifa) { struct sockaddr_dl gateway; struct sockaddr_in6 mask, addr; struct rtentry rt0; struct in6_ifaddr *ia; struct ifnet *ifp; ia = ifa2ia6(ifa); ifp = ifa->ifa_ifp; memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); lltable_prefix_free(AF_INET6, (struct sockaddr *)&addr, (struct sockaddr *)&mask, LLE_STATIC); /* * initialize for rtmsg generation */ bzero(&gateway, sizeof(gateway)); gateway.sdl_len = sizeof(gateway); gateway.sdl_family = AF_LINK; gateway.sdl_nlen = 0; gateway.sdl_alen = ifp->if_addrlen; bzero(&rt0, sizeof(rt0)); rt0.rt_gateway = (struct sockaddr *)&gateway; rt_mask(&rt0) = (struct sockaddr *)&mask; rt_key(&rt0) = (struct sockaddr *)&addr; rt0.rt_flags = RTF_HOST | RTF_STATIC; /* Announce removal of local address to all FIBs. */ rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0); } int in6_mask2len(struct in6_addr *mask, u_char *lim0) { int x = 0, y; u_char *lim = lim0, *p; /* ignore the scope_id part */ if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask)) lim = (u_char *)mask + sizeof(*mask); for (p = (u_char *)mask; p < lim; x++, p++) { if (*p != 0xff) break; } y = 0; if (p < lim) { for (y = 0; y < 8; y++) { if ((*p & (0x80 >> y)) == 0) break; } } /* * when the limit pointer is given, do a stricter check on the * remaining bits. */ if (p < lim) { if (y != 0 && (*p & (0x00ff >> y)) != 0) return (-1); for (p = p + 1; p < lim; p++) if (*p != 0) return (-1); } return x * 8 + y; } #ifdef COMPAT_FREEBSD32 struct in6_ndifreq32 { char ifname[IFNAMSIZ]; uint32_t ifindex; }; #define SIOCGDEFIFACE32_IN6 _IOWR('i', 86, struct in6_ndifreq32) #endif int in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct in6_ifreq *ifr = (struct in6_ifreq *)data; struct in6_ifaddr *ia = NULL; struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; struct sockaddr_in6 *sa6; int error; switch (cmd) { case SIOCGETSGCNT_IN6: case SIOCGETMIFCNT_IN6: /* * XXX mrt_ioctl has a 3rd, unused, FIB argument in route.c. * We cannot see how that would be needed, so do not adjust the * KPI blindly; more likely should clean up the IPv4 variant. */ return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP); } switch(cmd) { case SIOCAADDRCTL_POLICY: case SIOCDADDRCTL_POLICY: if (td != NULL) { error = priv_check(td, PRIV_NETINET_ADDRCTRL6); if (error) return (error); } return (in6_src_ioctl(cmd, data)); } if (ifp == NULL) return (EOPNOTSUPP); switch (cmd) { case SIOCSNDFLUSH_IN6: case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: case SIOCSDEFIFACE_IN6: case SIOCSIFINFO_FLAGS: case SIOCSIFINFO_IN6: if (td != NULL) { error = priv_check(td, PRIV_NETINET_ND6); if (error) return (error); } /* FALLTHROUGH */ case OSIOCGIFINFO_IN6: case SIOCGIFINFO_IN6: case SIOCGDRLST_IN6: case SIOCGPRLST_IN6: case SIOCGNBRINFO_IN6: case SIOCGDEFIFACE_IN6: return (nd6_ioctl(cmd, data, ifp)); #ifdef COMPAT_FREEBSD32 case SIOCGDEFIFACE32_IN6: { struct in6_ndifreq ndif; struct in6_ndifreq32 *ndif32; error = nd6_ioctl(SIOCGDEFIFACE_IN6, (caddr_t)&ndif, ifp); if (error) return (error); ndif32 = (struct in6_ndifreq32 *)data; ndif32->ifindex = ndif.ifindex; return (0); } #endif } switch (cmd) { case SIOCSIFPREFIX_IN6: case SIOCDIFPREFIX_IN6: case SIOCAIFPREFIX_IN6: case SIOCCIFPREFIX_IN6: case SIOCSGIFPREFIX_IN6: case SIOCGIFPREFIX_IN6: log(LOG_NOTICE, "prefix ioctls are now invalidated. " "please use ifconfig.\n"); return (EOPNOTSUPP); } switch (cmd) { case SIOCSSCOPE6: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SCOPE6); if (error) return (error); } return (scope6_set(ifp, (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id)); case SIOCGSCOPE6: return (scope6_get(ifp, (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id)); case SIOCGSCOPE6DEF: return (scope6_get_default((struct scope6_id *) ifr->ifr_ifru.ifru_scope_id)); } switch (cmd) { case SIOCALIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_ADDIFADDR); if (error) return (error); } return in6_lifaddr_ioctl(so, cmd, data, ifp, td); case SIOCDLIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_DELIFADDR); if (error) return (error); } /* FALLTHROUGH */ case SIOCGLIFADDR: return in6_lifaddr_ioctl(so, cmd, data, ifp, td); } /* * Find address for this interface, if it exists. * * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation * only, and used the first interface address as the target of other * operations (without checking ifra_addr). This was because netinet * code/API assumed at most 1 interface address per interface. * Since IPv6 allows a node to assign multiple addresses * on a single interface, we almost always look and check the * presence of ifra_addr, and reject invalid ones here. * It also decreases duplicated code among SIOC*_IN6 operations. */ switch (cmd) { case SIOCAIFADDR_IN6: case SIOCSIFPHYADDR_IN6: sa6 = &ifra->ifra_addr; break; case SIOCSIFADDR_IN6: case SIOCGIFADDR_IN6: case SIOCSIFDSTADDR_IN6: case SIOCSIFNETMASK_IN6: case SIOCGIFDSTADDR_IN6: case SIOCGIFNETMASK_IN6: case SIOCDIFADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: case SIOCGIFAFLAG_IN6: case SIOCSNDFLUSH_IN6: case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: case SIOCGIFALIFETIME_IN6: case SIOCSIFALIFETIME_IN6: case SIOCGIFSTAT_IN6: case SIOCGIFSTAT_ICMP6: sa6 = &ifr->ifr_addr; break; case SIOCSIFADDR: case SIOCSIFBRDADDR: case SIOCSIFDSTADDR: case SIOCSIFNETMASK: /* * Although we should pass any non-INET6 ioctl requests * down to driver, we filter some legacy INET requests. * Drivers trust SIOCSIFADDR et al to come from an already * privileged layer, and do not perform any credentials * checks or input validation. */ return (EINVAL); default: sa6 = NULL; break; } if (sa6 && sa6->sin6_family == AF_INET6) { if (sa6->sin6_scope_id != 0) error = sa6_embedscope(sa6, 0); else error = in6_setscope(&sa6->sin6_addr, ifp, NULL); if (error != 0) return (error); if (td != NULL && (error = prison_check_ip6(td->td_ucred, &sa6->sin6_addr)) != 0) return (error); ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr); } else ia = NULL; switch (cmd) { case SIOCSIFADDR_IN6: case SIOCSIFDSTADDR_IN6: case SIOCSIFNETMASK_IN6: /* * Since IPv6 allows a node to assign multiple addresses * on a single interface, SIOCSIFxxx ioctls are deprecated. */ /* we decided to obsolete this command (20000704) */ error = EINVAL; goto out; case SIOCDIFADDR_IN6: /* * for IPv4, we look for existing in_ifaddr here to allow * "ifconfig if0 delete" to remove the first IPv4 address on * the interface. For IPv6, as the spec allows multiple * interface address from the day one, we consider "remove the * first one" semantics to be not preferable. */ if (ia == NULL) { error = EADDRNOTAVAIL; goto out; } /* FALLTHROUGH */ case SIOCAIFADDR_IN6: /* * We always require users to specify a valid IPv6 address for * the corresponding operation. */ if (ifra->ifra_addr.sin6_family != AF_INET6 || ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) { error = EAFNOSUPPORT; goto out; } if (td != NULL) { error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ? PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR); if (error) goto out; } break; case SIOCGIFADDR_IN6: /* This interface is basically deprecated. use SIOCGIFCONF. */ /* FALLTHROUGH */ case SIOCGIFAFLAG_IN6: case SIOCGIFNETMASK_IN6: case SIOCGIFDSTADDR_IN6: case SIOCGIFALIFETIME_IN6: /* must think again about its semantics */ if (ia == NULL) { error = EADDRNOTAVAIL; goto out; } break; case SIOCSIFALIFETIME_IN6: { struct in6_addrlifetime *lt; if (td != NULL) { error = priv_check(td, PRIV_NETINET_ALIFETIME6); if (error) goto out; } if (ia == NULL) { error = EADDRNOTAVAIL; goto out; } /* sanity for overflow - beware unsigned */ lt = &ifr->ifr_ifru.ifru_lifetime; if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME && lt->ia6t_vltime + time_second < time_second) { error = EINVAL; goto out; } if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME && lt->ia6t_pltime + time_second < time_second) { error = EINVAL; goto out; } break; } } switch (cmd) { case SIOCGIFADDR_IN6: ifr->ifr_addr = ia->ia_addr; if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0) goto out; break; case SIOCGIFDSTADDR_IN6: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) { error = EINVAL; goto out; } /* * XXX: should we check if ifa_dstaddr is NULL and return * an error? */ ifr->ifr_dstaddr = ia->ia_dstaddr; if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0) goto out; break; case SIOCGIFNETMASK_IN6: ifr->ifr_addr = ia->ia_prefixmask; break; case SIOCGIFAFLAG_IN6: ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags; break; case SIOCGIFSTAT_IN6: if (ifp == NULL) { error = EINVAL; goto out; } bzero(&ifr->ifr_ifru.ifru_stat, sizeof(ifr->ifr_ifru.ifru_stat)); ifr->ifr_ifru.ifru_stat = *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat; break; case SIOCGIFSTAT_ICMP6: if (ifp == NULL) { error = EINVAL; goto out; } bzero(&ifr->ifr_ifru.ifru_icmp6stat, sizeof(ifr->ifr_ifru.ifru_icmp6stat)); ifr->ifr_ifru.ifru_icmp6stat = *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat; break; case SIOCGIFALIFETIME_IN6: ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime; if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { time_t maxexpire; struct in6_addrlifetime *retlt = &ifr->ifr_ifru.ifru_lifetime; /* * XXX: adjust expiration time assuming time_t is * signed. */ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (ia->ia6_lifetime.ia6t_vltime < maxexpire - ia->ia6_updatetime) { retlt->ia6t_expire = ia->ia6_updatetime + ia->ia6_lifetime.ia6t_vltime; } else retlt->ia6t_expire = maxexpire; } if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { time_t maxexpire; struct in6_addrlifetime *retlt = &ifr->ifr_ifru.ifru_lifetime; /* * XXX: adjust expiration time assuming time_t is * signed. */ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (ia->ia6_lifetime.ia6t_pltime < maxexpire - ia->ia6_updatetime) { retlt->ia6t_preferred = ia->ia6_updatetime + ia->ia6_lifetime.ia6t_pltime; } else retlt->ia6t_preferred = maxexpire; } break; case SIOCSIFALIFETIME_IN6: ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime; /* for sanity */ if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = time_second + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = time_second + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; break; case SIOCAIFADDR_IN6: { int i; struct nd_prefixctl pr0; struct nd_prefix *pr; /* * first, make or update the interface address structure, * and link it to the list. */ if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0) goto out; if (ia != NULL) ifa_free(&ia->ia_ifa); if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr)) == NULL) { /* * this can happen when the user specify the 0 valid * lifetime. */ break; } /* * then, make the prefix on-link on the interface. * XXX: we'd rather create the prefix before the address, but * we need at least one address to install the corresponding * interface route, so we configure the address first. */ /* * convert mask to prefix length (prefixmask has already * been validated in in6_update_ifa(). */ bzero(&pr0, sizeof(pr0)); pr0.ndpr_ifp = ifp; pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, NULL); if (pr0.ndpr_plen == 128) { break; /* we don't need to install a host route. */ } pr0.ndpr_prefix = ifra->ifra_addr; /* apply the mask for safety. */ for (i = 0; i < 4; i++) { pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= ifra->ifra_prefixmask.sin6_addr.s6_addr32[i]; } /* * XXX: since we don't have an API to set prefix (not address) * lifetimes, we just use the same lifetimes as addresses. * The (temporarily) installed lifetimes can be overridden by * later advertised RAs (when accept_rtadv is non 0), which is * an intended behavior. */ pr0.ndpr_raf_onlink = 1; /* should be configurable? */ pr0.ndpr_raf_auto = ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0); pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime; pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime; /* add the prefix if not yet. */ if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { /* * nd6_prelist_add will install the corresponding * interface route. */ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) goto out; if (pr == NULL) { log(LOG_ERR, "nd6_prelist_add succeeded but " "no prefix\n"); error = EINVAL; goto out; } } /* relate the address to the prefix */ if (ia->ia6_ndpr == NULL) { ia->ia6_ndpr = pr; pr->ndpr_refcnt++; /* * If this is the first autoconf address from the * prefix, create a temporary address as well * (when required). */ if ((ia->ia6_flags & IN6_IFF_AUTOCONF) && V_ip6_use_tempaddr && pr->ndpr_refcnt == 1) { int e; if ((e = in6_tmpifadd(ia, 1, 0)) != 0) { log(LOG_NOTICE, "in6_control: failed " "to create a temporary address, " "errno=%d\n", e); } } } /* * this might affect the status of autoconfigured addresses, * that is, this address might make other addresses detached. */ pfxlist_onlink_check(); if (error == 0 && ia) { if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) { /* * Try to clear the flag when a new * IPv6 address is added onto an * IFDISABLED interface and it * succeeds. */ struct in6_ndireq nd; memset(&nd, 0, sizeof(nd)); nd.ndi.flags = ND_IFINFO(ifp)->flags; nd.ndi.flags &= ~ND6_IFF_IFDISABLED; if (nd6_ioctl(SIOCSIFINFO_FLAGS, (caddr_t)&nd, ifp) < 0) log(LOG_NOTICE, "SIOCAIFADDR_IN6: " "SIOCSIFINFO_FLAGS for -ifdisabled " "failed."); /* * Ignore failure of clearing the flag * intentionally. The failure means * address duplication was detected. */ } EVENTHANDLER_INVOKE(ifaddr_event, ifp); } break; } case SIOCDIFADDR_IN6: { struct nd_prefix *pr; /* * If the address being deleted is the only one that owns * the corresponding prefix, expire the prefix as well. * XXX: theoretically, we don't have to worry about such * relationship, since we separate the address management * and the prefix management. We do this, however, to provide * as much backward compatibility as possible in terms of * the ioctl operation. * Note that in6_purgeaddr() will decrement ndpr_refcnt. */ pr = ia->ia6_ndpr; in6_purgeaddr(&ia->ia_ifa); if (pr && pr->ndpr_refcnt == 0) prelist_remove(pr); EVENTHANDLER_INVOKE(ifaddr_event, ifp); break; } default: if (ifp == NULL || ifp->if_ioctl == 0) { error = EOPNOTSUPP; goto out; } error = (*ifp->if_ioctl)(ifp, cmd, data); goto out; } error = 0; out: if (ia != NULL) ifa_free(&ia->ia_ifa); return (error); } /* * Join necessary multicast groups. Factored out from in6_update_ifa(). * This entire work should only be done once, for the default FIB. */ static int in6_update_ifa_join_mc(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int flags, struct in6_multi **in6m_sol) { char ip6buf[INET6_ADDRSTRLEN]; struct sockaddr_in6 mltaddr, mltmask; struct in6_addr llsol; struct in6_multi_mship *imm; struct rtentry *rt; int delay, error; KASSERT(in6m_sol != NULL, ("%s: in6m_sol is NULL", __func__)); /* Join solicited multicast addr for new host id. */ bzero(&llsol, sizeof(struct in6_addr)); llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; llsol.s6_addr32[1] = 0; llsol.s6_addr32[2] = htonl(1); llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; llsol.s6_addr8[12] = 0xff; if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { /* XXX: should not happen */ log(LOG_ERR, "%s: in6_setscope failed\n", __func__); goto cleanup; } delay = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * We need a random delay for DAD on the address being * configured. It also means delaying transmission of the * corresponding MLD report to avoid report collision. * [RFC 4861, Section 6.3.7] */ delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); } imm = in6_joingroup(ifp, &llsol, &error, delay); if (imm == NULL) { nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &llsol), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); *in6m_sol = imm->i6mm_maddr; bzero(&mltmask, sizeof(mltmask)); mltmask.sin6_len = sizeof(struct sockaddr_in6); mltmask.sin6_family = AF_INET6; mltmask.sin6_addr = in6mask32; #define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ /* * Join link-local all-nodes address. */ bzero(&mltaddr, sizeof(mltaddr)); mltaddr.sin6_len = sizeof(struct sockaddr_in6); mltaddr.sin6_family = AF_INET6; mltaddr.sin6_addr = in6addr_linklocal_allnodes; if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) goto cleanup; /* XXX: should not fail */ /* * XXX: do we really need this automatic routes? We should probably * reconsider this stuff. Most applications actually do not need the * routes, since they usually specify the outgoing interface. */ rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL) { /* XXX: only works in !SCOPEDROUTING case. */ if (memcmp(&mltaddr.sin6_addr, &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, MLTMASK_LEN)) { RTFREE_LOCKED(rt); rt = NULL; } } if (rt == NULL) { error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP, (struct rtentry **)0, RT_DEFAULT_FIB); if (error) goto cleanup; } else RTFREE_LOCKED(rt); imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); if (imm == NULL) { nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr.sin6_addr), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); /* * Join node information group address. */ delay = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * The spec does not say anything about delay for this group, * but the same logic should apply. */ delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); } if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { /* XXX jinmei */ imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay); if (imm == NULL) nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr.sin6_addr), if_name(ifp), error)); /* XXX not very fatal, go on... */ else LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); } if (V_icmp6_nodeinfo_oldmcprefix && in6_nigroup_oldmcprefix(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) { imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay); if (imm == NULL) nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr.sin6_addr), if_name(ifp), error)); /* XXX not very fatal, go on... */ else LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); } /* * Join interface-local all-nodes address. * (ff01::1%ifN, and ff01::%ifN/32) */ mltaddr.sin6_addr = in6addr_nodelocal_allnodes; if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) goto cleanup; /* XXX: should not fail */ /* XXX: again, do we really need the route? */ rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL) { if (memcmp(&mltaddr.sin6_addr, &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, MLTMASK_LEN)) { RTFREE_LOCKED(rt); rt = NULL; } } if (rt == NULL) { error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP, (struct rtentry **)0, RT_DEFAULT_FIB); if (error) goto cleanup; } else RTFREE_LOCKED(rt); imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); if (imm == NULL) { nd6log((LOG_WARNING, "%s: addmulti failed for %s on %s " "(errno=%d)\n", __func__, ip6_sprintf(ip6buf, &mltaddr.sin6_addr), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); #undef MLTMASK_LEN cleanup: return (error); } /* * Update parameters of an IPv6 interface address. * If necessary, a new entry is created and linked into address chains. * This function is separated from in6_control(). * XXX: should this be performed under splnet()? */ int in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int flags) { int error = 0, hostIsNew = 0, plen = -1; struct sockaddr_in6 dst6; struct in6_addrlifetime *lt; struct in6_multi *in6m_sol; int delay; char ip6buf[INET6_ADDRSTRLEN]; /* Validate parameters */ if (ifp == NULL || ifra == NULL) /* this maybe redundant */ return (EINVAL); /* * The destination address for a p2p link must have a family * of AF_UNSPEC or AF_INET6. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ifra->ifra_dstaddr.sin6_family != AF_INET6 && ifra->ifra_dstaddr.sin6_family != AF_UNSPEC) return (EAFNOSUPPORT); /* * validate ifra_prefixmask. don't check sin6_family, netmask * does not carry fields other than sin6_len. */ if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6)) return (EINVAL); /* * Because the IPv6 address architecture is classless, we require * users to specify a (non 0) prefix length (mask) for a new address. * We also require the prefix (when specified) mask is valid, and thus * reject a non-consecutive mask. */ if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0) return (EINVAL); if (ifra->ifra_prefixmask.sin6_len != 0) { plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, (u_char *)&ifra->ifra_prefixmask + ifra->ifra_prefixmask.sin6_len); if (plen <= 0) return (EINVAL); } else { /* * In this case, ia must not be NULL. We just use its prefix * length. */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); } /* * If the destination address on a p2p interface is specified, * and the address is a scoped one, validate/set the scope * zone identifier. */ dst6 = ifra->ifra_dstaddr; if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 && (dst6.sin6_family == AF_INET6)) { struct in6_addr in6_tmp; u_int32_t zoneid; in6_tmp = dst6.sin6_addr; if (in6_setscope(&in6_tmp, ifp, &zoneid)) return (EINVAL); /* XXX: should be impossible */ if (dst6.sin6_scope_id != 0) { if (dst6.sin6_scope_id != zoneid) return (EINVAL); } else /* user omit to specify the ID. */ dst6.sin6_scope_id = zoneid; /* convert into the internal form */ if (sa6_embedscope(&dst6, 0)) return (EINVAL); /* XXX: should be impossible */ } /* * The destination address can be specified only for a p2p or a * loopback interface. If specified, the corresponding prefix length * must be 128. */ if (ifra->ifra_dstaddr.sin6_family == AF_INET6) { if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) { /* XXX: noisy message */ nd6log((LOG_INFO, "in6_update_ifa: a destination can " "be specified for a p2p or a loopback IF only\n")); return (EINVAL); } if (plen != 128) { nd6log((LOG_INFO, "in6_update_ifa: prefixlen should " "be 128 when dstaddr is specified\n")); return (EINVAL); } } /* lifetime consistency check */ lt = &ifra->ifra_lifetime; if (lt->ia6t_pltime > lt->ia6t_vltime) return (EINVAL); if (lt->ia6t_vltime == 0) { /* * the following log might be noisy, but this is a typical * configuration mistake or a tool's bug. */ nd6log((LOG_INFO, "in6_update_ifa: valid lifetime is 0 for %s\n", ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr))); if (ia == NULL) return (0); /* there's nothing to do */ } /* * If this is a new address, allocate a new ifaddr and link it * into chains. */ if (ia == NULL) { hostIsNew = 1; /* * When in6_update_ifa() is called in a process of a received * RA, it is called under an interrupt context. So, we should * call malloc with M_NOWAIT. */ ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR, M_NOWAIT); if (ia == NULL) return (ENOBUFS); bzero((caddr_t)ia, sizeof(*ia)); ifa_init(&ia->ia_ifa); LIST_INIT(&ia->ia6_memberships); /* Initialize the address and masks, and put time stamp */ ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; ia->ia_addr.sin6_family = AF_INET6; ia->ia_addr.sin6_len = sizeof(ia->ia_addr); ia->ia6_createtime = time_second; if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) { /* * XXX: some functions expect that ifa_dstaddr is not * NULL for p2p interfaces. */ ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; } else { ia->ia_ifa.ifa_dstaddr = NULL; } ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask; ia->ia_ifp = ifp; ifa_ref(&ia->ia_ifa); /* if_addrhead */ IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_ref(&ia->ia_ifa); /* in6_ifaddrhead */ IN6_IFADDR_WLOCK(); TAILQ_INSERT_TAIL(&V_in6_ifaddrhead, ia, ia_link); IN6_IFADDR_WUNLOCK(); } /* update timestamp */ ia->ia6_updatetime = time_second; /* set prefix mask */ if (ifra->ifra_prefixmask.sin6_len) { /* * We prohibit changing the prefix length of an existing * address, because * + such an operation should be rare in IPv6, and * + the operation would confuse prefix management. */ if (ia->ia_prefixmask.sin6_len && in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) { nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an" " existing (%s) address should not be changed\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); error = EINVAL; goto unlink; } ia->ia_prefixmask = ifra->ifra_prefixmask; } /* * If a new destination address is specified, scrub the old one and * install the new destination. Note that the interface must be * p2p or loopback (see the check above.) */ if (dst6.sin6_family == AF_INET6 && !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) { int e; if ((ia->ia_flags & IFA_ROUTE) != 0 && (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) { nd6log((LOG_ERR, "in6_update_ifa: failed to remove " "a route to the old destination: %s\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); /* proceed anyway... */ } else ia->ia_flags &= ~IFA_ROUTE; ia->ia_dstaddr = dst6; } /* * Set lifetimes. We do not refer to ia6t_expire and ia6t_preferred * to see if the address is deprecated or invalidated, but initialize * these members for applications. */ ia->ia6_lifetime = ifra->ifra_lifetime; if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = time_second + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = time_second + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; /* reset the interface and routing table appropriately. */ if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0) goto unlink; /* * configure address flags. */ ia->ia6_flags = ifra->ifra_flags; /* * backward compatibility - if IN6_IFF_DEPRECATED is set from the * userland, make it deprecated. */ if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) { ia->ia6_lifetime.ia6t_pltime = 0; ia->ia6_lifetime.ia6t_preferred = time_second; } /* * Make the address tentative before joining multicast addresses, * so that corresponding MLD responses would not have a tentative * source address. */ ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */ if (hostIsNew && in6if_do_dad(ifp)) ia->ia6_flags |= IN6_IFF_TENTATIVE; /* DAD should be performed after ND6_IFF_IFDISABLED is cleared. */ if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) ia->ia6_flags |= IN6_IFF_TENTATIVE; /* * We are done if we have simply modified an existing address. */ if (!hostIsNew) return (error); /* * Beyond this point, we should call in6_purgeaddr upon an error, * not just go to unlink. */ /* Join necessary multicast groups. */ in6m_sol = NULL; if ((ifp->if_flags & IFF_MULTICAST) != 0) { error = in6_update_ifa_join_mc(ifp, ifra, ia, flags, &in6m_sol); if (error) goto cleanup; } /* * Perform DAD, if needed. * XXX It may be of use, if we can administratively disable DAD. */ if (in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && (ia->ia6_flags & IN6_IFF_TENTATIVE)) { int mindelay, maxdelay; delay = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * We need to impose a delay before sending an NS * for DAD. Check if we also needed a delay for the * corresponding MLD message. If we did, the delay * should be larger than the MLD delay (this could be * relaxed a bit, but this simple logic is at least * safe). * XXX: Break data hiding guidelines and look at * state for the solicited multicast group. */ mindelay = 0; if (in6m_sol != NULL && in6m_sol->in6m_state == MLD_REPORTING_MEMBER) { mindelay = in6m_sol->in6m_timer; } maxdelay = MAX_RTR_SOLICITATION_DELAY * hz; if (maxdelay - mindelay == 0) delay = 0; else { delay = (arc4random() % (maxdelay - mindelay)) + mindelay; } } nd6_dad_start((struct ifaddr *)ia, delay); } KASSERT(hostIsNew, ("in6_update_ifa: !hostIsNew")); ifa_free(&ia->ia_ifa); return (error); unlink: /* * XXX: if a change of an existing address failed, keep the entry * anyway. */ if (hostIsNew) { in6_unlink_ifa(ia, ifp); ifa_free(&ia->ia_ifa); } return (error); cleanup: KASSERT(hostIsNew, ("in6_update_ifa: cleanup: !hostIsNew")); ifa_free(&ia->ia_ifa); in6_purgeaddr(&ia->ia_ifa); return error; } /* * Leave multicast groups. Factored out from in6_purgeaddr(). * This entire work should only be done once, for the default FIB. */ static int in6_purgeaddr_mc(struct ifnet *ifp, struct in6_ifaddr *ia, struct ifaddr *ifa0) { struct sockaddr_in6 mltaddr, mltmask; struct in6_multi_mship *imm; struct rtentry *rt; struct sockaddr_in6 sin6; int error; /* * Leave from multicast groups we have joined for the interface. */ while ((imm = LIST_FIRST(&ia->ia6_memberships)) != NULL) { LIST_REMOVE(imm, i6mm_chain); in6_leavegroup(imm); } /* * Remove the link-local all-nodes address. */ bzero(&mltmask, sizeof(mltmask)); mltmask.sin6_len = sizeof(struct sockaddr_in6); mltmask.sin6_family = AF_INET6; mltmask.sin6_addr = in6mask32; bzero(&mltaddr, sizeof(mltaddr)); mltaddr.sin6_len = sizeof(struct sockaddr_in6); mltaddr.sin6_family = AF_INET6; mltaddr.sin6_addr = in6addr_linklocal_allnodes; if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) return (error); /* * As for the mltaddr above, proactively prepare the sin6 to avoid * rtentry un- and re-locking. */ if (ifa0 != NULL) { bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(sin6); sin6.sin6_family = AF_INET6; memcpy(&sin6.sin6_addr, &satosin6(ifa0->ifa_addr)->sin6_addr, sizeof(sin6.sin6_addr)); error = in6_setscope(&sin6.sin6_addr, ifa0->ifa_ifp, NULL); if (error != 0) return (error); } rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL && rt->rt_gateway != NULL && (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, &ia->ia_addr.sin6_addr, sizeof(ia->ia_addr.sin6_addr)) == 0)) { /* * If no more IPv6 address exists on this interface then * remove the multicast address route. */ if (ifa0 == NULL) { memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, sizeof(mltaddr.sin6_addr)); RTFREE_LOCKED(rt); error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP, (struct rtentry **)0, RT_DEFAULT_FIB); if (error) log(LOG_INFO, "%s: link-local all-nodes " "multicast address deletion error\n", __func__); } else { /* * Replace the gateway of the route. */ memcpy(rt->rt_gateway, &sin6, sizeof(sin6)); RTFREE_LOCKED(rt); } } else { if (rt != NULL) RTFREE_LOCKED(rt); } /* * Remove the node-local all-nodes address. */ mltaddr.sin6_addr = in6addr_nodelocal_allnodes; if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) return (error); rt = in6_rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL, RT_DEFAULT_FIB); if (rt != NULL && rt->rt_gateway != NULL && (memcmp(&satosin6(rt->rt_gateway)->sin6_addr, &ia->ia_addr.sin6_addr, sizeof(ia->ia_addr.sin6_addr)) == 0)) { /* * If no more IPv6 address exists on this interface then * remove the multicast address route. */ if (ifa0 == NULL) { memcpy(&mltaddr.sin6_addr, &satosin6(rt_key(rt))->sin6_addr, sizeof(mltaddr.sin6_addr)); RTFREE_LOCKED(rt); error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP, (struct rtentry **)0, RT_DEFAULT_FIB); if (error) log(LOG_INFO, "%s: node-local all-nodes" "multicast address deletion error\n", __func__); } else { /* * Replace the gateway of the route. */ memcpy(rt->rt_gateway, &sin6, sizeof(sin6)); RTFREE_LOCKED(rt); } } else { if (rt != NULL) RTFREE_LOCKED(rt); } return (0); } void in6_purgeaddr(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; int plen, error; struct ifaddr *ifa0; /* * find another IPv6 address as the gateway for the * link-local and node-local all-nodes multicast * address routes */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa0, &ifp->if_addrhead, ifa_link) { if ((ifa0->ifa_addr->sa_family != AF_INET6) || memcmp(&satosin6(ifa0->ifa_addr)->sin6_addr, &ia->ia_addr.sin6_addr, sizeof(struct in6_addr)) == 0) continue; else break; } if (ifa0 != NULL) ifa_ref(ifa0); IF_ADDR_RUNLOCK(ifp); /* * Remove the loopback route to the interface address. * The check for the current setting of "nd6_useloopback" * is not needed. */ if (ia->ia_flags & IFA_RTSELF) { error = ifa_del_loopback_route((struct ifaddr *)ia, (struct sockaddr *)&ia->ia_addr); if (error == 0) ia->ia_flags &= ~IFA_RTSELF; } /* stop DAD processing */ nd6_dad_stop(ifa); /* Remove local address entry from lltable. */ in6_ifremloop(ifa); /* Leave multicast groups. */ error = in6_purgeaddr_mc(ifp, ia, ifa0); if (ifa0 != NULL) ifa_free(ifa0); plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if ((ia->ia_flags & IFA_ROUTE) && plen == 128) { error = rtinit(&(ia->ia_ifa), RTM_DELETE, ia->ia_flags | (ia->ia_dstaddr.sin6_family == AF_INET6) ? RTF_HOST : 0); if (error != 0) log(LOG_INFO, "%s: err=%d, destination address delete " "failed\n", __func__, error); ia->ia_flags &= ~IFA_ROUTE; } in6_unlink_ifa(ia, ifp); } static void in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) { int s = splnet(); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(&ia->ia_ifa); /* if_addrhead */ /* * Defer the release of what might be the last reference to the * in6_ifaddr so that it can't be freed before the remainder of the * cleanup. */ IN6_IFADDR_WLOCK(); TAILQ_REMOVE(&V_in6_ifaddrhead, ia, ia_link); IN6_IFADDR_WUNLOCK(); /* * Release the reference to the base prefix. There should be a * positive reference. */ if (ia->ia6_ndpr == NULL) { nd6log((LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address " "%p has no prefix\n", ia)); } else { ia->ia6_ndpr->ndpr_refcnt--; ia->ia6_ndpr = NULL; } /* * Also, if the address being removed is autoconf'ed, call * pfxlist_onlink_check() since the release might affect the status of * other (detached) addresses. */ if ((ia->ia6_flags & IN6_IFF_AUTOCONF)) { pfxlist_onlink_check(); } ifa_free(&ia->ia_ifa); /* in6_ifaddrhead */ splx(s); } void in6_purgeif(struct ifnet *ifp) { struct ifaddr *ifa, *nifa; TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6_purgeaddr(ifa); } in6_ifdetach(ifp); } /* * SIOC[GAD]LIFADDR. * SIOCGLIFADDR: get first address. (?) * SIOCGLIFADDR with IFLR_PREFIX: * get first address that matches the specified prefix. * SIOCALIFADDR: add the specified address. * SIOCALIFADDR with IFLR_PREFIX: * add the specified prefix, filling hostid part from * the first link-local address. prefixlen must be <= 64. * SIOCDLIFADDR: delete the specified address. * SIOCDLIFADDR with IFLR_PREFIX: * delete the first address that matches the specified prefix. * return values: * EINVAL on invalid parameters * EADDRNOTAVAIL on prefix match failed/specified address not found * other values may be returned from in6_ioctl() * * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64. * this is to accomodate address naming scheme other than RFC2374, * in the future. * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374 * address encoding scheme. (see figure on page 8) */ static int in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct if_laddrreq *iflr = (struct if_laddrreq *)data; struct ifaddr *ifa; struct sockaddr *sa; /* sanity checks */ if (!data || !ifp) { panic("invalid argument to in6_lifaddr_ioctl"); /* NOTREACHED */ } switch (cmd) { case SIOCGLIFADDR: /* address must be specified on GET with IFLR_PREFIX */ if ((iflr->flags & IFLR_PREFIX) == 0) break; /* FALLTHROUGH */ case SIOCALIFADDR: case SIOCDLIFADDR: /* address must be specified on ADD and DELETE */ sa = (struct sockaddr *)&iflr->addr; if (sa->sa_family != AF_INET6) return EINVAL; if (sa->sa_len != sizeof(struct sockaddr_in6)) return EINVAL; /* XXX need improvement */ sa = (struct sockaddr *)&iflr->dstaddr; if (sa->sa_family && sa->sa_family != AF_INET6) return EINVAL; if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6)) return EINVAL; break; default: /* shouldn't happen */ #if 0 panic("invalid cmd to in6_lifaddr_ioctl"); /* NOTREACHED */ #else return EOPNOTSUPP; #endif } if (sizeof(struct in6_addr) * 8 < iflr->prefixlen) return EINVAL; switch (cmd) { case SIOCALIFADDR: { struct in6_aliasreq ifra; struct in6_addr *hostid = NULL; int prefixlen; ifa = NULL; if ((iflr->flags & IFLR_PREFIX) != 0) { struct sockaddr_in6 *sin6; /* * hostid is to fill in the hostid part of the * address. hostid points to the first link-local * address attached to the interface. */ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); if (!ifa) return EADDRNOTAVAIL; hostid = IFA_IN6(ifa); /* prefixlen must be <= 64. */ if (64 < iflr->prefixlen) { if (ifa != NULL) ifa_free(ifa); return EINVAL; } prefixlen = iflr->prefixlen; /* hostid part must be zero. */ sin6 = (struct sockaddr_in6 *)&iflr->addr; if (sin6->sin6_addr.s6_addr32[2] != 0 || sin6->sin6_addr.s6_addr32[3] != 0) { if (ifa != NULL) ifa_free(ifa); return EINVAL; } } else prefixlen = iflr->prefixlen; /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&iflr->addr, &ifra.ifra_addr, ((struct sockaddr *)&iflr->addr)->sa_len); if (hostid) { /* fill in hostid part */ ifra.ifra_addr.sin6_addr.s6_addr32[2] = hostid->s6_addr32[2]; ifra.ifra_addr.sin6_addr.s6_addr32[3] = hostid->s6_addr32[3]; } if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */ bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, ((struct sockaddr *)&iflr->dstaddr)->sa_len); if (hostid) { ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] = hostid->s6_addr32[2]; ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] = hostid->s6_addr32[3]; } } if (ifa != NULL) ifa_free(ifa); ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen); ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX; return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td); } case SIOCGLIFADDR: case SIOCDLIFADDR: { struct in6_ifaddr *ia; struct in6_addr mask, candidate, match; struct sockaddr_in6 *sin6; int cmp; bzero(&mask, sizeof(mask)); if (iflr->flags & IFLR_PREFIX) { /* lookup a prefix rather than address. */ in6_prefixlen2mask(&mask, iflr->prefixlen); sin6 = (struct sockaddr_in6 *)&iflr->addr; bcopy(&sin6->sin6_addr, &match, sizeof(match)); match.s6_addr32[0] &= mask.s6_addr32[0]; match.s6_addr32[1] &= mask.s6_addr32[1]; match.s6_addr32[2] &= mask.s6_addr32[2]; match.s6_addr32[3] &= mask.s6_addr32[3]; /* if you set extra bits, that's wrong */ if (bcmp(&match, &sin6->sin6_addr, sizeof(match))) return EINVAL; cmp = 1; } else { if (cmd == SIOCGLIFADDR) { /* on getting an address, take the 1st match */ cmp = 0; /* XXX */ } else { /* on deleting an address, do exact match */ in6_prefixlen2mask(&mask, 128); sin6 = (struct sockaddr_in6 *)&iflr->addr; bcopy(&sin6->sin6_addr, &match, sizeof(match)); cmp = 1; } } IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (!cmp) break; /* * XXX: this is adhoc, but is necessary to allow * a user to specify fe80::/64 (not /10) for a * link-local address. */ bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate)); in6_clearscope(&candidate); candidate.s6_addr32[0] &= mask.s6_addr32[0]; candidate.s6_addr32[1] &= mask.s6_addr32[1]; candidate.s6_addr32[2] &= mask.s6_addr32[2]; candidate.s6_addr32[3] &= mask.s6_addr32[3]; if (IN6_ARE_ADDR_EQUAL(&candidate, &match)) break; } if (ifa != NULL) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); if (!ifa) return EADDRNOTAVAIL; ia = ifa2ia6(ifa); if (cmd == SIOCGLIFADDR) { int error; /* fill in the if_laddrreq structure */ bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len); error = sa6_recoverscope( (struct sockaddr_in6 *)&iflr->addr); if (error != 0) { ifa_free(ifa); return (error); } if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &iflr->dstaddr, ia->ia_dstaddr.sin6_len); error = sa6_recoverscope( (struct sockaddr_in6 *)&iflr->dstaddr); if (error != 0) { ifa_free(ifa); return (error); } } else bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); iflr->prefixlen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); iflr->flags = ia->ia6_flags; /* XXX */ ifa_free(ifa); return 0; } else { struct in6_aliasreq ifra; /* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&ia->ia_addr, &ifra.ifra_addr, ia->ia_addr.sin6_len); if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, ia->ia_dstaddr.sin6_len); } else { bzero(&ifra.ifra_dstaddr, sizeof(ifra.ifra_dstaddr)); } bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr, ia->ia_prefixmask.sin6_len); ifra.ifra_flags = ia->ia6_flags; ifa_free(ifa); return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra, ifp, td); } } } return EOPNOTSUPP; /* just for safety */ } /* * Initialize an interface's IPv6 address and routing table entry. */ static int in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, struct sockaddr_in6 *sin6, int newhost) { int error = 0, plen, ifacount = 0; int s = splimp(); struct ifaddr *ifa; /* * Give the interface a chance to initialize * if this is its first address, * and to validate the address if necessary. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifacount++; } IF_ADDR_RUNLOCK(ifp); ia->ia_addr = *sin6; if (ifacount <= 1 && ifp->if_ioctl) { error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia); if (error) { splx(s); return (error); } } splx(s); ia->ia_ifa.ifa_metric = ifp->if_metric; /* we could do in(6)_socktrim here, but just omit it at this moment. */ /* * Special case: * If a new destination address is specified for a point-to-point * interface, install a route to the destination as an interface * direct route. * XXX: the logic below rejects assigning multiple addresses on a p2p * interface that share the same destination. */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { int rtflags = RTF_UP | RTF_HOST; error = rtinit(&ia->ia_ifa, RTM_ADD, ia->ia_flags | rtflags); if (error) return (error); ia->ia_flags |= IFA_ROUTE; /* * Handle the case for ::1 . */ if (ifp->if_flags & IFF_LOOPBACK) ia->ia_flags |= IFA_RTSELF; } /* * add a loopback route to self */ if (!(ia->ia_flags & IFA_RTSELF) && V_nd6_useloopback) { error = ifa_add_loopback_route((struct ifaddr *)ia, (struct sockaddr *)&ia->ia_addr); if (error == 0) ia->ia_flags |= IFA_RTSELF; } /* Add local address to lltable, if necessary (ex. on p2p link). */ if (newhost) in6_ifaddloop(&(ia->ia_ifa)); return (error); } /* * Find an IPv6 interface link-local address specific to an interface. * ifaddr is returned referenced. */ struct in6_ifaddr * in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags) { struct ifaddr *ifa; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) { if ((((struct in6_ifaddr *)ifa)->ia6_flags & ignoreflags) != 0) continue; ifa_ref(ifa); break; } } IF_ADDR_RUNLOCK(ifp); return ((struct in6_ifaddr *)ifa); } /* * find the internet address corresponding to a given interface and address. * ifaddr is returned referenced. */ struct in6_ifaddr * in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr) { struct ifaddr *ifa; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) { ifa_ref(ifa); break; } } IF_ADDR_RUNLOCK(ifp); return ((struct in6_ifaddr *)ifa); } /* * Find a link-local scoped address on ifp and return it if any. */ struct in6_ifaddr * in6ifa_llaonifp(struct ifnet *ifp) { struct sockaddr_in6 *sin6; struct ifaddr *ifa; if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) return (NULL); if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr) || IN6_IS_ADDR_MC_INTFACELOCAL(&sin6->sin6_addr) || IN6_IS_ADDR_MC_NODELOCAL(&sin6->sin6_addr)) break; } if_addr_runlock(ifp); return ((struct in6_ifaddr *)ifa); } /* * Convert IP6 address to printable (loggable) representation. Caller * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long. */ static char digits[] = "0123456789abcdef"; char * ip6_sprintf(char *ip6buf, const struct in6_addr *addr) { int i, cnt = 0, maxcnt = 0, idx = 0, index = 0; char *cp; const u_int16_t *a = (const u_int16_t *)addr; const u_int8_t *d; int dcolon = 0, zero = 0; cp = ip6buf; for (i = 0; i < 8; i++) { if (*(a + i) == 0) { cnt++; if (cnt == 1) idx = i; } else if (maxcnt < cnt) { maxcnt = cnt; index = idx; cnt = 0; } } if (maxcnt < cnt) { maxcnt = cnt; index = idx; } for (i = 0; i < 8; i++) { if (dcolon == 1) { if (*a == 0) { if (i == 7) *cp++ = ':'; a++; continue; } else dcolon = 2; } if (*a == 0) { if (dcolon == 0 && *(a + 1) == 0 && i == index) { if (i == 0) *cp++ = ':'; *cp++ = ':'; dcolon = 1; } else { *cp++ = '0'; *cp++ = ':'; } a++; continue; } d = (const u_char *)a; /* Try to eliminate leading zeros in printout like in :0001. */ zero = 1; *cp = digits[*d >> 4]; if (*cp != '0') { zero = 0; cp++; } *cp = digits[*d++ & 0xf]; if (zero == 0 || (*cp != '0')) { zero = 0; cp++; } *cp = digits[*d >> 4]; if (zero == 0 || (*cp != '0')) { zero = 0; cp++; } *cp++ = digits[*d & 0xf]; *cp++ = ':'; a++; } *--cp = '\0'; return (ip6buf); } int in6_localaddr(struct in6_addr *in6) { struct in6_ifaddr *ia; if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6)) return 1; IN6_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr, &ia->ia_prefixmask.sin6_addr)) { IN6_IFADDR_RUNLOCK(); return 1; } } IN6_IFADDR_RUNLOCK(); return (0); } /* * Return 1 if an internet address is for the local host and configured * on one of its interfaces. */ int in6_localip(struct in6_addr *in6) { struct in6_ifaddr *ia; IN6_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { if (IN6_ARE_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr)) { IN6_IFADDR_RUNLOCK(); return (1); } } IN6_IFADDR_RUNLOCK(); return (0); } int in6_is_addr_deprecated(struct sockaddr_in6 *sa6) { struct in6_ifaddr *ia; IN6_IFADDR_RLOCK(); TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &sa6->sin6_addr) && (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) { IN6_IFADDR_RUNLOCK(); return (1); /* true */ } /* XXX: do we still have to go thru the rest of the list? */ } IN6_IFADDR_RUNLOCK(); return (0); /* false */ } /* * return length of part which dst and src are equal * hard coding... */ int in6_matchlen(struct in6_addr *src, struct in6_addr *dst) { int match = 0; u_char *s = (u_char *)src, *d = (u_char *)dst; u_char *lim = s + 16, r; while (s < lim) if ((r = (*d++ ^ *s++)) != 0) { while (r < 128) { match++; r <<= 1; } break; } else match += 8; return match; } /* XXX: to be scope conscious */ int in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len) { int bytelen, bitlen; /* sanity check */ if (0 > len || len > 128) { log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n", len); return (0); } bytelen = len / 8; bitlen = len % 8; if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen)) return (0); if (bitlen != 0 && p1->s6_addr[bytelen] >> (8 - bitlen) != p2->s6_addr[bytelen] >> (8 - bitlen)) return (0); return (1); } void in6_prefixlen2mask(struct in6_addr *maskp, int len) { u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; int bytelen, bitlen, i; /* sanity check */ if (0 > len || len > 128) { log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n", len); return; } bzero(maskp, sizeof(*maskp)); bytelen = len / 8; bitlen = len % 8; for (i = 0; i < bytelen; i++) maskp->s6_addr[i] = 0xff; if (bitlen) maskp->s6_addr[bytelen] = maskarray[bitlen - 1]; } /* * return the best address out of the same scope. if no address was * found, return the first valid address from designated IF. */ struct in6_ifaddr * in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) { int dst_scope = in6_addrscope(dst), blen = -1, tlen; struct ifaddr *ifa; struct in6_ifaddr *besta = 0; struct in6_ifaddr *dep[2]; /* last-resort: deprecated */ dep[0] = dep[1] = NULL; /* * We first look for addresses in the same scope. * If there is one, return it. * If two or more, return one which matches the dst longest. * If none, return one of global addresses assigned other ifs. */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) continue; /* XXX: is there any case to allow anycast? */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) continue; /* don't use this interface */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { if (V_ip6_use_deprecated) dep[0] = (struct in6_ifaddr *)ifa; continue; } if (dst_scope == in6_addrscope(IFA_IN6(ifa))) { /* * call in6_matchlen() as few as possible */ if (besta) { if (blen == -1) blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst); tlen = in6_matchlen(IFA_IN6(ifa), dst); if (tlen > blen) { blen = tlen; besta = (struct in6_ifaddr *)ifa; } } else besta = (struct in6_ifaddr *)ifa; } } if (besta) { ifa_ref(&besta->ia_ifa); IF_ADDR_RUNLOCK(ifp); return (besta); } TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) continue; /* XXX: is there any case to allow anycast? */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) continue; /* don't use this interface */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { if (V_ip6_use_deprecated) dep[1] = (struct in6_ifaddr *)ifa; continue; } if (ifa != NULL) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); return (struct in6_ifaddr *)ifa; } /* use the last-resort values, that are, deprecated addresses */ if (dep[0]) { ifa_ref((struct ifaddr *)dep[0]); IF_ADDR_RUNLOCK(ifp); return dep[0]; } if (dep[1]) { ifa_ref((struct ifaddr *)dep[1]); IF_ADDR_RUNLOCK(ifp); return dep[1]; } IF_ADDR_RUNLOCK(ifp); return NULL; } /* * perform DAD when interface becomes IFF_UP. */ void in6_if_up(struct ifnet *ifp) { struct ifaddr *ifa; struct in6_ifaddr *ia; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (ia->ia6_flags & IN6_IFF_TENTATIVE) { /* * The TENTATIVE flag was likely set by hand * beforehand, implicitly indicating the need for DAD. * We may be able to skip the random delay in this * case, but we impose delays just in case. */ nd6_dad_start(ifa, arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz)); } } IF_ADDR_RUNLOCK(ifp); /* * special cases, like 6to4, are handled in in6_ifattach */ in6_ifattach(ifp, NULL); } int in6if_do_dad(struct ifnet *ifp) { if ((ifp->if_flags & IFF_LOOPBACK) != 0) return (0); if (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) return (0); switch (ifp->if_type) { #ifdef IFT_DUMMY case IFT_DUMMY: #endif case IFT_FAITH: /* * These interfaces do not have the IFF_LOOPBACK flag, * but loop packets back. We do not have to do DAD on such * interfaces. We should even omit it, because loop-backed * NS would confuse the DAD procedure. */ return (0); default: /* * Our DAD routine requires the interface up and running. * However, some interfaces can be up before the RUNNING * status. Additionaly, users may try to assign addresses * before the interface becomes up (or running). * We simply skip DAD in such a case as a work around. * XXX: we should rather mark "tentative" on such addresses, * and do DAD after the interface becomes ready. */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) return (0); return (1); } } /* * Calculate max IPv6 MTU through all the interfaces and store it * to in6_maxmtu. */ void in6_setmaxmtu(void) { unsigned long maxmtu = 0; struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_list) { /* this function can be called during ifnet initialization */ if (!ifp->if_afdata[AF_INET6]) continue; if ((ifp->if_flags & IFF_LOOPBACK) == 0 && IN6_LINKMTU(ifp) > maxmtu) maxmtu = IN6_LINKMTU(ifp); } IFNET_RUNLOCK_NOSLEEP(); if (maxmtu) /* update only when maxmtu is positive */ V_in6_maxmtu = maxmtu; } /* * Provide the length of interface identifiers to be used for the link attached * to the given interface. The length should be defined in "IPv6 over * xxx-link" document. Note that address architecture might also define * the length for a particular set of address prefixes, regardless of the * link type. As clarified in rfc2462bis, those two definitions should be * consistent, and those really are as of August 2004. */ int in6_if2idlen(struct ifnet *ifp) { switch (ifp->if_type) { case IFT_ETHER: /* RFC2464 */ #ifdef IFT_PROPVIRTUAL case IFT_PROPVIRTUAL: /* XXX: no RFC. treat it as ether */ #endif #ifdef IFT_L2VLAN case IFT_L2VLAN: /* ditto */ #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: /* ditto */ #endif #ifdef IFT_MIP case IFT_MIP: /* ditto */ #endif case IFT_INFINIBAND: return (64); case IFT_FDDI: /* RFC2467 */ return (64); case IFT_ISO88025: /* RFC2470 (IPv6 over Token Ring) */ return (64); case IFT_PPP: /* RFC2472 */ return (64); case IFT_ARCNET: /* RFC2497 */ return (64); case IFT_FRELAY: /* RFC2590 */ return (64); case IFT_IEEE1394: /* RFC3146 */ return (64); case IFT_GIF: return (64); /* draft-ietf-v6ops-mech-v2-07 */ case IFT_LOOP: return (64); /* XXX: is this really correct? */ default: /* * Unknown link type: * It might be controversial to use the today's common constant * of 64 for these cases unconditionally. For full compliance, * we should return an error in this case. On the other hand, * if we simply miss the standard for the link type or a new * standard is defined for a new link type, the IFID length * is very likely to be the common constant. As a compromise, * we always use the constant, but make an explicit notice * indicating the "unknown" case. */ printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type); return (64); } } #include struct in6_llentry { struct llentry base; struct sockaddr_in6 l3_addr6; }; /* * Deletes an address from the address table. * This function is called by the timer functions * such as arptimer() and nd6_llinfo_timer(), and * the caller does the locking. */ static void in6_lltable_free(struct lltable *llt, struct llentry *lle) { LLE_WUNLOCK(lle); LLE_LOCK_DESTROY(lle); free(lle, M_LLTABLE); } static struct llentry * in6_lltable_new(const struct sockaddr *l3addr, u_int flags) { struct in6_llentry *lle; lle = malloc(sizeof(struct in6_llentry), M_LLTABLE, M_NOWAIT | M_ZERO); if (lle == NULL) /* NB: caller generates msg */ return NULL; lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr; lle->base.lle_refcnt = 1; lle->base.lle_free = in6_lltable_free; LLE_LOCK_INIT(&lle->base); callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock, CALLOUT_RETURNUNLOCKED); return (&lle->base); } static void in6_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, const struct sockaddr *mask, u_int flags) { const struct sockaddr_in6 *pfx = (const struct sockaddr_in6 *)prefix; const struct sockaddr_in6 *msk = (const struct sockaddr_in6 *)mask; struct llentry *lle, *next; int i; /* * (flags & LLE_STATIC) means deleting all entries * including static ND6 entries. */ IF_AFDATA_WLOCK(llt->llt_ifp); for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { if (IN6_ARE_MASKED_ADDR_EQUAL( &satosin6(L3_ADDR(lle))->sin6_addr, &pfx->sin6_addr, &msk->sin6_addr) && ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) { LLE_WLOCK(lle); if (callout_stop(&lle->la_timer)) LLE_REMREF(lle); llentry_free(lle); } } } IF_AFDATA_WUNLOCK(llt->llt_ifp); } static int in6_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr) { struct rtentry *rt; char ip6buf[INET6_ADDRSTRLEN]; KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); /* Our local addresses are always only installed on the default FIB. */ /* XXX rtalloc1 should take a const param */ rt = in6_rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0, RT_DEFAULT_FIB); if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) { struct ifaddr *ifa; /* * Create an ND6 cache for an IPv6 neighbor * that is not covered by our own prefix. */ /* XXX ifaof_ifpforaddr should take a const param */ ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp); if (ifa != NULL) { ifa_free(ifa); if (rt != NULL) RTFREE_LOCKED(rt); return 0; } log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n", ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr)); if (rt != NULL) RTFREE_LOCKED(rt); return EINVAL; } RTFREE_LOCKED(rt); return 0; } static struct llentry * in6_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr) { const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr; struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; struct llentries *lleh; u_int hashkey; IF_AFDATA_LOCK_ASSERT(ifp); KASSERT(l3addr->sa_family == AF_INET6, ("sin_family %d", l3addr->sa_family)); hashkey = sin6->sin6_addr.s6_addr32[3]; lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)]; LIST_FOREACH(lle, lleh, lle_next) { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)L3_ADDR(lle); if (lle->la_flags & LLE_DELETED) continue; if (bcmp(&sa6->sin6_addr, &sin6->sin6_addr, sizeof(struct in6_addr)) == 0) break; } if (lle == NULL) { if (!(flags & LLE_CREATE)) return (NULL); + IF_AFDATA_WLOCK_ASSERT(ifp); /* * A route that covers the given address must have * been installed 1st because we are doing a resolution, * verify this. */ if (!(flags & LLE_IFADDR) && in6_lltable_rtcheck(ifp, flags, l3addr) != 0) return NULL; lle = in6_lltable_new(l3addr, flags); if (lle == NULL) { log(LOG_INFO, "lla_lookup: new lle malloc failed\n"); return NULL; } lle->la_flags = flags & ~LLE_CREATE; if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) { bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen); lle->la_flags |= (LLE_VALID | LLE_STATIC); } lle->lle_tbl = llt; lle->lle_head = lleh; lle->la_flags |= LLE_LINKED; LIST_INSERT_HEAD(lleh, lle, lle_next); } else if (flags & LLE_DELETE) { if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { LLE_WLOCK(lle); lle->la_flags |= LLE_DELETED; #ifdef DIAGNOSTIC log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle); #endif if ((lle->la_flags & (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC) llentry_free(lle); else LLE_WUNLOCK(lle); } lle = (void *)-1; } if (LLE_IS_VALID(lle)) { if (flags & LLE_EXCLUSIVE) LLE_WLOCK(lle); else LLE_RLOCK(lle); } return (lle); } static int in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr) { struct ifnet *ifp = llt->llt_ifp; struct llentry *lle; /* XXX stack use */ struct { struct rt_msghdr rtm; struct sockaddr_in6 sin6; /* * ndp.c assumes that sdl is word aligned */ #ifdef __LP64__ uint32_t pad; #endif struct sockaddr_dl sdl; } ndpc; int i, error; if (ifp->if_flags & IFF_LOOPBACK) return 0; LLTABLE_LOCK_ASSERT(); error = 0; for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH(lle, &llt->lle_head[i], lle_next) { struct sockaddr_dl *sdl; /* skip deleted or invalid entries */ if ((lle->la_flags & (LLE_DELETED|LLE_VALID)) != LLE_VALID) continue; /* Skip if jailed and not a valid IP of the prison. */ if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0) continue; /* * produce a msg made of: * struct rt_msghdr; * struct sockaddr_in6 (IPv6) * struct sockaddr_dl; */ bzero(&ndpc, sizeof(ndpc)); ndpc.rtm.rtm_msglen = sizeof(ndpc); ndpc.rtm.rtm_version = RTM_VERSION; ndpc.rtm.rtm_type = RTM_GET; ndpc.rtm.rtm_flags = RTF_UP; ndpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY; ndpc.sin6.sin6_family = AF_INET6; ndpc.sin6.sin6_len = sizeof(ndpc.sin6); bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle)); /* publish */ if (lle->la_flags & LLE_PUB) ndpc.rtm.rtm_flags |= RTF_ANNOUNCE; sdl = &ndpc.sdl; sdl->sdl_family = AF_LINK; sdl->sdl_len = sizeof(*sdl); sdl->sdl_alen = ifp->if_addrlen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); ndpc.rtm.rtm_rmx.rmx_expire = lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); if (lle->la_flags & LLE_STATIC) ndpc.rtm.rtm_flags |= RTF_STATIC; ndpc.rtm.rtm_index = ifp->if_index; error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc)); if (error) break; } } return error; } void * in6_domifattach(struct ifnet *ifp) { struct in6_ifextra *ext; ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK); bzero(ext, sizeof(*ext)); ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat), M_IFADDR, M_WAITOK); bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat)); ext->icmp6_ifstat = (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat), M_IFADDR, M_WAITOK); bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat)); ext->nd_ifinfo = nd6_ifattach(ifp); ext->scope6_id = scope6_ifattach(ifp); ext->lltable = lltable_init(ifp, AF_INET6); if (ext->lltable != NULL) { ext->lltable->llt_prefix_free = in6_lltable_prefix_free; ext->lltable->llt_lookup = in6_lltable_lookup; ext->lltable->llt_dump = in6_lltable_dump; } ext->mld_ifinfo = mld_domifattach(ifp); return ext; } void in6_domifdetach(struct ifnet *ifp, void *aux) { struct in6_ifextra *ext = (struct in6_ifextra *)aux; mld_domifdetach(ifp); scope6_ifdetach(ext->scope6_id); nd6_ifdetach(ext->nd_ifinfo); lltable_free(ext->lltable); free(ext->in6_ifstat, M_IFADDR); free(ext->icmp6_ifstat, M_IFADDR); free(ext, M_IFADDR); } /* * Convert sockaddr_in6 to sockaddr_in. Original sockaddr_in6 must be * v4 mapped addr or v4 compat addr */ void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { bzero(sin, sizeof(*sin)); sin->sin_len = sizeof(struct sockaddr_in); sin->sin_family = AF_INET; sin->sin_port = sin6->sin6_port; sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3]; } /* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */ void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(struct sockaddr_in6); sin6->sin6_family = AF_INET6; sin6->sin6_port = sin->sin_port; sin6->sin6_addr.s6_addr32[0] = 0; sin6->sin6_addr.s6_addr32[1] = 0; sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP; sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr; } /* Convert sockaddr_in6 into sockaddr_in. */ void in6_sin6_2_sin_in_sock(struct sockaddr *nam) { struct sockaddr_in *sin_p; struct sockaddr_in6 sin6; /* * Save original sockaddr_in6 addr and convert it * to sockaddr_in. */ sin6 = *(struct sockaddr_in6 *)nam; sin_p = (struct sockaddr_in *)nam; in6_sin6_2_sin(sin_p, &sin6); } /* Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format. */ void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) { struct sockaddr_in *sin_p; struct sockaddr_in6 *sin6_p; sin6_p = malloc(sizeof *sin6_p, M_SONAME, M_WAITOK); sin_p = (struct sockaddr_in *)*nam; in6_sin_2_v4mapsin6(sin_p, sin6_p); free(*nam, M_SONAME); *nam = (struct sockaddr *)sin6_p; } Index: stable/9/sys/netinet6/nd6.c =================================================================== --- stable/9/sys/netinet6/nd6.c (revision 260509) +++ stable/9/sys/netinet6/nd6.c (revision 260510) @@ -1,2384 +1,2384 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) #include #include #include #include #include #include #include #include #include #include #include #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ #define SIN6(s) ((struct sockaddr_in6 *)s) /* timer values */ VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ VNET_DEFINE(int, nd6_delay) = 5; /* delay first probe time 5 second */ VNET_DEFINE(int, nd6_umaxtries) = 3; /* maximum unicast query */ VNET_DEFINE(int, nd6_mmaxtries) = 3; /* maximum multicast query */ VNET_DEFINE(int, nd6_useloopback) = 1; /* use loopback interface for * local traffic */ VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage * collection timer */ /* preventing too many loops in ND option parsing */ static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper * layer hints */ static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved * ND entries */ #define V_nd6_maxndopt VNET(nd6_maxndopt) #define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) #ifdef ND6_DEBUG VNET_DEFINE(int, nd6_debug) = 1; #else VNET_DEFINE(int, nd6_debug) = 0; #endif /* for debugging? */ #if 0 static int nd6_inuse, nd6_allocated; #endif VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) static struct sockaddr_in6 all1_sa; int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int); static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, struct ifnet *); static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); static struct llentry *nd6_free(struct llentry *, int); static void nd6_llinfo_timer(void *); static void clear_llinfo_pqueue(struct llentry *); static VNET_DEFINE(struct callout, nd6_slowtimo_ch); #define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) VNET_DEFINE(struct callout, nd6_timer_ch); void nd6_init(void) { int i; LIST_INIT(&V_nd_prefix); all1_sa.sin6_family = AF_INET6; all1_sa.sin6_len = sizeof(struct sockaddr_in6); for (i = 0; i < sizeof(all1_sa.sin6_addr); i++) all1_sa.sin6_addr.s6_addr[i] = 0xff; /* initialization of the default router list */ TAILQ_INIT(&V_nd_defrouter); /* start timer */ callout_init(&V_nd6_slowtimo_ch, 0); callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); } #ifdef VIMAGE void nd6_destroy() { callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); } #endif struct nd_ifinfo * nd6_ifattach(struct ifnet *ifp) { struct nd_ifinfo *nd; nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO); nd->initialized = 1; nd->chlim = IPV6_DEFHLIM; nd->basereachable = REACHABLE_TIME; nd->reachable = ND_COMPUTE_RTIME(nd->basereachable); nd->retrans = RETRANS_TIMER; nd->flags = ND6_IFF_PERFORMNUD; /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by * default regardless of the V_ip6_auto_linklocal configuration to * give a reasonable default behavior. */ if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) || (ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_AUTO_LINKLOCAL; /* * A loopback interface does not need to accept RTADV. * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by * default regardless of the V_ip6_accept_rtadv configuration to * prevent the interface from accepting RA messages arrived * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV. */ if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK) && (ifp->if_type != IFT_BRIDGE)) nd->flags |= ND6_IFF_ACCEPT_RTADV; if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_NO_RADR; /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */ nd6_setmtu0(ifp, nd); return nd; } void nd6_ifdetach(struct nd_ifinfo *nd) { free(nd, M_IP6NDP); } /* * Reset ND level link MTU. This function is called when the physical MTU * changes, which means we might have to adjust the ND level MTU. */ void nd6_setmtu(struct ifnet *ifp) { nd6_setmtu0(ifp, ND_IFINFO(ifp)); } /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */ void nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi) { u_int32_t omaxmtu; omaxmtu = ndi->maxmtu; switch (ifp->if_type) { case IFT_ARCNET: ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */ break; case IFT_FDDI: ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */ break; case IFT_ISO88025: ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu); break; default: ndi->maxmtu = ifp->if_mtu; break; } /* * Decreasing the interface MTU under IPV6 minimum MTU may cause * undesirable situation. We thus notify the operator of the change * explicitly. The check for omaxmtu is necessary to restrict the * log to the case of changing the MTU, not initializing it. */ if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { log(LOG_NOTICE, "nd6_setmtu0: " "new link MTU on %s (%lu) is too small for IPv6\n", if_name(ifp), (unsigned long)ndi->maxmtu); } if (ndi->maxmtu > V_in6_maxmtu) in6_setmaxmtu(); /* check all interfaces just in case */ } void nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts) { bzero(ndopts, sizeof(*ndopts)); ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; ndopts->nd_opts_last = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); if (icmp6len == 0) { ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } } /* * Take one ND option. */ struct nd_opt_hdr * nd6_option(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int olen; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return NULL; if (ndopts->nd_opts_done) return NULL; nd_opt = ndopts->nd_opts_search; /* make sure nd_opt_len is inside the buffer */ if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) { bzero(ndopts, sizeof(*ndopts)); return NULL; } olen = nd_opt->nd_opt_len << 3; if (olen == 0) { /* * Message validation requires that all included * options have a length that is greater than zero. */ bzero(ndopts, sizeof(*ndopts)); return NULL; } ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen); if (ndopts->nd_opts_search > ndopts->nd_opts_last) { /* option overruns the end of buffer, invalid */ bzero(ndopts, sizeof(*ndopts)); return NULL; } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { /* reached the end of options chain */ ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } return nd_opt; } /* * Parse multiple ND options. * This function is much easier to use, for ND routines that do not need * multiple options of the same type. */ int nd6_options(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int i = 0; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return 0; while (1) { nd_opt = nd6_option(ndopts); if (nd_opt == NULL && ndopts->nd_opts_last == NULL) { /* * Message validation requires that all included * options have a length that is greater than zero. */ ICMP6STAT_INC(icp6s_nd_badopt); bzero(ndopts, sizeof(*ndopts)); return -1; } if (nd_opt == NULL) goto skip1; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LINKADDR: case ND_OPT_TARGET_LINKADDR: case ND_OPT_MTU: case ND_OPT_REDIRECTED_HEADER: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { nd6log((LOG_INFO, "duplicated ND6 option found (type=%d)\n", nd_opt->nd_opt_type)); /* XXX bark? */ } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFORMATION: if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } ndopts->nd_opts_pi_end = (struct nd_opt_prefix_info *)nd_opt; break; /* What about ND_OPT_ROUTE_INFO? RFC 4191 */ case ND_OPT_RDNSS: /* RFC 6106 */ case ND_OPT_DNSSL: /* RFC 6106 */ /* * Silently ignore options we know and do not care about * in the kernel. */ break; default: /* * Unknown options must be silently ignored, * to accomodate future extension to the protocol. */ nd6log((LOG_DEBUG, "nd6_options: unsupported option %d - " "option ignored\n", nd_opt->nd_opt_type)); } skip1: i++; if (i > V_nd6_maxndopt) { ICMP6STAT_INC(icp6s_nd_toomanyopt); nd6log((LOG_INFO, "too many loop in nd opt\n")); break; } if (ndopts->nd_opts_done) break; } return 0; } /* * ND6 timer routine to handle ND6 entries */ void nd6_llinfo_settimer_locked(struct llentry *ln, long tick) { int canceled; LLE_WLOCK_ASSERT(ln); if (tick < 0) { ln->la_expire = 0; ln->ln_ntick = 0; canceled = callout_stop(&ln->ln_timer_ch); } else { ln->la_expire = time_second + tick / hz; LLE_ADDREF(ln); if (tick > INT_MAX) { ln->ln_ntick = tick - INT_MAX; canceled = callout_reset(&ln->ln_timer_ch, INT_MAX, nd6_llinfo_timer, ln); } else { ln->ln_ntick = 0; canceled = callout_reset(&ln->ln_timer_ch, tick, nd6_llinfo_timer, ln); } } if (canceled) LLE_REMREF(ln); } void nd6_llinfo_settimer(struct llentry *ln, long tick) { LLE_WLOCK(ln); nd6_llinfo_settimer_locked(ln, tick); LLE_WUNLOCK(ln); } static void nd6_llinfo_timer(void *arg) { struct llentry *ln; struct in6_addr *dst; struct ifnet *ifp; struct nd_ifinfo *ndi = NULL; KASSERT(arg != NULL, ("%s: arg NULL", __func__)); ln = (struct llentry *)arg; LLE_WLOCK_ASSERT(ln); ifp = ln->lle_tbl->llt_ifp; CURVNET_SET(ifp->if_vnet); if (ln->ln_ntick > 0) { if (ln->ln_ntick > INT_MAX) { ln->ln_ntick -= INT_MAX; nd6_llinfo_settimer_locked(ln, INT_MAX); } else { ln->ln_ntick = 0; nd6_llinfo_settimer_locked(ln, ln->ln_ntick); } goto done; } ndi = ND_IFINFO(ifp); dst = &L3_ADDR_SIN6(ln)->sin6_addr; if (ln->la_flags & LLE_STATIC) { goto done; } if (ln->la_flags & LLE_DELETED) { (void)nd6_free(ln, 0); ln = NULL; goto done; } switch (ln->ln_state) { case ND6_LLINFO_INCOMPLETE: if (ln->la_asked < V_nd6_mmaxtries) { ln->la_asked++; nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); LLE_WUNLOCK(ln); nd6_ns_output(ifp, NULL, dst, ln, 0); LLE_WLOCK(ln); } else { struct mbuf *m = ln->la_hold; if (m) { struct mbuf *m0; /* * assuming every packet in la_hold has the * same IP header. Send error after unlock. */ m0 = m->m_nextpkt; m->m_nextpkt = NULL; ln->la_hold = m0; clear_llinfo_pqueue(ln); } EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT); (void)nd6_free(ln, 0); ln = NULL; if (m != NULL) icmp6_error2(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0, ifp); } break; case ND6_LLINFO_REACHABLE: if (!ND6_LLINFO_PERMANENT(ln)) { ln->ln_state = ND6_LLINFO_STALE; nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } break; case ND6_LLINFO_STALE: /* Garbage Collection(RFC 2461 5.3) */ if (!ND6_LLINFO_PERMANENT(ln)) { EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); (void)nd6_free(ln, 1); ln = NULL; } break; case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ ln->la_asked = 1; ln->ln_state = ND6_LLINFO_PROBE; nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); LLE_WUNLOCK(ln); nd6_ns_output(ifp, dst, dst, ln, 0); LLE_WLOCK(ln); } else { ln->ln_state = ND6_LLINFO_STALE; /* XXX */ nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } break; case ND6_LLINFO_PROBE: if (ln->la_asked < V_nd6_umaxtries) { ln->la_asked++; nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); LLE_WUNLOCK(ln); nd6_ns_output(ifp, dst, dst, ln, 0); LLE_WLOCK(ln); } else { EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); (void)nd6_free(ln, 0); ln = NULL; } break; default: panic("%s: paths in a dark night can be confusing: %d", __func__, ln->ln_state); } done: if (ln != NULL) LLE_FREE_LOCKED(ln); CURVNET_RESTORE(); } /* * ND6 timer routine to expire default route list and prefix list */ void nd6_timer(void *arg) { CURVNET_SET((struct vnet *) arg); int s; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; struct in6_ifaddr *ia6, *nia6; callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, nd6_timer, curvnet); /* expire default router list */ s = splnet(); TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->expire && dr->expire < time_second) defrtrlist_del(dr); } /* * expire interface addresses. * in the past the loop was inside prefix expiry processing. * However, from a stricter speci-confrmance standpoint, we should * rather separate address lifetimes and prefix lifetimes. * * XXXRW: in6_ifaddrhead locking. */ addrloop: TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) { /* check address lifetime */ if (IFA6_IS_INVALID(ia6)) { int regen = 0; /* * If the expiring address is temporary, try * regenerating a new one. This would be useful when * we suspended a laptop PC, then turned it on after a * period that could invalidate all temporary * addresses. Although we may have to restart the * loop (see below), it must be after purging the * address. Otherwise, we'd see an infinite loop of * regeneration. */ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { if (regen_tmpaddr(ia6) == 0) regen = 1; } in6_purgeaddr(&ia6->ia_ifa); if (regen) goto addrloop; /* XXX: see below */ } else if (IFA6_IS_DEPRECATED(ia6)) { int oldflags = ia6->ia6_flags; ia6->ia6_flags |= IN6_IFF_DEPRECATED; /* * If a temporary address has just become deprecated, * regenerate a new one if possible. */ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (oldflags & IN6_IFF_DEPRECATED) == 0) { if (regen_tmpaddr(ia6) == 0) { /* * A new temporary address is * generated. * XXX: this means the address chain * has changed while we are still in * the loop. Although the change * would not cause disaster (because * it's not a deletion, but an * addition,) we'd rather restart the * loop just for safety. Or does this * significantly reduce performance?? */ goto addrloop; } } } else { /* * A new RA might have made a deprecated address * preferred. */ ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; } } /* expire prefix list */ LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { /* * check prefix lifetime. * since pltime is just for autoconf, pltime processing for * prefix is not necessary. */ if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME && time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) { /* * address expiration and prefix expiration are * separate. NEVER perform in6_purgeaddr here. */ prelist_remove(pr); } } splx(s); CURVNET_RESTORE(); } /* * ia6 - deprecated/invalidated temporary address */ static int regen_tmpaddr(struct in6_ifaddr *ia6) { struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *public_ifa6 = NULL; ifp = ia6->ia_ifa.ifa_ifp; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *it6; if (ifa->ifa_addr->sa_family != AF_INET6) continue; it6 = (struct in6_ifaddr *)ifa; /* ignore no autoconf addresses. */ if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; /* ignore autoconf addresses with different prefixes. */ if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) continue; /* * Now we are looking at an autoconf address with the same * prefix as ours. If the address is temporary and is still * preferred, do not create another one. It would be rare, but * could happen, for example, when we resume a laptop PC after * a long period. */ if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && !IFA6_IS_DEPRECATED(it6)) { public_ifa6 = NULL; break; } /* * This is a public autoconf address that has the same prefix * as ours. If it is preferred, keep it. We can't break the * loop here, because there may be a still-preferred temporary * address with the prefix. */ if (!IFA6_IS_DEPRECATED(it6)) public_ifa6 = it6; if (public_ifa6 != NULL) ifa_ref(&public_ifa6->ia_ifa); } IF_ADDR_RUNLOCK(ifp); if (public_ifa6 != NULL) { int e; if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) { ifa_free(&public_ifa6->ia_ifa); log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" " tmp addr,errno=%d\n", e); return (-1); } ifa_free(&public_ifa6->ia_ifa); return (0); } return (-1); } /* * Nuke neighbor cache/prefix/default router management table, right before * ifp goes away. */ void nd6_purge(struct ifnet *ifp) { struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; /* * Nuke default router list entries toward ifp. * We defer removal of default router list entries that is installed * in the routing table, in order to keep additional side effects as * small as possible. */ TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (dr->installed) continue; if (dr->ifp == ifp) defrtrlist_del(dr); } TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (!dr->installed) continue; if (dr->ifp == ifp) defrtrlist_del(dr); } /* Nuke prefix list entries toward ifp */ LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { if (pr->ndpr_ifp == ifp) { /* * Because if_detach() does *not* release prefixes * while purging addresses the reference count will * still be above zero. We therefore reset it to * make sure that the prefix really gets purged. */ pr->ndpr_refcnt = 0; /* * Previously, pr->ndpr_addr is removed as well, * but I strongly believe we don't have to do it. * nd6_purge() is only called from in6_ifdetach(), * which removes all the associated interface addresses * by itself. * (jinmei@kame.net 20010129) */ prelist_remove(pr); } } /* cancel default outgoing interface setting */ if (V_nd6_defifindex == ifp->if_index) nd6_setdefaultiface(0); if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* Refresh default router list. */ defrouter_select(); } /* XXXXX * We do not nuke the neighbor cache entries here any more * because the neighbor cache is kept in if_afdata[AF_INET6]. * nd6_purge() is invoked by in6_ifdetach() which is called * from if_detach() where everything gets purged. So let * in6_domifdetach() do the actual L2 table purging work. */ } /* * the caller acquires and releases the lock on the lltbls * Returns the llentry locked */ struct llentry * nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; int llflags; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; IF_AFDATA_LOCK_ASSERT(ifp); llflags = 0; if (flags & ND6_CREATE) llflags |= LLE_CREATE; if (flags & ND6_EXCLUSIVE) llflags |= LLE_EXCLUSIVE; ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6); if ((ln != NULL) && (llflags & LLE_CREATE)) ln->ln_state = ND6_LLINFO_NOSTATE; return (ln); } /* * Test whether a given IPv6 address is a neighbor or not, ignoring * the actual neighbor cache. The neighbor cache is ignored in order * to not reenter the routing code from within itself. */ static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) { struct nd_prefix *pr; struct ifaddr *dstaddr; /* * A link-local address is always a neighbor. * XXX: a link does not necessarily specify a single interface. */ if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { struct sockaddr_in6 sin6_copy; u_int32_t zone; /* * We need sin6_copy since sa6_recoverscope() may modify the * content (XXX). */ sin6_copy = *addr; if (sa6_recoverscope(&sin6_copy)) return (0); /* XXX: should be impossible */ if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) return (0); if (sin6_copy.sin6_scope_id == zone) return (1); else return (0); } /* * If the address matches one of our addresses, * it should be a neighbor. * If the address matches one of our on-link prefixes, it should be a * neighbor. */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { if (pr->ndpr_ifp != ifp) continue; if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { struct rtentry *rt; /* Always use the default FIB here. */ rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 0, 0, RT_DEFAULT_FIB); if (rt == NULL) continue; /* * This is the case where multiple interfaces * have the same prefix, but only one is installed * into the routing table and that prefix entry * is not the one being examined here. In the case * where RADIX_MPATH is enabled, multiple route * entries (of the same rt_key value) will be * installed because the interface addresses all * differ. */ if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) { RTFREE_LOCKED(rt); continue; } RTFREE_LOCKED(rt); } if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &addr->sin6_addr, &pr->ndpr_mask)) return (1); } /* * If the address is assigned on the node of the other side of * a p2p interface, the address should be a neighbor. */ dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr); if (dstaddr != NULL) { if (dstaddr->ifa_ifp == ifp) { ifa_free(dstaddr); return (1); } ifa_free(dstaddr); } /* * If the default router list is empty, all addresses are regarded * as on-link, and thus, as a neighbor. */ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && TAILQ_EMPTY(&V_nd_defrouter) && V_nd6_defifindex == ifp->if_index) { return (1); } return (0); } /* * Detect if a given IPv6 address identifies a neighbor on a given link. * XXX: should take care of the destination of a p2p link? */ int nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) { struct llentry *lle; int rc = 0; IF_AFDATA_UNLOCK_ASSERT(ifp); if (nd6_is_new_addr_neighbor(addr, ifp)) return (1); /* * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ IF_AFDATA_RLOCK(ifp); if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { LLE_RUNLOCK(lle); rc = 1; } IF_AFDATA_RUNLOCK(ifp); return (rc); } /* * Free an nd6 llinfo entry. * Since the function would cause significant changes in the kernel, DO NOT * make it global, unless you have a strong reason for the change, and are sure * that the change is safe. */ static struct llentry * nd6_free(struct llentry *ln, int gc) { struct llentry *next; struct nd_defrouter *dr; struct ifnet *ifp; LLE_WLOCK_ASSERT(ln); /* * we used to have pfctlinput(PRC_HOSTDEAD) here. * even though it is not harmful, it was not really necessary. */ /* cancel timer */ nd6_llinfo_settimer_locked(ln, -1); ifp = ln->lle_tbl->llt_ifp; if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); if (dr != NULL && dr->expire && ln->ln_state == ND6_LLINFO_STALE && gc) { /* * If the reason for the deletion is just garbage * collection, and the neighbor is an active default * router, do not delete it. Instead, reset the GC * timer using the router's lifetime. * Simply deleting the entry would affect default * router selection, which is not necessarily a good * thing, especially when we're using router preference * values. * XXX: the check for ln_state would be redundant, * but we intentionally keep it just in case. */ if (dr->expire > time_second) nd6_llinfo_settimer_locked(ln, (dr->expire - time_second) * hz); else nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); next = LIST_NEXT(ln, lle_next); LLE_REMREF(ln); LLE_WUNLOCK(ln); return (next); } if (dr) { /* * Unreachablity of a router might affect the default * router selection and on-link detection of advertised * prefixes. */ /* * Temporarily fake the state to choose a new default * router and to perform on-link determination of * prefixes correctly. * Below the state will be set correctly, * or the entry itself will be deleted. */ ln->ln_state = ND6_LLINFO_INCOMPLETE; } if (ln->ln_router || dr) { /* * We need to unlock to avoid a LOR with rt6_flush() with the * rnh and for the calls to pfxlist_onlink_check() and * defrouter_select() in the block further down for calls * into nd6_lookup(). We still hold a ref. */ LLE_WUNLOCK(ln); /* * rt6_flush must be called whether or not the neighbor * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); } if (dr) { /* * Since defrouter_select() does not affect the * on-link determination and MIP6 needs the check * before the default router selection, we perform * the check now. */ pfxlist_onlink_check(); /* * Refresh default router list. */ defrouter_select(); } if (ln->ln_router || dr) LLE_WLOCK(ln); } /* * Before deleting the entry, remember the next entry as the * return value. We need this because pfxlist_onlink_check() above * might have freed other entries (particularly the old next entry) as * a side effect (XXX). */ next = LIST_NEXT(ln, lle_next); /* * Save to unlock. We still hold an extra reference and will not * free(9) in llentry_free() if someone else holds one as well. */ LLE_WUNLOCK(ln); IF_AFDATA_LOCK(ifp); LLE_WLOCK(ln); /* Guard against race with other llentry_free(). */ if (ln->la_flags & LLE_LINKED) { LLE_REMREF(ln); llentry_free(ln); } else LLE_FREE_LOCKED(ln); IF_AFDATA_UNLOCK(ifp); return (next); } /* * Upper-layer reachability hint for Neighbor Unreachability Detection. * * XXX cost-effective methods? */ void nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force) { struct llentry *ln; struct ifnet *ifp; if ((dst6 == NULL) || (rt == NULL)) return; ifp = rt->rt_ifp; - IF_AFDATA_LOCK(ifp); + IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL); - IF_AFDATA_UNLOCK(ifp); + IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) return; if (ln->ln_state < ND6_LLINFO_REACHABLE) goto done; /* * if we get upper-layer reachability confirmation many times, * it is possible we have false information. */ if (!force) { ln->ln_byhint++; if (ln->ln_byhint > V_nd6_maxnudhint) { goto done; } } ln->ln_state = ND6_LLINFO_REACHABLE; if (!ND6_LLINFO_PERMANENT(ln)) { nd6_llinfo_settimer_locked(ln, (long)ND_IFINFO(rt->rt_ifp)->reachable * hz); } done: LLE_WUNLOCK(ln); } /* * Rejuvenate this function for routing operations related * processing. */ void nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) { struct sockaddr_in6 *gateway; struct nd_defrouter *dr; struct ifnet *ifp; RT_LOCK_ASSERT(rt); gateway = (struct sockaddr_in6 *)rt->rt_gateway; ifp = rt->rt_ifp; switch (req) { case RTM_ADD: break; case RTM_DELETE: if (!ifp) return; /* * Only indirect routes are interesting. */ if ((rt->rt_flags & RTF_GATEWAY) == 0) return; /* * check for default route */ if (IN6_ARE_ADDR_EQUAL(&in6addr_any, &SIN6(rt_key(rt))->sin6_addr)) { dr = defrouter_lookup(&gateway->sin6_addr, ifp); if (dr != NULL) dr->installed = 0; } break; } } int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) { struct in6_drlist *drl = (struct in6_drlist *)data; struct in6_oprlist *oprl = (struct in6_oprlist *)data; struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; struct nd_defrouter *dr; struct nd_prefix *pr; int i = 0, error = 0; int s; switch (cmd) { case SIOCGDRLST_IN6: /* * obsolete API, use sysctl under net.inet6.icmp6 */ bzero(drl, sizeof(*drl)); s = splnet(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { if (i >= DRLSTSIZ) break; drl->defrouter[i].rtaddr = dr->rtaddr; in6_clearscope(&drl->defrouter[i].rtaddr); drl->defrouter[i].flags = dr->flags; drl->defrouter[i].rtlifetime = dr->rtlifetime; drl->defrouter[i].expire = dr->expire; drl->defrouter[i].if_index = dr->ifp->if_index; i++; } splx(s); break; case SIOCGPRLST_IN6: /* * obsolete API, use sysctl under net.inet6.icmp6 * * XXX the structure in6_prlist was changed in backward- * incompatible manner. in6_oprlist is used for SIOCGPRLST_IN6, * in6_prlist is used for nd6_sysctl() - fill_prlist(). */ /* * XXX meaning of fields, especialy "raflags", is very * differnet between RA prefix list and RR/static prefix list. * how about separating ioctls into two? */ bzero(oprl, sizeof(*oprl)); s = splnet(); LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { struct nd_pfxrouter *pfr; int j; if (i >= PRLSTSIZ) break; oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr; oprl->prefix[i].raflags = pr->ndpr_raf; oprl->prefix[i].prefixlen = pr->ndpr_plen; oprl->prefix[i].vltime = pr->ndpr_vltime; oprl->prefix[i].pltime = pr->ndpr_pltime; oprl->prefix[i].if_index = pr->ndpr_ifp->if_index; if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) oprl->prefix[i].expire = 0; else { time_t maxexpire; /* XXX: we assume time_t is signed. */ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate) { oprl->prefix[i].expire = pr->ndpr_lastupdate + pr->ndpr_vltime; } else oprl->prefix[i].expire = maxexpire; } j = 0; LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { if (j < DRLSTSIZ) { #define RTRADDR oprl->prefix[i].advrtr[j] RTRADDR = pfr->router->rtaddr; in6_clearscope(&RTRADDR); #undef RTRADDR } j++; } oprl->prefix[i].advrtrs = j; oprl->prefix[i].origin = PR_ORIG_RA; i++; } splx(s); break; case OSIOCGIFINFO_IN6: #define ND ndi->ndi /* XXX: old ndp(8) assumes a positive value for linkmtu. */ bzero(&ND, sizeof(ND)); ND.linkmtu = IN6_LINKMTU(ifp); ND.maxmtu = ND_IFINFO(ifp)->maxmtu; ND.basereachable = ND_IFINFO(ifp)->basereachable; ND.reachable = ND_IFINFO(ifp)->reachable; ND.retrans = ND_IFINFO(ifp)->retrans; ND.flags = ND_IFINFO(ifp)->flags; ND.recalctm = ND_IFINFO(ifp)->recalctm; ND.chlim = ND_IFINFO(ifp)->chlim; break; case SIOCGIFINFO_IN6: ND = *ND_IFINFO(ifp); break; case SIOCSIFINFO_IN6: /* * used to change host variables from userland. * intented for a use on router to reflect RA configurations. */ /* 0 means 'unspecified' */ if (ND.linkmtu != 0) { if (ND.linkmtu < IPV6_MMTU || ND.linkmtu > IN6_LINKMTU(ifp)) { error = EINVAL; break; } ND_IFINFO(ifp)->linkmtu = ND.linkmtu; } if (ND.basereachable != 0) { int obasereachable = ND_IFINFO(ifp)->basereachable; ND_IFINFO(ifp)->basereachable = ND.basereachable; if (ND.basereachable != obasereachable) ND_IFINFO(ifp)->reachable = ND_COMPUTE_RTIME(ND.basereachable); } if (ND.retrans != 0) ND_IFINFO(ifp)->retrans = ND.retrans; if (ND.chlim != 0) ND_IFINFO(ifp)->chlim = ND.chlim; /* FALLTHROUGH */ case SIOCSIFINFO_FLAGS: { struct ifaddr *ifa; struct in6_ifaddr *ia; if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && !(ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 1->0 transision */ /* * If the interface is marked as ND6_IFF_IFDISABLED and * has an link-local address with IN6_IFF_DUPLICATED, * do not clear ND6_IFF_IFDISABLED. * See RFC 4862, Section 5.4.5. */ int duplicated_linklocal = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { duplicated_linklocal = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (duplicated_linklocal) { ND.flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "Cannot enable an interface" " with a link-local address marked" " duplicate.\n"); } else { ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED; if (ifp->if_flags & IFF_UP) in6_if_up(ifp); } } else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && (ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 0->1 transision */ /* Mark all IPv6 address as tentative. */ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; ia->ia6_flags |= IN6_IFF_TENTATIVE; } IF_ADDR_RUNLOCK(ifp); } if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) { if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) { /* auto_linklocal 0->1 transision */ /* If no link-local address on ifp, configure */ ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL; in6_ifattach(ifp, NULL); } else if (!(ND.flags & ND6_IFF_IFDISABLED) && ifp->if_flags & IFF_UP) { /* * When the IF already has * ND6_IFF_AUTO_LINKLOCAL, no link-local * address is assigned, and IFF_UP, try to * assign one. */ int haslinklocal = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { haslinklocal = 1; break; } } IF_ADDR_RUNLOCK(ifp); if (!haslinklocal) in6_ifattach(ifp, NULL); } } } ND_IFINFO(ifp)->flags = ND.flags; break; #undef ND case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */ /* sync kernel routing table with the default router list */ defrouter_reset(); defrouter_select(); break; case SIOCSPFXFLUSH_IN6: { /* flush all the prefix advertised by routers */ struct nd_prefix *pr, *next; s = splnet(); LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { struct in6_ifaddr *ia, *ia_next; if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* XXX */ /* do we really have to remove addresses as well? */ /* XXXRW: in6_ifaddrhead locking. */ TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link, ia_next) { if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; if (ia->ia6_ndpr == pr) in6_purgeaddr(&ia->ia_ifa); } prelist_remove(pr); } splx(s); break; } case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ struct nd_defrouter *dr, *next; s = splnet(); defrouter_reset(); TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) { defrtrlist_del(dr); } defrouter_select(); splx(s); break; } case SIOCGNBRINFO_IN6: { struct llentry *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) return (error); IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&nb_addr, 0, ifp); IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { error = EINVAL; break; } nbi->state = ln->ln_state; nbi->asked = ln->la_asked; nbi->isrouter = ln->ln_router; nbi->expire = ln->la_expire; LLE_RUNLOCK(ln); break; } case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ ndif->ifindex = V_nd6_defifindex; break; case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ return (nd6_setdefaultiface(ndif->ifindex)); } return (error); } /* * Create neighbor cache entry and cache link-layer address, * on reception of inbound ND6 packets. (RS/RA/NS/redirect) * * type - ICMP6 type * code - type dependent information * * XXXXX * The caller of this function already acquired the ndp * cache table lock because the cache entry is returned. */ struct llentry * nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, int lladdrlen, int type, int code) { struct llentry *ln = NULL; int is_newentry; int do_update; int olladdr; int llchange; int flags; int newstate = 0; uint16_t router = 0; struct sockaddr_in6 sin6; struct mbuf *chain = NULL; int static_route = 0; IF_AFDATA_UNLOCK_ASSERT(ifp); KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__)); KASSERT(from != NULL, ("%s: from == NULL", __func__)); /* nothing must be updated for unspecified address */ if (IN6_IS_ADDR_UNSPECIFIED(from)) return NULL; /* * Validation about ifp->if_addrlen and lladdrlen must be done in * the caller. * * XXX If the link does not have link-layer adderss, what should * we do? (ifp->if_addrlen == 0) * Spec says nothing in sections for RA, RS and NA. There's small * description on it in NS section (RFC 2461 7.2.3). */ flags = lladdr ? ND6_EXCLUSIVE : 0; - IF_AFDATA_LOCK(ifp); + IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(from, flags, ifp); - + IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { flags |= ND6_EXCLUSIVE; + IF_AFDATA_LOCK(ifp); ln = nd6_lookup(from, flags | ND6_CREATE, ifp); IF_AFDATA_UNLOCK(ifp); is_newentry = 1; } else { - IF_AFDATA_UNLOCK(ifp); /* do nothing if static ndp is set */ if (ln->la_flags & LLE_STATIC) { static_route = 1; goto done; } is_newentry = 0; } if (ln == NULL) return (NULL); olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; if (olladdr && lladdr) { llchange = bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen); } else llchange = 0; /* * newentry olladdr lladdr llchange (*=record) * 0 n n -- (1) * 0 y n -- (2) * 0 n y -- (3) * STALE * 0 y y n (4) * * 0 y y y (5) * STALE * 1 -- n -- (6) NOSTATE(= PASSIVE) * 1 -- y -- (7) * STALE */ if (lladdr) { /* (3-5) and (7) */ /* * Record source link-layer address * XXX is it dependent to ifp->if_type? */ bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); ln->la_flags |= LLE_VALID; EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } if (!is_newentry) { if ((!olladdr && lladdr != NULL) || /* (3) */ (olladdr && lladdr != NULL && llchange)) { /* (5) */ do_update = 1; newstate = ND6_LLINFO_STALE; } else /* (1-2,4) */ do_update = 0; } else { do_update = 1; if (lladdr == NULL) /* (6) */ newstate = ND6_LLINFO_NOSTATE; else /* (7) */ newstate = ND6_LLINFO_STALE; } if (do_update) { /* * Update the state of the neighbor cache. */ ln->ln_state = newstate; if (ln->ln_state == ND6_LLINFO_STALE) { /* * XXX: since nd6_output() below will cause * state tansition to DELAY and reset the timer, * we must set the timer now, although it is actually * meaningless. */ nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); if (ln->la_hold) { struct mbuf *m_hold, *m_hold_next; /* * reset the la_hold in advance, to explicitly * prevent a la_hold lookup in nd6_output() * (wouldn't happen, though...) */ for (m_hold = ln->la_hold, ln->la_hold = NULL; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; /* * we assume ifp is not a p2p here, so * just set the 2nd argument as the * 1st one. */ nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); } /* * If we have mbufs in the chain we need to do * deferred transmit. Copy the address from the * llentry before dropping the lock down below. */ if (chain != NULL) memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); } } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* probe right away */ nd6_llinfo_settimer_locked((void *)ln, 0); } } /* * ICMP6 type dependent behavior. * * NS: clear IsRouter if new entry * RS: clear IsRouter * RA: set IsRouter if there's lladdr * redir: clear IsRouter if new entry * * RA case, (1): * The spec says that we must set IsRouter in the following cases: * - If lladdr exist, set IsRouter. This means (1-5). * - If it is old entry (!newentry), set IsRouter. This means (7). * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. * A quetion arises for (1) case. (1) case has no lladdr in the * neighbor cache, this is similar to (6). * This case is rare but we figured that we MUST NOT set IsRouter. * * newentry olladdr lladdr llchange NS RS RA redir * D R * 0 n n -- (1) c ? s * 0 y n -- (2) c s s * 0 n y -- (3) c s s * 0 y y n (4) c s s * 0 y y y (5) c s s * 1 -- n -- (6) c c c s * 1 -- y -- (7) c c s c s * * (c=clear s=set) */ switch (type & 0xff) { case ND_NEIGHBOR_SOLICIT: /* * New entry must have is_router flag cleared. */ if (is_newentry) /* (6-7) */ ln->ln_router = 0; break; case ND_REDIRECT: /* * If the icmp is a redirect to a better router, always set the * is_router flag. Otherwise, if the entry is newly created, * clear the flag. [RFC 2461, sec 8.3] */ if (code == ND_REDIRECT_ROUTER) ln->ln_router = 1; else if (is_newentry) /* (6-7) */ ln->ln_router = 0; break; case ND_ROUTER_SOLICIT: /* * is_router flag must always be cleared. */ ln->ln_router = 0; break; case ND_ROUTER_ADVERT: /* * Mark an entry with lladdr as a router. */ if ((!is_newentry && (olladdr || lladdr)) || /* (2-5) */ (is_newentry && lladdr)) { /* (7) */ ln->ln_router = 1; } break; } if (ln != NULL) { static_route = (ln->la_flags & LLE_STATIC); router = ln->ln_router; if (flags & ND6_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); if (static_route) ln = NULL; } if (chain) nd6_output_flush(ifp, ifp, chain, &sin6, NULL); /* * When the link-layer address of a router changes, select the * best router again. In particular, when the neighbor entry is newly * created, it might affect the selection policy. * Question: can we restrict the first condition to the "is_newentry" * case? * XXX: when we hear an RA from a new router with the link-layer * address option, defrouter_select() is called twice, since * defrtrlist_update called the function as well. However, I believe * we can compromise the overhead, since it only happens the first * time. * XXX: although defrouter_select() should not have a bad effect * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. */ if (do_update && router && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * guaranteed recursion */ defrouter_select(); } return (ln); done: if (ln != NULL) { if (flags & ND6_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); if (static_route) ln = NULL; } return (ln); } static void nd6_slowtimo(void *arg) { CURVNET_SET((struct vnet *) arg); struct nd_ifinfo *nd6if; struct ifnet *ifp; callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_list) { nd6if = ND_IFINFO(ifp); if (nd6if->basereachable && /* already initialized */ (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { /* * Since reachable time rarely changes by router * advertisements, we SHOULD insure that a new random * value gets recomputed at least once every few hours. * (RFC 2461, 6.3.4) */ nd6if->recalctm = V_nd6_recalc_reachtm_interval; nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } IFNET_RUNLOCK_NOSLEEP(); CURVNET_RESTORE(); } int nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, struct sockaddr_in6 *dst, struct rtentry *rt0) { return (nd6_output_lle(ifp, origifp, m0, dst, rt0, NULL, NULL)); } /* * Note that I'm not enforcing any global serialization * lle state or asked changes here as the logic is too * complicated to avoid having to always acquire an exclusive * lock * KMM * */ #define senderr(e) { error = (e); goto bad;} int nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0, struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle, struct mbuf **chain) { struct mbuf *m = m0; struct m_tag *mtag; struct llentry *ln = lle; struct ip6_hdr *ip6; int error = 0; int flags = 0; int ip6len; #ifdef INVARIANTS if (lle != NULL) { LLE_WLOCK_ASSERT(lle); KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed")); } #endif if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) goto sendpkt; if (nd6_need_cache(ifp) == 0) goto sendpkt; /* * next hop determination. This routine is derived from ether_output. */ /* * Address resolution or Neighbor Unreachability Detection * for the next hop. * At this point, the destination of the packet must be a unicast * or an anycast address(i.e. not a multicast). */ flags = ((m != NULL) || (lle != NULL)) ? LLE_EXCLUSIVE : 0; if (ln == NULL) { retry: - IF_AFDATA_LOCK(ifp); + IF_AFDATA_RLOCK(ifp); ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst); - IF_AFDATA_UNLOCK(ifp); + IF_AFDATA_RUNLOCK(ifp); if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), * the condition below is not very efficient. But we believe * it is tolerable, because this should be a rare case. */ flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0); IF_AFDATA_LOCK(ifp); ln = nd6_lookup(&dst->sin6_addr, flags, ifp); IF_AFDATA_UNLOCK(ifp); } } if (ln == NULL) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { char ip6buf[INET6_ADDRSTRLEN]; log(LOG_DEBUG, "nd6_output: can't allocate llinfo for %s " "(ln=%p)\n", ip6_sprintf(ip6buf, &dst->sin6_addr), ln); senderr(EIO); /* XXX: good error? */ } goto sendpkt; /* send anyway */ } /* We don't have to do link-layer address resolution on a p2p link. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ln->ln_state < ND6_LLINFO_REACHABLE) { if ((flags & LLE_EXCLUSIVE) == 0) { flags |= LLE_EXCLUSIVE; goto retry; } ln->ln_state = ND6_LLINFO_STALE; nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } /* * The first time we send a packet to a neighbor whose entry is * STALE, we have to change the state to DELAY and a sets a timer to * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do * neighbor unreachability detection on expiration. * (RFC 2461 7.3.3) */ if (ln->ln_state == ND6_LLINFO_STALE) { if ((flags & LLE_EXCLUSIVE) == 0) { flags |= LLE_EXCLUSIVE; LLE_RUNLOCK(ln); goto retry; } ln->la_asked = 0; ln->ln_state = ND6_LLINFO_DELAY; nd6_llinfo_settimer_locked(ln, (long)V_nd6_delay * hz); } /* * If the neighbor cache entry has a state other than INCOMPLETE * (i.e. its link-layer address is already resolved), just * send the packet. */ if (ln->ln_state > ND6_LLINFO_INCOMPLETE) goto sendpkt; /* * There is a neighbor cache entry, but no ethernet address * response yet. Append this latest packet to the end of the * packet queue in the mbuf, unless the number of the packet * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen, * the oldest packet in the queue will be removed. */ if (ln->ln_state == ND6_LLINFO_NOSTATE) ln->ln_state = ND6_LLINFO_INCOMPLETE; if ((flags & LLE_EXCLUSIVE) == 0) { flags |= LLE_EXCLUSIVE; LLE_RUNLOCK(ln); goto retry; } LLE_WLOCK_ASSERT(ln); if (ln->la_hold) { struct mbuf *m_hold; int i; i = 0; for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) { i++; if (m_hold->m_nextpkt == NULL) { m_hold->m_nextpkt = m; break; } } while (i >= V_nd6_maxqueuelen) { m_hold = ln->la_hold; ln->la_hold = ln->la_hold->m_nextpkt; m_freem(m_hold); i--; } } else { ln->la_hold = m; } /* * If there has been no NS for the neighbor after entering the * INCOMPLETE state, send the first solicitation. */ if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) { ln->la_asked++; nd6_llinfo_settimer_locked(ln, (long)ND_IFINFO(ifp)->retrans * hz / 1000); LLE_WUNLOCK(ln); nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0); if (lle != NULL && ln == lle) LLE_WLOCK(lle); } else if (lle == NULL || ln != lle) { /* * We did the lookup (no lle arg) so we * need to do the unlock here. */ LLE_WUNLOCK(ln); } return (0); sendpkt: /* discard the packet if IPv6 operation is disabled on the interface */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { error = ENETDOWN; /* better error? */ goto bad; } /* * ln is valid and the caller did not pass in * an llentry */ if ((ln != NULL) && (lle == NULL)) { if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); } #ifdef MAC mac_netinet6_nd6_send(ifp, m); #endif /* * If called from nd6_ns_output() (NS), nd6_na_output() (NA), * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA * as handled by rtsol and rtadvd), mbufs will be tagged for SeND * to be diverted to user space. When re-injected into the kernel, * send_output() will directly dispatch them to the outgoing interface. */ if (send_sendso_input_hook != NULL) { mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL); if (mtag != NULL) { ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); /* Use the SEND socket */ error = send_sendso_input_hook(m, ifp, SND_OUT, ip6len); /* -1 == no app on SEND socket */ if (error == 0 || error != -1) return (error); } } /* * We were passed in a pointer to an lle with the lock held * this means that we can't call if_output as we will * recurse on the lle lock - so what we do is we create * a list of mbufs to send and transmit them in the caller * after the lock is dropped */ if (lle != NULL) { if (*chain == NULL) *chain = m; else { struct mbuf *mb; /* * append mbuf to end of deferred chain */ mb = *chain; while (mb->m_nextpkt != NULL) mb = mb->m_nextpkt; mb->m_nextpkt = m; } return (error); } /* Reset layer specific mbuf flags to avoid confusing lower layers. */ m->m_flags &= ~(M_PROTOFLAGS); if ((ifp->if_flags & IFF_LOOPBACK) != 0) { return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst, NULL)); } error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL); return (error); bad: /* * ln is valid and the caller did not pass in * an llentry */ if ((ln != NULL) && (lle == NULL)) { if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); } if (m) m_freem(m); return (error); } #undef senderr int nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, struct sockaddr_in6 *dst, struct route *ro) { struct mbuf *m, *m_head; struct ifnet *outifp; int error = 0; m_head = chain; if ((ifp->if_flags & IFF_LOOPBACK) != 0) outifp = origifp; else outifp = ifp; while (m_head) { m = m_head; m_head = m_head->m_nextpkt; error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro); } /* * XXX * note that intermediate errors are blindly ignored - but this is * the same convention as used with nd6_output when called by * nd6_cache_lladdr */ return (error); } int nd6_need_cache(struct ifnet *ifp) { /* * XXX: we currently do not make neighbor cache on any interface * other than ARCnet, Ethernet, FDDI and GIF. * * RFC2893 says: * - unidirectional tunnels needs no ND */ switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: #ifdef IFT_L2VLAN case IFT_L2VLAN: #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif #ifdef IFT_CARP case IFT_CARP: #endif case IFT_INFINIBAND: case IFT_GIF: /* XXX need more cases? */ case IFT_PPP: case IFT_TUNNEL: case IFT_BRIDGE: case IFT_PROPVIRTUAL: return (1); default: return (0); } } /* * the callers of this function need to be re-worked to drop * the lle lock, drop here for now */ int nd6_storelladdr(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, u_char *desten, struct llentry **lle) { struct llentry *ln; *lle = NULL; IF_AFDATA_UNLOCK_ASSERT(ifp); if (m != NULL && m->m_flags & M_MCAST) { int i; switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: #ifdef IFT_L2VLAN case IFT_L2VLAN: #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif case IFT_BRIDGE: case IFT_ISO88025: ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, desten); return (0); case IFT_IEEE1394: /* * netbsd can use if_broadcastaddr, but we don't do so * to reduce # of ifdef. */ for (i = 0; i < ifp->if_addrlen; i++) desten[i] = ~0; return (0); case IFT_ARCNET: *desten = 0; return (0); default: m_freem(m); return (EAFNOSUPPORT); } } /* * the entry should have been created in nd6_store_lladdr */ IF_AFDATA_RLOCK(ifp); ln = lla_lookup(LLTABLE6(ifp), 0, dst); IF_AFDATA_RUNLOCK(ifp); if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) { if (ln != NULL) LLE_RUNLOCK(ln); /* this could happen, if we could not allocate memory */ m_freem(m); return (1); } bcopy(&ln->ll_addr, desten, ifp->if_addrlen); *lle = ln; LLE_RUNLOCK(ln); /* * A *small* use after free race exists here */ return (0); } static void clear_llinfo_pqueue(struct llentry *ln) { struct mbuf *m_hold, *m_hold_next; for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_freem(m_hold); } ln->la_hold = NULL; return; } static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS); static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS); #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet6_icmp6); #endif SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, CTLFLAG_RD, nd6_sysctl_drlist, ""); SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, CTLFLAG_RD, nd6_sysctl_prlist, ""); SYSCTL_VNET_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen, CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, ""); static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) { struct in6_defrouter d; struct nd_defrouter *dr; int error; if (req->newptr) return (EPERM); bzero(&d, sizeof(d)); d.rtaddr.sin6_family = AF_INET6; d.rtaddr.sin6_len = sizeof(d.rtaddr); /* * XXX locking */ TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { d.rtaddr.sin6_addr = dr->rtaddr; error = sa6_recoverscope(&d.rtaddr); if (error != 0) return (error); d.flags = dr->flags; d.rtlifetime = dr->rtlifetime; d.expire = dr->expire; d.if_index = dr->ifp->if_index; error = SYSCTL_OUT(req, &d, sizeof(d)); if (error != 0) return (error); } return (0); } static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) { struct in6_prefix p; struct sockaddr_in6 s6; struct nd_prefix *pr; struct nd_pfxrouter *pfr; time_t maxexpire; int error; char ip6buf[INET6_ADDRSTRLEN]; if (req->newptr) return (EPERM); bzero(&p, sizeof(p)); p.origin = PR_ORIG_RA; bzero(&s6, sizeof(s6)); s6.sin6_family = AF_INET6; s6.sin6_len = sizeof(s6); /* * XXX locking */ LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { p.prefix = pr->ndpr_prefix; if (sa6_recoverscope(&p.prefix)) { log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &p.prefix.sin6_addr)); /* XXX: press on... */ } p.raflags = pr->ndpr_raf; p.prefixlen = pr->ndpr_plen; p.vltime = pr->ndpr_vltime; p.pltime = pr->ndpr_pltime; p.if_index = pr->ndpr_ifp->if_index; if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) p.expire = 0; else { /* XXX: we assume time_t is signed. */ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate) p.expire = pr->ndpr_lastupdate + pr->ndpr_vltime; else p.expire = maxexpire; } p.refcnt = pr->ndpr_refcnt; p.flags = pr->ndpr_stateflags; p.advrtrs = 0; LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) p.advrtrs++; error = SYSCTL_OUT(req, &p, sizeof(p)); if (error != 0) return (error); LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { s6.sin6_addr = pfr->router->rtaddr; if (sa6_recoverscope(&s6)) log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &pfr->router->rtaddr)); error = SYSCTL_OUT(req, &s6, sizeof(s6)); if (error != 0) return (error); } } return (0); } Index: stable/9/sys/netinet6/nd6_nbr.c =================================================================== --- stable/9/sys/netinet6/nd6_nbr.c (revision 260509) +++ stable/9/sys/netinet6/nd6_nbr.c (revision 260510) @@ -1,1584 +1,1584 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_mpath.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RADIX_MPATH #include #endif #include #include #include #define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) #include #include #include #include #include #include #include #include #include #define SDL(s) ((struct sockaddr_dl *)s) struct dadq; static struct dadq *nd6_dad_find(struct ifaddr *); static void nd6_dad_starttimer(struct dadq *, int); static void nd6_dad_stoptimer(struct dadq *); static void nd6_dad_timer(struct dadq *); static void nd6_dad_ns_output(struct dadq *, struct ifaddr *); static void nd6_dad_ns_input(struct ifaddr *); static void nd6_dad_na_input(struct ifaddr *); static void nd6_na_output_fib(struct ifnet *, const struct in6_addr *, const struct in6_addr *, u_long, int, struct sockaddr *, u_int); VNET_DEFINE(int, dad_ignore_ns) = 0; /* ignore NS in DAD - specwise incorrect*/ VNET_DEFINE(int, dad_maxtry) = 15; /* max # of *tries* to transmit DAD packet */ #define V_dad_ignore_ns VNET(dad_ignore_ns) #define V_dad_maxtry VNET(dad_maxtry) /* * Input a Neighbor Solicitation Message. * * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) */ void nd6_ns_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_neighbor_solicit *nd_ns; struct in6_addr saddr6 = ip6->ip6_src; struct in6_addr daddr6 = ip6->ip6_dst; struct in6_addr taddr6; struct in6_addr myaddr6; char *lladdr = NULL; struct ifaddr *ifa = NULL; int lladdrlen = 0; int anycast = 0, proxy = 0, tentative = 0; int tlladdr; int rflag; union nd_opts ndopts; struct sockaddr_dl proxydl; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; rflag = (V_ip6_forwarding) ? ND_NA_FLAG_ROUTER : 0; if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && V_ip6_norbit_raif) rflag = 0; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len); if (nd_ns == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */ taddr6 = nd_ns->nd_ns_target; if (in6_setscope(&taddr6, ifp, NULL) != 0) goto bad; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { /* dst has to be a solicited node multicast address. */ if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL && /* don't check ifindex portion */ daddr6.s6_addr32[1] == 0 && daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE && daddr6.s6_addr8[12] == 0xff) { ; /* good */ } else { nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet " "(wrong ip6 dst)\n")); goto bad; } } else if (!V_nd6_onlink_ns_rfc4861) { struct sockaddr_in6 src_sa6; /* * According to recent IETF discussions, it is not a good idea * to accept a NS from an address which would not be deemed * to be a neighbor otherwise. This point is expected to be * clarified in future revisions of the specification. */ bzero(&src_sa6, sizeof(src_sa6)); src_sa6.sin6_family = AF_INET6; src_sa6.sin6_len = sizeof(src_sa6); src_sa6.sin6_addr = saddr6; if (nd6_is_addr_neighbor(&src_sa6, ifp) == 0) { nd6log((LOG_INFO, "nd6_ns_input: " "NS packet from non-neighbor\n")); goto bad; } } if (IN6_IS_ADDR_MULTICAST(&taddr6)) { nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n")); goto bad; } icmp6len -= sizeof(*nd_ns); nd6_option_init(nd_ns + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_ns_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_src_lladdr) { lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; } if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) { nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet " "(link-layer address option)\n")); goto bad; } /* * Attaching target link-layer address to the NA? * (RFC 2461 7.2.4) * * NS IP dst is unicast/anycast MUST NOT add * NS IP dst is solicited-node multicast MUST add * * In implementation, we add target link-layer address by default. * We do not add one in MUST NOT cases. */ if (!IN6_IS_ADDR_MULTICAST(&daddr6)) tlladdr = 0; else tlladdr = 1; /* * Target address (taddr6) must be either: * (1) Valid unicast/anycast address for my receiving interface, * (2) Unicast address for which I'm offering proxy service, or * (3) "tentative" address on which DAD is being performed. */ /* (1) and (3) check. */ if (ifp->if_carp) ifa = (*carp_iamatch6_p)(ifp, &taddr6); if (ifa == NULL) ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); /* (2) check. */ if (ifa == NULL) { struct rtentry *rt; struct sockaddr_in6 tsin6; int need_proxy; #ifdef RADIX_MPATH struct route_in6 ro; #endif bzero(&tsin6, sizeof tsin6); tsin6.sin6_len = sizeof(struct sockaddr_in6); tsin6.sin6_family = AF_INET6; tsin6.sin6_addr = taddr6; /* Always use the default FIB. */ #ifdef RADIX_MPATH bzero(&ro, sizeof(ro)); ro.ro_dst = tsin6; rtalloc_mpath_fib((struct route *)&ro, RTF_ANNOUNCE, RT_DEFAULT_FIB); rt = ro.ro_rt; #else rt = in6_rtalloc1((struct sockaddr *)&tsin6, 0, 0, RT_DEFAULT_FIB); #endif need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 && rt->rt_gateway->sa_family == AF_LINK); if (rt != NULL) { /* * Make a copy while we can be sure that rt_gateway * is still stable before unlocking to avoid lock * order problems. proxydl will only be used if * proxy will be set in the next block. */ if (need_proxy) proxydl = *SDL(rt->rt_gateway); RTFREE_LOCKED(rt); } if (need_proxy) { /* * proxy NDP for single entry */ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); if (ifa) proxy = 1; } } if (ifa == NULL) { /* * We've got an NS packet, and we don't have that adddress * assigned for us. We MUST silently ignore it. * See RFC2461 7.2.3. */ goto freeit; } myaddr6 = *IFA_IN6(ifa); anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST; tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED) goto freeit; if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s " "(if %d, NS packet %d)\n", ip6_sprintf(ip6bufs, &taddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) { nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n", ip6_sprintf(ip6bufs, &saddr6))); goto freeit; } /* * We have neighbor solicitation packet, with target address equals to * one of my tentative address. * * src addr how to process? * --- --- * multicast of course, invalid (rejected in ip6_input) * unicast somebody is doing address resolution -> ignore * unspec dup address detection * * The processing is defined in RFC 2462. */ if (tentative) { /* * If source address is unspecified address, it is for * duplicate address detection. * * If not, the packet is for addess resolution; * silently ignore it. */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) nd6_dad_ns_input(ifa); goto freeit; } /* * If the source address is unspecified address, entries must not * be created or updated. * It looks that sender is performing DAD. Output NA toward * all-node multicast address, to tell the sender that I'm using * the address. * S bit ("solicited") must be zero. */ if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) { struct in6_addr in6_all; in6_all = in6addr_linklocal_allnodes; if (in6_setscope(&in6_all, ifp, NULL) != 0) goto bad; nd6_na_output_fib(ifp, &in6_all, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | rflag, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m)); goto freeit; } nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_NEIGHBOR_SOLICIT, 0); nd6_na_output_fib(ifp, &saddr6, &taddr6, ((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) | rflag | ND_NA_FLAG_SOLICITED, tlladdr, proxy ? (struct sockaddr *)&proxydl : NULL, M_GETFIB(m)); freeit: if (ifa != NULL) ifa_free(ifa); m_freem(m); return; bad: nd6log((LOG_ERR, "nd6_ns_input: src=%s\n", ip6_sprintf(ip6bufs, &saddr6))); nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n", ip6_sprintf(ip6bufs, &daddr6))); nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n", ip6_sprintf(ip6bufs, &taddr6))); ICMP6STAT_INC(icp6s_badns); if (ifa != NULL) ifa_free(ifa); m_freem(m); } /* * Output a Neighbor Solicitation Message. Caller specifies: * - ICMP6 header source IP6 address * - ND6 header target IP6 address * - ND6 header source datalink address * * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) * * ln - for source address determination * dad - duplicate address detection */ void nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6, const struct in6_addr *taddr6, struct llentry *ln, int dad) { struct mbuf *m; struct m_tag *mtag; struct ip6_hdr *ip6; struct nd_neighbor_solicit *nd_ns; struct ip6_moptions im6o; int icmp6len; int maxlen; caddr_t mac; struct route_in6 ro; if (IN6_IS_ADDR_MULTICAST(taddr6)) return; /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_ns); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; if (max_linkhdr + maxlen >= MCLBYTES) { #ifdef DIAGNOSTIC printf("nd6_ns_output: max_linkhdr + maxlen >= MCLBYTES " "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES); #endif return; } MGETHDR(m, M_DONTWAIT, MT_DATA); if (m && max_linkhdr + maxlen >= MHLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); m = NULL; } } if (m == NULL) return; m->m_pkthdr.rcvif = NULL; bzero(&ro, sizeof(ro)); if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) { m->m_flags |= M_MCAST; im6o.im6o_multicast_ifp = ifp; im6o.im6o_multicast_hlim = 255; im6o.im6o_multicast_loop = 0; } icmp6len = sizeof(*nd_ns); m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len; m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */ /* fill neighbor solicitation packet */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; if (daddr6) ip6->ip6_dst = *daddr6; else { ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL; ip6->ip6_dst.s6_addr16[1] = 0; ip6->ip6_dst.s6_addr32[1] = 0; ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE; ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3]; ip6->ip6_dst.s6_addr8[12] = 0xff; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) goto bad; } if (!dad) { struct ifaddr *ifa; /* * RFC2461 7.2.2: * "If the source address of the packet prompting the * solicitation is the same as one of the addresses assigned * to the outgoing interface, that address SHOULD be placed * in the IP Source Address of the outgoing solicitation. * Otherwise, any one of the addresses assigned to the * interface should be used." * * We use the source address for the prompting packet * (saddr6), if: * - saddr6 is given from the caller (by giving "ln"), and * - saddr6 belongs to the outgoing interface. * Otherwise, we perform the source address selection as usual. */ struct in6_addr *hsrc; hsrc = NULL; if (ln != NULL) { LLE_RLOCK(ln); if (ln->la_hold != NULL) { struct ip6_hdr *hip6; /* hold ip6 */ /* * assuming every packet in la_hold has the same IP * header */ hip6 = mtod(ln->la_hold, struct ip6_hdr *); /* XXX pullup? */ if (sizeof(*hip6) < ln->la_hold->m_len) { ip6->ip6_src = hip6->ip6_src; hsrc = &hip6->ip6_src; } } LLE_RUNLOCK(ln); } if (hsrc && (ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, hsrc)) != NULL) { /* ip6_src set already. */ ifa_free(ifa); } else { int error; struct sockaddr_in6 dst_sa; struct in6_addr src_in; struct ifnet *oifp; bzero(&dst_sa, sizeof(dst_sa)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(dst_sa); dst_sa.sin6_addr = ip6->ip6_dst; oifp = ifp; error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src_in); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_ns_output: source can't be " "determined: dst=%s, error=%d\n", ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error)); goto bad; } ip6->ip6_src = src_in; } } else { /* * Source address for DAD packet must always be IPv6 * unspecified address. (0::0) * We actually don't have to 0-clear the address (we did it * above), but we do so here explicitly to make the intention * clearer. */ bzero(&ip6->ip6_src, sizeof(ip6->ip6_src)); } nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1); nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT; nd_ns->nd_ns_code = 0; nd_ns->nd_ns_reserved = 0; nd_ns->nd_ns_target = *taddr6; in6_clearscope(&nd_ns->nd_ns_target); /* XXX */ /* * Add source link-layer address option. * * spec implementation * --- --- * DAD packet MUST NOT do not add the option * there's no link layer address: * impossible do not add the option * there's link layer address: * Multicast NS MUST add one add the option * Unicast NS SHOULD add one add the option */ if (!dad && (mac = nd6_ifptomac(ifp))) { int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1); /* 8 byte alignments... */ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); } ip6->ip6_plen = htons((u_short)icmp6len); nd_ns->nd_ns_cksum = 0; nd_ns->nd_ns_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; *(unsigned short *)(mtag + 1) = nd_ns->nd_ns_type; m_tag_prepend(m, mtag); } ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_SOLICIT]); /* We don't cache this route. */ RO_RTFREE(&ro); return; bad: if (ro.ro_rt) { RTFREE(ro.ro_rt); } m_freem(m); return; } /* * Neighbor advertisement input handling. * * Based on RFC 2461 * Based on RFC 2462 (duplicate address detection) * * the following items are not implemented yet: * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) */ void nd6_na_input(struct mbuf *m, int off, int icmp6len) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_neighbor_advert *nd_na; struct in6_addr daddr6 = ip6->ip6_dst; struct in6_addr taddr6; int flags; int is_router; int is_solicited; int is_override; char *lladdr = NULL; int lladdrlen = 0; int checklink = 0; struct ifaddr *ifa; struct llentry *ln = NULL; union nd_opts ndopts; struct mbuf *chain = NULL; struct m_tag *mtag; struct sockaddr_in6 sin6; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "nd6_na_input: invalid hlim (%d) from %s to %s on %s\n", ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp))); goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len); if (nd_na == NULL) { ICMP6STAT_INC(icp6s_tooshort); return; } #endif flags = nd_na->nd_na_flags_reserved; is_router = ((flags & ND_NA_FLAG_ROUTER) != 0); is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0); is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0); taddr6 = nd_na->nd_na_target; if (in6_setscope(&taddr6, ifp, NULL)) goto bad; /* XXX: impossible */ if (IN6_IS_ADDR_MULTICAST(&taddr6)) { nd6log((LOG_ERR, "nd6_na_input: invalid target address %s\n", ip6_sprintf(ip6bufs, &taddr6))); goto bad; } if (IN6_IS_ADDR_MULTICAST(&daddr6)) if (is_solicited) { nd6log((LOG_ERR, "nd6_na_input: a solicited adv is multicasted\n")); goto bad; } icmp6len -= sizeof(*nd_na); nd6_option_init(nd_na + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "nd6_na_input: invalid ND option, ignored\n")); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); /* * Target address matches one of my interface address. * * If my address is tentative, this means that there's somebody * already using the same address as mine. This indicates DAD failure. * This is defined in RFC 2462. * * Otherwise, process as defined in RFC 2461. */ if (ifa && (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) { ifa_free(ifa); nd6_dad_na_input(ifa); goto freeit; } /* Just for safety, maybe unnecessary. */ if (ifa) { ifa_free(ifa); log(LOG_ERR, "nd6_na_input: duplicate IP6 address %s\n", ip6_sprintf(ip6bufs, &taddr6)); goto freeit; } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s " "(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6), ifp->if_addrlen, lladdrlen - 2)); goto bad; } /* * If no neighbor cache entry is found, NA SHOULD silently be * discarded. */ - IF_AFDATA_LOCK(ifp); + IF_AFDATA_RLOCK(ifp); ln = nd6_lookup(&taddr6, LLE_EXCLUSIVE, ifp); - IF_AFDATA_UNLOCK(ifp); + IF_AFDATA_RUNLOCK(ifp); if (ln == NULL) { goto freeit; } if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { /* * If the link-layer has address, and no lladdr option came, * discard the packet. */ if (ifp->if_addrlen && lladdr == NULL) { goto freeit; } /* * Record link-layer address, and update the state. */ bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); ln->la_flags |= LLE_VALID; EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); if (is_solicited) { ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (!ND6_LLINFO_PERMANENT(ln)) { nd6_llinfo_settimer_locked(ln, (long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz); } } else { ln->ln_state = ND6_LLINFO_STALE; nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } if ((ln->ln_router = is_router) != 0) { /* * This means a router's state has changed from * non-reachable to probably reachable, and might * affect the status of associated prefixes.. */ checklink = 1; } } else { int llchange; /* * Check if the link-layer address has changed or not. */ if (lladdr == NULL) llchange = 0; else { if (ln->la_flags & LLE_VALID) { if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen)) llchange = 1; else llchange = 0; } else llchange = 1; } /* * This is VERY complex. Look at it with care. * * override solicit lladdr llchange action * (L: record lladdr) * * 0 0 n -- (2c) * 0 0 y n (2b) L * 0 0 y y (1) REACHABLE->STALE * 0 1 n -- (2c) *->REACHABLE * 0 1 y n (2b) L *->REACHABLE * 0 1 y y (1) REACHABLE->STALE * 1 0 n -- (2a) * 1 0 y n (2a) L * 1 0 y y (2a) L *->STALE * 1 1 n -- (2a) *->REACHABLE * 1 1 y n (2a) L *->REACHABLE * 1 1 y y (2a) L *->REACHABLE */ if (!is_override && (lladdr != NULL && llchange)) { /* (1) */ /* * If state is REACHABLE, make it STALE. * no other updates should be done. */ if (ln->ln_state == ND6_LLINFO_REACHABLE) { ln->ln_state = ND6_LLINFO_STALE; nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } goto freeit; } else if (is_override /* (2a) */ || (!is_override && (lladdr != NULL && !llchange)) /* (2b) */ || lladdr == NULL) { /* (2c) */ /* * Update link-local address, if any. */ if (lladdr != NULL) { bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); ln->la_flags |= LLE_VALID; EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } /* * If solicited, make the state REACHABLE. * If not solicited and the link-layer address was * changed, make it STALE. */ if (is_solicited) { ln->ln_state = ND6_LLINFO_REACHABLE; ln->ln_byhint = 0; if (!ND6_LLINFO_PERMANENT(ln)) { nd6_llinfo_settimer_locked(ln, (long)ND_IFINFO(ifp)->reachable * hz); } } else { if (lladdr != NULL && llchange) { ln->ln_state = ND6_LLINFO_STALE; nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); } } } if (ln->ln_router && !is_router) { /* * The peer dropped the router flag. * Remove the sender from the Default Router List and * update the Destination Cache entries. */ struct nd_defrouter *dr; struct in6_addr *in6; in6 = &L3_ADDR_SIN6(ln)->sin6_addr; /* * Lock to protect the default router list. * XXX: this might be unnecessary, since this function * is only called under the network software interrupt * context. However, we keep it just for safety. */ dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp); if (dr) defrtrlist_del(dr); else if (ND_IFINFO(ln->lle_tbl->llt_ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * Even if the neighbor is not in the default * router list, the neighbor may be used * as a next hop for some destinations * (e.g. redirect case). So we must * call rt6_flush explicitly. */ rt6_flush(&ip6->ip6_src, ifp); } } ln->ln_router = is_router; } /* XXX - QL * Does this matter? * rt->rt_flags &= ~RTF_REJECT; */ ln->la_asked = 0; if (ln->la_hold) { struct mbuf *m_hold, *m_hold_next; /* * reset the la_hold in advance, to explicitly * prevent a la_hold lookup in nd6_output() * (wouldn't happen, though...) */ for (m_hold = ln->la_hold, ln->la_hold = NULL; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_hold->m_nextpkt = NULL; /* * we assume ifp is not a loopback here, so just set * the 2nd argument as the 1st one. */ if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; m_tag_prepend(m, mtag); } nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); } } freeit: if (ln != NULL) { if (chain) memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); LLE_WUNLOCK(ln); if (chain) nd6_output_flush(ifp, ifp, chain, &sin6, NULL); } if (checklink) pfxlist_onlink_check(); m_freem(m); return; bad: if (ln != NULL) LLE_WUNLOCK(ln); ICMP6STAT_INC(icp6s_badna); m_freem(m); } /* * Neighbor advertisement output handling. * * Based on RFC 2461 * * the following items are not implemented yet: * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD) * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD) * * tlladdr - 1 if include target link-layer address * sdl0 - sockaddr_dl (= proxy NA) or NULL */ static void nd6_na_output_fib(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, struct sockaddr *sdl0, u_int fibnum) { struct mbuf *m; struct m_tag *mtag; struct ifnet *oifp; struct ip6_hdr *ip6; struct nd_neighbor_advert *nd_na; struct ip6_moptions im6o; struct in6_addr src, daddr6; struct sockaddr_in6 dst_sa; int icmp6len, maxlen, error; caddr_t mac = NULL; struct route_in6 ro; bzero(&ro, sizeof(ro)); daddr6 = *daddr6_0; /* make a local copy for modification */ /* estimate the size of message */ maxlen = sizeof(*ip6) + sizeof(*nd_na); maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7; if (max_linkhdr + maxlen >= MCLBYTES) { #ifdef DIAGNOSTIC printf("nd6_na_output: max_linkhdr + maxlen >= MCLBYTES " "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES); #endif return; } MGETHDR(m, M_DONTWAIT, MT_DATA); if (m && max_linkhdr + maxlen >= MHLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); m = NULL; } } if (m == NULL) return; m->m_pkthdr.rcvif = NULL; M_SETFIB(m, fibnum); if (IN6_IS_ADDR_MULTICAST(&daddr6)) { m->m_flags |= M_MCAST; im6o.im6o_multicast_ifp = ifp; im6o.im6o_multicast_hlim = 255; im6o.im6o_multicast_loop = 0; } icmp6len = sizeof(*nd_na); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len; m->m_data += max_linkhdr; /* or MH_ALIGN() equivalent? */ /* fill neighbor advertisement packet */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) { /* reply to DAD */ daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL; daddr6.s6_addr16[1] = 0; daddr6.s6_addr32[1] = 0; daddr6.s6_addr32[2] = 0; daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE; if (in6_setscope(&daddr6, ifp, NULL)) goto bad; flags &= ~ND_NA_FLAG_SOLICITED; } ip6->ip6_dst = daddr6; bzero(&dst_sa, sizeof(struct sockaddr_in6)); dst_sa.sin6_family = AF_INET6; dst_sa.sin6_len = sizeof(struct sockaddr_in6); dst_sa.sin6_addr = daddr6; /* * Select a source whose scope is the same as that of the dest. */ bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa)); oifp = ifp; error = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, &oifp, &src); if (error) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "nd6_na_output: source can't be " "determined: dst=%s, error=%d\n", ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error)); goto bad; } ip6->ip6_src = src; nd_na = (struct nd_neighbor_advert *)(ip6 + 1); nd_na->nd_na_type = ND_NEIGHBOR_ADVERT; nd_na->nd_na_code = 0; nd_na->nd_na_target = *taddr6; in6_clearscope(&nd_na->nd_na_target); /* XXX */ /* * "tlladdr" indicates NS's condition for adding tlladdr or not. * see nd6_ns_input() for details. * Basically, if NS packet is sent to unicast/anycast addr, * target lladdr option SHOULD NOT be included. */ if (tlladdr) { /* * sdl0 != NULL indicates proxy NA. If we do proxy, use * lladdr in sdl0. If we are not proxying (sending NA for * my address) use lladdr configured for the interface. */ if (sdl0 == NULL) { if (ifp->if_carp) mac = (*carp_macmatch6_p)(ifp, m, taddr6); if (mac == NULL) mac = nd6_ifptomac(ifp); } else if (sdl0->sa_family == AF_LINK) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)sdl0; if (sdl->sdl_alen == ifp->if_addrlen) mac = LLADDR(sdl); } } if (tlladdr && mac) { int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen; struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1); /* roundup to 8 bytes alignment! */ optlen = (optlen + 7) & ~7; m->m_pkthdr.len += optlen; m->m_len += optlen; icmp6len += optlen; bzero((caddr_t)nd_opt, optlen); nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = optlen >> 3; bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen); } else flags &= ~ND_NA_FLAG_OVERRIDE; ip6->ip6_plen = htons((u_short)icmp6len); nd_na->nd_na_flags_reserved = flags; nd_na->nd_na_cksum = 0; nd_na->nd_na_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len); if (send_sendso_input_hook != NULL) { mtag = m_tag_get(PACKET_TAG_ND_OUTGOING, sizeof(unsigned short), M_NOWAIT); if (mtag == NULL) goto bad; *(unsigned short *)(mtag + 1) = nd_na->nd_na_type; m_tag_prepend(m, mtag); } ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL); icmp6_ifstat_inc(ifp, ifs6_out_msg); icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert); ICMP6STAT_INC(icp6s_outhist[ND_NEIGHBOR_ADVERT]); /* We don't cache this route. */ RO_RTFREE(&ro); return; bad: if (ro.ro_rt) { RTFREE(ro.ro_rt); } m_freem(m); return; } #ifndef BURN_BRIDGES void nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0, const struct in6_addr *taddr6, u_long flags, int tlladdr, struct sockaddr *sdl0) { nd6_na_output_fib(ifp, daddr6_0, taddr6, flags, tlladdr, sdl0, RT_DEFAULT_FIB); } #endif caddr_t nd6_ifptomac(struct ifnet *ifp) { switch (ifp->if_type) { case IFT_ARCNET: case IFT_ETHER: case IFT_FDDI: case IFT_IEEE1394: #ifdef IFT_L2VLAN case IFT_L2VLAN: #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif #ifdef IFT_CARP case IFT_CARP: #endif case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_ISO88025: return IF_LLADDR(ifp); default: return NULL; } } struct dadq { TAILQ_ENTRY(dadq) dad_list; struct ifaddr *dad_ifa; int dad_count; /* max NS to send */ int dad_ns_tcount; /* # of trials to send NS */ int dad_ns_ocount; /* NS sent so far */ int dad_ns_icount; int dad_na_icount; struct callout dad_timer_ch; struct vnet *dad_vnet; }; static VNET_DEFINE(TAILQ_HEAD(, dadq), dadq); VNET_DEFINE(int, dad_init) = 0; #define V_dadq VNET(dadq) #define V_dad_init VNET(dad_init) static struct dadq * nd6_dad_find(struct ifaddr *ifa) { struct dadq *dp; TAILQ_FOREACH(dp, &V_dadq, dad_list) if (dp->dad_ifa == ifa) return (dp); return (NULL); } static void nd6_dad_starttimer(struct dadq *dp, int ticks) { callout_reset(&dp->dad_timer_ch, ticks, (void (*)(void *))nd6_dad_timer, (void *)dp); } static void nd6_dad_stoptimer(struct dadq *dp) { callout_stop(&dp->dad_timer_ch); } /* * Start Duplicate Address Detection (DAD) for specified interface address. */ void nd6_dad_start(struct ifaddr *ifa, int delay) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; char ip6buf[INET6_ADDRSTRLEN]; if (!V_dad_init) { TAILQ_INIT(&V_dadq); V_dad_init++; } /* * If we don't need DAD, don't do it. * There are several cases: * - DAD is disabled (ip6_dad_count == 0) * - the interface address is anycast */ if (!(ia->ia6_flags & IN6_IFF_TENTATIVE)) { log(LOG_DEBUG, "nd6_dad_start: called with non-tentative address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); return; } if (ia->ia6_flags & IN6_IFF_ANYCAST) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (!V_ip6_dad_count) { ia->ia6_flags &= ~IN6_IFF_TENTATIVE; return; } if (ifa->ifa_ifp == NULL) panic("nd6_dad_start: ifa->ifa_ifp == NULL"); if (!(ifa->ifa_ifp->if_flags & IFF_UP)) { return; } if (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED) return; if (nd6_dad_find(ifa) != NULL) { /* DAD already in progress */ return; } dp = malloc(sizeof(*dp), M_IP6NDP, M_NOWAIT); if (dp == NULL) { log(LOG_ERR, "nd6_dad_start: memory allocation failed for " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); return; } bzero(dp, sizeof(*dp)); callout_init(&dp->dad_timer_ch, 0); #ifdef VIMAGE dp->dad_vnet = curvnet; #endif TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)dp, dad_list); nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); /* * Send NS packet for DAD, ip6_dad_count times. * Note that we must delay the first transmission, if this is the * first packet to be sent from the interface after interface * (re)initialization. */ dp->dad_ifa = ifa; ifa_ref(ifa); /* just for safety */ dp->dad_count = V_ip6_dad_count; dp->dad_ns_icount = dp->dad_na_icount = 0; dp->dad_ns_ocount = dp->dad_ns_tcount = 0; if (delay == 0) { nd6_dad_ns_output(dp, ifa); nd6_dad_starttimer(dp, (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000); } else { nd6_dad_starttimer(dp, delay); } } /* * terminate DAD unconditionally. used for address removals. */ void nd6_dad_stop(struct ifaddr *ifa) { struct dadq *dp; if (!V_dad_init) return; dp = nd6_dad_find(ifa); if (!dp) { /* DAD wasn't started yet */ return; } nd6_dad_stoptimer(dp); TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); free(dp, M_IP6NDP); dp = NULL; ifa_free(ifa); } static void nd6_dad_timer(struct dadq *dp) { CURVNET_SET(dp->dad_vnet); int s; struct ifaddr *ifa = dp->dad_ifa; struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; char ip6buf[INET6_ADDRSTRLEN]; s = splnet(); /* XXX */ /* Sanity check */ if (ia == NULL) { log(LOG_ERR, "nd6_dad_timer: called with null parameter\n"); goto done; } if (ia->ia6_flags & IN6_IFF_DUPLICATED) { log(LOG_ERR, "nd6_dad_timer: called with duplicated address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); goto done; } if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) { log(LOG_ERR, "nd6_dad_timer: called with non-tentative address " "%s(%s)\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); goto done; } /* timeouted with IFF_{RUNNING,UP} check */ if (dp->dad_ns_tcount > V_dad_maxtry) { nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n", if_name(ifa->ifa_ifp))); TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); free(dp, M_IP6NDP); dp = NULL; ifa_free(ifa); goto done; } /* Need more checks? */ if (dp->dad_ns_ocount < dp->dad_count) { /* * We have more NS to go. Send NS packet for DAD. */ nd6_dad_ns_output(dp, ifa); nd6_dad_starttimer(dp, (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000); } else { /* * We have transmitted sufficient number of DAD packets. * See what we've got. */ int duplicate; duplicate = 0; if (dp->dad_na_icount) { /* * the check is in nd6_dad_na_input(), * but just in case */ duplicate++; } if (dp->dad_ns_icount) { /* We've seen NS, means DAD has failed. */ duplicate++; } if (duplicate) { /* (*dp) will be freed in nd6_dad_duplicated() */ dp = NULL; nd6_dad_duplicated(ifa); } else { /* * We are done with DAD. No NA came, no NS came. * No duplicate address found. */ ia->ia6_flags &= ~IN6_IFF_TENTATIVE; nd6log((LOG_DEBUG, "%s: DAD complete for %s - no duplicates found\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); free(dp, M_IP6NDP); dp = NULL; ifa_free(ifa); } } done: splx(s); CURVNET_RESTORE(); } void nd6_dad_duplicated(struct ifaddr *ifa) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct ifnet *ifp; struct dadq *dp; char ip6buf[INET6_ADDRSTRLEN]; dp = nd6_dad_find(ifa); if (dp == NULL) { log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n"); return; } log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: " "NS in/out=%d/%d, NA in=%d\n", if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount); ia->ia6_flags &= ~IN6_IFF_TENTATIVE; ia->ia6_flags |= IN6_IFF_DUPLICATED; /* We are done with DAD, with duplicate address found. (failure) */ nd6_dad_stoptimer(dp); ifp = ifa->ifa_ifp; log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n", if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)); log(LOG_ERR, "%s: manual intervention required\n", if_name(ifp)); /* * If the address is a link-local address formed from an interface * identifier based on the hardware address which is supposed to be * uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP * operation on the interface SHOULD be disabled. * [RFC 4862, Section 5.4.5] */ if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) { struct in6_addr in6; /* * To avoid over-reaction, we only apply this logic when we are * very sure that hardware addresses are supposed to be unique. */ switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_ATM: case IFT_IEEE1394: #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif case IFT_INFINIBAND: in6 = ia->ia_addr.sin6_addr; if (in6_get_hw_ifid(ifp, &in6) == 0 && IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) { ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "%s: possible hardware address " "duplication detected, disable IPv6\n", if_name(ifp)); } break; } } TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list); free(dp, M_IP6NDP); dp = NULL; ifa_free(ifa); } static void nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa) { struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct ifnet *ifp = ifa->ifa_ifp; dp->dad_ns_tcount++; if ((ifp->if_flags & IFF_UP) == 0) { return; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { return; } dp->dad_ns_ocount++; nd6_ns_output(ifp, NULL, &ia->ia_addr.sin6_addr, NULL, 1); } static void nd6_dad_ns_input(struct ifaddr *ifa) { struct in6_ifaddr *ia; struct ifnet *ifp; const struct in6_addr *taddr6; struct dadq *dp; int duplicate; if (ifa == NULL) panic("ifa == NULL in nd6_dad_ns_input"); ia = (struct in6_ifaddr *)ifa; ifp = ifa->ifa_ifp; taddr6 = &ia->ia_addr.sin6_addr; duplicate = 0; dp = nd6_dad_find(ifa); /* Quickhack - completely ignore DAD NS packets */ if (V_dad_ignore_ns) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_INFO, "nd6_dad_ns_input: ignoring DAD NS packet for " "address %s(%s)\n", ip6_sprintf(ip6buf, taddr6), if_name(ifa->ifa_ifp))); return; } /* * if I'm yet to start DAD, someone else started using this address * first. I have a duplicate and you win. */ if (dp == NULL || dp->dad_ns_ocount == 0) duplicate++; /* XXX more checks for loopback situation - see nd6_dad_timer too */ if (duplicate) { dp = NULL; /* will be freed in nd6_dad_duplicated() */ nd6_dad_duplicated(ifa); } else { /* * not sure if I got a duplicate. * increment ns count and see what happens. */ if (dp) dp->dad_ns_icount++; } } static void nd6_dad_na_input(struct ifaddr *ifa) { struct dadq *dp; if (ifa == NULL) panic("ifa == NULL in nd6_dad_na_input"); dp = nd6_dad_find(ifa); if (dp) dp->dad_na_icount++; /* remove the address. */ nd6_dad_duplicated(ifa); } Index: stable/9/sys =================================================================== --- stable/9/sys (revision 260509) +++ stable/9/sys (revision 260510) Property changes on: stable/9/sys ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys:r260151,260187,260217